From 3fb19e90c0f88ca583596a23dd645f3217e4677b Mon Sep 17 00:00:00 2001 From: chungyeong Date: Fri, 13 Mar 2026 22:29:22 +0900 Subject: [PATCH] feat: harden runtime evidence and claude agentic validation --- cross_eval/agent.py | 81 +++++++++- cross_eval/discovery.py | 275 +++++++++++++++++++++++++++------- cross_eval/runtime_env.py | 14 +- tests/test_agentic.py | 70 ++++++++- tests/test_runtime_context.py | 274 +++++++++++++++++++++++++++++++++ 5 files changed, 655 insertions(+), 59 deletions(-) diff --git a/cross_eval/agent.py b/cross_eval/agent.py index f2d427d..0af5949 100644 --- a/cross_eval/agent.py +++ b/cross_eval/agent.py @@ -19,6 +19,34 @@ logger = logging.getLogger(__name__) # CLI tools that support --system-prompt flag natively _SYSTEM_PROMPT_AGENTS = ("claude",) _REASONING_EFFORT_AGENTS = ("codex",) +_NO_CHANGE_ACK_MARKERS = ( + "no changes", + "no code changes", + "no file changes", + "did not make any changes", + "nothing to change", + "no modifications were necessary", + "no update was necessary", + "already satisfied", +) +_CHANGE_CLAIM_MARKERS = ( + "summary of all changes made", + "here's a summary of all changes made", + "implemented", + "i implemented", + "added", + "i added", + "updated", + "i updated", + "modified", + "i modified", + "created", + "i created", + "fixed", + "i fixed", + "completed the changes", + "finished the changes", +) class AgentInvocationError(RuntimeError): @@ -106,6 +134,16 @@ def _classify_agent_failure(detail: str) -> tuple[str, str]: ) +def _claims_file_changes(output: str) -> bool: + """Heuristic for agent text that claims code changes were made.""" + normalized = output.lower() + if not normalized.strip(): + return False + if any(marker in normalized for marker in _NO_CHANGE_ACK_MARKERS): + return False + return any(marker in normalized for marker in _CHANGE_CLAIM_MARKERS) + + class _Spinner: """Animated spinner for long-running agent calls.""" @@ -302,6 +340,9 @@ def invoke_agent( command_preview=cmd_preview, stdout=result.stdout, stderr=result.stderr, + exit_code=result.returncode, + duration_seconds=round(duration, 1), + cwd=str(cwd) if cwd else "", ) return AgentResult( @@ -424,6 +465,28 @@ def invoke_agent_agentic( diff_output = capture_diff(worktree_path) if not diff_output: + stdout_excerpt = (result.stdout or "").strip() + stderr_excerpt = (result.stderr or "").strip() + if _claims_file_changes(stdout_excerpt): + if spinner: + spinner.stop(f"[{step_name}] FAILED (empty diff)") + raw_error = stdout_excerpt or "(stdout empty)" + if stderr_excerpt: + raw_error = f"{raw_error}\n\n[stderr]\n{stderr_excerpt}" + if len(raw_error) > 2000: + raw_error = raw_error[:2000] + "..." + raise AgentInvocationError( + agent_name=agent.name, + step_name=step_name, + cmd_preview=cmd_preview, + raw_error=raw_error, + failure_type="EMPTY_DIFF", + suggested_action=( + "Agent reported code changes but produced no git diff. " + "Treat this run as failed and require a real worktree diff before continuing." + ), + ) + diff_output = "(no changes)" logger.warning( "Agent '%s' made no file changes at step '%s'", @@ -438,6 +501,9 @@ def invoke_agent_agentic( command_preview=cmd_preview, stdout=result.stdout, stderr=result.stderr, + exit_code=result.returncode, + duration_seconds=round(duration, 1), + cwd=str(worktree_path), ) return AgentResult( @@ -456,6 +522,9 @@ def _build_transcript( command_preview: str, stdout: str, stderr: str, + exit_code: int = 0, + duration_seconds: float = 0.0, + cwd: str = "", ) -> str: """Build a compact execution transcript for debugging/audit output.""" sections = [ @@ -466,6 +535,16 @@ def _build_transcript( command_preview or "(unknown command)", "```", "", + ] + if cwd: + sections.extend(["## Working Directory", f"`{cwd}`", ""]) + sections.extend([ + f"## Exit Code: {exit_code}", + "", + ]) + if duration_seconds > 0: + sections.extend([f"## Duration: {duration_seconds}s", ""]) + sections.extend([ "## Stdout", "```", (stdout or "(empty)").strip(), @@ -476,5 +555,5 @@ def _build_transcript( (stderr or "(empty)").strip(), "```", "", - ] + ]) return "\n".join(sections) diff --git a/cross_eval/discovery.py b/cross_eval/discovery.py index cbdb234..2800c66 100644 --- a/cross_eval/discovery.py +++ b/cross_eval/discovery.py @@ -12,6 +12,7 @@ class RepoDiscovery: package_managers: set[str] = field(default_factory=set) databases: set[str] = field(default_factory=set) services: set[str] = field(default_factory=set) + frameworks: set[str] = field(default_factory=set) hints: list[str] = field(default_factory=list) @@ -29,27 +30,176 @@ def _add_if_contains(target: set[str], content: str, mapping: dict[str, str]) -> target.add(name) +# Shared mapping for database signals found in manifest content +_MANIFEST_DB_SIGNALS: dict[str, str] = { + # PostgreSQL + "psycopg": "postgresql", + "asyncpg": "postgresql", + "postgres": "postgresql", + "pgx": "postgresql", + # MySQL / MariaDB + "mysql": "mysql", + "mariadb": "mysql", + "pymysql": "mysql", + # MongoDB + "pymongo": "mongodb", + "mongodb": "mongodb", + "mongoengine": "mongodb", + "mongosh": "mongodb", + # ClickHouse + "clickhouse": "clickhouse", + "clickhouse-driver": "clickhouse", + "clickhouse_connect": "clickhouse", + # Redis + "redis": "redis", + "ioredis": "redis", + # SQLite + "sqlite": "sqlite", + "better-sqlite3": "sqlite", + "aiosqlite": "sqlite", + # Elasticsearch / OpenSearch + "elasticsearch": "elasticsearch", + "opensearch": "elasticsearch", + # DynamoDB + "dynamodb": "dynamodb", + "boto3": "dynamodb", # broad but common signal + # Cassandra + "cassandra-driver": "cassandra", + "cassandra": "cassandra", + # RabbitMQ + "amqplib": "rabbitmq", + "pika": "rabbitmq", + "rabbitmq": "rabbitmq", + # Kafka + "kafka": "kafka", + "confluent-kafka": "kafka", + "kafkajs": "kafka", + # Neo4j + "neo4j": "neo4j", +} + +# Node package.json dependency → database mapping +_NODE_DEP_DB_SIGNALS: dict[str, str] = { + "pg": "postgresql", + "mysql": "mysql", + "mysql2": "mysql", + "mongoose": "mongodb", + "mongodb": "mongodb", + "@clickhouse/client": "clickhouse", + "redis": "redis", + "ioredis": "redis", + "prisma": "postgresql", + "better-sqlite3": "sqlite", + "sqlite3": "sqlite", + "@elastic/elasticsearch": "elasticsearch", + "@aws-sdk/client-dynamodb": "dynamodb", + "kafkajs": "kafka", + "amqplib": "rabbitmq", + "neo4j-driver": "neo4j", + "cassandra-driver": "cassandra", + "typeorm": "postgresql", + "sequelize": "postgresql", + "knex": "postgresql", +} + +# Docker compose service image → service name mapping +_COMPOSE_SERVICE_SIGNALS: dict[str, str] = { + "clickhouse": "clickhouse", + "postgres": "postgresql", + "mysql": "mysql", + "mariadb": "mysql", + "mongo": "mongodb", + "redis": "redis", + "elasticsearch": "elasticsearch", + "opensearch": "elasticsearch", + "rabbitmq": "rabbitmq", + "kafka": "kafka", + "zookeeper": "kafka", + "cassandra": "cassandra", + "neo4j": "neo4j", + "minio": "s3", + "localstack": "aws-local", + "dynamodb": "dynamodb", + "memcached": "memcached", + "nginx": "nginx", +} + +# Environment variable name patterns → database mapping +_ENV_DB_PATTERNS: list[tuple[str, str]] = [ + ("CLICKHOUSE", "clickhouse"), + ("CH_", "clickhouse"), + ("POSTGRES", "postgresql"), + ("PG", "postgresql"), + ("DATABASE_URL", "postgresql"), + ("MYSQL", "mysql"), + ("MARIADB", "mysql"), + ("MONGO", "mongodb"), + ("REDIS", "redis"), + ("ELASTICSEARCH", "elasticsearch"), + ("OPENSEARCH", "elasticsearch"), + ("DYNAMO", "dynamodb"), + ("CASSANDRA", "cassandra"), + ("KAFKA", "kafka"), + ("RABBIT", "rabbitmq"), + ("AMQP", "rabbitmq"), + ("NEO4J", "neo4j"), + ("SQLITE", "sqlite"), +] + + def discover_repo(project_root: Path, env_names: set[str] | None = None) -> RepoDiscovery: """Infer runtime-relevant stack hints from common manifest/config files.""" discovery = RepoDiscovery() env_names = {name.upper() for name in (env_names or set())} - file_map = { + file_map: dict[str, Path] = { "pyproject": project_root / "pyproject.toml", "requirements": project_root / "requirements.txt", + "requirements_dev": project_root / "requirements-dev.txt", + "setup_py": project_root / "setup.py", + "setup_cfg": project_root / "setup.cfg", "package": project_root / "package.json", + "go_mod": project_root / "go.mod", + "cargo": project_root / "Cargo.toml", + "gemfile": project_root / "Gemfile", + "build_gradle": project_root / "build.gradle", + "build_gradle_kts": project_root / "build.gradle.kts", + "pom": project_root / "pom.xml", + "composer": project_root / "composer.json", + "mix": project_root / "mix.exs", "docker_compose": project_root / "docker-compose.yml", "docker_compose_alt": project_root / "docker-compose.yaml", "compose": project_root / "compose.yaml", "prisma": project_root / "prisma" / "schema.prisma", + "dockerfile": project_root / "Dockerfile", } - if file_map["pyproject"].exists() or file_map["requirements"].exists(): + # ---- Language detection ---- + if ( + file_map["pyproject"].exists() + or file_map["requirements"].exists() + or file_map["requirements_dev"].exists() + or file_map["setup_py"].exists() + or file_map["setup_cfg"].exists() + ): discovery.languages.add("python") if file_map["package"].exists(): discovery.languages.add("node") + if file_map["go_mod"].exists(): + discovery.languages.add("go") + if file_map["cargo"].exists(): + discovery.languages.add("rust") + if file_map["gemfile"].exists(): + discovery.languages.add("ruby") + if file_map["build_gradle"].exists() or file_map["build_gradle_kts"].exists() or file_map["pom"].exists(): + discovery.languages.add("java") + if file_map["composer"].exists(): + discovery.languages.add("php") + if file_map["mix"].exists(): + discovery.languages.add("elixir") - if file_map["pyproject"].exists(): + # ---- Package manager detection ---- + if file_map["pyproject"].exists() or file_map["requirements"].exists() or file_map["setup_py"].exists(): discovery.package_managers.add("pip") if file_map["package"].exists(): try: @@ -60,8 +210,29 @@ def discover_repo(project_root: Path, env_names: set[str] | None = None) -> Repo if isinstance(pm, str) and pm: discovery.package_managers.add(pm.split("@", 1)[0]) else: - discovery.package_managers.add("npm") + # Check for lockfiles to distinguish npm/yarn/pnpm + if (project_root / "pnpm-lock.yaml").exists(): + discovery.package_managers.add("pnpm") + elif (project_root / "yarn.lock").exists(): + discovery.package_managers.add("yarn") + else: + discovery.package_managers.add("npm") + if file_map["go_mod"].exists(): + discovery.package_managers.add("go") + if file_map["cargo"].exists(): + discovery.package_managers.add("cargo") + if file_map["gemfile"].exists(): + discovery.package_managers.add("bundler") + if file_map["build_gradle"].exists() or file_map["build_gradle_kts"].exists(): + discovery.package_managers.add("gradle") + if file_map["pom"].exists(): + discovery.package_managers.add("maven") + if file_map["composer"].exists(): + discovery.package_managers.add("composer") + if file_map["mix"].exists(): + discovery.package_managers.add("mix") + # ---- Gather manifest content ---- manifests = { name: _read_text(path) for name, path in file_map.items() @@ -69,24 +240,10 @@ def discover_repo(project_root: Path, env_names: set[str] | None = None) -> Repo } combined = "\n".join(manifests.values()) - _add_if_contains( - discovery.databases, - combined, - { - "psycopg": "postgresql", - "asyncpg": "postgresql", - "postgres": "postgresql", - "mysql": "mysql", - "pymongo": "mongodb", - "mongodb": "mongodb", - "mongoengine": "mongodb", - "clickhouse": "clickhouse", - "clickhouse-driver": "clickhouse", - "clickhouse_connect": "clickhouse", - "redis": "redis", - }, - ) + # ---- Database detection from manifest content ---- + _add_if_contains(discovery.databases, combined, _MANIFEST_DB_SIGNALS) + # ---- Node.js dependency-specific detection ---- if file_map["package"].exists(): try: package_json = json.loads(_read_text(file_map["package"]) or "{}") @@ -97,53 +254,57 @@ def discover_repo(project_root: Path, env_names: set[str] | None = None) -> Repo **(package_json.get("devDependencies") or {}), } dep_blob = "\n".join(deps.keys()).lower() - _add_if_contains( - discovery.databases, - dep_blob, - { - "pg": "postgresql", - "mysql": "mysql", - "mongoose": "mongodb", - "mongodb": "mongodb", - "@clickhouse/client": "clickhouse", - "redis": "redis", - "prisma": "postgresql", - }, - ) + _add_if_contains(discovery.databases, dep_blob, _NODE_DEP_DB_SIGNALS) + # ---- Framework detection from manifest content ---- + _add_if_contains( + discovery.frameworks, + combined, + { + "fastapi": "fastapi", + "django": "django", + "flask": "flask", + "express": "express", + "nextjs": "next.js", + "next": "next.js", + "nestjs": "nestjs", + "spring": "spring", + "rails": "rails", + "laravel": "laravel", + "phoenix": "phoenix", + "gin": "gin", + "actix": "actix", + }, + ) + + # ---- Database detection from environment variable names ---- for env_name in env_names: - if "CLICKHOUSE" in env_name or env_name.startswith("CH_"): - discovery.databases.add("clickhouse") - if "POSTGRES" in env_name or env_name.startswith("PG") or env_name == "DATABASE_URL": - discovery.databases.add("postgresql") - if "MYSQL" in env_name: - discovery.databases.add("mysql") - if "MONGO" in env_name: - discovery.databases.add("mongodb") - if "REDIS" in env_name: - discovery.databases.add("redis") + for pattern, db_name in _ENV_DB_PATTERNS: + if pattern in env_name or env_name.startswith(pattern): + discovery.databases.add(db_name) + break + # ---- Docker compose service detection ---- compose_blob = "\n".join( manifests.get(key, "") for key in ("docker_compose", "docker_compose_alt", "compose") ).lower() - _add_if_contains( - discovery.services, - compose_blob, - { - "clickhouse": "clickhouse", - "postgres": "postgresql", - "mysql": "mysql", - "mongo": "mongodb", - "redis": "redis", - }, - ) + _add_if_contains(discovery.services, compose_blob, _COMPOSE_SERVICE_SIGNALS) + # ---- Hints from config files ---- if file_map["prisma"].exists(): discovery.hints.append("Prisma schema detected.") if (project_root / "alembic.ini").exists(): discovery.hints.append("Alembic migration config detected.") - if (project_root / "docker").exists() or discovery.services: + if (project_root / "knexfile.js").exists() or (project_root / "knexfile.ts").exists(): + discovery.hints.append("Knex migration config detected.") + if (project_root / "ormconfig.json").exists() or (project_root / "ormconfig.ts").exists(): + discovery.hints.append("TypeORM config detected.") + if (project_root / "drizzle.config.ts").exists(): + discovery.hints.append("Drizzle ORM config detected.") + if (project_root / "Makefile").exists(): + discovery.hints.append("Makefile available for build/task automation.") + if file_map["dockerfile"].exists() or (project_root / "docker").exists() or discovery.services: discovery.hints.append("Containerized services may be available for local verification.") return discovery @@ -160,6 +321,8 @@ def format_repo_discovery(discovery: RepoDiscovery) -> str: lines.append("Detected databases/services in code or env: " + ", ".join(sorted(discovery.databases))) if discovery.services: lines.append("Detected local service containers: " + ", ".join(sorted(discovery.services))) + if discovery.frameworks: + lines.append("Detected frameworks: " + ", ".join(sorted(discovery.frameworks))) if discovery.hints: lines.extend(discovery.hints) if not lines: diff --git a/cross_eval/runtime_env.py b/cross_eval/runtime_env.py index 5604585..caa5a77 100644 --- a/cross_eval/runtime_env.py +++ b/cross_eval/runtime_env.py @@ -14,9 +14,22 @@ _SUMMARY_PREFIXES = ( "PG", "POSTGRES", "MYSQL", + "MARIADB", "REDIS", + "MONGO", + "ELASTICSEARCH", + "OPENSEARCH", + "DYNAMO", + "CASSANDRA", + "KAFKA", + "RABBIT", + "AMQP", + "NEO4J", + "SQLITE", + "MEMCACHED", "AWS", "S3", + "MINIO", ) @@ -116,7 +129,6 @@ def summarize_environment( key for key in set(loaded_values) | set(env) if key.startswith(_SUMMARY_PREFIXES) - or any(prefix in key for prefix in ("CLICKHOUSE", "DATABASE", "DB_")) } ) if visible_names: diff --git a/tests/test_agentic.py b/tests/test_agentic.py index 7b3ea70..dc3768f 100644 --- a/tests/test_agentic.py +++ b/tests/test_agentic.py @@ -14,7 +14,7 @@ import unittest from pathlib import Path from unittest.mock import MagicMock, call, patch -from cross_eval.agent import invoke_agent_agentic +from cross_eval.agent import AgentInvocationError, invoke_agent_agentic from cross_eval.config import BUILTIN_AGENTS, _make_agentic from cross_eval.models import ( AgentConfig, @@ -309,6 +309,74 @@ class TestTaskFileCleanup(unittest.TestCase): self.assertFalse((wt / "CROSS_EVAL_TASK.md").exists()) +class TestAgenticEmptyDiffDetection(unittest.TestCase): + """Agentic coders should not succeed when they only claim changes in stdout.""" + + @patch("cross_eval.worktree.capture_diff", return_value="") + @patch("subprocess.run") + def test_claude_empty_diff_with_change_claim_fails( + self, mock_run: MagicMock, mock_diff: MagicMock, + ) -> None: + mock_run.return_value = MagicMock( + returncode=0, + stdout=( + "All tests pass.\n" + "Here's a summary of all changes made:\n" + "- Updated discovery.py\n" + ), + stderr="", + ) + + agent = AgentConfig( + name="claude-coder", + command="claude", + args=["--setting-sources", "user"], + agentic=True, + ) + + with tempfile.TemporaryDirectory() as td: + wt = Path(td) + _init_git_repo(wt) + + with self.assertRaises(AgentInvocationError) as ctx: + invoke_agent_agentic( + agent, "implement feature X", "coding", + worktree_path=wt, quiet=True, + ) + + self.assertEqual(ctx.exception.failure_type, "EMPTY_DIFF") + self.assertIn("summary of all changes made", ctx.exception.raw_error.lower()) + + @patch("cross_eval.worktree.capture_diff", return_value="") + @patch("subprocess.run") + def test_empty_diff_without_change_claim_is_allowed( + self, mock_run: MagicMock, mock_diff: MagicMock, + ) -> None: + mock_run.return_value = MagicMock( + returncode=0, + stdout="No changes were required; the current implementation already satisfies the task.", + stderr="", + ) + + agent = AgentConfig( + name="claude-coder", + command="claude", + args=["--setting-sources", "user"], + agentic=True, + ) + + with tempfile.TemporaryDirectory() as td: + wt = Path(td) + _init_git_repo(wt) + + result = invoke_agent_agentic( + agent, "check whether any fix is needed", "coding", + worktree_path=wt, quiet=True, + ) + + self.assertEqual(result.output, "(no changes)") + + # =================================================================== # 3. config.py tests # =================================================================== diff --git a/tests/test_runtime_context.py b/tests/test_runtime_context.py index 6c916e0..dedf224 100644 --- a/tests/test_runtime_context.py +++ b/tests/test_runtime_context.py @@ -127,6 +127,280 @@ class AgentTranscriptTest(unittest.TestCase): self.assertIn("hello", result.transcript) self.assertIn("warn", result.transcript) + def test_invoke_agent_transcript_includes_exit_code_and_duration(self) -> None: + def _fake_run(cmd, **kwargs): + class _Result: + returncode = 0 + stdout = "output" + stderr = "" + + return _Result() + + agent = AgentConfig( + name="codex-reviewer", + command="codex", + args=["exec", "--model", "gpt-5.4", "-"], + ) + + with patch("subprocess.run", side_effect=_fake_run): + result = invoke_agent(agent, "prompt", "review", quiet=True) + + self.assertIn("## Exit Code: 0", result.transcript) + + +class RepoDiscoveryExtendedTest(unittest.TestCase): + """Regression tests for broadened repo/service discovery signals.""" + + def test_discover_go_project(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + (root / "go.mod").write_text( + "module example.com/myapp\n\ngo 1.21\n", + encoding="utf-8", + ) + discovery = discover_repo(root) + + self.assertIn("go", discovery.languages) + self.assertIn("go", discovery.package_managers) + + def test_discover_rust_project(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + (root / "Cargo.toml").write_text( + '[package]\nname = "myapp"\nversion = "0.1.0"\n', + encoding="utf-8", + ) + discovery = discover_repo(root) + + self.assertIn("rust", discovery.languages) + self.assertIn("cargo", discovery.package_managers) + + def test_discover_ruby_project(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + (root / "Gemfile").write_text( + 'source "https://rubygems.org"\ngem "rails"\n', + encoding="utf-8", + ) + discovery = discover_repo(root) + + self.assertIn("ruby", discovery.languages) + self.assertIn("bundler", discovery.package_managers) + + def test_discover_java_gradle_project(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + (root / "build.gradle").write_text( + "plugins { id 'java' }\n", + encoding="utf-8", + ) + discovery = discover_repo(root) + + self.assertIn("java", discovery.languages) + self.assertIn("gradle", discovery.package_managers) + + def test_discover_elasticsearch_from_compose(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + (root / "docker-compose.yml").write_text( + "services:\n es:\n image: elasticsearch:8.10.0\n", + encoding="utf-8", + ) + discovery = discover_repo(root) + + self.assertIn("elasticsearch", discovery.services) + + def test_discover_kafka_from_compose(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + (root / "docker-compose.yml").write_text( + "services:\n broker:\n image: confluentinc/cp-kafka:latest\n", + encoding="utf-8", + ) + discovery = discover_repo(root) + + self.assertIn("kafka", discovery.services) + + def test_discover_rabbitmq_from_env(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + discovery = discover_repo(root, {"RABBITMQ_URL"}) + + self.assertIn("rabbitmq", discovery.databases) + + def test_discover_sqlite_from_requirements(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + (root / "requirements.txt").write_text( + "aiosqlite==0.19.0\nfastapi\n", + encoding="utf-8", + ) + discovery = discover_repo(root) + + self.assertIn("python", discovery.languages) + self.assertIn("sqlite", discovery.databases) + + def test_discover_dynamodb_from_env(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + discovery = discover_repo(root, {"DYNAMODB_TABLE"}) + + self.assertIn("dynamodb", discovery.databases) + + def test_discover_frameworks_from_pyproject(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + (root / "pyproject.toml").write_text( + '[project]\nname = "svc"\ndependencies = ["fastapi", "uvicorn"]\n', + encoding="utf-8", + ) + discovery = discover_repo(root) + + self.assertIn("fastapi", discovery.frameworks) + + def test_discover_knex_hint(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + (root / "knexfile.js").write_text( + "module.exports = {};\n", + encoding="utf-8", + ) + discovery = discover_repo(root) + + self.assertIn("Knex migration config detected.", discovery.hints) + + def test_discover_makefile_hint(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + (root / "Makefile").write_text( + "all:\n\techo hello\n", + encoding="utf-8", + ) + discovery = discover_repo(root) + + self.assertIn("Makefile available for build/task automation.", discovery.hints) + + def test_format_repo_discovery_includes_frameworks(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + (root / "package.json").write_text( + '{"dependencies": {"express": "^4.18.0"}}', + encoding="utf-8", + ) + discovery = discover_repo(root) + summary = format_repo_discovery(discovery) + + self.assertIn("Detected frameworks", summary) + self.assertIn("express", summary) + + def test_discover_pnpm_lockfile(self) -> None: + """Detect pnpm from lockfile when no packageManager field.""" + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + (root / "package.json").write_text( + '{"name": "app"}', + encoding="utf-8", + ) + (root / "pnpm-lock.yaml").write_text("lockfileVersion: 6\n", encoding="utf-8") + discovery = discover_repo(root) + + self.assertIn("pnpm", discovery.package_managers) + + def test_discover_yarn_lockfile(self) -> None: + """Detect yarn from lockfile when no packageManager field.""" + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + (root / "package.json").write_text( + '{"name": "app"}', + encoding="utf-8", + ) + (root / "yarn.lock").write_text("# yarn lockfile v1\n", encoding="utf-8") + discovery = discover_repo(root) + + self.assertIn("yarn", discovery.package_managers) + + +class SummarizeEnvExtendedTest(unittest.TestCase): + """Regression tests for expanded environment summary prefixes.""" + + def test_summarize_shows_mongo_env_var(self) -> None: + execution = PipelineConfig().execution + summary = summarize_environment( + execution, + [Path("/tmp/.env")], + {"MONGO_URI": "mongodb://localhost"}, + {"MONGO_URI": "mongodb://localhost"}, + ) + self.assertIn("MONGO_URI", summary) + + def test_summarize_shows_kafka_env_var(self) -> None: + execution = PipelineConfig().execution + summary = summarize_environment( + execution, + [Path("/tmp/.env")], + {"KAFKA_BOOTSTRAP_SERVERS": "localhost:9092"}, + {"KAFKA_BOOTSTRAP_SERVERS": "localhost:9092"}, + ) + self.assertIn("KAFKA_BOOTSTRAP_SERVERS", summary) + + def test_summarize_shows_elasticsearch_env_var(self) -> None: + execution = PipelineConfig().execution + summary = summarize_environment( + execution, + [Path("/tmp/.env")], + {"ELASTICSEARCH_URL": "http://localhost:9200"}, + {"ELASTICSEARCH_URL": "http://localhost:9200"}, + ) + self.assertIn("ELASTICSEARCH_URL", summary) + + +class TranscriptSavingRegressionTest(unittest.TestCase): + """Verify that transcripts are saved as step artifacts during pipeline runs.""" + + def test_transcript_files_saved_during_pipeline(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + steps = _build_simple_preset(["claude-coder"], ["claude-reviewer"], []) + config = PipelineConfig( + output_dir=root / "out", + max_iterations=1, + language="en", + inputs={"plan": "Plan", "checklist": "Checklist"}, + agents={name: agent for name, agent in BUILTIN_AGENTS.items()}, + coders=["claude-coder"], + reviewers=["claude-reviewer"], + pipeline=steps, + preset_name="simple", + ) + + def _fake_invoke(agent_config, prompt, step_name, **kwargs): + output = "VERDICT: PASS" if step_name == "review" else "coding output" + return AgentResult( + output=output, + exit_code=0, + agent_name=agent_config.name, + step_name=step_name, + duration_seconds=0.1, + transcript="# Agent Execution Transcript\n\n## Command\n```\nclaude -p\n```", + ) + + with patch("cross_eval.pipeline.invoke_agent", side_effect=_fake_invoke): + result = run_pipeline(config, cwd=root) + + # Verify transcript files were saved + run_dir = result.run_dir + self.assertIsNotNone(run_dir) + coding_transcript = run_dir / "v1" / "coding_transcript.md" + review_transcript = run_dir / "v1" / "review_transcript.md" + self.assertTrue( + coding_transcript.exists(), + f"Expected transcript at {coding_transcript}", + ) + self.assertTrue( + review_transcript.exists(), + f"Expected transcript at {review_transcript}", + ) + if __name__ == "__main__": unittest.main()