feat: harden runtime evidence and claude agentic validation
This commit is contained in:
@@ -127,6 +127,280 @@ class AgentTranscriptTest(unittest.TestCase):
|
||||
self.assertIn("hello", result.transcript)
|
||||
self.assertIn("warn", result.transcript)
|
||||
|
||||
def test_invoke_agent_transcript_includes_exit_code_and_duration(self) -> None:
    """Check that the transcript of a stubbed agent run reports exit code 0.

    ``subprocess.run`` is patched with a stub whose result carries
    ``returncode == 0``, ``stdout == "output"`` and an empty ``stderr``.
    """
    # NOTE(review): the test name also promises a duration check, but only the
    # exit-code heading is asserted below -- confirm whether a duration
    # assertion is missing.

    class _CompletedStub:
        returncode = 0
        stdout = "output"
        stderr = ""

    def _stub_run(cmd, **kwargs):
        # Arguments are ignored; every call yields a fresh successful result.
        return _CompletedStub()

    reviewer = AgentConfig(
        name="codex-reviewer",
        command="codex",
        args=["exec", "--model", "gpt-5.4", "-"],
    )

    with patch("subprocess.run", side_effect=_stub_run):
        outcome = invoke_agent(reviewer, "prompt", "review", quiet=True)

    self.assertIn("## Exit Code: 0", outcome.transcript)
|
||||
|
||||
|
||||
class RepoDiscoveryExtendedTest(unittest.TestCase):
    """Regression tests for broadened repo/service discovery signals."""

    def _make_repo(self, files=None) -> Path:
        """Create a temporary repo root containing *files* and return its path.

        Args:
            files: Optional mapping of file name (relative to the root) to the
                UTF-8 text to write. ``None`` yields an empty directory.

        Returns:
            Path of the temporary directory. Removal is registered with
            ``addCleanup`` so the directory stays available for the whole
            test, including any assertion that runs after discovery.
        """
        workdir = tempfile.TemporaryDirectory()
        self.addCleanup(workdir.cleanup)
        root = Path(workdir.name)
        for filename, content in (files or {}).items():
            (root / filename).write_text(content, encoding="utf-8")
        return root

    def test_discover_go_project(self) -> None:
        """A ``go.mod`` file is reported as the Go language and package manager."""
        root = self._make_repo({"go.mod": "module example.com/myapp\n\ngo 1.21\n"})
        discovery = discover_repo(root)

        self.assertIn("go", discovery.languages)
        self.assertIn("go", discovery.package_managers)

    def test_discover_rust_project(self) -> None:
        """A ``Cargo.toml`` file is reported as Rust with the cargo package manager."""
        root = self._make_repo(
            {"Cargo.toml": '[package]\nname = "myapp"\nversion = "0.1.0"\n'}
        )
        discovery = discover_repo(root)

        self.assertIn("rust", discovery.languages)
        self.assertIn("cargo", discovery.package_managers)

    def test_discover_ruby_project(self) -> None:
        """A ``Gemfile`` is reported as Ruby with the bundler package manager."""
        root = self._make_repo(
            {"Gemfile": 'source "https://rubygems.org"\ngem "rails"\n'}
        )
        discovery = discover_repo(root)

        self.assertIn("ruby", discovery.languages)
        self.assertIn("bundler", discovery.package_managers)

    def test_discover_java_gradle_project(self) -> None:
        """A ``build.gradle`` file is reported as Java with the gradle package manager."""
        root = self._make_repo({"build.gradle": "plugins { id 'java' }\n"})
        discovery = discover_repo(root)

        self.assertIn("java", discovery.languages)
        self.assertIn("gradle", discovery.package_managers)

    def test_discover_elasticsearch_from_compose(self) -> None:
        """An elasticsearch image in docker-compose is reported as a service."""
        # `image` is nested under the service key so the fixture is a
        # well-formed compose document rather than two sibling keys.
        root = self._make_repo(
            {
                "docker-compose.yml": (
                    "services:\n  es:\n    image: elasticsearch:8.10.0\n"
                ),
            }
        )
        discovery = discover_repo(root)

        self.assertIn("elasticsearch", discovery.services)

    def test_discover_kafka_from_compose(self) -> None:
        """A cp-kafka image in docker-compose is reported as the kafka service."""
        # `image` is nested under the service key so the fixture is a
        # well-formed compose document rather than two sibling keys.
        root = self._make_repo(
            {
                "docker-compose.yml": (
                    "services:\n  broker:\n    image: confluentinc/cp-kafka:latest\n"
                ),
            }
        )
        discovery = discover_repo(root)

        self.assertIn("kafka", discovery.services)

    def test_discover_rabbitmq_from_env(self) -> None:
        """A ``RABBITMQ_URL`` env key alone is enough to report rabbitmq."""
        # NOTE(review): rabbitmq is expected under `databases` here, while the
        # kafka compose test expects `services` -- confirm this is intentional.
        discovery = discover_repo(self._make_repo(), {"RABBITMQ_URL"})

        self.assertIn("rabbitmq", discovery.databases)

    def test_discover_sqlite_from_requirements(self) -> None:
        """``aiosqlite`` in requirements.txt is reported as Python plus sqlite."""
        root = self._make_repo({"requirements.txt": "aiosqlite==0.19.0\nfastapi\n"})
        discovery = discover_repo(root)

        self.assertIn("python", discovery.languages)
        self.assertIn("sqlite", discovery.databases)

    def test_discover_dynamodb_from_env(self) -> None:
        """A ``DYNAMODB_TABLE`` env key alone is enough to report dynamodb."""
        discovery = discover_repo(self._make_repo(), {"DYNAMODB_TABLE"})

        self.assertIn("dynamodb", discovery.databases)

    def test_discover_frameworks_from_pyproject(self) -> None:
        """A ``fastapi`` dependency in pyproject.toml is reported as a framework."""
        root = self._make_repo(
            {
                "pyproject.toml": (
                    '[project]\nname = "svc"\ndependencies = ["fastapi", "uvicorn"]\n'
                ),
            }
        )
        discovery = discover_repo(root)

        self.assertIn("fastapi", discovery.frameworks)

    def test_discover_knex_hint(self) -> None:
        """A ``knexfile.js`` produces the Knex migration hint."""
        root = self._make_repo({"knexfile.js": "module.exports = {};\n"})
        discovery = discover_repo(root)

        self.assertIn("Knex migration config detected.", discovery.hints)

    def test_discover_makefile_hint(self) -> None:
        """A ``Makefile`` produces the build/task automation hint."""
        root = self._make_repo({"Makefile": "all:\n\techo hello\n"})
        discovery = discover_repo(root)

        self.assertIn("Makefile available for build/task automation.", discovery.hints)

    def test_format_repo_discovery_includes_frameworks(self) -> None:
        """The formatted summary has a frameworks heading and names express."""
        root = self._make_repo(
            {"package.json": '{"dependencies": {"express": "^4.18.0"}}'}
        )
        discovery = discover_repo(root)
        summary = format_repo_discovery(discovery)

        self.assertIn("Detected frameworks", summary)
        self.assertIn("express", summary)

    def test_discover_pnpm_lockfile(self) -> None:
        """Detect pnpm from lockfile when no packageManager field."""
        root = self._make_repo(
            {
                "package.json": '{"name": "app"}',
                "pnpm-lock.yaml": "lockfileVersion: 6\n",
            }
        )
        discovery = discover_repo(root)

        self.assertIn("pnpm", discovery.package_managers)

    def test_discover_yarn_lockfile(self) -> None:
        """Detect yarn from lockfile when no packageManager field."""
        root = self._make_repo(
            {
                "package.json": '{"name": "app"}',
                "yarn.lock": "# yarn lockfile v1\n",
            }
        )
        discovery = discover_repo(root)

        self.assertIn("yarn", discovery.package_managers)
|
||||
|
||||
|
||||
class SummarizeEnvExtendedTest(unittest.TestCase):
    """Regression tests for expanded environment summary prefixes."""

    def _summary_for(self, name: str, value: str) -> str:
        """Summarize an environment holding only ``name=value``.

        The default ``PipelineConfig().execution`` and a fixed ``/tmp/.env``
        path are used; the same one-entry mapping is supplied as both the
        third and fourth argument of ``summarize_environment``.
        """
        return summarize_environment(
            PipelineConfig().execution,
            [Path("/tmp/.env")],
            {name: value},
            {name: value},
        )

    def test_summarize_shows_mongo_env_var(self) -> None:
        """``MONGO_URI`` appears by name in the summary."""
        report = self._summary_for("MONGO_URI", "mongodb://localhost")
        self.assertIn("MONGO_URI", report)

    def test_summarize_shows_kafka_env_var(self) -> None:
        """``KAFKA_BOOTSTRAP_SERVERS`` appears by name in the summary."""
        report = self._summary_for("KAFKA_BOOTSTRAP_SERVERS", "localhost:9092")
        self.assertIn("KAFKA_BOOTSTRAP_SERVERS", report)

    def test_summarize_shows_elasticsearch_env_var(self) -> None:
        """``ELASTICSEARCH_URL`` appears by name in the summary."""
        report = self._summary_for("ELASTICSEARCH_URL", "http://localhost:9200")
        self.assertIn("ELASTICSEARCH_URL", report)
|
||||
|
||||
|
||||
class TranscriptSavingRegressionTest(unittest.TestCase):
    """Verify that transcripts are saved as step artifacts during pipeline runs."""

    def test_transcript_files_saved_during_pipeline(self) -> None:
        """Run a one-iteration simple pipeline and expect both transcript files.

        ``cross_eval.pipeline.invoke_agent`` is patched with a stub, and the
        coding and review transcripts are then looked up under ``v1`` of the
        run directory.
        """

        def _stub_invoke(agent_config, prompt, step_name, **kwargs):
            # The review step returns a passing verdict; every other step
            # returns placeholder coding output.
            if step_name == "review":
                text = "VERDICT: PASS"
            else:
                text = "coding output"
            return AgentResult(
                output=text,
                exit_code=0,
                agent_name=agent_config.name,
                step_name=step_name,
                duration_seconds=0.1,
                transcript="# Agent Execution Transcript\n\n## Command\n```\nclaude -p\n```",
            )

        with tempfile.TemporaryDirectory() as tmpdir:
            workspace = Path(tmpdir)
            preset_steps = _build_simple_preset(
                ["claude-coder"], ["claude-reviewer"], []
            )
            config = PipelineConfig(
                output_dir=workspace / "out",
                max_iterations=1,
                language="en",
                inputs={"plan": "Plan", "checklist": "Checklist"},
                agents=dict(BUILTIN_AGENTS.items()),
                coders=["claude-coder"],
                reviewers=["claude-reviewer"],
                pipeline=preset_steps,
                preset_name="simple",
            )

            with patch("cross_eval.pipeline.invoke_agent", side_effect=_stub_invoke):
                outcome = run_pipeline(config, cwd=workspace)

            # Both checks run while the temporary directory still exists.
            run_dir = outcome.run_dir
            self.assertIsNotNone(run_dir)
            for filename in ("coding_transcript.md", "review_transcript.md"):
                transcript_path = run_dir / "v1" / filename
                self.assertTrue(
                    transcript_path.exists(),
                    f"Expected transcript at {transcript_path}",
                )
|
||||
|
||||
|
||||
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user