feat: add runtime discovery and execution traces

2026-03-13 21:52:13 +09:00
parent 941304398d
commit 28dd794f54
35 changed files with 376 additions and 88 deletions
--- a/tests/__pycache__/test_config.cpython-312.pyc
+++ b/tests/__pycache__/test_config.cpython-312.pyc
--- a/tests/test_runtime_context.py
+++ b/tests/test_runtime_context.py
@@ -0,0 +1,132 @@
+from __future__ import annotations
+
+import tempfile
+import unittest
+from pathlib import Path
+from unittest.mock import patch
+
+from cross_eval.agent import invoke_agent
+from cross_eval.config import BUILTIN_AGENTS
+from cross_eval.discovery import discover_repo, format_repo_discovery
+from cross_eval.models import AgentConfig, AgentResult, PipelineConfig
+from cross_eval.pipeline import run_pipeline
+from cross_eval.prompts import _build_simple_preset
+from cross_eval.runtime_env import build_runtime_environment, summarize_environment
+
+
+class RuntimeEnvTest(unittest.TestCase):
+    """Exercise ``build_runtime_environment`` and ``summarize_environment``."""
+
+    def test_build_runtime_environment_loads_dotenv_values(self) -> None:
+        """Entries written to a ``.env`` file appear in the returned values."""
+        dotenv_body = "CLICKHOUSE_URL=http://localhost:8123\nDATABASE_URL=postgres://db\n"
+        with tempfile.TemporaryDirectory() as tmpdir:
+            workdir = Path(tmpdir)
+            (workdir / ".env").write_text(dotenv_body, encoding="utf-8")
+            settings = PipelineConfig().execution
+            merged, files, values = build_runtime_environment(settings, workdir)
+
+        # Assertions run after the temporary directory has been cleaned up.
+        self.assertEqual(files[0].name, ".env")
+        self.assertEqual(values["CLICKHOUSE_URL"], "http://localhost:8123")
+        self.assertEqual(merged["DATABASE_URL"], "postgres://db")
+
+    def test_summarize_environment_mentions_clickhouse_from_env(self) -> None:
+        """The summary names the variable and includes the ClickHouse wording."""
+        ch_vars = {"CLICKHOUSE_URL": "http://localhost:8123"}
+        report = summarize_environment(
+            PipelineConfig().execution, [Path("/tmp/.env")], dict(ch_vars), dict(ch_vars)
+        )
+        for fragment in ("CLICKHOUSE_URL", "ClickHouse-related"):
+            self.assertIn(fragment, report)
+
+
+class RepoDiscoveryTest(unittest.TestCase):
+    """Exercise ``discover_repo`` and ``format_repo_discovery`` on a scratch repo."""
+
+    def test_discover_repo_detects_python_postgres_and_clickhouse(self) -> None:
+        """A pyproject plus a compose file yield python, postgresql and clickhouse."""
+        pyproject = '[project]\nname = "svc"\ndependencies = ["psycopg", "clickhouse-driver"]\n'
+        compose = "services:\n  db:\n    image: postgres:16\n  ch:\n    image: clickhouse/clickhouse-server:latest\n"
+        with tempfile.TemporaryDirectory() as tmpdir:
+            repo = Path(tmpdir)
+            # Write both fixture files into the scratch repository.
+            for filename, text in (("pyproject.toml", pyproject), ("docker-compose.yml", compose)):
+                (repo / filename).write_text(text, encoding="utf-8")
+            found = discover_repo(repo, {"DATABASE_URL", "CLICKHOUSE_URL"})
+            rendered = format_repo_discovery(found)
+
+        self.assertIn("python", found.languages)
+        for engine in ("postgresql", "clickhouse"):
+            self.assertIn(engine, found.databases)
+        self.assertIn("Detected local service containers", rendered)
+
+
+class PromptContextTest(unittest.TestCase):
+    """Exercise the prompt text that ``run_pipeline`` hands to agents."""
+
+    def test_run_pipeline_injects_env_and_discovery_context_into_prompt(self) -> None:
+        """Prompts captured from a stubbed run contain the expected context headings."""
+        captured: list[str] = []
+
+        def _fake_invoke(agent_config, prompt, step_name, **kwargs):
+            # Record the prompt; only the "review" step answers with a passing verdict.
+            captured.append(prompt)
+            reply = "coding output" if step_name != "review" else "VERDICT: PASS"
+            return AgentResult(
+                output=reply, exit_code=0, agent_name=agent_config.name, step_name=step_name,
+                duration_seconds=0.1, transcript="# Agent Execution Transcript",
+            )
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            workdir = Path(tmpdir)
+            (workdir / ".env").write_text("CLICKHOUSE_URL=http://localhost:8123\n", encoding="utf-8")
+            config = PipelineConfig(
+                output_dir=workdir / "out",
+                max_iterations=1,
+                language="en",
+                inputs={"plan": "Plan", "checklist": "Checklist"},
+                agents=dict(BUILTIN_AGENTS),
+                coders=["claude-coder"],
+                reviewers=["claude-reviewer"],
+                pipeline=_build_simple_preset(["claude-coder"], ["claude-reviewer"], []),
+                preset_name="simple",
+            )
+            with patch("cross_eval.pipeline.invoke_agent", side_effect=_fake_invoke):
+                run_pipeline(config, cwd=workdir)
+
+            combined = "\n".join(captured)
+            for marker in (
+                "Execution Policy", "Environment Context", "Repository Discovery",
+                "ClickHouse-related environment variables are available",
+            ):
+                self.assertIn(marker, combined)
+            self.assertTrue((workdir / "out").exists())
+
+
+class AgentTranscriptTest(unittest.TestCase):
+    """Exercise the transcript that ``invoke_agent`` attaches to its result."""
+
+    def test_invoke_agent_records_transcript(self) -> None:
+        """The transcript carries a command heading and the stubbed process output."""
+        class _FakeCompleted:
+            # Stand-in for the object the patched ``subprocess.run`` returns.
+            returncode, stdout, stderr = 0, "hello", "warn"
+
+        def _fake_run(cmd, **kwargs):
+            return _FakeCompleted()
+
+        cli_args = ["exec", "--model", "gpt-5.4", "-"]
+        reviewer = AgentConfig(name="codex-reviewer", command="codex", args=cli_args)
+
+        with patch("subprocess.run", side_effect=_fake_run):
+            outcome = invoke_agent(reviewer, "prompt", "review", quiet=True)
+
+        # The transcript must show the command heading plus both output streams.
+        transcript = outcome.transcript
+        for needle in ("## Command", "hello", "warn"):
+            self.assertIn(needle, transcript)
+
+
+if __name__ == "__main__":  # only when executed directly, not when imported
+    unittest.main()  # hand control to unittest's command-line test runner