feat: add runtime discovery and execution traces
This commit is contained in:
Binary file not shown.
132
tests/test_runtime_context.py
Normal file
132
tests/test_runtime_context.py
Normal file
@@ -0,0 +1,132 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from cross_eval.agent import invoke_agent
|
||||
from cross_eval.config import BUILTIN_AGENTS
|
||||
from cross_eval.discovery import discover_repo, format_repo_discovery
|
||||
from cross_eval.models import AgentConfig, AgentResult, PipelineConfig
|
||||
from cross_eval.pipeline import run_pipeline
|
||||
from cross_eval.prompts import _build_simple_preset
|
||||
from cross_eval.runtime_env import build_runtime_environment, summarize_environment
|
||||
|
||||
|
||||
class RuntimeEnvTest(unittest.TestCase):
    """Tests for building the runtime environment and summarizing it."""

    def test_build_runtime_environment_loads_dotenv_values(self) -> None:
        """A ``.env`` file in the working directory is loaded into the environment."""
        dotenv_body = "CLICKHOUSE_URL=http://localhost:8123\nDATABASE_URL=postgres://db\n"

        with tempfile.TemporaryDirectory() as scratch:
            project_dir = Path(scratch)
            dotenv_path = project_dir / ".env"
            dotenv_path.write_text(dotenv_body, encoding="utf-8")

            execution_cfg = PipelineConfig().execution
            built = build_runtime_environment(execution_cfg, project_dir)
            env, loaded_files, loaded_values = built

            # The first loaded file must be the .env written above.
            self.assertEqual(loaded_files[0].name, ".env")
            # Values are exposed both in the parsed mapping and the final env.
            self.assertEqual(loaded_values["CLICKHOUSE_URL"], "http://localhost:8123")
            self.assertEqual(env["DATABASE_URL"], "postgres://db")

    def test_summarize_environment_mentions_clickhouse_from_env(self) -> None:
        """The summary names the ClickHouse variable and flags it as ClickHouse-related."""
        clickhouse_url = "http://localhost:8123"
        execution_cfg = PipelineConfig().execution

        # Two separate dict objects are passed on purpose, mirroring two
        # distinct arguments rather than one shared mapping.
        summary = summarize_environment(
            execution_cfg,
            [Path("/tmp/.env")],
            {"CLICKHOUSE_URL": clickhouse_url},
            {"CLICKHOUSE_URL": clickhouse_url},
        )

        for expected_fragment in ("CLICKHOUSE_URL", "ClickHouse-related"):
            self.assertIn(expected_fragment, summary)
|
||||
|
||||
|
||||
class RepoDiscoveryTest(unittest.TestCase):
    """Tests for repository discovery and its formatted summary."""

    def test_discover_repo_detects_python_postgres_and_clickhouse(self) -> None:
        """A repo with a pyproject and a compose file yields Python plus both databases."""
        # NOTE(review): the compose snippet is reproduced exactly as it appears
        # in this file (single-space nesting) — confirm it matches the intended
        # YAML layout.
        fixture_files = {
            "pyproject.toml": '[project]\nname = "svc"\ndependencies = ["psycopg", "clickhouse-driver"]\n',
            "docker-compose.yml": "services:\n db:\n image: postgres:16\n ch:\n image: clickhouse/clickhouse-server:latest\n",
        }

        with tempfile.TemporaryDirectory() as scratch:
            repo_root = Path(scratch)
            # Dict order is insertion order, so pyproject.toml is written first.
            for filename, body in fixture_files.items():
                (repo_root / filename).write_text(body, encoding="utf-8")

            env_var_names = {"DATABASE_URL", "CLICKHOUSE_URL"}
            discovery = discover_repo(repo_root, env_var_names)
            summary = format_repo_discovery(discovery)

            self.assertIn("python", discovery.languages)
            for database in ("postgresql", "clickhouse"):
                self.assertIn(database, discovery.databases)
            self.assertIn("Detected local service containers", summary)
|
||||
|
||||
|
||||
class PromptContextTest(unittest.TestCase):
    """Tests that the pipeline adds runtime context sections to agent prompts."""

    def test_run_pipeline_injects_env_and_discovery_context_into_prompt(self) -> None:
        """Prompts handed to agents contain policy, environment and discovery sections."""
        with tempfile.TemporaryDirectory() as scratch:
            workspace = Path(scratch)
            dotenv_path = workspace / ".env"
            dotenv_path.write_text("CLICKHOUSE_URL=http://localhost:8123\n", encoding="utf-8")

            coder_names = ["claude-coder"]
            reviewer_names = ["claude-reviewer"]
            output_dir = workspace / "out"

            preset_steps = _build_simple_preset(["claude-coder"], ["claude-reviewer"], [])
            pipeline_config = PipelineConfig(
                output_dir=output_dir,
                max_iterations=1,
                language="en",
                inputs={"plan": "Plan", "checklist": "Checklist"},
                agents=dict(BUILTIN_AGENTS.items()),
                coders=coder_names,
                reviewers=reviewer_names,
                pipeline=preset_steps,
                preset_name="simple",
            )
            captured_prompts: list[str] = []

            def _record_and_reply(agent_config, prompt, step_name, **kwargs):
                """Capture the prompt and return a canned successful result."""
                captured_prompts.append(prompt)
                # Review steps get a passing verdict; every other step gets
                # placeholder coding output.
                if step_name == "review":
                    canned_output = "VERDICT: PASS"
                else:
                    canned_output = "coding output"
                return AgentResult(
                    output=canned_output,
                    exit_code=0,
                    agent_name=agent_config.name,
                    step_name=step_name,
                    duration_seconds=0.1,
                    transcript="# Agent Execution Transcript",
                )

            # Replace the agent invoker as seen by the pipeline module so no
            # real agent command is run.
            with patch("cross_eval.pipeline.invoke_agent", side_effect=_record_and_reply):
                run_pipeline(pipeline_config, cwd=workspace)

            all_prompts = "\n".join(captured_prompts)
            expected_fragments = (
                "Execution Policy",
                "Environment Context",
                "Repository Discovery",
                "ClickHouse-related environment variables are available",
            )
            for fragment in expected_fragments:
                self.assertIn(fragment, all_prompts)

            # Checked inside the ``with`` block: the temporary directory is
            # removed on exit.
            self.assertTrue(output_dir.exists())
|
||||
|
||||
|
||||
class AgentTranscriptTest(unittest.TestCase):
    """Tests that invoking an agent records a transcript of the run."""

    def test_invoke_agent_records_transcript(self) -> None:
        """The transcript has a command section plus the captured stdout and stderr."""

        class _StubCompleted:
            """Minimal stand-in for the object returned by ``subprocess.run``."""

            returncode = 0
            stdout = "hello"
            stderr = "warn"

        reviewer = AgentConfig(
            name="codex-reviewer",
            command="codex",
            args=["exec", "--model", "gpt-5.4", "-"],
        )

        # A fresh stub is returned on every call; the command and keyword
        # arguments are accepted and ignored.
        stub_run = lambda cmd, **kwargs: _StubCompleted()  # noqa: E731
        with patch("subprocess.run", side_effect=stub_run):
            result = invoke_agent(reviewer, "prompt", "review", quiet=True)

        transcript = result.transcript
        for expected_fragment in ("## Command", "hello", "warn"):
            self.assertIn(expected_fragment, transcript)
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user