diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..57230dc --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +__pycache__/ +*.py[cod] +.pytest_cache/ +.idea/ +output/ +.cross-eval/output/ +cross_eval.egg-info/ diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index ab1f416..0000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,10 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml -# Ignored default folder with query files -/queries/ -# Datasource local storage ignored files -/dataSources/ -/dataSources.local.xml -# Editor-based HTTP Client requests -/httpRequests/ diff --git a/.idea/cross-eval.iml b/.idea/cross-eval.iml deleted file mode 100644 index b525243..0000000 --- a/.idea/cross-eval.iml +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml deleted file mode 100644 index 03d9549..0000000 --- a/.idea/inspectionProfiles/Project_Default.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 105ce2d..0000000 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index f632e42..0000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 02ac596..0000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/cross_eval.egg-info/PKG-INFO b/cross_eval.egg-info/PKG-INFO deleted file mode 100644 index 0f402eb..0000000 --- a/cross_eval.egg-info/PKG-INFO +++ /dev/null @@ -1,6 +0,0 @@ -Metadata-Version: 2.4 -Name: cross-eval -Version: 
0.2.0 -Summary: AI agent cross-evaluation CLI tool -Requires-Python: >=3.9 -Requires-Dist: pyyaml>=6.0 diff --git a/cross_eval.egg-info/SOURCES.txt b/cross_eval.egg-info/SOURCES.txt deleted file mode 100644 index 26a3503..0000000 --- a/cross_eval.egg-info/SOURCES.txt +++ /dev/null @@ -1,24 +0,0 @@ -README.md -pyproject.toml -cross_eval/__init__.py -cross_eval/agent.py -cross_eval/cli.py -cross_eval/config.py -cross_eval/demo.py -cross_eval/doctor.py -cross_eval/models.py -cross_eval/pipeline.py -cross_eval/prompts.py -cross_eval/report.py -cross_eval/runtime_env.py -cross_eval/worktree.py -cross_eval.egg-info/PKG-INFO -cross_eval.egg-info/SOURCES.txt -cross_eval.egg-info/dependency_links.txt -cross_eval.egg-info/entry_points.txt -cross_eval.egg-info/requires.txt -cross_eval.egg-info/top_level.txt -tests/test_agentic.py -tests/test_config.py -tests/test_onboarding.py -tests/test_pipeline_integration.py diff --git a/cross_eval.egg-info/dependency_links.txt b/cross_eval.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/cross_eval.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/cross_eval.egg-info/entry_points.txt b/cross_eval.egg-info/entry_points.txt deleted file mode 100644 index f668a8c..0000000 --- a/cross_eval.egg-info/entry_points.txt +++ /dev/null @@ -1,2 +0,0 @@ -[console_scripts] -cross-eval = cross_eval.cli:main diff --git a/cross_eval.egg-info/requires.txt b/cross_eval.egg-info/requires.txt deleted file mode 100644 index 3aecde9..0000000 --- a/cross_eval.egg-info/requires.txt +++ /dev/null @@ -1 +0,0 @@ -pyyaml>=6.0 diff --git a/cross_eval.egg-info/top_level.txt b/cross_eval.egg-info/top_level.txt deleted file mode 100644 index 59bc124..0000000 --- a/cross_eval.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -cross_eval diff --git a/cross_eval/__pycache__/__init__.cpython-312.pyc b/cross_eval/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index b05eafd..0000000 Binary files 
a/cross_eval/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/cross_eval/__pycache__/__init__.cpython-313.pyc b/cross_eval/__pycache__/__init__.cpython-313.pyc deleted file mode 100644 index db7e8b0..0000000 Binary files a/cross_eval/__pycache__/__init__.cpython-313.pyc and /dev/null differ diff --git a/cross_eval/__pycache__/agent.cpython-312.pyc b/cross_eval/__pycache__/agent.cpython-312.pyc deleted file mode 100644 index 1bda52d..0000000 Binary files a/cross_eval/__pycache__/agent.cpython-312.pyc and /dev/null differ diff --git a/cross_eval/__pycache__/agent.cpython-313.pyc b/cross_eval/__pycache__/agent.cpython-313.pyc deleted file mode 100644 index e40f194..0000000 Binary files a/cross_eval/__pycache__/agent.cpython-313.pyc and /dev/null differ diff --git a/cross_eval/__pycache__/cli.cpython-312.pyc b/cross_eval/__pycache__/cli.cpython-312.pyc deleted file mode 100644 index b3a8b4f..0000000 Binary files a/cross_eval/__pycache__/cli.cpython-312.pyc and /dev/null differ diff --git a/cross_eval/__pycache__/cli.cpython-313.pyc b/cross_eval/__pycache__/cli.cpython-313.pyc deleted file mode 100644 index b136656..0000000 Binary files a/cross_eval/__pycache__/cli.cpython-313.pyc and /dev/null differ diff --git a/cross_eval/__pycache__/config.cpython-312.pyc b/cross_eval/__pycache__/config.cpython-312.pyc deleted file mode 100644 index 08fad4d..0000000 Binary files a/cross_eval/__pycache__/config.cpython-312.pyc and /dev/null differ diff --git a/cross_eval/__pycache__/config.cpython-313.pyc b/cross_eval/__pycache__/config.cpython-313.pyc deleted file mode 100644 index ea5d029..0000000 Binary files a/cross_eval/__pycache__/config.cpython-313.pyc and /dev/null differ diff --git a/cross_eval/__pycache__/models.cpython-312.pyc b/cross_eval/__pycache__/models.cpython-312.pyc deleted file mode 100644 index e872663..0000000 Binary files a/cross_eval/__pycache__/models.cpython-312.pyc and /dev/null differ diff --git 
a/cross_eval/__pycache__/models.cpython-313.pyc b/cross_eval/__pycache__/models.cpython-313.pyc deleted file mode 100644 index 80cdbff..0000000 Binary files a/cross_eval/__pycache__/models.cpython-313.pyc and /dev/null differ diff --git a/cross_eval/__pycache__/pipeline.cpython-312.pyc b/cross_eval/__pycache__/pipeline.cpython-312.pyc deleted file mode 100644 index 4f80d03..0000000 Binary files a/cross_eval/__pycache__/pipeline.cpython-312.pyc and /dev/null differ diff --git a/cross_eval/__pycache__/pipeline.cpython-313.pyc b/cross_eval/__pycache__/pipeline.cpython-313.pyc deleted file mode 100644 index 1d67873..0000000 Binary files a/cross_eval/__pycache__/pipeline.cpython-313.pyc and /dev/null differ diff --git a/cross_eval/__pycache__/prompts.cpython-312.pyc b/cross_eval/__pycache__/prompts.cpython-312.pyc deleted file mode 100644 index 5a54dcc..0000000 Binary files a/cross_eval/__pycache__/prompts.cpython-312.pyc and /dev/null differ diff --git a/cross_eval/__pycache__/prompts.cpython-313.pyc b/cross_eval/__pycache__/prompts.cpython-313.pyc deleted file mode 100644 index e6edd0a..0000000 Binary files a/cross_eval/__pycache__/prompts.cpython-313.pyc and /dev/null differ diff --git a/cross_eval/__pycache__/report.cpython-312.pyc b/cross_eval/__pycache__/report.cpython-312.pyc deleted file mode 100644 index a89c726..0000000 Binary files a/cross_eval/__pycache__/report.cpython-312.pyc and /dev/null differ diff --git a/cross_eval/__pycache__/report.cpython-313.pyc b/cross_eval/__pycache__/report.cpython-313.pyc deleted file mode 100644 index 9b5cb39..0000000 Binary files a/cross_eval/__pycache__/report.cpython-313.pyc and /dev/null differ diff --git a/cross_eval/agent.py b/cross_eval/agent.py index 243c4a9..f2d427d 100644 --- a/cross_eval/agent.py +++ b/cross_eval/agent.py @@ -218,6 +218,7 @@ def invoke_agent( else: input_data = prompt + cmd_preview = " ".join(cmd[:6]) logger.debug("Invoking agent '%s': %s", agent.name, " ".join(cmd[:5]) + " ...") spinner: 
Optional[_Spinner] = None @@ -259,7 +260,6 @@ def invoke_agent( err_detail = result.stderr.strip() or result.stdout.strip() if err_detail and len(err_detail) > 500: err_detail = err_detail[:500] + "..." - cmd_preview = " ".join(cmd[:6]) failure_type, suggested_action = _classify_agent_failure(err_detail or "") raise AgentInvocationError( agent_name=agent.name, @@ -298,12 +298,20 @@ def invoke_agent( agent.name, step_name, ) + transcript = _build_transcript( + command_preview=cmd_preview, + stdout=result.stdout, + stderr=result.stderr, + ) + return AgentResult( output=output, exit_code=result.returncode, agent_name=agent.name, step_name=step_name, duration_seconds=round(duration, 1), + transcript=transcript, + command_preview=cmd_preview, ) @@ -360,6 +368,7 @@ def invoke_agent_agentic( f"Work in the current directory." ) + cmd_preview = " ".join(cmd[:6]) logger.debug( "Invoking agent '%s' (agentic) in worktree: %s", agent.name, worktree_path, @@ -401,7 +410,6 @@ def invoke_agent_agentic( err_detail = result.stderr.strip() or result.stdout.strip() if err_detail and len(err_detail) > 500: err_detail = err_detail[:500] + "..." 
"""Repository/service discovery helpers for autonomous execution prompts."""
from __future__ import annotations

import json
from dataclasses import dataclass, field
from pathlib import Path

# Substrings searched for in the raw text of every manifest that exists.
# "clickhouse" also covers clickhouse-driver and clickhouse_connect.
_MANIFEST_DATABASE_NEEDLES = {
    "psycopg": "postgresql",
    "asyncpg": "postgresql",
    "postgres": "postgresql",
    "mysql": "mysql",
    "pymongo": "mongodb",
    "mongodb": "mongodb",
    "mongoengine": "mongodb",
    "clickhouse": "clickhouse",
    "redis": "redis",
}

# Substrings searched for in package.json dependency *names*.  The node
# PostgreSQL driver ("pg") is deliberately absent: it is too short for
# substring matching and is handled by _is_node_postgres_driver instead.
_NODE_DEPENDENCY_NEEDLES = {
    "mysql": "mysql",
    "mongoose": "mongodb",
    "mongodb": "mongodb",
    "@clickhouse/client": "clickhouse",
    "redis": "redis",
    # Heuristic: Prisma is reported as PostgreSQL even though it supports
    # other providers.
    "prisma": "postgresql",
}

# Substrings searched for in the text of Compose files.
_COMPOSE_SERVICE_NEEDLES = {
    "clickhouse": "clickhouse",
    "postgres": "postgresql",
    "mysql": "mysql",
    "mongo": "mongodb",
    "redis": "redis",
}

# Keys of the manifest map that refer to Compose files.
_COMPOSE_KEYS = ("docker_compose", "docker_compose_alt", "compose", "compose_alt")


@dataclass
class RepoDiscovery:
    """Stack hints inferred for a repository.

    Attributes:
        languages: Language names such as ``"python"`` or ``"node"``.
        package_managers: Package manager names such as ``"pip"`` or ``"npm"``.
        databases: Database names inferred from manifests, dependency names
            and environment variable names.
        services: Database/service names found in Compose files.
        hints: Free-form sentences appended verbatim to the summary.
    """

    languages: set[str] = field(default_factory=set)
    package_managers: set[str] = field(default_factory=set)
    databases: set[str] = field(default_factory=set)
    services: set[str] = field(default_factory=set)
    hints: list[str] = field(default_factory=list)


def _read_text(path: Path) -> str:
    """Return the UTF-8 text of *path*, or ``""`` if it cannot be read."""
    try:
        return path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return ""


def _add_if_contains(target: set[str], content: str, mapping: dict[str, str]) -> None:
    """Add ``mapping[needle]`` to *target* for each needle found in *content*.

    Matching is a case-insensitive substring test; needles are expected to be
    lowercase.
    """
    lowered = content.lower()
    for needle, name in mapping.items():
        if needle in lowered:
            target.add(name)


def _parse_package_json(text: str) -> dict[str, object]:
    """Parse package.json *text*, returning ``{}`` for anything unusable.

    Empty text, invalid JSON and valid JSON whose top level is not an object
    all yield an empty dict, so callers can rely on ``.get``.
    """
    try:
        data = json.loads(text or "{}")
    except json.JSONDecodeError:
        return {}
    return data if isinstance(data, dict) else {}


def _node_dependency_names(package_json: dict[str, object]) -> set[str]:
    """Return lowercased names from ``dependencies`` and ``devDependencies``.

    Sections that are missing or are not JSON objects are ignored.
    """
    names: set[str] = set()
    for section in ("dependencies", "devDependencies"):
        deps = package_json.get(section)
        if isinstance(deps, dict):
            names.update(str(name).lower() for name in deps)
    return names


def _is_node_postgres_driver(dep_name: str) -> bool:
    """Return True for ``pg`` and ``pg-*`` packages, scoped or not.

    A plain substring test for "pg" would also match unrelated packages
    (anything containing "upgrade", for example), so only the last path
    segment of the package name is compared.
    """
    base = dep_name.rsplit("/", 1)[-1]
    return base == "pg" or base.startswith("pg-")


def discover_repo(project_root: Path, env_names: set[str] | None = None) -> RepoDiscovery:
    """Infer runtime-relevant stack hints from common manifest/config files.

    Args:
        project_root: Directory whose manifests are inspected.
        env_names: Names of available environment variables.  Only the names
            are examined (case-insensitively), never the values.

    Returns:
        A ``RepoDiscovery`` describing detected languages, package managers,
        databases, Compose services and free-form hints.  Unreadable or
        malformed manifests are ignored rather than raising.
    """
    discovery = RepoDiscovery()
    upper_env_names = {name.upper() for name in (env_names or set())}

    file_map = {
        "pyproject": project_root / "pyproject.toml",
        "requirements": project_root / "requirements.txt",
        "package": project_root / "package.json",
        "docker_compose": project_root / "docker-compose.yml",
        "docker_compose_alt": project_root / "docker-compose.yaml",
        "compose": project_root / "compose.yaml",
        "compose_alt": project_root / "compose.yml",
        "prisma": project_root / "prisma" / "schema.prisma",
    }

    # Read every existing manifest exactly once; all later checks use this map.
    manifests = {
        name: _read_text(path)
        for name, path in file_map.items()
        if path.exists()
    }

    if "pyproject" in manifests or "requirements" in manifests:
        discovery.languages.add("python")
        # requirements.txt is a pip format, so it implies pip just like
        # pyproject.toml does.
        discovery.package_managers.add("pip")

    package_json: dict[str, object] = {}
    if "package" in manifests:
        discovery.languages.add("node")
        package_json = _parse_package_json(manifests["package"])
        pm = package_json.get("packageManager")
        # "pnpm@9.1.0" -> "pnpm"; fall back to npm when the field is missing,
        # not a string, or has no name before the "@".
        pm_name = pm.split("@", 1)[0].strip() if isinstance(pm, str) else ""
        discovery.package_managers.add(pm_name or "npm")

    combined = "\n".join(manifests.values())
    _add_if_contains(discovery.databases, combined, _MANIFEST_DATABASE_NEEDLES)

    dep_names = _node_dependency_names(package_json)
    if dep_names:
        _add_if_contains(
            discovery.databases,
            "\n".join(sorted(dep_names)),
            _NODE_DEPENDENCY_NEEDLES,
        )
        if any(_is_node_postgres_driver(name) for name in dep_names):
            discovery.databases.add("postgresql")

    for env_name in upper_env_names:
        if "CLICKHOUSE" in env_name or env_name.startswith("CH_"):
            discovery.databases.add("clickhouse")
        if "POSTGRES" in env_name or env_name.startswith("PG") or env_name == "DATABASE_URL":
            discovery.databases.add("postgresql")
        if "MYSQL" in env_name:
            discovery.databases.add("mysql")
        if "MONGO" in env_name:
            discovery.databases.add("mongodb")
        if "REDIS" in env_name:
            discovery.databases.add("redis")

    compose_blob = "\n".join(manifests.get(key, "") for key in _COMPOSE_KEYS)
    _add_if_contains(discovery.services, compose_blob, _COMPOSE_SERVICE_NEEDLES)

    if "prisma" in manifests:
        discovery.hints.append("Prisma schema detected.")
    if (project_root / "alembic.ini").exists():
        discovery.hints.append("Alembic migration config detected.")
    if (project_root / "docker").exists() or discovery.services:
        discovery.hints.append("Containerized services may be available for local verification.")

    return discovery


def format_repo_discovery(discovery: RepoDiscovery) -> str:
    """Render discovery results into a compact prompt summary.

    Each non-empty category becomes one line with its values sorted; hints
    follow verbatim.  A fixed fallback sentence is returned when nothing was
    detected.
    """
    lines: list[str] = []
    if discovery.languages:
        lines.append("Detected languages: " + ", ".join(sorted(discovery.languages)))
    if discovery.package_managers:
        lines.append("Likely package managers: " + ", ".join(sorted(discovery.package_managers)))
    if discovery.databases:
        lines.append("Detected databases/services in code or env: " + ", ".join(sorted(discovery.databases)))
    if discovery.services:
        lines.append("Detected local service containers: " + ", ".join(sorted(discovery.services)))
    if discovery.hints:
        lines.extend(discovery.hints)
    if not lines:
        return "No strong runtime/service signals were detected from repository manifests."
    return "\n".join(lines)
def _maybe_save_step_transcript(
    run_dir: Path,
    iteration: int,
    step_name: str,
    result: AgentResult,
) -> Path | None:
    """Write the step's execution transcript next to its output, if it has one.

    The transcript is stored through ``_save_step_output`` under the step name
    suffixed with ``_transcript``.  Returns whatever that call returns, or
    ``None`` when ``result.transcript`` is empty.
    """
    transcript = result.transcript
    if transcript:
        transcript_step = f"{step_name}_transcript"
        return _save_step_output(run_dir, iteration, transcript_step, transcript)
    return None
"""Tests for runtime env loading, repo discovery, prompt context and transcripts."""
from __future__ import annotations

import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch

from cross_eval.agent import invoke_agent
from cross_eval.config import BUILTIN_AGENTS
from cross_eval.discovery import discover_repo, format_repo_discovery
from cross_eval.models import AgentConfig, AgentResult, PipelineConfig
from cross_eval.pipeline import run_pipeline
from cross_eval.prompts import _build_simple_preset
from cross_eval.runtime_env import build_runtime_environment, summarize_environment


class RuntimeEnvTest(unittest.TestCase):
    """Checks for ``build_runtime_environment`` and ``summarize_environment``."""

    def test_build_runtime_environment_loads_dotenv_values(self) -> None:
        """Values written to a ``.env`` file show up in the loaded results."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            (root / ".env").write_text(
                "CLICKHOUSE_URL=http://localhost:8123\nDATABASE_URL=postgres://db\n",
                encoding="utf-8",
            )
            execution = PipelineConfig().execution
            env, loaded_files, loaded_values = build_runtime_environment(execution, root)

            self.assertEqual(loaded_files[0].name, ".env")
            self.assertEqual(loaded_values["CLICKHOUSE_URL"], "http://localhost:8123")
            self.assertEqual(env["DATABASE_URL"], "postgres://db")

    def test_summarize_environment_mentions_clickhouse_from_env(self) -> None:
        """The summary names the variable and flags it as ClickHouse-related."""
        execution = PipelineConfig().execution
        summary = summarize_environment(
            execution,
            [Path("/tmp/.env")],
            {"CLICKHOUSE_URL": "http://localhost:8123"},
            {"CLICKHOUSE_URL": "http://localhost:8123"},
        )
        self.assertIn("CLICKHOUSE_URL", summary)
        self.assertIn("ClickHouse-related", summary)


class RepoDiscoveryTest(unittest.TestCase):
    """Checks for ``discover_repo`` and ``format_repo_discovery``."""

    def test_discover_repo_detects_python_postgres_and_clickhouse(self) -> None:
        """A pyproject, a Compose file and env names yield the expected stack."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            (root / "pyproject.toml").write_text(
                '[project]\nname = "svc"\ndependencies = ["psycopg", "clickhouse-driver"]\n',
                encoding="utf-8",
            )
            (root / "docker-compose.yml").write_text(
                "services:\n db:\n image: postgres:16\n ch:\n image: clickhouse/clickhouse-server:latest\n",
                encoding="utf-8",
            )
            discovery = discover_repo(root, {"DATABASE_URL", "CLICKHOUSE_URL"})
            summary = format_repo_discovery(discovery)

            self.assertIn("python", discovery.languages)
            self.assertIn("postgresql", discovery.databases)
            self.assertIn("clickhouse", discovery.databases)
            self.assertIn("Detected local service containers", summary)


class PromptContextTest(unittest.TestCase):
    """Checks that ``run_pipeline`` adds runtime context sections to prompts."""

    def test_run_pipeline_injects_env_and_discovery_context_into_prompt(self) -> None:
        """Prompts passed to the agent contain the three context headings."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            (root / ".env").write_text("CLICKHOUSE_URL=http://localhost:8123\n", encoding="utf-8")
            steps = _build_simple_preset(["claude-coder"], ["claude-reviewer"], [])
            config = PipelineConfig(
                output_dir=root / "out",
                max_iterations=1,
                language="en",
                inputs={"plan": "Plan", "checklist": "Checklist"},
                agents={name: agent for name, agent in BUILTIN_AGENTS.items()},
                coders=["claude-coder"],
                reviewers=["claude-reviewer"],
                pipeline=steps,
                preset_name="simple",
            )
            # Every prompt handed to the fake agent is captured here.
            prompts: list[str] = []

            def _fake_invoke(agent_config, prompt, step_name, **kwargs):
                """Record the prompt and return a canned result without running an agent."""
                prompts.append(prompt)
                output = "VERDICT: PASS" if step_name == "review" else "coding output"
                return AgentResult(
                    output=output,
                    exit_code=0,
                    agent_name=agent_config.name,
                    step_name=step_name,
                    duration_seconds=0.1,
                    transcript="# Agent Execution Transcript",
                )

            with patch("cross_eval.pipeline.invoke_agent", side_effect=_fake_invoke):
                run_pipeline(config, cwd=root)

            joined = "\n".join(prompts)
            self.assertIn("Execution Policy", joined)
            self.assertIn("Environment Context", joined)
            self.assertIn("Repository Discovery", joined)
            self.assertIn("ClickHouse-related environment variables are available", joined)
            # Checked while the temporary directory still exists.
            self.assertTrue((root / "out").exists())


class AgentTranscriptTest(unittest.TestCase):
    """Checks that ``invoke_agent`` fills in ``AgentResult.transcript``."""

    def test_invoke_agent_records_transcript(self) -> None:
        """The transcript carries the command heading plus stdout and stderr text."""
        def _fake_run(cmd, **kwargs):
            """Stand in for ``subprocess.run`` with a fixed successful result."""
            class _Result:
                returncode = 0
                stdout = "hello"
                stderr = "warn"

            return _Result()

        agent = AgentConfig(
            name="codex-reviewer",
            command="codex",
            args=["exec", "--model", "gpt-5.4", "-"],
        )

        with patch("subprocess.run", side_effect=_fake_run):
            result = invoke_agent(agent, "prompt", "review", quiet=True)

        self.assertIn("## Command", result.transcript)
        self.assertIn("hello", result.transcript)
        self.assertIn("warn", result.transcript)


if __name__ == "__main__":
    unittest.main()