diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..57230dc
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+__pycache__/
+*.py[cod]
+.pytest_cache/
+.idea/
+output/
+.cross-eval/output/
+cross_eval.egg-info/
diff --git a/.idea/.gitignore b/.idea/.gitignore
deleted file mode 100644
index ab1f416..0000000
--- a/.idea/.gitignore
+++ /dev/null
@@ -1,10 +0,0 @@
-# Default ignored files
-/shelf/
-/workspace.xml
-# Ignored default folder with query files
-/queries/
-# Datasource local storage ignored files
-/dataSources/
-/dataSources.local.xml
-# Editor-based HTTP Client requests
-/httpRequests/
diff --git a/.idea/cross-eval.iml b/.idea/cross-eval.iml
deleted file mode 100644
index b525243..0000000
--- a/.idea/cross-eval.iml
+++ /dev/null
@@ -1,14 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
deleted file mode 100644
index 03d9549..0000000
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
deleted file mode 100644
index 105ce2d..0000000
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
deleted file mode 100644
index f632e42..0000000
--- a/.idea/misc.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
deleted file mode 100644
index 02ac596..0000000
--- a/.idea/modules.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/cross_eval.egg-info/PKG-INFO b/cross_eval.egg-info/PKG-INFO
deleted file mode 100644
index 0f402eb..0000000
--- a/cross_eval.egg-info/PKG-INFO
+++ /dev/null
@@ -1,6 +0,0 @@
-Metadata-Version: 2.4
-Name: cross-eval
-Version: 0.2.0
-Summary: AI agent cross-evaluation CLI tool
-Requires-Python: >=3.9
-Requires-Dist: pyyaml>=6.0
diff --git a/cross_eval.egg-info/SOURCES.txt b/cross_eval.egg-info/SOURCES.txt
deleted file mode 100644
index 26a3503..0000000
--- a/cross_eval.egg-info/SOURCES.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-README.md
-pyproject.toml
-cross_eval/__init__.py
-cross_eval/agent.py
-cross_eval/cli.py
-cross_eval/config.py
-cross_eval/demo.py
-cross_eval/doctor.py
-cross_eval/models.py
-cross_eval/pipeline.py
-cross_eval/prompts.py
-cross_eval/report.py
-cross_eval/runtime_env.py
-cross_eval/worktree.py
-cross_eval.egg-info/PKG-INFO
-cross_eval.egg-info/SOURCES.txt
-cross_eval.egg-info/dependency_links.txt
-cross_eval.egg-info/entry_points.txt
-cross_eval.egg-info/requires.txt
-cross_eval.egg-info/top_level.txt
-tests/test_agentic.py
-tests/test_config.py
-tests/test_onboarding.py
-tests/test_pipeline_integration.py
diff --git a/cross_eval.egg-info/dependency_links.txt b/cross_eval.egg-info/dependency_links.txt
deleted file mode 100644
index 8b13789..0000000
--- a/cross_eval.egg-info/dependency_links.txt
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/cross_eval.egg-info/entry_points.txt b/cross_eval.egg-info/entry_points.txt
deleted file mode 100644
index f668a8c..0000000
--- a/cross_eval.egg-info/entry_points.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-[console_scripts]
-cross-eval = cross_eval.cli:main
diff --git a/cross_eval.egg-info/requires.txt b/cross_eval.egg-info/requires.txt
deleted file mode 100644
index 3aecde9..0000000
--- a/cross_eval.egg-info/requires.txt
+++ /dev/null
@@ -1 +0,0 @@
-pyyaml>=6.0
diff --git a/cross_eval.egg-info/top_level.txt b/cross_eval.egg-info/top_level.txt
deleted file mode 100644
index 59bc124..0000000
--- a/cross_eval.egg-info/top_level.txt
+++ /dev/null
@@ -1 +0,0 @@
-cross_eval
diff --git a/cross_eval/__pycache__/__init__.cpython-312.pyc b/cross_eval/__pycache__/__init__.cpython-312.pyc
deleted file mode 100644
index b05eafd..0000000
Binary files a/cross_eval/__pycache__/__init__.cpython-312.pyc and /dev/null differ
diff --git a/cross_eval/__pycache__/__init__.cpython-313.pyc b/cross_eval/__pycache__/__init__.cpython-313.pyc
deleted file mode 100644
index db7e8b0..0000000
Binary files a/cross_eval/__pycache__/__init__.cpython-313.pyc and /dev/null differ
diff --git a/cross_eval/__pycache__/agent.cpython-312.pyc b/cross_eval/__pycache__/agent.cpython-312.pyc
deleted file mode 100644
index 1bda52d..0000000
Binary files a/cross_eval/__pycache__/agent.cpython-312.pyc and /dev/null differ
diff --git a/cross_eval/__pycache__/agent.cpython-313.pyc b/cross_eval/__pycache__/agent.cpython-313.pyc
deleted file mode 100644
index e40f194..0000000
Binary files a/cross_eval/__pycache__/agent.cpython-313.pyc and /dev/null differ
diff --git a/cross_eval/__pycache__/cli.cpython-312.pyc b/cross_eval/__pycache__/cli.cpython-312.pyc
deleted file mode 100644
index b3a8b4f..0000000
Binary files a/cross_eval/__pycache__/cli.cpython-312.pyc and /dev/null differ
diff --git a/cross_eval/__pycache__/cli.cpython-313.pyc b/cross_eval/__pycache__/cli.cpython-313.pyc
deleted file mode 100644
index b136656..0000000
Binary files a/cross_eval/__pycache__/cli.cpython-313.pyc and /dev/null differ
diff --git a/cross_eval/__pycache__/config.cpython-312.pyc b/cross_eval/__pycache__/config.cpython-312.pyc
deleted file mode 100644
index 08fad4d..0000000
Binary files a/cross_eval/__pycache__/config.cpython-312.pyc and /dev/null differ
diff --git a/cross_eval/__pycache__/config.cpython-313.pyc b/cross_eval/__pycache__/config.cpython-313.pyc
deleted file mode 100644
index ea5d029..0000000
Binary files a/cross_eval/__pycache__/config.cpython-313.pyc and /dev/null differ
diff --git a/cross_eval/__pycache__/models.cpython-312.pyc b/cross_eval/__pycache__/models.cpython-312.pyc
deleted file mode 100644
index e872663..0000000
Binary files a/cross_eval/__pycache__/models.cpython-312.pyc and /dev/null differ
diff --git a/cross_eval/__pycache__/models.cpython-313.pyc b/cross_eval/__pycache__/models.cpython-313.pyc
deleted file mode 100644
index 80cdbff..0000000
Binary files a/cross_eval/__pycache__/models.cpython-313.pyc and /dev/null differ
diff --git a/cross_eval/__pycache__/pipeline.cpython-312.pyc b/cross_eval/__pycache__/pipeline.cpython-312.pyc
deleted file mode 100644
index 4f80d03..0000000
Binary files a/cross_eval/__pycache__/pipeline.cpython-312.pyc and /dev/null differ
diff --git a/cross_eval/__pycache__/pipeline.cpython-313.pyc b/cross_eval/__pycache__/pipeline.cpython-313.pyc
deleted file mode 100644
index 1d67873..0000000
Binary files a/cross_eval/__pycache__/pipeline.cpython-313.pyc and /dev/null differ
diff --git a/cross_eval/__pycache__/prompts.cpython-312.pyc b/cross_eval/__pycache__/prompts.cpython-312.pyc
deleted file mode 100644
index 5a54dcc..0000000
Binary files a/cross_eval/__pycache__/prompts.cpython-312.pyc and /dev/null differ
diff --git a/cross_eval/__pycache__/prompts.cpython-313.pyc b/cross_eval/__pycache__/prompts.cpython-313.pyc
deleted file mode 100644
index e6edd0a..0000000
Binary files a/cross_eval/__pycache__/prompts.cpython-313.pyc and /dev/null differ
diff --git a/cross_eval/__pycache__/report.cpython-312.pyc b/cross_eval/__pycache__/report.cpython-312.pyc
deleted file mode 100644
index a89c726..0000000
Binary files a/cross_eval/__pycache__/report.cpython-312.pyc and /dev/null differ
diff --git a/cross_eval/__pycache__/report.cpython-313.pyc b/cross_eval/__pycache__/report.cpython-313.pyc
deleted file mode 100644
index 9b5cb39..0000000
Binary files a/cross_eval/__pycache__/report.cpython-313.pyc and /dev/null differ
diff --git a/cross_eval/agent.py b/cross_eval/agent.py
index 243c4a9..f2d427d 100644
--- a/cross_eval/agent.py
+++ b/cross_eval/agent.py
@@ -218,6 +218,7 @@ def invoke_agent(
else:
input_data = prompt
+ cmd_preview = " ".join(cmd[:6])
logger.debug("Invoking agent '%s': %s", agent.name, " ".join(cmd[:5]) + " ...")
spinner: Optional[_Spinner] = None
@@ -259,7 +260,6 @@ def invoke_agent(
err_detail = result.stderr.strip() or result.stdout.strip()
if err_detail and len(err_detail) > 500:
err_detail = err_detail[:500] + "..."
- cmd_preview = " ".join(cmd[:6])
failure_type, suggested_action = _classify_agent_failure(err_detail or "")
raise AgentInvocationError(
agent_name=agent.name,
@@ -298,12 +298,20 @@ def invoke_agent(
agent.name, step_name,
)
+ transcript = _build_transcript(
+ command_preview=cmd_preview,
+ stdout=result.stdout,
+ stderr=result.stderr,
+ )
+
return AgentResult(
output=output,
exit_code=result.returncode,
agent_name=agent.name,
step_name=step_name,
duration_seconds=round(duration, 1),
+ transcript=transcript,
+ command_preview=cmd_preview,
)
@@ -360,6 +368,7 @@ def invoke_agent_agentic(
f"Work in the current directory."
)
+ cmd_preview = " ".join(cmd[:6])
logger.debug(
"Invoking agent '%s' (agentic) in worktree: %s",
agent.name, worktree_path,
@@ -401,7 +410,6 @@ def invoke_agent_agentic(
err_detail = result.stderr.strip() or result.stdout.strip()
if err_detail and len(err_detail) > 500:
err_detail = err_detail[:500] + "..."
- cmd_preview = " ".join(cmd[:6])
failure_type, suggested_action = _classify_agent_failure(err_detail or "")
raise AgentInvocationError(
agent_name=agent.name,
@@ -426,10 +434,47 @@ def invoke_agent_agentic(
if spinner:
spinner.stop(f"[{step_name}] done — {chars} chars (agentic)")
+ transcript = _build_transcript(
+ command_preview=cmd_preview,
+ stdout=result.stdout,
+ stderr=result.stderr,
+ )
+
return AgentResult(
output=diff_output,
exit_code=result.returncode,
agent_name=agent.name,
step_name=step_name,
duration_seconds=round(duration, 1),
+ transcript=transcript,
+ command_preview=cmd_preview,
)
+
+
+def _build_transcript(
+    *,
+    command_preview: str,
+    stdout: str,
+    stderr: str,
+) -> str:
+    """Build a compact Markdown transcript of one agent invocation.
+
+    Args:
+        command_preview: Shortened command line shown under "## Command".
+        stdout: Captured standard output of the agent process.
+        stderr: Captured standard error of the agent process.
+
+    Returns:
+        A Markdown document with fenced "Command", "Stdout" and "Stderr"
+        sections, ending with a trailing newline.
+    """
+
+    def _stream_body(text: str) -> str:
+        # Strip first, then fall back: whitespace-only output must render as
+        # the "(empty)" placeholder rather than as an empty fenced block.
+        return (text or "").strip() or "(empty)"
+
+    sections = [
+        "# Agent Execution Transcript",
+        "",
+        "## Command",
+        "```",
+        command_preview or "(unknown command)",
+        "```",
+        "",
+        "## Stdout",
+        "```",
+        _stream_body(stdout),
+        "```",
+        "",
+        "## Stderr",
+        "```",
+        _stream_body(stderr),
+        "```",
+        "",  # final empty element makes the joined text end with "\n"
+    ]
+    return "\n".join(sections)
diff --git a/cross_eval/discovery.py b/cross_eval/discovery.py
new file mode 100644
index 0000000..cbdb234
--- /dev/null
+++ b/cross_eval/discovery.py
@@ -0,0 +1,167 @@
+"""Repository/service discovery helpers for autonomous execution prompts."""
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field
+from pathlib import Path
+
+
+@dataclass
+class RepoDiscovery:
+    """Mutable bag of stack hints collected while scanning a repository.
+
+    Every field starts out empty and gets its own fresh container per
+    instance (via ``default_factory``), so instances never share state.
+    """
+
+    # Language identifiers such as "python" or "node".
+    languages: set[str] = field(default_factory=set)
+    # Package manager names such as "pip" or "npm".
+    package_managers: set[str] = field(default_factory=set)
+    # Database names inferred from manifests, dependencies or env var names.
+    databases: set[str] = field(default_factory=set)
+    # Service names inferred from compose files.
+    services: set[str] = field(default_factory=set)
+    # Free-form sentences appended verbatim to the rendered summary.
+    hints: list[str] = field(default_factory=list)
+
+
+def _read_text(path: Path) -> str:
+    """Return the UTF-8 text of *path*, or ``""`` if it cannot be read or decoded."""
+    try:
+        content = path.read_text(encoding="utf-8")
+    except (OSError, UnicodeDecodeError):
+        # Best-effort helper: missing, unreadable or non-UTF-8 files count as empty.
+        content = ""
+    return content
+
+
+def _add_if_contains(target: set[str], content: str, mapping: dict[str, str]) -> None:
+    """Add to *target* the value of every *mapping* key found in *content*.
+
+    *content* is lower-cased before matching and keys are compared as plain
+    substrings, so keys are expected to be lower-case already.
+    """
+    haystack = content.lower()
+    target.update(label for needle, label in mapping.items() if needle in haystack)
+
+
+def _parse_package_json(text: str) -> dict[str, object]:
+    """Parse ``package.json`` text, returning ``{}`` unless it is a JSON object."""
+    try:
+        parsed = json.loads(text or "{}")
+    except json.JSONDecodeError:
+        return {}
+    # Valid JSON may still be a list/string/number; callers rely on ``dict.get``.
+    return parsed if isinstance(parsed, dict) else {}
+
+
+def discover_repo(project_root: Path, env_names: set[str] | None = None) -> RepoDiscovery:
+    """Infer runtime-relevant stack hints from common manifest/config files.
+
+    Args:
+        project_root: Directory whose top-level manifests are inspected.
+        env_names: Names (not values) of known environment variables; they are
+            upper-cased before matching.
+
+    Returns:
+        A ``RepoDiscovery`` holding detected languages, package managers,
+        databases, compose services and free-form hints. Detection is
+        heuristic (mostly substring matching), so results are hints only.
+    """
+    discovery = RepoDiscovery()
+    env_names = {name.upper() for name in (env_names or set())}
+
+    file_map = {
+        "pyproject": project_root / "pyproject.toml",
+        "requirements": project_root / "requirements.txt",
+        "package": project_root / "package.json",
+        "docker_compose": project_root / "docker-compose.yml",
+        "docker_compose_alt": project_root / "docker-compose.yaml",
+        "compose": project_root / "compose.yaml",
+        "prisma": project_root / "prisma" / "schema.prisma",
+    }
+    # Read every manifest that exists exactly once; unreadable files yield "".
+    manifests = {
+        name: _read_text(path)
+        for name, path in file_map.items()
+        if path.exists()
+    }
+
+    if "pyproject" in manifests or "requirements" in manifests:
+        discovery.languages.add("python")
+        # requirements.txt is a pip manifest as well, so either file implies pip.
+        discovery.package_managers.add("pip")
+
+    package_json: dict[str, object] = {}
+    if "package" in manifests:
+        discovery.languages.add("node")
+        # Parsed once and reused below for the dependency scan.
+        package_json = _parse_package_json(manifests["package"])
+        pm = package_json.get("packageManager")
+        # "pnpm@8.6.0" -> "pnpm"; fall back to npm when the field is absent/blank.
+        pm_name = pm.split("@", 1)[0].strip() if isinstance(pm, str) else ""
+        discovery.package_managers.add(pm_name or "npm")
+
+    combined = "\n".join(manifests.values())
+    _add_if_contains(
+        discovery.databases,
+        combined,
+        {
+            "psycopg": "postgresql",
+            "asyncpg": "postgresql",
+            "postgres": "postgresql",
+            "mysql": "mysql",
+            "pymongo": "mongodb",
+            "mongodb": "mongodb",
+            "mongoengine": "mongodb",
+            "clickhouse": "clickhouse",
+            "clickhouse-driver": "clickhouse",
+            "clickhouse_connect": "clickhouse",
+            "redis": "redis",
+        },
+    )
+
+    if "package" in manifests:
+        dep_names: set[str] = set()
+        for section in ("dependencies", "devDependencies"):
+            entries = package_json.get(section)
+            # Ignore malformed sections (null, list, string) instead of crashing.
+            if isinstance(entries, dict):
+                dep_names.update(key.lower() for key in entries)
+        _add_if_contains(
+            discovery.databases,
+            "\n".join(sorted(dep_names)),
+            {
+                "mysql": "mysql",
+                "mongoose": "mongodb",
+                "mongodb": "mongodb",
+                "@clickhouse/client": "clickhouse",
+                "redis": "redis",
+                # NOTE(review): Prisma also targets other databases; treating
+                # it as PostgreSQL is a guess kept from the original heuristic.
+                "prisma": "postgresql",
+            },
+        )
+        # "pg" is too short for substring matching (it occurs in "openpgp",
+        # "*upgrade*", ...), so match it against whole package names only:
+        # "pg", "pg-*" companions and scoped ".../pg" packages.
+        if any(
+            dep == "pg" or dep.startswith("pg-") or dep.endswith("/pg")
+            for dep in dep_names
+        ):
+            discovery.databases.add("postgresql")
+
+    for env_name in env_names:
+        if "CLICKHOUSE" in env_name or env_name.startswith("CH_"):
+            discovery.databases.add("clickhouse")
+        if "POSTGRES" in env_name or env_name.startswith("PG") or env_name == "DATABASE_URL":
+            discovery.databases.add("postgresql")
+        if "MYSQL" in env_name:
+            discovery.databases.add("mysql")
+        if "MONGO" in env_name:
+            discovery.databases.add("mongodb")
+        if "REDIS" in env_name:
+            discovery.databases.add("redis")
+
+    compose_blob = "\n".join(
+        manifests.get(key, "")
+        for key in ("docker_compose", "docker_compose_alt", "compose")
+    ).lower()
+    _add_if_contains(
+        discovery.services,
+        compose_blob,
+        {
+            "clickhouse": "clickhouse",
+            "postgres": "postgresql",
+            "mysql": "mysql",
+            "mongo": "mongodb",
+            "redis": "redis",
+        },
+    )
+
+    if "prisma" in manifests:
+        discovery.hints.append("Prisma schema detected.")
+    if (project_root / "alembic.ini").exists():
+        discovery.hints.append("Alembic migration config detected.")
+    if (project_root / "docker").exists() or discovery.services:
+        discovery.hints.append("Containerized services may be available for local verification.")
+
+    return discovery
+
+
+def format_repo_discovery(discovery: RepoDiscovery) -> str:
+    """Summarise *discovery* as newline-separated lines for prompt injection.
+
+    Each non-empty category yields one line with its values sorted and
+    comma-separated; hints follow verbatim. A fixed fallback sentence is
+    returned when nothing at all was detected.
+    """
+    labelled = (
+        ("Detected languages: ", discovery.languages),
+        ("Likely package managers: ", discovery.package_managers),
+        ("Detected databases/services in code or env: ", discovery.databases),
+        ("Detected local service containers: ", discovery.services),
+    )
+    summary: list[str] = [
+        label + ", ".join(sorted(values))
+        for label, values in labelled
+        if values
+    ]
+    summary.extend(discovery.hints)
+    if summary:
+        return "\n".join(summary)
+    return "No strong runtime/service signals were detected from repository manifests."
diff --git a/cross_eval/models.py b/cross_eval/models.py
index 45b4066..6bab382 100644
--- a/cross_eval/models.py
+++ b/cross_eval/models.py
@@ -88,6 +88,8 @@ class AgentResult:
agent_name: str
step_name: str
duration_seconds: float
+ transcript: str = ""
+ command_preview: str = ""
@dataclass
diff --git a/cross_eval/pipeline.py b/cross_eval/pipeline.py
index b31fc8d..7047318 100644
--- a/cross_eval/pipeline.py
+++ b/cross_eval/pipeline.py
@@ -13,6 +13,7 @@ from pathlib import Path
from cross_eval.agent import AgentInvocationError, invoke_agent, invoke_agent_agentic
from cross_eval.worktree import WorktreeError
from cross_eval.config import try_reload_config
+from cross_eval.discovery import discover_repo, format_repo_discovery
from cross_eval.models import (
AgentConfig,
AgentResult,
@@ -804,6 +805,7 @@ def _execute_step(
# 8. Save to disk
_save_step_output(run_dir, output_iter, step.name, result.output)
+ _maybe_save_step_transcript(run_dir, output_iter, step.name, result)
def _execute_parallel_batch(
@@ -929,6 +931,7 @@ def _execute_parallel_batch(
step.name, r.duration_seconds, len(r.output),
)
_save_step_output(run_dir, output_iter, step.name, r.output)
+ _maybe_save_step_transcript(run_dir, output_iter, step.name, r)
if errors:
spinner.stop(f"[parallel] FAILED ({batch_elapsed}s)")
@@ -1001,10 +1004,12 @@ def _build_runtime_inputs(
) -> dict[str, str]:
"""Load runtime env and expose safe execution hints to prompts."""
env, loaded_files, loaded_values = build_runtime_environment(config.execution, cwd)
+ discovery = discover_repo(cwd, set(loaded_values) | set(env))
input_contents["execution_policy"] = build_execution_policy(config.execution)
input_contents["environment_context"] = summarize_environment(
config.execution, loaded_files, env, loaded_values,
)
+ input_contents["repo_discovery"] = format_repo_discovery(discovery)
return env
@@ -1018,6 +1023,8 @@ def _augment_prompt_with_runtime_context(
extras.append("## Execution Policy\n" + context["execution_policy"])
if context.get("environment_context"):
extras.append("## Environment Context\n" + context["environment_context"])
+ if context.get("repo_discovery"):
+ extras.append("## Repository Discovery\n" + context["repo_discovery"])
if not extras:
return prompt
return prompt.rstrip() + "\n\n" + "\n\n".join(extras) + "\n"
@@ -1198,6 +1205,20 @@ def _save_step_output(
return path
+def _maybe_save_step_transcript(
+    run_dir: Path,
+    iteration: int,
+    step_name: str,
+    result: AgentResult,
+) -> Path | None:
+    """Write the step's transcript next to its output, if there is one.
+
+    Returns whatever ``_save_step_output`` returns for the
+    ``"<step_name>_transcript"`` entry, or ``None`` when ``result.transcript``
+    is empty and nothing is written.
+    """
+    transcript = result.transcript
+    if transcript:
+        transcript_step = f"{step_name}_transcript"
+        return _save_step_output(run_dir, iteration, transcript_step, transcript)
+    return None
+
+
def _format_runtime_error_markdown(
exc: Exception,
*,
diff --git a/tests/__pycache__/test_config.cpython-312.pyc b/tests/__pycache__/test_config.cpython-312.pyc
deleted file mode 100644
index b2ce054..0000000
Binary files a/tests/__pycache__/test_config.cpython-312.pyc and /dev/null differ
diff --git a/tests/test_runtime_context.py b/tests/test_runtime_context.py
new file mode 100644
index 0000000..6c916e0
--- /dev/null
+++ b/tests/test_runtime_context.py
@@ -0,0 +1,132 @@
+from __future__ import annotations
+
+import tempfile
+import unittest
+from pathlib import Path
+from unittest.mock import patch
+
+from cross_eval.agent import invoke_agent
+from cross_eval.config import BUILTIN_AGENTS
+from cross_eval.discovery import discover_repo, format_repo_discovery
+from cross_eval.models import AgentConfig, AgentResult, PipelineConfig
+from cross_eval.pipeline import run_pipeline
+from cross_eval.prompts import _build_simple_preset
+from cross_eval.runtime_env import build_runtime_environment, summarize_environment
+
+
+class RuntimeEnvTest(unittest.TestCase):
+    """Checks for dotenv loading and the environment summary helper."""
+
+    def test_build_runtime_environment_loads_dotenv_values(self) -> None:
+        """Values written to a project-root ``.env`` are returned by the loader."""
+        dotenv_body = "CLICKHOUSE_URL=http://localhost:8123\nDATABASE_URL=postgres://db\n"
+        with tempfile.TemporaryDirectory() as tmpdir:
+            project_dir = Path(tmpdir)
+            (project_dir / ".env").write_text(dotenv_body, encoding="utf-8")
+            env, loaded_files, loaded_values = build_runtime_environment(
+                PipelineConfig().execution, project_dir,
+            )
+
+        first_loaded = loaded_files[0]
+        self.assertEqual(first_loaded.name, ".env")
+        self.assertEqual(loaded_values["CLICKHOUSE_URL"], "http://localhost:8123")
+        self.assertEqual(env["DATABASE_URL"], "postgres://db")
+
+    def test_summarize_environment_mentions_clickhouse_from_env(self) -> None:
+        """The summary names the variable and flags it as ClickHouse-related."""
+        clickhouse_vars = {"CLICKHOUSE_URL": "http://localhost:8123"}
+        summary = summarize_environment(
+            PipelineConfig().execution,
+            [Path("/tmp/.env")],
+            clickhouse_vars,
+            dict(clickhouse_vars),  # separate object, as env and loaded values are distinct
+        )
+        for fragment in ("CLICKHOUSE_URL", "ClickHouse-related"):
+            self.assertIn(fragment, summary)
+
+
+class RepoDiscoveryTest(unittest.TestCase):
+    """Checks manifest-driven stack detection."""
+
+    def test_discover_repo_detects_python_postgres_and_clickhouse(self) -> None:
+        """A pyproject plus compose file yields python, both databases and services."""
+        pyproject_body = (
+            '[project]\nname = "svc"\ndependencies = ["psycopg", "clickhouse-driver"]\n'
+        )
+        compose_body = (
+            "services:\n db:\n image: postgres:16\n ch:\n image: clickhouse/clickhouse-server:latest\n"
+        )
+        with tempfile.TemporaryDirectory() as tmpdir:
+            project_dir = Path(tmpdir)
+            (project_dir / "pyproject.toml").write_text(pyproject_body, encoding="utf-8")
+            (project_dir / "docker-compose.yml").write_text(compose_body, encoding="utf-8")
+            found = discover_repo(project_dir, {"DATABASE_URL", "CLICKHOUSE_URL"})
+            rendered = format_repo_discovery(found)
+
+        self.assertIn("python", found.languages)
+        for database in ("postgresql", "clickhouse"):
+            self.assertIn(database, found.databases)
+        self.assertIn("Detected local service containers", rendered)
+
+
+class PromptContextTest(unittest.TestCase):
+    """End-to-end check that runtime context sections reach agent prompts."""
+
+    def test_run_pipeline_injects_env_and_discovery_context_into_prompt(self) -> None:
+        """Prompts captured from a stubbed agent carry the runtime context headings."""
+        captured: list[str] = []
+
+        def _record_and_reply(agent_config, prompt, step_name, **kwargs):
+            # Stand-in for invoke_agent: remember the prompt, return a canned result.
+            captured.append(prompt)
+            if step_name == "review":
+                reply = "VERDICT: PASS"
+            else:
+                reply = "coding output"
+            return AgentResult(
+                output=reply,
+                exit_code=0,
+                agent_name=agent_config.name,
+                step_name=step_name,
+                duration_seconds=0.1,
+                transcript="# Agent Execution Transcript",
+            )
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            workdir = Path(tmpdir)
+            (workdir / ".env").write_text("CLICKHOUSE_URL=http://localhost:8123\n", encoding="utf-8")
+            config = PipelineConfig(
+                output_dir=workdir / "out",
+                max_iterations=1,
+                language="en",
+                inputs={"plan": "Plan", "checklist": "Checklist"},
+                agents=dict(BUILTIN_AGENTS),
+                coders=["claude-coder"],
+                reviewers=["claude-reviewer"],
+                pipeline=_build_simple_preset(["claude-coder"], ["claude-reviewer"], []),
+                preset_name="simple",
+            )
+
+            with patch("cross_eval.pipeline.invoke_agent", side_effect=_record_and_reply):
+                run_pipeline(config, cwd=workdir)
+
+            all_prompts = "\n".join(captured)
+            expected_fragments = (
+                "Execution Policy",
+                "Environment Context",
+                "Repository Discovery",
+                "ClickHouse-related environment variables are available",
+            )
+            for fragment in expected_fragments:
+                self.assertIn(fragment, all_prompts)
+            # Checked inside the with-block: the temp dir is removed on exit.
+            self.assertTrue((workdir / "out").exists())
+
+
+class AgentTranscriptTest(unittest.TestCase):
+    """Checks that invoke_agent attaches a transcript to its result."""
+
+    def test_invoke_agent_records_transcript(self) -> None:
+        """Command heading, stdout and stderr all show up in the transcript."""
+
+        class _CompletedStub:
+            # Only the attributes this test needs from a completed process.
+            returncode = 0
+            stdout = "hello"
+            stderr = "warn"
+
+        def _fake_run(cmd, **kwargs):
+            return _CompletedStub()
+
+        reviewer = AgentConfig(
+            name="codex-reviewer",
+            command="codex",
+            args=["exec", "--model", "gpt-5.4", "-"],
+        )
+
+        with patch("subprocess.run", side_effect=_fake_run):
+            outcome = invoke_agent(reviewer, "prompt", "review", quiet=True)
+
+        for fragment in ("## Command", "hello", "warn"):
+            self.assertIn(fragment, outcome.transcript)
+
+
+if __name__ == "__main__":
+ unittest.main()