feat: add runtime discovery and execution traces
This commit is contained in:
7
.gitignore
vendored
Normal file
7
.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
.pytest_cache/
|
||||||
|
.idea/
|
||||||
|
output/
|
||||||
|
.cross-eval/output/
|
||||||
|
cross_eval.egg-info/
|
||||||
10
.idea/.gitignore
generated
vendored
10
.idea/.gitignore
generated
vendored
@@ -1,10 +0,0 @@
|
|||||||
# Default ignored files
|
|
||||||
/shelf/
|
|
||||||
/workspace.xml
|
|
||||||
# Ignored default folder with query files
|
|
||||||
/queries/
|
|
||||||
# Datasource local storage ignored files
|
|
||||||
/dataSources/
|
|
||||||
/dataSources.local.xml
|
|
||||||
# Editor-based HTTP Client requests
|
|
||||||
/httpRequests/
|
|
||||||
14
.idea/cross-eval.iml
generated
14
.idea/cross-eval.iml
generated
@@ -1,14 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<module type="PYTHON_MODULE" version="4">
|
|
||||||
<component name="NewModuleRootManager">
|
|
||||||
<content url="file://$MODULE_DIR$">
|
|
||||||
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
|
||||||
</content>
|
|
||||||
<orderEntry type="jdk" jdkName="Python 3.12 (cross-eval)" jdkType="Python SDK" />
|
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
|
||||||
</component>
|
|
||||||
<component name="PyDocumentationSettings">
|
|
||||||
<option name="format" value="PLAIN" />
|
|
||||||
<option name="myDocStringFormat" value="Plain" />
|
|
||||||
</component>
|
|
||||||
</module>
|
|
||||||
6
.idea/inspectionProfiles/Project_Default.xml
generated
6
.idea/inspectionProfiles/Project_Default.xml
generated
@@ -1,6 +0,0 @@
|
|||||||
<component name="InspectionProjectProfileManager">
|
|
||||||
<profile version="1.0">
|
|
||||||
<option name="myName" value="Project Default" />
|
|
||||||
<inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
|
|
||||||
</profile>
|
|
||||||
</component>
|
|
||||||
6
.idea/inspectionProfiles/profiles_settings.xml
generated
6
.idea/inspectionProfiles/profiles_settings.xml
generated
@@ -1,6 +0,0 @@
|
|||||||
<component name="InspectionProjectProfileManager">
|
|
||||||
<settings>
|
|
||||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
|
||||||
<version value="1.0" />
|
|
||||||
</settings>
|
|
||||||
</component>
|
|
||||||
7
.idea/misc.xml
generated
7
.idea/misc.xml
generated
@@ -1,7 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<project version="4">
|
|
||||||
<component name="Black">
|
|
||||||
<option name="sdkName" value="Python 3.12 (cross-eval)" />
|
|
||||||
</component>
|
|
||||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (cross-eval)" project-jdk-type="Python SDK" />
|
|
||||||
</project>
|
|
||||||
8
.idea/modules.xml
generated
8
.idea/modules.xml
generated
@@ -1,8 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<project version="4">
|
|
||||||
<component name="ProjectModuleManager">
|
|
||||||
<modules>
|
|
||||||
<module fileurl="file://$PROJECT_DIR$/.idea/cross-eval.iml" filepath="$PROJECT_DIR$/.idea/cross-eval.iml" />
|
|
||||||
</modules>
|
|
||||||
</component>
|
|
||||||
</project>
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
Metadata-Version: 2.4
|
|
||||||
Name: cross-eval
|
|
||||||
Version: 0.2.0
|
|
||||||
Summary: AI agent cross-evaluation CLI tool
|
|
||||||
Requires-Python: >=3.9
|
|
||||||
Requires-Dist: pyyaml>=6.0
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
README.md
|
|
||||||
pyproject.toml
|
|
||||||
cross_eval/__init__.py
|
|
||||||
cross_eval/agent.py
|
|
||||||
cross_eval/cli.py
|
|
||||||
cross_eval/config.py
|
|
||||||
cross_eval/demo.py
|
|
||||||
cross_eval/doctor.py
|
|
||||||
cross_eval/models.py
|
|
||||||
cross_eval/pipeline.py
|
|
||||||
cross_eval/prompts.py
|
|
||||||
cross_eval/report.py
|
|
||||||
cross_eval/runtime_env.py
|
|
||||||
cross_eval/worktree.py
|
|
||||||
cross_eval.egg-info/PKG-INFO
|
|
||||||
cross_eval.egg-info/SOURCES.txt
|
|
||||||
cross_eval.egg-info/dependency_links.txt
|
|
||||||
cross_eval.egg-info/entry_points.txt
|
|
||||||
cross_eval.egg-info/requires.txt
|
|
||||||
cross_eval.egg-info/top_level.txt
|
|
||||||
tests/test_agentic.py
|
|
||||||
tests/test_config.py
|
|
||||||
tests/test_onboarding.py
|
|
||||||
tests/test_pipeline_integration.py
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
[console_scripts]
|
|
||||||
cross-eval = cross_eval.cli:main
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
pyyaml>=6.0
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
cross_eval
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -218,6 +218,7 @@ def invoke_agent(
|
|||||||
else:
|
else:
|
||||||
input_data = prompt
|
input_data = prompt
|
||||||
|
|
||||||
|
cmd_preview = " ".join(cmd[:6])
|
||||||
logger.debug("Invoking agent '%s': %s", agent.name, " ".join(cmd[:5]) + " ...")
|
logger.debug("Invoking agent '%s': %s", agent.name, " ".join(cmd[:5]) + " ...")
|
||||||
|
|
||||||
spinner: Optional[_Spinner] = None
|
spinner: Optional[_Spinner] = None
|
||||||
@@ -259,7 +260,6 @@ def invoke_agent(
|
|||||||
err_detail = result.stderr.strip() or result.stdout.strip()
|
err_detail = result.stderr.strip() or result.stdout.strip()
|
||||||
if err_detail and len(err_detail) > 500:
|
if err_detail and len(err_detail) > 500:
|
||||||
err_detail = err_detail[:500] + "..."
|
err_detail = err_detail[:500] + "..."
|
||||||
cmd_preview = " ".join(cmd[:6])
|
|
||||||
failure_type, suggested_action = _classify_agent_failure(err_detail or "")
|
failure_type, suggested_action = _classify_agent_failure(err_detail or "")
|
||||||
raise AgentInvocationError(
|
raise AgentInvocationError(
|
||||||
agent_name=agent.name,
|
agent_name=agent.name,
|
||||||
@@ -298,12 +298,20 @@ def invoke_agent(
|
|||||||
agent.name, step_name,
|
agent.name, step_name,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
transcript = _build_transcript(
|
||||||
|
command_preview=cmd_preview,
|
||||||
|
stdout=result.stdout,
|
||||||
|
stderr=result.stderr,
|
||||||
|
)
|
||||||
|
|
||||||
return AgentResult(
|
return AgentResult(
|
||||||
output=output,
|
output=output,
|
||||||
exit_code=result.returncode,
|
exit_code=result.returncode,
|
||||||
agent_name=agent.name,
|
agent_name=agent.name,
|
||||||
step_name=step_name,
|
step_name=step_name,
|
||||||
duration_seconds=round(duration, 1),
|
duration_seconds=round(duration, 1),
|
||||||
|
transcript=transcript,
|
||||||
|
command_preview=cmd_preview,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -360,6 +368,7 @@ def invoke_agent_agentic(
|
|||||||
f"Work in the current directory."
|
f"Work in the current directory."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
cmd_preview = " ".join(cmd[:6])
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"Invoking agent '%s' (agentic) in worktree: %s",
|
"Invoking agent '%s' (agentic) in worktree: %s",
|
||||||
agent.name, worktree_path,
|
agent.name, worktree_path,
|
||||||
@@ -401,7 +410,6 @@ def invoke_agent_agentic(
|
|||||||
err_detail = result.stderr.strip() or result.stdout.strip()
|
err_detail = result.stderr.strip() or result.stdout.strip()
|
||||||
if err_detail and len(err_detail) > 500:
|
if err_detail and len(err_detail) > 500:
|
||||||
err_detail = err_detail[:500] + "..."
|
err_detail = err_detail[:500] + "..."
|
||||||
cmd_preview = " ".join(cmd[:6])
|
|
||||||
failure_type, suggested_action = _classify_agent_failure(err_detail or "")
|
failure_type, suggested_action = _classify_agent_failure(err_detail or "")
|
||||||
raise AgentInvocationError(
|
raise AgentInvocationError(
|
||||||
agent_name=agent.name,
|
agent_name=agent.name,
|
||||||
@@ -426,10 +434,47 @@ def invoke_agent_agentic(
|
|||||||
if spinner:
|
if spinner:
|
||||||
spinner.stop(f"[{step_name}] done — {chars} chars (agentic)")
|
spinner.stop(f"[{step_name}] done — {chars} chars (agentic)")
|
||||||
|
|
||||||
|
transcript = _build_transcript(
|
||||||
|
command_preview=cmd_preview,
|
||||||
|
stdout=result.stdout,
|
||||||
|
stderr=result.stderr,
|
||||||
|
)
|
||||||
|
|
||||||
return AgentResult(
|
return AgentResult(
|
||||||
output=diff_output,
|
output=diff_output,
|
||||||
exit_code=result.returncode,
|
exit_code=result.returncode,
|
||||||
agent_name=agent.name,
|
agent_name=agent.name,
|
||||||
step_name=step_name,
|
step_name=step_name,
|
||||||
duration_seconds=round(duration, 1),
|
duration_seconds=round(duration, 1),
|
||||||
|
transcript=transcript,
|
||||||
|
command_preview=cmd_preview,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_transcript(
|
||||||
|
*,
|
||||||
|
command_preview: str,
|
||||||
|
stdout: str,
|
||||||
|
stderr: str,
|
||||||
|
) -> str:
|
||||||
|
"""Build a compact execution transcript for debugging/audit output."""
|
||||||
|
sections = [
|
||||||
|
"# Agent Execution Transcript",
|
||||||
|
"",
|
||||||
|
"## Command",
|
||||||
|
"```",
|
||||||
|
command_preview or "(unknown command)",
|
||||||
|
"```",
|
||||||
|
"",
|
||||||
|
"## Stdout",
|
||||||
|
"```",
|
||||||
|
(stdout or "(empty)").strip(),
|
||||||
|
"```",
|
||||||
|
"",
|
||||||
|
"## Stderr",
|
||||||
|
"```",
|
||||||
|
(stderr or "(empty)").strip(),
|
||||||
|
"```",
|
||||||
|
"",
|
||||||
|
]
|
||||||
|
return "\n".join(sections)
|
||||||
|
|||||||
167
cross_eval/discovery.py
Normal file
167
cross_eval/discovery.py
Normal file
@@ -0,0 +1,167 @@
|
|||||||
|
"""Repository/service discovery helpers for autonomous execution prompts."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RepoDiscovery:
    """Stack signals collected by ``discover_repo`` and rendered by ``format_repo_discovery``."""

    # Language ecosystems inferred from manifest presence (e.g. "python", "node").
    languages: set[str] = field(default_factory=set)
    # Package manager names (e.g. "pip", "npm", or the name taken from
    # package.json's "packageManager" field).
    package_managers: set[str] = field(default_factory=set)
    # Data stores inferred from manifest text, Node dependency names, or
    # environment variable names.
    databases: set[str] = field(default_factory=set)
    # Data stores mentioned in Docker Compose files.
    services: set[str] = field(default_factory=set)
    # Free-form human-readable notes, appended verbatim to the rendered summary.
    hints: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
def _read_text(path: Path) -> str:
|
||||||
|
try:
|
||||||
|
return path.read_text(encoding="utf-8")
|
||||||
|
except (OSError, UnicodeDecodeError):
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _add_if_contains(target: set[str], content: str, mapping: dict[str, str]) -> None:
|
||||||
|
lowered = content.lower()
|
||||||
|
for needle, name in mapping.items():
|
||||||
|
if needle in lowered:
|
||||||
|
target.add(name)
|
||||||
|
|
||||||
|
|
||||||
|
def discover_repo(project_root: Path, env_names: set[str] | None = None) -> RepoDiscovery:
    """Infer runtime-relevant stack hints from common manifest/config files.

    Only a fixed set of well-known files under ``project_root`` is inspected;
    nothing is executed. Detection is substring-based and therefore
    heuristic: the result is a set of hints for a prompt, not a guarantee.

    Args:
        project_root: Directory whose manifests are inspected.
        env_names: Names (not values) of available environment variables,
            compared case-insensitively. ``None`` means no env signals.

    Returns:
        A populated ``RepoDiscovery``. Missing, unreadable, or malformed
        manifests contribute nothing rather than raising.
    """
    discovery = RepoDiscovery()
    upper_env_names = {name.upper() for name in (env_names or set())}

    file_map = {
        "pyproject": project_root / "pyproject.toml",
        "requirements": project_root / "requirements.txt",
        "package": project_root / "package.json",
        "docker_compose": project_root / "docker-compose.yml",
        "docker_compose_alt": project_root / "docker-compose.yaml",
        "compose": project_root / "compose.yaml",
        # Docker Compose also accepts the short .yml spelling.
        "compose_alt": project_root / "compose.yml",
        "prisma": project_root / "prisma" / "schema.prisma",
    }

    # Read each existing manifest exactly once; every later check works from
    # this snapshot instead of touching the filesystem again.
    manifests = {
        name: _read_text(path)
        for name, path in file_map.items()
        if path.exists()
    }

    # Parse package.json once. Anything that is not a JSON object (invalid
    # JSON, a list, a bare string, ...) is treated as an empty manifest so
    # the lookups below cannot fail.
    package_json: dict = {}
    if "package" in manifests:
        try:
            parsed = json.loads(manifests["package"] or "{}")
        except json.JSONDecodeError:
            parsed = {}
        if isinstance(parsed, dict):
            package_json = parsed

    if "pyproject" in manifests or "requirements" in manifests:
        discovery.languages.add("python")
        # requirements.txt is a pip format, so either manifest implies pip.
        discovery.package_managers.add("pip")

    if "package" in manifests:
        discovery.languages.add("node")
        # "packageManager" looks like "pnpm@8.6.0"; keep only the tool name.
        pm = package_json.get("packageManager")
        pm_name = pm.split("@", 1)[0].strip() if isinstance(pm, str) else ""
        discovery.package_managers.add(pm_name or "npm")

    combined = "\n".join(manifests.values())

    _add_if_contains(
        discovery.databases,
        combined,
        {
            "psycopg": "postgresql",
            "asyncpg": "postgresql",
            "postgres": "postgresql",
            "mysql": "mysql",
            "pymongo": "mongodb",
            "mongodb": "mongodb",
            "mongoengine": "mongodb",
            "clickhouse": "clickhouse",
            "clickhouse-driver": "clickhouse",
            "clickhouse_connect": "clickhouse",
            "redis": "redis",
        },
    )

    if "package" in manifests:
        deps: dict = {}
        for section in ("dependencies", "devDependencies"):
            entries = package_json.get(section)
            # Ignore malformed sections instead of failing on them.
            if isinstance(entries, dict):
                deps.update(entries)
        dep_blob = "\n".join(deps).lower()
        # NOTE(review): these are substring matches on dependency names, so
        # "pg" also matches unrelated packages containing those letters, and
        # "prisma" is assumed to mean PostgreSQL -- confirm this is intended.
        _add_if_contains(
            discovery.databases,
            dep_blob,
            {
                "pg": "postgresql",
                "mysql": "mysql",
                "mongoose": "mongodb",
                "mongodb": "mongodb",
                "@clickhouse/client": "clickhouse",
                "redis": "redis",
                "prisma": "postgresql",
            },
        )

    for env_name in upper_env_names:
        if "CLICKHOUSE" in env_name or env_name.startswith("CH_"):
            discovery.databases.add("clickhouse")
        if "POSTGRES" in env_name or env_name.startswith("PG") or env_name == "DATABASE_URL":
            discovery.databases.add("postgresql")
        if "MYSQL" in env_name:
            discovery.databases.add("mysql")
        if "MONGO" in env_name:
            discovery.databases.add("mongodb")
        if "REDIS" in env_name:
            discovery.databases.add("redis")

    compose_blob = "\n".join(
        manifests.get(key, "")
        for key in ("docker_compose", "docker_compose_alt", "compose", "compose_alt")
    ).lower()
    _add_if_contains(
        discovery.services,
        compose_blob,
        {
            "clickhouse": "clickhouse",
            "postgres": "postgresql",
            "mysql": "mysql",
            "mongo": "mongodb",
            "redis": "redis",
        },
    )

    if "prisma" in manifests:
        discovery.hints.append("Prisma schema detected.")
    if (project_root / "alembic.ini").exists():
        discovery.hints.append("Alembic migration config detected.")
    if (project_root / "docker").exists() or discovery.services:
        discovery.hints.append("Containerized services may be available for local verification.")

    return discovery
|
||||||
|
|
||||||
|
|
||||||
|
def format_repo_discovery(discovery: RepoDiscovery) -> str:
    """Render discovery results into a compact prompt summary.

    Each non-empty category becomes one line with its values sorted and
    comma-separated; hints follow verbatim. When nothing was detected at
    all, a fixed fallback sentence is returned instead.
    """
    labelled = (
        ("Detected languages: ", discovery.languages),
        ("Likely package managers: ", discovery.package_managers),
        ("Detected databases/services in code or env: ", discovery.databases),
        ("Detected local service containers: ", discovery.services),
    )
    summary = [
        prefix + ", ".join(sorted(values))
        for prefix, values in labelled
        if values
    ]
    summary.extend(discovery.hints or ())
    if summary:
        return "\n".join(summary)
    return "No strong runtime/service signals were detected from repository manifests."
|
||||||
@@ -88,6 +88,8 @@ class AgentResult:
|
|||||||
agent_name: str
|
agent_name: str
|
||||||
step_name: str
|
step_name: str
|
||||||
duration_seconds: float
|
duration_seconds: float
|
||||||
|
transcript: str = ""
|
||||||
|
command_preview: str = ""
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ from pathlib import Path
|
|||||||
from cross_eval.agent import AgentInvocationError, invoke_agent, invoke_agent_agentic
|
from cross_eval.agent import AgentInvocationError, invoke_agent, invoke_agent_agentic
|
||||||
from cross_eval.worktree import WorktreeError
|
from cross_eval.worktree import WorktreeError
|
||||||
from cross_eval.config import try_reload_config
|
from cross_eval.config import try_reload_config
|
||||||
|
from cross_eval.discovery import discover_repo, format_repo_discovery
|
||||||
from cross_eval.models import (
|
from cross_eval.models import (
|
||||||
AgentConfig,
|
AgentConfig,
|
||||||
AgentResult,
|
AgentResult,
|
||||||
@@ -804,6 +805,7 @@ def _execute_step(
|
|||||||
|
|
||||||
# 8. Save to disk
|
# 8. Save to disk
|
||||||
_save_step_output(run_dir, output_iter, step.name, result.output)
|
_save_step_output(run_dir, output_iter, step.name, result.output)
|
||||||
|
_maybe_save_step_transcript(run_dir, output_iter, step.name, result)
|
||||||
|
|
||||||
|
|
||||||
def _execute_parallel_batch(
|
def _execute_parallel_batch(
|
||||||
@@ -929,6 +931,7 @@ def _execute_parallel_batch(
|
|||||||
step.name, r.duration_seconds, len(r.output),
|
step.name, r.duration_seconds, len(r.output),
|
||||||
)
|
)
|
||||||
_save_step_output(run_dir, output_iter, step.name, r.output)
|
_save_step_output(run_dir, output_iter, step.name, r.output)
|
||||||
|
_maybe_save_step_transcript(run_dir, output_iter, step.name, r)
|
||||||
|
|
||||||
if errors:
|
if errors:
|
||||||
spinner.stop(f"[parallel] FAILED ({batch_elapsed}s)")
|
spinner.stop(f"[parallel] FAILED ({batch_elapsed}s)")
|
||||||
@@ -1001,10 +1004,12 @@ def _build_runtime_inputs(
|
|||||||
) -> dict[str, str]:
|
) -> dict[str, str]:
|
||||||
"""Load runtime env and expose safe execution hints to prompts."""
|
"""Load runtime env and expose safe execution hints to prompts."""
|
||||||
env, loaded_files, loaded_values = build_runtime_environment(config.execution, cwd)
|
env, loaded_files, loaded_values = build_runtime_environment(config.execution, cwd)
|
||||||
|
discovery = discover_repo(cwd, set(loaded_values) | set(env))
|
||||||
input_contents["execution_policy"] = build_execution_policy(config.execution)
|
input_contents["execution_policy"] = build_execution_policy(config.execution)
|
||||||
input_contents["environment_context"] = summarize_environment(
|
input_contents["environment_context"] = summarize_environment(
|
||||||
config.execution, loaded_files, env, loaded_values,
|
config.execution, loaded_files, env, loaded_values,
|
||||||
)
|
)
|
||||||
|
input_contents["repo_discovery"] = format_repo_discovery(discovery)
|
||||||
return env
|
return env
|
||||||
|
|
||||||
|
|
||||||
@@ -1018,6 +1023,8 @@ def _augment_prompt_with_runtime_context(
|
|||||||
extras.append("## Execution Policy\n" + context["execution_policy"])
|
extras.append("## Execution Policy\n" + context["execution_policy"])
|
||||||
if context.get("environment_context"):
|
if context.get("environment_context"):
|
||||||
extras.append("## Environment Context\n" + context["environment_context"])
|
extras.append("## Environment Context\n" + context["environment_context"])
|
||||||
|
if context.get("repo_discovery"):
|
||||||
|
extras.append("## Repository Discovery\n" + context["repo_discovery"])
|
||||||
if not extras:
|
if not extras:
|
||||||
return prompt
|
return prompt
|
||||||
return prompt.rstrip() + "\n\n" + "\n\n".join(extras) + "\n"
|
return prompt.rstrip() + "\n\n" + "\n\n".join(extras) + "\n"
|
||||||
@@ -1198,6 +1205,20 @@ def _save_step_output(
|
|||||||
return path
|
return path
|
||||||
|
|
||||||
|
|
||||||
|
def _maybe_save_step_transcript(
    run_dir: Path,
    iteration: int,
    step_name: str,
    result: AgentResult,
) -> Path | None:
    """Persist the agent's stdout/stderr transcript when one was recorded.

    The transcript is written through ``_save_step_output`` under the step
    name suffixed with ``_transcript``. Returns whatever that helper returns,
    or ``None`` when the result carries no transcript.
    """
    transcript = result.transcript
    if transcript:
        return _save_step_output(
            run_dir, iteration, f"{step_name}_transcript", transcript,
        )
    return None
|
||||||
|
|
||||||
|
|
||||||
def _format_runtime_error_markdown(
|
def _format_runtime_error_markdown(
|
||||||
exc: Exception,
|
exc: Exception,
|
||||||
*,
|
*,
|
||||||
|
|||||||
Binary file not shown.
132
tests/test_runtime_context.py
Normal file
132
tests/test_runtime_context.py
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
from cross_eval.agent import invoke_agent
|
||||||
|
from cross_eval.config import BUILTIN_AGENTS
|
||||||
|
from cross_eval.discovery import discover_repo, format_repo_discovery
|
||||||
|
from cross_eval.models import AgentConfig, AgentResult, PipelineConfig
|
||||||
|
from cross_eval.pipeline import run_pipeline
|
||||||
|
from cross_eval.prompts import _build_simple_preset
|
||||||
|
from cross_eval.runtime_env import build_runtime_environment, summarize_environment
|
||||||
|
|
||||||
|
|
||||||
|
class RuntimeEnvTest(unittest.TestCase):
    """Tests for loading and summarizing the runtime environment."""

    def test_build_runtime_environment_loads_dotenv_values(self) -> None:
        """Values from a ``.env`` file in the root appear in the returned results."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            (root / ".env").write_text(
                "CLICKHOUSE_URL=http://localhost:8123\nDATABASE_URL=postgres://db\n",
                encoding="utf-8",
            )
            execution = PipelineConfig().execution
            env, loaded_files, loaded_values = build_runtime_environment(execution, root)

            # The .env file is reported as loaded, and its values are exposed
            # both in the loaded-values mapping and in the environment.
            self.assertEqual(loaded_files[0].name, ".env")
            self.assertEqual(loaded_values["CLICKHOUSE_URL"], "http://localhost:8123")
            self.assertEqual(env["DATABASE_URL"], "postgres://db")

    def test_summarize_environment_mentions_clickhouse_from_env(self) -> None:
        """The summary names the variable and includes a ClickHouse-related note."""
        execution = PipelineConfig().execution
        summary = summarize_environment(
            execution,
            [Path("/tmp/.env")],
            {"CLICKHOUSE_URL": "http://localhost:8123"},
            {"CLICKHOUSE_URL": "http://localhost:8123"},
        )
        self.assertIn("CLICKHOUSE_URL", summary)
        self.assertIn("ClickHouse-related", summary)
|
||||||
|
|
||||||
|
|
||||||
|
class RepoDiscoveryTest(unittest.TestCase):
    """Tests for manifest-based repository discovery."""

    def test_discover_repo_detects_python_postgres_and_clickhouse(self) -> None:
        """A pyproject plus compose file yields language, database and service signals."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            (root / "pyproject.toml").write_text(
                '[project]\nname = "svc"\ndependencies = ["psycopg", "clickhouse-driver"]\n',
                encoding="utf-8",
            )
            (root / "docker-compose.yml").write_text(
                "services:\n db:\n image: postgres:16\n ch:\n image: clickhouse/clickhouse-server:latest\n",
                encoding="utf-8",
            )
            discovery = discover_repo(root, {"DATABASE_URL", "CLICKHOUSE_URL"})
            summary = format_repo_discovery(discovery)

            self.assertIn("python", discovery.languages)
            self.assertIn("postgresql", discovery.databases)
            self.assertIn("clickhouse", discovery.databases)
            # The compose file mentions services, so the rendered summary
            # must include the service-container line.
            self.assertIn("Detected local service containers", summary)
|
||||||
|
|
||||||
|
|
||||||
|
class PromptContextTest(unittest.TestCase):
    """Tests that runtime context sections reach the prompts sent to agents."""

    def test_run_pipeline_injects_env_and_discovery_context_into_prompt(self) -> None:
        """Prompts captured from a pipeline run contain the policy, env and discovery sections."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            (root / ".env").write_text("CLICKHOUSE_URL=http://localhost:8123\n", encoding="utf-8")
            steps = _build_simple_preset(["claude-coder"], ["claude-reviewer"], [])
            config = PipelineConfig(
                output_dir=root / "out",
                max_iterations=1,
                language="en",
                inputs={"plan": "Plan", "checklist": "Checklist"},
                agents={name: agent for name, agent in BUILTIN_AGENTS.items()},
                coders=["claude-coder"],
                reviewers=["claude-reviewer"],
                pipeline=steps,
                preset_name="simple",
            )
            prompts: list[str] = []

            def _fake_invoke(agent_config, prompt, step_name, **kwargs):
                # Stand-in for the real agent call: record the prompt and
                # return a canned result (a passing verdict for the review step).
                prompts.append(prompt)
                output = "VERDICT: PASS" if step_name == "review" else "coding output"
                return AgentResult(
                    output=output,
                    exit_code=0,
                    agent_name=agent_config.name,
                    step_name=step_name,
                    duration_seconds=0.1,
                    transcript="# Agent Execution Transcript",
                )

            with patch("cross_eval.pipeline.invoke_agent", side_effect=_fake_invoke):
                run_pipeline(config, cwd=root)

            joined = "\n".join(prompts)
            self.assertIn("Execution Policy", joined)
            self.assertIn("Environment Context", joined)
            self.assertIn("Repository Discovery", joined)
            self.assertIn("ClickHouse-related environment variables are available", joined)
            # Checked inside the ``with`` block: the temporary directory is
            # removed as soon as the block exits.
            self.assertTrue((root / "out").exists())
|
||||||
|
|
||||||
|
|
||||||
|
class AgentTranscriptTest(unittest.TestCase):
    """Tests that agent invocation records an execution transcript."""

    def test_invoke_agent_records_transcript(self) -> None:
        """The transcript contains the command section and both output streams."""

        def _fake_run(cmd, **kwargs):
            # Minimal stand-in for the object returned by subprocess.run,
            # exposing only the attributes asserted on below.
            class _Result:
                returncode = 0
                stdout = "hello"
                stderr = "warn"

            return _Result()

        agent = AgentConfig(
            name="codex-reviewer",
            command="codex",
            args=["exec", "--model", "gpt-5.4", "-"],
        )

        with patch("subprocess.run", side_effect=_fake_run):
            result = invoke_agent(agent, "prompt", "review", quiet=True)

        self.assertIn("## Command", result.transcript)
        self.assertIn("hello", result.transcript)
        self.assertIn("warn", result.transcript)
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||||
Reference in New Issue
Block a user