feat: add runtime discovery and execution traces

This commit is contained in:
chungyeong
2026-03-13 21:52:13 +09:00
parent 941304398d
commit 28dd794f54
35 changed files with 376 additions and 88 deletions

View File

@@ -218,6 +218,7 @@ def invoke_agent(
else:
input_data = prompt
cmd_preview = " ".join(cmd[:6])
logger.debug("Invoking agent '%s': %s", agent.name, " ".join(cmd[:5]) + " ...")
spinner: Optional[_Spinner] = None
@@ -259,7 +260,6 @@ def invoke_agent(
err_detail = result.stderr.strip() or result.stdout.strip()
if err_detail and len(err_detail) > 500:
err_detail = err_detail[:500] + "..."
cmd_preview = " ".join(cmd[:6])
failure_type, suggested_action = _classify_agent_failure(err_detail or "")
raise AgentInvocationError(
agent_name=agent.name,
@@ -298,12 +298,20 @@ def invoke_agent(
agent.name, step_name,
)
transcript = _build_transcript(
command_preview=cmd_preview,
stdout=result.stdout,
stderr=result.stderr,
)
return AgentResult(
output=output,
exit_code=result.returncode,
agent_name=agent.name,
step_name=step_name,
duration_seconds=round(duration, 1),
transcript=transcript,
command_preview=cmd_preview,
)
@@ -360,6 +368,7 @@ def invoke_agent_agentic(
f"Work in the current directory."
)
cmd_preview = " ".join(cmd[:6])
logger.debug(
"Invoking agent '%s' (agentic) in worktree: %s",
agent.name, worktree_path,
@@ -401,7 +410,6 @@ def invoke_agent_agentic(
err_detail = result.stderr.strip() or result.stdout.strip()
if err_detail and len(err_detail) > 500:
err_detail = err_detail[:500] + "..."
cmd_preview = " ".join(cmd[:6])
failure_type, suggested_action = _classify_agent_failure(err_detail or "")
raise AgentInvocationError(
agent_name=agent.name,
@@ -426,10 +434,47 @@ def invoke_agent_agentic(
if spinner:
spinner.stop(f"[{step_name}] done — {chars} chars (agentic)")
transcript = _build_transcript(
command_preview=cmd_preview,
stdout=result.stdout,
stderr=result.stderr,
)
return AgentResult(
output=diff_output,
exit_code=result.returncode,
agent_name=agent.name,
step_name=step_name,
duration_seconds=round(duration, 1),
transcript=transcript,
command_preview=cmd_preview,
)
def _fenced_block(text: str) -> list[str]:
    """Wrap *text* in a markdown code fence that the text cannot close early."""
    fence = "```"
    # A fenced block ends at the first fence at least as long as the opener,
    # so grow the fence until it no longer occurs inside the text itself.
    while fence in text:
        fence += "`"
    return [fence, text, fence]


def _build_transcript(
    *,
    command_preview: str,
    stdout: str,
    stderr: str,
) -> str:
    """Build a compact execution transcript for debugging/audit output.

    Args:
        command_preview: Shortened command line shown under "Command";
            rendered as "(unknown command)" when empty.
        stdout: Captured standard output of the agent process.
        stderr: Captured standard error of the agent process.

    Returns:
        A markdown document with "Command", "Stdout" and "Stderr" sections,
        each in its own fenced block, terminated by a newline. Streams that
        are empty or whitespace-only are rendered as "(empty)".
    """
    # Strip before applying the placeholder so whitespace-only output is
    # reported as "(empty)" instead of producing a blank fenced block.
    stdout_text = (stdout or "").strip() or "(empty)"
    stderr_text = (stderr or "").strip() or "(empty)"

    sections = [
        "# Agent Execution Transcript",
        "",
        "## Command",
        *_fenced_block(command_preview or "(unknown command)"),
        "",
        "## Stdout",
        *_fenced_block(stdout_text),
        "",
        "## Stderr",
        *_fenced_block(stderr_text),
        "",
    ]
    return "\n".join(sections)

167
cross_eval/discovery.py Normal file
View File

@@ -0,0 +1,167 @@
"""Repository/service discovery helpers for autonomous execution prompts."""
from __future__ import annotations
import json
from dataclasses import dataclass, field
from pathlib import Path
@dataclass
class RepoDiscovery:
    """Container for the stack signals collected by ``discover_repo``.

    Every field starts out empty and uses its own fresh container, so
    instances never share state.
    """

    # Language identifiers such as "python" or "node".
    languages: set[str] = field(default_factory=set)
    # Package manager names such as "pip" or "npm".
    package_managers: set[str] = field(default_factory=set)
    # Database engines inferred from manifests, dependencies and env names.
    databases: set[str] = field(default_factory=set)
    # Services recognised in compose files.
    services: set[str] = field(default_factory=set)
    # Free-form, human-readable notes appended verbatim to the summary.
    hints: list[str] = field(default_factory=list)
def _read_text(path: Path) -> str:
    """Return the UTF-8 text stored at *path*, or ``""`` if it cannot be read.

    Filesystem errors and content that is not valid UTF-8 are both treated
    as "no content" rather than raised.
    """
    try:
        contents = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        contents = ""
    return contents
def _add_if_contains(target: set[str], content: str, mapping: dict[str, str]) -> None:
    """Add to *target* every mapped name whose needle occurs in *content*.

    *content* is lower-cased before matching; the needles in *mapping* are
    compared as given, using plain substring containment. *target* is
    mutated in place.
    """
    haystack = content.lower()
    target.update(label for needle, label in mapping.items() if needle in haystack)
def _load_package_json(path: Path) -> dict:
    """Return the parsed ``package.json`` object at *path*, or ``{}`` if unusable."""
    try:
        parsed = json.loads(_read_text(path) or "{}")
    except json.JSONDecodeError:
        return {}
    # Syntactically valid JSON may still be a list, string or number; only an
    # object supports the ``.get`` lookups performed by the caller.
    return parsed if isinstance(parsed, dict) else {}


def _node_dependency_names(package_json: dict) -> list[str]:
    """Collect lower-cased names from ``dependencies`` and ``devDependencies``."""
    names: list[str] = []
    for section in ("dependencies", "devDependencies"):
        entries = package_json.get(section)
        if isinstance(entries, dict):  # ignore malformed (non-object) sections
            names.extend(name.lower() for name in entries)
    return names


def _names_pg_driver(dependency: str) -> bool:
    """Tell whether *dependency* has ``pg`` as a whole name segment."""
    # Split on the separators used in npm names so "pg", "pg-promise" and
    # "@types/pg" match while unrelated names such as "openpgp" do not.
    for separator in "@/._":
        dependency = dependency.replace(separator, "-")
    return "pg" in dependency.split("-")


def discover_repo(project_root: Path, env_names: set[str] | None = None) -> RepoDiscovery:
    """Infer runtime-relevant stack hints from common manifest/config files.

    Args:
        project_root: Directory whose manifests and compose files are inspected.
        env_names: Names (not values) of known environment variables. They
            are upper-cased before matching; ``None`` means no names.

    Returns:
        A ``RepoDiscovery`` holding the detected languages, package managers,
        databases, compose services and free-form hints. All detection is
        heuristic substring/name matching, so results are hints, not facts.
    """
    discovery = RepoDiscovery()
    env_names = {name.upper() for name in (env_names or set())}

    file_map = {
        "pyproject": project_root / "pyproject.toml",
        "requirements": project_root / "requirements.txt",
        "package": project_root / "package.json",
        "docker_compose": project_root / "docker-compose.yml",
        "docker_compose_alt": project_root / "docker-compose.yaml",
        "compose": project_root / "compose.yaml",
        "compose_alt": project_root / "compose.yml",
        "prisma": project_root / "prisma" / "schema.prisma",
    }
    # Read every manifest that exists exactly once; membership in this dict
    # doubles as the "file is present" check below.
    manifests = {
        name: _read_text(path)
        for name, path in file_map.items()
        if path.exists()
    }
    has_package_json = "package" in manifests
    package_json = _load_package_json(file_map["package"]) if has_package_json else {}

    if "pyproject" in manifests or "requirements" in manifests:
        discovery.languages.add("python")
        # requirements.txt is a pip input file, so it implies pip just as
        # pyproject.toml does.
        discovery.package_managers.add("pip")
    if has_package_json:
        discovery.languages.add("node")
        pm = package_json.get("packageManager")
        if isinstance(pm, str) and pm:
            # "pnpm@8.6.0" -> "pnpm"
            discovery.package_managers.add(pm.split("@", 1)[0])
        else:
            discovery.package_managers.add("npm")

    combined = "\n".join(manifests.values())
    _add_if_contains(
        discovery.databases,
        combined,
        {
            "psycopg": "postgresql",
            "asyncpg": "postgresql",
            "postgres": "postgresql",
            "mysql": "mysql",
            "pymongo": "mongodb",
            "mongodb": "mongodb",
            "mongoengine": "mongodb",
            "clickhouse": "clickhouse",
            "clickhouse-driver": "clickhouse",
            "clickhouse_connect": "clickhouse",
            "redis": "redis",
        },
    )

    if has_package_json:
        dependency_names = _node_dependency_names(package_json)
        _add_if_contains(
            discovery.databases,
            "\n".join(dependency_names),
            {
                "mysql": "mysql",
                "mongoose": "mongodb",
                "mongodb": "mongodb",
                "@clickhouse/client": "clickhouse",
                "redis": "redis",
                "prisma": "postgresql",
            },
        )
        # "pg" is too short for substring matching, so it is matched as a
        # whole name segment instead.
        if any(_names_pg_driver(name) for name in dependency_names):
            discovery.databases.add("postgresql")

    for env_name in env_names:
        if "CLICKHOUSE" in env_name or env_name.startswith("CH_"):
            discovery.databases.add("clickhouse")
        # NOTE(review): the bare "PG" prefix also matches unrelated names such
        # as PGID; kept as-is because PGHOST/PGPORT/PGPASSWORD rely on it.
        if "POSTGRES" in env_name or env_name.startswith("PG") or env_name == "DATABASE_URL":
            discovery.databases.add("postgresql")
        if "MYSQL" in env_name:
            discovery.databases.add("mysql")
        if "MONGO" in env_name:
            discovery.databases.add("mongodb")
        if "REDIS" in env_name:
            discovery.databases.add("redis")

    compose_blob = "\n".join(
        manifests.get(key, "")
        for key in ("docker_compose", "docker_compose_alt", "compose", "compose_alt")
    ).lower()
    _add_if_contains(
        discovery.services,
        compose_blob,
        {
            "clickhouse": "clickhouse",
            "postgres": "postgresql",
            "mysql": "mysql",
            "mongo": "mongodb",
            "redis": "redis",
        },
    )

    if "prisma" in manifests:
        discovery.hints.append("Prisma schema detected.")
    if (project_root / "alembic.ini").exists():
        discovery.hints.append("Alembic migration config detected.")
    if (project_root / "docker").exists() or discovery.services:
        discovery.hints.append("Containerized services may be available for local verification.")

    return discovery
def format_repo_discovery(discovery: RepoDiscovery) -> str:
    """Render discovery results into a compact prompt summary.

    Each non-empty category becomes one line with its values sorted and
    comma-separated, followed by the hints verbatim (one per line). When
    nothing at all was detected a fixed fallback sentence is returned.
    """
    labelled_sets = (
        ("Detected languages: ", discovery.languages),
        ("Likely package managers: ", discovery.package_managers),
        ("Detected databases/services in code or env: ", discovery.databases),
        ("Detected local service containers: ", discovery.services),
    )
    summary: list[str] = [
        prefix + ", ".join(sorted(values))
        for prefix, values in labelled_sets
        if values
    ]
    summary.extend(discovery.hints or [])

    if summary:
        return "\n".join(summary)
    return "No strong runtime/service signals were detected from repository manifests."

View File

@@ -88,6 +88,8 @@ class AgentResult:
agent_name: str
step_name: str
duration_seconds: float
transcript: str = ""
command_preview: str = ""
@dataclass

View File

@@ -13,6 +13,7 @@ from pathlib import Path
from cross_eval.agent import AgentInvocationError, invoke_agent, invoke_agent_agentic
from cross_eval.worktree import WorktreeError
from cross_eval.config import try_reload_config
from cross_eval.discovery import discover_repo, format_repo_discovery
from cross_eval.models import (
AgentConfig,
AgentResult,
@@ -804,6 +805,7 @@ def _execute_step(
# 8. Save to disk
_save_step_output(run_dir, output_iter, step.name, result.output)
_maybe_save_step_transcript(run_dir, output_iter, step.name, result)
def _execute_parallel_batch(
@@ -929,6 +931,7 @@ def _execute_parallel_batch(
step.name, r.duration_seconds, len(r.output),
)
_save_step_output(run_dir, output_iter, step.name, r.output)
_maybe_save_step_transcript(run_dir, output_iter, step.name, r)
if errors:
spinner.stop(f"[parallel] FAILED ({batch_elapsed}s)")
@@ -1001,10 +1004,12 @@ def _build_runtime_inputs(
) -> dict[str, str]:
"""Load runtime env and expose safe execution hints to prompts."""
env, loaded_files, loaded_values = build_runtime_environment(config.execution, cwd)
discovery = discover_repo(cwd, set(loaded_values) | set(env))
input_contents["execution_policy"] = build_execution_policy(config.execution)
input_contents["environment_context"] = summarize_environment(
config.execution, loaded_files, env, loaded_values,
)
input_contents["repo_discovery"] = format_repo_discovery(discovery)
return env
@@ -1018,6 +1023,8 @@ def _augment_prompt_with_runtime_context(
extras.append("## Execution Policy\n" + context["execution_policy"])
if context.get("environment_context"):
extras.append("## Environment Context\n" + context["environment_context"])
if context.get("repo_discovery"):
extras.append("## Repository Discovery\n" + context["repo_discovery"])
if not extras:
return prompt
return prompt.rstrip() + "\n\n" + "\n\n".join(extras) + "\n"
@@ -1198,6 +1205,20 @@ def _save_step_output(
return path
def _maybe_save_step_transcript(
    run_dir: Path,
    iteration: int,
    step_name: str,
    result: AgentResult,
) -> Path | None:
    """Write the step's transcript next to its output, if there is one.

    The transcript is stored through ``_save_step_output`` under the step
    name suffixed with ``_transcript``. Returns whatever that call returns,
    or ``None`` when ``result.transcript`` is empty.
    """
    transcript = result.transcript
    if transcript:
        return _save_step_output(
            run_dir, iteration, f"{step_name}_transcript", transcript,
        )
    return None
def _format_runtime_error_markdown(
exc: Exception,
*,