feat: add runtime discovery and execution traces
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -218,6 +218,7 @@ def invoke_agent(
|
||||
else:
|
||||
input_data = prompt
|
||||
|
||||
cmd_preview = " ".join(cmd[:6])
|
||||
logger.debug("Invoking agent '%s': %s", agent.name, " ".join(cmd[:5]) + " ...")
|
||||
|
||||
spinner: Optional[_Spinner] = None
|
||||
@@ -259,7 +260,6 @@ def invoke_agent(
|
||||
err_detail = result.stderr.strip() or result.stdout.strip()
|
||||
if err_detail and len(err_detail) > 500:
|
||||
err_detail = err_detail[:500] + "..."
|
||||
cmd_preview = " ".join(cmd[:6])
|
||||
failure_type, suggested_action = _classify_agent_failure(err_detail or "")
|
||||
raise AgentInvocationError(
|
||||
agent_name=agent.name,
|
||||
@@ -298,12 +298,20 @@ def invoke_agent(
|
||||
agent.name, step_name,
|
||||
)
|
||||
|
||||
transcript = _build_transcript(
|
||||
command_preview=cmd_preview,
|
||||
stdout=result.stdout,
|
||||
stderr=result.stderr,
|
||||
)
|
||||
|
||||
return AgentResult(
|
||||
output=output,
|
||||
exit_code=result.returncode,
|
||||
agent_name=agent.name,
|
||||
step_name=step_name,
|
||||
duration_seconds=round(duration, 1),
|
||||
transcript=transcript,
|
||||
command_preview=cmd_preview,
|
||||
)
|
||||
|
||||
|
||||
@@ -360,6 +368,7 @@ def invoke_agent_agentic(
|
||||
f"Work in the current directory."
|
||||
)
|
||||
|
||||
cmd_preview = " ".join(cmd[:6])
|
||||
logger.debug(
|
||||
"Invoking agent '%s' (agentic) in worktree: %s",
|
||||
agent.name, worktree_path,
|
||||
@@ -401,7 +410,6 @@ def invoke_agent_agentic(
|
||||
err_detail = result.stderr.strip() or result.stdout.strip()
|
||||
if err_detail and len(err_detail) > 500:
|
||||
err_detail = err_detail[:500] + "..."
|
||||
cmd_preview = " ".join(cmd[:6])
|
||||
failure_type, suggested_action = _classify_agent_failure(err_detail or "")
|
||||
raise AgentInvocationError(
|
||||
agent_name=agent.name,
|
||||
@@ -426,10 +434,47 @@ def invoke_agent_agentic(
|
||||
if spinner:
|
||||
spinner.stop(f"[{step_name}] done — {chars} chars (agentic)")
|
||||
|
||||
transcript = _build_transcript(
|
||||
command_preview=cmd_preview,
|
||||
stdout=result.stdout,
|
||||
stderr=result.stderr,
|
||||
)
|
||||
|
||||
return AgentResult(
|
||||
output=diff_output,
|
||||
exit_code=result.returncode,
|
||||
agent_name=agent.name,
|
||||
step_name=step_name,
|
||||
duration_seconds=round(duration, 1),
|
||||
transcript=transcript,
|
||||
command_preview=cmd_preview,
|
||||
)
|
||||
|
||||
|
||||
def _build_transcript(
|
||||
*,
|
||||
command_preview: str,
|
||||
stdout: str,
|
||||
stderr: str,
|
||||
) -> str:
|
||||
"""Build a compact execution transcript for debugging/audit output."""
|
||||
sections = [
|
||||
"# Agent Execution Transcript",
|
||||
"",
|
||||
"## Command",
|
||||
"```",
|
||||
command_preview or "(unknown command)",
|
||||
"```",
|
||||
"",
|
||||
"## Stdout",
|
||||
"```",
|
||||
(stdout or "(empty)").strip(),
|
||||
"```",
|
||||
"",
|
||||
"## Stderr",
|
||||
"```",
|
||||
(stderr or "(empty)").strip(),
|
||||
"```",
|
||||
"",
|
||||
]
|
||||
return "\n".join(sections)
|
||||
|
||||
167
cross_eval/discovery.py
Normal file
167
cross_eval/discovery.py
Normal file
@@ -0,0 +1,167 @@
|
||||
"""Repository/service discovery helpers for autonomous execution prompts."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
@dataclass
class RepoDiscovery:
    """Stack signals collected while inspecting a repository.

    Each field defaults to its own fresh, empty container, so instances never
    share state.
    """

    # Language ecosystems, e.g. "python" or "node".
    languages: set[str] = field(default_factory=set)
    # Package manager names, e.g. "pip" or "npm".
    package_managers: set[str] = field(default_factory=set)
    # Database engine names, e.g. "postgresql" or "redis".
    databases: set[str] = field(default_factory=set)
    # Service names found in compose files.
    services: set[str] = field(default_factory=set)
    # Free-form sentences appended verbatim to the rendered summary.
    hints: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
def _read_text(path: Path) -> str:
|
||||
try:
|
||||
return path.read_text(encoding="utf-8")
|
||||
except (OSError, UnicodeDecodeError):
|
||||
return ""
|
||||
|
||||
|
||||
def _add_if_contains(target: set[str], content: str, mapping: dict[str, str]) -> None:
|
||||
lowered = content.lower()
|
||||
for needle, name in mapping.items():
|
||||
if needle in lowered:
|
||||
target.add(name)
|
||||
|
||||
|
||||
def _parse_package_json(text: str) -> dict:
    """Parse ``package.json`` text, returning ``{}`` when it is unusable.

    Empty text, invalid JSON, and JSON whose top level is not an object all
    collapse to an empty dict so callers can use ``.get`` unconditionally.
    """
    try:
        parsed = json.loads(text or "{}")
    except json.JSONDecodeError:
        return {}
    return parsed if isinstance(parsed, dict) else {}


def _node_dependency_names(package_json: dict) -> list[str]:
    """Collect package names from ``dependencies`` and ``devDependencies``."""
    names: list[str] = []
    for section in ("dependencies", "devDependencies"):
        entries = package_json.get(section)
        # Skip malformed sections (e.g. a list or string) instead of crashing.
        if isinstance(entries, dict):
            names.extend(entries)
    return names


def discover_repo(project_root: Path, env_names: set[str] | None = None) -> RepoDiscovery:
    """Infer runtime-relevant stack hints from common manifest/config files.

    Args:
        project_root: Directory whose well-known manifest files are inspected.
        env_names: Optional environment variable *names*; they are compared
            case-insensitively against database-related patterns.

    Returns:
        A ``RepoDiscovery`` populated with detected languages, package
        managers, databases, compose services and free-form hints. Malformed
        or unreadable manifests are ignored rather than raising.
    """
    discovery = RepoDiscovery()
    env_names = {name.upper() for name in (env_names or set())}

    file_map = {
        "pyproject": project_root / "pyproject.toml",
        "requirements": project_root / "requirements.txt",
        "package": project_root / "package.json",
        "docker_compose": project_root / "docker-compose.yml",
        "docker_compose_alt": project_root / "docker-compose.yaml",
        "compose": project_root / "compose.yaml",
        # Docker Compose also accepts the ".yml" spelling of the default name.
        "compose_alt": project_root / "compose.yml",
        "prisma": project_root / "prisma" / "schema.prisma",
    }

    # Read every manifest that exists exactly once; later steps reuse the text.
    manifests = {
        name: _read_text(path)
        for name, path in file_map.items()
        if path.exists()
    }
    has_package = "package" in manifests
    package_json = _parse_package_json(manifests["package"]) if has_package else {}

    if "pyproject" in manifests or "requirements" in manifests:
        discovery.languages.add("python")
    if has_package:
        discovery.languages.add("node")

    if "pyproject" in manifests:
        discovery.package_managers.add("pip")
    if has_package:
        pm = package_json.get("packageManager")
        # The field looks like "pnpm@8.6.0"; keep only the tool name. A value
        # with nothing before "@" falls back to npm instead of recording "".
        manager = pm.split("@", 1)[0] if isinstance(pm, str) else ""
        discovery.package_managers.add(manager or "npm")

    combined = "\n".join(manifests.values())
    _add_if_contains(
        discovery.databases,
        combined,
        {
            "psycopg": "postgresql",
            "asyncpg": "postgresql",
            "postgres": "postgresql",
            "mysql": "mysql",
            "pymongo": "mongodb",
            "mongodb": "mongodb",
            "mongoengine": "mongodb",
            "clickhouse": "clickhouse",
            "clickhouse-driver": "clickhouse",
            "clickhouse_connect": "clickhouse",
            "redis": "redis",
        },
    )

    if has_package:
        dep_blob = "\n".join(_node_dependency_names(package_json)).lower()
        # NOTE: substring matching is deliberately loose ("pg" also matches
        # "pg-promise"), at the cost of occasional false positives.
        _add_if_contains(
            discovery.databases,
            dep_blob,
            {
                "pg": "postgresql",
                "mysql": "mysql",
                "mongoose": "mongodb",
                "mongodb": "mongodb",
                "@clickhouse/client": "clickhouse",
                "redis": "redis",
                "prisma": "postgresql",
            },
        )

    for env_name in env_names:
        if "CLICKHOUSE" in env_name or env_name.startswith("CH_"):
            discovery.databases.add("clickhouse")
        if "POSTGRES" in env_name or env_name.startswith("PG") or env_name == "DATABASE_URL":
            discovery.databases.add("postgresql")
        if "MYSQL" in env_name:
            discovery.databases.add("mysql")
        if "MONGO" in env_name:
            discovery.databases.add("mongodb")
        if "REDIS" in env_name:
            discovery.databases.add("redis")

    compose_blob = "\n".join(
        manifests.get(key, "")
        for key in ("docker_compose", "docker_compose_alt", "compose", "compose_alt")
    ).lower()
    _add_if_contains(
        discovery.services,
        compose_blob,
        {
            "clickhouse": "clickhouse",
            "postgres": "postgresql",
            "mysql": "mysql",
            "mongo": "mongodb",
            "redis": "redis",
        },
    )

    if "prisma" in manifests:
        discovery.hints.append("Prisma schema detected.")
    if (project_root / "alembic.ini").exists():
        discovery.hints.append("Alembic migration config detected.")
    if (project_root / "docker").exists() or discovery.services:
        discovery.hints.append("Containerized services may be available for local verification.")

    return discovery
|
||||
|
||||
|
||||
def format_repo_discovery(discovery: RepoDiscovery) -> str:
    """Summarize a discovery result as newline-separated prompt text.

    One line is emitted per non-empty category, with its names sorted
    alphabetically, followed by the hints in their stored order. A fixed
    fallback sentence is returned when there is nothing to report.
    """
    labelled_groups = (
        ("Detected languages: ", discovery.languages),
        ("Likely package managers: ", discovery.package_managers),
        ("Detected databases/services in code or env: ", discovery.databases),
        ("Detected local service containers: ", discovery.services),
    )
    summary = [
        label + ", ".join(sorted(names))
        for label, names in labelled_groups
        if names
    ]
    # Hints are already complete sentences, so they are appended verbatim.
    summary.extend(discovery.hints)
    if summary:
        return "\n".join(summary)
    return "No strong runtime/service signals were detected from repository manifests."
|
||||
@@ -88,6 +88,8 @@ class AgentResult:
|
||||
agent_name: str
|
||||
step_name: str
|
||||
duration_seconds: float
|
||||
transcript: str = ""
|
||||
command_preview: str = ""
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -13,6 +13,7 @@ from pathlib import Path
|
||||
from cross_eval.agent import AgentInvocationError, invoke_agent, invoke_agent_agentic
|
||||
from cross_eval.worktree import WorktreeError
|
||||
from cross_eval.config import try_reload_config
|
||||
from cross_eval.discovery import discover_repo, format_repo_discovery
|
||||
from cross_eval.models import (
|
||||
AgentConfig,
|
||||
AgentResult,
|
||||
@@ -804,6 +805,7 @@ def _execute_step(
|
||||
|
||||
# 8. Save to disk
|
||||
_save_step_output(run_dir, output_iter, step.name, result.output)
|
||||
_maybe_save_step_transcript(run_dir, output_iter, step.name, result)
|
||||
|
||||
|
||||
def _execute_parallel_batch(
|
||||
@@ -929,6 +931,7 @@ def _execute_parallel_batch(
|
||||
step.name, r.duration_seconds, len(r.output),
|
||||
)
|
||||
_save_step_output(run_dir, output_iter, step.name, r.output)
|
||||
_maybe_save_step_transcript(run_dir, output_iter, step.name, r)
|
||||
|
||||
if errors:
|
||||
spinner.stop(f"[parallel] FAILED ({batch_elapsed}s)")
|
||||
@@ -1001,10 +1004,12 @@ def _build_runtime_inputs(
|
||||
) -> dict[str, str]:
|
||||
"""Load runtime env and expose safe execution hints to prompts."""
|
||||
env, loaded_files, loaded_values = build_runtime_environment(config.execution, cwd)
|
||||
discovery = discover_repo(cwd, set(loaded_values) | set(env))
|
||||
input_contents["execution_policy"] = build_execution_policy(config.execution)
|
||||
input_contents["environment_context"] = summarize_environment(
|
||||
config.execution, loaded_files, env, loaded_values,
|
||||
)
|
||||
input_contents["repo_discovery"] = format_repo_discovery(discovery)
|
||||
return env
|
||||
|
||||
|
||||
@@ -1018,6 +1023,8 @@ def _augment_prompt_with_runtime_context(
|
||||
extras.append("## Execution Policy\n" + context["execution_policy"])
|
||||
if context.get("environment_context"):
|
||||
extras.append("## Environment Context\n" + context["environment_context"])
|
||||
if context.get("repo_discovery"):
|
||||
extras.append("## Repository Discovery\n" + context["repo_discovery"])
|
||||
if not extras:
|
||||
return prompt
|
||||
return prompt.rstrip() + "\n\n" + "\n\n".join(extras) + "\n"
|
||||
@@ -1198,6 +1205,20 @@ def _save_step_output(
|
||||
return path
|
||||
|
||||
|
||||
def _maybe_save_step_transcript(
    run_dir: Path,
    iteration: int,
    step_name: str,
    result: AgentResult,
) -> Path | None:
    """Save the step's transcript through ``_save_step_output`` when one exists.

    The transcript is stored under the name ``<step_name>_transcript``.
    Returns the value from ``_save_step_output``, or ``None`` (without
    attempting a save) when the result carries an empty transcript.
    """
    transcript_text = result.transcript
    if transcript_text:
        transcript_name = f"{step_name}_transcript"
        return _save_step_output(run_dir, iteration, transcript_name, transcript_text)
    return None
|
||||
|
||||
|
||||
def _format_runtime_error_markdown(
|
||||
exc: Exception,
|
||||
*,
|
||||
|
||||
Reference in New Issue
Block a user