feat: harden runtime evidence and claude agentic validation
This commit is contained in:
@@ -19,6 +19,34 @@ logger = logging.getLogger(__name__)
|
|||||||
# CLI tools that support --system-prompt flag natively
|
# CLI tools that support --system-prompt flag natively
|
||||||
_SYSTEM_PROMPT_AGENTS = ("claude",)
|
_SYSTEM_PROMPT_AGENTS = ("claude",)
|
||||||
_REASONING_EFFORT_AGENTS = ("codex",)
|
_REASONING_EFFORT_AGENTS = ("codex",)
|
||||||
|
# Lower-case phrases by which an agent explicitly acknowledges that nothing
# had to be changed. `_claims_file_changes` looks for these first and, when
# one is present, does not treat the output as a change claim.
_NO_CHANGE_ACK_MARKERS = (
    "no changes",
    "no code changes",
    "no file changes",
    "did not make any changes",
    "nothing to change",
    "no modifications were necessary",
    "no update was necessary",
    "already satisfied",
)
|
||||||
|
# Lower-case phrases that indicate an agent is claiming it changed files.
# `_claims_file_changes` tests whether any of them occurs in the output, so
# only the shortest spelling of each phrase is listed: a longer variant such
# as "i implemented" can never match unless "implemented" already does.
_CHANGE_CLAIM_MARKERS = (
    "summary of all changes made",
    "implemented",
    "added",
    "updated",
    "modified",
    "created",
    "fixed",
    "completed the changes",
    "finished the changes",
)
|
||||||
|
|
||||||
|
|
||||||
class AgentInvocationError(RuntimeError):
|
class AgentInvocationError(RuntimeError):
|
||||||
@@ -106,6 +134,16 @@ def _classify_agent_failure(detail: str) -> tuple[str, str]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _claims_file_changes(output: str) -> bool:
    """Heuristically decide whether agent text claims code changes were made.

    The text is lower-cased and checked in two steps:

    1. If it contains any ``_NO_CHANGE_ACK_MARKERS`` phrase, the agent has
       acknowledged that nothing needed changing and the result is False.
    2. Otherwise the result is True when any ``_CHANGE_CLAIM_MARKERS`` phrase
       occurs as a whole word or phrase.

    Change-claim markers are matched on word boundaries so that words which
    merely contain a marker ("unmodified", "prefixed", "padded") do not count
    as a claim.

    Args:
        output: Raw agent stdout; empty or whitespace-only text yields False.

    Returns:
        True if the text looks like a claim that files were changed.
    """
    # Local import: the module's import block is not part of this hunk.
    import re

    normalized = (output or "").lower()
    if not normalized.strip():
        return False
    # An explicit "nothing to do" acknowledgement wins over any claim wording.
    if any(marker in normalized for marker in _NO_CHANGE_ACK_MARKERS):
        return False
    # Look-arounds rather than \b so markers that start or end with a
    # non-word character would still be anchored correctly.
    return any(
        re.search(rf"(?<!\w){re.escape(marker)}(?!\w)", normalized) is not None
        for marker in _CHANGE_CLAIM_MARKERS
    )
|
||||||
|
|
||||||
|
|
||||||
class _Spinner:
|
class _Spinner:
|
||||||
"""Animated spinner for long-running agent calls."""
|
"""Animated spinner for long-running agent calls."""
|
||||||
|
|
||||||
@@ -302,6 +340,9 @@ def invoke_agent(
|
|||||||
command_preview=cmd_preview,
|
command_preview=cmd_preview,
|
||||||
stdout=result.stdout,
|
stdout=result.stdout,
|
||||||
stderr=result.stderr,
|
stderr=result.stderr,
|
||||||
|
exit_code=result.returncode,
|
||||||
|
duration_seconds=round(duration, 1),
|
||||||
|
cwd=str(cwd) if cwd else "",
|
||||||
)
|
)
|
||||||
|
|
||||||
return AgentResult(
|
return AgentResult(
|
||||||
@@ -424,6 +465,28 @@ def invoke_agent_agentic(
|
|||||||
diff_output = capture_diff(worktree_path)
|
diff_output = capture_diff(worktree_path)
|
||||||
|
|
||||||
if not diff_output:
|
if not diff_output:
|
||||||
|
stdout_excerpt = (result.stdout or "").strip()
|
||||||
|
stderr_excerpt = (result.stderr or "").strip()
|
||||||
|
if _claims_file_changes(stdout_excerpt):
|
||||||
|
if spinner:
|
||||||
|
spinner.stop(f"[{step_name}] FAILED (empty diff)")
|
||||||
|
raw_error = stdout_excerpt or "(stdout empty)"
|
||||||
|
if stderr_excerpt:
|
||||||
|
raw_error = f"{raw_error}\n\n[stderr]\n{stderr_excerpt}"
|
||||||
|
if len(raw_error) > 2000:
|
||||||
|
raw_error = raw_error[:2000] + "..."
|
||||||
|
raise AgentInvocationError(
|
||||||
|
agent_name=agent.name,
|
||||||
|
step_name=step_name,
|
||||||
|
cmd_preview=cmd_preview,
|
||||||
|
raw_error=raw_error,
|
||||||
|
failure_type="EMPTY_DIFF",
|
||||||
|
suggested_action=(
|
||||||
|
"Agent reported code changes but produced no git diff. "
|
||||||
|
"Treat this run as failed and require a real worktree diff before continuing."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
diff_output = "(no changes)"
|
diff_output = "(no changes)"
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Agent '%s' made no file changes at step '%s'",
|
"Agent '%s' made no file changes at step '%s'",
|
||||||
@@ -438,6 +501,9 @@ def invoke_agent_agentic(
|
|||||||
command_preview=cmd_preview,
|
command_preview=cmd_preview,
|
||||||
stdout=result.stdout,
|
stdout=result.stdout,
|
||||||
stderr=result.stderr,
|
stderr=result.stderr,
|
||||||
|
exit_code=result.returncode,
|
||||||
|
duration_seconds=round(duration, 1),
|
||||||
|
cwd=str(worktree_path),
|
||||||
)
|
)
|
||||||
|
|
||||||
return AgentResult(
|
return AgentResult(
|
||||||
@@ -456,6 +522,9 @@ def _build_transcript(
|
|||||||
command_preview: str,
|
command_preview: str,
|
||||||
stdout: str,
|
stdout: str,
|
||||||
stderr: str,
|
stderr: str,
|
||||||
|
exit_code: int = 0,
|
||||||
|
duration_seconds: float = 0.0,
|
||||||
|
cwd: str = "",
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Build a compact execution transcript for debugging/audit output."""
|
"""Build a compact execution transcript for debugging/audit output."""
|
||||||
sections = [
|
sections = [
|
||||||
@@ -466,6 +535,16 @@ def _build_transcript(
|
|||||||
command_preview or "(unknown command)",
|
command_preview or "(unknown command)",
|
||||||
"```",
|
"```",
|
||||||
"",
|
"",
|
||||||
|
]
|
||||||
|
if cwd:
|
||||||
|
sections.extend(["## Working Directory", f"`{cwd}`", ""])
|
||||||
|
sections.extend([
|
||||||
|
f"## Exit Code: {exit_code}",
|
||||||
|
"",
|
||||||
|
])
|
||||||
|
if duration_seconds > 0:
|
||||||
|
sections.extend([f"## Duration: {duration_seconds}s", ""])
|
||||||
|
sections.extend([
|
||||||
"## Stdout",
|
"## Stdout",
|
||||||
"```",
|
"```",
|
||||||
(stdout or "(empty)").strip(),
|
(stdout or "(empty)").strip(),
|
||||||
@@ -476,5 +555,5 @@ def _build_transcript(
|
|||||||
(stderr or "(empty)").strip(),
|
(stderr or "(empty)").strip(),
|
||||||
"```",
|
"```",
|
||||||
"",
|
"",
|
||||||
]
|
])
|
||||||
return "\n".join(sections)
|
return "\n".join(sections)
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ class RepoDiscovery:
|
|||||||
package_managers: set[str] = field(default_factory=set)
|
package_managers: set[str] = field(default_factory=set)
|
||||||
databases: set[str] = field(default_factory=set)
|
databases: set[str] = field(default_factory=set)
|
||||||
services: set[str] = field(default_factory=set)
|
services: set[str] = field(default_factory=set)
|
||||||
|
frameworks: set[str] = field(default_factory=set)
|
||||||
hints: list[str] = field(default_factory=list)
|
hints: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
@@ -29,27 +30,176 @@ def _add_if_contains(target: set[str], content: str, mapping: dict[str, str]) ->
|
|||||||
target.add(name)
|
target.add(name)
|
||||||
|
|
||||||
|
|
||||||
|
# Shared mapping for database signals found in manifest content
|
||||||
|
# Maps a token found in manifest text to the canonical name that is added to
# `RepoDiscovery.databases`. discover_repo passes this table, together with the
# newline-joined manifest contents, to `_add_if_contains`.
# NOTE(review): several keys contain a shorter key from this same table
# (e.g. "clickhouse-driver" vs "clickhouse", "pymysql" vs "mysql"); if
# `_add_if_contains` does plain substring matching the longer spellings add
# nothing — confirm against that helper.
_MANIFEST_DB_SIGNALS: dict[str, str] = {
    # PostgreSQL
    "psycopg": "postgresql",
    "asyncpg": "postgresql",
    "postgres": "postgresql",
    "pgx": "postgresql",
    # MySQL / MariaDB
    "mysql": "mysql",
    "mariadb": "mysql",
    "pymysql": "mysql",
    # MongoDB
    "pymongo": "mongodb",
    "mongodb": "mongodb",
    "mongoengine": "mongodb",
    "mongosh": "mongodb",
    # ClickHouse
    "clickhouse": "clickhouse",
    "clickhouse-driver": "clickhouse",
    "clickhouse_connect": "clickhouse",
    # Redis
    "redis": "redis",
    "ioredis": "redis",
    # SQLite
    "sqlite": "sqlite",
    "better-sqlite3": "sqlite",
    "aiosqlite": "sqlite",
    # Elasticsearch / OpenSearch
    "elasticsearch": "elasticsearch",
    "opensearch": "elasticsearch",
    # DynamoDB
    "dynamodb": "dynamodb",
    "boto3": "dynamodb",  # broad but common signal
    # Cassandra
    "cassandra-driver": "cassandra",
    "cassandra": "cassandra",
    # RabbitMQ
    "amqplib": "rabbitmq",
    "pika": "rabbitmq",
    "rabbitmq": "rabbitmq",
    # Kafka
    "kafka": "kafka",
    "confluent-kafka": "kafka",
    "kafkajs": "kafka",
    # Neo4j
    "neo4j": "neo4j",
}
|
||||||
|
|
||||||
|
# Node package.json dependency → database mapping
|
||||||
|
# Maps a package.json dependency name to the canonical database name added to
# `RepoDiscovery.databases`. discover_repo checks these keys against the
# lower-cased, newline-joined names from `dependencies`/`devDependencies`.
# NOTE(review): the ORM / query-builder entries (prisma, typeorm, sequelize,
# knex) are attributed to postgresql although those tools can target other
# databases too — presumably a deliberate default; confirm.
_NODE_DEP_DB_SIGNALS: dict[str, str] = {
    "pg": "postgresql",
    "mysql": "mysql",
    "mysql2": "mysql",
    "mongoose": "mongodb",
    "mongodb": "mongodb",
    "@clickhouse/client": "clickhouse",
    "redis": "redis",
    "ioredis": "redis",
    "prisma": "postgresql",
    "better-sqlite3": "sqlite",
    "sqlite3": "sqlite",
    "@elastic/elasticsearch": "elasticsearch",
    "@aws-sdk/client-dynamodb": "dynamodb",
    "kafkajs": "kafka",
    "amqplib": "rabbitmq",
    "neo4j-driver": "neo4j",
    "cassandra-driver": "cassandra",
    "typeorm": "postgresql",
    "sequelize": "postgresql",
    "knex": "postgresql",
}
|
||||||
|
|
||||||
|
# Docker compose service image → service name mapping
|
||||||
|
# Maps a token found in docker compose files to the service name added to
# `RepoDiscovery.services`. discover_repo checks these keys against the
# lower-cased concatenation of docker-compose.yml, docker-compose.yaml and
# compose.yaml, so keys must stay lower-case.
_COMPOSE_SERVICE_SIGNALS: dict[str, str] = {
    "clickhouse": "clickhouse",
    "postgres": "postgresql",
    "mysql": "mysql",
    "mariadb": "mysql",
    "mongo": "mongodb",
    "redis": "redis",
    "elasticsearch": "elasticsearch",
    "opensearch": "elasticsearch",
    "rabbitmq": "rabbitmq",
    "kafka": "kafka",
    "zookeeper": "kafka",
    "cassandra": "cassandra",
    "neo4j": "neo4j",
    "minio": "s3",
    "localstack": "aws-local",
    "dynamodb": "dynamodb",
    "memcached": "memcached",
    "nginx": "nginx",
}
|
||||||
|
|
||||||
|
# Environment variable name patterns → database mapping
|
||||||
|
# (pattern, database) pairs checked against upper-cased environment variable
# names. discover_repo walks the list in order and stops at the first pattern
# that matches a given name, so earlier entries take precedence.
# NOTE(review): discover_repo tests `pattern in env_name`, i.e. a match
# anywhere in the name. Short patterns therefore over-match: "CH_" is
# contained in "SEARCH_HOST" and "PG" in "UPGRADE_MODE", and any name that
# merely contains "DATABASE_URL" counts as postgresql. Confirm whether these
# three were meant to be prefix / exact matches.
_ENV_DB_PATTERNS: list[tuple[str, str]] = [
    ("CLICKHOUSE", "clickhouse"),
    ("CH_", "clickhouse"),
    ("POSTGRES", "postgresql"),
    ("PG", "postgresql"),
    ("DATABASE_URL", "postgresql"),
    ("MYSQL", "mysql"),
    ("MARIADB", "mysql"),
    ("MONGO", "mongodb"),
    ("REDIS", "redis"),
    ("ELASTICSEARCH", "elasticsearch"),
    ("OPENSEARCH", "elasticsearch"),
    ("DYNAMO", "dynamodb"),
    ("CASSANDRA", "cassandra"),
    ("KAFKA", "kafka"),
    ("RABBIT", "rabbitmq"),
    ("AMQP", "rabbitmq"),
    ("NEO4J", "neo4j"),
    ("SQLITE", "sqlite"),
]
|
||||||
|
|
||||||
|
|
||||||
def discover_repo(project_root: Path, env_names: set[str] | None = None) -> RepoDiscovery:
|
def discover_repo(project_root: Path, env_names: set[str] | None = None) -> RepoDiscovery:
|
||||||
"""Infer runtime-relevant stack hints from common manifest/config files."""
|
"""Infer runtime-relevant stack hints from common manifest/config files."""
|
||||||
discovery = RepoDiscovery()
|
discovery = RepoDiscovery()
|
||||||
env_names = {name.upper() for name in (env_names or set())}
|
env_names = {name.upper() for name in (env_names or set())}
|
||||||
|
|
||||||
file_map = {
|
file_map: dict[str, Path] = {
|
||||||
"pyproject": project_root / "pyproject.toml",
|
"pyproject": project_root / "pyproject.toml",
|
||||||
"requirements": project_root / "requirements.txt",
|
"requirements": project_root / "requirements.txt",
|
||||||
|
"requirements_dev": project_root / "requirements-dev.txt",
|
||||||
|
"setup_py": project_root / "setup.py",
|
||||||
|
"setup_cfg": project_root / "setup.cfg",
|
||||||
"package": project_root / "package.json",
|
"package": project_root / "package.json",
|
||||||
|
"go_mod": project_root / "go.mod",
|
||||||
|
"cargo": project_root / "Cargo.toml",
|
||||||
|
"gemfile": project_root / "Gemfile",
|
||||||
|
"build_gradle": project_root / "build.gradle",
|
||||||
|
"build_gradle_kts": project_root / "build.gradle.kts",
|
||||||
|
"pom": project_root / "pom.xml",
|
||||||
|
"composer": project_root / "composer.json",
|
||||||
|
"mix": project_root / "mix.exs",
|
||||||
"docker_compose": project_root / "docker-compose.yml",
|
"docker_compose": project_root / "docker-compose.yml",
|
||||||
"docker_compose_alt": project_root / "docker-compose.yaml",
|
"docker_compose_alt": project_root / "docker-compose.yaml",
|
||||||
"compose": project_root / "compose.yaml",
|
"compose": project_root / "compose.yaml",
|
||||||
"prisma": project_root / "prisma" / "schema.prisma",
|
"prisma": project_root / "prisma" / "schema.prisma",
|
||||||
|
"dockerfile": project_root / "Dockerfile",
|
||||||
}
|
}
|
||||||
|
|
||||||
if file_map["pyproject"].exists() or file_map["requirements"].exists():
|
# ---- Language detection ----
|
||||||
|
if (
|
||||||
|
file_map["pyproject"].exists()
|
||||||
|
or file_map["requirements"].exists()
|
||||||
|
or file_map["requirements_dev"].exists()
|
||||||
|
or file_map["setup_py"].exists()
|
||||||
|
or file_map["setup_cfg"].exists()
|
||||||
|
):
|
||||||
discovery.languages.add("python")
|
discovery.languages.add("python")
|
||||||
if file_map["package"].exists():
|
if file_map["package"].exists():
|
||||||
discovery.languages.add("node")
|
discovery.languages.add("node")
|
||||||
|
if file_map["go_mod"].exists():
|
||||||
|
discovery.languages.add("go")
|
||||||
|
if file_map["cargo"].exists():
|
||||||
|
discovery.languages.add("rust")
|
||||||
|
if file_map["gemfile"].exists():
|
||||||
|
discovery.languages.add("ruby")
|
||||||
|
if file_map["build_gradle"].exists() or file_map["build_gradle_kts"].exists() or file_map["pom"].exists():
|
||||||
|
discovery.languages.add("java")
|
||||||
|
if file_map["composer"].exists():
|
||||||
|
discovery.languages.add("php")
|
||||||
|
if file_map["mix"].exists():
|
||||||
|
discovery.languages.add("elixir")
|
||||||
|
|
||||||
if file_map["pyproject"].exists():
|
# ---- Package manager detection ----
|
||||||
|
if file_map["pyproject"].exists() or file_map["requirements"].exists() or file_map["setup_py"].exists():
|
||||||
discovery.package_managers.add("pip")
|
discovery.package_managers.add("pip")
|
||||||
if file_map["package"].exists():
|
if file_map["package"].exists():
|
||||||
try:
|
try:
|
||||||
@@ -59,9 +209,30 @@ def discover_repo(project_root: Path, env_names: set[str] | None = None) -> Repo
|
|||||||
pm = package_json.get("packageManager")
|
pm = package_json.get("packageManager")
|
||||||
if isinstance(pm, str) and pm:
|
if isinstance(pm, str) and pm:
|
||||||
discovery.package_managers.add(pm.split("@", 1)[0])
|
discovery.package_managers.add(pm.split("@", 1)[0])
|
||||||
|
else:
|
||||||
|
# Check for lockfiles to distinguish npm/yarn/pnpm
|
||||||
|
if (project_root / "pnpm-lock.yaml").exists():
|
||||||
|
discovery.package_managers.add("pnpm")
|
||||||
|
elif (project_root / "yarn.lock").exists():
|
||||||
|
discovery.package_managers.add("yarn")
|
||||||
else:
|
else:
|
||||||
discovery.package_managers.add("npm")
|
discovery.package_managers.add("npm")
|
||||||
|
if file_map["go_mod"].exists():
|
||||||
|
discovery.package_managers.add("go")
|
||||||
|
if file_map["cargo"].exists():
|
||||||
|
discovery.package_managers.add("cargo")
|
||||||
|
if file_map["gemfile"].exists():
|
||||||
|
discovery.package_managers.add("bundler")
|
||||||
|
if file_map["build_gradle"].exists() or file_map["build_gradle_kts"].exists():
|
||||||
|
discovery.package_managers.add("gradle")
|
||||||
|
if file_map["pom"].exists():
|
||||||
|
discovery.package_managers.add("maven")
|
||||||
|
if file_map["composer"].exists():
|
||||||
|
discovery.package_managers.add("composer")
|
||||||
|
if file_map["mix"].exists():
|
||||||
|
discovery.package_managers.add("mix")
|
||||||
|
|
||||||
|
# ---- Gather manifest content ----
|
||||||
manifests = {
|
manifests = {
|
||||||
name: _read_text(path)
|
name: _read_text(path)
|
||||||
for name, path in file_map.items()
|
for name, path in file_map.items()
|
||||||
@@ -69,24 +240,10 @@ def discover_repo(project_root: Path, env_names: set[str] | None = None) -> Repo
|
|||||||
}
|
}
|
||||||
combined = "\n".join(manifests.values())
|
combined = "\n".join(manifests.values())
|
||||||
|
|
||||||
_add_if_contains(
|
# ---- Database detection from manifest content ----
|
||||||
discovery.databases,
|
_add_if_contains(discovery.databases, combined, _MANIFEST_DB_SIGNALS)
|
||||||
combined,
|
|
||||||
{
|
|
||||||
"psycopg": "postgresql",
|
|
||||||
"asyncpg": "postgresql",
|
|
||||||
"postgres": "postgresql",
|
|
||||||
"mysql": "mysql",
|
|
||||||
"pymongo": "mongodb",
|
|
||||||
"mongodb": "mongodb",
|
|
||||||
"mongoengine": "mongodb",
|
|
||||||
"clickhouse": "clickhouse",
|
|
||||||
"clickhouse-driver": "clickhouse",
|
|
||||||
"clickhouse_connect": "clickhouse",
|
|
||||||
"redis": "redis",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
# ---- Node.js dependency-specific detection ----
|
||||||
if file_map["package"].exists():
|
if file_map["package"].exists():
|
||||||
try:
|
try:
|
||||||
package_json = json.loads(_read_text(file_map["package"]) or "{}")
|
package_json = json.loads(_read_text(file_map["package"]) or "{}")
|
||||||
@@ -97,53 +254,57 @@ def discover_repo(project_root: Path, env_names: set[str] | None = None) -> Repo
|
|||||||
**(package_json.get("devDependencies") or {}),
|
**(package_json.get("devDependencies") or {}),
|
||||||
}
|
}
|
||||||
dep_blob = "\n".join(deps.keys()).lower()
|
dep_blob = "\n".join(deps.keys()).lower()
|
||||||
|
_add_if_contains(discovery.databases, dep_blob, _NODE_DEP_DB_SIGNALS)
|
||||||
|
|
||||||
|
# ---- Framework detection from manifest content ----
|
||||||
_add_if_contains(
|
_add_if_contains(
|
||||||
discovery.databases,
|
discovery.frameworks,
|
||||||
dep_blob,
|
combined,
|
||||||
{
|
{
|
||||||
"pg": "postgresql",
|
"fastapi": "fastapi",
|
||||||
"mysql": "mysql",
|
"django": "django",
|
||||||
"mongoose": "mongodb",
|
"flask": "flask",
|
||||||
"mongodb": "mongodb",
|
"express": "express",
|
||||||
"@clickhouse/client": "clickhouse",
|
"nextjs": "next.js",
|
||||||
"redis": "redis",
|
"next": "next.js",
|
||||||
"prisma": "postgresql",
|
"nestjs": "nestjs",
|
||||||
|
"spring": "spring",
|
||||||
|
"rails": "rails",
|
||||||
|
"laravel": "laravel",
|
||||||
|
"phoenix": "phoenix",
|
||||||
|
"gin": "gin",
|
||||||
|
"actix": "actix",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ---- Database detection from environment variable names ----
|
||||||
for env_name in env_names:
|
for env_name in env_names:
|
||||||
if "CLICKHOUSE" in env_name or env_name.startswith("CH_"):
|
for pattern, db_name in _ENV_DB_PATTERNS:
|
||||||
discovery.databases.add("clickhouse")
|
if pattern in env_name or env_name.startswith(pattern):
|
||||||
if "POSTGRES" in env_name or env_name.startswith("PG") or env_name == "DATABASE_URL":
|
discovery.databases.add(db_name)
|
||||||
discovery.databases.add("postgresql")
|
break
|
||||||
if "MYSQL" in env_name:
|
|
||||||
discovery.databases.add("mysql")
|
|
||||||
if "MONGO" in env_name:
|
|
||||||
discovery.databases.add("mongodb")
|
|
||||||
if "REDIS" in env_name:
|
|
||||||
discovery.databases.add("redis")
|
|
||||||
|
|
||||||
|
# ---- Docker compose service detection ----
|
||||||
compose_blob = "\n".join(
|
compose_blob = "\n".join(
|
||||||
manifests.get(key, "")
|
manifests.get(key, "")
|
||||||
for key in ("docker_compose", "docker_compose_alt", "compose")
|
for key in ("docker_compose", "docker_compose_alt", "compose")
|
||||||
).lower()
|
).lower()
|
||||||
_add_if_contains(
|
_add_if_contains(discovery.services, compose_blob, _COMPOSE_SERVICE_SIGNALS)
|
||||||
discovery.services,
|
|
||||||
compose_blob,
|
|
||||||
{
|
|
||||||
"clickhouse": "clickhouse",
|
|
||||||
"postgres": "postgresql",
|
|
||||||
"mysql": "mysql",
|
|
||||||
"mongo": "mongodb",
|
|
||||||
"redis": "redis",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
# ---- Hints from config files ----
|
||||||
if file_map["prisma"].exists():
|
if file_map["prisma"].exists():
|
||||||
discovery.hints.append("Prisma schema detected.")
|
discovery.hints.append("Prisma schema detected.")
|
||||||
if (project_root / "alembic.ini").exists():
|
if (project_root / "alembic.ini").exists():
|
||||||
discovery.hints.append("Alembic migration config detected.")
|
discovery.hints.append("Alembic migration config detected.")
|
||||||
if (project_root / "docker").exists() or discovery.services:
|
if (project_root / "knexfile.js").exists() or (project_root / "knexfile.ts").exists():
|
||||||
|
discovery.hints.append("Knex migration config detected.")
|
||||||
|
if (project_root / "ormconfig.json").exists() or (project_root / "ormconfig.ts").exists():
|
||||||
|
discovery.hints.append("TypeORM config detected.")
|
||||||
|
if (project_root / "drizzle.config.ts").exists():
|
||||||
|
discovery.hints.append("Drizzle ORM config detected.")
|
||||||
|
if (project_root / "Makefile").exists():
|
||||||
|
discovery.hints.append("Makefile available for build/task automation.")
|
||||||
|
if file_map["dockerfile"].exists() or (project_root / "docker").exists() or discovery.services:
|
||||||
discovery.hints.append("Containerized services may be available for local verification.")
|
discovery.hints.append("Containerized services may be available for local verification.")
|
||||||
|
|
||||||
return discovery
|
return discovery
|
||||||
@@ -160,6 +321,8 @@ def format_repo_discovery(discovery: RepoDiscovery) -> str:
|
|||||||
lines.append("Detected databases/services in code or env: " + ", ".join(sorted(discovery.databases)))
|
lines.append("Detected databases/services in code or env: " + ", ".join(sorted(discovery.databases)))
|
||||||
if discovery.services:
|
if discovery.services:
|
||||||
lines.append("Detected local service containers: " + ", ".join(sorted(discovery.services)))
|
lines.append("Detected local service containers: " + ", ".join(sorted(discovery.services)))
|
||||||
|
if discovery.frameworks:
|
||||||
|
lines.append("Detected frameworks: " + ", ".join(sorted(discovery.frameworks)))
|
||||||
if discovery.hints:
|
if discovery.hints:
|
||||||
lines.extend(discovery.hints)
|
lines.extend(discovery.hints)
|
||||||
if not lines:
|
if not lines:
|
||||||
|
|||||||
@@ -14,9 +14,22 @@ _SUMMARY_PREFIXES = (
|
|||||||
"PG",
|
"PG",
|
||||||
"POSTGRES",
|
"POSTGRES",
|
||||||
"MYSQL",
|
"MYSQL",
|
||||||
|
"MARIADB",
|
||||||
"REDIS",
|
"REDIS",
|
||||||
|
"MONGO",
|
||||||
|
"ELASTICSEARCH",
|
||||||
|
"OPENSEARCH",
|
||||||
|
"DYNAMO",
|
||||||
|
"CASSANDRA",
|
||||||
|
"KAFKA",
|
||||||
|
"RABBIT",
|
||||||
|
"AMQP",
|
||||||
|
"NEO4J",
|
||||||
|
"SQLITE",
|
||||||
|
"MEMCACHED",
|
||||||
"AWS",
|
"AWS",
|
||||||
"S3",
|
"S3",
|
||||||
|
"MINIO",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -116,7 +129,6 @@ def summarize_environment(
|
|||||||
key
|
key
|
||||||
for key in set(loaded_values) | set(env)
|
for key in set(loaded_values) | set(env)
|
||||||
if key.startswith(_SUMMARY_PREFIXES)
|
if key.startswith(_SUMMARY_PREFIXES)
|
||||||
or any(prefix in key for prefix in ("CLICKHOUSE", "DATABASE", "DB_"))
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
if visible_names:
|
if visible_names:
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ import unittest
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from unittest.mock import MagicMock, call, patch
|
from unittest.mock import MagicMock, call, patch
|
||||||
|
|
||||||
from cross_eval.agent import invoke_agent_agentic
|
from cross_eval.agent import AgentInvocationError, invoke_agent_agentic
|
||||||
from cross_eval.config import BUILTIN_AGENTS, _make_agentic
|
from cross_eval.config import BUILTIN_AGENTS, _make_agentic
|
||||||
from cross_eval.models import (
|
from cross_eval.models import (
|
||||||
AgentConfig,
|
AgentConfig,
|
||||||
@@ -309,6 +309,74 @@ class TestTaskFileCleanup(unittest.TestCase):
|
|||||||
self.assertFalse((wt / "CROSS_EVAL_TASK.md").exists())
|
self.assertFalse((wt / "CROSS_EVAL_TASK.md").exists())
|
||||||
|
|
||||||
|
|
||||||
|
class TestAgenticEmptyDiffDetection(unittest.TestCase):
    """Agentic coders should not succeed when they only claim changes in stdout."""

    @staticmethod
    def _claude_coder() -> AgentConfig:
        # Both scenarios drive the same agentic Claude configuration.
        return AgentConfig(
            name="claude-coder",
            command="claude",
            args=["--setting-sources", "user"],
            agentic=True,
        )

    @patch("cross_eval.worktree.capture_diff", return_value="")
    @patch("subprocess.run")
    def test_claude_empty_diff_with_change_claim_fails(
        self, mock_run: MagicMock, mock_diff: MagicMock,
    ) -> None:
        # stdout announces edits while the (patched) diff is empty: must fail.
        claimed_stdout = (
            "All tests pass.\n"
            "Here's a summary of all changes made:\n"
            "- Updated discovery.py\n"
        )
        mock_run.return_value = MagicMock(returncode=0, stdout=claimed_stdout, stderr="")
        coder = self._claude_coder()

        with tempfile.TemporaryDirectory() as td:
            worktree = Path(td)
            _init_git_repo(worktree)

            with self.assertRaises(AgentInvocationError) as ctx:
                invoke_agent_agentic(
                    coder,
                    "implement feature X",
                    "coding",
                    worktree_path=worktree,
                    quiet=True,
                )

        failure = ctx.exception
        self.assertEqual(failure.failure_type, "EMPTY_DIFF")
        self.assertIn("summary of all changes made", failure.raw_error.lower())

    @patch("cross_eval.worktree.capture_diff", return_value="")
    @patch("subprocess.run")
    def test_empty_diff_without_change_claim_is_allowed(
        self, mock_run: MagicMock, mock_diff: MagicMock,
    ) -> None:
        # stdout explicitly says nothing was needed: an empty diff is fine.
        mock_run.return_value = MagicMock(
            returncode=0,
            stdout="No changes were required; the current implementation already satisfies the task.",
            stderr="",
        )
        coder = self._claude_coder()

        with tempfile.TemporaryDirectory() as td:
            worktree = Path(td)
            _init_git_repo(worktree)

            outcome = invoke_agent_agentic(
                coder,
                "check whether any fix is needed",
                "coding",
                worktree_path=worktree,
                quiet=True,
            )

        self.assertEqual(outcome.output, "(no changes)")
|
||||||
|
|
||||||
|
|
||||||
# ===================================================================
|
# ===================================================================
|
||||||
# 3. config.py tests
|
# 3. config.py tests
|
||||||
# ===================================================================
|
# ===================================================================
|
||||||
|
|||||||
@@ -127,6 +127,280 @@ class AgentTranscriptTest(unittest.TestCase):
|
|||||||
self.assertIn("hello", result.transcript)
|
self.assertIn("hello", result.transcript)
|
||||||
self.assertIn("warn", result.transcript)
|
self.assertIn("warn", result.transcript)
|
||||||
|
|
||||||
|
def test_invoke_agent_transcript_includes_exit_code_and_duration(self) -> None:
    # Stand-in for the completed-process object returned by subprocess.run.
    class _Completed:
        returncode = 0
        stdout = "output"
        stderr = ""

    reviewer = AgentConfig(
        name="codex-reviewer",
        command="codex",
        args=["exec", "--model", "gpt-5.4", "-"],
    )

    with patch("subprocess.run", side_effect=lambda cmd, **kwargs: _Completed()):
        outcome = invoke_agent(reviewer, "prompt", "review", quiet=True)

    # Only the exit-code section is asserted: the transcript builder emits a
    # Duration section solely when duration_seconds > 0.
    self.assertIn("## Exit Code: 0", outcome.transcript)
|
||||||
|
|
||||||
|
|
||||||
|
class RepoDiscoveryExtendedTest(unittest.TestCase):
|
||||||
|
"""Regression tests for broadened repo/service discovery signals."""
|
||||||
|
|
||||||
|
def test_discover_go_project(self) -> None:
    # A lone go.mod should yield "go" as both language and package manager.
    with tempfile.TemporaryDirectory() as scratch:
        repo = Path(scratch)
        go_mod = repo / "go.mod"
        go_mod.write_text("module example.com/myapp\n\ngo 1.21\n", encoding="utf-8")
        found = discover_repo(repo)

    for bucket in (found.languages, found.package_managers):
        self.assertIn("go", bucket)
|
||||||
|
|
||||||
|
def test_discover_rust_project(self) -> None:
    # Cargo.toml alone identifies a Rust project managed by cargo.
    cargo_manifest = '[package]\nname = "myapp"\nversion = "0.1.0"\n'
    with tempfile.TemporaryDirectory() as scratch:
        repo = Path(scratch)
        (repo / "Cargo.toml").write_text(cargo_manifest, encoding="utf-8")
        found = discover_repo(repo)

    self.assertIn("rust", found.languages)
    self.assertIn("cargo", found.package_managers)
|
||||||
|
|
||||||
|
def test_discover_ruby_project(self) -> None:
    # A Gemfile identifies a Ruby project managed by bundler.
    gemfile_text = 'source "https://rubygems.org"\ngem "rails"\n'
    with tempfile.TemporaryDirectory() as scratch:
        repo = Path(scratch)
        (repo / "Gemfile").write_text(gemfile_text, encoding="utf-8")
        found = discover_repo(repo)

    self.assertIn("ruby", found.languages)
    self.assertIn("bundler", found.package_managers)
|
||||||
|
|
||||||
|
def test_discover_java_gradle_project(self) -> None:
    # build.gradle identifies a Java project built with gradle.
    with tempfile.TemporaryDirectory() as scratch:
        repo = Path(scratch)
        build_file = repo / "build.gradle"
        build_file.write_text("plugins { id 'java' }\n", encoding="utf-8")
        found = discover_repo(repo)

    self.assertIn("java", found.languages)
    self.assertIn("gradle", found.package_managers)
|
||||||
|
|
||||||
|
def test_discover_elasticsearch_from_compose(self) -> None:
    # An elasticsearch image in docker-compose.yml is reported as a service.
    compose_text = "services:\n  es:\n    image: elasticsearch:8.10.0\n"
    with tempfile.TemporaryDirectory() as scratch:
        repo = Path(scratch)
        (repo / "docker-compose.yml").write_text(compose_text, encoding="utf-8")
        found = discover_repo(repo)

    self.assertIn("elasticsearch", found.services)
|
||||||
|
|
||||||
|
def test_discover_kafka_from_compose(self) -> None:
    # A Confluent Kafka image in docker-compose.yml is reported as kafka.
    compose_text = "services:\n  broker:\n    image: confluentinc/cp-kafka:latest\n"
    with tempfile.TemporaryDirectory() as scratch:
        repo = Path(scratch)
        (repo / "docker-compose.yml").write_text(compose_text, encoding="utf-8")
        found = discover_repo(repo)

    self.assertIn("kafka", found.services)
|
||||||
|
|
||||||
|
def test_discover_rabbitmq_from_env(self) -> None:
    # No files at all: the env var name is the only signal.
    env_names = {"RABBITMQ_URL"}
    with tempfile.TemporaryDirectory() as scratch:
        found = discover_repo(Path(scratch), env_names)

    self.assertIn("rabbitmq", found.databases)
|
||||||
|
|
||||||
|
def test_discover_sqlite_from_requirements(self) -> None:
    # requirements.txt marks the repo as Python; aiosqlite implies sqlite.
    with tempfile.TemporaryDirectory() as scratch:
        repo = Path(scratch)
        requirements = repo / "requirements.txt"
        requirements.write_text("aiosqlite==0.19.0\nfastapi\n", encoding="utf-8")
        found = discover_repo(repo)

    self.assertIn("python", found.languages)
    self.assertIn("sqlite", found.databases)
|
||||||
|
|
||||||
|
def test_discover_dynamodb_from_env(self) -> None:
    # No files at all: the env var name is the only signal.
    env_names = {"DYNAMODB_TABLE"}
    with tempfile.TemporaryDirectory() as scratch:
        found = discover_repo(Path(scratch), env_names)

    self.assertIn("dynamodb", found.databases)
|
||||||
|
|
||||||
|
def test_discover_frameworks_from_pyproject(self) -> None:
    # A fastapi dependency in pyproject.toml is surfaced as a framework.
    pyproject_text = '[project]\nname = "svc"\ndependencies = ["fastapi", "uvicorn"]\n'
    with tempfile.TemporaryDirectory() as scratch:
        repo = Path(scratch)
        (repo / "pyproject.toml").write_text(pyproject_text, encoding="utf-8")
        found = discover_repo(repo)

    self.assertIn("fastapi", found.frameworks)
|
||||||
|
|
||||||
|
def test_discover_knex_hint(self) -> None:
    # The presence of a knexfile.js (its content is a bare export) must add
    # the Knex hint to the discovery result.
    expected_hint = "Knex migration config detected."
    with tempfile.TemporaryDirectory() as workdir:
        repo_root = Path(workdir)
        knexfile = repo_root / "knexfile.js"
        knexfile.write_text("module.exports = {};\n", encoding="utf-8")
        found = discover_repo(repo_root)

        self.assertIn(expected_hint, found.hints)
|
||||||
|
|
||||||
|
def test_discover_makefile_hint(self) -> None:
    # A minimal Makefile with a single target must add the Makefile hint to
    # the discovery result.
    expected_hint = "Makefile available for build/task automation."
    with tempfile.TemporaryDirectory() as workdir:
        repo_root = Path(workdir)
        makefile = repo_root / "Makefile"
        makefile.write_text("all:\n\techo hello\n", encoding="utf-8")
        found = discover_repo(repo_root)

        self.assertIn(expected_hint, found.hints)
|
||||||
|
|
||||||
|
def test_format_repo_discovery_includes_frameworks(self) -> None:
    # Discover a repo whose package.json depends on express, then check
    # that the formatted summary carries both the frameworks heading and
    # the framework name.
    manifest_text = '{"dependencies": {"express": "^4.18.0"}}'
    with tempfile.TemporaryDirectory() as workdir:
        repo_root = Path(workdir)
        manifest = repo_root / "package.json"
        manifest.write_text(manifest_text, encoding="utf-8")
        rendered = format_repo_discovery(discover_repo(repo_root))

        for expected_fragment in ("Detected frameworks", "express"):
            self.assertIn(expected_fragment, rendered)
|
||||||
|
|
||||||
|
def test_discover_pnpm_lockfile(self) -> None:
    """pnpm is reported from pnpm-lock.yaml when package.json has no packageManager field."""
    # The manifest carries only a name, so the lockfile is the sole pnpm signal.
    fixture_files = (
        ("package.json", '{"name": "app"}'),
        ("pnpm-lock.yaml", "lockfileVersion: 6\n"),
    )
    with tempfile.TemporaryDirectory() as workdir:
        repo_root = Path(workdir)
        for file_name, content in fixture_files:
            (repo_root / file_name).write_text(content, encoding="utf-8")
        found = discover_repo(repo_root)

        self.assertIn("pnpm", found.package_managers)
|
||||||
|
|
||||||
|
def test_discover_yarn_lockfile(self) -> None:
    """yarn is reported from yarn.lock when package.json has no packageManager field."""
    # The manifest carries only a name, so the lockfile is the sole yarn signal.
    fixture_files = (
        ("package.json", '{"name": "app"}'),
        ("yarn.lock", "# yarn lockfile v1\n"),
    )
    with tempfile.TemporaryDirectory() as workdir:
        repo_root = Path(workdir)
        for file_name, content in fixture_files:
            (repo_root / file_name).write_text(content, encoding="utf-8")
        found = discover_repo(repo_root)

        self.assertIn("yarn", found.package_managers)
|
||||||
|
|
||||||
|
|
||||||
|
class SummarizeEnvExtendedTest(unittest.TestCase):
    """Regression coverage: the environment summary must list the newer variable prefixes."""

    def _summary_for(self, var_name: str, var_value: str) -> str:
        # Build the summary for a single variable. The variable is passed in
        # both mapping arguments (as two separate dict objects), alongside a
        # default execution config and one env-file path.
        execution = PipelineConfig().execution
        return summarize_environment(
            execution,
            [Path("/tmp/.env")],
            {var_name: var_value},
            {var_name: var_value},
        )

    def test_summarize_shows_mongo_env_var(self) -> None:
        rendered = self._summary_for("MONGO_URI", "mongodb://localhost")
        self.assertIn("MONGO_URI", rendered)

    def test_summarize_shows_kafka_env_var(self) -> None:
        rendered = self._summary_for("KAFKA_BOOTSTRAP_SERVERS", "localhost:9092")
        self.assertIn("KAFKA_BOOTSTRAP_SERVERS", rendered)

    def test_summarize_shows_elasticsearch_env_var(self) -> None:
        rendered = self._summary_for("ELASTICSEARCH_URL", "http://localhost:9200")
        self.assertIn("ELASTICSEARCH_URL", rendered)
|
||||||
|
|
||||||
|
|
||||||
|
class TranscriptSavingRegressionTest(unittest.TestCase):
    """Check that a pipeline run writes each step's transcript as an artifact."""

    def test_transcript_files_saved_during_pipeline(self) -> None:
        fake_transcript = "# Agent Execution Transcript\n\n## Command\n```\nclaude -p\n```"

        def _fake_invoke(agent_config, prompt, step_name, **kwargs):
            # Stand-in for the real agent call: the review step gets a passing
            # verdict, every other step gets placeholder coding output.
            if step_name == "review":
                text = "VERDICT: PASS"
            else:
                text = "coding output"
            return AgentResult(
                output=text,
                exit_code=0,
                agent_name=agent_config.name,
                step_name=step_name,
                duration_seconds=0.1,
                transcript=fake_transcript,
            )

        with tempfile.TemporaryDirectory() as workdir:
            workspace = Path(workdir)
            pipeline_steps = _build_simple_preset(["claude-coder"], ["claude-reviewer"], [])
            config = PipelineConfig(
                output_dir=workspace / "out",
                max_iterations=1,
                language="en",
                inputs={"plan": "Plan", "checklist": "Checklist"},
                agents=dict(BUILTIN_AGENTS.items()),
                coders=["claude-coder"],
                reviewers=["claude-reviewer"],
                pipeline=pipeline_steps,
                preset_name="simple",
            )

            with patch("cross_eval.pipeline.invoke_agent", side_effect=_fake_invoke):
                outcome = run_pipeline(config, cwd=workspace)

            # The checks stay inside the temporary-directory context so the
            # run artifacts still exist on disk when they are inspected.
            run_dir = outcome.run_dir
            self.assertIsNotNone(run_dir)
            expected_paths = [
                run_dir / "v1" / file_name
                for file_name in ("coding_transcript.md", "review_transcript.md")
            ]
            for transcript_path in expected_paths:
                self.assertTrue(
                    transcript_path.exists(),
                    f"Expected transcript at {transcript_path}",
                )
|
||||||
|
|
||||||
|
|
||||||
# Run the test suite when this module is executed directly as a script.
# The guard and the call each appeared twice, leaving the first ``if``
# without a body; a single guard is the valid form.
if __name__ == "__main__":
    unittest.main()
|
||||||
|
|||||||
Reference in New Issue
Block a user