feat: isolate agentic worktrees and surface execution evidence

This commit is contained in:
chungyeong
2026-03-13 22:50:46 +09:00
parent 3fb19e90c0
commit b19d174c98
7 changed files with 758 additions and 14 deletions

View File

@@ -32,20 +32,33 @@ _NO_CHANGE_ACK_MARKERS = (
# Lower-case phrases that signal an agent is claiming it changed code.
# Presumably consumed by `_claims_file_changes`, which lower-cases the agent
# output before inspecting it -- confirm against that function's body.
# NOTE(review): if matching is by substring, the "i ..." / "i've ..." variants
# are already covered by the bare verb; they are kept to preserve the tuple.
_CHANGE_CLAIM_MARKERS = (
    # Explicit change summaries.
    "summary of all changes made",
    "here's a summary of all changes made",
    "here is a summary of all changes",
    # Past-tense action verbs: bare, "i ...", and "i've ..." forms.
    "implemented",
    "i implemented",
    "i've implemented",
    "added",
    "i added",
    "i've added",
    "updated",
    "i updated",
    "i've updated",
    "modified",
    "i modified",
    "i've modified",
    "created",
    "i created",
    "i've created",
    "fixed",
    "i fixed",
    "i've fixed",
    # Completion statements.
    "completed the changes",
    "finished the changes",
    "made the following changes",
    "applied the fix",
    "changes have been applied",
    "wrote the code",
    # Refactoring claims.
    "refactored",
    "i refactored",
)
@@ -134,6 +147,29 @@ def _classify_agent_failure(detail: str) -> tuple[str, str]:
)
# Lower-case stderr fragments treated as evidence that the agent could not
# write files; `_has_write_failure_indicators` searches for them as substrings
# of the lower-cased stderr text.
# NOTE(review): "sandbox" is a very broad marker -- any stderr line that merely
# mentions a sandbox will match. Confirm this is intended.
_WRITE_FAILURE_MARKERS = (
    # Operating-system error messages.
    "permission denied",
    "read-only file system",
    "read only file system",
    "operation not permitted",
    # Generic write-failure phrasings.
    "cannot write",
    "failed to write",
    "could not write",
    "unable to write",
    # Sandbox restrictions.
    "sandbox",
    # Lower-cased errno names (EACCES, EROFS).
    "eacces",
    "erofs",
)
def _has_write_failure_indicators(stderr: str) -> bool:
    """Report whether ``stderr`` mentions a known write-failure marker.

    The text is lower-cased and each entry of ``_WRITE_FAILURE_MARKERS`` is
    looked up in it as a plain substring.

    Args:
        stderr: Captured standard-error text to inspect.

    Returns:
        ``True`` if at least one marker occurs in the text; ``False`` for
        empty or whitespace-only input, or when no marker matches.
    """
    # Blank or whitespace-only text cannot carry a failure message.
    if stderr.strip() == "":
        return False

    lowered = stderr.lower()
    for marker in _WRITE_FAILURE_MARKERS:
        if marker in lowered:
            return True
    return False
def _claims_file_changes(output: str) -> bool:
"""Heuristic for agent text that claims code changes were made."""
normalized = output.lower()
@@ -406,7 +442,8 @@ def invoke_agent_agentic(
# (avoids OS arg length limits for large prompts)
cmd.append(
f"Read the task file at {task_file} and execute all instructions in it. "
f"Work in the current directory."
f"Work only inside the current directory and do not modify files "
f"outside it."
)
cmd_preview = " ".join(cmd[:6])
@@ -467,7 +504,14 @@ def invoke_agent_agentic(
if not diff_output:
stdout_excerpt = (result.stdout or "").strip()
stderr_excerpt = (result.stderr or "").strip()
if _claims_file_changes(stdout_excerpt):
# Detect two failure modes:
# 1. Agent claims changes in stdout but produced no diff
# 2. Agent stderr contains permission or write-failure indicators
claims_changes = _claims_file_changes(stdout_excerpt)
has_write_failure = _has_write_failure_indicators(stderr_excerpt)
if claims_changes or has_write_failure:
if spinner:
spinner.stop(f"[{step_name}] FAILED (empty diff)")
raw_error = stdout_excerpt or "(stdout empty)"
@@ -475,16 +519,27 @@ def invoke_agent_agentic(
raw_error = f"{raw_error}\n\n[stderr]\n{stderr_excerpt}"
if len(raw_error) > 2000:
raw_error = raw_error[:2000] + "..."
if has_write_failure:
failure_type = "WRITE_FAILURE"
suggested_action = (
"Agent encountered file write errors (permission denied, read-only, "
"or sandbox restriction). Check agent permissions and worktree state."
)
else:
failure_type = "EMPTY_DIFF"
suggested_action = (
"Agent reported code changes but produced no git diff. "
"Treat this run as failed and require a real worktree diff before continuing."
)
raise AgentInvocationError(
agent_name=agent.name,
step_name=step_name,
cmd_preview=cmd_preview,
raw_error=raw_error,
failure_type="EMPTY_DIFF",
suggested_action=(
"Agent reported code changes but produced no git diff. "
"Treat this run as failed and require a real worktree diff before continuing."
),
failure_type=failure_type,
suggested_action=suggested_action,
)
diff_output = "(no changes)"