feat: isolate agentic worktrees and surface execution evidence
This commit is contained in:
@@ -32,20 +32,33 @@ _NO_CHANGE_ACK_MARKERS = (
|
||||
# Lowercase phrases treated as evidence that agent output claims code changes.
# NOTE(review): presumably matched as substrings of the lowercased agent output
# by _claims_file_changes; the matching code is outside this view -- confirm.
_CHANGE_CLAIM_MARKERS = (
    # Explicit "summary of changes" phrasings.
    "summary of all changes made",
    "here's a summary of all changes made",
    "here is a summary of all changes",
    # Past-tense action verbs in bare, "i ...", and "i've ..." forms.
    "implemented", "i implemented", "i've implemented",
    "added", "i added", "i've added",
    "updated", "i updated", "i've updated",
    "modified", "i modified", "i've modified",
    "created", "i created", "i've created",
    "fixed", "i fixed", "i've fixed",
    # Completion statements.
    "completed the changes",
    "finished the changes",
    "made the following changes",
    "applied the fix",
    "changes have been applied",
    "wrote the code",
    "refactored", "i refactored",
)
|
||||
|
||||
|
||||
@@ -134,6 +147,29 @@ def _classify_agent_failure(detail: str) -> tuple[str, str]:
|
||||
)
|
||||
|
||||
|
||||
# Lowercase substrings that _has_write_failure_indicators looks for in agent
# stderr to decide the agent was unable to write files.
_WRITE_FAILURE_MARKERS = (
    # Permission / read-only filesystem messages.
    "permission denied",
    "read-only file system", "read only file system",
    "operation not permitted",
    # Generic write-failure phrasings.
    "cannot write", "failed to write", "could not write", "unable to write",
    # NOTE(review): "sandbox" is a very broad substring; confirm it cannot
    # match benign stderr output (e.g. informational banners or paths).
    "sandbox",
    # Symbolic errno names as they appear once lowercased.
    "eacces", "erofs",
)
|
||||
|
||||
|
||||
def _has_write_failure_indicators(stderr: str) -> bool:
    """Report whether ``stderr`` mentions a known write-failure marker.

    The check is a case-insensitive substring search: ``stderr`` is lowercased
    and tested against each entry of ``_WRITE_FAILURE_MARKERS``.

    Args:
        stderr: Captured standard-error text from the agent process.

    Returns:
        ``True`` if at least one marker occurs in ``stderr``; ``False`` if
        none does or if ``stderr`` is empty or whitespace-only.
    """
    if stderr.strip():
        haystack = stderr.lower()
        for needle in _WRITE_FAILURE_MARKERS:
            if needle in haystack:
                return True
    return False
|
||||
|
||||
|
||||
def _claims_file_changes(output: str) -> bool:
|
||||
"""Heuristic for agent text that claims code changes were made."""
|
||||
normalized = output.lower()
|
||||
@@ -406,7 +442,8 @@ def invoke_agent_agentic(
|
||||
# (avoids OS arg length limits for large prompts)
|
||||
cmd.append(
|
||||
f"Read the task file at {task_file} and execute all instructions in it. "
|
||||
f"Work in the current directory."
|
||||
f"Work only inside the current directory and do not modify files "
|
||||
f"outside it."
|
||||
)
|
||||
|
||||
cmd_preview = " ".join(cmd[:6])
|
||||
@@ -467,7 +504,14 @@ def invoke_agent_agentic(
|
||||
if not diff_output:
|
||||
stdout_excerpt = (result.stdout or "").strip()
|
||||
stderr_excerpt = (result.stderr or "").strip()
|
||||
if _claims_file_changes(stdout_excerpt):
|
||||
|
||||
# Detect two failure modes:
|
||||
# 1. Agent claims changes in stdout but produced no diff
|
||||
# 2. Agent stderr contains permission or write-failure indicators
|
||||
claims_changes = _claims_file_changes(stdout_excerpt)
|
||||
has_write_failure = _has_write_failure_indicators(stderr_excerpt)
|
||||
|
||||
if claims_changes or has_write_failure:
|
||||
if spinner:
|
||||
spinner.stop(f"[{step_name}] FAILED (empty diff)")
|
||||
raw_error = stdout_excerpt or "(stdout empty)"
|
||||
@@ -475,16 +519,27 @@ def invoke_agent_agentic(
|
||||
raw_error = f"{raw_error}\n\n[stderr]\n{stderr_excerpt}"
|
||||
if len(raw_error) > 2000:
|
||||
raw_error = raw_error[:2000] + "..."
|
||||
|
||||
if has_write_failure:
|
||||
failure_type = "WRITE_FAILURE"
|
||||
suggested_action = (
|
||||
"Agent encountered file write errors (permission denied, read-only, "
|
||||
"or sandbox restriction). Check agent permissions and worktree state."
|
||||
)
|
||||
else:
|
||||
failure_type = "EMPTY_DIFF"
|
||||
suggested_action = (
|
||||
"Agent reported code changes but produced no git diff. "
|
||||
"Treat this run as failed and require a real worktree diff before continuing."
|
||||
)
|
||||
|
||||
raise AgentInvocationError(
|
||||
agent_name=agent.name,
|
||||
step_name=step_name,
|
||||
cmd_preview=cmd_preview,
|
||||
raw_error=raw_error,
|
||||
failure_type="EMPTY_DIFF",
|
||||
suggested_action=(
|
||||
"Agent reported code changes but produced no git diff. "
|
||||
"Treat this run as failed and require a real worktree diff before continuing."
|
||||
),
|
||||
failure_type=failure_type,
|
||||
suggested_action=suggested_action,
|
||||
)
|
||||
|
||||
diff_output = "(no changes)"
|
||||
|
||||
Reference in New Issue
Block a user