feat: tighten agentic runtime handoffs and quality gates

This commit is contained in:
chungyeong
2026-03-14 10:05:25 +09:00
parent 87bc0ffbfb
commit 7b95233edf
15 changed files with 1148 additions and 167 deletions

View File

@@ -415,11 +415,7 @@ def invoke_agent_agentic(
timeout: int | None = None,
quiet: bool = False,
) -> AgentResult:
-"""Invoke an agent in agentic mode (no -p, runs in worktree, captures git diff).
-The agent runs without print mode so it can modify files directly.
-After the agent exits, git diff (since last commit) is captured as the output.
-"""
+"""Invoke an agent in agentic mode using the worktree as the source of truth."""
from cross_eval.worktree import capture_diff
# Write prompt to a temp file (outside worktree, won't appear in diffs)
@@ -433,10 +429,10 @@ def invoke_agent_agentic(
if agent.reasoning_effort and _supports_reasoning_effort(agent.command):
cmd.extend(["-c", f'model_reasoning_effort="{agent.reasoning_effort}"'])
-# Strip stdin sentinel ("-") from args for agentic mode.
-# Keep -p/--print: Claude -p mode still has full tool access (Edit, Write,
-# Bash, etc.) and is the correct mode for non-interactive subprocess use.
-args = [a for a in agent.args if a != "-"]
+# Strip print-mode flags and stdin sentinels for agentic mode.
+# Agentic runs should operate on the worktree and return a real git diff,
+# not behave as a one-shot text completer.
+args = [a for a in agent.args if a not in {"-", "-p", "--print"}]
cmd.extend(args)
# System prompt via flag if supported
@@ -454,8 +450,8 @@ def invoke_agent_agentic(
else:
input_data = prompt
else:
-# claude -p: deliver prompt via stdin (same as codex).
-# -p mode is non-interactive and reads from stdin, then exits.
+# claude: deliver the task through stdin and let the worktree be the
+# canonical place where files are read/written.
input_data = prompt
cmd_preview = " ".join(cmd[:6])