fix: use incremental diff per iteration instead of cumulative base diff

After each iteration's _commit_iteration, record the new HEAD SHA and use
it as the diff anchor for the next iteration. Previously capture_diff
always diffed against the initial base commit, causing every iteration to
return the same full cumulative diff — reviewers couldn't see what changed
between iterations, leading to repeated feedback and stuck FAIL loops.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
chungyeong
2026-03-15 10:07:11 +09:00
parent bf64d19123
commit 28efd5bb8f
3 changed files with 42 additions and 41 deletions

View File

@@ -62,18 +62,20 @@ def _commit_iteration(
label: str,
iteration: int,
verdict: str | None,
) -> None:
) -> str:
"""Intermediate commit after each agentic iteration.
This resets the diff baseline so the next iteration only captures new changes.
Returns the new HEAD SHA to use as the base_commit for the next iteration.
"""
from cross_eval.worktree import commit_worktree
from cross_eval.worktree import commit_worktree, get_current_head
committed = commit_worktree(
worktree_path,
f"cross-eval: {label} v{iteration} ({verdict or 'no-verdict'})",
)
if committed:
logger.debug(" Intermediate commit: v%d (%s)", iteration, verdict)
return get_current_head(worktree_path)
def _has_agentic_steps(config: PipelineConfig, steps: list[StepConfig]) -> bool:
@@ -388,7 +390,7 @@ def _run_simple_pipeline(
# Intermediate commit so next iteration's diff only shows new changes
if worktree_path is not None:
_commit_iteration(worktree_path, config.preset_name, i, verdict)
agentic_base_commit = _commit_iteration(worktree_path, config.preset_name, i, verdict)
iter_result = IterationResult(
iteration=i,
@@ -588,7 +590,7 @@ def _run_phased_pipeline(
# Intermediate commit so next iteration's diff only shows new changes
if worktree_path is not None:
_commit_iteration(
agentic_base_commit = _commit_iteration(
worktree_path, f"{config.preset_name}/{phase.name}",
global_iter, verdict,
)

View File

@@ -101,19 +101,18 @@ def create_worktree(base_cwd: Path, work_dir: Path, branch_name: str) -> tuple[P
def capture_diff(worktree_path: Path, base_commit: str | None = None) -> str:
"""Capture all changes made in the worktree as a unified diff.
"""Capture all changes made in the worktree since ``base_commit``.
Includes both tracked modifications, new untracked files, and changes
that the agent may have committed on its own.
Handles two scenarios:
1. Agent left changes uncommitted → ``git add -A && git diff base HEAD``
2. Agent committed its own changes → HEAD advanced, diff base..HEAD captures them
Args:
base_commit: The commit SHA from when the worktree was created.
If provided, diffs against this fixed base instead of HEAD.
This is critical because agents (e.g. Claude in interactive
mode) may create their own commits, advancing HEAD and
making ``git diff --cached HEAD`` return empty.
base_commit: The diff anchor — typically the worktree HEAD *before* this
iteration started (set by ``get_current_head`` after each
``_commit_iteration``). Falls back to ``HEAD~1`` if not given.
"""
# Stage any uncommitted changes so they're included in the diff
# Stage any uncommitted changes
subprocess.run(
["git", "add", "-A"],
cwd=worktree_path,
@@ -121,36 +120,34 @@ def capture_diff(worktree_path: Path, base_commit: str | None = None) -> str:
check=True,
)
if base_commit:
# Diff everything (committed + staged) against the original base.
# This captures changes regardless of whether the agent committed them.
result = subprocess.run(
["git", "diff", base_commit, "--cached"],
cwd=worktree_path,
capture_output=True,
text=True,
)
diff = result.stdout.strip()
if diff:
return diff
# Also check committed changes (agent may have committed and left
# nothing staged)
result = subprocess.run(
["git", "diff", base_commit, "HEAD"],
cwd=worktree_path,
capture_output=True,
text=True,
)
return result.stdout.strip()
# Fallback: no base_commit, use original behavior
result = subprocess.run(
["git", "diff", "--cached", "HEAD"],
# Commit staged changes so everything is reachable via HEAD
# (this is a no-op if nothing is staged)
subprocess.run(
["git", "commit", "-m", "cross-eval: capture-diff snapshot", "--allow-empty-message"],
cwd=worktree_path,
capture_output=True,
text=True,
)
ref = base_commit or "HEAD~1"
result = subprocess.run(
["git", "diff", ref, "HEAD"],
cwd=worktree_path,
capture_output=True,
text=True,
)
return result.stdout.strip()
def get_current_head(worktree_path: Path) -> str:
    """Look up the commit SHA that HEAD points to in a worktree.

    Runs ``git rev-parse HEAD`` with ``worktree_path`` as the working
    directory and returns its standard output with surrounding whitespace
    removed.

    Args:
        worktree_path: Directory in which the git command is executed.

    Returns:
        The stripped output of ``git rev-parse HEAD``.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    rev_parse_cmd = ["git", "rev-parse", "HEAD"]
    completed = subprocess.run(
        rev_parse_cmd,
        cwd=worktree_path,
        # Explicit pipes are what ``capture_output=True`` expands to.
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=True,
    )
    head_sha = completed.stdout.strip()
    return head_sha