fix: use incremental diff per iteration instead of cumulative base diff

After each iteration's _commit_iteration, record the new HEAD SHA and use
it as the diff anchor for the next iteration. Previously capture_diff
always diffed against the initial base commit, causing every iteration to
return the same full cumulative diff — reviewers couldn't see what changed
between iterations, leading to repeated feedback and stuck FAIL loops.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
chungyeong
2026-03-15 10:07:11 +09:00
parent bf64d19123
commit 28efd5bb8f
3 changed files with 42 additions and 41 deletions

View File

@@ -62,18 +62,20 @@ def _commit_iteration(
label: str, label: str,
iteration: int, iteration: int,
verdict: str | None, verdict: str | None,
) -> None: ) -> str:
"""Intermediate commit after each agentic iteration. """Intermediate commit after each agentic iteration.
This resets the diff baseline so the next iteration only captures new changes. This resets the diff baseline so the next iteration only captures new changes.
Returns the new HEAD SHA to use as the base_commit for the next iteration.
""" """
from cross_eval.worktree import commit_worktree from cross_eval.worktree import commit_worktree, get_current_head
committed = commit_worktree( committed = commit_worktree(
worktree_path, worktree_path,
f"cross-eval: {label} v{iteration} ({verdict or 'no-verdict'})", f"cross-eval: {label} v{iteration} ({verdict or 'no-verdict'})",
) )
if committed: if committed:
logger.debug(" Intermediate commit: v%d (%s)", iteration, verdict) logger.debug(" Intermediate commit: v%d (%s)", iteration, verdict)
return get_current_head(worktree_path)
def _has_agentic_steps(config: PipelineConfig, steps: list[StepConfig]) -> bool: def _has_agentic_steps(config: PipelineConfig, steps: list[StepConfig]) -> bool:
@@ -388,7 +390,7 @@ def _run_simple_pipeline(
# Intermediate commit so next iteration's diff only shows new changes # Intermediate commit so next iteration's diff only shows new changes
if worktree_path is not None: if worktree_path is not None:
_commit_iteration(worktree_path, config.preset_name, i, verdict) agentic_base_commit = _commit_iteration(worktree_path, config.preset_name, i, verdict)
iter_result = IterationResult( iter_result = IterationResult(
iteration=i, iteration=i,
@@ -588,7 +590,7 @@ def _run_phased_pipeline(
# Intermediate commit so next iteration's diff only shows new changes # Intermediate commit so next iteration's diff only shows new changes
if worktree_path is not None: if worktree_path is not None:
_commit_iteration( agentic_base_commit = _commit_iteration(
worktree_path, f"{config.preset_name}/{phase.name}", worktree_path, f"{config.preset_name}/{phase.name}",
global_iter, verdict, global_iter, verdict,
) )

View File

@@ -101,19 +101,18 @@ def create_worktree(base_cwd: Path, work_dir: Path, branch_name: str) -> tuple[P
def capture_diff(worktree_path: Path, base_commit: str | None = None) -> str: def capture_diff(worktree_path: Path, base_commit: str | None = None) -> str:
"""Capture all changes made in the worktree as a unified diff. """Capture all changes made in the worktree since ``base_commit``.
Includes both tracked modifications, new untracked files, and changes Handles two scenarios:
that the agent may have committed on its own. 1. Agent left changes uncommitted → ``git add -A && git diff base HEAD``
2. Agent committed its own changes → HEAD advanced, diff base..HEAD captures them
Args: Args:
base_commit: The commit SHA from when the worktree was created. base_commit: The diff anchor — typically the worktree HEAD *before* this
If provided, diffs against this fixed base instead of HEAD. iteration started (set by ``get_current_head`` after each
This is critical because agents (e.g. Claude in interactive ``_commit_iteration``). Falls back to ``HEAD`` if not given.
mode) may create their own commits, advancing HEAD and
making ``git diff --cached HEAD`` return empty.
""" """
# Stage any uncommitted changes so they're included in the diff # Stage any uncommitted changes
subprocess.run( subprocess.run(
["git", "add", "-A"], ["git", "add", "-A"],
cwd=worktree_path, cwd=worktree_path,
@@ -121,36 +120,34 @@ def capture_diff(worktree_path: Path, base_commit: str | None = None) -> str:
check=True, check=True,
) )
if base_commit: # Commit staged changes so everything is reachable via HEAD
# Diff everything (committed + staged) against the original base. # (this is a no-op if nothing is staged)
# This captures changes regardless of whether the agent committed them. subprocess.run(
result = subprocess.run( ["git", "commit", "-m", "cross-eval: capture-diff snapshot", "--allow-empty-message"],
["git", "diff", base_commit, "--cached"],
cwd=worktree_path,
capture_output=True,
text=True,
)
diff = result.stdout.strip()
if diff:
return diff
# Also check committed changes (agent may have committed and left
# nothing staged)
result = subprocess.run(
["git", "diff", base_commit, "HEAD"],
cwd=worktree_path,
capture_output=True,
text=True,
)
return result.stdout.strip()
# Fallback: no base_commit, use original behavior
result = subprocess.run(
["git", "diff", "--cached", "HEAD"],
cwd=worktree_path, cwd=worktree_path,
capture_output=True, capture_output=True,
text=True, text=True,
) )
ref = base_commit or "HEAD~1"
result = subprocess.run(
["git", "diff", ref, "HEAD"],
cwd=worktree_path,
capture_output=True,
text=True,
)
return result.stdout.strip()
def get_current_head(worktree_path: Path) -> str:
"""Return the current HEAD SHA of the worktree."""
result = subprocess.run(
["git", "rev-parse", "HEAD"],
cwd=worktree_path,
capture_output=True,
text=True,
check=True,
)
return result.stdout.strip() return result.stdout.strip()

View File

@@ -413,11 +413,13 @@ class TestInvokeAgenticRuntime(unittest.TestCase):
class TestPipelineHelpers(unittest.TestCase): class TestPipelineHelpers(unittest.TestCase):
@patch("cross_eval.worktree.get_current_head", return_value="a" * 40)
@patch("cross_eval.worktree.commit_worktree", return_value=True) @patch("cross_eval.worktree.commit_worktree", return_value=True)
def test_commit_iteration_logs_only_when_committed(self, mock_commit: MagicMock) -> None: def test_commit_iteration_logs_only_when_committed(self, mock_commit: MagicMock, mock_head: MagicMock) -> None:
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir:
_commit_iteration(Path(tmpdir), "review-fix", 2, "PASS") new_head = _commit_iteration(Path(tmpdir), "review-fix", 2, "PASS")
mock_commit.assert_called_once() mock_commit.assert_called_once()
self.assertEqual(new_head, "a" * 40)
def test_snapshot_repo_state_includes_untracked_digest(self) -> None: def test_snapshot_repo_state_includes_untracked_digest(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir: