fix: use incremental diff per iteration instead of cumulative base diff
After each iteration's _commit_iteration, record the new HEAD SHA and use it as the diff anchor for the next iteration. Previously capture_diff always diffed against the initial base commit, causing every iteration to return the same full cumulative diff — reviewers couldn't see what changed between iterations, leading to repeated feedback and stuck FAIL loops. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -62,18 +62,20 @@ def _commit_iteration(
|
|||||||
label: str,
|
label: str,
|
||||||
iteration: int,
|
iteration: int,
|
||||||
verdict: str | None,
|
verdict: str | None,
|
||||||
) -> None:
|
) -> str:
|
||||||
"""Intermediate commit after each agentic iteration.
|
"""Intermediate commit after each agentic iteration.
|
||||||
|
|
||||||
This resets the diff baseline so the next iteration only captures new changes.
|
This resets the diff baseline so the next iteration only captures new changes.
|
||||||
|
Returns the new HEAD SHA to use as the base_commit for the next iteration.
|
||||||
"""
|
"""
|
||||||
from cross_eval.worktree import commit_worktree
|
from cross_eval.worktree import commit_worktree, get_current_head
|
||||||
committed = commit_worktree(
|
committed = commit_worktree(
|
||||||
worktree_path,
|
worktree_path,
|
||||||
f"cross-eval: {label} v{iteration} ({verdict or 'no-verdict'})",
|
f"cross-eval: {label} v{iteration} ({verdict or 'no-verdict'})",
|
||||||
)
|
)
|
||||||
if committed:
|
if committed:
|
||||||
logger.debug(" Intermediate commit: v%d (%s)", iteration, verdict)
|
logger.debug(" Intermediate commit: v%d (%s)", iteration, verdict)
|
||||||
|
return get_current_head(worktree_path)
|
||||||
|
|
||||||
|
|
||||||
def _has_agentic_steps(config: PipelineConfig, steps: list[StepConfig]) -> bool:
|
def _has_agentic_steps(config: PipelineConfig, steps: list[StepConfig]) -> bool:
|
||||||
@@ -388,7 +390,7 @@ def _run_simple_pipeline(
|
|||||||
|
|
||||||
# Intermediate commit so next iteration's diff only shows new changes
|
# Intermediate commit so next iteration's diff only shows new changes
|
||||||
if worktree_path is not None:
|
if worktree_path is not None:
|
||||||
_commit_iteration(worktree_path, config.preset_name, i, verdict)
|
agentic_base_commit = _commit_iteration(worktree_path, config.preset_name, i, verdict)
|
||||||
|
|
||||||
iter_result = IterationResult(
|
iter_result = IterationResult(
|
||||||
iteration=i,
|
iteration=i,
|
||||||
@@ -588,7 +590,7 @@ def _run_phased_pipeline(
|
|||||||
|
|
||||||
# Intermediate commit so next iteration's diff only shows new changes
|
# Intermediate commit so next iteration's diff only shows new changes
|
||||||
if worktree_path is not None:
|
if worktree_path is not None:
|
||||||
_commit_iteration(
|
agentic_base_commit = _commit_iteration(
|
||||||
worktree_path, f"{config.preset_name}/{phase.name}",
|
worktree_path, f"{config.preset_name}/{phase.name}",
|
||||||
global_iter, verdict,
|
global_iter, verdict,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -101,19 +101,18 @@ def create_worktree(base_cwd: Path, work_dir: Path, branch_name: str) -> tuple[P
|
|||||||
|
|
||||||
|
|
||||||
def capture_diff(worktree_path: Path, base_commit: str | None = None) -> str:
|
def capture_diff(worktree_path: Path, base_commit: str | None = None) -> str:
|
||||||
"""Capture all changes made in the worktree as a unified diff.
|
"""Capture all changes made in the worktree since ``base_commit``.
|
||||||
|
|
||||||
Includes both tracked modifications, new untracked files, and changes
|
Handles two scenarios:
|
||||||
that the agent may have committed on its own.
|
1. Agent left changes uncommitted → ``git add -A && git diff base HEAD``
|
||||||
|
2. Agent committed its own changes → HEAD advanced, diff base..HEAD captures them
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
base_commit: The commit SHA from when the worktree was created.
|
base_commit: The diff anchor — typically the worktree HEAD *before* this
|
||||||
If provided, diffs against this fixed base instead of HEAD.
|
iteration started (set by ``get_current_head`` after each
|
||||||
This is critical because agents (e.g. Claude in interactive
|
``_commit_iteration``). Falls back to ``HEAD`` if not given.
|
||||||
mode) may create their own commits, advancing HEAD and
|
|
||||||
making ``git diff --cached HEAD`` return empty.
|
|
||||||
"""
|
"""
|
||||||
# Stage any uncommitted changes so they're included in the diff
|
# Stage any uncommitted changes
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
["git", "add", "-A"],
|
["git", "add", "-A"],
|
||||||
cwd=worktree_path,
|
cwd=worktree_path,
|
||||||
@@ -121,35 +120,33 @@ def capture_diff(worktree_path: Path, base_commit: str | None = None) -> str:
|
|||||||
check=True,
|
check=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
if base_commit:
|
# Commit staged changes so everything is reachable via HEAD
|
||||||
# Diff everything (committed + staged) against the original base.
|
# (this is a no-op if nothing is staged)
|
||||||
# This captures changes regardless of whether the agent committed them.
|
subprocess.run(
|
||||||
result = subprocess.run(
|
["git", "commit", "-m", "cross-eval: capture-diff snapshot", "--allow-empty-message"],
|
||||||
["git", "diff", base_commit, "--cached"],
|
|
||||||
cwd=worktree_path,
|
cwd=worktree_path,
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
text=True,
|
text=True,
|
||||||
)
|
)
|
||||||
diff = result.stdout.strip()
|
|
||||||
if diff:
|
|
||||||
return diff
|
|
||||||
|
|
||||||
# Also check committed changes (agent may have committed and left
|
ref = base_commit or "HEAD~1"
|
||||||
# nothing staged)
|
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
["git", "diff", base_commit, "HEAD"],
|
["git", "diff", ref, "HEAD"],
|
||||||
cwd=worktree_path,
|
cwd=worktree_path,
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
text=True,
|
text=True,
|
||||||
)
|
)
|
||||||
return result.stdout.strip()
|
return result.stdout.strip()
|
||||||
|
|
||||||
# Fallback: no base_commit, use original behavior
|
|
||||||
|
def get_current_head(worktree_path: Path) -> str:
|
||||||
|
"""Return the current HEAD SHA of the worktree."""
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
["git", "diff", "--cached", "HEAD"],
|
["git", "rev-parse", "HEAD"],
|
||||||
cwd=worktree_path,
|
cwd=worktree_path,
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
text=True,
|
text=True,
|
||||||
|
check=True,
|
||||||
)
|
)
|
||||||
return result.stdout.strip()
|
return result.stdout.strip()
|
||||||
|
|
||||||
|
|||||||
@@ -413,11 +413,13 @@ class TestInvokeAgenticRuntime(unittest.TestCase):
|
|||||||
|
|
||||||
|
|
||||||
class TestPipelineHelpers(unittest.TestCase):
|
class TestPipelineHelpers(unittest.TestCase):
|
||||||
|
@patch("cross_eval.worktree.get_current_head", return_value="a" * 40)
|
||||||
@patch("cross_eval.worktree.commit_worktree", return_value=True)
|
@patch("cross_eval.worktree.commit_worktree", return_value=True)
|
||||||
def test_commit_iteration_logs_only_when_committed(self, mock_commit: MagicMock) -> None:
|
def test_commit_iteration_logs_only_when_committed(self, mock_commit: MagicMock, mock_head: MagicMock) -> None:
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
_commit_iteration(Path(tmpdir), "review-fix", 2, "PASS")
|
new_head = _commit_iteration(Path(tmpdir), "review-fix", 2, "PASS")
|
||||||
mock_commit.assert_called_once()
|
mock_commit.assert_called_once()
|
||||||
|
self.assertEqual(new_head, "a" * 40)
|
||||||
|
|
||||||
def test_snapshot_repo_state_includes_untracked_digest(self) -> None:
|
def test_snapshot_repo_state_includes_untracked_digest(self) -> None:
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
|
|||||||
Reference in New Issue
Block a user