fix: use incremental diff per iteration instead of cumulative base diff
After each iteration's _commit_iteration, record the new HEAD SHA and use it as the diff anchor for the next iteration. Previously capture_diff always diffed against the initial base commit, causing every iteration to return the same full cumulative diff — reviewers couldn't see what changed between iterations, leading to repeated feedback and stuck FAIL loops. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -62,18 +62,20 @@ def _commit_iteration(
|
||||
label: str,
|
||||
iteration: int,
|
||||
verdict: str | None,
|
||||
) -> None:
|
||||
) -> str:
|
||||
"""Intermediate commit after each agentic iteration.
|
||||
|
||||
This resets the diff baseline so the next iteration only captures new changes.
|
||||
Returns the new HEAD SHA to use as the base_commit for the next iteration.
|
||||
"""
|
||||
from cross_eval.worktree import commit_worktree
|
||||
from cross_eval.worktree import commit_worktree, get_current_head
|
||||
committed = commit_worktree(
|
||||
worktree_path,
|
||||
f"cross-eval: {label} v{iteration} ({verdict or 'no-verdict'})",
|
||||
)
|
||||
if committed:
|
||||
logger.debug(" Intermediate commit: v%d (%s)", iteration, verdict)
|
||||
return get_current_head(worktree_path)
|
||||
|
||||
|
||||
def _has_agentic_steps(config: PipelineConfig, steps: list[StepConfig]) -> bool:
|
||||
@@ -388,7 +390,7 @@ def _run_simple_pipeline(
|
||||
|
||||
# Intermediate commit so next iteration's diff only shows new changes
|
||||
if worktree_path is not None:
|
||||
_commit_iteration(worktree_path, config.preset_name, i, verdict)
|
||||
agentic_base_commit = _commit_iteration(worktree_path, config.preset_name, i, verdict)
|
||||
|
||||
iter_result = IterationResult(
|
||||
iteration=i,
|
||||
@@ -588,7 +590,7 @@ def _run_phased_pipeline(
|
||||
|
||||
# Intermediate commit so next iteration's diff only shows new changes
|
||||
if worktree_path is not None:
|
||||
_commit_iteration(
|
||||
agentic_base_commit = _commit_iteration(
|
||||
worktree_path, f"{config.preset_name}/{phase.name}",
|
||||
global_iter, verdict,
|
||||
)
|
||||
|
||||
@@ -101,19 +101,18 @@ def create_worktree(base_cwd: Path, work_dir: Path, branch_name: str) -> tuple[P
|
||||
|
||||
|
||||
def capture_diff(worktree_path: Path, base_commit: str | None = None) -> str:
|
||||
"""Capture all changes made in the worktree as a unified diff.
|
||||
"""Capture all changes made in the worktree since ``base_commit``.
|
||||
|
||||
Includes both tracked modifications, new untracked files, and changes
|
||||
that the agent may have committed on its own.
|
||||
Handles two scenarios:
|
||||
1. Agent left changes uncommitted → ``git add -A && git diff base HEAD``
|
||||
2. Agent committed its own changes → HEAD advanced, diff base..HEAD captures them
|
||||
|
||||
Args:
|
||||
base_commit: The commit SHA from when the worktree was created.
|
||||
If provided, diffs against this fixed base instead of HEAD.
|
||||
This is critical because agents (e.g. Claude in interactive
|
||||
mode) may create their own commits, advancing HEAD and
|
||||
making ``git diff --cached HEAD`` return empty.
|
||||
base_commit: The diff anchor — typically the worktree HEAD *before* this
|
||||
iteration started (set by ``get_current_head`` after each
|
||||
``_commit_iteration``). Falls back to ``HEAD~1`` if not given.
|
||||
"""
|
||||
# Stage any uncommitted changes so they're included in the diff
|
||||
# Stage any uncommitted changes
|
||||
subprocess.run(
|
||||
["git", "add", "-A"],
|
||||
cwd=worktree_path,
|
||||
@@ -121,36 +120,34 @@ def capture_diff(worktree_path: Path, base_commit: str | None = None) -> str:
|
||||
check=True,
|
||||
)
|
||||
|
||||
if base_commit:
|
||||
# Diff everything (committed + staged) against the original base.
|
||||
# This captures changes regardless of whether the agent committed them.
|
||||
result = subprocess.run(
|
||||
["git", "diff", base_commit, "--cached"],
|
||||
cwd=worktree_path,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
diff = result.stdout.strip()
|
||||
if diff:
|
||||
return diff
|
||||
|
||||
# Also check committed changes (agent may have committed and left
|
||||
# nothing staged)
|
||||
result = subprocess.run(
|
||||
["git", "diff", base_commit, "HEAD"],
|
||||
cwd=worktree_path,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
return result.stdout.strip()
|
||||
|
||||
# Fallback: no base_commit, use original behavior
|
||||
result = subprocess.run(
|
||||
["git", "diff", "--cached", "HEAD"],
|
||||
# Commit staged changes so everything is reachable via HEAD
|
||||
# (this is a no-op if nothing is staged)
|
||||
subprocess.run(
|
||||
["git", "commit", "-m", "cross-eval: capture-diff snapshot", "--allow-empty-message"],
|
||||
cwd=worktree_path,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
ref = base_commit or "HEAD~1"
|
||||
result = subprocess.run(
|
||||
["git", "diff", ref, "HEAD"],
|
||||
cwd=worktree_path,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
return result.stdout.strip()
|
||||
|
||||
|
||||
def get_current_head(worktree_path: Path) -> str:
    """Resolve ``HEAD`` in the worktree to its commit SHA.

    Runs ``git rev-parse HEAD`` with ``worktree_path`` as the working
    directory and returns the command's stdout with surrounding
    whitespace stripped.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status
            (the call is made with ``check=True``).
    """
    rev_parse_cmd = ["git", "rev-parse", "HEAD"]
    completed = subprocess.run(
        rev_parse_cmd,
        cwd=worktree_path,
        capture_output=True,
        text=True,
        check=True,
    )
    head_sha = completed.stdout.strip()
    return head_sha
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user