diff --git a/cross_eval/pipeline.py b/cross_eval/pipeline.py
index 3b2ff8e..2a40ad5 100644
--- a/cross_eval/pipeline.py
+++ b/cross_eval/pipeline.py
@@ -62,18 +62,20 @@ def _commit_iteration(
     label: str,
     iteration: int,
     verdict: str | None,
-) -> None:
+) -> str:
     """Intermediate commit after each agentic iteration.
 
     This resets the diff baseline so the next iteration only captures new changes.
+    Returns the new HEAD SHA to use as the base_commit for the next iteration.
     """
-    from cross_eval.worktree import commit_worktree
+    from cross_eval.worktree import commit_worktree, get_current_head
     committed = commit_worktree(
         worktree_path,
         f"cross-eval: {label} v{iteration} ({verdict or 'no-verdict'})",
     )
     if committed:
         logger.debug(" Intermediate commit: v%d (%s)", iteration, verdict)
+    return get_current_head(worktree_path)
 
 
 def _has_agentic_steps(config: PipelineConfig, steps: list[StepConfig]) -> bool:
@@ -388,7 +390,7 @@ def _run_simple_pipeline(
 
         # Intermediate commit so next iteration's diff only shows new changes
         if worktree_path is not None:
-            _commit_iteration(worktree_path, config.preset_name, i, verdict)
+            agentic_base_commit = _commit_iteration(worktree_path, config.preset_name, i, verdict)
 
         iter_result = IterationResult(
             iteration=i,
@@ -588,7 +590,7 @@ def _run_phased_pipeline(
 
             # Intermediate commit so next iteration's diff only shows new changes
             if worktree_path is not None:
-                _commit_iteration(
+                agentic_base_commit = _commit_iteration(
                     worktree_path, f"{config.preset_name}/{phase.name}",
                     global_iter, verdict,
                 )
diff --git a/cross_eval/worktree.py b/cross_eval/worktree.py
index da8adef..c4174aa 100644
--- a/cross_eval/worktree.py
+++ b/cross_eval/worktree.py
@@ -101,19 +101,18 @@ def create_worktree(base_cwd: Path, work_dir: Path, branch_name: str) -> tuple[P
 
 
 def capture_diff(worktree_path: Path, base_commit: str | None = None) -> str:
-    """Capture all changes made in the worktree as a unified diff.
+    """Capture all changes made in the worktree since ``base_commit``.
 
-    Includes both tracked modifications, new untracked files, and changes
-    that the agent may have committed on its own.
+    Handles two scenarios with a single ``git diff --cached <base>``:
+    1. Agent left changes uncommitted → ``git add -A`` stages them into the index
+    2. Agent committed its own changes → HEAD (and thus the index) is ahead of base
 
     Args:
-        base_commit: The commit SHA from when the worktree was created.
-            If provided, diffs against this fixed base instead of HEAD.
-            This is critical because agents (e.g. Claude in interactive
-            mode) may create their own commits, advancing HEAD and
-            making ``git diff --cached HEAD`` return empty.
+        base_commit: The diff anchor — typically the worktree HEAD *before* this
+            iteration started (set by ``get_current_head`` after each
+            ``_commit_iteration``). Falls back to ``HEAD`` if not given.
     """
-    # Stage any uncommitted changes so they're included in the diff
+    # Stage any uncommitted changes (including untracked files)
     subprocess.run(
         ["git", "add", "-A"],
         cwd=worktree_path,
@@ -121,36 +120,28 @@ def capture_diff(worktree_path: Path, base_commit: str | None = None) -> str:
         check=True,
     )
 
-    if base_commit:
-        # Diff everything (committed + staged) against the original base.
-        # This captures changes regardless of whether the agent committed them.
-        result = subprocess.run(
-            ["git", "diff", base_commit, "--cached"],
-            cwd=worktree_path,
-            capture_output=True,
-            text=True,
-        )
-        diff = result.stdout.strip()
-        if diff:
-            return diff
-
-        # Also check committed changes (agent may have committed and left
-        # nothing staged)
-        result = subprocess.run(
-            ["git", "diff", base_commit, "HEAD"],
-            cwd=worktree_path,
-            capture_output=True,
-            text=True,
-        )
-        return result.stdout.strip()
-
-    # Fallback: no base_commit, use original behavior
-    result = subprocess.run(
-        ["git", "diff", "--cached", "HEAD"],
+    # Compare the index with the base. After ``git add -A`` the index holds
+    # HEAD plus all uncommitted work, so one diff covers both scenarios
+    # without creating a snapshot commit as a side effect.
+    ref = base_commit or "HEAD"
+    result = subprocess.run(
+        ["git", "diff", "--cached", ref],
         cwd=worktree_path,
         capture_output=True,
         text=True,
     )
+    return result.stdout.strip()
+
+
+def get_current_head(worktree_path: Path) -> str:
+    """Return the current HEAD SHA of the worktree."""
+    result = subprocess.run(
+        ["git", "rev-parse", "HEAD"],
+        cwd=worktree_path,
+        capture_output=True,
+        text=True,
+        check=True,
+    )
     return result.stdout.strip()
 
 
diff --git a/tests/test_runtime_misc.py b/tests/test_runtime_misc.py
index 3f7c39d..d531a91 100644
--- a/tests/test_runtime_misc.py
+++ b/tests/test_runtime_misc.py
@@ -413,11 +413,13 @@ class TestInvokeAgenticRuntime(unittest.TestCase):
 
 
 class TestPipelineHelpers(unittest.TestCase):
+    @patch("cross_eval.worktree.get_current_head", return_value="a" * 40)
     @patch("cross_eval.worktree.commit_worktree", return_value=True)
-    def test_commit_iteration_logs_only_when_committed(self, mock_commit: MagicMock) -> None:
+    def test_commit_iteration_logs_only_when_committed(self, mock_commit: MagicMock, mock_head: MagicMock) -> None:
         with tempfile.TemporaryDirectory() as tmpdir:
-            _commit_iteration(Path(tmpdir), "review-fix", 2, "PASS")
+            new_head = _commit_iteration(Path(tmpdir), "review-fix", 2, "PASS")
         mock_commit.assert_called_once()
+        self.assertEqual(new_head, "a" * 40)
 
     def test_snapshot_repo_state_includes_untracked_digest(self) -> None:
         with tempfile.TemporaryDirectory() as tmpdir: