fix: capture_diff uses base commit to handle agent self-commits

Claude in agentic mode (interactive, no -p flag) commits its own changes, advancing HEAD. Because the changes were already committed, `git diff --cached HEAD` returned an empty diff, triggering false EMPTY_DIFF errors every time. capture_diff now diffs against the base commit SHA recorded at worktree creation, so changes are captured regardless of whether the agent committed them. Also adds UX_IMPROVEMENT_PLAN.md for guided message improvements. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-14 23:59:53 +09:00
parent af05fc1ddb
commit 60c7b07939
6 changed files with 281 additions and 28 deletions
--- a/cross_eval/pipeline.py
+++ b/cross_eval/pipeline.py
@@ -84,24 +84,25 @@ def _has_agentic_steps(config: PipelineConfig, steps: list[StepConfig]) -> bool:
    )


-def _setup_worktree(cwd: Path, run_dir: Path, preset_name: str) -> tuple[Path, str]:
+def _setup_worktree(cwd: Path, run_dir: Path, preset_name: str) -> tuple[Path, str, str]:
    """Create a shared worktree for the entire pipeline run.

    1. Generate branch name (cross-eval/<preset>_<timestamp>)
    2. Create branch from HEAD
    3. Create worktree on that branch

-    Returns (worktree_path, branch_name).
+    Returns (worktree_path, branch_name, base_commit).
    """
    from cross_eval.worktree import create_worktree, make_branch_name, make_worktree_dir
    branch_name = make_branch_name(preset_name)
    worktree_dir = make_worktree_dir(cwd, branch_name)
-    worktree_path = create_worktree(
+    worktree_path, base_commit = create_worktree(
        base_cwd=cwd, work_dir=worktree_dir, branch_name=branch_name,
    )
    (run_dir / "worktree_path.txt").write_text(f"{worktree_path}\n", encoding="utf-8")
    (run_dir / "worktree_branch.txt").write_text(f"{branch_name}\n", encoding="utf-8")
-    return worktree_path, branch_name
+    (run_dir / "worktree_base.txt").write_text(f"{base_commit}\n", encoding="utf-8")
+    return worktree_path, branch_name, base_commit


 def _copy_inputs_to_worktree(
@@ -321,10 +322,11 @@ def _run_simple_pipeline(
    # Setup shared worktree for agentic mode
    worktree_path: Path | None = None
    agentic_branch_name: str | None = None
+    agentic_base_commit: str | None = None
    base_repo_state: dict[str, str] | None = None
    base_repo_status: str | None = None
    if not dry_run and _has_agentic_steps(config, config.pipeline):
-        worktree_path, agentic_branch_name = _setup_worktree(
+        worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
            cwd, run_dir, config.preset_name,
        )
        _copy_inputs_to_worktree(config, worktree_path)
@@ -360,6 +362,7 @@ def _run_simple_pipeline(
                runtime_env=runtime_env,
                base_repo_state=base_repo_state,
                base_repo_status=base_repo_status,
+                base_commit=agentic_base_commit,
            )

            # Intermediate commit so next iteration's diff only shows new changes
@@ -498,10 +501,11 @@ def _run_phased_pipeline(
    all_phase_steps = [s for p in config.phases for s in p.steps]
    worktree_path: Path | None = None
    agentic_branch_name: str | None = None
+    agentic_base_commit: str | None = None
    base_repo_state: dict[str, str] | None = None
    base_repo_status: str | None = None
    if not dry_run and _has_agentic_steps(config, all_phase_steps):
-        worktree_path, agentic_branch_name = _setup_worktree(
+        worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
            cwd, run_dir, config.preset_name,
        )
        _copy_inputs_to_worktree(config, worktree_path)
@@ -558,6 +562,7 @@ def _run_phased_pipeline(
                    runtime_env=runtime_env,
                    base_repo_state=base_repo_state,
                    base_repo_status=base_repo_status,
+                    base_commit=agentic_base_commit,
                )

                # Intermediate commit so next iteration's diff only shows new changes
@@ -903,6 +908,7 @@ def _run_steps(
    runtime_env: dict[str, str] | None = None,
    base_repo_state: dict[str, str] | None = None,
    base_repo_status: str | None = None,
+    base_commit: str | None = None,
 ) -> tuple[dict[str, str], dict[str, AgentResult], str | None]:
    """Execute all steps in one iteration, parallelizing where possible."""
    step_outputs: dict[str, str] = {}
@@ -923,6 +929,7 @@ def _run_steps(
                runtime_env=runtime_env,
                base_repo_state=base_repo_state,
                base_repo_status=base_repo_status,
+                base_commit=base_commit,
            )
        else:
            _execute_parallel_batch(
@@ -934,6 +941,7 @@ def _run_steps(
                runtime_env=runtime_env,
                base_repo_state=base_repo_state,
                base_repo_status=base_repo_status,
+                base_commit=base_commit,
            )

    # Extract verdict from all verdict steps (ALL must PASS; ESCALATE wins over all)
@@ -961,6 +969,7 @@ def _invoke_agentic(
    env: dict[str, str] | None = None,
    timeout: int | None = None,
    quiet: bool = False,
+    base_commit: str | None = None,
 ) -> AgentResult:
    """Run an agent in agentic mode using an existing worktree."""
    return invoke_agent_agentic(
@@ -968,6 +977,7 @@ def _invoke_agentic(
        worktree_path=worktree_path,
        env=env,
        timeout=timeout, quiet=quiet,
+        base_commit=base_commit,
    )


@@ -992,6 +1002,7 @@ def _execute_step(
    runtime_env: dict[str, str] | None = None,
    base_repo_state: dict[str, str] | None = None,
    base_repo_status: str | None = None,
+    base_commit: str | None = None,
 ) -> None:
    """Execute a single step, updating step_outputs and step_results in place."""
    if not quiet:
@@ -1035,6 +1046,7 @@ def _execute_step(
                worktree_path=worktree_path,
                env=runtime_env,
                timeout=timeout, quiet=quiet,
+                base_commit=base_commit,
            )
        else:
            # When worktree exists, run non-agentic agents (reviewers) in
@@ -1125,6 +1137,7 @@ def _execute_parallel_batch(
    runtime_env: dict[str, str] | None = None,
    base_repo_state: dict[str, str] | None = None,
    base_repo_status: str | None = None,
+    base_commit: str | None = None,
 ) -> None:
    """Execute multiple steps in parallel using threads."""
    agent_names = ", ".join(s.agent for s in batch)
@@ -1139,6 +1152,7 @@ def _execute_parallel_batch(
                run_dir=run_dir, output_iter=output_iter, phase_name=phase_name,
                base_repo_state=base_repo_state,
                base_repo_status=base_repo_status,
+                base_commit=base_commit,
            )
        return

@@ -1161,6 +1175,7 @@ def _execute_parallel_batch(
                phase_name=phase_name, worktree_path=worktree_path,
                base_repo_state=base_repo_state,
                base_repo_status=base_repo_status,
+                base_commit=base_commit,
            )
        return

@@ -1204,6 +1219,7 @@ def _execute_parallel_batch(
                worktree_path=worktree_path,
                env=runtime_env,
                timeout=timeout, quiet=True,
+                base_commit=base_commit,
            )
        else:
            effective_cwd = worktree_path if worktree_path else cwd