fix: capture_diff uses base commit to handle agent self-commits
Claude in agentic mode (interactive, no -p flag) commits its own changes, advancing HEAD. As a result, `git diff --cached HEAD` returned an empty diff, triggering a false EMPTY_DIFF error on every run.

capture_diff now diffs against the base commit SHA recorded at worktree creation, so changes are captured regardless of whether the agent committed them.

Also adds UX_IMPROVEMENT_PLAN.md for guided message improvements.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -84,24 +84,25 @@ def _has_agentic_steps(config: PipelineConfig, steps: list[StepConfig]) -> bool:
|
||||
)
|
||||
|
||||
|
||||
def _setup_worktree(cwd: Path, run_dir: Path, preset_name: str) -> tuple[Path, str]:
|
||||
def _setup_worktree(cwd: Path, run_dir: Path, preset_name: str) -> tuple[Path, str, str]:
|
||||
"""Create a shared worktree for the entire pipeline run.
|
||||
|
||||
1. Generate branch name (cross-eval/<preset>_<timestamp>)
|
||||
2. Create branch from HEAD
|
||||
3. Create worktree on that branch
|
||||
|
||||
Returns (worktree_path, branch_name).
|
||||
Returns (worktree_path, branch_name, base_commit).
|
||||
"""
|
||||
from cross_eval.worktree import create_worktree, make_branch_name, make_worktree_dir
|
||||
branch_name = make_branch_name(preset_name)
|
||||
worktree_dir = make_worktree_dir(cwd, branch_name)
|
||||
worktree_path = create_worktree(
|
||||
worktree_path, base_commit = create_worktree(
|
||||
base_cwd=cwd, work_dir=worktree_dir, branch_name=branch_name,
|
||||
)
|
||||
(run_dir / "worktree_path.txt").write_text(f"{worktree_path}\n", encoding="utf-8")
|
||||
(run_dir / "worktree_branch.txt").write_text(f"{branch_name}\n", encoding="utf-8")
|
||||
return worktree_path, branch_name
|
||||
(run_dir / "worktree_base.txt").write_text(f"{base_commit}\n", encoding="utf-8")
|
||||
return worktree_path, branch_name, base_commit
|
||||
|
||||
|
||||
def _copy_inputs_to_worktree(
|
||||
@@ -321,10 +322,11 @@ def _run_simple_pipeline(
|
||||
# Setup shared worktree for agentic mode
|
||||
worktree_path: Path | None = None
|
||||
agentic_branch_name: str | None = None
|
||||
agentic_base_commit: str | None = None
|
||||
base_repo_state: dict[str, str] | None = None
|
||||
base_repo_status: str | None = None
|
||||
if not dry_run and _has_agentic_steps(config, config.pipeline):
|
||||
worktree_path, agentic_branch_name = _setup_worktree(
|
||||
worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
|
||||
cwd, run_dir, config.preset_name,
|
||||
)
|
||||
_copy_inputs_to_worktree(config, worktree_path)
|
||||
@@ -360,6 +362,7 @@ def _run_simple_pipeline(
|
||||
runtime_env=runtime_env,
|
||||
base_repo_state=base_repo_state,
|
||||
base_repo_status=base_repo_status,
|
||||
base_commit=agentic_base_commit,
|
||||
)
|
||||
|
||||
# Intermediate commit so next iteration's diff only shows new changes
|
||||
@@ -498,10 +501,11 @@ def _run_phased_pipeline(
|
||||
all_phase_steps = [s for p in config.phases for s in p.steps]
|
||||
worktree_path: Path | None = None
|
||||
agentic_branch_name: str | None = None
|
||||
agentic_base_commit: str | None = None
|
||||
base_repo_state: dict[str, str] | None = None
|
||||
base_repo_status: str | None = None
|
||||
if not dry_run and _has_agentic_steps(config, all_phase_steps):
|
||||
worktree_path, agentic_branch_name = _setup_worktree(
|
||||
worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
|
||||
cwd, run_dir, config.preset_name,
|
||||
)
|
||||
_copy_inputs_to_worktree(config, worktree_path)
|
||||
@@ -558,6 +562,7 @@ def _run_phased_pipeline(
|
||||
runtime_env=runtime_env,
|
||||
base_repo_state=base_repo_state,
|
||||
base_repo_status=base_repo_status,
|
||||
base_commit=agentic_base_commit,
|
||||
)
|
||||
|
||||
# Intermediate commit so next iteration's diff only shows new changes
|
||||
@@ -903,6 +908,7 @@ def _run_steps(
|
||||
runtime_env: dict[str, str] | None = None,
|
||||
base_repo_state: dict[str, str] | None = None,
|
||||
base_repo_status: str | None = None,
|
||||
base_commit: str | None = None,
|
||||
) -> tuple[dict[str, str], dict[str, AgentResult], str | None]:
|
||||
"""Execute all steps in one iteration, parallelizing where possible."""
|
||||
step_outputs: dict[str, str] = {}
|
||||
@@ -923,6 +929,7 @@ def _run_steps(
|
||||
runtime_env=runtime_env,
|
||||
base_repo_state=base_repo_state,
|
||||
base_repo_status=base_repo_status,
|
||||
base_commit=base_commit,
|
||||
)
|
||||
else:
|
||||
_execute_parallel_batch(
|
||||
@@ -934,6 +941,7 @@ def _run_steps(
|
||||
runtime_env=runtime_env,
|
||||
base_repo_state=base_repo_state,
|
||||
base_repo_status=base_repo_status,
|
||||
base_commit=base_commit,
|
||||
)
|
||||
|
||||
# Extract verdict from all verdict steps (ALL must PASS; ESCALATE wins over all)
|
||||
@@ -961,6 +969,7 @@ def _invoke_agentic(
|
||||
env: dict[str, str] | None = None,
|
||||
timeout: int | None = None,
|
||||
quiet: bool = False,
|
||||
base_commit: str | None = None,
|
||||
) -> AgentResult:
|
||||
"""Run an agent in agentic mode using an existing worktree."""
|
||||
return invoke_agent_agentic(
|
||||
@@ -968,6 +977,7 @@ def _invoke_agentic(
|
||||
worktree_path=worktree_path,
|
||||
env=env,
|
||||
timeout=timeout, quiet=quiet,
|
||||
base_commit=base_commit,
|
||||
)
|
||||
|
||||
|
||||
@@ -992,6 +1002,7 @@ def _execute_step(
|
||||
runtime_env: dict[str, str] | None = None,
|
||||
base_repo_state: dict[str, str] | None = None,
|
||||
base_repo_status: str | None = None,
|
||||
base_commit: str | None = None,
|
||||
) -> None:
|
||||
"""Execute a single step, updating step_outputs and step_results in place."""
|
||||
if not quiet:
|
||||
@@ -1035,6 +1046,7 @@ def _execute_step(
|
||||
worktree_path=worktree_path,
|
||||
env=runtime_env,
|
||||
timeout=timeout, quiet=quiet,
|
||||
base_commit=base_commit,
|
||||
)
|
||||
else:
|
||||
# When worktree exists, run non-agentic agents (reviewers) in
|
||||
@@ -1125,6 +1137,7 @@ def _execute_parallel_batch(
|
||||
runtime_env: dict[str, str] | None = None,
|
||||
base_repo_state: dict[str, str] | None = None,
|
||||
base_repo_status: str | None = None,
|
||||
base_commit: str | None = None,
|
||||
) -> None:
|
||||
"""Execute multiple steps in parallel using threads."""
|
||||
agent_names = ", ".join(s.agent for s in batch)
|
||||
@@ -1139,6 +1152,7 @@ def _execute_parallel_batch(
|
||||
run_dir=run_dir, output_iter=output_iter, phase_name=phase_name,
|
||||
base_repo_state=base_repo_state,
|
||||
base_repo_status=base_repo_status,
|
||||
base_commit=base_commit,
|
||||
)
|
||||
return
|
||||
|
||||
@@ -1161,6 +1175,7 @@ def _execute_parallel_batch(
|
||||
phase_name=phase_name, worktree_path=worktree_path,
|
||||
base_repo_state=base_repo_state,
|
||||
base_repo_status=base_repo_status,
|
||||
base_commit=base_commit,
|
||||
)
|
||||
return
|
||||
|
||||
@@ -1204,6 +1219,7 @@ def _execute_parallel_batch(
|
||||
worktree_path=worktree_path,
|
||||
env=runtime_env,
|
||||
timeout=timeout, quiet=True,
|
||||
base_commit=base_commit,
|
||||
)
|
||||
else:
|
||||
effective_cwd = worktree_path if worktree_path else cwd
|
||||
|
||||
Reference in New Issue
Block a user