fix: capture_diff uses base commit to handle agent self-commits
When Claude runs in agentic mode (interactive, no -p flag), it commits its own changes, which advances HEAD. As a result, `git diff --cached HEAD` returned an empty diff and triggered a false EMPTY_DIFF error on every such run. capture_diff now diffs against the base commit SHA recorded at worktree creation, so changes are captured regardless of whether the agent committed them. Also adds UX_IMPROVEMENT_PLAN.md for guided message improvements. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -414,6 +414,7 @@ def invoke_agent_agentic(
|
||||
env: Optional[dict[str, str]] = None,
|
||||
timeout: int | None = None,
|
||||
quiet: bool = False,
|
||||
base_commit: str | None = None,
|
||||
) -> AgentResult:
|
||||
"""Invoke an agent in agentic mode using the worktree as the source of truth."""
|
||||
from cross_eval.worktree import capture_diff
|
||||
@@ -506,8 +507,8 @@ def invoke_agent_agentic(
|
||||
suggested_action=suggested_action,
|
||||
)
|
||||
|
||||
# Capture git diff as the output (changes since last commit on the branch)
|
||||
diff_output = capture_diff(worktree_path)
|
||||
# Capture git diff as the output (changes since the base commit)
|
||||
diff_output = capture_diff(worktree_path, base_commit=base_commit)
|
||||
|
||||
if not diff_output:
|
||||
stdout_excerpt = (result.stdout or "").strip()
|
||||
|
||||
@@ -84,24 +84,25 @@ def _has_agentic_steps(config: PipelineConfig, steps: list[StepConfig]) -> bool:
|
||||
)
|
||||
|
||||
|
||||
def _setup_worktree(cwd: Path, run_dir: Path, preset_name: str) -> tuple[Path, str]:
|
||||
def _setup_worktree(cwd: Path, run_dir: Path, preset_name: str) -> tuple[Path, str, str]:
|
||||
"""Create a shared worktree for the entire pipeline run.
|
||||
|
||||
1. Generate branch name (cross-eval/<preset>_<timestamp>)
|
||||
2. Create branch from HEAD
|
||||
3. Create worktree on that branch
|
||||
|
||||
Returns (worktree_path, branch_name).
|
||||
Returns (worktree_path, branch_name, base_commit).
|
||||
"""
|
||||
from cross_eval.worktree import create_worktree, make_branch_name, make_worktree_dir
|
||||
branch_name = make_branch_name(preset_name)
|
||||
worktree_dir = make_worktree_dir(cwd, branch_name)
|
||||
worktree_path = create_worktree(
|
||||
worktree_path, base_commit = create_worktree(
|
||||
base_cwd=cwd, work_dir=worktree_dir, branch_name=branch_name,
|
||||
)
|
||||
(run_dir / "worktree_path.txt").write_text(f"{worktree_path}\n", encoding="utf-8")
|
||||
(run_dir / "worktree_branch.txt").write_text(f"{branch_name}\n", encoding="utf-8")
|
||||
return worktree_path, branch_name
|
||||
(run_dir / "worktree_base.txt").write_text(f"{base_commit}\n", encoding="utf-8")
|
||||
return worktree_path, branch_name, base_commit
|
||||
|
||||
|
||||
def _copy_inputs_to_worktree(
|
||||
@@ -321,10 +322,11 @@ def _run_simple_pipeline(
|
||||
# Setup shared worktree for agentic mode
|
||||
worktree_path: Path | None = None
|
||||
agentic_branch_name: str | None = None
|
||||
agentic_base_commit: str | None = None
|
||||
base_repo_state: dict[str, str] | None = None
|
||||
base_repo_status: str | None = None
|
||||
if not dry_run and _has_agentic_steps(config, config.pipeline):
|
||||
worktree_path, agentic_branch_name = _setup_worktree(
|
||||
worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
|
||||
cwd, run_dir, config.preset_name,
|
||||
)
|
||||
_copy_inputs_to_worktree(config, worktree_path)
|
||||
@@ -360,6 +362,7 @@ def _run_simple_pipeline(
|
||||
runtime_env=runtime_env,
|
||||
base_repo_state=base_repo_state,
|
||||
base_repo_status=base_repo_status,
|
||||
base_commit=agentic_base_commit,
|
||||
)
|
||||
|
||||
# Intermediate commit so next iteration's diff only shows new changes
|
||||
@@ -498,10 +501,11 @@ def _run_phased_pipeline(
|
||||
all_phase_steps = [s for p in config.phases for s in p.steps]
|
||||
worktree_path: Path | None = None
|
||||
agentic_branch_name: str | None = None
|
||||
agentic_base_commit: str | None = None
|
||||
base_repo_state: dict[str, str] | None = None
|
||||
base_repo_status: str | None = None
|
||||
if not dry_run and _has_agentic_steps(config, all_phase_steps):
|
||||
worktree_path, agentic_branch_name = _setup_worktree(
|
||||
worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
|
||||
cwd, run_dir, config.preset_name,
|
||||
)
|
||||
_copy_inputs_to_worktree(config, worktree_path)
|
||||
@@ -558,6 +562,7 @@ def _run_phased_pipeline(
|
||||
runtime_env=runtime_env,
|
||||
base_repo_state=base_repo_state,
|
||||
base_repo_status=base_repo_status,
|
||||
base_commit=agentic_base_commit,
|
||||
)
|
||||
|
||||
# Intermediate commit so next iteration's diff only shows new changes
|
||||
@@ -903,6 +908,7 @@ def _run_steps(
|
||||
runtime_env: dict[str, str] | None = None,
|
||||
base_repo_state: dict[str, str] | None = None,
|
||||
base_repo_status: str | None = None,
|
||||
base_commit: str | None = None,
|
||||
) -> tuple[dict[str, str], dict[str, AgentResult], str | None]:
|
||||
"""Execute all steps in one iteration, parallelizing where possible."""
|
||||
step_outputs: dict[str, str] = {}
|
||||
@@ -923,6 +929,7 @@ def _run_steps(
|
||||
runtime_env=runtime_env,
|
||||
base_repo_state=base_repo_state,
|
||||
base_repo_status=base_repo_status,
|
||||
base_commit=base_commit,
|
||||
)
|
||||
else:
|
||||
_execute_parallel_batch(
|
||||
@@ -934,6 +941,7 @@ def _run_steps(
|
||||
runtime_env=runtime_env,
|
||||
base_repo_state=base_repo_state,
|
||||
base_repo_status=base_repo_status,
|
||||
base_commit=base_commit,
|
||||
)
|
||||
|
||||
# Extract verdict from all verdict steps (ALL must PASS; ESCALATE wins over all)
|
||||
@@ -961,6 +969,7 @@ def _invoke_agentic(
|
||||
env: dict[str, str] | None = None,
|
||||
timeout: int | None = None,
|
||||
quiet: bool = False,
|
||||
base_commit: str | None = None,
|
||||
) -> AgentResult:
|
||||
"""Run an agent in agentic mode using an existing worktree."""
|
||||
return invoke_agent_agentic(
|
||||
@@ -968,6 +977,7 @@ def _invoke_agentic(
|
||||
worktree_path=worktree_path,
|
||||
env=env,
|
||||
timeout=timeout, quiet=quiet,
|
||||
base_commit=base_commit,
|
||||
)
|
||||
|
||||
|
||||
@@ -992,6 +1002,7 @@ def _execute_step(
|
||||
runtime_env: dict[str, str] | None = None,
|
||||
base_repo_state: dict[str, str] | None = None,
|
||||
base_repo_status: str | None = None,
|
||||
base_commit: str | None = None,
|
||||
) -> None:
|
||||
"""Execute a single step, updating step_outputs and step_results in place."""
|
||||
if not quiet:
|
||||
@@ -1035,6 +1046,7 @@ def _execute_step(
|
||||
worktree_path=worktree_path,
|
||||
env=runtime_env,
|
||||
timeout=timeout, quiet=quiet,
|
||||
base_commit=base_commit,
|
||||
)
|
||||
else:
|
||||
# When worktree exists, run non-agentic agents (reviewers) in
|
||||
@@ -1125,6 +1137,7 @@ def _execute_parallel_batch(
|
||||
runtime_env: dict[str, str] | None = None,
|
||||
base_repo_state: dict[str, str] | None = None,
|
||||
base_repo_status: str | None = None,
|
||||
base_commit: str | None = None,
|
||||
) -> None:
|
||||
"""Execute multiple steps in parallel using threads."""
|
||||
agent_names = ", ".join(s.agent for s in batch)
|
||||
@@ -1139,6 +1152,7 @@ def _execute_parallel_batch(
|
||||
run_dir=run_dir, output_iter=output_iter, phase_name=phase_name,
|
||||
base_repo_state=base_repo_state,
|
||||
base_repo_status=base_repo_status,
|
||||
base_commit=base_commit,
|
||||
)
|
||||
return
|
||||
|
||||
@@ -1161,6 +1175,7 @@ def _execute_parallel_batch(
|
||||
phase_name=phase_name, worktree_path=worktree_path,
|
||||
base_repo_state=base_repo_state,
|
||||
base_repo_status=base_repo_status,
|
||||
base_commit=base_commit,
|
||||
)
|
||||
return
|
||||
|
||||
@@ -1204,6 +1219,7 @@ def _execute_parallel_batch(
|
||||
worktree_path=worktree_path,
|
||||
env=runtime_env,
|
||||
timeout=timeout, quiet=True,
|
||||
base_commit=base_commit,
|
||||
)
|
||||
else:
|
||||
effective_cwd = worktree_path if worktree_path else cwd
|
||||
|
||||
@@ -37,18 +37,31 @@ def make_worktree_dir(base_cwd: Path, branch_name: str) -> Path:
|
||||
)
|
||||
|
||||
|
||||
def create_worktree(base_cwd: Path, work_dir: Path, branch_name: str) -> Path:
|
||||
def create_worktree(base_cwd: Path, work_dir: Path, branch_name: str) -> tuple[Path, str]:
|
||||
"""Create a git worktree on a new branch from HEAD.
|
||||
|
||||
1. Create branch from HEAD
|
||||
2. Create worktree checked out to that branch
|
||||
|
||||
The branch lives in the original repo, so it survives worktree removal.
|
||||
Returns (worktree_path, base_commit_sha).
|
||||
"""
|
||||
work_dir = work_dir.resolve()
|
||||
if work_dir.exists():
|
||||
shutil.rmtree(work_dir)
|
||||
|
||||
# Record the base commit SHA before creating the branch.
|
||||
# This is the anchor for all diffs — even if the agent makes its own commits,
|
||||
# we always diff against this base to capture the full set of changes.
|
||||
result = subprocess.run(
|
||||
["git", "rev-parse", "HEAD"],
|
||||
cwd=base_cwd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
base_commit = result.stdout.strip()
|
||||
|
||||
# Create the branch at HEAD
|
||||
try:
|
||||
subprocess.run(
|
||||
@@ -83,15 +96,24 @@ def create_worktree(base_cwd: Path, work_dir: Path, branch_name: str) -> Path:
|
||||
f"Failed to create worktree at {work_dir}: {e.stderr.strip()}"
|
||||
) from e
|
||||
|
||||
logger.debug("Created worktree on branch '%s': %s", branch_name, work_dir)
|
||||
return work_dir
|
||||
logger.debug("Created worktree on branch '%s': %s (base: %s)", branch_name, work_dir, base_commit[:8])
|
||||
return work_dir, base_commit
|
||||
|
||||
|
||||
def capture_diff(worktree_path: Path) -> str:
|
||||
def capture_diff(worktree_path: Path, base_commit: str | None = None) -> str:
|
||||
"""Capture all changes made in the worktree as a unified diff.
|
||||
|
||||
Includes both tracked modifications and new untracked files.
|
||||
Includes tracked modifications, new untracked files, and changes
|
||||
that the agent may have committed on its own.
|
||||
|
||||
Args:
|
||||
base_commit: The commit SHA from when the worktree was created.
|
||||
If provided, diffs against this fixed base instead of HEAD.
|
||||
This is critical because agents (e.g. Claude in interactive
|
||||
mode) may create their own commits, advancing HEAD and
|
||||
making ``git diff --cached HEAD`` return empty.
|
||||
"""
|
||||
# Stage any uncommitted changes so they're included in the diff
|
||||
subprocess.run(
|
||||
["git", "add", "-A"],
|
||||
cwd=worktree_path,
|
||||
@@ -99,6 +121,30 @@ def capture_diff(worktree_path: Path) -> str:
|
||||
check=True,
|
||||
)
|
||||
|
||||
if base_commit:
|
||||
# Diff everything (committed + staged) against the original base.
|
||||
# This captures changes regardless of whether the agent committed them.
|
||||
result = subprocess.run(
|
||||
["git", "diff", base_commit, "--cached"],
|
||||
cwd=worktree_path,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
diff = result.stdout.strip()
|
||||
if diff:
|
||||
return diff
|
||||
|
||||
# Also check committed changes (agent may have committed and left
|
||||
# nothing staged)
|
||||
result = subprocess.run(
|
||||
["git", "diff", base_commit, "HEAD"],
|
||||
cwd=worktree_path,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
return result.stdout.strip()
|
||||
|
||||
# Fallback: no base_commit, use original behavior
|
||||
result = subprocess.run(
|
||||
["git", "diff", "--cached", "HEAD"],
|
||||
cwd=worktree_path,
|
||||
|
||||
Reference in New Issue
Block a user