fix: capture_diff uses base commit to handle agent self-commits

Claude in agentic mode (interactive, no -p flag) commits its own changes,
advancing HEAD. With everything committed, the index matches the new HEAD,
so `git diff --cached HEAD` returned an empty diff and each such run raised
a false EMPTY_DIFF error. capture_diff now diffs against the base commit SHA
recorded at worktree creation, so changes are captured whether or not the
agent committed them.

Also adds UX_IMPROVEMENT_PLAN.md for guided message improvements.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
chungyeong
2026-03-14 23:59:53 +09:00
parent af05fc1ddb
commit 60c7b07939
6 changed files with 281 additions and 28 deletions

View File

@@ -414,6 +414,7 @@ def invoke_agent_agentic(
env: Optional[dict[str, str]] = None,
timeout: int | None = None,
quiet: bool = False,
base_commit: str | None = None,
) -> AgentResult:
"""Invoke an agent in agentic mode using the worktree as the source of truth."""
from cross_eval.worktree import capture_diff
@@ -506,8 +507,8 @@ def invoke_agent_agentic(
suggested_action=suggested_action,
)
# Capture git diff as the output (changes since last commit on the branch)
diff_output = capture_diff(worktree_path)
# Capture git diff as the output (changes since the base commit)
diff_output = capture_diff(worktree_path, base_commit=base_commit)
if not diff_output:
stdout_excerpt = (result.stdout or "").strip()

View File

@@ -84,24 +84,25 @@ def _has_agentic_steps(config: PipelineConfig, steps: list[StepConfig]) -> bool:
)
def _setup_worktree(cwd: Path, run_dir: Path, preset_name: str) -> tuple[Path, str]:
def _setup_worktree(cwd: Path, run_dir: Path, preset_name: str) -> tuple[Path, str, str]:
"""Create a shared worktree for the entire pipeline run.
1. Generate branch name (cross-eval/<preset>_<timestamp>)
2. Create branch from HEAD
3. Create worktree on that branch
Returns (worktree_path, branch_name).
Returns (worktree_path, branch_name, base_commit).
"""
from cross_eval.worktree import create_worktree, make_branch_name, make_worktree_dir
branch_name = make_branch_name(preset_name)
worktree_dir = make_worktree_dir(cwd, branch_name)
worktree_path = create_worktree(
worktree_path, base_commit = create_worktree(
base_cwd=cwd, work_dir=worktree_dir, branch_name=branch_name,
)
(run_dir / "worktree_path.txt").write_text(f"{worktree_path}\n", encoding="utf-8")
(run_dir / "worktree_branch.txt").write_text(f"{branch_name}\n", encoding="utf-8")
return worktree_path, branch_name
(run_dir / "worktree_base.txt").write_text(f"{base_commit}\n", encoding="utf-8")
return worktree_path, branch_name, base_commit
def _copy_inputs_to_worktree(
@@ -321,10 +322,11 @@ def _run_simple_pipeline(
# Setup shared worktree for agentic mode
worktree_path: Path | None = None
agentic_branch_name: str | None = None
agentic_base_commit: str | None = None
base_repo_state: dict[str, str] | None = None
base_repo_status: str | None = None
if not dry_run and _has_agentic_steps(config, config.pipeline):
worktree_path, agentic_branch_name = _setup_worktree(
worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
cwd, run_dir, config.preset_name,
)
_copy_inputs_to_worktree(config, worktree_path)
@@ -360,6 +362,7 @@ def _run_simple_pipeline(
runtime_env=runtime_env,
base_repo_state=base_repo_state,
base_repo_status=base_repo_status,
base_commit=agentic_base_commit,
)
# Intermediate commit so next iteration's diff only shows new changes
@@ -498,10 +501,11 @@ def _run_phased_pipeline(
all_phase_steps = [s for p in config.phases for s in p.steps]
worktree_path: Path | None = None
agentic_branch_name: str | None = None
agentic_base_commit: str | None = None
base_repo_state: dict[str, str] | None = None
base_repo_status: str | None = None
if not dry_run and _has_agentic_steps(config, all_phase_steps):
worktree_path, agentic_branch_name = _setup_worktree(
worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
cwd, run_dir, config.preset_name,
)
_copy_inputs_to_worktree(config, worktree_path)
@@ -558,6 +562,7 @@ def _run_phased_pipeline(
runtime_env=runtime_env,
base_repo_state=base_repo_state,
base_repo_status=base_repo_status,
base_commit=agentic_base_commit,
)
# Intermediate commit so next iteration's diff only shows new changes
@@ -903,6 +908,7 @@ def _run_steps(
runtime_env: dict[str, str] | None = None,
base_repo_state: dict[str, str] | None = None,
base_repo_status: str | None = None,
base_commit: str | None = None,
) -> tuple[dict[str, str], dict[str, AgentResult], str | None]:
"""Execute all steps in one iteration, parallelizing where possible."""
step_outputs: dict[str, str] = {}
@@ -923,6 +929,7 @@ def _run_steps(
runtime_env=runtime_env,
base_repo_state=base_repo_state,
base_repo_status=base_repo_status,
base_commit=base_commit,
)
else:
_execute_parallel_batch(
@@ -934,6 +941,7 @@ def _run_steps(
runtime_env=runtime_env,
base_repo_state=base_repo_state,
base_repo_status=base_repo_status,
base_commit=base_commit,
)
# Extract verdict from all verdict steps (ALL must PASS; ESCALATE wins over all)
@@ -961,6 +969,7 @@ def _invoke_agentic(
env: dict[str, str] | None = None,
timeout: int | None = None,
quiet: bool = False,
base_commit: str | None = None,
) -> AgentResult:
"""Run an agent in agentic mode using an existing worktree."""
return invoke_agent_agentic(
@@ -968,6 +977,7 @@ def _invoke_agentic(
worktree_path=worktree_path,
env=env,
timeout=timeout, quiet=quiet,
base_commit=base_commit,
)
@@ -992,6 +1002,7 @@ def _execute_step(
runtime_env: dict[str, str] | None = None,
base_repo_state: dict[str, str] | None = None,
base_repo_status: str | None = None,
base_commit: str | None = None,
) -> None:
"""Execute a single step, updating step_outputs and step_results in place."""
if not quiet:
@@ -1035,6 +1046,7 @@ def _execute_step(
worktree_path=worktree_path,
env=runtime_env,
timeout=timeout, quiet=quiet,
base_commit=base_commit,
)
else:
# When worktree exists, run non-agentic agents (reviewers) in
@@ -1125,6 +1137,7 @@ def _execute_parallel_batch(
runtime_env: dict[str, str] | None = None,
base_repo_state: dict[str, str] | None = None,
base_repo_status: str | None = None,
base_commit: str | None = None,
) -> None:
"""Execute multiple steps in parallel using threads."""
agent_names = ", ".join(s.agent for s in batch)
@@ -1139,6 +1152,7 @@ def _execute_parallel_batch(
run_dir=run_dir, output_iter=output_iter, phase_name=phase_name,
base_repo_state=base_repo_state,
base_repo_status=base_repo_status,
base_commit=base_commit,
)
return
@@ -1161,6 +1175,7 @@ def _execute_parallel_batch(
phase_name=phase_name, worktree_path=worktree_path,
base_repo_state=base_repo_state,
base_repo_status=base_repo_status,
base_commit=base_commit,
)
return
@@ -1204,6 +1219,7 @@ def _execute_parallel_batch(
worktree_path=worktree_path,
env=runtime_env,
timeout=timeout, quiet=True,
base_commit=base_commit,
)
else:
effective_cwd = worktree_path if worktree_path else cwd

View File

@@ -37,18 +37,31 @@ def make_worktree_dir(base_cwd: Path, branch_name: str) -> Path:
)
def create_worktree(base_cwd: Path, work_dir: Path, branch_name: str) -> Path:
def create_worktree(base_cwd: Path, work_dir: Path, branch_name: str) -> tuple[Path, str]:
"""Create a git worktree on a new branch from HEAD.
1. Create branch from HEAD
2. Create worktree checked out to that branch
The branch lives in the original repo, so it survives worktree removal.
Returns (worktree_path, base_commit_sha).
"""
work_dir = work_dir.resolve()
if work_dir.exists():
shutil.rmtree(work_dir)
# Record the base commit SHA before creating the branch.
# This is the anchor for all diffs — even if the agent makes its own commits,
# we always diff against this base to capture the full set of changes.
result = subprocess.run(
["git", "rev-parse", "HEAD"],
cwd=base_cwd,
capture_output=True,
text=True,
check=True,
)
base_commit = result.stdout.strip()
# Create the branch at HEAD
try:
subprocess.run(
@@ -83,15 +96,24 @@ def create_worktree(base_cwd: Path, work_dir: Path, branch_name: str) -> Path:
f"Failed to create worktree at {work_dir}: {e.stderr.strip()}"
) from e
logger.debug("Created worktree on branch '%s': %s", branch_name, work_dir)
return work_dir
logger.debug("Created worktree on branch '%s': %s (base: %s)", branch_name, work_dir, base_commit[:8])
return work_dir, base_commit
def capture_diff(worktree_path: Path) -> str:
def capture_diff(worktree_path: Path, base_commit: str | None = None) -> str:
"""Capture all changes made in the worktree as a unified diff.
Includes both tracked modifications and new untracked files.
Includes tracked modifications, new untracked files, and changes
that the agent may have committed on its own.
Args:
base_commit: The commit SHA from when the worktree was created.
If provided, diffs against this fixed base instead of HEAD.
This is critical because agents (e.g. Claude in interactive
mode) may create their own commits, advancing HEAD and
making ``git diff --cached HEAD`` return empty.
"""
# Stage any uncommitted changes so they're included in the diff
subprocess.run(
["git", "add", "-A"],
cwd=worktree_path,
@@ -99,6 +121,30 @@ def capture_diff(worktree_path: Path) -> str:
check=True,
)
if base_commit:
# Diff everything (committed + staged) against the original base.
# This captures changes regardless of whether the agent committed them.
result = subprocess.run(
["git", "diff", base_commit, "--cached"],
cwd=worktree_path,
capture_output=True,
text=True,
)
diff = result.stdout.strip()
if diff:
return diff
# Also check committed changes (agent may have committed and left
# nothing staged)
result = subprocess.run(
["git", "diff", base_commit, "HEAD"],
cwd=worktree_path,
capture_output=True,
text=True,
)
return result.stdout.strip()
# Fallback: no base_commit, use original behavior
result = subprocess.run(
["git", "diff", "--cached", "HEAD"],
cwd=worktree_path,