fix: capture_diff uses base commit to handle agent self-commits

Claude in agentic mode (interactive, no -p flag) commits its own changes,
advancing HEAD. As a result, `git diff --cached HEAD` returned an empty diff
and triggered a false EMPTY_DIFF error whenever the agent had committed its
work. capture_diff now diffs against the base commit SHA recorded at worktree
creation, so changes are captured regardless of whether the agent committed them.

Also adds UX_IMPROVEMENT_PLAN.md for guided message improvements.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
chungyeong
2026-03-14 23:59:53 +09:00
parent af05fc1ddb
commit 60c7b07939
6 changed files with 281 additions and 28 deletions

View File

@@ -84,24 +84,25 @@ def _has_agentic_steps(config: PipelineConfig, steps: list[StepConfig]) -> bool:
)
def _setup_worktree(cwd: Path, run_dir: Path, preset_name: str) -> tuple[Path, str]:
def _setup_worktree(cwd: Path, run_dir: Path, preset_name: str) -> tuple[Path, str, str]:
"""Create a shared worktree for the entire pipeline run.
1. Generate branch name (cross-eval/<preset>_<timestamp>)
2. Create branch from HEAD
3. Create worktree on that branch
Returns (worktree_path, branch_name).
Returns (worktree_path, branch_name, base_commit).
"""
from cross_eval.worktree import create_worktree, make_branch_name, make_worktree_dir
branch_name = make_branch_name(preset_name)
worktree_dir = make_worktree_dir(cwd, branch_name)
worktree_path = create_worktree(
worktree_path, base_commit = create_worktree(
base_cwd=cwd, work_dir=worktree_dir, branch_name=branch_name,
)
(run_dir / "worktree_path.txt").write_text(f"{worktree_path}\n", encoding="utf-8")
(run_dir / "worktree_branch.txt").write_text(f"{branch_name}\n", encoding="utf-8")
return worktree_path, branch_name
(run_dir / "worktree_base.txt").write_text(f"{base_commit}\n", encoding="utf-8")
return worktree_path, branch_name, base_commit
def _copy_inputs_to_worktree(
@@ -321,10 +322,11 @@ def _run_simple_pipeline(
# Setup shared worktree for agentic mode
worktree_path: Path | None = None
agentic_branch_name: str | None = None
agentic_base_commit: str | None = None
base_repo_state: dict[str, str] | None = None
base_repo_status: str | None = None
if not dry_run and _has_agentic_steps(config, config.pipeline):
worktree_path, agentic_branch_name = _setup_worktree(
worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
cwd, run_dir, config.preset_name,
)
_copy_inputs_to_worktree(config, worktree_path)
@@ -360,6 +362,7 @@ def _run_simple_pipeline(
runtime_env=runtime_env,
base_repo_state=base_repo_state,
base_repo_status=base_repo_status,
base_commit=agentic_base_commit,
)
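# Intermediate commit so next iteration's diff only shows new changes
# NOTE(review): base_commit is recorded once at worktree creation and passed
# unchanged to every iteration. If capture_diff diffs against it, later
# iterations' diffs may also include earlier iterations' changes — confirm.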
@@ -498,10 +501,11 @@ def _run_phased_pipeline(
all_phase_steps = [s for p in config.phases for s in p.steps]
worktree_path: Path | None = None
agentic_branch_name: str | None = None
agentic_base_commit: str | None = None
base_repo_state: dict[str, str] | None = None
base_repo_status: str | None = None
if not dry_run and _has_agentic_steps(config, all_phase_steps):
worktree_path, agentic_branch_name = _setup_worktree(
worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
cwd, run_dir, config.preset_name,
)
_copy_inputs_to_worktree(config, worktree_path)
@@ -558,6 +562,7 @@ def _run_phased_pipeline(
runtime_env=runtime_env,
base_repo_state=base_repo_state,
base_repo_status=base_repo_status,
base_commit=agentic_base_commit,
)
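# Intermediate commit so next iteration's diff only shows new changes
# NOTE(review): base_commit is recorded once at worktree creation and passed
# unchanged to every iteration. If capture_diff diffs against it, later
# iterations' diffs may also include earlier iterations' changes — confirm.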
@@ -903,6 +908,7 @@ def _run_steps(
runtime_env: dict[str, str] | None = None,
base_repo_state: dict[str, str] | None = None,
base_repo_status: str | None = None,
base_commit: str | None = None,
) -> tuple[dict[str, str], dict[str, AgentResult], str | None]:
"""Execute all steps in one iteration, parallelizing where possible."""
step_outputs: dict[str, str] = {}
@@ -923,6 +929,7 @@ def _run_steps(
runtime_env=runtime_env,
base_repo_state=base_repo_state,
base_repo_status=base_repo_status,
base_commit=base_commit,
)
else:
_execute_parallel_batch(
@@ -934,6 +941,7 @@ def _run_steps(
runtime_env=runtime_env,
base_repo_state=base_repo_state,
base_repo_status=base_repo_status,
base_commit=base_commit,
)
# Extract verdict from all verdict steps (ALL must PASS; ESCALATE wins over all)
@@ -961,6 +969,7 @@ def _invoke_agentic(
env: dict[str, str] | None = None,
timeout: int | None = None,
quiet: bool = False,
base_commit: str | None = None,
) -> AgentResult:
"""Run an agent in agentic mode using an existing worktree."""
return invoke_agent_agentic(
@@ -968,6 +977,7 @@ def _invoke_agentic(
worktree_path=worktree_path,
env=env,
timeout=timeout, quiet=quiet,
base_commit=base_commit,
)
@@ -992,6 +1002,7 @@ def _execute_step(
runtime_env: dict[str, str] | None = None,
base_repo_state: dict[str, str] | None = None,
base_repo_status: str | None = None,
base_commit: str | None = None,
) -> None:
"""Execute a single step, updating step_outputs and step_results in place."""
if not quiet:
@@ -1035,6 +1046,7 @@ def _execute_step(
worktree_path=worktree_path,
env=runtime_env,
timeout=timeout, quiet=quiet,
base_commit=base_commit,
)
else:
# When worktree exists, run non-agentic agents (reviewers) in
@@ -1125,6 +1137,7 @@ def _execute_parallel_batch(
runtime_env: dict[str, str] | None = None,
base_repo_state: dict[str, str] | None = None,
base_repo_status: str | None = None,
base_commit: str | None = None,
) -> None:
"""Execute multiple steps in parallel using threads."""
agent_names = ", ".join(s.agent for s in batch)
@@ -1139,6 +1152,7 @@ def _execute_parallel_batch(
run_dir=run_dir, output_iter=output_iter, phase_name=phase_name,
base_repo_state=base_repo_state,
base_repo_status=base_repo_status,
base_commit=base_commit,
)
return
@@ -1161,6 +1175,7 @@ def _execute_parallel_batch(
phase_name=phase_name, worktree_path=worktree_path,
base_repo_state=base_repo_state,
base_repo_status=base_repo_status,
base_commit=base_commit,
)
return
@@ -1204,6 +1219,7 @@ def _execute_parallel_batch(
worktree_path=worktree_path,
env=runtime_env,
timeout=timeout, quiet=True,
base_commit=base_commit,
)
else:
effective_cwd = worktree_path if worktree_path else cwd