Files
cross-eval/cross_eval/worktree.py
chungyeong 60c7b07939 fix: capture_diff uses base commit to handle agent self-commits
Claude in agentic mode (interactive, no -p flag) commits its own changes,
advancing HEAD. This made `git diff --cached HEAD` return empty, triggering
false EMPTY_DIFF errors every time. Now capture_diff diffs against the
base commit SHA recorded at worktree creation, so changes are captured
regardless of whether the agent committed them.

Also adds UX_IMPROVEMENT_PLAN.md for guided message improvements.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-14 23:59:53 +09:00

199 lines
6.1 KiB
Python

"""Git worktree lifecycle management for agentic mode."""
from __future__ import annotations
import logging
import shutil
import subprocess
import tempfile
from datetime import datetime
from pathlib import Path
logger = logging.getLogger(__name__)
class WorktreeError(RuntimeError):
    """Signals that a git worktree operation could not be completed."""
def make_branch_name(preset_name: str) -> str:
    """Build a timestamped branch name under the ``cross-eval/`` prefix.

    The result has the form ``cross-eval/<preset_name>_<YYYYmmdd_HHMMSS>``,
    where the timestamp is the current local time at one-second resolution.
    """
    stamp = f"{datetime.now():%Y%m%d_%H%M%S}"
    return f"cross-eval/{preset_name}_{stamp}"
def make_worktree_dir(base_cwd: Path, branch_name: str) -> Path:
    """Return the directory in which the worktree for ``branch_name`` should live.

    The location is ``<system temp dir>/cross-eval-worktrees/<repo>/<slug>``,
    i.e. outside the source checkout, so that tools which walk up the
    directory tree looking for a repository do not end up in the base repo
    and write there by mistake.

    ``<repo>`` is the last component of the resolved ``base_cwd`` (``"repo"``
    when that component is empty) and ``<slug>`` is ``branch_name`` with every
    ``/`` replaced by ``__``.  This function only computes the path.
    """
    resolved_base = base_cwd.resolve()
    repo_component = resolved_base.name if resolved_base.name else "repo"
    slug = "__".join(branch_name.split("/"))
    root = Path(tempfile.gettempdir(), "cross-eval-worktrees")
    return root.joinpath(repo_component, slug)
def create_worktree(base_cwd: Path, work_dir: Path, branch_name: str) -> tuple[Path, str]:
    """Create a git worktree on a new branch that starts at the current HEAD.

    Steps:

    1. Delete any existing directory at ``work_dir`` (and prune the stale
       worktree registration that deletion may leave behind).
    2. Resolve HEAD of the repository at ``base_cwd`` to a commit SHA.
    3. Create ``branch_name`` at that SHA.
    4. Add a worktree at ``work_dir`` checked out to that branch.

    The branch lives in the original repo, so it survives worktree removal.

    Args:
        base_cwd: Directory of the source repository; all git commands run
            with this as their working directory.
        work_dir: Where to place the worktree.  An existing directory at this
            path is removed first.
        branch_name: Name of the branch to create.

    Returns:
        ``(worktree_path, base_commit_sha)``, where ``worktree_path`` is the
        resolved ``work_dir`` and ``base_commit_sha`` is the commit the new
        branch starts from.

    Raises:
        WorktreeError: If creating the branch or adding the worktree fails.
        subprocess.CalledProcessError: If ``git rev-parse HEAD`` fails.
    """
    work_dir = work_dir.resolve()
    if work_dir.exists():
        shutil.rmtree(work_dir)
        # Deleting a worktree directory by hand leaves its registration in the
        # repository, and git refuses to add a worktree at a path that is
        # "missing but already registered".  Pruning is best-effort: if it
        # fails, the `worktree add` below reports the real problem.
        subprocess.run(
            ["git", "worktree", "prune"],
            cwd=base_cwd,
            capture_output=True,
        )

    # Record the base commit SHA before creating the branch.
    # This is the anchor for all diffs — even if the agent makes its own commits,
    # we always diff against this base to capture the full set of changes.
    result = subprocess.run(
        ["git", "rev-parse", "HEAD"],
        cwd=base_cwd,
        capture_output=True,
        text=True,
        check=True,
    )
    base_commit = result.stdout.strip()

    # Start the branch at the recorded SHA rather than the symbolic "HEAD", so
    # the returned base commit is guaranteed to be the branch's starting point
    # even if HEAD moves between the two git invocations.
    try:
        subprocess.run(
            ["git", "branch", branch_name, base_commit],
            cwd=base_cwd,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        raise WorktreeError(
            f"Failed to create branch '{branch_name}': {e.stderr.strip()}"
        ) from e

    # Create worktree on that branch
    try:
        subprocess.run(
            ["git", "worktree", "add", str(work_dir), branch_name],
            cwd=base_cwd,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        # Clean up the branch if worktree creation fails (best-effort; the
        # outcome of the delete is intentionally not checked).
        subprocess.run(
            ["git", "branch", "-D", branch_name],
            cwd=base_cwd,
            capture_output=True,
        )
        raise WorktreeError(
            f"Failed to create worktree at {work_dir}: {e.stderr.strip()}"
        ) from e

    logger.debug(
        "Created worktree on branch '%s': %s (base: %s)",
        branch_name,
        work_dir,
        base_commit[:8],
    )
    return work_dir, base_commit
def capture_diff(worktree_path: Path, base_commit: str | None = None) -> str:
    """Capture all changes made in the worktree as a unified diff.

    Includes tracked modifications, new untracked files, and changes that the
    agent may have committed on its own.  As a side effect, everything in the
    worktree is staged (``git add -A``).

    Args:
        worktree_path: Directory of the worktree to inspect.
        base_commit: The commit SHA from when the worktree was created.
            If provided, diffs against this fixed base instead of HEAD.
            This is critical because agents (e.g. Claude in interactive
            mode) may create their own commits, advancing HEAD and
            making ``git diff --cached HEAD`` return empty.

    Returns:
        The diff text with leading/trailing whitespace stripped; an empty
        string when git reports no changes.  Bytes that are not valid UTF-8
        are replaced with U+FFFD instead of raising.

    Raises:
        subprocess.CalledProcessError: If ``git add -A`` fails.
    """
    # Stage any uncommitted changes so they're included in the diff
    subprocess.run(
        ["git", "add", "-A"],
        cwd=worktree_path,
        capture_output=True,
        check=True,
    )

    if not base_commit:
        # Fallback: no base_commit, use original behavior
        return _run_git_diff(worktree_path, "--cached", "HEAD")

    # Diff the index against the original base.  After `git add -A` the index
    # holds the agent's commits plus anything still uncommitted, so this
    # captures changes regardless of whether the agent committed them.
    diff = _run_git_diff(worktree_path, "--cached", base_commit)
    if diff:
        return diff

    # NOTE(review): kept for backward compatibility.  Because the index already
    # contains whatever was committed, this should only produce output when
    # the index matches the base while HEAD does not — confirm whether callers
    # still need it.
    return _run_git_diff(worktree_path, base_commit, "HEAD")


def _run_git_diff(worktree_path: Path, *diff_args: str) -> str:
    """Run ``git diff <diff_args>`` in ``worktree_path`` and return stripped stdout."""
    result = subprocess.run(
        # --no-color / --no-ext-diff keep the output a plain unified diff
        # regardless of the user's git configuration.
        ["git", "diff", "--no-color", "--no-ext-diff", *diff_args],
        cwd=worktree_path,
        capture_output=True,
        # Decode explicitly and tolerantly: the diff embeds raw file content,
        # which is not guaranteed to match the locale encoding.
        encoding="utf-8",
        errors="replace",
    )
    if result.returncode != 0:
        # Surface the failure instead of letting it look like "no changes".
        logging.getLogger(__name__).warning(
            "git diff %s failed in %s (exit %d): %s",
            " ".join(diff_args),
            worktree_path,
            result.returncode,
            result.stderr.strip(),
        )
    return result.stdout.strip()
def commit_worktree(worktree_path: Path, message: str) -> bool:
    """Stage and commit all changes in the worktree.

    Args:
        worktree_path: Directory of the worktree to commit in.
        message: Commit message passed to ``git commit -m``.

    Returns:
        True if a commit was made.  False if there was nothing to commit, or
        if ``git commit`` failed for another reason; the latter case is logged
        as a warning so it is not mistaken for an empty change set.

    Raises:
        subprocess.CalledProcessError: If ``git add -A`` fails.
    """
    subprocess.run(
        ["git", "add", "-A"],
        cwd=worktree_path,
        capture_output=True,
        check=True,
    )
    result = subprocess.run(
        ["git", "commit", "-m", message],
        cwd=worktree_path,
        capture_output=True,
        # Decode tolerantly: git's output may echo file names or message text
        # that does not match the locale encoding.
        encoding="utf-8",
        errors="replace",
    )
    if result.returncode == 0:
        return True

    # A non-zero exit usually means "nothing to commit", but a rejecting hook
    # or a missing identity produces one too.  `git diff --cached --quiet`
    # exits 0 only when nothing is staged; anything else means the commit
    # failed with changes pending (or the check itself failed), so report it.
    staged = subprocess.run(
        ["git", "diff", "--cached", "--quiet"],
        cwd=worktree_path,
        capture_output=True,
    )
    if staged.returncode != 0:
        logging.getLogger(__name__).warning(
            "git commit failed in %s (exit %d): %s",
            worktree_path,
            result.returncode,
            (result.stderr or result.stdout).strip(),
        )
    return False
def remove_worktree(base_cwd: Path, work_dir: Path) -> None:
    """Detach and delete the worktree at ``work_dir``; its branch is kept.

    ``git worktree remove --force`` is tried first.  If git reports failure,
    the directory is deleted directly (errors ignored) and
    ``git worktree prune`` is run so the repository drops the stale entry.
    No branch is deleted here, so the branch stays in the original repo.

    Args:
        base_cwd: Directory of the source repository; git commands run here.
        work_dir: Path of the worktree to remove (resolved before use).
    """
    target = work_dir.resolve()
    removal = subprocess.run(
        ["git", "worktree", "remove", "--force", str(target)],
        cwd=base_cwd,
        capture_output=True,
        text=True,
    )
    if removal.returncode != 0:
        # git could not remove it: fall back to deleting the directory by hand
        # and then clearing the now-dangling worktree registration.
        if target.exists():
            shutil.rmtree(target, ignore_errors=True)
        subprocess.run(
            ["git", "worktree", "prune"],
            cwd=base_cwd,
            capture_output=True,
        )
    logger.debug("Removed worktree: %s (branch preserved)", target)