After each iteration's _commit_iteration, record the new HEAD SHA and use it as the diff anchor for the next iteration. Previously capture_diff always diffed against the initial base commit, causing every iteration to return the same full cumulative diff — reviewers couldn't see what changed between iterations, leading to repeated feedback and stuck FAIL loops. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
196 lines
5.8 KiB
Python
196 lines
5.8 KiB
Python
"""Git worktree lifecycle management for agentic mode."""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import shutil
|
|
import subprocess
|
|
import tempfile
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class WorktreeError(RuntimeError):
    """Raised when a git branch or worktree operation fails."""
|
|
|
|
|
|
def make_branch_name(preset_name: str) -> str:
    """Build a timestamped branch name of the form ``cross-eval/<preset>_<stamp>``.

    ``<stamp>`` is the local wall-clock time of the call, formatted as
    ``YYYYmmdd_HHMMSS``.
    """
    stamp = f"{datetime.now():%Y%m%d_%H%M%S}"
    return f"cross-eval/{preset_name}_{stamp}"
|
|
|
|
|
|
def make_worktree_dir(base_cwd: Path, branch_name: str) -> Path:
    """Return the temp-dir location where the worktree for ``branch_name`` goes.

    The result is ``<system tmp>/cross-eval-worktrees/<repo>/<branch slug>``:
    ``<repo>`` is the final component of the resolved ``base_cwd`` (``"repo"``
    when that component is empty) and the slug is ``branch_name`` with every
    ``/`` turned into ``__``. Nothing is created on disk.

    Placing agentic worktrees outside the source checkout avoids tools that
    mistakenly walk up to the outer repo and write into the base worktree.
    """
    repo = base_cwd.resolve().name
    if not repo:
        # e.g. a filesystem root, whose final path component is empty
        repo = "repo"
    slug = "__".join(branch_name.split("/"))
    return Path(tempfile.gettempdir()).joinpath("cross-eval-worktrees", repo, slug)
|
|
|
|
|
|
def create_worktree(base_cwd: Path, work_dir: Path, branch_name: str) -> tuple[Path, str]:
    """Create a git worktree on a new branch that starts at the current HEAD.

    Steps:
      1. Delete any leftover directory at ``work_dir`` and prune stale
         worktree registrations.
      2. Record the SHA that HEAD of ``base_cwd`` points to.
      3. Create ``branch_name`` at exactly that SHA.
      4. Add a worktree at ``work_dir`` checked out to that branch.

    The branch lives in the original repo, so it survives worktree removal.

    Args:
        base_cwd: Directory inside the source repository; all git commands
            run with this as their working directory.
        work_dir: Where the worktree is created. Existing content is deleted.
        branch_name: Name of the branch to create.

    Returns:
        ``(worktree_path, base_commit_sha)`` — the resolved worktree path and
        the SHA the branch was created from.

    Raises:
        WorktreeError: If creating the branch or the worktree fails.
        subprocess.CalledProcessError: If ``git rev-parse HEAD`` fails.
    """
    work_dir = work_dir.resolve()
    if work_dir.exists():
        shutil.rmtree(work_dir)
        # If the deleted directory was a registered worktree, git still lists
        # it and `git worktree add` refuses a "missing but already registered"
        # path. Clear stale registrations first (best-effort, result ignored).
        subprocess.run(
            ["git", "worktree", "prune"],
            cwd=base_cwd,
            capture_output=True,
        )

    # Record the base commit SHA before creating the branch. It is returned
    # to the caller as the initial diff anchor for the new worktree.
    result = subprocess.run(
        ["git", "rev-parse", "HEAD"],
        cwd=base_cwd,
        capture_output=True,
        text=True,
        check=True,
    )
    base_commit = result.stdout.strip()

    # Create the branch at the recorded SHA rather than the symbolic "HEAD",
    # so the returned base commit is guaranteed to be the branch point even
    # if HEAD moves between the two git invocations.
    try:
        subprocess.run(
            ["git", "branch", branch_name, base_commit],
            cwd=base_cwd,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        raise WorktreeError(
            f"Failed to create branch '{branch_name}': {e.stderr.strip()}"
        ) from e

    # Create worktree on that branch
    try:
        subprocess.run(
            ["git", "worktree", "add", str(work_dir), branch_name],
            cwd=base_cwd,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        # Clean up the branch if worktree creation fails (best-effort)
        subprocess.run(
            ["git", "branch", "-D", branch_name],
            cwd=base_cwd,
            capture_output=True,
        )
        raise WorktreeError(
            f"Failed to create worktree at {work_dir}: {e.stderr.strip()}"
        ) from e

    logger.debug("Created worktree on branch '%s': %s (base: %s)", branch_name, work_dir, base_commit[:8])
    return work_dir, base_commit
|
|
|
|
|
|
def capture_diff(worktree_path: Path, base_commit: str | None = None) -> str:
    """Capture all changes made in the worktree since ``base_commit``.

    Any uncommitted work is staged and committed as a snapshot, then the diff
    between the anchor and the resulting ``HEAD`` is returned. This covers two
    scenarios:
      1. Agent left changes uncommitted → they end up in the snapshot commit.
      2. Agent committed its own changes → HEAD already advanced, and
         ``git diff <anchor> HEAD`` captures them.

    Args:
        worktree_path: Worktree directory in which the git commands run.
        base_commit: The diff anchor — typically the worktree HEAD *before*
            this iteration started (set by ``get_current_head`` after each
            ``_commit_iteration``). When omitted or empty, falls back to the
            ``HEAD`` that is current when this function is called, so only
            changes that were still uncommitted at that point are reported.

    Returns:
        The stripped ``git diff`` output; an empty string when there are no
        changes or when ``git diff`` fails (a warning is logged in that case).

    Raises:
        subprocess.CalledProcessError: If ``git add -A`` fails.
    """
    ref = base_commit
    if not ref:
        # Resolve the fallback anchor *before* the snapshot commit below.
        # A symbolic "HEAD~1" evaluated afterwards is only right when a
        # snapshot commit was actually created; otherwise it would return the
        # diff of an unrelated earlier commit (or fail on a one-commit history).
        head = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            cwd=worktree_path,
            capture_output=True,
            text=True,
        )
        resolved = head.stdout.strip() if head.returncode == 0 else ""
        # Keep the previous symbolic fallback if HEAD cannot be resolved.
        ref = resolved or "HEAD~1"

    # Stage any uncommitted changes
    subprocess.run(
        ["git", "add", "-A"],
        cwd=worktree_path,
        capture_output=True,
        check=True,
    )

    # Commit staged changes so everything is reachable via HEAD.
    # git exits non-zero without creating a commit when nothing is staged;
    # that outcome is expected, so the return code is deliberately not checked.
    subprocess.run(
        ["git", "commit", "-m", "cross-eval: capture-diff snapshot"],
        cwd=worktree_path,
        capture_output=True,
        text=True,
    )

    result = subprocess.run(
        ["git", "diff", ref, "HEAD"],
        cwd=worktree_path,
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        # Surface the failure instead of letting it pass as "no changes".
        logger.warning(
            "git diff %s HEAD failed in %s: %s",
            ref,
            worktree_path,
            result.stderr.strip(),
        )
    return result.stdout.strip()
|
|
|
|
|
|
def get_current_head(worktree_path: Path) -> str:
    """Resolve ``HEAD`` in ``worktree_path`` to its commit SHA.

    Raises ``subprocess.CalledProcessError`` when ``git rev-parse`` exits
    with a non-zero status.
    """
    rev_parse = ["git", "rev-parse", "HEAD"]
    completed = subprocess.run(
        rev_parse,
        cwd=worktree_path,
        capture_output=True,
        text=True,
        check=True,
    )
    sha = completed.stdout.strip()
    return sha
|
|
|
|
|
|
def commit_worktree(worktree_path: Path, message: str) -> bool:
    """Stage and commit all changes in the worktree.

    Args:
        worktree_path: Worktree directory in which the git commands run.
        message: Commit message passed to ``git commit -m``.

    Returns:
        True if a commit was made, False otherwise (nothing to commit, or the
        commit failed — the latter is logged as a warning).

    Raises:
        subprocess.CalledProcessError: If ``git add -A`` fails.
    """
    subprocess.run(
        ["git", "add", "-A"],
        cwd=worktree_path,
        capture_output=True,
        check=True,
    )

    result = subprocess.run(
        ["git", "commit", "-m", message],
        cwd=worktree_path,
        capture_output=True,
        text=True,
    )
    # exit code 1 = nothing to commit. Any other non-zero status is a genuine
    # failure; log it so it is not mistaken for a clean tree, while keeping
    # the boolean contract callers rely on.
    if result.returncode not in (0, 1):
        logger.warning(
            "git commit failed in %s (exit %d): %s",
            worktree_path,
            result.returncode,
            result.stderr.strip(),
        )
    return result.returncode == 0
|
|
|
|
|
|
def remove_worktree(base_cwd: Path, work_dir: Path) -> None:
    """Detach and delete the worktree at ``work_dir``; its branch is kept.

    ``git worktree remove --force`` is tried first. If it exits non-zero, the
    directory is deleted directly (errors ignored) and ``git worktree prune``
    is run to drop the stale registration.
    """
    target = work_dir.resolve()
    removal = subprocess.run(
        ["git", "worktree", "remove", "--force", str(target)],
        cwd=base_cwd,
        capture_output=True,
        text=True,
    )
    if removal.returncode != 0:
        # git could not remove it; fall back to manual cleanup
        if target.exists():
            shutil.rmtree(target, ignore_errors=True)
        subprocess.run(
            ["git", "worktree", "prune"],
            cwd=base_cwd,
            capture_output=True,
        )
    logger.debug("Removed worktree: %s (branch preserved)", target)
|