fix: capture_diff uses base commit to handle agent self-commits

Claude in agentic mode (interactive, no -p flag) commits its own changes, advancing HEAD. This made `git diff --cached HEAD` return empty, triggering false EMPTY_DIFF errors every time. Now capture_diff diffs against the base commit SHA recorded at worktree creation, so changes are captured regardless of whether the agent committed them. Also adds UX_IMPROVEMENT_PLAN.md for guided message improvements. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-14 23:59:53 +09:00
parent af05fc1ddb
commit 60c7b07939
6 changed files with 281 additions and 28 deletions
--- a/tests/test_agentic.py
+++ b/tests/test_agentic.py
@@ -76,10 +76,12 @@ class TestCreateWorktree(unittest.TestCase):

            wt_dir = Path(td) / "wt"
            branch = "cross-eval/test_branch"
-            result_path = create_worktree(base, wt_dir, branch)
+            result_path, base_commit = create_worktree(base, wt_dir, branch)

            # Worktree directory exists
            self.assertTrue(result_path.exists())
+            # Base commit SHA was captured
+            self.assertEqual(len(base_commit), 40)
            # Branch was created in the original repo
            branches = subprocess.run(
                ["git", "branch", "--list", branch],
@@ -102,7 +104,7 @@ class TestCaptureDiff(unittest.TestCase):

            wt_dir = Path(td) / "wt"
            branch = "cross-eval/diff_test"
-            create_worktree(base, wt_dir, branch)
+            create_worktree(base, wt_dir, branch)  # ignore return tuple

            # Make changes in the worktree
            (wt_dir / "new_file.txt").write_text("hello\n")
@@ -553,7 +555,7 @@ class TestSetupWorktreeCalledForAgentic(unittest.TestCase):

            wt_path = run_dir / "work"
            wt_path.mkdir()
-            mock_setup.return_value = (wt_path, "cross-eval/test")
+            mock_setup.return_value = (wt_path, "cross-eval/test", "a" * 40)

            mock_invoke_agentic.return_value = AgentResult(
                output="diff output", exit_code=0,
@@ -582,7 +584,7 @@ class TestSetupWorktreeLocation(unittest.TestCase):
            run_dir.mkdir(parents=True)
            _init_git_repo(base)

-            worktree_path, branch_name = _setup_worktree(base, run_dir, "review-fix")
+            worktree_path, branch_name, _base_commit = _setup_worktree(base, run_dir, "review-fix")
            try:
                self.assertTrue(worktree_path.exists())
                self.assertNotIn(str(base.resolve()), str(worktree_path.resolve()))
@@ -620,7 +622,7 @@ class TestReviewerRunsInWorktreeCwd(unittest.TestCase):

            wt_path = run_dir / "work"
            wt_path.mkdir()
-            mock_setup.return_value = (wt_path, "cross-eval/test")
+            mock_setup.return_value = (wt_path, "cross-eval/test", "a" * 40)

            mock_invoke_agentic.return_value = AgentResult(
                output="diff output", exit_code=0,
@@ -662,7 +664,7 @@ class TestCommitIterationCalled(unittest.TestCase):

            wt_path = run_dir / "work"
            wt_path.mkdir()
-            mock_setup.return_value = (wt_path, "cross-eval/test")
+            mock_setup.return_value = (wt_path, "cross-eval/test", "a" * 40)

            mock_invoke_agentic.return_value = AgentResult(
                output="diff output", exit_code=0,
@@ -704,7 +706,7 @@ class TestFinalizeWorktreeCalled(unittest.TestCase):

            wt_path = run_dir / "work"
            wt_path.mkdir()
-            mock_setup.return_value = (wt_path, "cross-eval/test")
+            mock_setup.return_value = (wt_path, "cross-eval/test", "a" * 40)

            mock_invoke_agentic.return_value = AgentResult(
                output="diff output", exit_code=0,
@@ -822,7 +824,7 @@ class TestParallelAgenticFallsBackToSequential(unittest.TestCase):

            wt_path = run_dir / "work"
            wt_path.mkdir()
-            mock_setup.return_value = (wt_path, "cross-eval/test")
+            mock_setup.return_value = (wt_path, "cross-eval/test", "a" * 40)

            call_order: list[str] = []

--- a/tests/test_runtime_misc.py
+++ b/tests/test_runtime_misc.py
@@ -775,11 +775,18 @@ class TestRuntimeEnvironmentHelpers(unittest.TestCase):
 class TestWorktreeFailures(unittest.TestCase):
    @patch("cross_eval.worktree.subprocess.run")
    def test_create_worktree_raises_when_branch_creation_fails(self, mock_run: MagicMock) -> None:
-        mock_run.side_effect = subprocess.CalledProcessError(
-            1,
-            ["git", "branch"],
-            stderr="branch failed",
-        )
+        # First call: git rev-parse HEAD (succeeds)
+        # Second call: git branch (fails)
+        rev_parse_result = MagicMock(returncode=0)
+        rev_parse_result.stdout = "a" * 40
+        mock_run.side_effect = [
+            rev_parse_result,
+            subprocess.CalledProcessError(
+                1,
+                ["git", "branch"],
+                stderr="branch failed",
+            ),
+        ]

        with tempfile.TemporaryDirectory() as tmpdir:
            base = Path(tmpdir)
@@ -791,14 +798,17 @@ class TestWorktreeFailures(unittest.TestCase):

    @patch("cross_eval.worktree.subprocess.run")
    def test_create_worktree_cleans_branch_on_worktree_failure(self, mock_run: MagicMock) -> None:
+        rev_parse_result = MagicMock(returncode=0)
+        rev_parse_result.stdout = "a" * 40
        mock_run.side_effect = [
-            MagicMock(returncode=0),
+            rev_parse_result,           # git rev-parse HEAD
+            MagicMock(returncode=0),    # git branch
            subprocess.CalledProcessError(
                1,
                ["git", "worktree", "add"],
                stderr="worktree failed",
            ),
-            MagicMock(returncode=0),
+            MagicMock(returncode=0),    # git branch -D (cleanup)
        ]

        with tempfile.TemporaryDirectory() as tmpdir: