fix: Claude reviewer empty output, worktree isolation false positives, and input file access

- Add -p flag to _CLAUDE_REVIEW_ARGS so the reviewer uses print mode (stdin→stdout) instead of interactive mode, which conflicts with plan permission mode
- Copy input files (plan, checklist) into worktree .cross-eval-inputs/ so agents in plan mode can access them without escaping the sandbox
- Simplify _snapshot_repo_state to use only git diff HEAD + untracked hashes, eliminating false positives from staging state changes (git diff --cached) and git status index drift during long-running pipelines

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-14 16:19:57 +09:00
parent 7b95233edf
commit cc8d583914
6 changed files with 158 additions and 41 deletions
--- a/tests/test_agentic.py
+++ b/tests/test_agentic.py
@@ -206,21 +206,28 @@ class TestMakeWorktreeDir(unittest.TestCase):
 class TestBaseRepoIsolation(unittest.TestCase):
    """Base repo mutations should fail fast during agentic execution."""

-    def test_raises_when_base_repo_status_changes(self) -> None:
+    def test_raises_when_base_repo_state_changes(self) -> None:
        with tempfile.TemporaryDirectory() as td:
            base = Path(td) / "repo"
            worktree = Path(td) / "worktree"
            base.mkdir()
            worktree.mkdir()

+            # Baseline has a diff that won't match a non-git directory
+            # (which returns {}), triggering the isolation error.
+            baseline_state = {
+                "diff": "diff --git a/file.py ...\n",
+                "untracked": "",
+            }
+
            with self.assertRaises(RuntimeError) as ctx:
                _assert_base_repo_isolation(
                    base,
-                    "M cross_eval/agent.py",
+                    baseline_state,
                    step_name="coding",
                    agent_name="claude-coder",
                    worktree_path=worktree,
-                    baseline_status="M cross_eval/agent.py",
+                    baseline_status="M file.py",
                )

        self.assertIn("base repository", str(ctx.exception))
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -1030,6 +1030,60 @@ class FixPresetBehaviorTest(unittest.TestCase):
        self.assertTrue(captured["agentic"])
        self.assertEqual(captured["phase_max"], 3)

+    def test_run_senior_model_override_applies_only_to_seniors(self) -> None:
+        captured: dict[str, list[str]] = {}
+
+        def _fake_run_pipeline(config, **kwargs):
+            captured["coder_args"] = list(config.agents[config.coders[0]].args)
+            captured["reviewer_args"] = list(config.agents[config.reviewers[0]].args)
+            captured["senior_args"] = list(config.agents[config.seniors[0]].args)
+            return PipelineResult(
+                iterations=[],
+                final_verdict="PASS",
+                run_dir=Path(".cross-eval/output"),
+            )
+
+        with patch("cross_eval.pipeline.run_pipeline", side_effect=_fake_run_pipeline):
+            exit_code = main([
+                "run",
+                "--preset", "review-fix",
+                "--coder", "claude",
+                "--reviewer", "claude",
+                "--senior", "claude",
+                "--senior-model", "sonnet",
+                "--dry-run",
+            ])
+
+        self.assertEqual(exit_code, 0)
+        self.assertIn("opus", captured["coder_args"])
+        self.assertIn("opus", captured["reviewer_args"])
+        self.assertIn("sonnet", captured["senior_args"])
+
+
+class OutputDirectoryResolutionTest(unittest.TestCase):
+    def test_load_config_resolves_output_dir_from_project_root(self) -> None:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            root = Path(tmpdir)
+            ce_dir = root / ".cross-eval"
+            ce_dir.mkdir()
+            (ce_dir / "plan.md").write_text("# plan\n", encoding="utf-8")
+            config_path = ce_dir / "config.yaml"
+            config_path.write_text(
+                (
+                    "inputs:\n"
+                    "  plan: plan.md\n"
+                    "coders: [claude-coder]\n"
+                    "reviewers: [claude-reviewer]\n"
+                    "pipeline: preset:simple\n"
+                    "output_dir: .cross-eval/output\n"
+                ),
+                encoding="utf-8",
+            )
+
+            config = load_config(config_path)
+
+        self.assertEqual(config.output_dir.resolve(), (root / ".cross-eval" / "output").resolve())
+

 if __name__ == "__main__":
    unittest.main()
--- a/tests/test_runtime_misc.py
+++ b/tests/test_runtime_misc.py
@@ -390,7 +390,7 @@ class TestPipelineHelpers(unittest.TestCase):

            snapshot = _snapshot_repo_state(repo)

-            self.assertIn("UNTRACKED scratch.txt", snapshot)
+            self.assertIn("UNTRACKED scratch.txt", snapshot["untracked"])

    def test_finalize_worktree_deletes_empty_branch(self) -> None:
        with tempfile.TemporaryDirectory() as tmpdir: