fix: Claude reviewer empty output, worktree isolation false positives, and input file access
- Add -p flag to _CLAUDE_REVIEW_ARGS so the reviewer uses print mode (stdin→stdout) instead of interactive mode, which conflicts with plan permission mode
- Copy input files (plan, checklist) into the worktree's .cross-eval-inputs/ so agents in plan mode can access them without escaping the sandbox
- Simplify _snapshot_repo_state to use only git diff HEAD + untracked hashes, eliminating false positives from staging state changes (git diff --cached) and git status index drift during long-running pipelines

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1030,6 +1030,60 @@ class FixPresetBehaviorTest(unittest.TestCase):
|
||||
self.assertTrue(captured["agentic"])
|
||||
self.assertEqual(captured["phase_max"], 3)
|
||||
|
||||
def test_run_senior_model_override_applies_only_to_seniors(self) -> None:
    """Check that ``--senior-model`` shows up in the senior agent's args only.

    ``cross_eval.pipeline.run_pipeline`` is patched with a recorder that
    copies the args of the first coder, reviewer and senior agent from the
    config it is handed.  The test expects exit code 0, ``"opus"`` in the
    coder and reviewer args, and ``"sonnet"`` in the senior args.
    """
    recorded: dict[str, list[str]] = {}

    def _record_agent_args(config, **kwargs):
        # Stand-in for run_pipeline: snapshot per-role args, report a PASS.
        def _first_agent_args(names):
            return list(config.agents[names[0]].args)

        recorded["coder_args"] = _first_agent_args(config.coders)
        recorded["reviewer_args"] = _first_agent_args(config.reviewers)
        recorded["senior_args"] = _first_agent_args(config.seniors)
        return PipelineResult(
            iterations=[],
            final_verdict="PASS",
            run_dir=Path(".cross-eval/output"),
        )

    cli_args = [
        "run",
        "--preset", "review-fix",
        "--coder", "claude",
        "--reviewer", "claude",
        "--senior", "claude",
        "--senior-model", "sonnet",
        "--dry-run",
    ]
    with patch("cross_eval.pipeline.run_pipeline", side_effect=_record_agent_args):
        exit_code = main(cli_args)

    self.assertEqual(exit_code, 0)
    # Only the senior role was overridden; the other two roles must not pick it up.
    for untouched_role in ("coder_args", "reviewer_args"):
        self.assertIn("opus", recorded[untouched_role])
    self.assertIn("sonnet", recorded["senior_args"])
|
||||
|
||||
|
||||
class OutputDirectoryResolutionTest(unittest.TestCase):
    """Tests for how ``load_config`` resolves the configured ``output_dir``."""

    def test_load_config_resolves_output_dir_from_project_root(self) -> None:
        """A config stored in ``<root>/.cross-eval`` yields ``<root>/.cross-eval/output``.

        Builds a throwaway project containing ``.cross-eval/plan.md`` and
        ``.cross-eval/config.yaml``, loads the config, and compares the
        resolved ``output_dir`` with the resolved expected path.
        """
        config_text = (
            "inputs:\n"
            " plan: plan.md\n"
            "coders: [claude-coder]\n"
            "reviewers: [claude-reviewer]\n"
            "pipeline: preset:simple\n"
            "output_dir: .cross-eval/output\n"
        )

        with tempfile.TemporaryDirectory() as scratch:
            project_root = Path(scratch)
            cross_eval_dir = project_root / ".cross-eval"
            cross_eval_dir.mkdir()

            plan_file = cross_eval_dir / "plan.md"
            plan_file.write_text("# plan\n", encoding="utf-8")

            config_path = cross_eval_dir / "config.yaml"
            config_path.write_text(config_text, encoding="utf-8")

            config = load_config(config_path)

            # Compare resolved paths so symlinked temp locations do not matter.
            actual_dir = config.output_dir.resolve()
            expected_dir = (project_root / ".cross-eval" / "output").resolve()
            self.assertEqual(actual_dir, expected_dir)
|
||||
|
||||
|
||||
# Run the test suite only when this file is executed directly, not on import.
if __name__ == "__main__":
    unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user