release: cut 0.2.0 baseline

2026-03-13 21:47:54 +09:00
parent 204e071b74
commit 941304398d
15 changed files with 1930 additions and 270 deletions
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -6,12 +6,14 @@ from pathlib import Path
 from unittest.mock import patch

 from cross_eval.agent import AgentInvocationError, _supports_reasoning_effort
-from cross_eval.cli import _apply_phased_iteration_override
+from cross_eval.cli import _apply_phased_iteration_override, main
 from cross_eval.agent import invoke_agent
 from cross_eval.config import (
    BUILTIN_AGENTS,
+    _SENIOR_SYSTEM_PROMPT,
    _default_seniors_for_preset,
    apply_reasoning_effort_settings,
+    load_config,
    normalize_reasoning_effort,
    normalize_prompt_template,
    normalize_step_role,
@@ -52,7 +54,6 @@ from cross_eval.prompts import (
    _build_review_only_preset,
    _build_simple_preset,
 )
-from cross_eval.config import _SENIOR_SYSTEM_PROMPT
 from cross_eval.report import build_report, parse_review_metrics, print_escalation_report

 class BuiltinAgentConfigTest(unittest.TestCase):
@@ -954,5 +955,82 @@ class EscalateVerdictTest(unittest.TestCase):
        self.assertIn("VERDICT: ESCALATE", AGGREGATE_REVIEW_TEMPLATE_KO)


+class FixPresetBehaviorTest(unittest.TestCase):  # load_config / CLI "run" checks for the review-fix preset
+    def _write_fix_config(self, root: Path, *, max_iterations: int = 7) -> Path:
+        (root / "plan.md").write_text("# plan\n", encoding="utf-8")  # input file named by the config below
+        (root / "checklist.md").write_text("# checklist\n", encoding="utf-8")  # input file named by the config below
+        config_path = root / "config.yaml"
+        config_path.write_text(
+            (
+                "inputs:\n"
+                "  plan: plan.md\n"
+                "  checklist: checklist.md\n"
+                "coders: [claude-coder]\n"
+                "reviewers: [claude-reviewer]\n"
+                "pipeline: preset:review-fix\n"  # the preset every test in this class exercises
+                f"max_iterations: {max_iterations}\n"  # tests expect this value to show up on phases[0]
+                "language: en\n"
+            ),
+            encoding="utf-8",
+        )
+        return config_path  # path of the config.yaml written above
+
+    def test_load_config_syncs_phased_iterations_and_enables_agentic(self) -> None:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config = load_config(self._write_fix_config(Path(tmpdir), max_iterations=7))
+
+        self.assertEqual(config.preset_name, "review-fix")  # tmpdir is gone here; only the loaded object is inspected
+        self.assertEqual(config.phases[0].max_iterations, 7)
+        self.assertTrue(config.agents["claude-coder"].agentic)
+        self.assertNotIn("-p", config.agents["claude-coder"].args)  # NOTE(review): presumably agentic mode drops "-p" — confirm
+
+    def test_run_config_max_iter_updates_existing_phased_pipeline(self) -> None:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config_path = self._write_fix_config(Path(tmpdir), max_iterations=7)
+            captured: dict[str, object] = {}  # filled in by the fake pipeline below
+
+            def _fake_run_pipeline(config, **kwargs):  # stand-in for run_pipeline; records the config it receives
+                captured["phase_max"] = config.phases[0].max_iterations
+                captured["agentic"] = config.agents[config.coders[0]].agentic
+                return PipelineResult(
+                    iterations=[],
+                    final_verdict="PASS",
+                    run_dir=Path(tmpdir) / "output",
+                )
+
+            with patch("cross_eval.pipeline.run_pipeline", side_effect=_fake_run_pipeline):
+                exit_code = main([
+                    "run",
+                    "--config", str(config_path),
+                    "--max-iter", "9",  # differs from the 7 written to the config file
+                    "--dry-run",
+                ])
+
+        self.assertEqual(exit_code, 0)
+        self.assertEqual(captured["phase_max"], 9)  # the CLI value, not the file's 7, must reach the phase
+        self.assertTrue(captured["agentic"])
+
+    def test_run_preset_review_fix_auto_enables_agentic_without_flag(self) -> None:
+        captured: dict[str, object] = {}  # filled in by the fake pipeline below
+
+        def _fake_run_pipeline(config, **kwargs):  # stand-in for run_pipeline; records the config it receives
+            captured["preset"] = config.preset_name
+            captured["agentic"] = config.agents[config.coders[0]].agentic
+            captured["phase_max"] = config.phases[0].max_iterations
+            return PipelineResult(
+                iterations=[],
+                final_verdict="PASS",
+                run_dir=Path(".cross-eval/output"),  # the fake only builds this Path; it does not create it
+            )
+
+        with patch("cross_eval.pipeline.run_pipeline", side_effect=_fake_run_pipeline):
+            exit_code = main(["run", "--preset", "review-fix", "--dry-run"])  # no --config and no agentic flag
+
+        self.assertEqual(exit_code, 0)
+        self.assertEqual(captured["preset"], "review-fix")
+        self.assertTrue(captured["agentic"])
+        self.assertEqual(captured["phase_max"], 3)  # NOTE(review): 3 is presumably the preset default — confirm
+
+
 if __name__ == "__main__":
    unittest.main()