release: cut 0.2.0 baseline

2026-03-13 21:47:54 +09:00
parent 204e071b74
commit 941304398d
15 changed files with 1930 additions and 270 deletions
--- a/tests/test_agentic.py
+++ b/tests/test_agentic.py
@@ -0,0 +1,701 @@
+"""Comprehensive tests for the agentic worktree flow.
+
+Covers:
+  1. worktree.py unit tests (real temp git repo)
+  2. agent.py agentic tests (mocking subprocess)
+  3. config.py _make_agentic tests
+  4. pipeline integration tests (mock invoke_agent / invoke_agent_agentic)
+"""
+from __future__ import annotations
+
+import subprocess
+import tempfile
+import unittest
+from pathlib import Path
+from unittest.mock import MagicMock, call, patch
+
+from cross_eval.agent import invoke_agent_agentic
+from cross_eval.config import BUILTIN_AGENTS, _make_agentic
+from cross_eval.models import (
+    AgentConfig,
+    AgentResult,
+    PipelineConfig,
+    StepConfig,
+)
+from cross_eval.pipeline import (
+    _commit_iteration,
+    _finalize_worktree,
+    _has_agentic_steps,
+    _setup_worktree,
+    run_pipeline,
+)
+from cross_eval.worktree import (
+    capture_diff,
+    commit_worktree,
+    create_worktree,
+    make_branch_name,
+    remove_worktree,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _init_git_repo(path: Path) -> None:
+    """Turn *path* into a git repository that holds exactly one commit.
+
+    A throwaway committer identity is configured, ``README.md`` is written
+    and everything is committed with the message ``initial``. Each git
+    command is run with ``check=True``.
+    """
+
+    def _git(*argv: str) -> None:
+        # Same invocation shape for every git command used below.
+        subprocess.run(
+            ["git", *argv], cwd=path, capture_output=True, check=True,
+        )
+
+    _git("init")
+    _git("config", "user.email", "test@test.com")
+    _git("config", "user.name", "Test")
+    # The file must exist before ``git add`` so the commit is not empty.
+    (path / "README.md").write_text("# init\n")
+    _git("add", ".")
+    _git("commit", "-m", "initial")
+
+
+# ===================================================================
+# 1. worktree.py unit tests (real temp git repo)
+# ===================================================================
+
+class TestCreateWorktree(unittest.TestCase):
+    """Verify that create_worktree yields both a directory and a branch."""
+
+    def test_creates_worktree_and_branch(self) -> None:
+        branch_name = "cross-eval/test_branch"
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_root = Path(tmp)
+            repo = tmp_root / "repo"
+            repo.mkdir()
+            _init_git_repo(repo)
+            worktree_dir = tmp_root / "wt"
+
+            created = create_worktree(repo, worktree_dir, branch_name)
+
+            # The returned path must exist on disk.
+            self.assertTrue(created.exists())
+
+            # The branch must be listed by the original repository.
+            listing = subprocess.run(
+                ["git", "branch", "--list", branch_name],
+                cwd=repo, capture_output=True, text=True,
+            )
+            self.assertIn(branch_name, listing.stdout)
+
+            # Detach the worktree before the temporary directory goes away.
+            remove_worktree(repo, worktree_dir)
+
+
+class TestCaptureDiff(unittest.TestCase):
+    """capture_diff must report added files as well as edited ones."""
+
+    def test_captures_new_and_modified_files(self) -> None:
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_root = Path(tmp)
+            repo = tmp_root / "repo"
+            repo.mkdir()
+            _init_git_repo(repo)
+
+            worktree_dir = tmp_root / "wt"
+            create_worktree(repo, worktree_dir, "cross-eval/diff_test")
+
+            # One brand-new file plus an edit to the committed README.
+            (worktree_dir / "new_file.txt").write_text("hello\n")
+            (worktree_dir / "README.md").write_text("# modified\n")
+
+            captured = capture_diff(worktree_dir)
+            for expected in ("new_file.txt", "hello", "modified"):
+                self.assertIn(expected, captured)
+
+            remove_worktree(repo, worktree_dir)
+
+
+class TestCommitWorktree(unittest.TestCase):
+    """commit_worktree reports whether there was anything to commit."""
+
+    def test_commit_returns_true_on_changes(self) -> None:
+        with tempfile.TemporaryDirectory() as tmp:
+            repo = Path(tmp) / "repo"
+            repo.mkdir()
+            _init_git_repo(repo)
+
+            worktree_dir = Path(tmp) / "wt"
+            create_worktree(repo, worktree_dir, "cross-eval/commit_test")
+
+            # A new file gives the commit something to record.
+            (worktree_dir / "file.txt").write_text("data\n")
+            committed = commit_worktree(worktree_dir, "test commit")
+            self.assertTrue(committed)
+
+            remove_worktree(repo, worktree_dir)
+
+    def test_commit_returns_false_when_nothing_to_commit(self) -> None:
+        with tempfile.TemporaryDirectory() as tmp:
+            repo = Path(tmp) / "repo"
+            repo.mkdir()
+            _init_git_repo(repo)
+
+            worktree_dir = Path(tmp) / "wt"
+            create_worktree(repo, worktree_dir, "cross-eval/empty_commit")
+
+            # The worktree is untouched, so the call should report False.
+            committed = commit_worktree(worktree_dir, "empty")
+            self.assertFalse(committed)
+
+            remove_worktree(repo, worktree_dir)
+
+
+class TestRemoveWorktree(unittest.TestCase):
+    """Removing a worktree deletes its directory but keeps its branch."""
+
+    def test_branch_survives_worktree_removal(self) -> None:
+        branch_name = "cross-eval/remove_test"
+        with tempfile.TemporaryDirectory() as tmp:
+            repo = Path(tmp) / "repo"
+            repo.mkdir()
+            _init_git_repo(repo)
+
+            worktree_dir = Path(tmp) / "wt"
+            create_worktree(repo, worktree_dir, branch_name)
+            remove_worktree(repo, worktree_dir)
+
+            # The checkout directory is expected to be gone ...
+            self.assertFalse(worktree_dir.exists())
+
+            # ... while the branch is still listed by the original repo.
+            listing = subprocess.run(
+                ["git", "branch", "--list", branch_name],
+                cwd=repo, capture_output=True, text=True,
+            )
+            self.assertIn(branch_name, listing.stdout)
+
+
+class TestMakeBranchName(unittest.TestCase):
+    """Check the shape of the names produced by make_branch_name."""
+
+    def test_format(self) -> None:
+        branch = make_branch_name("review-fix")
+        self.assertTrue(branch.startswith("cross-eval/review-fix_"))
+        # Cut at the first underscore: prefix on the left, suffix on the right.
+        pieces = branch.split("_", 1)
+        self.assertEqual(len(pieces), 2)
+        # The suffix is expected to look like 20260313_123456,
+        # i.e. YYYYMMDD_HHMMSS, which is 15 characters long.
+        stamp = pieces[1]
+        self.assertEqual(len(stamp), 15)
+
+
+# ===================================================================
+# 2. agent.py agentic tests (mocking subprocess)
+# ===================================================================
+
+class TestInvokeAgentAgenticClaude(unittest.TestCase):
+    """invoke_agent_agentic builds the expected command line for claude.
+
+    The command must not contain ``-p`` and its last argument must refer to
+    a task file rather than carry the raw prompt.
+    """
+
+    @patch("cross_eval.worktree.capture_diff", return_value="diff --git a/file ...")
+    @patch("subprocess.run")
+    def test_claude_cmd_has_no_dash_p_and_prompt_as_positional(
+        self, mock_run: MagicMock, mock_diff: MagicMock,
+    ) -> None:
+        mock_run.return_value = MagicMock(returncode=0, stdout="ok", stderr="")
+
+        agent = AgentConfig(
+            name="claude-coder",
+            command="claude",
+            args=["--setting-sources", "user", "--dangerously-skip-permissions"],
+            agentic=True,
+        )
+
+        with tempfile.TemporaryDirectory() as td:
+            wt = Path(td)
+            # subprocess.run is patched here, so this only writes README.md
+            # and records the git commands on mock_run; no real repo is made.
+            _init_git_repo(wt)
+
+            invoke_agent_agentic(
+                agent, "implement feature X", "coding",
+                worktree_path=wt, quiet=True,
+            )
+
+        # Find the subprocess.run call that actually runs the agent. The
+        # command may arrive positionally or as ``args=``; keep the list that
+        # matched so the assertions below work for either call style instead
+        # of re-reading it positionally (which would raise IndexError for a
+        # keyword-style call).
+        agent_cmd = None
+        for c in mock_run.call_args_list:
+            cmd = c[0][0] if c[0] else c[1].get("args", [])
+            if cmd and cmd[0] == "claude":
+                agent_cmd = cmd
+                break
+
+        self.assertIsNotNone(agent_cmd, "Expected a subprocess.run call with 'claude'")
+
+        # No -p flag
+        self.assertNotIn("-p", agent_cmd)
+        # Last arg is a task file reference (not raw prompt — avoids arg length limits)
+        self.assertIn("task file", agent_cmd[-1].lower())
+
+
+class TestInvokeAgentAgenticCodex(unittest.TestCase):
+    """invoke_agent_agentic builds the expected command line for codex.
+
+    The command must end with the ``-`` stdin sentinel and the prompt must
+    be supplied through the ``input`` keyword of ``subprocess.run``.
+    """
+
+    @patch("cross_eval.worktree.capture_diff", return_value="diff --git a/file ...")
+    @patch("subprocess.run")
+    def test_codex_cmd_uses_stdin_with_dash_sentinel(
+        self, mock_run: MagicMock, mock_diff: MagicMock,
+    ) -> None:
+        mock_run.return_value = MagicMock(returncode=0, stdout="ok", stderr="")
+
+        agent = AgentConfig(
+            name="codex-coder",
+            command="codex",
+            args=["exec", "--full-auto", "--skip-git-repo-check"],
+            agentic=True,
+        )
+
+        with tempfile.TemporaryDirectory() as td:
+            wt = Path(td)
+            # subprocess.run is patched here, so this only writes README.md
+            # and records the git commands on mock_run; no real repo is made.
+            _init_git_repo(wt)
+
+            invoke_agent_agentic(
+                agent, "implement feature Y", "coding",
+                worktree_path=wt, quiet=True,
+            )
+
+        # Find the call that launched the agent. The command may arrive
+        # positionally or as ``args=``; remember both the call (for its
+        # keyword arguments) and the matched command list, so nothing below
+        # has to re-read the command positionally (IndexError for a
+        # keyword-style call).
+        agent_call = None
+        agent_cmd = None
+        for c in mock_run.call_args_list:
+            cmd = c[0][0] if c[0] else c[1].get("args", [])
+            if cmd and cmd[0] == "codex":
+                agent_call = c
+                agent_cmd = cmd
+                break
+
+        self.assertIsNotNone(agent_call, "Expected a subprocess.run call with 'codex'")
+
+        # Should have "-" sentinel at the end for stdin
+        self.assertEqual(agent_cmd[-1], "-")
+        # Stdin input should contain the prompt
+        input_data = agent_call[1].get("input")
+        self.assertIsNotNone(input_data)
+        self.assertIn("implement feature Y", input_data)
+
+
+class TestTaskFileCleanup(unittest.TestCase):
+    """No CROSS_EVAL_TASK.md may be left in the worktree after a run."""
+
+    @patch("cross_eval.worktree.capture_diff", return_value="(no changes)")
+    @patch("subprocess.run")
+    def test_task_file_in_tmp_not_worktree(
+        self, run_stub: MagicMock, diff_stub: MagicMock,
+    ) -> None:
+        run_stub.return_value = MagicMock(returncode=0, stdout="ok", stderr="")
+        coder = AgentConfig(
+            name="claude-coder", command="claude", args=[], agentic=True,
+        )
+
+        with tempfile.TemporaryDirectory() as tmp:
+            worktree = Path(tmp)
+            _init_git_repo(worktree)
+
+            invoke_agent_agentic(
+                coder, "do stuff", "coding",
+                worktree_path=worktree, quiet=True,
+            )
+
+            # Checked while the temp dir still exists: the task file is
+            # expected to live outside the worktree.
+            leftover = worktree / "CROSS_EVAL_TASK.md"
+            self.assertFalse(leftover.exists())
+
+
+# ===================================================================
+# 3. config.py tests
+# ===================================================================
+
+class TestMakeAgenticClaude(unittest.TestCase):
+    """_make_agentic flags a claude agent as agentic and drops ``-p``."""
+
+    def test_strips_dash_p_and_sets_agentic(self) -> None:
+        cfg = AgentConfig(
+            name="claude-coder",
+            command="claude",
+            args=["-p", "--setting-sources", "user", "--model", "opus"],
+        )
+        # The flag starts out off for a freshly built config.
+        self.assertFalse(cfg.agentic)
+
+        _make_agentic(cfg)
+
+        self.assertTrue(cfg.agentic)
+        self.assertNotIn("-p", cfg.args)
+        # Unrelated arguments must survive the conversion.
+        self.assertIn("--setting-sources", cfg.args)
+
+    def test_idempotent_when_no_dash_p(self) -> None:
+        original_args = ["--setting-sources", "user"]
+        cfg = AgentConfig(
+            name="claude-coder",
+            command="claude",
+            args=["--setting-sources", "user"],
+        )
+
+        _make_agentic(cfg)
+
+        self.assertTrue(cfg.agentic)
+        # With no ``-p`` present the argument list is left as it was.
+        self.assertEqual(cfg.args, original_args)
+
+
+class TestMakeAgenticCodex(unittest.TestCase):
+    """_make_agentic also accepts a codex agent, which has no ``-p`` flag."""
+
+    def test_codex_agentic_works(self) -> None:
+        cfg = AgentConfig(
+            name="codex-coder",
+            command="codex",
+            args=["exec", "--full-auto", "-"],
+        )
+
+        _make_agentic(cfg)
+
+        self.assertTrue(cfg.agentic)
+        # There was no ``-p`` to remove; the codex arguments must remain.
+        for kept in ("exec", "--full-auto"):
+            self.assertIn(kept, cfg.args)
+
+
+# ===================================================================
+# 4. pipeline integration tests
+# ===================================================================
+
+def _make_agentic_config(
+    run_dir: Path,
+    agentic_coder: bool = True,
+) -> PipelineConfig:
+    """Return a two-step pipeline config: a coding step, then a review step.
+
+    The coder's ``agentic`` flag follows *agentic_coder*; the reviewer is
+    always non-agentic. *run_dir* becomes the config's ``output_dir``.
+    """
+    agents = {
+        "claude-coder": AgentConfig(
+            name="claude-coder", command="claude",
+            args=["--setting-sources", "user"],
+            agentic=agentic_coder,
+        ),
+        "claude-reviewer": AgentConfig(
+            name="claude-reviewer", command="claude",
+            args=["-p", "--setting-sources", "user"],
+            agentic=False,
+        ),
+    }
+    coding_step = StepConfig(
+        name="coding",
+        agent="claude-coder",
+        role="coding",
+        prompt_template="default:coding",
+        output_key="coding_output",
+    )
+    # Only the review step is marked as producing a verdict.
+    review_step = StepConfig(
+        name="review",
+        agent="claude-reviewer",
+        role="review",
+        prompt_template="default:review",
+        output_key="review_result",
+        verdict=True,
+    )
+    return PipelineConfig(
+        output_dir=run_dir,
+        max_iterations=2,
+        min_iterations=1,
+        language="en",
+        inputs={"plan": "Test plan", "checklist": "Test checklist"},
+        agents=agents,
+        coders=["claude-coder"],
+        reviewers=["claude-reviewer"],
+        pipeline=[coding_step, review_step],
+        preset_name="simple",
+    )
+
+
+class TestSetupWorktreeCalledForAgentic(unittest.TestCase):
+    """A pipeline with an agentic agent must request a worktree once."""
+
+    @patch("cross_eval.pipeline._finalize_worktree", return_value="cross-eval/test")
+    @patch("cross_eval.pipeline._commit_iteration")
+    @patch("cross_eval.pipeline._setup_worktree")
+    @patch("cross_eval.pipeline.invoke_agent_agentic")
+    @patch("cross_eval.pipeline.invoke_agent")
+    def test_setup_worktree_called(
+        self,
+        invoke_stub: MagicMock,
+        agentic_stub: MagicMock,
+        setup_stub: MagicMock,
+        commit_stub: MagicMock,
+        finalize_stub: MagicMock,
+    ) -> None:
+        with tempfile.TemporaryDirectory() as tmp:
+            out_dir = Path(tmp)
+            pipeline_cfg = _make_agentic_config(out_dir)
+
+            # Stand-in worktree directory returned by the patched setup.
+            worktree = out_dir / "work"
+            worktree.mkdir()
+            setup_stub.return_value = (worktree, "cross-eval/test")
+
+            # Canned results: coder output first, reviewer "VERDICT: PASS".
+            agentic_stub.return_value = AgentResult(
+                output="diff output", exit_code=0,
+                agent_name="claude-coder", step_name="coding",
+                duration_seconds=0.1,
+            )
+            invoke_stub.return_value = AgentResult(
+                output="VERDICT: PASS", exit_code=0,
+                agent_name="claude-reviewer", step_name="review",
+                duration_seconds=0.1,
+            )
+
+            run_pipeline(pipeline_cfg, cwd=Path(tmp))
+
+            setup_stub.assert_called_once()
+
+
+class TestReviewerRunsInWorktreeCwd(unittest.TestCase):
+    """With a worktree in place the reviewer is invoked with it as cwd."""
+
+    @patch("cross_eval.pipeline._finalize_worktree", return_value="cross-eval/test")
+    @patch("cross_eval.pipeline._commit_iteration")
+    @patch("cross_eval.pipeline._setup_worktree")
+    @patch("cross_eval.pipeline.invoke_agent_agentic")
+    @patch("cross_eval.pipeline.invoke_agent")
+    def test_reviewer_uses_worktree_cwd(
+        self,
+        invoke_stub: MagicMock,
+        agentic_stub: MagicMock,
+        setup_stub: MagicMock,
+        commit_stub: MagicMock,
+        finalize_stub: MagicMock,
+    ) -> None:
+        with tempfile.TemporaryDirectory() as tmp:
+            out_dir = Path(tmp)
+            pipeline_cfg = _make_agentic_config(out_dir)
+
+            # Stand-in worktree directory returned by the patched setup.
+            worktree = out_dir / "work"
+            worktree.mkdir()
+            setup_stub.return_value = (worktree, "cross-eval/test")
+
+            agentic_stub.return_value = AgentResult(
+                output="diff output", exit_code=0,
+                agent_name="claude-coder", step_name="coding",
+                duration_seconds=0.1,
+            )
+            invoke_stub.return_value = AgentResult(
+                output="VERDICT: PASS", exit_code=0,
+                agent_name="claude-reviewer", step_name="review",
+                duration_seconds=0.1,
+            )
+
+            run_pipeline(pipeline_cfg, cwd=Path(tmp))
+
+            # Inspect the last call to the non-agentic invoker: prefer the
+            # ``cwd`` keyword, otherwise fall back to the 4th positional arg.
+            last_call = invoke_stub.call_args
+            used_cwd = last_call[1].get("cwd") or last_call[0][3]
+            self.assertEqual(used_cwd, worktree)
+
+
+class TestCommitIterationCalled(unittest.TestCase):
+    """A passing single-iteration run triggers exactly one iteration commit."""
+
+    @patch("cross_eval.pipeline._finalize_worktree", return_value="cross-eval/test")
+    @patch("cross_eval.pipeline._commit_iteration")
+    @patch("cross_eval.pipeline._setup_worktree")
+    @patch("cross_eval.pipeline.invoke_agent_agentic")
+    @patch("cross_eval.pipeline.invoke_agent")
+    def test_commit_iteration_called(
+        self,
+        invoke_stub: MagicMock,
+        agentic_stub: MagicMock,
+        setup_stub: MagicMock,
+        commit_stub: MagicMock,
+        finalize_stub: MagicMock,
+    ) -> None:
+        with tempfile.TemporaryDirectory() as tmp:
+            out_dir = Path(tmp)
+            pipeline_cfg = _make_agentic_config(out_dir)
+
+            # Stand-in worktree directory returned by the patched setup.
+            worktree = out_dir / "work"
+            worktree.mkdir()
+            setup_stub.return_value = (worktree, "cross-eval/test")
+
+            agentic_stub.return_value = AgentResult(
+                output="diff output", exit_code=0,
+                agent_name="claude-coder", step_name="coding",
+                duration_seconds=0.1,
+            )
+            invoke_stub.return_value = AgentResult(
+                output="VERDICT: PASS", exit_code=0,
+                agent_name="claude-reviewer", step_name="review",
+                duration_seconds=0.1,
+            )
+
+            run_pipeline(pipeline_cfg, cwd=Path(tmp))
+
+            commit_stub.assert_called_once()
+            # The worktree path must be the first positional argument.
+            positional = commit_stub.call_args[0]
+            self.assertEqual(positional[0], worktree)
+
+
+class TestFinalizeWorktreeCalled(unittest.TestCase):
+    """The pipeline calls _finalize_worktree once with worktree and branch."""
+
+    @patch("cross_eval.pipeline._finalize_worktree", return_value="cross-eval/test")
+    @patch("cross_eval.pipeline._commit_iteration")
+    @patch("cross_eval.pipeline._setup_worktree")
+    @patch("cross_eval.pipeline.invoke_agent_agentic")
+    @patch("cross_eval.pipeline.invoke_agent")
+    def test_finalize_called(
+        self,
+        invoke_stub: MagicMock,
+        agentic_stub: MagicMock,
+        setup_stub: MagicMock,
+        commit_stub: MagicMock,
+        finalize_stub: MagicMock,
+    ) -> None:
+        with tempfile.TemporaryDirectory() as tmp:
+            out_dir = Path(tmp)
+            pipeline_cfg = _make_agentic_config(out_dir)
+
+            # Stand-in worktree directory returned by the patched setup.
+            worktree = out_dir / "work"
+            worktree.mkdir()
+            setup_stub.return_value = (worktree, "cross-eval/test")
+
+            agentic_stub.return_value = AgentResult(
+                output="diff output", exit_code=0,
+                agent_name="claude-coder", step_name="coding",
+                duration_seconds=0.1,
+            )
+            invoke_stub.return_value = AgentResult(
+                output="VERDICT: PASS", exit_code=0,
+                agent_name="claude-reviewer", step_name="review",
+                duration_seconds=0.1,
+            )
+
+            run_pipeline(pipeline_cfg, cwd=Path(tmp))
+
+            finalize_stub.assert_called_once()
+            # Positional slots 1 and 2 carry the worktree path and the branch
+            # name handed out by the patched _setup_worktree.
+            positional = finalize_stub.call_args[0]
+            self.assertEqual(positional[1], worktree)
+            self.assertEqual(positional[2], "cross-eval/test")
+
+
+class TestParallelAgenticFallsBackToSequential(unittest.TestCase):
+    """Agentic-step detection and ordering of parallel agentic steps."""
+
+    def test_has_agentic_steps_detects_agentic(self) -> None:
+        agents = {
+            "claude-coder": AgentConfig(
+                name="claude-coder", command="claude", args=[], agentic=True,
+            ),
+            "claude-reviewer": AgentConfig(
+                name="claude-reviewer", command="claude", args=[], agentic=False,
+            ),
+        }
+        cfg = PipelineConfig(agents=agents)
+        # A single step that is bound to the agentic coder.
+        coding_only = [
+            StepConfig(name="a", agent="claude-coder", role="coding",
+                       prompt_template="default:coding", output_key="a"),
+        ]
+        self.assertTrue(_has_agentic_steps(cfg, coding_only))
+
+    def test_has_agentic_steps_returns_false_without_agentic(self) -> None:
+        agents = {
+            "claude-reviewer": AgentConfig(
+                name="claude-reviewer", command="claude", args=[], agentic=False,
+            ),
+        }
+        cfg = PipelineConfig(agents=agents)
+        # A single step that is bound to the non-agentic reviewer.
+        review_only = [
+            StepConfig(name="r", agent="claude-reviewer", role="review",
+                       prompt_template="default:review", output_key="r", verdict=True),
+        ]
+        self.assertFalse(_has_agentic_steps(cfg, review_only))
+
+    @patch("cross_eval.pipeline._finalize_worktree", return_value="cross-eval/test")
+    @patch("cross_eval.pipeline._commit_iteration")
+    @patch("cross_eval.pipeline._setup_worktree")
+    @patch("cross_eval.pipeline.invoke_agent_agentic")
+    @patch("cross_eval.pipeline.invoke_agent")
+    def test_parallel_agentic_runs_sequentially(
+        self,
+        invoke_stub: MagicMock,
+        agentic_stub: MagicMock,
+        setup_stub: MagicMock,
+        commit_stub: MagicMock,
+        finalize_stub: MagicMock,
+    ) -> None:
+        """Two parallel-flagged agentic steps are invoked in declared order."""
+        with tempfile.TemporaryDirectory() as tmp:
+            out_dir = Path(tmp)
+
+            agents = {
+                "coder-a": AgentConfig(
+                    name="coder-a", command="claude", args=[], agentic=True,
+                ),
+                "coder-b": AgentConfig(
+                    name="coder-b", command="claude", args=[], agentic=True,
+                ),
+                "reviewer": AgentConfig(
+                    name="reviewer", command="claude", args=["-p"], agentic=False,
+                ),
+            }
+
+            # Both coding steps are flagged parallel; the review step is not.
+            step_a = StepConfig(
+                name="code_a", agent="coder-a", role="coding",
+                prompt_template="default:coding", output_key="code_a",
+                parallel=True,
+            )
+            step_b = StepConfig(
+                name="code_b", agent="coder-b", role="coding",
+                prompt_template="default:coding", output_key="code_b",
+                parallel=True,
+            )
+            step_review = StepConfig(
+                name="review", agent="reviewer", role="review",
+                prompt_template="default:review", output_key="review_result",
+                verdict=True,
+            )
+
+            pipeline_cfg = PipelineConfig(
+                output_dir=out_dir,
+                max_iterations=1,
+                min_iterations=1,
+                language="en",
+                inputs={"plan": "Test plan", "checklist": "Test checklist"},
+                agents=agents,
+                coders=["coder-a", "coder-b"],
+                reviewers=["reviewer"],
+                pipeline=[step_a, step_b, step_review],
+                preset_name="custom",
+            )
+
+            # Stand-in worktree directory returned by the patched setup.
+            worktree = out_dir / "work"
+            worktree.mkdir()
+            setup_stub.return_value = (worktree, "cross-eval/test")
+
+            seen_steps: list[str] = []
+
+            def _track_agentic(agent_config, prompt, step_name, **kwargs):
+                # Record the step name in the order the pipeline invokes it.
+                seen_steps.append(step_name)
+                return AgentResult(
+                    output="diff", exit_code=0,
+                    agent_name=agent_config.name, step_name=step_name,
+                    duration_seconds=0.1,
+                )
+
+            agentic_stub.side_effect = _track_agentic
+            invoke_stub.return_value = AgentResult(
+                output="VERDICT: PASS", exit_code=0,
+                agent_name="reviewer", step_name="review",
+                duration_seconds=0.1,
+            )
+
+            run_pipeline(pipeline_cfg, cwd=Path(tmp))
+
+            # Both agentic steps must have run ...
+            coding_calls = [s for s in seen_steps if s.startswith("code_")]
+            self.assertEqual(len(coding_calls), 2)
+            # ... and in the order they were declared.
+            self.assertEqual(coding_calls, ["code_a", "code_b"])
+
+
+# Run the suite through unittest when this file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -6,12 +6,14 @@ from pathlib import Path
 from unittest.mock import patch

 from cross_eval.agent import AgentInvocationError, _supports_reasoning_effort
-from cross_eval.cli import _apply_phased_iteration_override
+from cross_eval.cli import _apply_phased_iteration_override, main
 from cross_eval.agent import invoke_agent
 from cross_eval.config import (
    BUILTIN_AGENTS,
+    _SENIOR_SYSTEM_PROMPT,
    _default_seniors_for_preset,
    apply_reasoning_effort_settings,
+    load_config,
    normalize_reasoning_effort,
    normalize_prompt_template,
    normalize_step_role,
@@ -52,7 +54,6 @@ from cross_eval.prompts import (
    _build_review_only_preset,
    _build_simple_preset,
 )
-from cross_eval.config import _SENIOR_SYSTEM_PROMPT
 from cross_eval.report import build_report, parse_review_metrics, print_escalation_report

 class BuiltinAgentConfigTest(unittest.TestCase):
@@ -954,5 +955,82 @@ class EscalateVerdictTest(unittest.TestCase):
        self.assertIn("VERDICT: ESCALATE", AGGREGATE_REVIEW_TEMPLATE_KO)


+class FixPresetBehaviorTest(unittest.TestCase):
+    """Behaviour of the ``review-fix`` preset via load_config and the CLI."""
+
+    def _write_fix_config(self, root: Path, *, max_iterations: int = 7) -> Path:
+        """Write plan, checklist and config.yaml into *root*; return the config path."""
+        for filename, body in (
+            ("plan.md", "# plan\n"),
+            ("checklist.md", "# checklist\n"),
+        ):
+            (root / filename).write_text(body, encoding="utf-8")
+
+        yaml_text = "".join([
+            "inputs:\n",
+            "  plan: plan.md\n",
+            "  checklist: checklist.md\n",
+            "coders: [claude-coder]\n",
+            "reviewers: [claude-reviewer]\n",
+            "pipeline: preset:review-fix\n",
+            f"max_iterations: {max_iterations}\n",
+            "language: en\n",
+        ])
+        config_path = root / "config.yaml"
+        config_path.write_text(yaml_text, encoding="utf-8")
+        return config_path
+
+    def test_load_config_syncs_phased_iterations_and_enables_agentic(self) -> None:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            cfg_file = self._write_fix_config(Path(tmpdir), max_iterations=7)
+            config = load_config(cfg_file)
+
+        # The loaded config object is inspected after the temp dir is gone.
+        coder = config.agents["claude-coder"]
+        self.assertEqual(config.preset_name, "review-fix")
+        self.assertEqual(config.phases[0].max_iterations, 7)
+        self.assertTrue(coder.agentic)
+        self.assertNotIn("-p", coder.args)
+
+    def test_run_config_max_iter_updates_existing_phased_pipeline(self) -> None:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config_path = self._write_fix_config(Path(tmpdir), max_iterations=7)
+            seen: dict[str, object] = {}
+
+            def _fake_run_pipeline(config, **kwargs):
+                # Snapshot what the CLI hands to the pipeline runner.
+                seen["phase_max"] = config.phases[0].max_iterations
+                seen["agentic"] = config.agents[config.coders[0]].agentic
+                return PipelineResult(
+                    iterations=[],
+                    final_verdict="PASS",
+                    run_dir=Path(tmpdir) / "output",
+                )
+
+            argv = [
+                "run",
+                "--config", str(config_path),
+                "--max-iter", "9",
+                "--dry-run",
+            ]
+            with patch("cross_eval.pipeline.run_pipeline", side_effect=_fake_run_pipeline):
+                exit_code = main(argv)
+
+        self.assertEqual(exit_code, 0)
+        # --max-iter 9 must win over the 7 written in the config file.
+        self.assertEqual(seen["phase_max"], 9)
+        self.assertTrue(seen["agentic"])
+
+    def test_run_preset_review_fix_auto_enables_agentic_without_flag(self) -> None:
+        seen: dict[str, object] = {}
+
+        def _fake_run_pipeline(config, **kwargs):
+            # Snapshot what the CLI hands to the pipeline runner.
+            seen["preset"] = config.preset_name
+            seen["agentic"] = config.agents[config.coders[0]].agentic
+            seen["phase_max"] = config.phases[0].max_iterations
+            return PipelineResult(
+                iterations=[],
+                final_verdict="PASS",
+                run_dir=Path(".cross-eval/output"),
+            )
+
+        argv = ["run", "--preset", "review-fix", "--dry-run"]
+        with patch("cross_eval.pipeline.run_pipeline", side_effect=_fake_run_pipeline):
+            exit_code = main(argv)
+
+        self.assertEqual(exit_code, 0)
+        self.assertEqual(seen["preset"], "review-fix")
+        self.assertTrue(seen["agentic"])
+        self.assertEqual(seen["phase_max"], 3)
+
+
 if __name__ == "__main__":
    unittest.main()