Make plan-review a review-fix-verify loop

This commit is contained in:
chungyeong
2026-03-15 00:01:26 +09:00
parent 60c7b07939
commit a85a490a9b
7 changed files with 289 additions and 73 deletions

View File

@@ -13,7 +13,11 @@ from cross_eval.models import (
StepConfig,
)
from cross_eval.pipeline import run_pipeline
from cross_eval.prompts import _build_review_fix_preset, _build_simple_preset
from cross_eval.prompts import (
_build_plan_review_preset,
_build_review_fix_preset,
_build_simple_preset,
)
def _make_mock_agent(outputs: list[str]):
@@ -262,6 +266,60 @@ class TestPhasedPipelineEscalateBreaksPhase(unittest.TestCase):
self.assertTrue(len(result.escalated_issues) > 0)
class TestPlanReviewPipelineLoopsUntilVerifyPass(unittest.TestCase):
    """Plan-review preset: one failing round followed by one passing round.

    The scripted agent replies make the review, aggregate and verify steps
    report FAIL on their first call and PASS on their second, so the run is
    expected to end with a PASS verdict after exactly two iterations.
    """

    def test_plan_review_fail_then_pass(self) -> None:
        # Aggregated review for the failing round: one confirmed issue.
        failing_aggregate = (
            "### Confirmed Issues\n"
            "- Clarify acceptance criteria\n\n"
            "### Action Items\n"
            "1. Tighten the checklist\n\n"
            "VERDICT: FAIL"
        )
        # Aggregated review for the passing round: nothing left to fix.
        passing_aggregate = (
            "### Confirmed Issues\n"
            "None\n\n"
            "### Dismissed Findings\n"
            "None\n\n"
            "### Action Items\n"
            "1. No document changes needed\n\n"
            "VERDICT: PASS"
        )
        # Canned replies handed to _make_step_mock; presumably keyed by step
        # name and replayed in call order (helper is defined elsewhere).
        scripted_replies = {
            "plan_review": [
                "Requirements are ambiguous\n\nVERDICT: FAIL",
                "Looks aligned\n\nVERDICT: PASS",
            ],
            "aggregate_review": [failing_aggregate, passing_aggregate],
            "plan_fix": ["Updated plan and checklist", "No-op"],
            "verify": [
                "Still missing edge-case criteria\n\nVERDICT: FAIL",
                "Planning package is now implementable\n\nVERDICT: PASS",
            ],
        }
        pipeline_inputs = {
            "plan": "Test plan",
            "checklist": "Test checklist",
            "docs": "Reference docs",
        }

        with tempfile.TemporaryDirectory() as tmpdir:
            coder_names = ["claude-coder"]
            reviewer_names = ["claude-reviewer"]
            senior_names = ["claude-senior"]
            preset_steps = _build_plan_review_preset(
                coder_names, reviewer_names, senior_names
            )

            pipeline_config = PipelineConfig(
                output_dir=Path(tmpdir),
                max_iterations=4,
                min_iterations=1,
                language="en",
                inputs=pipeline_inputs,
                agents=dict(BUILTIN_AGENTS),
                coders=coder_names,
                reviewers=reviewer_names,
                seniors=senior_names,
                pipeline=preset_steps,
                preset_name="plan-review",
            )

            fake_invoke = _make_step_mock(scripted_replies)

            # Only the pipeline run itself is executed with invoke_agent patched.
            with patch("cross_eval.pipeline.invoke_agent", side_effect=fake_invoke):
                outcome = run_pipeline(pipeline_config)

            self.assertEqual(outcome.final_verdict, "PASS")
            self.assertEqual(len(outcome.iterations), 2)
class TestAutoEscalateFiresWithoutSenior(unittest.TestCase):
"""Test 6: simple pipeline without senior, same FAIL feedback 3 times -> auto-escalate."""