Make plan-review a review-fix-verify loop
This commit is contained in:
@@ -13,7 +13,11 @@ from cross_eval.models import (
|
||||
StepConfig,
|
||||
)
|
||||
from cross_eval.pipeline import run_pipeline
|
||||
from cross_eval.prompts import _build_review_fix_preset, _build_simple_preset
|
||||
from cross_eval.prompts import (
|
||||
_build_plan_review_preset,
|
||||
_build_review_fix_preset,
|
||||
_build_simple_preset,
|
||||
)
|
||||
|
||||
|
||||
def _make_mock_agent(outputs: list[str]):
|
||||
@@ -262,6 +266,60 @@ class TestPhasedPipelineEscalateBreaksPhase(unittest.TestCase):
|
||||
self.assertTrue(len(result.escalated_issues) > 0)
|
||||
|
||||
|
||||
class TestPlanReviewPipelineLoopsUntilVerifyPass(unittest.TestCase):
    """Plan-review preset: a failing round followed by a passing round ends in PASS."""

    def test_plan_review_fail_then_pass(self) -> None:
        coders = ["claude-coder"]
        reviewers = ["claude-reviewer"]
        seniors = ["claude-senior"]

        pipeline_inputs = {
            "plan": "Test plan",
            "checklist": "Test checklist",
            "docs": "Reference docs",
        }

        # Two scripted replies per step name, matching the two iterations
        # asserted at the end: the first reply of every verdict-bearing step
        # is a FAIL, the second a PASS.
        failing_aggregate = (
            "### Confirmed Issues\n- Clarify acceptance criteria\n\n"
            "### Action Items\n1. Tighten the checklist\n\nVERDICT: FAIL"
        )
        passing_aggregate = (
            "### Confirmed Issues\nNone\n\n"
            "### Dismissed Findings\nNone\n\n"
            "### Action Items\n1. No document changes needed\n\nVERDICT: PASS"
        )
        scripted_replies = {
            "plan_review": [
                "Requirements are ambiguous\n\nVERDICT: FAIL",
                "Looks aligned\n\nVERDICT: PASS",
            ],
            "aggregate_review": [failing_aggregate, passing_aggregate],
            "plan_fix": ["Updated plan and checklist", "No-op"],
            "verify": [
                "Still missing edge-case criteria\n\nVERDICT: FAIL",
                "Planning package is now implementable\n\nVERDICT: PASS",
            ],
        }

        with tempfile.TemporaryDirectory() as tmpdir:
            steps = _build_plan_review_preset(coders, reviewers, seniors)

            config = PipelineConfig(
                output_dir=Path(tmpdir),
                max_iterations=4,
                min_iterations=1,
                language="en",
                inputs=pipeline_inputs,
                agents=dict(BUILTIN_AGENTS),
                coders=coders,
                reviewers=reviewers,
                seniors=seniors,
                pipeline=steps,
                preset_name="plan-review",
            )

            fake_invoke = _make_step_mock(scripted_replies)

            # Replace the real agent invocation so no external agent is called.
            with patch("cross_eval.pipeline.invoke_agent", side_effect=fake_invoke):
                result = run_pipeline(config)

            # max_iterations is 4, yet the run should stop after the second
            # (passing) round.
            self.assertEqual(result.final_verdict, "PASS")
            self.assertEqual(len(result.iterations), 2)
|
||||
|
||||
|
||||
class TestAutoEscalateFiresWithoutSenior(unittest.TestCase):
|
||||
"""Test 6: simple pipeline without senior, same FAIL feedback 3 times -> auto-escalate."""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user