feat: ESCALATE verdict, issue tracker, onboarding commands
Add 3-verdict system (PASS/FAIL/ESCALATE) with priority handling across simple and phased pipelines. Senior reviewers can now escalate issues requiring human intervention, immediately breaking the review loop. - ESCALATE verdict extraction with highest priority over PASS/FAIL - Issue Tracker tables (ISS-NNN) carried across iterations - Auto-escalate heuristic using (file, keyword) composite fingerprints - Report restructuring: executive view first (verdict → tracker → metrics) - Onboarding: `doctor`, `demo`, `init --guided` commands - Exit codes: PASS=0, FAIL=1, ESCALATE=2 - 87 tests passing (54 config + 25 onboarding + 8 integration) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -39,6 +39,26 @@ _CODEX_ARGS = [
|
||||
"-",
|
||||
]
|
||||
|
||||
_CLAUDE_BASE_ARGS = [
|
||||
"-p",
|
||||
"--setting-sources",
|
||||
"user",
|
||||
"--disable-slash-commands",
|
||||
"--model",
|
||||
"opus",
|
||||
]
|
||||
|
||||
_CLAUDE_CODER_ARGS = list(_CLAUDE_BASE_ARGS) + [
|
||||
"--dangerously-skip-permissions",
|
||||
"--permission-mode",
|
||||
"bypassPermissions",
|
||||
]
|
||||
|
||||
_CLAUDE_REVIEW_ARGS = list(_CLAUDE_BASE_ARGS) + [
|
||||
"--permission-mode",
|
||||
"plan",
|
||||
]
|
||||
|
||||
_CODER_SYSTEM_PROMPT = (
|
||||
"You are a senior software engineer implementing code changes.\n"
|
||||
"Rules:\n"
|
||||
@@ -81,29 +101,37 @@ _SENIOR_SYSTEM_PROMPT = (
|
||||
"4. Be skeptical of false positives, but do not lower the bar on real requirement "
|
||||
"gaps.\n"
|
||||
"5. When issues remain, produce a concise prioritized action list the coder can act on.\n"
|
||||
"6. Do NOT invent new requirements beyond the plan and checklist.\n"
|
||||
"7. End with VERDICT: PASS or VERDICT: FAIL."
|
||||
"6. Maintain an Issue Tracker table across iterations to track issue status.\n"
|
||||
"7. Do NOT invent new requirements beyond the plan and checklist.\n"
|
||||
"8. End with one of three verdicts:\n"
|
||||
" - VERDICT: PASS — all requirements met, no issues remain.\n"
|
||||
" - VERDICT: FAIL — issues found that the coder can fix.\n"
|
||||
" - VERDICT: ESCALATE — issues that require human intervention. Use ESCALATE when:\n"
|
||||
" * Requirements are ambiguous and need clarification from stakeholders\n"
|
||||
" * Architecture decisions are needed that go beyond the plan scope\n"
|
||||
" * External dependency issues block progress\n"
|
||||
" * The coder has failed to resolve the same issue 2+ times"
|
||||
)
|
||||
|
||||
BUILTIN_AGENTS: dict[str, AgentConfig] = {
|
||||
"claude-coder": AgentConfig(
|
||||
name="claude-coder",
|
||||
command="claude",
|
||||
args=["-p", "--model", "opus", "--permission-mode", "auto"],
|
||||
args=list(_CLAUDE_CODER_ARGS),
|
||||
system_prompt=_CODER_SYSTEM_PROMPT,
|
||||
reasoning_effort=DEFAULT_ROLE_REASONING_EFFORTS["coder"],
|
||||
),
|
||||
"claude-reviewer": AgentConfig(
|
||||
name="claude-reviewer",
|
||||
command="claude",
|
||||
args=["-p", "--model", "opus", "--permission-mode", "auto"],
|
||||
args=list(_CLAUDE_REVIEW_ARGS),
|
||||
system_prompt=_REVIEWER_SYSTEM_PROMPT,
|
||||
reasoning_effort=DEFAULT_ROLE_REASONING_EFFORTS["reviewer"],
|
||||
),
|
||||
"claude-senior": AgentConfig(
|
||||
name="claude-senior",
|
||||
command="claude",
|
||||
args=["-p", "--model", "opus", "--permission-mode", "auto"],
|
||||
args=list(_CLAUDE_REVIEW_ARGS),
|
||||
system_prompt=_SENIOR_SYSTEM_PROMPT,
|
||||
reasoning_effort=DEFAULT_ROLE_REASONING_EFFORTS["senior"],
|
||||
),
|
||||
@@ -136,6 +164,11 @@ _AGENT_ALIASES: dict[str, str] = {
|
||||
"codex": "codex",
|
||||
}
|
||||
|
||||
_ROLE_ALIASES: dict[str, str] = {
|
||||
"coding": "coding",
|
||||
"review": "review",
|
||||
}
|
||||
|
||||
|
||||
def resolve_agent_shorthand(name: str, role: str) -> str:
|
||||
"""Resolve shorthand agent name to full builtin name.
|
||||
@@ -150,6 +183,16 @@ def resolve_agent_shorthand(name: str, role: str) -> str:
|
||||
return name
|
||||
|
||||
|
||||
def normalize_step_role(role: str) -> str:
|
||||
"""Normalize step role aliases to the canonical role name."""
|
||||
return _ROLE_ALIASES.get(role, role)
|
||||
|
||||
|
||||
def normalize_prompt_template(template_ref: str) -> str:
|
||||
"""Normalize prompt template aliases to canonical template refs."""
|
||||
return template_ref
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Role inference (backward compatibility)
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -233,7 +276,7 @@ def _default_seniors_for_preset(
|
||||
"""Infer a default senior agent for presets that benefit from adjudication."""
|
||||
if not (
|
||||
isinstance(pipeline_raw, str)
|
||||
and pipeline_raw == "preset:review-fix"
|
||||
and pipeline_raw in {"preset:review-fix", "preset:coding-review-fix"}
|
||||
and reviewers
|
||||
):
|
||||
return []
|
||||
@@ -465,7 +508,7 @@ def _resolve_pipeline(
|
||||
"""Resolve pipeline from preset string or explicit step list.
|
||||
|
||||
Returns (steps, phases) tuple. Only one will be non-empty.
|
||||
- Simple/cross-review/review-only → steps populated, phases empty.
|
||||
- Simple/cross-review/plan-review/review-only → steps populated, phases empty.
|
||||
- Phased presets (review-fix) → steps empty, phases populated.
|
||||
"""
|
||||
# Preset: "preset:simple" or "preset:review-fix"
|
||||
@@ -485,11 +528,15 @@ def _resolve_pipeline(
|
||||
if isinstance(pipeline_raw, list):
|
||||
steps = []
|
||||
for step_data in pipeline_raw:
|
||||
raw_role = step_data.get("role", "coding")
|
||||
normalized_role = normalize_step_role(raw_role)
|
||||
steps.append(StepConfig(
|
||||
name=step_data["name"],
|
||||
agent=step_data["agent"],
|
||||
role=step_data.get("role", "generate"),
|
||||
prompt_template=step_data.get("prompt_template", f"default:{step_data.get('role', 'generate')}"),
|
||||
role=normalized_role,
|
||||
prompt_template=normalize_prompt_template(
|
||||
step_data.get("prompt_template", f"default:{normalized_role}")
|
||||
),
|
||||
output_key=step_data["output_key"],
|
||||
verdict=step_data.get("verdict", False),
|
||||
verdict_pattern=step_data.get("verdict_pattern", r"VERDICT:\s*PASS"),
|
||||
@@ -524,10 +571,6 @@ def validate_config(config: PipelineConfig) -> list[str]:
|
||||
errors,
|
||||
scope=f"Phase '{phase.name}'",
|
||||
)
|
||||
if not any(s.verdict for s in phase.steps):
|
||||
errors.append(
|
||||
f"Phase '{phase.name}' must have at least one step with verdict: true"
|
||||
)
|
||||
# Validate verdict patterns
|
||||
for step in phase.steps:
|
||||
if step.verdict:
|
||||
|
||||
Reference in New Issue
Block a user