feat: ESCALATE verdict, issue tracker, onboarding commands
Add 3-verdict system (PASS/FAIL/ESCALATE) with priority handling across simple and phased pipelines. Senior reviewers can now escalate issues requiring human intervention, immediately breaking the review loop.

- ESCALATE verdict extraction with highest priority over PASS/FAIL
- Issue Tracker tables (ISS-NNN) carried across iterations
- Auto-escalate heuristic using (file, keyword) composite fingerprints
- Report restructuring: executive view first (verdict → tracker → metrics)
- Onboarding: `doctor`, `demo`, `init --guided` commands
- Exit codes: PASS=0, FAIL=1, ESCALATE=2
- 87 tests passing (54 config + 25 onboarding + 8 integration)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -48,11 +48,16 @@ _STRINGS: dict[str, dict[str, str]] = {
|
||||
"pass_msg": "All checklist items satisfied. No over-engineering or omissions detected.",
|
||||
"fail_phased": "Pipeline phases ({phases}) completed without full convergence.",
|
||||
"fail_simple": "Maximum iterations ({max_iter}) reached without passing all checks.",
|
||||
"escalate_msg": "Human review required. The following issues could not be resolved automatically:",
|
||||
"escalate_title": "Escalation Report",
|
||||
"issue_tracker_title": "Issue Tracker Summary",
|
||||
"issue_tracker_desc": "Issues discovered across iterations and their final resolution status.",
|
||||
"metrics_title": "Review Metrics",
|
||||
"metrics_trend_title": "Metrics Trend",
|
||||
"metrics_iter": "Iter",
|
||||
"metrics_total_issues": "Total Issues",
|
||||
"metrics_na": "N/A",
|
||||
"iteration_details": "Iteration Details",
|
||||
},
|
||||
"ko": {
|
||||
"title": "교차 검증 리포트",
|
||||
@@ -84,11 +89,16 @@ _STRINGS: dict[str, dict[str, str]] = {
|
||||
"pass_msg": "모든 체크리스트 항목 충족. 과최적화/누락 없음.",
|
||||
"fail_phased": "파이프라인 페이즈 ({phases}) 완료, 완전한 수렴에 도달하지 못함.",
|
||||
"fail_simple": "최대 반복 횟수 ({max_iter})에 도달, 모든 검증을 통과하지 못함.",
|
||||
"escalate_msg": "사람의 확인이 필요합니다. 아래 이슈는 자동으로 해결할 수 없었습니다:",
|
||||
"escalate_title": "에스컬레이션 리포트",
|
||||
"issue_tracker_title": "이슈 트래커 요약",
|
||||
"issue_tracker_desc": "반복 과정에서 발견된 이슈와 최종 처리 상태입니다.",
|
||||
"metrics_title": "리뷰 메트릭",
|
||||
"metrics_trend_title": "메트릭 추이",
|
||||
"metrics_iter": "반복",
|
||||
"metrics_total_issues": "총 이슈",
|
||||
"metrics_na": "해당 없음",
|
||||
"iteration_details": "반복 상세",
|
||||
},
|
||||
}
|
||||
|
||||
@@ -181,20 +191,41 @@ def _build_simple_report(
|
||||
|
||||
out_of_scope_items: list[tuple[int, str]] = []
|
||||
|
||||
# Pre-scan iterations to collect out-of-scope items and review metrics
|
||||
# (needed before rendering final verdict / metrics sections)
|
||||
for iter_result in result.iterations:
|
||||
lines.append("---\n")
|
||||
lines.append(f"## {_t(config, 'iteration')} {iter_result.iteration}\n")
|
||||
for step in config.pipeline:
|
||||
output = iter_result.step_outputs.get(step.output_key, "")
|
||||
if step.role == "review":
|
||||
oos = _extract_out_of_scope(output)
|
||||
if oos:
|
||||
out_of_scope_items.append((iter_result.iteration, oos))
|
||||
step_metrics = parse_review_metrics(output)
|
||||
if iter_result.review_metrics is None:
|
||||
iter_result.review_metrics = step_metrics
|
||||
else:
|
||||
iter_result.review_metrics = _aggregate_metrics(
|
||||
iter_result.review_metrics, step_metrics,
|
||||
)
|
||||
|
||||
_append_iteration_steps(lines, config, iter_result, config.pipeline, out_of_scope_items)
|
||||
_append_final_verdict(lines, config, result)
|
||||
_append_issue_tracker_summary(lines, config, result)
|
||||
_append_review_metrics_table(lines, config, result)
|
||||
|
||||
lines.append("---\n")
|
||||
lines.append(f"## {_t(config, 'iteration_details')}\n")
|
||||
|
||||
for iter_result in result.iterations:
|
||||
lines.append(f"### {_t(config, 'iteration')} {iter_result.iteration}\n")
|
||||
|
||||
_append_iteration_steps(lines, config, iter_result, config.pipeline, out_of_scope_items, skip_extraction=True)
|
||||
|
||||
if iter_result.feedback:
|
||||
lines.append(f"**{_t(config, 'feedback_next')}** {iter_result.feedback[:200]}...")
|
||||
lines.append("")
|
||||
|
||||
_append_out_of_scope(lines, config, out_of_scope_items)
|
||||
_append_review_metrics_table(lines, config, result)
|
||||
_append_repeated_aggregate(lines, config, result)
|
||||
_append_final_verdict(lines, config, result)
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
@@ -211,14 +242,42 @@ def _build_phased_report(
|
||||
phase_map = {p.name: p for p in config.phases}
|
||||
out_of_scope_items: list[tuple[int, str]] = []
|
||||
|
||||
# Pre-scan iterations to collect out-of-scope items and review metrics
|
||||
for phase_name, phase_iters_iter in groupby(
|
||||
result.iterations, key=lambda ir: ir.phase_name,
|
||||
):
|
||||
phase_iters = list(phase_iters_iter)
|
||||
phase_config = phase_map.get(phase_name or "")
|
||||
steps = phase_config.steps if phase_config else config.pipeline
|
||||
for iter_result in phase_iters:
|
||||
for step in steps:
|
||||
output = iter_result.step_outputs.get(step.output_key, "")
|
||||
if step.role == "review":
|
||||
oos = _extract_out_of_scope(output)
|
||||
if oos:
|
||||
out_of_scope_items.append((iter_result.iteration, oos))
|
||||
step_metrics = parse_review_metrics(output)
|
||||
if iter_result.review_metrics is None:
|
||||
iter_result.review_metrics = step_metrics
|
||||
else:
|
||||
iter_result.review_metrics = _aggregate_metrics(
|
||||
iter_result.review_metrics, step_metrics,
|
||||
)
|
||||
|
||||
_append_final_verdict(lines, config, result)
|
||||
_append_issue_tracker_summary(lines, config, result)
|
||||
_append_review_metrics_table(lines, config, result)
|
||||
|
||||
lines.append("---\n")
|
||||
lines.append(f"## {_t(config, 'iteration_details')}\n")
|
||||
|
||||
for phase_name, phase_iters_iter in groupby(
|
||||
result.iterations, key=lambda ir: ir.phase_name,
|
||||
):
|
||||
phase_iters = list(phase_iters_iter)
|
||||
phase_config = phase_map.get(phase_name or "")
|
||||
|
||||
lines.append("---\n")
|
||||
lines.append(f"## {_t(config, 'phase')}: {phase_name}\n")
|
||||
lines.append(f"### {_t(config, 'phase')}: {phase_name}\n")
|
||||
|
||||
if phase_config:
|
||||
step_desc = " → ".join(s.name for s in phase_config.steps)
|
||||
@@ -242,14 +301,17 @@ def _build_phased_report(
|
||||
verdict_label += " ✓"
|
||||
else:
|
||||
verdict_label = " — PASS ✓"
|
||||
elif iter_result.verdict == "ESCALATE":
|
||||
consecutive = 0
|
||||
verdict_label = " — ESCALATE"
|
||||
else:
|
||||
consecutive = 0
|
||||
verdict_label = " — FAIL"
|
||||
|
||||
lines.append(
|
||||
f"### {_t(config, 'iteration')} {iter_result.iteration}{verdict_label}\n"
|
||||
f"#### {_t(config, 'iteration')} {iter_result.iteration}{verdict_label}\n"
|
||||
)
|
||||
_append_iteration_steps(lines, config, iter_result, steps, out_of_scope_items)
|
||||
_append_iteration_steps(lines, config, iter_result, steps, out_of_scope_items, skip_extraction=True)
|
||||
|
||||
if iter_result.feedback:
|
||||
lines.append(
|
||||
@@ -258,9 +320,7 @@ def _build_phased_report(
|
||||
lines.append("")
|
||||
|
||||
_append_out_of_scope(lines, config, out_of_scope_items)
|
||||
_append_review_metrics_table(lines, config, result)
|
||||
_append_repeated_aggregate(lines, config, result)
|
||||
_append_final_verdict(lines, config, result)
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
@@ -309,8 +369,14 @@ def _append_iteration_steps(
|
||||
iter_result: IterationResult,
|
||||
steps: list[StepConfig],
|
||||
out_of_scope_items: list[tuple[int, str]],
|
||||
*,
|
||||
skip_extraction: bool = False,
|
||||
) -> None:
|
||||
"""Append step details for one iteration."""
|
||||
"""Append step details for one iteration.
|
||||
|
||||
If *skip_extraction* is True, out-of-scope and review-metrics parsing
|
||||
is skipped (useful when a pre-scan already collected that data).
|
||||
"""
|
||||
for step in steps:
|
||||
agent_result = iter_result.step_results.get(step.output_key)
|
||||
output = iter_result.step_outputs.get(step.output_key, "")
|
||||
@@ -334,7 +400,7 @@ def _append_iteration_steps(
|
||||
lines.append(output)
|
||||
lines.append("")
|
||||
|
||||
if step.role == "review":
|
||||
if not skip_extraction and step.role == "review":
|
||||
oos = _extract_out_of_scope(output)
|
||||
if oos:
|
||||
out_of_scope_items.append((iter_result.iteration, oos))
|
||||
@@ -471,6 +537,12 @@ def _append_final_verdict(
|
||||
|
||||
if result.final_verdict == "PASS":
|
||||
lines.append(_t(config, "pass_msg"))
|
||||
elif result.final_verdict == "ESCALATE":
|
||||
lines.append(_t(config, "escalate_msg"))
|
||||
lines.append("")
|
||||
for issue in result.escalated_issues:
|
||||
lines.append(f"- {issue}")
|
||||
lines.append("")
|
||||
else:
|
||||
if config.phases:
|
||||
phase_names = " → ".join(p.name for p in config.phases)
|
||||
@@ -481,6 +553,121 @@ def _append_final_verdict(
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Issue Tracker extraction from senior/aggregate outputs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Matches an "Issue Tracker" / "이슈 트래커" Markdown heading (level 2 or
# deeper) and captures the pipe-delimited table that follows it.
# Blank lines between the heading and the table are tolerated, as is trailing
# whitespace (including "\r") after each row, so that conventionally formatted
# Markdown is not silently ignored.
_ISSUE_TRACKER_PATTERN = re.compile(
    r"##+ (?:Issue Tracker|이슈 트래커)[^\n]*\n"
    r"(?:[ \t\r]*\n)*"
    r"((?:\|[^\n]+\|[ \t\r]*\n?)+)",
)

# One data row of the tracker table:
# | ISS-NNN | severity | description | status | since |
# Header and separator rows do not match because the first cell must be ISS-<digits>.
_TRACKER_ROW_PATTERN = re.compile(
    r"^\|\s*(ISS-\d+)\s*\|\s*(\S+)\s*\|\s*(.*?)\s*\|\s*(\S+)\s*\|\s*(\S+)\s*\|",
    re.MULTILINE,
)

# Inline issue tag used by the fallback path: "ISS-NNN [severity][...] description".
_ISSUE_TAG_PATTERN = re.compile(
    r"(ISS-\d+)\s*\[(\w+)\]\[.*?\]\s*(.*?)(?:\n|$)",
)


def _parse_tracker_table(table_text: str) -> list[dict[str, str]]:
    """Return one dict per ``ISS-NNN`` data row found in *table_text*."""
    return [
        {
            "id": row.group(1),
            "severity": row.group(2),
            "description": row.group(3).strip(),
            "status": row.group(4),
            "since": row.group(5),
        }
        for row in _TRACKER_ROW_PATTERN.finditer(table_text)
    ]


def _extract_issue_tracker_rows(
    result: "PipelineResult",
) -> list[dict[str, str]]:
    """Extract the latest Issue Tracker table from pipeline results.

    Iterations are scanned from last to first; within an iteration the step
    outputs are scanned in their stored order. The first output whose tracker
    table yields at least one ``ISS-NNN`` row wins and its rows are returned.

    If no output contains a usable table, every output of every iteration
    (oldest first) is searched for inline ``ISS-NNN [severity][...] text``
    tags instead. The first occurrence of each ID is kept, its description is
    truncated to 80 characters, its status is reported as ``"Open"`` and its
    ``since`` value is ``v<iteration>``.

    Args:
        result: Pipeline result whose ``iterations`` each expose
            ``step_outputs`` (mapping of output key to text) and ``iteration``.

    Returns:
        A list of dicts with the keys ``id``, ``severity``, ``description``,
        ``status`` and ``since``; empty when nothing was found.
    """
    # Preferred source: the most recent iteration that carries a tracker table.
    for ir in reversed(result.iterations):
        for output in ir.step_outputs.values():
            match = _ISSUE_TRACKER_PATTERN.search(output)
            if match is None:
                continue
            rows = _parse_tracker_table(match.group(1))
            if rows:
                return rows

    # Fallback: collect inline ISS-NNN tags; the earliest sighting of an ID wins.
    seen: dict[str, dict[str, str]] = {}
    for ir in result.iterations:
        for output in ir.step_outputs.values():
            for tag in _ISSUE_TAG_PATTERN.finditer(output):
                iss_id = tag.group(1)
                if iss_id not in seen:
                    seen[iss_id] = {
                        "id": iss_id,
                        "severity": tag.group(2),
                        "description": tag.group(3).strip()[:80],
                        "status": "Open",
                        "since": f"v{ir.iteration}",
                    }
    return list(seen.values())
|
||||
|
||||
|
||||
# A pipe that is not already backslash-escaped.
_UNESCAPED_PIPE = re.compile(r"(?<!\\)\|")


def _escape_table_cell(text: str) -> str:
    """Backslash-escape bare ``|`` characters so *text* stays inside one table cell."""
    return _UNESCAPED_PIPE.sub(r"\\|", text)


def _append_issue_tracker_summary(
    lines: list[str],
    config: PipelineConfig,
    result: PipelineResult,
) -> None:
    """Append a consolidated issue tracker table to the report.

    The rows come from ``_extract_issue_tracker_rows(result)``. When that
    returns nothing, *lines* is left untouched. Otherwise a horizontal rule,
    a localized title and description, a table header (Korean when
    ``config.language`` is ``"ko"``, English otherwise) and one table row per
    issue are appended, followed by an empty line.

    Cell values are pipe-escaped before interpolation: free-form descriptions
    may contain ``|``, which would otherwise split the row into extra columns.

    Args:
        lines: Report lines collected so far; mutated in place.
        config: Pipeline configuration used for localized strings.
        result: Pipeline result the issue rows are extracted from.
    """
    rows = _extract_issue_tracker_rows(result)
    if not rows:
        return

    lines.append("---\n")
    lines.append(f"## {_t(config, 'issue_tracker_title')}\n")
    lines.append(f"{_t(config, 'issue_tracker_desc')}\n")

    # The column headers are not part of the translation table, so the
    # language is checked directly; a config without the attribute gets English.
    lang = getattr(config, "language", "en")
    if lang == "ko":
        lines.append("| ISS-ID | 심각도 | 설명 | 상태 | 최초 발견 |")
    else:
        lines.append("| ISS-ID | Severity | Description | Status | Since |")
    lines.append("|--------|----------|-------------|--------|-------|")

    for row in rows:
        cells = {name: _escape_table_cell(value) for name, value in row.items()}
        lines.append(
            f"| {cells['id']} | {cells['severity']} "
            f"| {cells['description']} | {cells['status']} | {cells['since']} |"
        )
    lines.append("")
|
||||
|
||||
|
||||
def print_escalation_report(
    config: PipelineConfig,
    result: PipelineResult,
) -> None:
    """Write a red/yellow ANSI-highlighted escalation banner to stdout.

    The banner consists of a 60-character ``=`` rule, the localized
    escalation title, a closing rule, the localized escalation message,
    one bullet per entry in ``result.escalated_issues`` and a final rule.

    Args:
        config: Pipeline configuration used to look up localized strings.
        result: Pipeline result whose ``escalated_issues`` are listed.
    """
    red, yellow, bold, reset = "\033[31m", "\033[33m", "\033[1m", "\033[0m"
    rule = "=" * 60

    heading = _t(config, "escalate_title")
    message = _t(config, "escalate_msg")
    bullets = [f" {red}•{reset} {issue}" for issue in result.escalated_issues]

    # Empty entries stand for the blank lines of the banner; joining with
    # "\n" and letting print() add the final newline yields the full layout.
    banner = [
        "",
        f"{red}{bold}{rule}",
        f" {heading}",
        f"{rule}{reset}",
        "",
        f"{yellow}{message}{reset}",
        "",
        *bullets,
        "",
        f"{red}{bold}{rule}{reset}",
        "",
    ]
    print("\n".join(banner))
|
||||
|
||||
|
||||
def _append_repeated_aggregate(
|
||||
lines: list[str],
|
||||
config: PipelineConfig,
|
||||
|
||||
Reference in New Issue
Block a user