Files
cross-eval/cross_eval/models.py
chungyeong 204e071b74 feat: ESCALATE verdict, issue tracker, onboarding commands
Add 3-verdict system (PASS/FAIL/ESCALATE) with priority handling across
simple and phased pipelines. Senior reviewers can now escalate issues
requiring human intervention, immediately breaking the review loop.

- ESCALATE verdict extraction with highest priority over PASS/FAIL
- Issue Tracker tables (ISS-NNN) carried across iterations
- Auto-escalate heuristic using (file, keyword) composite fingerprints
- Report restructuring: executive view first (verdict → tracker → metrics)
- Onboarding: `doctor`, `demo`, `init --guided` commands
- Exit codes: PASS=0, FAIL=1, ESCALATE=2
- 87 tests passing (54 config + 25 onboarding + 8 integration)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-13 18:19:05 +09:00

121 lines
3.3 KiB
Python

"""Data models for cross-eval pipeline."""
from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
@dataclass
class AgentConfig:
    """Configuration record for one agent.

    ``name`` and ``command`` are required; every remaining field has a
    default. This class only stores the values -- how they are consumed
    is decided by code outside this module.
    """

    # Required identification / invocation fields.
    name: str
    command: str

    # Optional settings. ``args`` gets its own fresh list per instance.
    args: list[str] = field(default_factory=list)
    system_prompt: Optional[str] = None
    reasoning_effort: Optional[str] = None
    stdin_mode: bool = False
@dataclass
class StepConfig:
    """Configuration record for a single pipeline step.

    The first five fields are required; the rest fall back to defaults.
    """

    name: str
    # Reference to a key in the agents mapping.
    agent: str
    # Either "coding" or "review".
    role: str
    # Either "default:<role>" or a file path.
    prompt_template: str
    output_key: str

    # Verdict settings: a flag plus the regular-expression source used
    # with it (by default it matches "VERDICT:" followed by "PASS").
    verdict: bool = False
    verdict_pattern: str = r"VERDICT:\s*PASS"

    # Per-step context overrides; each instance starts with its own
    # empty dict.
    context_override: dict[str, str] = field(default_factory=dict)

    # When true, the step can run concurrently with adjacent parallel
    # steps.
    parallel: bool = False
@dataclass
class PhaseConfig:
    """Configuration record for one phase of a multi-phase pipeline.

    A phase such as "review-fix" groups an ordered list of steps together
    with its own iteration limits.
    """

    name: str

    # Steps belonging to this phase; a new empty list per instance.
    steps: list[StepConfig] = field(default_factory=list)

    # Iteration limit for the phase.
    max_iterations: int = 10

    # Stop after this many consecutive PASSes.
    consecutive_pass: int = 1
@dataclass
class PipelineConfig:
    """Top-level cross-eval configuration record.

    Every field has a default, so ``PipelineConfig()`` is a valid call.
    Mutable defaults are built per instance through ``default_factory``.
    """

    # --- General run settings -------------------------------------------
    output_dir: Path = field(default_factory=lambda: Path("output"))
    max_iterations: int = 3
    min_iterations: int = 1
    verbose: bool = False
    # Language code: "en" or "ko".
    language: str = "en"

    # --- Inputs and agent definitions -----------------------------------
    inputs: dict[str, Path | str] = field(default_factory=dict)
    agents: dict[str, AgentConfig] = field(default_factory=dict)

    # --- Role name lists -------------------------------------------------
    coders: list[str] = field(default_factory=list)
    reviewers: list[str] = field(default_factory=list)
    seniors: list[str] = field(default_factory=list)

    # --- Flat step list and phase list ----------------------------------
    pipeline: list[StepConfig] = field(default_factory=list)
    phases: list[PhaseConfig] = field(default_factory=list)

    preset_name: str = "custom"

    # --- Bookkeeping fields, hidden from repr() -------------------------
    # NOTE(review): presumably the path of the loaded config file and its
    # modification time -- confirm against the code that sets them.
    _config_path: Optional[Path] = field(default=None, repr=False)
    _config_mtime: Optional[float] = field(default=None, repr=False)
@dataclass
class AgentResult:
    """Outcome record of one agent invocation.

    All five fields are required and carry no defaults.
    """

    # What the invocation produced.
    output: str
    exit_code: int

    # Which agent and which step the result belongs to.
    agent_name: str
    step_name: str

    # Duration of the invocation, in seconds.
    duration_seconds: float
@dataclass
class ReviewMetrics:
    """Counters parsed out of a single review output.

    Every counter is an ``int`` that starts at zero.
    """

    # Counts by severity.
    critical: int = 0
    major: int = 0
    minor: int = 0

    # Counts by category.
    over_engineering: int = 0
    omission: int = 0

    # Counts by assessment.
    confirmed: int = 0
    dismissed: int = 0
@dataclass
class IterationResult:
    """Outcome record of one pipeline iteration.

    Only ``iteration`` is required. The two mappings default to fresh
    empty dicts and every other field defaults to ``None``.
    """

    iteration: int

    # Per-step data: full result objects and plain output strings.
    step_results: dict[str, AgentResult] = field(default_factory=dict)
    step_outputs: dict[str, str] = field(default_factory=dict)

    # Iteration-level values; ``None`` when not set.
    verdict: Optional[str] = None
    feedback: Optional[str] = None
    phase_name: Optional[str] = None

    # Optional extras attached to the iteration.
    repeated_aggregate_warning: Optional[str] = None
    review_metrics: Optional[ReviewMetrics] = None
    escalated_issues: Optional[str] = None
@dataclass
class PipelineResult:
    """Outcome record of a complete pipeline run.

    Every field has a default. ``final_verdict`` starts out as
    ``"MAX_ITERATIONS_REACHED"`` and stays that way unless it is
    overwritten.
    """

    # One entry per iteration; a new empty list per instance.
    iterations: list[IterationResult] = field(default_factory=list)

    final_verdict: str = "MAX_ITERATIONS_REACHED"
    total_duration: float = 0.0

    # Directory associated with the run; ``None`` when not set.
    run_dir: Optional[Path] = None

    # Run-level collections, each a new empty list per instance.
    repeated_aggregate_warnings: list[str] = field(default_factory=list)
    escalated_issues: list[str] = field(default_factory=list)