"""Data models for cross-eval pipeline.""" from __future__ import annotations from dataclasses import dataclass, field from pathlib import Path from typing import Optional @dataclass class AgentConfig: """Definition of a single agent.""" name: str command: str args: list[str] = field(default_factory=list) system_prompt: Optional[str] = None reasoning_effort: Optional[str] = None stdin_mode: bool = False @dataclass class StepConfig: """One step in the pipeline.""" name: str agent: str # reference to agents key role: str # "generate" or "review" prompt_template: str # "default:" or file path output_key: str verdict: bool = False verdict_pattern: str = r"VERDICT:\s*PASS" context_override: dict[str, str] = field(default_factory=dict) parallel: bool = False # Can run concurrently with adjacent parallel steps @dataclass class PhaseConfig: """One phase in a multi-phase pipeline (e.g. review-fix).""" name: str steps: list[StepConfig] = field(default_factory=list) max_iterations: int = 10 consecutive_pass: int = 1 # stop after N consecutive PASSes @dataclass class PipelineConfig: """Full cross-eval configuration.""" output_dir: Path = field(default_factory=lambda: Path("output")) max_iterations: int = 3 min_iterations: int = 1 verbose: bool = False language: str = "en" # "en" or "ko" inputs: dict[str, Path | str] = field(default_factory=dict) agents: dict[str, AgentConfig] = field(default_factory=dict) coders: list[str] = field(default_factory=list) reviewers: list[str] = field(default_factory=list) seniors: list[str] = field(default_factory=list) pipeline: list[StepConfig] = field(default_factory=list) phases: list[PhaseConfig] = field(default_factory=list) preset_name: str = "custom" _config_path: Optional[Path] = field(default=None, repr=False) _config_mtime: Optional[float] = field(default=None, repr=False) @dataclass class AgentResult: """Result from an agent invocation.""" output: str exit_code: int agent_name: str step_name: str duration_seconds: float @dataclass class ReviewMetrics: """Parsed metrics from a single review output.""" # Severity counts critical: int = 0 major: int = 0 minor: int = 0 # Category counts over_engineering: int = 0 omission: int = 0 # Assessment counts confirmed: int = 0 dismissed: int = 0 @dataclass class IterationResult: """Results from a single iteration.""" iteration: int step_results: dict[str, AgentResult] = field(default_factory=dict) step_outputs: dict[str, str] = field(default_factory=dict) verdict: Optional[str] = None feedback: Optional[str] = None phase_name: Optional[str] = None repeated_aggregate_warning: Optional[str] = None review_metrics: Optional[ReviewMetrics] = None @dataclass class PipelineResult: """Results from the entire pipeline run.""" iterations: list[IterationResult] = field(default_factory=list) final_verdict: str = "MAX_ITERATIONS_REACHED" total_duration: float = 0.0 run_dir: Optional[Path] = None repeated_aggregate_warnings: list[str] = field(default_factory=list)