initial commit
This commit is contained in:
118
cross_eval/models.py
Normal file
118
cross_eval/models.py
Normal file
@@ -0,0 +1,118 @@
|
||||
"""Data models for cross-eval pipeline."""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
class AgentConfig:
    """Definition of a single agent.

    Only ``name`` and ``command`` are required; every other field has a
    default. This class is a plain data holder: it performs no validation
    and does not launch anything itself.
    """

    name: str
    command: str
    # default_factory gives each instance its own list (no shared mutable default).
    args: list[str] = field(default_factory=list)
    system_prompt: Optional[str] = None
    reasoning_effort: Optional[str] = None
    # NOTE(review): presumably tells the runner to feed the prompt via stdin — confirm.
    stdin_mode: bool = False
|
||||
|
||||
|
||||
@dataclass
class StepConfig:
    """One step in the pipeline.

    The first five fields are required and positional order matters for
    callers that construct steps positionally. No validation is performed
    here; the allowed values noted below are conventions, not checks.
    """

    name: str
    agent: str  # reference to agents key
    role: str  # "generate" or "review"
    prompt_template: str  # "default:<role>" or file path
    output_key: str
    verdict: bool = False
    # Raw-string regex; the default matches "VERDICT:" + optional whitespace + "PASS".
    verdict_pattern: str = r"VERDICT:\s*PASS"
    # default_factory gives each instance its own dict.
    context_override: dict[str, str] = field(default_factory=dict)
    parallel: bool = False  # Can run concurrently with adjacent parallel steps
|
||||
|
||||
|
||||
@dataclass
class PhaseConfig:
    """One phase in a multi-phase pipeline (e.g. review-fix).

    Only ``name`` is required. The class stores configuration only; the
    iteration and stopping logic that consumes it is not defined here.
    """

    name: str
    # default_factory gives each phase its own step list.
    steps: list[StepConfig] = field(default_factory=list)
    max_iterations: int = 10
    consecutive_pass: int = 1  # stop after N consecutive PASSes
|
||||
|
||||
|
||||
@dataclass
class PipelineConfig:
    """Full cross-eval configuration.

    Every field has a default, so ``PipelineConfig()`` is valid. All
    container defaults are built through ``default_factory`` so instances
    never share mutable state. No validation is performed here.
    """

    # Built lazily per instance; defaults to the relative path "output".
    output_dir: Path = field(default_factory=lambda: Path("output"))
    max_iterations: int = 3
    min_iterations: int = 1
    verbose: bool = False
    language: str = "en"  # "en" or "ko"
    inputs: dict[str, Path | str] = field(default_factory=dict)
    agents: dict[str, AgentConfig] = field(default_factory=dict)
    # NOTE(review): presumably these lists hold keys of ``agents`` — confirm with the loader.
    coders: list[str] = field(default_factory=list)
    reviewers: list[str] = field(default_factory=list)
    seniors: list[str] = field(default_factory=list)
    pipeline: list[StepConfig] = field(default_factory=list)
    phases: list[PhaseConfig] = field(default_factory=list)
    preset_name: str = "custom"
    # Private bookkeeping fields: excluded from repr(), but still constructor
    # parameters and still part of equality comparison.
    _config_path: Optional[Path] = field(default=None, repr=False)
    _config_mtime: Optional[float] = field(default=None, repr=False)
|
||||
|
||||
|
||||
@dataclass
class AgentResult:
    """Result from an agent invocation.

    All five fields are required and have no defaults. Positional order is
    ``output, exit_code, agent_name, step_name, duration_seconds``.
    """

    output: str
    exit_code: int
    agent_name: str
    step_name: str
    duration_seconds: float
|
||||
|
||||
|
||||
@dataclass
class ReviewMetrics:
    """Parsed metrics from a single review output.

    Every counter defaults to 0, so ``ReviewMetrics()`` represents a review
    with nothing counted. The parsing that fills these counters is not
    defined here.
    """

    # Severity counts
    critical: int = 0
    major: int = 0
    minor: int = 0

    # Category counts
    over_engineering: int = 0
    omission: int = 0

    # Assessment counts
    confirmed: int = 0
    dismissed: int = 0
|
||||
|
||||
|
||||
@dataclass
class IterationResult:
    """Results from a single iteration.

    Only ``iteration`` is required. The two dict fields get a fresh dict
    per instance; every other optional field starts as ``None`` until
    something assigns it.
    """

    iteration: int
    # NOTE(review): both dicts are presumably keyed by step name / output key — confirm.
    step_results: dict[str, AgentResult] = field(default_factory=dict)
    step_outputs: dict[str, str] = field(default_factory=dict)
    verdict: Optional[str] = None
    feedback: Optional[str] = None
    phase_name: Optional[str] = None
    repeated_aggregate_warning: Optional[str] = None
    review_metrics: Optional[ReviewMetrics] = None
|
||||
|
||||
|
||||
@dataclass
class PipelineResult:
    """Results from the entire pipeline run.

    Every field has a default, so an empty result can be created first and
    filled in afterwards. ``final_verdict`` starts as
    ``"MAX_ITERATIONS_REACHED"`` and keeps that value unless it is
    overwritten.
    """

    # Fresh list per instance (default_factory).
    iterations: list[IterationResult] = field(default_factory=list)
    final_verdict: str = "MAX_ITERATIONS_REACHED"
    total_duration: float = 0.0
    run_dir: Optional[Path] = None
    repeated_aggregate_warnings: list[str] = field(default_factory=list)
|
||||
Reference in New Issue
Block a user