Files
cross-eval/cross_eval/models.py
이충영 에이닷서비스개발 ee4f1a07ef initial commit
2026-03-11 21:53:14 +09:00

119 lines
3.2 KiB
Python

"""Data models for cross-eval pipeline."""
from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
@dataclass
class AgentConfig:
    """Definition of a single agent.

    ``name`` and ``command`` are required; every other field has a default.
    """

    name: str
    command: str
    # NOTE(review): presumably the arguments passed along with ``command``
    # -- confirm against the code that launches agents.
    # ``default_factory`` gives each instance its own list.
    args: list[str] = field(default_factory=list)
    system_prompt: Optional[str] = None
    reasoning_effort: Optional[str] = None
    # NOTE(review): presumably selects feeding the prompt through stdin
    # -- confirm against the code that launches agents.
    stdin_mode: bool = False
@dataclass
class StepConfig:
    """One step in the pipeline.

    The first five fields are required and positional; the rest default to
    "no verdict, no overrides, not parallel".
    """

    name: str
    # Reference to a key of the agents mapping.
    agent: str
    # "generate" or "review".
    role: str
    # "default:<role>" or a file path.
    prompt_template: str
    output_key: str
    verdict: bool = False
    # NOTE(review): presumably the regex applied to this step's output to
    # detect a PASS when ``verdict`` is set -- confirm against the runner.
    verdict_pattern: str = r"VERDICT:\s*PASS"
    # ``default_factory`` gives each instance its own dict.
    context_override: dict[str, str] = field(default_factory=dict)
    # Can run concurrently with adjacent parallel steps.
    parallel: bool = False
@dataclass
class PhaseConfig:
    """One phase in a multi-phase pipeline (e.g. review-fix)."""

    name: str
    # Steps belonging to this phase; each instance gets its own list.
    steps: list[StepConfig] = field(default_factory=list)
    max_iterations: int = 10
    # Stop after N consecutive PASSes.
    consecutive_pass: int = 1
@dataclass
class PipelineConfig:
    """Full cross-eval configuration.

    Every field has a default, so ``PipelineConfig()`` is a valid empty
    configuration. Mutable defaults use ``default_factory`` so instances
    never share lists or dicts.
    """

    output_dir: Path = field(default_factory=lambda: Path("output"))
    max_iterations: int = 3
    min_iterations: int = 1
    verbose: bool = False
    # "en" or "ko".
    language: str = "en"
    inputs: dict[str, Path | str] = field(default_factory=dict)
    # Agent definitions keyed by string.
    agents: dict[str, AgentConfig] = field(default_factory=dict)
    # NOTE(review): the three lists below presumably hold keys of ``agents``
    # grouped by role -- confirm against the config loader.
    coders: list[str] = field(default_factory=list)
    reviewers: list[str] = field(default_factory=list)
    seniors: list[str] = field(default_factory=list)
    pipeline: list[StepConfig] = field(default_factory=list)
    phases: list[PhaseConfig] = field(default_factory=list)
    preset_name: str = "custom"
    # Bookkeeping fields, excluded from ``repr``.
    # NOTE(review): presumably the source config file and its modification
    # time -- confirm against the config loader.
    _config_path: Optional[Path] = field(default=None, repr=False)
    _config_mtime: Optional[float] = field(default=None, repr=False)
@dataclass
class AgentResult:
    """Result from an agent invocation.

    All five fields are required.
    """

    output: str
    exit_code: int
    agent_name: str
    step_name: str
    duration_seconds: float
@dataclass
class ReviewMetrics:
    """Parsed metrics from a single review output.

    Every counter defaults to zero.
    """

    # Severity counts
    critical: int = 0
    major: int = 0
    minor: int = 0

    # Category counts
    over_engineering: int = 0
    omission: int = 0

    # Assessment counts
    confirmed: int = 0
    dismissed: int = 0
@dataclass
class IterationResult:
    """Results from a single iteration.

    Only ``iteration`` is required; the mappings start empty and every
    optional field starts as ``None``.
    """

    iteration: int
    # Each instance gets its own dicts via ``default_factory``.
    # NOTE(review): both mappings are presumably keyed by step name or
    # output key -- confirm against the code that fills them.
    step_results: dict[str, AgentResult] = field(default_factory=dict)
    step_outputs: dict[str, str] = field(default_factory=dict)
    verdict: Optional[str] = None
    feedback: Optional[str] = None
    phase_name: Optional[str] = None
    repeated_aggregate_warning: Optional[str] = None
    review_metrics: Optional[ReviewMetrics] = None
@dataclass
class PipelineResult:
    """Results from the entire pipeline run.

    A freshly constructed result has no iterations and carries the
    ``"MAX_ITERATIONS_REACHED"`` verdict until a caller overwrites it.
    """

    iterations: list[IterationResult] = field(default_factory=list)
    final_verdict: str = "MAX_ITERATIONS_REACHED"
    total_duration: float = 0.0
    run_dir: Optional[Path] = None
    repeated_aggregate_warnings: list[str] = field(default_factory=list)