# Source-viewer header (not part of the module's code):
#   path: cross-eval/cross_eval/models.py
#   timestamp: 2026-03-13 21:52:13 +09:00
#   size: 139 lines, 4.0 KiB
#   language: Python

"""Data models for cross-eval pipeline."""
from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
@dataclass
class AgentConfig:
    """Definition of a single agent.

    ``name`` and ``command`` are required; every other field has a default.
    """

    name: str
    command: str
    # default_factory gives every instance its own list rather than a shared one.
    args: list[str] = field(default_factory=list)
    system_prompt: Optional[str] = None
    reasoning_effort: Optional[str] = None
    stdin_mode: bool = False
    agentic: bool = False  # run in worktree, capture git diff instead of stdout
@dataclass
class StepConfig:
    """One step in the pipeline.

    The first five fields are required; the verdict, context and parallelism
    settings all have defaults.
    """

    name: str
    agent: str  # reference to agents key
    role: str  # "coding" or "review"
    prompt_template: str  # "default:<role>" or file path
    output_key: str
    verdict: bool = False
    # NOTE(review): presumably a regex applied to the step output when
    # ``verdict`` is set -- confirm against the code that consumes it.
    verdict_pattern: str = r"VERDICT:\s*PASS"
    # default_factory gives every instance its own dict rather than a shared one.
    context_override: dict[str, str] = field(default_factory=dict)
    parallel: bool = False  # Can run concurrently with adjacent parallel steps
@dataclass
class PhaseConfig:
    """One phase in a multi-phase pipeline (e.g. review-fix).

    Only ``name`` is required; a phase starts with an empty step list.
    """

    name: str
    # default_factory gives every instance its own list rather than a shared one.
    steps: list[StepConfig] = field(default_factory=list)
    max_iterations: int = 10
    consecutive_pass: int = 1  # stop after N consecutive PASSes
@dataclass
class ExecutionConfig:
    """Runtime execution policy for agent subprocesses.

    Every field has a default, so ``ExecutionConfig()`` is a valid policy.
    """

    mode: str = "agent-decides"
    command_policy: str = "broad"
    inherit_env: bool = True
    # A lambda factory is used so each instance gets a fresh copy of the
    # default file list instead of sharing one mutable list.
    auto_env_files: list[str] = field(default_factory=lambda: [".env", ".env.local"])
    env_files: list[str] = field(default_factory=list)
    expose_env_names: bool = True
    auto_context_targets: list[str] = field(default_factory=list)
@dataclass
class PipelineConfig:
    """Full cross-eval configuration.

    Every field has a default, so ``PipelineConfig()`` yields a complete
    configuration with empty agent, step and phase collections.
    """

    # Factories are used for all non-scalar defaults so that instances never
    # share a mutable default object.
    output_dir: Path = field(default_factory=lambda: Path(".cross-eval/output"))
    max_iterations: int = 3
    min_iterations: int = 1
    verbose: bool = False
    language: str = "en"  # "en" or "ko"
    execution: ExecutionConfig = field(default_factory=ExecutionConfig)
    inputs: dict[str, Path | str] = field(default_factory=dict)
    agents: dict[str, AgentConfig] = field(default_factory=dict)
    coders: list[str] = field(default_factory=list)
    reviewers: list[str] = field(default_factory=list)
    seniors: list[str] = field(default_factory=list)
    pipeline: list[StepConfig] = field(default_factory=list)
    phases: list[PhaseConfig] = field(default_factory=list)
    preset_name: str = "custom"
    # repr=False keeps these two bookkeeping fields out of the generated
    # __repr__; they are still accepted by __init__ and compared by __eq__.
    _config_path: Optional[Path] = field(default=None, repr=False)
    _config_mtime: Optional[float] = field(default=None, repr=False)
@dataclass
class AgentResult:
    """Result from an agent invocation.

    The first five fields are required; ``transcript`` and
    ``command_preview`` default to the empty string.
    """

    output: str
    exit_code: int
    agent_name: str
    step_name: str
    duration_seconds: float
    transcript: str = ""
    command_preview: str = ""
@dataclass
class ReviewMetrics:
    """Parsed metrics from a single review output.

    All counters default to zero, so ``ReviewMetrics()`` is an empty tally.
    """

    # Severity counts
    critical: int = 0
    major: int = 0
    minor: int = 0
    # Category counts
    over_engineering: int = 0
    omission: int = 0
    # Assessment counts
    confirmed: int = 0
    dismissed: int = 0
@dataclass
class IterationResult:
    """Results from a single iteration.

    Only ``iteration`` is required; the result maps start empty and every
    optional field starts as ``None``.
    """

    iteration: int
    # default_factory gives every instance its own dicts rather than shared ones.
    step_results: dict[str, AgentResult] = field(default_factory=dict)
    step_outputs: dict[str, str] = field(default_factory=dict)
    verdict: Optional[str] = None
    feedback: Optional[str] = None
    phase_name: Optional[str] = None
    repeated_aggregate_warning: Optional[str] = None
    review_metrics: Optional[ReviewMetrics] = None
    escalated_issues: Optional[str] = None
@dataclass
class PipelineResult:
    """Results from the entire pipeline run.

    Every field has a default; a freshly constructed result has no
    iterations and a ``final_verdict`` of ``"MAX_ITERATIONS_REACHED"``.
    """

    # default_factory gives every instance its own lists rather than shared ones.
    iterations: list[IterationResult] = field(default_factory=list)
    final_verdict: str = "MAX_ITERATIONS_REACHED"
    total_duration: float = 0.0
    run_dir: Optional[Path] = None
    repeated_aggregate_warnings: list[str] = field(default_factory=list)
    escalated_issues: list[str] = field(default_factory=list)
    agentic_branch: Optional[str] = None