137 lines
3.9 KiB
Python
137 lines
3.9 KiB
Python
"""Data models for cross-eval pipeline."""
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
|
|
@dataclass
class AgentConfig:
    """Configuration record describing one agent.

    Attributes:
        name: Agent name (required).
        command: Command string for the agent (required).
        args: Argument list; each instance gets its own empty list by default.
        system_prompt: Optional system prompt; ``None`` when not set.
        reasoning_effort: Optional reasoning-effort setting; ``None`` when
            not set.
        stdin_mode: Flag, off by default.
            NOTE(review): presumably selects passing input via stdin -- confirm
            against the runner.
        agentic: When true, the agent runs in a worktree and its git diff is
            captured instead of stdout.
    """

    # Required fields.
    name: str
    command: str

    # Optional fields.
    args: list[str] = field(default_factory=list)
    system_prompt: Optional[str] = None
    reasoning_effort: Optional[str] = None
    stdin_mode: bool = False
    # Run in worktree, capture git diff instead of stdout.
    agentic: bool = False
|
|
|
|
|
|
@dataclass
class StepConfig:
    """Settings for a single pipeline step.

    Attributes:
        name: Step name (required).
        agent: Key referring to an entry in the agents mapping.
        role: Either "coding" or "review".
        prompt_template: Either "default:<role>" or a file path.
        output_key: Key associated with this step's output (required).
        verdict: Flag, off by default.
            NOTE(review): presumably marks the step as verdict-producing --
            confirm against the runner.
        verdict_pattern: Regular-expression source string stored alongside
            ``verdict``; see the field default for the stock pattern.
        context_override: String-to-string mapping; each instance gets its own
            empty dict by default.
        parallel: When true, the step can run concurrently with adjacent
            parallel steps.
    """

    # Required fields.
    name: str
    # Reference to agents key.
    agent: str
    # "coding" or "review".
    role: str
    # "default:<role>" or file path.
    prompt_template: str
    output_key: str

    # Optional fields.
    verdict: bool = False
    verdict_pattern: str = r"VERDICT:\s*PASS"
    context_override: dict[str, str] = field(default_factory=dict)
    # Can run concurrently with adjacent parallel steps.
    parallel: bool = False
|
|
|
|
|
|
@dataclass
class PhaseConfig:
    """A single phase of a multi-phase pipeline (for example, review-fix).

    Attributes:
        name: Phase name (required).
        steps: Step configurations belonging to this phase; each instance gets
            its own empty list by default.
        max_iterations: Iteration limit for the phase; defaults to 10.
        consecutive_pass: Stop after this many consecutive PASSes; defaults
            to 1.
    """

    name: str

    steps: list[StepConfig] = field(default_factory=list)
    max_iterations: int = 10
    # Stop after N consecutive PASSes.
    consecutive_pass: int = 1
|
|
|
|
|
|
@dataclass
class ExecutionConfig:
    """Runtime execution policy applied to agent subprocesses.

    Every field has a default, so ``ExecutionConfig()`` is a valid policy.

    Attributes:
        mode: Execution mode name; defaults to "agent-decides".
        command_policy: Command policy name; defaults to "broad".
        inherit_env: Flag, on by default.
            NOTE(review): presumably controls inheriting the parent
            environment -- confirm against the runner.
        auto_env_files: Env file names; a fresh ``[".env", ".env.local"]`` list
            per instance by default.
        env_files: Additional env file names; empty by default.
        expose_env_names: Flag, on by default.
        auto_context_targets: List of target strings; empty by default.
    """

    mode: str = "agent-decides"
    command_policy: str = "broad"

    # Environment handling.
    inherit_env: bool = True
    auto_env_files: list[str] = field(
        default_factory=lambda: [".env", ".env.local"]
    )
    env_files: list[str] = field(default_factory=list)
    expose_env_names: bool = True

    auto_context_targets: list[str] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class PipelineConfig:
    """Complete cross-eval configuration.

    Every field has a default, so ``PipelineConfig()`` is constructible with no
    arguments.

    Attributes:
        output_dir: Output directory; defaults to ``Path(".cross-eval/output")``.
        max_iterations: Upper iteration bound; defaults to 3.
        min_iterations: Lower iteration bound; defaults to 1.
        verbose: Verbosity flag, off by default.
        language: Language code, "en" or "ko"; defaults to "en".
        execution: Execution policy; a fresh ``ExecutionConfig()`` per instance.
        inputs: Mapping from input name to a ``Path`` or string.
        agents: Mapping from agent key to its ``AgentConfig``.
        coders: List of strings, empty by default.
            NOTE(review): presumably agent keys -- confirm against the loader.
        reviewers: List of strings, empty by default (same caveat as above).
        seniors: List of strings, empty by default (same caveat as above).
        pipeline: Step configurations, empty by default.
        phases: Phase configurations, empty by default.
        preset_name: Preset label; defaults to "custom".
        _config_path: Optional path, hidden from ``repr``.
            NOTE(review): presumably the file this config was loaded from --
            confirm against the loader.
        _config_mtime: Optional float, hidden from ``repr``.
            NOTE(review): presumably that file's modification time -- confirm.
    """

    # General settings.
    output_dir: Path = field(
        default_factory=lambda: Path(".cross-eval/output")
    )
    max_iterations: int = 3
    min_iterations: int = 1
    verbose: bool = False
    # "en" or "ko".
    language: str = "en"
    execution: ExecutionConfig = field(default_factory=ExecutionConfig)

    # Inputs and agent registry.
    inputs: dict[str, Path | str] = field(default_factory=dict)
    agents: dict[str, AgentConfig] = field(default_factory=dict)
    coders: list[str] = field(default_factory=list)
    reviewers: list[str] = field(default_factory=list)
    seniors: list[str] = field(default_factory=list)

    # Pipeline layout.
    pipeline: list[StepConfig] = field(default_factory=list)
    phases: list[PhaseConfig] = field(default_factory=list)
    preset_name: str = "custom"

    # Kept out of repr.
    _config_path: Optional[Path] = field(default=None, repr=False)
    _config_mtime: Optional[float] = field(default=None, repr=False)
|
|
|
|
|
|
@dataclass
class AgentResult:
    """Outcome of one agent invocation.

    All fields are required and are accepted positionally in the order listed.

    Attributes:
        output: Output text of the invocation.
        exit_code: Integer exit code of the invocation.
        agent_name: Name of the agent involved.
        step_name: Name of the step involved.
        duration_seconds: Duration of the invocation, in seconds.
    """

    # What the invocation produced.
    output: str
    exit_code: int

    # Which agent and step it belongs to.
    agent_name: str
    step_name: str

    duration_seconds: float
|
|
|
|
|
|
@dataclass
class ReviewMetrics:
    """Metrics parsed out of one review output.

    Every counter is an integer that starts at zero. The counters fall into
    three groups:

    * severity: ``critical``, ``major``, ``minor``
    * category: ``over_engineering``, ``omission``
    * assessment: ``confirmed``, ``dismissed``
    """

    # -- severity --
    critical: int = 0
    major: int = 0
    minor: int = 0

    # -- category --
    over_engineering: int = 0
    omission: int = 0

    # -- assessment --
    confirmed: int = 0
    dismissed: int = 0
|
|
|
|
|
|
@dataclass
class IterationResult:
    """Outcome of one pipeline iteration.

    Only ``iteration`` is required; the mappings start empty and every other
    field starts as ``None``.

    Attributes:
        iteration: Iteration number.
        step_results: Mapping of string keys to ``AgentResult`` objects.
        step_outputs: Mapping of string keys to output strings.
        verdict: Optional verdict string.
        feedback: Optional feedback string.
        phase_name: Optional phase name.
        repeated_aggregate_warning: Optional warning string.
        review_metrics: Optional ``ReviewMetrics`` for this iteration.
        escalated_issues: Optional escalated-issues string.
    """

    iteration: int

    # Per-step data; every instance owns its own dicts.
    step_results: dict[str, AgentResult] = field(default_factory=dict)
    step_outputs: dict[str, str] = field(default_factory=dict)

    # Optional details; None when not set.
    verdict: Optional[str] = None
    feedback: Optional[str] = None
    phase_name: Optional[str] = None
    repeated_aggregate_warning: Optional[str] = None
    review_metrics: Optional[ReviewMetrics] = None
    escalated_issues: Optional[str] = None
|
|
|
|
|
|
@dataclass
class PipelineResult:
    """Outcome of a whole pipeline run.

    Every field has a default, so ``PipelineResult()`` is constructible with no
    arguments.

    Attributes:
        iterations: Per-iteration results; empty by default.
        final_verdict: Final verdict string; defaults to
            "MAX_ITERATIONS_REACHED".
        total_duration: Total duration as a float; defaults to 0.0.
        run_dir: Optional run directory path.
        repeated_aggregate_warnings: List of warning strings; empty by default.
        escalated_issues: List of escalated-issue strings; empty by default.
        agentic_branch: Optional branch name.
    """

    iterations: list[IterationResult] = field(default_factory=list)
    final_verdict: str = "MAX_ITERATIONS_REACHED"
    total_duration: float = 0.0
    run_dir: Optional[Path] = None

    # Collected lists; every instance owns its own.
    repeated_aggregate_warnings: list[str] = field(default_factory=list)
    escalated_issues: list[str] = field(default_factory=list)

    agentic_branch: Optional[str] = None
|