From ee4f1a07ef1101051cf61a6fd57780493b5e6bd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9D=B4=EC=B6=A9=EC=98=81=20=EC=97=90=EC=9D=B4=EB=8B=B7?= =?UTF-8?q?=EC=84=9C=EB=B9=84=EC=8A=A4=EA=B0=9C=EB=B0=9C?= Date: Wed, 11 Mar 2026 21:53:14 +0900 Subject: [PATCH] initial commit --- .idea/.gitignore | 10 + .idea/cross-eval.iml | 14 + .idea/inspectionProfiles/Project_Default.xml | 6 + .../inspectionProfiles/profiles_settings.xml | 6 + .idea/misc.xml | 7 + .idea/modules.xml | 8 + DEVELOPMENT.md | 169 ++++ README.md | 120 +++ cross_eval.egg-info/PKG-INFO | 6 + cross_eval.egg-info/SOURCES.txt | 17 + cross_eval.egg-info/dependency_links.txt | 1 + cross_eval.egg-info/entry_points.txt | 2 + cross_eval.egg-info/requires.txt | 1 + cross_eval.egg-info/top_level.txt | 1 + cross_eval/__init__.py | 1 + .../__pycache__/__init__.cpython-312.pyc | Bin 0 -> 190 bytes .../__pycache__/__init__.cpython-313.pyc | Bin 0 -> 190 bytes cross_eval/__pycache__/agent.cpython-312.pyc | Bin 0 -> 8186 bytes cross_eval/__pycache__/agent.cpython-313.pyc | Bin 0 -> 8441 bytes cross_eval/__pycache__/cli.cpython-312.pyc | Bin 0 -> 29021 bytes cross_eval/__pycache__/cli.cpython-313.pyc | Bin 0 -> 29296 bytes cross_eval/__pycache__/config.cpython-312.pyc | Bin 0 -> 23740 bytes cross_eval/__pycache__/config.cpython-313.pyc | Bin 0 -> 24068 bytes cross_eval/__pycache__/models.cpython-312.pyc | Bin 0 -> 5585 bytes cross_eval/__pycache__/models.cpython-313.pyc | Bin 0 -> 5737 bytes .../__pycache__/pipeline.cpython-312.pyc | Bin 0 -> 26546 bytes .../__pycache__/pipeline.cpython-313.pyc | Bin 0 -> 26827 bytes .../__pycache__/prompts.cpython-312.pyc | Bin 0 -> 26642 bytes .../__pycache__/prompts.cpython-313.pyc | Bin 0 -> 26738 bytes cross_eval/__pycache__/report.cpython-312.pyc | Bin 0 -> 23464 bytes cross_eval/__pycache__/report.cpython-313.pyc | Bin 0 -> 23709 bytes cross_eval/agent.py | 162 ++++ cross_eval/cli.py | 701 +++++++++++++++ cross_eval/config.py | 607 +++++++++++++ cross_eval/models.py | 118 +++ 
cross_eval/pipeline.py | 700 +++++++++++++++ cross_eval/prompts.py | 845 ++++++++++++++++++ cross_eval/report.py | 497 ++++++++++ pyproject.toml | 18 + requirements.txt | 1 + tests/__pycache__/test_config.cpython-312.pyc | Bin 0 -> 22178 bytes tests/test_config.py | 515 +++++++++++ 42 files changed, 4533 insertions(+) create mode 100644 .idea/.gitignore create mode 100644 .idea/cross-eval.iml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 DEVELOPMENT.md create mode 100644 README.md create mode 100644 cross_eval.egg-info/PKG-INFO create mode 100644 cross_eval.egg-info/SOURCES.txt create mode 100644 cross_eval.egg-info/dependency_links.txt create mode 100644 cross_eval.egg-info/entry_points.txt create mode 100644 cross_eval.egg-info/requires.txt create mode 100644 cross_eval.egg-info/top_level.txt create mode 100644 cross_eval/__init__.py create mode 100644 cross_eval/__pycache__/__init__.cpython-312.pyc create mode 100644 cross_eval/__pycache__/__init__.cpython-313.pyc create mode 100644 cross_eval/__pycache__/agent.cpython-312.pyc create mode 100644 cross_eval/__pycache__/agent.cpython-313.pyc create mode 100644 cross_eval/__pycache__/cli.cpython-312.pyc create mode 100644 cross_eval/__pycache__/cli.cpython-313.pyc create mode 100644 cross_eval/__pycache__/config.cpython-312.pyc create mode 100644 cross_eval/__pycache__/config.cpython-313.pyc create mode 100644 cross_eval/__pycache__/models.cpython-312.pyc create mode 100644 cross_eval/__pycache__/models.cpython-313.pyc create mode 100644 cross_eval/__pycache__/pipeline.cpython-312.pyc create mode 100644 cross_eval/__pycache__/pipeline.cpython-313.pyc create mode 100644 cross_eval/__pycache__/prompts.cpython-312.pyc create mode 100644 cross_eval/__pycache__/prompts.cpython-313.pyc create mode 100644 cross_eval/__pycache__/report.cpython-312.pyc 
create mode 100644 cross_eval/__pycache__/report.cpython-313.pyc create mode 100644 cross_eval/agent.py create mode 100644 cross_eval/cli.py create mode 100644 cross_eval/config.py create mode 100644 cross_eval/models.py create mode 100644 cross_eval/pipeline.py create mode 100644 cross_eval/prompts.py create mode 100644 cross_eval/report.py create mode 100644 pyproject.toml create mode 100644 requirements.txt create mode 100644 tests/__pycache__/test_config.cpython-312.pyc create mode 100644 tests/test_config.py diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..ab1f416 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,10 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Ignored default folder with query files +/queries/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/cross-eval.iml b/.idea/cross-eval.iml new file mode 100644 index 0000000..b525243 --- /dev/null +++ b/.idea/cross-eval.iml @@ -0,0 +1,14 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..03d9549 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..f632e42 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..02ac596 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ 
No newline at end of file diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md new file mode 100644 index 0000000..524e6b9 --- /dev/null +++ b/DEVELOPMENT.md @@ -0,0 +1,169 @@ +# Cross-Eval CLI 구현 계획 + +## Context + +AI 에이전트 2개를 활용한 개발 워크플로우(기획→체크리스트→개발→리뷰→반복)에서 발생하는 **과최적화/오탐/누락** 문제를 잡기 위해, 에이전트 간 교차 검증 루프를 자동화하는 CLI 도구를 만든다. 현재 수동으로 2개 에이전트에 복붙하는 과정을 `cross-eval run` 한 줄로 대체한다. + +## 핵심 설계 결정 + +**에이전트가 코드베이스를 직접 탐색한다** — `claude -p`는 non-interactive지만 내장 도구(Read, Glob, Grep)는 사용 가능. 파일 내용을 프롬프트에 전부 넣는 대신, 에이전트가 프로젝트 디렉토리에서 직접 파일을 탐색하도록 한다. +- Generator: `--permission-mode auto` (파일 읽기/쓰기 가능) +- Reviewer: `--permission-mode plan` (읽기 전용 탐색) +- subprocess의 `cwd`를 현재 작업 디렉토리로 설정 + +## 사용자 경험 (UX Flow) + +```bash +# 1. 프로젝트 초기화 +cd my-project +cross-eval init +# → cross-eval.yaml, plan.md, checklist.md 생성 + +# 2. plan.md, checklist.md 작성 후 실행 +cross-eval run + +# 3. 옵션들 +cross-eval run --config custom.yaml --max-iter 5 --dry-run +cross-eval run --input plan=./docs/spec.md --input checklist=./docs/checks.md + +# 4. 결과 확인 +ls output/  # → v1/ v2/ final-report.md +``` + +## 설정 파일 형식 (`cross-eval.yaml`) + +```yaml +output_dir: output +max_iterations: 3 + +inputs: + plan: plan.md + checklist: checklist.md + +agents: + generator: + command: claude + args: ["-p", "--model", "sonnet", "--permission-mode", "auto"] + system_prompt: "You are a senior software engineer. Follow the plan precisely." + reviewer: + command: claude + args: ["-p", "--model", "opus", "--permission-mode", "plan"] + system_prompt: "You are a meticulous code reviewer." 
+ +# 방법 1: 프리셋 사용 (사용자가 pipeline YAML 직접 작성할 필요 없음) +pipeline: preset:simple # "A 생성 → B 리뷰" (기본값) +# pipeline: preset:cross-review # "둘 다 생성 → 서로 리뷰" + +# 방법 2: 직접 커스텀 (고급 사용자용) +# pipeline: +# - name: generate +# agent: generator +# role: generate +# prompt_template: "default:generate" +# output_key: generated_code +# - name: review +# agent: reviewer +# role: review +# prompt_template: "default:review" +# output_key: review_result +# verdict: true +``` + +### 파이프라인 프리셋 + +| 프리셋 | 설명 | 자동 생성되는 steps | +|--------|------|-------------------| +| `simple` | A 생성 → B 리뷰 | generate(agent1) → review(agent2) | +| `cross-review` | 둘 다 생성, 서로 리뷰 | gen_a → gen_b → review_of_b(agent_a) → review_of_a(agent_b) | + +프리셋은 내부적으로 적절한 pipeline steps + context_override를 자동 구성한다. agents에 정의된 순서대로 agent1, agent2가 배정된다. 프리셋이 불충분하면 직접 steps를 작성할 수 있다. + +## 모듈 구조 및 구현 순서 + +``` +cross_eval/ +├── __init__.py (exists) +├── models.py # 1. 모든 데이터클래스 +├── config.py # 2. YAML 로딩 + 검증 +├── prompts.py # 3. 프롬프트 템플릿 +├── agent.py # 4. subprocess 에이전트 호출 +├── pipeline.py # 5. 핵심 반복 루프 +├── report.py # 6. 마크다운 리포트 +└── cli.py # 7. 
argparse (init, run) +``` + +### 모듈별 핵심 내용 + +**models.py** — 순환 참조 방지, 모든 데이터클래스 집중: +- `AgentConfig` (command, args, system_prompt, stdin_mode) +- `StepConfig` (name, agent, role, prompt_template, output_key, verdict, verdict_pattern, context_override) +- `PipelineConfig` (output_dir, max_iterations, inputs, agents, pipeline) +- `AgentResult` (output, exit_code, agent_name, step_name, duration_seconds) +- `IterationResult` (iteration, step_outputs, verdict, feedback) +- `PipelineResult` (iterations, final_verdict, total_duration) + +**config.py** — YAML → PipelineConfig + 검증: +- step.agent가 agents에 정의되어 있는지 +- output_key 중복 없는지 +- input 파일 존재 여부 +- verdict_pattern 유효한 정규식인지 + +**prompts.py** — 기본 프롬프트 2종 + 파이프라인 프리셋 정의: +- `default:generate` — "기획서에 명시된 것만 구현하라, 과최적화 금지" + plan/checklist/feedback + **"프로젝트 디렉토리의 기존 코드를 탐색하여 컨텍스트를 파악하라"** 지시 +- `default:review` — 과최적화/오탐/누락 3기준 검토 + `VERDICT: PASS|FAIL` 출력 + **"프로젝트 디렉토리를 직접 탐색하여 코드를 검증하라"** 지시 +- `{variable}` 플레이스홀더, 누락 시 `(no {key} provided)` 출력 +- 사용자가 커스텀 .md 파일로 오버라이드 가능 +- `PIPELINE_PRESETS` dict: `simple`, `cross-review` 등 프리셋별 StepConfig 리스트 정의 + +**agent.py** — `invoke_agent(agent_config, prompt, cwd)`: +- `cwd` 파라미터로 프로젝트 디렉토리 지정 → 에이전트가 해당 디렉토리에서 파일 탐색 가능 +- `stdin_mode=false`: prompt를 마지막 인자로 전달 +- `stdin_mode=true`: stdin으로 파이프 (긴 프롬프트용) +- command가 "claude"이고 system_prompt 있으면 `--system-prompt` 자동 주입 +- timeout 600초, 비정상 종료 시 RuntimeError + +**pipeline.py** — 핵심 루프: +``` +for iteration 1..max_iterations: + for step in pipeline: + 1. 템플릿 resolve → context 구성 (inputs + 이전 step 출력 + feedback) + 2. context_override 적용 (교차 리뷰용 변수 매핑) + 3. 에이전트 호출 (cwd=현재 작업 디렉토리) + 4. output_dir/v{i}/{step.name}.md 저장 + 5. verdict step이면 PASS/FAIL 판정 + PASS면 종료, FAIL이면 review 결과를 feedback으로 다음 반복 +final-report.md 생성 +``` + +**report.py** — 최종 마크다운 리포트: +- 요약 테이블 (반복 횟수, 판정, 소요시간) +- 반복별 상세 (각 step 출력, 에이전트명, 소요시간) +- 최종 판정 + +**cli.py** — 서브커맨드: +- `cross-eval init [--dir .] 
[--preset simple|cross-review]` — 스캐폴딩 (기존 파일 안 덮어씀) +- `cross-eval run [-c config] [--max-iter N] [--dry-run] [--output-dir path] [--input key=path ...]` +- `--input key=path`: config의 inputs 오버라이드/추가 +- `--dry-run`: 에이전트 호출 없이 렌더링된 프롬프트만 출력 + +## 수정할 파일 목록 + +| 파일 | 작업 | +|------|------| +| `cross_eval/__init__.py` | 이미 존재, 수정 없음 | +| `cross_eval/models.py` | **신규 생성** | +| `cross_eval/config.py` | **신규 생성** | +| `cross_eval/prompts.py` | **신규 생성** | +| `cross_eval/agent.py` | **신규 생성** | +| `cross_eval/pipeline.py` | **신규 생성** | +| `cross_eval/report.py` | **신규 생성** | +| `cross_eval/cli.py` | **신규 생성** | +| `pyproject.toml` | 이미 존재, 수정 없음 | + +## 검증 방법 + +1. `pip install -e .` 로 로컬 설치 +2. `cross-eval init` 로 스캐폴딩 확인 (3개 파일 생성) +3. `cross-eval run --dry-run` 로 프롬프트 렌더링 확인 (에이전트 호출 없이) +4. plan.md/checklist.md에 간단한 내용 넣고 `cross-eval run --max-iter 2` 로 실제 실행 +5. `output/` 디렉토리에 v1/, final-report.md 생성 확인 diff --git a/README.md b/README.md new file mode 100644 index 0000000..e286554 --- /dev/null +++ b/README.md @@ -0,0 +1,120 @@ +# cross-eval + +AI 에이전트 간 교차 검증을 자동화하는 CLI 도구. + +기획서와 체크리스트를 기반으로 "생성 → 리뷰 → 피드백 → 재생성" 루프를 자동으로 돌려서, +**과최적화 / 오탐 / 누락** 문제를 잡아냅니다. + +## 설치 + +```bash +# 1. 저장소 클론 +git clone <repository-url> +cd cross-eval + +# 2. 설치 (editable 모드 — 코드 수정 시 재설치 불필요) +pip3 install -e . +``` + +설치 후 터미널 어디서든 `cross-eval` 명령어를 사용할 수 있습니다. + +```bash +cross-eval --version +``` + +### 요구사항 + +- Python 3.9+ +- [Claude CLI](https://docs.anthropic.com/en/docs/claude-code) 설치 및 인증 완료 + +## 사용법 + +### 1. 프로젝트 초기화 + +```bash +cd my-project +cross-eval init +``` + +`.cross-eval/` 폴더 안에 `config.yaml`, `plan-sample.md`, `checklist-sample.md`가 생성됩니다. + +### 2. 기획서 작성 + +샘플 파일을 복사하여 기획서와 체크리스트를 작성합니다. + +```bash +cp .cross-eval/plan-sample.md .cross-eval/plan.md +cp .cross-eval/checklist-sample.md .cross-eval/checklist.md +# plan.md, checklist.md 편집 +``` + +### 3. 
실행 + +```bash +# 기본 실행 (생성 → 리뷰, 최대 3회 반복) +cross-eval run + +# 프롬프트만 확인 (에이전트 호출 없이, 비용 절약) +cross-eval run --dry-run + +# 최대 반복 횟수 변경 +cross-eval run --max-iter 5 + +# 입력 파일 오버라이드 +cross-eval run --input plan=./docs/spec.md + +# 설정 파일 지정 +cross-eval run --config .cross-eval/config.yaml +``` + +### 4. 결과 확인 + +``` +output/ +├── v1/ +│ ├── generate.md # 에이전트 생성 결과 +│ └── review.md # 에이전트 리뷰 결과 +├── v2/ +│ ├── generate.md +│ └── review.md +└── final-report.md # 전체 요약 리포트 +``` + +## 설정 (`.cross-eval/config.yaml`) + +```yaml +output_dir: output +max_iterations: 3 +language: ko # ko 또는 en (프롬프트 템플릿 언어) + +inputs: + plan: plan.md # config.yaml 기준 상대경로 + checklist: checklist.md + +agents: + generator: + command: claude + args: ["-p", "--model", "sonnet", "--permission-mode", "auto"] + system_prompt: "You are a senior software engineer." + reviewer: + command: claude + args: ["-p", "--model", "opus", "--permission-mode", "plan"] + system_prompt: "You are a meticulous code reviewer." + +pipeline: preset:simple +``` + +실행 중에 `config.yaml`을 수정하면 다음 반복부터 자동으로 반영됩니다. 
+ +### 파이프라인 프리셋 + +| 프리셋 | 설명 | +|--------|------| +| `simple` | Agent A가 생성, Agent B가 리뷰 (기본값) | +| `cross-review` | 둘 다 생성, 서로 교차 리뷰 | + +```bash +# 초기화 옵션 +cross-eval init --preset cross-review # 교차 리뷰 프리셋 +cross-eval init --lang en # 영어 템플릿 +``` diff --git a/cross_eval.egg-info/PKG-INFO b/cross_eval.egg-info/PKG-INFO new file mode 100644 index 0000000..1eeed19 --- /dev/null +++ b/cross_eval.egg-info/PKG-INFO @@ -0,0 +1,6 @@ +Metadata-Version: 2.4 +Name: cross-eval +Version: 0.1.0 +Summary: AI agent cross-evaluation CLI tool +Requires-Python: >=3.9 +Requires-Dist: pyyaml>=6.0 diff --git a/cross_eval.egg-info/SOURCES.txt b/cross_eval.egg-info/SOURCES.txt new file mode 100644 index 0000000..77a3801 --- /dev/null +++ b/cross_eval.egg-info/SOURCES.txt @@ -0,0 +1,17 @@ +README.md +pyproject.toml +cross_eval/__init__.py +cross_eval/agent.py +cross_eval/cli.py +cross_eval/config.py +cross_eval/models.py +cross_eval/pipeline.py +cross_eval/prompts.py +cross_eval/report.py +cross_eval.egg-info/PKG-INFO +cross_eval.egg-info/SOURCES.txt +cross_eval.egg-info/dependency_links.txt +cross_eval.egg-info/entry_points.txt +cross_eval.egg-info/requires.txt +cross_eval.egg-info/top_level.txt +tests/test_config.py \ No newline at end of file diff --git a/cross_eval.egg-info/dependency_links.txt b/cross_eval.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/cross_eval.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/cross_eval.egg-info/entry_points.txt b/cross_eval.egg-info/entry_points.txt new file mode 100644 index 0000000..f668a8c --- /dev/null +++ b/cross_eval.egg-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +cross-eval = cross_eval.cli:main diff --git a/cross_eval.egg-info/requires.txt b/cross_eval.egg-info/requires.txt new file mode 100644 index 0000000..3aecde9 --- /dev/null +++ b/cross_eval.egg-info/requires.txt @@ -0,0 +1 @@ +pyyaml>=6.0 diff --git a/cross_eval.egg-info/top_level.txt 
b/cross_eval.egg-info/top_level.txt new file mode 100644 index 0000000..59bc124 --- /dev/null +++ b/cross_eval.egg-info/top_level.txt @@ -0,0 +1 @@ +cross_eval diff --git a/cross_eval/__init__.py b/cross_eval/__init__.py new file mode 100644 index 0000000..3dc1f76 --- /dev/null +++ b/cross_eval/__init__.py @@ -0,0 +1 @@ +__version__ = "0.1.0" diff --git a/cross_eval/__pycache__/__init__.cpython-312.pyc b/cross_eval/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b05eafd6cb9fd63ba0bf3bbf941073d04c1fafb2 GIT binary patch literal 190 zcmX@j%ge<80>?IFiUFy|U=EaEgwmfmfQ;!3DGX5zm5iE9RjdYjhI$5mnvA!&?IFiUFy|U=EaEgwmfmfQ+dO!3;$V>5Q68RjdYjhI$5mnvA!&Z)_V!cAw?nCAp;jQX(l?UQ2Rh+7=Z%w&U2cFN*#}wk+pl<$x#=wlsGolcq>! zc4=7*h12B%M7ncf``VtAUV<=(Lqo$k#0M0pgBFNCv_OFSLRv~mZxq1Q9opubQW!g*gHHK}o z+$2Z)rm%h1G3l7CnXF-u4oQX(uWh2#N_DrH!qa5EXr62k*~v!HAzI!rOgf>pLffQ6 z!$`Effkf`MZf9<@1(APZO`)w;i?yQt4dbM1Uhl0-9PXQuA_^ajT!{w!N-!GXuLS+P z9KRUD(SRh&{9I7E#D{}dBwmgMBM}Mvz966SGOEoVi9{8;NFIY{!wY`p614337$M{j zdv&UflJAd3rh_v(595*?4=XUw6bSp{q69r-ASy~%13UauPKY-22r=77pz|i^Ln*#p zLeq@Myum6vJR>xKZqr2wX)?Qjyt>5E{!3EeG9R4gl}i%ur;PTGjsSPDSQINV-`y?G z%ZfDH4b0BQ6n;AFpYc7m4cG#DifrQv===?H7BR4uB${FoT40jQvhE7{IkUnr=$h4l z=FzY9bIdJf>=wGES9QK8RYSx-E2-u{bavJs5oJP+=NG?zw&z7z!g9|cD8~*R?HQ2d z%StrXGcLuVa!`rl`JMod%5t}K#UCzo1=^)N_+s;_r5Cu7uEy}`M312HBNVt13fu^k zo8BOC%h5ukK>wO!V9hp=F$`cgybi!$ zvFsd8X8Sk{tsn_aG61MLkpTeJ0eI>GI4z<9K+GgsMI*FkXidB9O6M$d^j4J>BjL0)n4I*gSm%gU%U-IyqWsxX6miY)JL1CyPK(h z+D!fZX6o-YQ}1u4-rP)GPyEu(|35GGKoR4B9nzN zE9&5hfDbM6$g3Y4n$aG;2Gu+ZNDtBjaqq}dH4Dlmut!m~39{nHil78%B>)72EQP1> z9#||B@;qNS`K%xWBSA$F5_Ou!eA*C!PI)&}izwgRwq&@*qV0qM{{VuY03n({qBD}Bnnb@e8;y7^xE&U$1|nnC0wYlc@F;*g z$*85E2!JQmG*QrrQ6Rn;caYacsva+c_reoB(gl^^9(YbTirS#l4w5;~LItXDwGtg{ zwCsNO^joLbS`I83)>+p^GoNej$~JeUl@AlQ68AdSn}?~6SnfKv%;i{5mi2tfw*IlM zz#|rn^BQQd++Bx?W(YHO3P`V-&USP^fTn&){VNKYv=yaRhG1+Sm2|1tw0G6?r%AuM zz7k*uTF9^f=#*rBfyiMA)oL_LExgeubx<4LF)!#9;_zq*5yY)1A%d?JC8^iG#Gvn3 
zP2jYmXnerh62G2`N$@BdOJBM6D-x4}$ounb|?d>|~TrlIk^bAuNOHmMIr6uch@;{&837D-RPs)t}hHOYzyu2MD5VIZR# zr^9ji5}0IIl44#1HAxu6GzgNy{+KL@sv#VVNHS4B0pEfr^ube^ty1$o3}f<7pjt#5 z&bAfdrjVL=|K;~y{#WOrrNMkd^UA58oqAB~+-P#Y-TAKXE#I1Fe|jR1 zPA?7To$l4WtIa7U)%pIx_YS7o()do-9rxY#Ov9pDOrCtB-3mKzEcI!h|P;dOk2G?9f zQ)a{8L95C+)3~R2YaSc=f}jdqGRy>Bqj4opFoowg-~KBe6B|qDWv|uCQFAXqsv#5& zMpPq?#v@{-?LQ4GFbQh#Gf;td>>nNM8y7}L#s={T81d>gs}I5r2%mzXouF^WQyOH5 z^b__nT#m+wKbNmTMPRw$8f`V#dw=k{&XH@|Io*YTEP z&9yImBzNFs7XG_V-u?bs{r8s48?5cRb=jI@U0K$(+L&{Dvu^J?yZ`a=XHqYwhSP`B zGwGM^h#B|Eb+-4h37@haYV2chiub_A;J7PTHDO7KlZh*Q13fSte`xPs9Lm=pSUkIJFc^1la|YwCZH_gzK58@@TPRO- zRiNCn5YqG%{~Sii{VoIpm1TxmFjYUf_?S{WyVcifX9Mp^nnJ}>T>1he3y9aLf)>Q< zyrZAdX%rY#odT>T9=M{EtwPn10v}Zf@3ef!&iU%!g_B#A@?$$@7R-@7)#u>`(OCTr zaBd=~@Qwji>ykjD3=68nS)v)}6uq*>6<{43A{Up6k~G2n!|Ew|f62yP`xff<5_K(t1n*t+?V^LCT}|~}1NXHh9E82FA#w++C?%}3Acfr1 zXbF+KPesvYE?5+Do@oiSYrRsNG$qYqomiJNB=sNE-`+VT3)Vl4deTC?%YrRw4ec+{ z)Ow+AtygM``{EQ5u+oy`G`n4}1EnXm@lq=`T?Qb=catn`2g!7m-e|oY={8qi57Kp& zqzh-t8X~TsqI}zauQIZSuKRQObR-?Psf<^L#;fSQ4A_U+BqaGnw+D$HVv!4uKh44x zY7_#Ww1_R=Xj840>KcSitzw^N`f5q{La$ShhPeR7RZ1YOgnsA$kqk2 zC)G+NOoK4hLUvZ@){1dVGxvfl1)`9e_O{V%u3B5ETblk;O&~}q0+%;PT*#{Zu|zXX z?5d`4bY@0^tDq=djL%?lfyU24r5b_}Ff1Cb5ri)V)T*`gGlQxJg{qE;LWn+iH5SB@ zNaH@$GI%v0(O(qkbrV=5IS(9+srs-KQLTlTlrT%-*YTh|0G%zKxF!>!rWi?6S zmw*D?p^5%fD`;B~B?W9mwE-c>$xBy)(wt1>LQZ3ODWuRi@*B)$NuYbf1p9~JPd)@8 z$s*eFAg=EEspV5kJ&XN$GnX^(x^LdK-rjrn#gC^xnp$r^n=i!uHwH4TNAA3sJ9;*I z^lawH=qG&{&se76{5tzW-e7+7%#Y42#jhup6YB=gmI<-;oY`~V>{;J^^6ucr=RP{O zzWdC>0-;Rf-c&r-bt2n!BGW+b2YF}9vNdmUM)#=YvyxesRD2Q#kjw2-fH zE?wHvGj`XPdSq$XvZAKeCF}1E);C9gG@7Z~pC0`1%&jwb^&hhzvG*G8joyv~>5lZp^sDKvwLOQ|*{3$@J*(ee zonO72t}l>F_uhFe)6&0IKd@xZvo+W4%l2zETZVe0W23fl#c|WIR?9CLH|kne+HSVx zxte@^YrbW7zRsC%Y{}O)JhGeXtxKjywW!gxa{A`!wT6zAlx;Y$#BMbrN8|OW<*A&V z&)WGld;8)Lqz7)8etLFs@Sz#x=E|B~t9_t_2e$e=*Ff%RtA|!!T0NWk;hihFr=QP0 z{e0%w$R~9f_t|xB6r{H@b#p4`+?RFkTXXKe@9a-ce>i(mw`Tn!yP2^{!lfN4CBr-{4v~aq~pJzGcJRwt6*Px8^?Z$Y$b9 
zTgYTIef4OVL0nVL)|s_+{uk049hGv-bUl67d}sb1lW{+{&h_V6`-ZKt>de$!#@(~d zJqc5QAEV=IndCJ7rM1s=_8IisXZmW+v3j_ol1oGBimDneLe>j1vSUMD2ab|=#KquP zaTdh|dZi`ckZ@?fYraf==$xWpkpO^D_#{s*#Q0UR@-5ykWZBomyexLT`hZ{enX8ov&2!TBNi zAE1J>%`gwp@y}8F?@;Y$$nhDf`wUq>BYoTF$n`mD`44pLp~bV*oU?ec7SHPOHB0BB z382E=nL*Bc%iaufZJTt=bIi6?#|$$6ZS7!MzC=)MO)v<~rKc@l-@Ijig<(v2Tho@F wbRKdwTSn3WI<*a3X4++u)wN}zT`O|9wrsS^A+ENtddq=Kb&qPuD82Ll55m;y?EnA( literal 0 HcmV?d00001 diff --git a/cross_eval/__pycache__/agent.cpython-313.pyc b/cross_eval/__pycache__/agent.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e40f1947fef5c1436801a094f0aca7794818fcfa GIT binary patch literal 8441 zcmd5>du$s=dY>hiujP`|Tcjk*Ye|mG*pd?4@SWdWN(LhnRG`W&#Q%hx* z^n+L6bhx6CF2Tu3FR(5R5T+;^xhN19XaNT;5TihW{LunrsFch`2@Kt#h5sp(+PK$W z{bp86$_$<4fcv8(YWCaNnQ!NN%x}K=_MywgAt-+{{14H8u0`k<E`La$bArB@ra(W@QX;mS@rCOFK|v6e~a1dn-2 zTPIx;ZtR}$V2^-aVFQ9Oh0?VRy;9xF==ZorwBcIOj_X9X=x9QhSzHekN2vxcYD1#4 z35k5KX}5o4DC{n@U5hVn&F0cmjw&0lUl=p9ax! zMr1$=`|Pgd2-TuqIt(F0dmYGcO7#w3lEYVo=)91)Bnu%b>hRP!$hWi<#|c&F>QZm0 z33;&##9drU2=lSfLib}6fk~iaWFkj^7-ZU!0Fw#A3(q+aidWB=&OrNB^tMGn=bZ>S z;rRkOX`W|pGt;-xZL?~&GFE$@g>vHA-kuRz zy^@G8^#tUlxEf8w@r|A^j;m^yd^Hr)Nr{qFlJ2D&nxh|NlCLk}L9ItJq>^-*BwZ$n z%G4hN`^MD4?&1J1OhT=P>NgM2GRnAzK0W+=`uNRjKm8#PzkNsy@Z;(_28?J9DL1rw zNqkwH89gt(7`Sl$#TjYn?CA82c$>l1gby*!pW+&*wU%$22lhdi1~P)ac^UoE>E3h? 
zeRlZyoO>we9Llgm58)|#%mjZJUpVt!zJTSB8dsvqf*{Y&gLeq=t1`yXh^+pBJOc9| z8FrD%4VB>rRBaew329v=-4$me>9gtnrUmyACVMP6^6BAW!-B)V#pH?Kgo(B_uM`*^ z5-&`TPoI@W$Hp!M3!e)II$f^q9-HQOR=1E4$5_^u{3o0PZ&{u$Anl9~wN zgp`Ob$^aCsD#zw=2MksTdqL0EiBdHSHI1Lahv1F^v~CUz zyBX5lMN80bM@XONp#n2_TZjd=n)kmq_|9Oi`S1$6Y4>h53i-y)Pa8Wo5+9~+r|xxR z8_!Y;+4(homCxIovi7FW>@DB46wxcSR2u9D`#9X{B~WxhqXj`s1Ny6AGhP zRT8)xUWI!|MZA=vfo6t?M6;@i2yCdD#$xTlKBCq_?E>%m=gY&;Doqvn!*8+QwT0 zw+241s^4nxz1#6#_dDIWrl&V%@?8U2_%#jW8U|O+7V3TL2iF_ZOuFNPBkvzcw{GA& zop*e9+cLESDMLQd zj}wfbD`B0kgvT)pylHP==t_#=(IK#P#;XeqWZ~mkwpf_0U#L0tWo#?g(1jhF94GQKmL_Ws8@d}eQ=E)PqY zY+Cy2Cp!^^fN5y%fE$2<1lFurP9(9SvD0w{0tbA7bbAph=ru{`X;PJ1c?dova)yE! zf|I1hcqADkG%rc7CPT5^CmuAQrL+Py~mZ|(cvZoz0_m~V`!V@^aXnTpDLG2lZD#cdCnSqsNiDqc| zp3(?Ul!gTkfLrC|F)0|ucHybY{fe^!G!reA`+#1o1ci>Vz{eB>ZhNq&Lc}N9fKN^N zjDWvAQ2))=pjC8;99xV$L}wRt>IJ_c1u{ww#U8X6^Nh@Z_DZ;10p;L0_2l=sO7Agn z9`=u7gdw^q&Uz}(AM{XFLSUGcJwT3$Wu=6&t!TEzco-ljv9eN%#}vn`U_1=88C2;C z@uD?o6RXAQAR9D`HK(l#_kDQ>9b{iqoIx(|bcv%u1&FfCiXnl*&yoO#mLPBVx8ed$ z&lv5ctJu(Y4WZC~fT!9GYlT1-&{AiqmqG1ny0J1Z(6qOtX;_C`fZ$kJ`L?+aFvc7r zfNhk+?<>7K=r(wSz|Z#R!foh6^!5D=WVVBpkh>IpEE1cDLKXM-^FhTkYbYCu%}?~Y zmM3ayANAMPpa(RzKj_{gHAGpkb_7_poH1LO*<%?38X(Hpqf5>h|hFMze-85O9Rm{TN5Uxo_2O{jojg9~sS~0rP$|&-kB8y*ds# z>MM}NRrVxuX?B|Ab{8N+tq6L?vzz0F@PgXMku$@q$*7#@6UOI-8Jv`l(9|y`iS6BV zws43fik3nc@S?IXgKB4=a8Si0zdikcY~m?a=cm) zA>|ZyvoFwR#&6LqQDrHa(A?qB5`;aTLo{pNnX2m) z=0njKWZ3j>BxeuP@(ZDa04ek(p)cLXa*5UHU~$SG;>T=G)|A6iLmrsq49yFLXIkqI1JPhJM7V>PZN3meEcV z;;YvNRtHvkmWK;AK5yFxzsQIHNkIl|Czg^AD{o|e188#cK^hE zomZyrU^O#$`u^$yrTtvFy7-^uh09AWPL|6-mVR);Hh7^v}0ym z-mlHbQM<#ThL#oXmn`?%)Q_ez)lYAXemHS^;;#8)`$zVBwf6$|LiZYTr^Y`S%bp5s zvf}-=rp*a~*fnrqedhG&PZvADOY>TbE;a_6dq70Xt2^R3pmS_{0VP}5Rq z-e0J$FVr;`s%pP>*=o2I>!T`E=e;%f)?lu-JuPQz53ksF>X5r`ZEkfg?-H^uA?Ipa z9)ncJP3!BE%cJ*gpgM2X=3O7k+4g#p{@wQV%YW6g zF`R4aDfrscBWW??^KXo1eb25;6dHWDF1>kaT}>ZN53OIzHFT~xx70hriL;sBb32F`XS@aPftx=7vb~|uaNy<~p`fiT?>&}Y%rqX~v6)>b z9-1v4ZUyw)Ov6Az1df&Y2Na+u3z7%&iM|1?X>dN9b~mw 
zfAi=pgZPHLvm@*5c!NxuM<3r8U9y1)N$x)+lq|K&1W?RIs)g z=5uuFOVsuYRP_aNe}SsMK-?FEc7BPxU!vy!Mknq&s`8GePaRF`r*e*tWh(%MuOox% z3(W^J$ot4@VqRb#aVBPz`8C(hG=Ght+L>Vxtjea=LQUh2>lKEv7Mu+`Wi1Y3^D9ItWcgIOd9`RNB=p8q*R{zRFT50Ct{{TpbwbB3p literal 0 HcmV?d00001 diff --git a/cross_eval/__pycache__/cli.cpython-312.pyc b/cross_eval/__pycache__/cli.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b3a8b4f21ab2371a093d0f15111434004e994806 GIT binary patch literal 29021 zcmd^oYj{&fmf)3j^{^h6MMzdTIx25 z(~$!iOa~f~h9G*9#w6~+Nk~F=(_bg^?M`~X{V@_YV{wQ6HvQV2LGt&x8~U4`%>LMO z>fWm>8S(0!>HfZ5eQeQ+7e>WO+6#NF3zu)y&y%hCp{E&XM5K!ES zLEsFwEZ*Uuqt?cdtYC_ ztKHSr-{)+BZ+ct1Yd^-@?FSv46Qb>Q_{`tl*wDJQrMcx9dt+16*6lm&>o#m{Uf0;_ zR%s}zocYzMLE#_M!Sy?xwT^@BJxpxq?O5=%oE8WAGY53#x^m|BwejeO*O*xF-0crP zVq#}TV&|{Mf z066>ifQ(So;dp`MLoN?X#HbpkbHG)*%(gVSOagtsBJZCSDh%v|gc>@>)Cy&bZH3Qx z=}3gb5Fr*IeBK^EizJMFd^&der_-meB8u3hLF5L3kbwNp%mn}L#p$38vA14{2H#<( zj~|bn|BR`^Hj80kx7gc*Ozh>O(c^=+KlmxIl2zB$H{fvaq>(9_;mPoB137QQ?7 zJ30>ZbU9u1au8Fnj6_zAo%zZ1>7YnU9brIcJ+mv3A@}H{zDY=tqV@nnp{Gb<0&DES zvPk8xPM;czp8qU%9!ATLUWvXFsArsAy#qZC#w(6)0ULY@f!hAQo+I+tzOKW%fvy2Z zPgkD<+Q>Pa4p;rRg|`n&iH--NI0lJ$(Dk+-ws*N4oFIehnI!|YPYq1|()d9IX-R+%FpKnV+ zQkB~ujNkt7GZ^`UvA5n~gnrWXyIs&vcD9SFXM`}KjWZAv`_FI3UVD4`73ki?zNv~0 z1*cy=ZL;&*&lS)0Nx%&Hi2pog+4l-F;ZSB6E_Rcb7 zJ^VS3I9-lj`vBM9I{@rnX0zGKAn8AjM1LO4I!_E3lQ-_}z8T zq1(0I;q2hL_BxnD``ca2k$yLGs2!$gS3k4Y-POb5_$z0&JHF-a;vBt5Z`~prvnwHj zS+qyDcvjS6A=^5by8O!-b-EwfAh#5~CnVvenD7t_CwY3=AAa3DnE;enp6jxHAr zWE4X}C7t{G-90SRvULXo^(Ha^YvzE;x(h(%0fB*I?OZH)9>*Fm;`E!ZgRCb_iN1Sz z`s6v?uG=4tO$UaMxuP#!g{d@l9%kC1x1;aAGJQG_8@d*I^OVpC2mmb1x6`M8LfW1f zAvHcJPHYRz=`hX2E&_{R#kM4Viv&oD60=1=I2AiL1cKuSOqEp00L)TM?DT6`$=e^i zY(;cX)7UX+CP6vy<)X%w`4CoiE zqn(Ap$&wBRphk{^*!`gYfoSez_Q8jP?Q}35?M{bOZc~3xPyZn&h;4zBAv?l$^!FWv zcA%C>Dlc=GlyQB(!$~-$on@JQj&_$D1V6CNk^VlGaqV}I$xf_o92OXs>u22^4yb*f zqr>5JwuA24z89I95XY%=A0(s{h|a`5xH^4wEc)Xy6u_@tBXvRf_u4y5B4>!@>=L_0 z7z9X0F+yUTpPq`oagLe(zCU)!FXj>SJ!DOR6QZx35@|}L=U@Lcb{;rH$VfEB<5I>m 
ze@(J{HVBgvR1f9`2Il_h)1dmC8HrvPgUSb><|25~$U%IkF2lq!8vWqy*ef7|-yV$l z1EhHZ>x#MJLyQr)mq_@b09H~U0~uW(p8_pQZbIz5RLAJY{^`?0Kq*qIk57SmGJW0>naGHpU`Rm_C0BZN8Kl1I;&&#V+IMo0^#%$4*AU*$1~d z?by167N)Mz`jj~vA0cN`%AC(ULe32-b2dLh&W(_B6T5kAOTw5)-L4h_x%Cksw-Lx^ zlOR)b!}d7GYrLA2ypYGv7sqXD(nw4Vy~C9q@3Hypb4k3LI>*kW9N&0^9M31^c!6yj z+m*l*sY@vailmY!sj0F%P8F?Jo02Mf*cX#VsU7MqN*5S;sq=g@DMx!!OH=3A3o}8- zteGHBqObh+Qt|qxd(%@{_&BqxP=fC+mO}Eo&yg zifNgC^SBin{%J7!4p=T=&GFVrY`9EjrmL^ZMOM)%AELokG(p9r#wO(F+3#coVE*dfQ!KKeB@n%<1%` z)gJVu*Vc;0YhEJ;Jn8eQFt^poSkrj5u)5$-Efk^m_B$QC>ss9CjDycDPXxgMJKk zWPlvEKY`#cRY`qUjv!<>G1VBKHc+ESSHOdV{G-ph%6>2Vg^% zmWE*Tv${8cat&+d*jr;v^wgNGI@7bc761Aq#S4 zVoeeng?y1XqH69nDhhVLWDLlX&~}Nd;+!bAXLgYZ6vHBu7lv{+tCB4eQv~?(T7sQaoB1 zT*TnXs|A~>mF9A=P=mnPg2D7_eS+ciYtdd?3wBfYX@DgpzQ+hFS4Tu_6RonVgvDbu zbYAq6abP7fgoFZC-Tr7KHa;pPC6u~Kh>Ko2UL#p?p?onGmY&hz)oM{t`vV}{fQ&dG zfZzA`cfmqkXs)`p7W%D|Pdm`hr#brE3|9YM^Q&C<1N~NB3F5LqKAQ(l=s7{U|j{9#W)&`wk&0U=xY~ zbLsKNv1^V!4)EY#0(gWp>LqEbK@bGLI`tAm=Jb1dmig89z8{~$F;(bhNq&fL$Sk5s_9$06Bgj|2P{>5R8Us)+&v zB!kdaW^we`=lDQK=H+{P zMC$5Ok0W-jJ`V6-L`H#iTu{(p`|=*2jz3|FOl-u1r^^SY3V0p`Cb7hahNbxt2j`E6 zl13Z~1k^O}hhdQ{g5wE^5#0uoa)}=jcRbc;O|0#MXR`;V3V0p`Cb7hahNbB%(Jg{B z7pWse3X~!aI`I@i%!#VwY+6Zlk`Tmq!39hY#hOw-VeY{y`{_Kmt|B^NW~>to5D8UI zz)O5QU>grTlki9IVx$m$`nDLKrabZvaFGO*(Ck@)Me55wT@tv6>qXANitR4YidI5I9VQ+vDGlM z5uxOM2p)+7WN#aN&m=O4?Zk2f-T==RP+38z6g}&hYM}y;Qrmm+CdFVoz`hrY!Bn? 
z2Ok9&qDU|Tkl_-yYNkS$lAk-!S1JJwn2F+6Dqigv=<4b3B-Sc7?n)=Q8G69zg4VJY zVr_xZD)`(8OENT3qpu-Th5P*efYqZ1aUq$?+?9xIwxvvPUX=311Sd8uh3)Tfx;J5# z@oQ*WjE-GH*&ieW&Fh7^bN2*5>&+Sn5dNosF9wq`YJ2z`l&KHi^KJ zKcpuUtKSa82Rw;t9s9sy@8VUP8+SHtwkkb3$6;{xsomezxt|xpb_{#ehe@2f0V@JS z7T8PBVsX(EN5=^0m@4oX?doms@q+V`x7WdTxqH2c#(Nl_$QNk6ffhDZhfHE z-HiYqZ@uiYjx-R|dmL=AhZ6n3540DB8aN{3I1%KP;DrwM>I4d)sKMrdTPOP6_H#m^ z?sAAH?N9DVp(K>og3$Ds6DUEdO9YEVhzDnOqXWz=6DJWev*JYGW7%{ZH+$pqT5wDj z{RoLi*6E?>Td=t_ehvHpaln9EaV@$P6DcWfu!)Ad={GNcXp}K8AtMYpNXS5jt>9jq zl);wde~hl)1W&S2FurN9$ch_FuC87OxDfNo*2Z=2&Cv3s5wZw2{9t$C8f+}#B9`oh z9Xoe>e5eX`C9F1P;i~8u98w?&am4W`BO1C-ccMI2Zy=7F0~p zdX-2S2;-Cy1uOYWp9xN;qC+>~!YkKMG<|p#s_mISud3Y%zJk3D*xK7Qud2s!(9zS^ zj-Pv&c~!mWQ`iX~9__ph_4BsW&ufLP8x?`#)xzc>?BTS#9Q%4*96I(`b9k+EWQNaZ z;5yyt!MP1j&~SXw_Vz=9BQM#0+yGmEP0|J=ZvfXvJ7gspICN*=GcXFy-H-z~oRin# zxA4KC9~+;Un8)79^}7c+>_A=#zLC6+fFe#O*~uiElp4?tIzg23s`ZU)pL>RKy||6(}Z;pz~BSca2B7Xv45wxVt?{=Qya_Z*PaS+@v& z%NH%H1G*k?^$*l-cMSA9yFe-(sgt*Zg`geJ71ee0blC=u@N_SDC2$)6>v2SX7w9=i z{l=IxWvmDrDn|QW>^Xjy8uZl@pfbgBxem zR94AU=KOHx{D?LqV%#u8tBs9{yXJTmW(ugRsymc1OFg)0rkGNhr|9A^T^!MxZW%4N z%-JKYZ=^?zO*83~s^l)Ef?607fHEWIeBS)b44r1&sF=|cm~?>2PJl5-%-Io3&WLNo z5HZ#zATm>##X^N6T4TgmGeawlPtO>hG%mRd&YTjBW}4He%=veyMn$F?8q|_TT2vR# ztQ$8?WUd+96roL0oz16Q!5vtLiqu32HPI&2BOOEq!5!mIB*0bQ2d*HvbUZHs zj&N&=axH7s}>Gq4_*GGc7QS{KKu0Bj&=+cPsDJ(KYa#mn7Hdjv6QHIPZeQ{V{9N00TFB@DBlAyHg!tOVBAAjb=me;n7 z9lGMZ>>X^pMN5Jw10>3m5p&@mG{`gtN^5x4d(=B}@VchtmMJelANx)SI6U2WqIkGC zkaIn~^p+`i#C6OU(iVTE&6v^_gtY~e+MSs;KGgbCH%l;kLt!!XZ_G2nx%>RnOb(^ z?o+wQ@m&N4S7I3gmu}<-5OIfTpUQ=+Ir8g9b!XNON=DUw*3YXPQ%SY+(q3>RKd1Dm zJ1L(A{%gT)H-#IZ)cW<*o^2Gh8`|be_vyR}Z#tob-mCA%1;VVKl#9y*x5c=Vp9~~B z=qaeu~78q%US4W3i9D!Jx^<3N;mQ?MdC`sqPLHtdOxFFxPctz` zV#9Emd5_zdkQo-hf)&e+1A@3^%9v z4vYG;a2OrjBgG?fHeqdI;g#sSrU4(b{+@#lUe$X5ziHxym9cGeLyNt&VavA7jrL7j z`K0g07Cvj;hQ@W9HaEBKNC3Jw5+T<&HZ?rA8BY0cZE0$L#{NRXmd)G(Xa%?AmZx!nhrX9^7!j69^Z;C1;Kbloq6iyQ={24N~&V@9V)Ft2igEr9jRFQJH6Vd9&DU3QzpycGm#84JPJxL 
z6rC-4t7O>dr~RD~Q%-;$_W9`u6#LZhQ^!|D3|S{yhFbzlZy1=b4CWJChqnfv8SV6M zoir>@Oq(%Mh4X)BreGzZHIC>4OE0WAyCV3FiK5z2p)HhAH>q8CQ2!HP@O7psFMAzj@hz3>*D?%xP2iG3km$s}EhpJhYM z@CxY)C+Q**STRZ0lfafqx`hO4Cuti5;-HPrT&m&Rb)D(rIJ8NaL^R1PO0j|>(fe=Q?iOAD@gm&Dkx5= zpkAPk`wvlvl`l|-6jqIA{dPPF+TPQ{kO2i=`?PKoLw=IUHK2j z`F;g(B@Xh}p9*~I!ojl#r{*jS&sjL0bz{!5nKas15GgFWP<^&~^iZIBqVS0kO~jmY z#&FUQa0MGKHC=29J~_I0v~%=@P{|WF%um9SJELkQ16Bk$fJfXD`jQY`^2fU>Drf$f zCX}`C_msj|aMPSWqlEX@P9*CW#wBaB(*8Efw055AZz~k=>`3H!RKVd|aVMLM4;T>; zG5eQllz8JDgR1zT;)uc(SLWfIVTy4e@u81)#YcoU&YdvIU?wN3u?kPWv1zE>!>BTV zimk*GX0G@Mk#ijY#TH;ny=g=hCvYnP7QU4BcsoI{Qh-uRryM_UKC?4Xc}>cBL}8^p zrC>JdX@`@ zPV(9N;b;Qa1!HR%;BFw3z6t?gQsA5|!&{~dC1FEJpnJk#9c+wf41+$fDcv$YD_?JZV86(ySeNj*mY`CZzQ;u$U&p5#>is9)cy|j-OFIZCbs(mUFrh7EKTA!M%5aWjt0h9Ri!Lhlc z&%x0-bdsO-lY}IyDs;1cBxj0v$eAmFLQb0$mfs|2ig?HwZ$IQ*DCdwuBxj0v$SL=$ zf;9lQ75WUlMLuJ1u`i>y#AoU)^_dB0t(I`fZ)ZM~wnqMzLQY(-u9d#zH~04tp90Te zg%hp~>!j9GCoCOlom5BK9uRRp3-pEAYhjHNC!I_?Zloz+rVnYqM#d|J67Xat;K?F* zve=ASc(Q$2sq2}5Cno_<4#AVdnr7k2_2qaooffc7q;4B13#_?Q;mh=96R6ZRM<@$G zWqWghe%e&@^W}QM0%DKbBfQ!0#YnC9=EB!p=?m(b0a(^c-|`!9KpE0eUdR(4hd_-c z8BPkZDz^eCQguHrfV4uAwnoY!zah;)(o%4PFVCAdqTqT#%4PPbdMkYS1TsYpg#KW& zdBQs9vK9TrVRpgL0B-<0ymZ&FSQE)~luO)94}HI94A*(>9hLZqBE(wF=e%bAl@ z4)$FN9LeRdID@e=CMmQpjYTQ2Px_YM`_lHML3!U0RdmKT*E@GmaXj5S_klV}nYZjg zI*OA?(60);Wsj#{fm*NimO)?dm+L8oybLY_j=w~0BkKi7rM>Z8249u8inAru6|SY}cFLtmA#asLd$Q;( zl;Tre&XBC_yIgVDLsCe7vlU*QT+fHV&SNW6)s3A$gjZOk&~Cbq$R$Z3i7(evQb=l< zTo1sOLV|&Yn{s}=pN7VL^C9g)iJYPwxgHP6Z%tjFN6x#xXp|8OVvJFfK^an0HwOUa!x696jxL!;`?9^ z*;nnYW)(zVNn=%g3U9Tq21dM^Xdd5@FvxFj4P0#`>QBDf1UazWTig9T8BPizWu+Ft z?x4J0d?KOUwcUORUVcltk}*I_Qq;;h4@!Sc&MSquf`k&EObrA5x8l4~Dh*Z~p-gG%tk``Q|*0SW$XSz6jmL^bn z*}or^txtItZtf=+Gvd`KH4J#Pb)o-Z4Y9*J4m&#BV1p+&CW@EnihF-FpCgzL1s^=g zeCXseSm6o^yJ(}-_W*wB!zAGsBjBPE^S0(~jhmZW8tvP*H?}tJXoWZfaq<%jZ1Lo+ zBioT=zkq=Y!r`(=cOP5N03=O}`Y2UA86p(eG5g`JTn-nJV5t{s7TzI)9uflBvj@L?_@~}5S&g8%)90?b9drab&1O{!(*tVgewQ;>f7jkt6hvqAf 
zMyPZ>hh{L?k1)@!7jGb?inZiT5>_d~DAYmvq;cnyPmgbTddy9*SCU9-kEI3Zp0H0- z&v>%no=+sdQ|J${al?i?jk_JZVwI<$xsP;q{Dg$y-dfMh_n7C~81S}aq%#&atAhJ0 zY?nEy3v~?RQQ|`XTX(pfumraz&@{4%QV@S5T)`1tP~l7S zDnJ6_))}PF{y~c3JpBX38HKQdJCgPjC2UwJJ!wo8udu>>^u&`D9Cp)e3wcE!uV{DT z}&nISvnSt38FHkE?BEPk%>yk8`yxp7ur>*w%4c{wL~>gH*`U9A2{F^3u^o zqwOQlj;tHu0&_1|&sqaT!G_7qN=W-xvZ?gaoVg0518#m1Iv6Dfci~enxbc$@HT(v6jch_aT&?CZo}R?-0X^y2d*jxUtgyaZaMlfl)@_poIpLG zxUidk8Nv@D^_xiQRRBatJwqwWbid3g8&SYsxY=^z;P65J{t-u@VXC+$TwD_>svX@v zu9#Z5HoS0csBYco%O?#RZ|0PV^k1?};VIoFUnIW29q+f@HP5-HhCzRaRlu9z1ALf) zC+ui?)HMwGS|I0{$1-r)^nkMlc$dMAN#T<5cz$%s0mpRW8?6z2^*uSdW^gOuIQIbT zXEJ+_FmSIfXZvTIe>`RhHzUeCPr7cq!wL8Q!00qI$4h`OEnKk-zS1y26?O1u6_ET* zyoLCo5I1HUkQ^oe?ik?Z(1V$K9UnL<`OE|`a3_L`vYp%sOvp-15J|(0U_PuKhgKU7 z4JTYXJa{GCwGG101uV?*x}EJkZbu`EYTkmv2E;fjRU0&&S@ z?0kk45~#>0T$E1SIk*-q`3-!W!UyiTazDVw8GJDKD8olNK2T#M?j+m~Ap|Z^*uPE= z)i0ht;DDOz1(|Cnn{CvDF%?cD8s9S+{OuM6&b#=oq=c6ESzzs!>0(gR-hu)l4! zk*&y7Y(ZFn^fs>-mLDeBC|FJX=K84@|p1niPBEq9FG$xe(q%?mhcw~gbp=I(88w@ zx|~uXeZ*8aWttl{&7Cxr-!fQ2+0~<) zMpuP0SNs>{m%1->Hw>F1S=r!)ke+ihhY1!<@U9`UX8o7? 
zFZ9>fJo|GlwC33fOKW(|v)AT^Gp(a@N563+a~VYX)geo3q-o=qeP8rlZ)yuY-xg|W zo5&_`z#Oem;L9e>(yJ zU9?=j@79pnp^kQho^HV#% zd40z>BL?%$HJfi4%pUPwCT#0%UiB3|9r*IJ12^F`kRh#NlmgyNjXxmFrv4NycpJ( zM~W8*T1J8@x;#{B8$B2aB!q6xo)Z)U3z8Jhr za&*Bs`$_lv-8T#kpym`+kx?|IDGFMt@TOhSA>u(&NmNC!ai3f6JT`%C8!=j4DI9HCLB>QvZJa z4fE=odHIpKW&S4gI0=+rH}HpKot4aCyc|!k$qup36KIMLnJ#FSPllvrc;~z>m#PjK*jKPq4ts7x^a5+U}#xm zD7)!aLGgvcvxR?9DJ_}5(^B~b&}|oVXLZ3Pql$_A1;fq$btBo4lDYook&f3|W-L?= z^Sewcy)>{d*afbTe^67Z#W#(`ft?e^iovFv8Y6l}X3PyXUD|YU)3|z~;)!s^6Q6bb ztozgM8`@?#W&vVWrGItP(M=!Z2P-etUaW zXM&5a>nlKT2fznzX?V`kQ0cPqo!6SCRz4eE`E2N^?VAD$Ry2=u%TyUx9Vo#`YH3<8j@ArJ(0Mi{)J^HQd%)+zu_yNE1 zrllmX4}@XQ<(|*COjgv}MB>g#5k ze^H2DHr@b_Ki+Ln!#SbJ;@XhO_IoNVJqK8Iq$Rj0SRTr(9BaMOcDd~aG&3vv-)2;h z`fqo0shl~J%%V`%VgNT509yt!g5L^ugmM>L&HAM9{lXjOmACTp5!_htzkk1!%G{y& zS3Z09zw((o|9JO#nlctl7FLJ!HNOWP!E#HVGtxd;uwYVOjbkKJ!eLl>D=U}y>M}t~ zP#Mao7+Z3s{&M{dUTfHvI~Gv?b)}iDp#P3BvsU^` z+RWPM|MAq5>=No%3r!vKmA`trw4+S<>oOIDf4#uO&Q<<;MJZdX{7tb6!oOK!+E=Lj zZCdrd9OZ8pm%{UZ=BOZs*V^rP&JzC$TnijH#Dnu-YIO3- zi5$)4K14t@0;Yo(;ATP3ht}e112Fy_Chx%qZf|oh;A1yFcHv_KKI-v-TRLRrK{P?0 z2A_D>Ws11Z2~L07m9Xw`!(}_%?*SCe15VV%!TVF8xJfPi1y%NI%Jh#^#y?WJUr_nK zrOJLmE&nRL(4RMzUKma<3_LlJUNH!IQ%xQnqOxXoE6kbnV8%>^_%aF6Wy1Sz%_dr5 zxwBhatuWr%tb-X{qA?2`_H{I2wUS`lIzVrgNE;uWA^2}f85`x z?w%Qmk-)jT+3&lzjeDxQPMtb+>eQ)Ir%tuLuv$$VuA$nu`~GNv@+o- zMKjOs*R|)U)kJ@bx&7948u?jF781o2!6@2sNA>JwW4mm}Si}&~rb`1^;INjZz>faI$yGAlQ z-R?oJ)7v-b_O#+zx7+DGMDII054%JUUUzolF{izuUTAA=YJIe`p|P>8eQ)Qs9c@k9 z8U&v{k>g7E-<%nd|M6YopvP0?I_&J{!^hqXkDra^MS%Xm2~&xwgui`#a^~IZe0coA z?RS5|htG|MU%M6_KX&Kk>wNjlyT@iu9YfyP@M~kp`!4eMnaMLVZ(QKR&tD0@dYQjH zIWqI|TlP9&;`y$Ar?1;p)iv1d5-Zgw(RH}b^|VXm`SNWnzmgvu@_G1~A5YAD@47v< zpvUFz8wB9|YmtJe>X}G|>>*ko)kv=@`Q9OK)w=4nbL(W#3st#rQ4kDqB1Rp%z*ot2 zD{aMNq;_J$GkBp?AV1!oJWnhP|MYD5?9cCa%!`ZhPZ{_9O1(+9Gy8ia{Ha1LzL{MKKGE%>!~yG{7}EsUB~X# z1KoJ;I^^nls=v?Ut&1iphw6wG)$qALyK{D2;iZWQpr?-Sh!w~KCbe&3a@1D`fKcu! 
zmKh@&2dFMa{Ixr0j?KLGe)u(vmOpuM=B=?h-qSZQ)bHY-RYtdr4Ucl7YS7((B>L3T z_l#+%Z^+f(=XRluqRZp*)_qI>IL5dQ8-9vq*!bn?#8_Z6h95g`AKD`*6b?eGt-hktO*1a|lO zoV}puTWXS#a{Hah+wZ=Qkv|fC{ViVZC)1$Mi+<|t?i1^HIZbFI0>p>^%bVes-@Nl8 zx;M6O%EQOT@4WDuJqj5q@*kG~VGC=vnYX}~@BKL#5PoIy_S=&)&qKSt;GaV_!N`h_ z36>uaulvI<|77Nbf9AxS)l7GSac+`Z(tQI*B4b~E0pXP=l{(7v4i5G+IG*Mm60+i| zs-AvlZ!w7;U7jNzuWO)lNE{p(g18q~S63G!>pzY~e;CZB;V=YL9DLH%<>hxn$RIN% zeA_{K?mO%Ney2QSNF8=95PA`9C(8oXR#1!ovhJ67^I{;A8j^|h92)fXck``ndwI~!bO2H20i3}Q zSyN_yeEH7l3#N|SKe=*e>=+5x%=6bUm4;u#OndCjnIFG+=j>Sc*!A$MXXHj80a(np zch3G9YkTYqwefS_DQ&@=j%ge$45gS(X8p$>;5(WyNEG5DkCEQ)cJX_B&i+2{5h4)zt%JPJ?en<0sh0*t zuZM?y(d}|}V{mq}4hB$zDB|rQ*nd#Xy?hTIT;07czRT%xsr5Dv_V*7y?Sa~sB^kvD zwrkLR80{b{kr5x2FtyO&5=QOH}5^E`My7V$*+`=?L87rSrTSmKBMrIY0n@2Jp39YL@vlI z#gl5mb3bH7-XFoF1oAL9@RZfO5 zp4>Tqh9Nn1493jdGF{3Vhb=dZxNA91^TlQu{Aali_L6Sx36W$ZQOs6gF)+#+Edqdq zpk1$ZYaE((;G2I5IzcZOgv1W2m0ii)ngqePUdM9_lt>axf?2Sv*D?sRS}yu4ScT;E z8h3&tq1_sNtEPk$!6u~MhvKyLI=9xLjZ1NQN3x2mBSlRK8R{AhE59s8ef-kdLQV}Q z zaa&v#ZLzRiC|BASAJ#qxcEzHw6?0%K7lo}7s)ZV1rLam^ym;KQ+%lPfT{!3hDD(^0%{YWHZKbG2)MSTBTePGP+x_luvOT`S`xqh z?QzRAELx^9Zkb0HEwdwTnWja{G^5N;VVBUt{~`?;J^L8%jB}AzmpyQfhhJQkr(V`ab%r`t0DV>$M*m&TN6PPX3ixcU|?c zDe67*JpqkX-^5G}G7Vx@n&hOC7p^l`uuO+J{rc$4sq?CN`p!uBN3UTfC!>IBz;YSZ zQ?)GgZU`VsU4<;V*jwE;|s5a zkG&0CSedk|98ALNb1HsIGqVl)j8M*V_aZr2v-Ep?QuW77r+670%C>Eta zyQZ3_7qrg54Cl(3_hGQjOukL6h8rO~c2w~$t@`$AnRez{C=dVS0;=L$@4R}_j)wni zeC93q0$|0zewrE{Wi#LB?(?#BY22IQ!c?3(VYRU_duKp6lpmv98{0u zi8t^3;P_lB2+P7(b+o=3SmHf>z12sY1O3e8n}o3_BQ!w_u%2S39dlaFymJ=rq^Li3 zqq0!n41y22`iJaVNe4T>S2B0AwdoMq_fiVh1p`hmJTjedD|tM_2~~%Old7r|cd(SG zB!-igl~ZZEhqot42H9R0$?#I34-R@-eubC+7tVs@=armmPoH`l=*StelySY|UPd>tRBE9@kV-XlXUDY4tt2eLI}2FG;(OehQF0W-)ry~Ule`?OKBLigV*A51~`W(h*|(M0tfZ55MlOt|sfoF#tMelb&L16{GvY`gkflKvr4_cw zp&aauvJpS0l=7fT$5$GyvD7X;n=PIz<9QI6*c$JfR;RDnKm*oX;*R`M<`fHgiR8#? 
zPB9(l^GfBDiXgJDEMvMa*0}Zwe-GAZb=wf*3 zrIJ>oaO9i^17W={4;a=c3+_Yf1M=W2>AhIlT3iP%o-5;d5SZ8+@0%XO z%&J5;RU)Bw0HtNN(Q@b*W^5MAq=YZQt-?aQ*leVXiH+i5ziYF}Bv0d5r1;@)sxtw- zs#MvsMvV?r?e8;}73bGBhB0b*vqi}qSJl0)Cc;jWt3>X^Yb4X&Vc>c~Gt1ed_>v{@mE0=MS!{GrL@qa_0B2!UA zROy=;*rO^XWZ3>*q%e#4!I{N6j72zNl`(<~*YG zei47-XAzvQ!1u^dF~?)9m3nUU_B#lE8$UL4;>vJhRTT>?gb#LT$mQ;q404>|uyJR@ z*Ej91-@C(COI5Pr14rfDki{{79R!}0;5|dGu1(c7&Y`|4N|NUkY$V^Thz%RKss>zM z=V7PFIO|I%q%l+}0e!wlSY;IXd0%Zw-X9uq00Ys8hqOc zb!6{Ab|f%=QY;ze6e5Uli`$ck)nQ?~m5K^h)wX@u6jcLKlBy6q^YF_W@AtwFucfR* zq(u4G<9ige%NW_d30r_Gsvo-*h_pm!)20Wj@5BZ8mr2*oVXzuuIGjf0J289^n^WcL zfydq}>322kYuIJi4x3!hAbh6kP+#vMNltfCdf4y`d*`bsLNH_@Ua}QSi=GHNUPi~4 zBL=f?z}f#S!hxP0aCP_j2A(A}&ptzUk_+-)ic9E5G4iteGFj<2g(zPxa<)-Nk{+L5 z$8c)27t{i@71*wm#nhMI#wNs4406y-{NC3w%;Ker?0nn+*E zN^{s2!wE(%Q!FYY7BB3haFcm1PNrn$y;GujqpstK+nbixa;UEona5(B&K{e29h*y& z*AZDq0|tS*RTQYpw4}1Zra12Iym}F;F^YK&8)1+jxd0osa=`AK0@ZUOdMS9C;hF0c zjBFaLveU+rw{O6Opj=5SG;H(jLd)ljkX6|5!|uX$Y%I|tmhFX|xNv*&SUGkj?A83r zO*2;z_RlgRh$F8^s!6NO>sX%w?`T?hVFa6DU9=@3Pc*V#owF&WDt+1Jt6OMK$3ASS zs*+=iY3sySNj1!gMUKC5A{VJ)oah!2iVMd-n@>*Qs0ErVo0y9CDwZ=y)08m-FZuKD z%VC>Jh%}SJtJg_2y?YI04=-OLC;Oe`0XhX@wKnbUZ?PRh|%0$=(y-=l+etW}K$D@+2skO09G9Pq$`ntBU?Fo^_m}Kl4 z= zk(%gsu$=6qlR7nB{e9I#Mas&di~p%mN=_&(|H96(O#z#IHj&fke9r0g zhFMb#Sb8WWKV&V5$+d84In$|S!PK&era)?0AhmX+dDg(C7fz=w52h^-8IwcS9kV)v zwLx>&79n9amrF07<*ez3k)5*zoZdFAD+uZeLdKL^*3?_Jj8Wm0q>#07Hi^?0e$MGZ zg(U_kEo94)Y>&?B60FUdSu=x40!&5>j4fo#2&HC@dPglGYfTIy8<$of6C5&HL)OY! 
zo!0uutmR?rn!8?2l)$D0E^YZN*Pux=ph2w(tVK1!w3^A3skAL4J43n@m1f%+@AzJ- zM9(PEf)Zm4C7BntPoNvtEs(90etd!K)Uoo3oa=QVYeNixB|=TeSQs)keWBMfPR3R% z7px6gg_we|02yPCP3-(2Yhi%2v4%+$ zQluu*sX61NlZi2K&kMEs)CP!+gfe7(oTx5{Z~ z=Vx44Goe$&1lfw1%+F@M6(>9WLdm#y(h;(D5V>&xY-6Pp85786fMz-ifs#>jtVaPImg(E^2Nx z)5KZQrp*OGbHUi&DRc42c4&g4;*0w)?mzkHsg{>pt~`D9*~`z4G~CjW;<*9ElMIdW zaLAVTyF?Ni3um;v^z6}RM-NXW7T!w99@Cxpb^sEdWIa`IykIPIDyisJO4g|N#B%{- z!Dq(gX=85Cn0wQhA4;)(0p-Va#Rfmq{K0~sdvHI?Iko(MI?nL}=L@;ffKtQ$C>D&%5$_E=@ z=+(z^S?kRXb2}{x=3Q!uNPr+HL&#$OJTN>)1N3;VWfh*y?G*Vi??UijBErRqn~C?u zvX|q`96?5E2gVLs*|1AMXnFfm^(p%6NK``wDL7qC1CMb5Jmv-P7}c_K@x)MIiQg*= z!do5M1@n^C+W6Quro??~i~BY;?%TAuZ{ydN9v3zeERP_`OncUFvC!qj+7>5}oV?rh zbZjV_ay+Ga*c=lywHI+{iLkw3LU|;k>lvJ5>KuGZBypn%H6@J5Nbxmv7SOK!GCQVp zY-HVVdTfRgNj01k`!eDJf|uk=z0ccI_3&j4YIVb50aMaPgAzehY&*G*|1zPHXAwGh zrR)X2agCzU$|+9ln1ANNh`o+y5pwWu&0Dx3L&3%lGuH7E=cDnAco!V);N<{6Nn;l#>d}>VuSZW<$zCsT>gOQ2hR!b1kG!_I{nL|J)zKmy zzMb%;(@GqkZwsS>EMavKG=7YzpVPExs1cB3nSUM!z7a5~97C&?S1YNKmZI4b5p_1$ zf7m7I2cDv59gE9K-o2~7wNt2X*}bcwb7z}0=eeO(O5e7lVcX7KO~T$7pnG2u-8xV&(hx#;w&h;IpSl9@!H)8nMGXp$|`yXqT#PhYdEdu?f*D?#my~3K$mgXz?D8UU9u@yvMH3FeQv|)4HFr&TCQ~SESFGf zf_=bOgi6q04+$DhLws@!mV!>LWj zH-#+er&^D$Tu?ZbA$AFQ8wAP(>F@s41!$=eN zM3*jp-N~GwXVarjtq+F8plkvNhPWnCgtAg@DHk=RqK4GkghbAX)iTd5;abjeTo1QN zyM*h}*b|4hx6>(1XMaD>1{&hcF`Hp<;#e@&<2(*y(&k4O#-1Dh%U|puNE&_oA(X{} zzFMSHLgH?^?ZItWSAw&`lEyTgU^3ZFh>V2;n8q%-=a}y5^K=pt@ku~a47L+TLg`~5 zqCKRmr;WpK&L$@J+Z>!y~hn@!MJb3=Lg7c0(JOgueS5y*RJG%;k$JZCv=8S{?U zUut}#as1(l)f2rFUk?;M^oi}^&y%?1^4Vl8AZUb-`lif<0bSw0+|_fL%daE`(pP@L zX{@=DEoWAX-!DDH+YfVBZY@gqiz3_B)%w3!rNO-`c1V!D&W2A88$K8?crpKqH;5b& z(INYthGeg%BVzsIC}Etzp^Bwj*AW>ej)<^g7*;Db^*)C|NT3w~8*JFtf_1JXE1)AX zSfb_X&`buml;a*NRUzpF%VXR6BW;D{rD3+PF7A-R;mMj9OES>GnRVVal9>*yYyu9 zr%^`nW!jQFc?eJd~RIV)MQF+X$kV0R9gw&hU*R`o0m@ta0NT zNA84Xty|lybL)8dTSxk4gImwix}Ay5Mz;Y=60V)3eBR~8*|~#XgV-dtXU@yQqpHyf zYOzJ0s;#@Rb<+`9+6e|nA}v8;PHDwCQBgv5v{SuSgD45+QaGB(L-gOYCYvboQ zjQx?NHXz9Mn|CQ?#>qk1Xdi0?3#2g5 
zZE4PTTbm2q$<2lCl;$G0jY-_*d-1Bg?k-_G*|G=(N7caUuMcf==ehIU1;DkqY%@MO@*vs92N}bT zd`NO!*`41vST$wE$UHe8E%?$!rX!bTI+E;ovbzqH(}l9Xk9T_&2=L2@GP-IYTm5!9&2epwD6ePbx1hX!!Saz3tb7EUBUi{` zuIi;s;^NGTOq%1)d0nvB1e|VDOgS9lb% z+4*>`egD0A2Ql-;?^Vnq;HC^j{62xqvS!`)obj4 z&qKZ(C90NnmpV!pz+LcFa63xlb4(~CjzJ3Ia8s?<2{~BFxtGODrO4!R$8yIqrN_4^J+5JTX_*Si z(|o@a7BO3kIdqg8;cU61Jkr97IW*%tJ?)-o-D=8Fu9js>dT1N@iubh)bsg2=jX*uE zrlP+>slybd^**r6gtB;43Cr^q(1`6vqBW^0RbJC0t4Xy@Q92+gaWtOFtL|c^fw!m_ zqrVY|R0gp%nxm#v2!cNBJ#kz0uzXF24fhb=q%7SBvb=NWqk}FX(l(dvohwTk`#<}XE2sA zI|?=NAuy^}E{Ip>UU47FS9N?#trb|>)pYb%l}iUv7_WuO+Jh%=G}r!Yw4|CkNNb?D zIc?bbcUL$n1P!x^63|M0o5oS$u5=iRxyMG@zpdhk{yHl0?I^9gDrVNIbyT%~HwvMq zh_5R3eC#auP)wVv+Wjhg^j9r47X#W5haR;} zuhFU$@Pso?+l0CZ$26s|3Yq^M8ehITcs0fR@321INij9!8$=H}8!kbQxRdIWdX4gL zk4E+2!{7f0t=Tm8 zSA~_0TAh?ByK?0aO4XI?k&?URFNkz1{$g`K@T(6gDt_e!zB6On-L$)5S5s?4=kE3f zp<%CpcNP|-sZ`iHoV|_h9kWdw9$!qy*E4V<~wDqqA*G)N@p;c%i%be%|UNNf_>=F=rvSgi_`Qi_UIePNYpP#<-)O)Vuw z_NRw!jo847W%Y1sEBGF>c~-{{r{lw?#D0(5AJ}g|bcBw!pd`(v;oK%S>+Hw@7dbk) zj$b}(TVBm0Se93h$W+@k;vZ4FY;(F0!5G6@x|!tHb^J2Vu&JJHTu@pRP-f0fs~#`b z&`v8&xg{LW!o^^HaL@Y-}6gLG{gwn zl4Ojqss|M0N3(nkgFX0&5AwMlSm}`qYkQeHWRAsf2VQFSEzk&HG{Lx~iT9OJ;uiLP zm6Ui%YLYZV9xxD0mm?r9BQ5?Iw^xa+8<8Nk&T`sf({D11M>U93u%(_l zeEhKg(5P#ye!8GCSWp?rubOC|)J(738eF+GP_ylW+K(;GQf9Hj?b?Sm!$rHJA5uT$ zr0@H7Eb|J9d?a4V655S`jU?teeNjwud~G5LKQwhvko~N6J&c z6pj(FNcKeKWW{i%sRMtW?9!Wwau$`N@Yxq?LU$cRqdBrU!*4qfMkz>1P-pR?cRY9P%lrIK!eHfba! z&R((Zmk$saRT&Vk5E3Jp%RWXgC95f!i&D&o;#sJT*h)3OLN{2moOqUQKcL$=y76=? 
zrdtW!$b@8}KH?uEh43`$Uyn$(m!x~j1!wxfiJ5Yjk^^ZWrlxjBd1r zDW%ZhXGf6a9Zo4ZqIlVkr#McaZ_tel8}UzY!-+w*zt6VpCA|lq0{61bf5|{6BQSN~ z8*I>>Pf&pox=qsUDsGac-G}c&52%dY={j<7&?$CDL`X9AdHV2ma;Lk?C0Tp=@V#sL z%Oc%)fQ#~_fmG{lnM+58yj9d?tFU=lb;gXf!1|pPhe+w1w zIc%hT^MBUih{YFl&f*`jxxMyRx|Opy!jXNh;&cTfx)RfZiAABTrPEpVV3vIWqy{o7CU#D23Z$+7uiB4HADV7hc81b35C@W!`DrFUoDMDqe~zjO3E!NlTF0e`XWd>g^$66}$qM}mn3p}ZoROZ|qEDgSIRfGr$u zh)zKj-+Jj=quuA+=iC#ztCp)4#M^J#{*n1Z^YoTIzZ3&o_DrP;H@57#zBHI-pIAEa zwNKL41-I<+8v?09sImDY_lNH3#>WGXJsxO$d@5uAjmF16&;&CpC-zL5KFQn=Y<%3G z6v)^g+S>S0<%gBiTlWUq_Xf7^ol4txW9#1Q^}+NN6RRc;eUkoAaO+-wVjykb-=$=R zGV{+JIDKF`vv&O3fwEeEtWfh!rpLY8nMLybQ z4ds_o!*fEpJRXz(DrfnG;i~nrb@K4_BfnfemD5f@;A}6G;5h`#+!k@6V3x`d^9ZzcQ}Fv72-Gr}L*%%g2p=1BD49q6a-XUVCZN zrA?vCrNmBF0KP*&b_R(UY1 z{C8%O+BA~d;iJRB#KKS>f3fO9mB0BGLK@2^R!nxk_vG78eqyQrT+8K`vmrE{m>;Bn z%kPDqu5Aw1Hh*H-$)G<=G*2hy1?gWIxG-Heu>%Q1`zAunjKC$7t?ia~FOAf4fbSh)Vaid@B7Z8PW;*pRo{hax<`4y|b9@^vF z!>1oUS9iMZmMt@oQ$CS8p$%kJUR(2C-P?6HY@0vL&Iv6o_BT?*&sfQnr3j&(nN1qG zy~;3#4qJbE{d9V1Fuimtz07Y6rDUJ_&hhU|r|`iPo~f;6WwN8roL$#-#5K%U2xgDo5ou8T?>Z) z<~KIqG_DRMS_xSwv*6tR)BADisMz!w>9Fq~z&W;J(-)jJ%j7rRHF8NQgg=n6cJj#& z%08<3u;yc9`)65sW3|)yD}(tf19__g8LPoB8z@Vf9pQO(17wz_jY7ZZ!ooZ#=betnp&&Mex%) zY8=bDSaiOK#9b3eUUAb{@tG}a+EyC0l};F@Y}Ni%0o}UEdffiyZoL6#yKWX#1yZWN z;1ZHDA*iFRJegsYEVe*6X+nV#`)vwGzI+ubZKbIDD9RUmyez^%Cu%CY3} zZ;p2bvQ}J6e=qOtyc@QSx3Y5x+?xBpf47!P+pGCkDP#Y?N@@H4T`GxrzO89lY!e=AJyt=4^%l-|2W_tCoa-iLL6&DHj9;{JBCt!It) z?~?O-so?i2WwT|674@cl8^gum+0|EGIn;- z@mTtEc&#`mz6o^>+bC+W1A&VN@aL~u2i-0SHWmKziCbKQY(O;LhX4a{H^e@j`+xMwLUg->CiB72^c*x3pDu(MM#b$0gn z=DEEF9dxUs8*TrvB?vPKB^?efcrSA-lu-`6G;YKK#D{MriC+K| zCL`yeH65aGHJVSkmA~SO|C&qrOD_2@In%GWoWJFYf5p{)mSmew%Da)2H}>#UQt1c` zr^;+yfJ>jWPnA1$n$)}djTIW}-TfMqrs!^-Rg-s@->cQs k&bkep&f*`Q(&mPAi7)MVe#h~>C-!};qr(KcJVx;U0gb5+TL1t6 literal 0 HcmV?d00001 diff --git a/cross_eval/__pycache__/config.cpython-312.pyc b/cross_eval/__pycache__/config.cpython-312.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..08fad4dfdfde8262db356ef3603236eb831dced0 GIT binary patch literal 23740 zcmc(Hd30OXdFR6pVk1EkAh<|;qPT$~L2afM%aUk`v}s9{EXs>$%PhnPQlLnHeh-vH z6pZOK(@>6bOl2vl6K6~%X-(B`txR*K%1q}(ZD&r^)20OsDuhvHMs0k~G_xH#QcqH4 z{+Qo)-vW?^;-oX3K8g3;clYJH-+u8IPN$8-^Y2c6J3R0L$Nh*t)W<1DboVoQj+^72 z;{>js6Lf-pR5zyY*Xt;57&Y`8*xT4|#M?M(8Z-BsS)OUsGG^_!D&>m$i;!;~wT;>P z?PHF9$C$Ig4u$S_i`4kPytmA$KzzxvAsEBjaUmZ9Gx z*!sPK9seAH6W*RA?otP{LQBM5R`u8YQXzr;hfKy>0kJ$w&3N@e?Hr^e)uCW$xb~ z@S%;urhS-{P$tv}b#LkWYlY27t7mDug@(@=`+1?Uf0NMEUney8Zx;3lex%hGai9Dr z>=jy&*RVcEcwE?m^hRNy(294HU{LA5CWew_cs^H74{45XvHra5Vqpo zA{-R9;k`xZ6t?5tDm*Fdz`Lzr-9y4o zD!%7XG%^$(o)Ckva5Tb?MuS2)GTh2v2#$sYme$G#BLY7zhNMu8M;INQprU~v6Xf`H zS$i-NiN;t_sTbeI6T#RyybYa^Nxv~`?;H+AVsbB8`-yWwDWrV3PK3upqv449ZR?AL z#^pS}KI=O1bZ1}J;eit;yZX9L^<_&=^qlBA+SA*mrI;?A3lE?38{)RmrI;9Oqpz$S zAE0>Le2Jy`O(_mBxiCL={ zx)2UsM9eIOBH^g$H)oBZOQFHIy{&C%Vsx}EI1!7+J#B5$`S5t#a5&Z`hQ_09gXcnn z=O0?y+Q!h#XxuV99&39nursS`3w{^4SR& z%&G(kfAL%}CJ$mX7~!Sy&|r8di~;k9qkQl2Q+!as$)sK4 zheE;F1i+{jLyLq4Ly{E4NP}l3WDc@^F(ffG#!B(g@mP2)9G9W1m5+*iT_ijhYKukN z&IK=o>S(RR=moY~Se_kNm*^<=IDbK!q&3r~MU`TcqnLUm2)Ljn*5)iPLT&{ys)<1w zZ@{(_lOCd>T?j`fB%B0-4ooK`oDB}1=fe_hz=g075?cA8@FjU&Xw8+?7(@|iA|&x_ zXQPpjpskJ=8p7N$xrvAv8V$xmg0j$$q0iw6Z2~QRI2Ia{_#rVmmK%oFe~fkj&>G^$ zg5r6!5R~}CJ$=V|`f!S9;f8{v5`ZKsVK*;eJ{#_@ACB-sbmD9bdk~$7@zP*) z97`ia`J-KZeORMtj32`Y0u%j)eCTCxsUDF4PhfrQIY1=s_MnJOMk6=@;h{;kZ8!>{ zm^_W4Fkm#DGx~LNU*ay ziX*{xi^0#K(V!$@)O_ed2!P4KlYD3IVSW&^9L7Q0YvXyo zjqg+P_VS15$P5NY`R21&iu2gCi3kI3VAa@UyA&Fnpho=se*Rc+1e?<=hF+b(xh2@( z$HEf8fAAd34rBZI<{XH&GuZXZJswgAz#qr3+EffeK;6zCM>`S@GC?leX?eFOlg&p1 zWfg|7VE`Qk%m6-zj`sq~90T|?18HE*akSZb_z9kJz}37KwES}OGDQiF0@MJ;CxIHp z7!L?*iw?ChAXCqS6vCciBBPW1-%WpoPSIJQKdf04$ct)7I8^*N5KDV*zgPwfF zaB5p;Cv!X0tnI>37%0#muUJWK7j8gqY9p+-sGbYuYjswLrcHu$@BuNUQhK#0V9D6<#8t0=CY&sCB5Q+e)6CMTPlGi1`E5IF& zv-$+g$Q%ksh(VDEy}i`E2Bn=LsC=n&6emuYBorJH&afkm0k2sadBh_V!mx}fa2Q28 zck3{QOa(AN6?HI5$0#j9#~S>>$itJs_2)yNShQc!p>-G+%W)R?6^NBkDrgj-mKr-7 
z6tNCiDz?cvEHI1}WJgEF(lYHJ!WfPag^UUlgTzGuGYp0av5R0)fOX=8`Wdd_8DgU@ z7()YWg+YTq0PjY~U!lxlM*H{(XxSP_UVj+YO&3S$5r+bsum;cr4;_P}!3iPMCi7Kh z`O9y5sBaq=E5LJQjnVN5Dei7-8xM&J(qi0G)<`UtScWoS;*22Ip{!*vIyQ#EAWs~Y zvQBAI0$Ciuf{cyFvL#qXDH;KU41|Wjy~G|CE1j$7M=VEc51mS?%!7#@O0(1WE-9mA zOyH%5E;&uDjH$3PDnlre1SD>zfJ8R2fr2Iqnh`wo3gN-nOW+||`8+Q8OxE?}b3I2- z_4E#OKGoHGs?V<%tEf&R)p6zOpd!l3g$6p0_H_1j^$h|`Xb%W7hd*R}F>3{RvfstKa^kFgmIO~W-A`Cmf}`)aRq-8LD)36TEbZ!S3Z-nY*;Df7s^wmjY)UY!mEqri{@l;d(zsT zu@_GtxsS4RgbdhNcAT@nNt%p6-ZN#*^5SPrIO}1gGg_TBGt4`gwSf78R1h0flo&K1j+74!e!H=7 zpEu8O!`ze(e|r2G@MlbLhq*J?Id007FpcEbWYszDUFH3TIYwuJJu~JZouK=iHD**( z4r}G!RoW0s6S@(LnxA_oOlk=ocjbw7ZR*jcRV`bvO~a?QIlQh-1KMOtC~CK0oBB^} z^J>EQsb#)tVmmguCo%-`%8*UFiWIK#3>S4bbiFr7d!i3)&FX+l7=t5GLp(n%w&R^G z+J~is#zNd5uN?@}4h^uq2<#h;f@PBS2ebnJ2IVC$f}j08_p@m(>DqDqz@{BYOF zffJplPIaB^?aP{jtnag@G!?DDbs`n!(W6=zAhq~mVGV+fa=9T`X2o4uEN=Xc(9@GNWh-G*K1 zlHE(r-5G~x*-?{n)Xbl{>!`cu@GU!PQ;ymN<6TF?Y8mIQ`Pj!fO0W6R*UYb{?bY+I zeqiS_-c1Xeu11ochMDdaU(NhWi|)nJ`GK^r?Q+kDPTxnAl=k`QqwJa^Q@U}vv@TUz zx3Dc;+K_ZNUN(Q|^j_PyQtq3tNtZXy6lE$ZXB?|$&g5D)mZyy6vo9|h>+adzvwP-; zllF$Bu|eF1)fvhahEkx$Hm_j%~G<5GO@w_y>nij`Sm zAnisF`5IsrbHbc3UC>FoE7pWLN4_s`NO9-B6J{b&2~)zn5AQ1*LE20}{JQwvu80Ix z9WP7eKrbX!D%`B11LUC~7z>F|v<}NGZ5ye-LXavUIzi{DPyt|4lB`IvKmcjW~?$KIy5u^u1{uuWt>X#9<(fgUY5O<>u;5%KjN#`4gvx&=}Vy>WQf^s z$Qqz}&l*WE5L+;1Ix%@&i7z5eJWWAAf~*VkAJ9fA5Mw4yjK^u`v0~{VC73?;N^i`Gcb=&%x>56{mZ4OWL_{ zy8A~Z8*zB-#j_Vauy0tXe=qPs-R_lY|6+Zrx_zd5cIO*MS52I|cGU`SpFSqT8{kWO zGwmE`}DMP|AqFA+*YV0LwuqYY? 
zRy$a8yQ}Og3>hgT~li8KOyP z#BhC_;oM7YOlpl=6roHOX^8`1;wj7@wy;Dx)F6%(U@|6VZTJvl5~;>Wwup(fRGKIT zN;}3C`_6zEyvX1a1cQ!?lyx!Q1dyj#SX^pM213@1WJWU#rz?d8B?wV!Iu%~lsQ|%-p?Q}A@r};`03r?Ra!crY^_R2q%}2Q%_ZW@rn!&Y{$+Pt%H6g&baN=}eq#EWjK#fF zxh-Ycw&L<-T&;`GFFw7>=^m?Isos&S+PP}bIXmtf48^up&QN3{l;%wuJ!~ueMzM~j z!SqC;qJzp7Yv8Sn+9^Y@lCLp`gaozy?`McGp%d+S1XUxx5y-e8o%%Jo8X1o0KpqWi z@-%eC@{DWpG)SO4LI0LvDo>tHa3eIXbq`W$r0br9SuhY12C`M^=(v2M|A>|fAWgJn zWGxwgrIs{VIyW%1#AI5?o!Xctb$)aFiO%tHXm?elN{TO1+lypv&|pRddEgjQxWgzq z$ppwiuULvcm@ zi`0r!ItOExphA&@geB!e`XnK*Rk~aU?Le0D$i*qN35OF-`Z^-h+{dL{m+m>;Qh6ZR zeqgEmV6y*}>0^KUM9-3Y%i_@`_ntdbA09f1+{M?H$`2%841IXu=u&Cx;+dt=$CD=q zKDHRk&C^F8I5<cBES4q^`tQ6arhPB%cB zqiNlhiiG}kD|7^=0S(t)*#w=E##*3R<#}CqC6L3D2?oz=>OBFGSLfHqcl0rb2nD&Q zfC5#%Oypp{68T`x`b)bP==B>Io|Cb}#=~9RozERTHE{Cy(Jn|#YY}4~poswuF=E+b z6*g6P3Q($z^~EG7DBc)xT}L*>Nt9#3X4?BB_?qVKySR$F8C#~rOTy0l-gHUhj45L; znRz1PD9_ldlJ>?-!_Jv5$SfIW>9TWkivDj|X*hKIaH^q)Wqe$WHsqZ`o7zLn4x?Xh zAd=6><*{+f9!!CWBOQ>SN62)3F^zGP@rbO~c~_x)5Fj*)7AsPF6g2E5m;`ee6j82~ zFurGbS1Cn$q4>d*Az1RUJ^Wh76WOu{j4Xv=cvSI$#5Y5ka=P=_QP@U8;Ku!doVFh> zAxwXoHN!kPJ`wvdjWDY}AN3byZF158%tu+*Sn$#SbgHr~RLWXmI2xY7K4s0}2-S(^wTT$%9XN;dFz+= zXDWOdw+Bp6rWm0oQ(B!VsmQo1AC%b~=2gySGXDYw=PbrRMvFoRSmTe^kPkuko?c~l z9)l(-2QCGPkwz@1^s;}JS{Fn~1E1IQIR&hmFKx;xNd;q`Vp1cpBjhl&?g2}s<$qQe z(^Qr~J2@uJ5F=X?dlDK+CJ92^kk=;wd4w!u>z*81iz7#st--!Q@wu7X208g6JQGiI zI{dpLUNW5KF6#W2_<^JF>SFdv#jA_#h77W0h=8V$ykSIkJc!|2dp-hj(?o2jZ8vv&;a?Qi6Kl_J`ArSmNmvE$3t1`0Kg0XgER!M7TPg%j$-*1`F;TnL}inyJs(c zbKm8CU)Vo=c%{fWBh0^Wx2R#!z2Yi;GjcgH|7_Y-J7f5%fwMI}FoQFk9eiu&lAQ-9 znXEqa9}nLt`_rD~!_TJBoChmAz@#@uc-Q^f%MTep-ocl0^*+WTaK;ZGV& zPX;V&%!I_K06{^Ui)qerVHj8npE^Mo)*!WzxS^g(5MMCsFN1ha8Gw{&@z|3vK;^F> zcQuXP0?wa$wjNQ!D42$IpHjxGLi{7zuzX6sRVY$>c+d8(!oHH>fa#IIn?7Py;9INK z13OU=UkOgNZ)mX!mvCt%puH%#O2t|U=)b0o3CpxTVST+UVSZhoFh|HRR+O+HZeej7 z@{rHs_DEwP(wNZyBHe@9oBDnkFfvs%Wt*}m?2s()x1jlY!z5=K*__K!V?w#$QCpFHxzU%l_IWcc3~64WLanW?M65w= z`w?sI6Doh@I#sz*YW*~QNxZ^#E5Ey4eE%nGYY&VN)u&?ac45|eJ@YyJIKg7Lf^`VHrSgyyae3R;Zivr3Psk?&X 
zpb2En4JSdO&H7W?_Ad=JBxaB!Bw+}`Bf~R~?3{yFqY`tQPh*MSjNbJ+D zsx688SgQ!onwMe|XQ>U=7bFO_##saAo;8525xeCX3R6OHyo=5Zo}SHFIQeMx9jSz`MnFTrOS3r zAI-SS=PIsN%=^;rI;h}~`_$E^=8vSyo2QRuNC*b?Blj$Bs9qN~BscB7 zbw0WOxqB@;lPw3YJ#+i)j5)LQiR9LvYg2bb*u7eJC0h?&d+AQ;Wea$*#_iv!zj@&g zfBoCP{s&X(?Oo}H?v%A|#yndzzw2-9r7NXvi=(%Cz+{#7&QNjJhWY0go?kqD%aV2; zobHBuO}>!xpvY8ao<2hQkNSlpOUA8O(9#Xd?#(IpW{keR5o@bV;pv65>2m*ad3&n7 z{RidSE?Z{|Gkq(LlG(B^^vvio#U*b}U7ot~TBf9G{@JUpna+&20t+R*VP7rcJlmnk z_B3JGI>RADix)=|FE~)ze2-EbiIXZyA;>^@}yj{(~w1!P}Kf{v#Q0-Lkhi ze8Q_&kh)7ufyzT&1D4@Op&jhPU5%F^CtKhqkWk{n5 zgmKnAq*O25&w1c_Sf9{WPJSHGTx)X82*hjWKDF40ON(j|m73`={v0(gTwkS);JIvS z0|oonq)c#HdXf`!PE7TjXhJGCs6UN)T;ybl5hX}sU{6fr%wk?OZG^lX);&W8v{g4{ z$Qz5sALWV)M##L%o~ivpjcxc0Be&KYuRD|{HY;9*BzjY0Xm&5( zAZY@#I#gbo7#ze!8FIv&jBis~kaHDLx|Jt?Y8n@)t$aa6o)R_acZrMG1n~w1V%7+soT$v3kjyJGN!#6x_3n+vy2%@#l^4H6wTnoBAlJ9x`VzTD z!T7<1YNz5*EmKgKXhIiNQCt_2jHpdyN>)y}Vpnr$#{HKZ#H>XJp6)p zdDG6+rk!ceV>7lDtLshsWqY!G`;v7>rnoX`t^C;o59cWR38zEn#hy2uunC#GKZmW{ zS+V5vzqj-K{Wta}?R$U1>CqgjyDqzu70=x1T(b6}{1=@+Uo|1;p+tUBZ`D89%>D7c zEl=*@{-oLdWQXBT9@9~{XXBwghCi*SIJDF7rvV+scbX8canm3*6hoC}g!$T5_yTLl zQ9W|v?*Nr(!a8aykDw{a#IK@+rejs}*9!2mW|mY|IYa^jc4CHb9zL(#X`xal*kj;-z$CBDzBBgPf%5U391*f3?X^z zUKY~?^PIcT2#Hwhp0$csfB6JixlSrx?J5G+?#;m-hWmdEhl2vp1F#?Qxa%_S7z`QvnTh3wiF>1GskpOI*!ErdG=lQOEubueZkP&~b5 zQ9$uUZDaRd$S0FE&chpSf?fMlyW!U~#4gRMS8!pMklP~f4L_f2gjLZR(3+G-BVNav ziQk}rWWjjlbCL6r=*0-HEsrdv@lA0TZ1$ZOpr8%H8nTy{>A`2ICbKwMVPqnEMkre> z-?In$)>8<>{l9X(=1t9108v_$E1VuPjaFSO?P>3h?+Aaie;KOk$9vO{pGbE+ zoAN$8V`r`uO^f!0DRQN#%2YMXZy-;K=A0)5gef+7hs(O3Ix9M%?HRnS3rUx=mu4b~cFU_CIY-)LD!}Sfx z_Ribe)0?^|G*VFiy^pHH9~ML@ywo`nM_)JAKA)R^*VcCC9wVGzI5Q=a-b&_ z=t;IcgNYR#)nR6K$T)^VG7f2)7~wt%aw}haD9q>2>cmjtoWtC#?o1zuiynGjjo71I z<&kQk11m`GQNDnzLk>b;Un4CPY83?SBKjmRI|Y%e{m~mE)d+kpo%wXu{K})DJ_?T$ zazvrtiA?9ne&Ml<(8{lS=-ASuk|Yoms~QJ+nuNAj)2$X>Yj_{x3c@2sYyP?crfBRo zJ+M7$mO;V#Iml#%S9$%I{t3HT^h@n#yRw^_q$iJ)LipX5KoZsT=7l6b>_h>QoVR-0 zjDXWC&r=ww#GYtc@VtK9kARJQD?!1UfX0#kr_kv97lb*@z6F>YT?2Ck=MI>2Jp!(Z 
z0apdn}~X1Fs}WI)yD)!wg$Bhe99Zu zxXQ2s!Yja<`_=T^TWw9$UBQK>f_T(TB7snvFy{*f*t>(wlGu`KON|L-aLarYRxZa( z3)Ddmo;BwfuA0!$XBoYi01 zB*$`^n_psdt&betqtBl_2F=-r{2Y`q=&p3FAA{c;->ta&uv^gJ>+;+Wm?j5|9ti>9 zry1wWKOvX!2jUjSa5GkzDVpF1#Q5~MwMpg6G)_F!0xOi2JO3a=h-@Ed5+}N`4?Ch&^lV6>@u|>Xj z^;xe^7vHKd+4L(cJeRhlan%=kFuq`BR?oV!fa;fA{#<$^Z`&;%|W2UTP zZtvB-3(voE=K7fr%C=|9tLBbfJ(j83w9xQQ%k`Fpn#C7y3V#^>cKGH?kZwA4t(n%X z?>}?nnZf^NGc6NpIUfSd32B0~-uyDR0MO!~0u)cT3vau~gQf z%pl|4Fduq*V6=>zg+&uPuc^{OVo~jjJ)!VjfwuPO`n|Gx) z@4D;T{Sym@@Do4h-MqS=EAhVh+8eJam)??92bUb3_b}_@UpYQ=I8$8qX8dw|!AoM& z2gL!cd)#t+`}DQb^GA|RUAKil8~Njr+b?4QPU@Ognzk%PM}5XWZ>v4lr2k7_*|B=dUv3E;tF?T;)`a-?o633{P2brgjp+0kCx-t(EZ-gnVTe;fE zh%37%%ltWIS7X?ICUIpDmM9q?Gpd2>h(Vba58@Zm@XHJA0$3HJ!MUI6(c0si#5A@m zUcIh>;>?U5_@-Vs9lwG?vPzCLT(}o32uvFXrxr#!2wjUylx34(pHuGimaurPElK=4 z6j^`6OI)GK6wvQMNTe=fH@RwRzxLSlGb@|wzv)RD%YRhjVK)um+;e%)ye(y~Q%~u0 z^G_u=KXFU=UiiD=TQA+wC99rUavaH6OUe2&8=ITFI=N)sMDCjhE+0s4gictL(fCybr zDCFO#9t0eM4T^lC$3a0o`N{bMLlU4C*F6dSKBQc!1xV2KgBJPv%Aun@d>by15>KFN zO=0(pGV7yk17x=j7^$UJ8Rb&sMbF^&`mdq5NaCHW zas3|@J52$L^=mO|mX%W#w5GKs84twTVI`?H1}9P>Hhz{$F~39RTL_mh->--8{Sx^E z-wWngzTxvfDeD3SEOt?gQ6pI+6~xGMJ<<}$#u&vH(g7Jm&q7b&p+XS{0=Dbb#d&=KJ0h+RFh9GNazrOI|KI?LL50L+ zt^2Ta_-WJq;{V1cu?~_4djZ}@r^|8&e#q7Rh;#i7SM)>9_e0M6BhLO`xsz$`g3Eyen2`)aw(v6kPG4 za+Q8<^W{_oUU<$8Yl`IeQ9(tN@#VFW-w_7cqubJ=pLQin8t-#kbo$j-bX;*o###Qc z$xw&%S=W7BiPhhC*mNbUbzIqD-E?uL?71XYlJRsWxzhXQ7G2|NzphkQnsH-&@V@8p zteWt~dY4qLS}11aTotQD6ti*Wn)`N&wbbZbaNk=s;C-*yw`#&0Rh*kwEflkI=HmNB zh~3|B)p=KIxccTy?dD8fV}`H)*jcK3QI~O4QGYM$P^)4Sb%dyuE89RlA!_5i{3^Oa z)WMZ+TxFG3U7Xo=znG=AxOC5;mAX{}Meez)R!tN^bEs%xQ7dP6troE;MsB-rr|A6_ zx6XWjYq75Me*F<0qA%-x7-C1O&der+H(LX|l{KK4yap6QOIz>T5xf72u1ANZcv0`x t1v2*PRRi9vD&DBzC|)h1nA|nRnoSTPY2Ng1dW^mAnTj5m@XcuF{{ge0nOOh; literal 0 HcmV?d00001 diff --git a/cross_eval/__pycache__/config.cpython-313.pyc b/cross_eval/__pycache__/config.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ea5d029fc0b604f4bdf218bc83761b0634bb9ce7 GIT 
binary patch literal 24068 zcmc(H3ve6fedpp0;zfcF5PYvFzCe+no|Ht%k|>FiDDfe41zR>HvmgmlkVt^u1!WPj zW2U`nsHUx?CXT2iXVY$4(`jd{Tyqzt)4RCsbxygvrhq}4D9XB;#?zbJ++C?iO;Tm1 zbHD$$*aZkdc5-*UuEY=De(%Ts`@g?^V7FU1JbyU!&EUwh9QOlyQ69S-(L7kua@=cN zBPVboCujuiDa}c(sO2fHJEaqK>{l=9@vA>&IB67(EYEPtbkZ!E)8z`p0^}P{Sx#C- z>q(nvJ82i~Cmo`L<(p0wo-7iJPC7*=&z;i^H7J9~{MK{lN_J`Fcd=A3vwsC%omeJV zM3-R2Kbv63dwB;ZIG)ysZlO@D5Q@Y~!6{aqv(|G$u~34%(x)}&D$bP)Wd<}Rxb_-Y zUb&iAi9Gj)yb3k13VD?q@~VXDbJZA|TNzuPwrd_M^LQV%kCoZbmr&<5iZud%u2$I8 zOye#Qs)c&5R;&{?Bdvj@?GYL)xr=&{7n;OPLbF&ew1}I9y@H3OHq>&D{}Z}|R+ihC znNgVZYGEV$Fgs{jc4t&n+*%3r`7NDtl2Yw&W`z91uEKTOQ${u$9GHg+szN z7TY3p3)@+&O*kyb6Op=XiAQ zk#J}tI5{i&Bf)Tpp9=fOgQ3Yb{<426IL^}AczIJ=7!goDot*r+ZKL^z{z(s40dk7lM-)Ji4eQa3v!8+vzPO$44k0 zHC|z99z)7B9SjAh{ZlF3RQRQoaXK&_oSjbTsg!8LG{hnboCx00r3_=?@qm~zi-F6* zz)Of3r9dbc7Cpw4K5!*47PYpwPs~nDwfkoy;b>WVyL2fy(>@uDw2Oh6aQoPWz}Tfn zruOz}G&2=7P0mExcX@ZFH0}QH0FOKup5^^wfcNuqtGpDRh`dB8fzTutBOvm@>6xj( zG*%5e!P8*)u?zmtWI*y-hGwS%QkP|$mp|4!B%I*`S7xTds2jNuz!rrs2F4=%cu>So zSe)Y{VSYA*xkwS(LMjuu5|q?tN3j=e{ER;m35X#{-siD!=rZ*j4oUoIV1n9yNer?o z{?Hs7jn}f>%l9f{=Hi_ z@JD6=Mr{~cC@>a~BtJ&#AC-_f#`?vO#K1Hw#fN7i!RcUBhORa~Eb{fC;8>tN5^lfX zzZ|HiwGzXZ*=k{Vc3@q?Q`qC|1!LEYif|gjFv%CnI6~w4!$7sA> z%T7#sf`)cE7@n1I5(qjloxu30f9w(;lxPDk2gd{BZTv*=io7ng=IPZKLlJ2qx;kQD0&~aYW}Ujg5T1?j(pY!~ zOEVtk`+5WcYZQ*~(-?tw_IX`4^fI_qj)<2huHI_zzwV#nd^`OW3$wVhd;pg`!8a1TExJMvpBZ|8~k)o z0{D+zVA(-zKi`r8(GCW?9=XRO$^iI5469wiAOzGM{2)|1I$^ci(M$q!e&C3+UKLt<&7!Lt8iV+?V z)*hZ{XF#T$2PuF(!$hX$_`jY15}l$^pg*ix7|4riNH|pd3=m64X1`bq42TCsU^u0% z(YeeHwWzx=5d;eKcrClt6DQ410-r`Upby}?xo{d!Qg8B6@LKkG`5q**HG8IK=xFbe zGhO`YZb9H%{8*H%oujiIp(DcsC2iOawqk1KW8J-dd`lV~uxWa{{txmIhe%-T0uUY` zNjYkO_pk^W4XDOO`qSy{z&Py$nG-0SrsUz7f1X3u8{++wlVV^}?uf`7@I_#Jc4mq? 
z53tEk`DcLQ8M|;f0Fos6QA0-4GLF3z2+Z)|5H=l%bUY9OQYSnL#3iqbmrnzCFv{u^ zFe7s!7$OEmBJ?(VYy(O=Pf$7BJ%tlDK1V1xFn*pLX$*M7(#RveI6FQmV+tHbk_V$^8m_}NRdrIkv#S%+U227kDnMkS#%P56IfRK^E1h|*Tqr!CO%J~sX(b^+>I#uSu#4<{=()g~VN6DDL zD~}v^jaWe%Zu!)TnkR&3uAb8{&4~|8K!9%q1SzPe>l;g0kx9?2v zz)1J;o`ExhM=MrRohGW|$kahal$8sObocdkgQFhVm+xU96|Meae{@Y*uExcwyMg6C= zL90$pC>ueo$c?c5P?JBUnd~032K4xF_!bas#&nN+ETS6==h295^rIDP@srYvVN9Ji zgXdG-9X<(McE>h6o!fSG91Td9BH@`1B6?uAz#(*uiD5}<4?wJvqa$*Z318lsxm0-? zRpzdrxD{ReI{qYrsCjO^h%?)M{j&*E%~~=?+huUyyz(d&UO&#j5eo?4Aah~OklPk1w_Wu3JV$#13(6^e~5*9!T1{2 z!G$#V)8bEuKfRAD;ZA&;<3a|XVJN#MtIlz|(!bNjVS;`3_>4O>f(9b3d04Ncm8c~Z zr39@WJ$@h)pJvD;zh#F$gHo2~`j6t|Z0teHdN3f3INGpz_iI;X6Wpzr!4tR@hrj}r`pSvoW! z&2XWzK1ekRCcRodq#YdeaZ0OxD_YGetF($z`?E#@tmxCLb!g}GLnNf>OtHEE6YR?V0my<;G)xXfiNZ;6XWqBY>Y}x z$+F?l(=pmXM2gVo0nKIr8<8p;^a73xJI`o|R9)s?GvL?(EC?7Ux!t-6%{!WbcSs+j z^=?gRfUOwIBvErbKQDIRH&w773kDsCcpz0b5~TeYVJq+5KNSWOCLQpq1^x}nOSHj1 z{UOk3%(3IGgNy#_J--=!>)<~=AjI)2&Qi_)n}Lgm(VTdM0^kvD!;r-jl-!HpTa-nN zhdiL7&i5O4EorU?zFdjOKZ+-j^Z#==|1M`Zd){x{tqfW6K5=@jSBM!k<))N&OTQwwSA^nm?ATs+sRiB6d7k%g^`ypr~p=pDZf9 zX}MwfqHV#jW_7GMcKpu4gk#65bw}K~BWWwX)_=8svEf5o-I~2_k7T}J5jV})xIZg-;=bJt=g*Nw(7+*AKL2g+1#tPy11=w zN&lg(@qP*Ctp2f^vlYMYL8l9!Nm#2EUrbo}q-)dCrW>JHS>wX7HFx#m@UnBccyT1* zZok_5k=^|PzGL((C6|^!4|w&c(@?wK1k|6t`iO#^|Uh55;jFH4knh@)|cx zP{AIbM!~IFX<8+%9XpbhuFFmj>9go=hI$x$dLWEV;0%ms1N+d=IKZ$Uw9;qHP<=uq zEZv3n)p7caM4iq)(~PixbDG*2fW>~Gz!xJ5t`Xa zng4AkeOUlvC}b@tNrBRVCP@q-H&=)T|G(WIrcV6F;u-DEX`$17H4w1F+18dDv!|!`PsNb_zfWN75{fA`OroDC%U88|jiEpEOa7MnoB|zS7P_;ixGs?a6X7NxDqt zg-yj0lt?k_$I$}}N5qr`FJeR@eH#fd5wVU+A4h=FwwbiiXhihC#NZi(hK`k#axex3 zkeW8zIFy(Sw5%D)h-MfqNJDnjNP7MwX)-c`sY9DU)-ExG!XM*L`U3=jatBvXa((~T zOIFQ|adTtR=8jc&By1fY*~;z}Iu{D=mDMi3c;npsK+;sUYU1N2e#yORYK@y(lSQ?$ z!n&BbE|XKSYT6PvZAli@#R~bDnZH+7b@TZf&o4O=WgV=AxTz+|T6I*fIvV1RhE+#v z+|inFw1Fg<9M_u@ri!d~3$O1^m@3ugl2-eCuY8JBtc*#}^Z^^12Q)RDB82iYlzrCH zX=gd*JcXb$$Y4~$zr^_u<3!01bPA+d&}Pa&!=Q4`0n`S}>7pG2VG&k1(&!OtYKPT> 
z@T#h_;bol-@Np2DeaRcG^#MW8kmeX@`LSZn(ZXgg578ScE&NS%@vt|#hkCk&!GYd^ z<0C!Cjtvf-Q8XuF5M#}gedNSo?~xuMWqHOwH5=#=MOgZ@;6Ir9C}n~AT^{myTH2Q} zVx^i6fZM2Y5@*qo4r?S4%+7NkJ3Xt;_PDcsc_QI_YW`%>%DLv_U{%CTTGK>|s`^V|amS6;thNfxG-<7jTbq)Nke}+4_Tp9h=D2-x!oFp#@yL5eTFK1H%Q&T;X@Pd7sU@Tpl?Dz&ogqJhipd0nVB880 z)Ti-jeLA0BFzqcM{ZaJLPyk*9*cN`H6O3$S1pbzSFoG&rLE>AWhnDBM;W;ru4D z_Ye%cfCxyX-T5VN!oCUOx1%~{ufeagK323jEs3uc)-9ReFZ3)o-FY@q*fW1(&04Wm zQod;Z;(=tjJLxO~!;~yUSe7iVN*0wTofY>>EH>jhXE7K*ffz4Im;_OgmlATc%z2d$VB@dc%7Lc!o%w%G&vA$Il_n&XV+1b{7Ny*S zWTigS)y3CQ@HEy|T0~@?yJs!Dw*Tt>FC3UZx>jIc7+-w${es42=bEGVTIkn8i%%yU zbql(m=r~K$LnFA$>tkQrxoYL(Rvrv#tm?>rJbJg}PkUF7J`+FsOzg=o9<8AEQh;H8!VZ`#Q<+<9vlF|c#Yc%n^6AaC}_5-Qb|6U zMLDjZ%;Ow43;H>;bC{42dk6>TJLCe6rgA8rP(m*lIy9eB#;Ab)6WTC+O1@brPS3*{wRY6nZ zpz#?LdG>jqR;AF^47xn)Is1DG_Z`v>tCl3t<^rF^XBCR}>O-~+qV(C69(kbgA#xnZ zc|vwTs^bYO=KQ%K6}F67tWl^iR5YxjS6`9Rd&uc?4uNybeH2(%R{?PDv-_OwIB8Vp z?B6r^f!dbfXShnKfA+b|I}UVeOr;aVlGK$p^se+zms3~f^KGrxlB zp01V!c|qCDnKDXDC>6?-mSsnD^snwn)+*$OXqQl~)K*q$xKU~Q3H#v|Dt_TQm6=hh z{m`Pa=L!8(WlphD=LsdNg&JimCTd}*$7IYjm+OE7!!$CRVamm^x19Q<8fgL$!R}y{Hoc>Z^PGZVLrZ<-v=g-O=L9`A< z7#E$g2Bi`BL6d)dAmvmw?g|4gGZ>T9^aZMGkl=3zYZ>-6r5_E4r&4+-P9rI^6d92R zRVIFo3c|06gnKq_dCs(likJo`f4_K6jm&CUb7ncz*1E{Qz%?X*jAl{^c6pRvk^UFYzFwhyf zrWQyB)pc(izjb`+#PVpOdfRIC?s)a?53Bda&F%<6sN51EsY$2W|{3?Onc*DBYcO zR^6<RB!Ah?jPJSi0@1c|o@ztl5gLmwcgjL6a;jy7tP|SFXK! 
z_0?oi<>J#f91Gn^S2-5QdcS}x+YTLhS<}+go!&(0;OF}1dlov;Q%TkQsWp@HdfV4^ ztEPsysbRT#)pIEBIrLt|s^?VPb1LboUv;&_T`kKc%l?FG>uqP;wPV5hu`J%{-Z0%V zE!nV(?%j!!J!|Eai#y)feQS4Y^N!n3C#rTQ%JZicm0lNaK6~TY#pjj{i861Zs6*MT z%B3?2XKQ+aGSZ@pQcCNRWn^4kbv4CZO$k@as%u-^we5ag5ebq-`2={;hwPA3l@-yUCVwvBaIgFq`ILq0gcEVv}Y@*pe_Lvihb-Hm{w0{Q;JJ^B(1$+Av}3~mAD zkDIBETOnwVZk9lQN7)kjWoGhKXGJerG13>ecv*7f$Ki3rtSoFyAUJS7hWGn8?AfY2 z@|ftCF0hsra^tlOMC*@aNziF`NRq;-AvD>=H%hAPtPGKtX2-^GtA{*B=c3!vEy%fP zsk)6P=WiMl=&O92MxF{a=y8beVBy7YP(XG!ksM`Gra^(3W>Q8rcrm4iyH8l=MqZ~< zB9(F;!x|5SBgg3K04pzkkZ& z5hhuC<&22XrnBiQqWAYu;5N~wHAFz0Y@FG#YO0LW|N5ng)uygE{XY!MvS44=>kO`> zyLQ#RCGOs`>fRc6Z%w$jue$fe-TRPj=FDZQ?zXtQEn#Y7H9po`K7ZbI|pLct_PeJ&7r#Esv}l@@^1Hv zc>v{q!~U~%19BcopdxYp>TiQk*9QjT2+2zkM2*m zYbd_QfN+De2x+JolJp|XR>S%}$$_d4!kVhuP$ILz(mAxI@PM{*xQ&E>vuY=2^<*P~cGU92bmwf27{xR3pteR@iKVQvu z%qwKbAZ4I2J3FbT)QM{sK=;N|&2x`{((^q1(O7ZjzT zaHu_iD;0BIc!CPd!jN`imqmlL%WFHk!b9$$tYIF$d$a5YqtXShHp1@yDmRH?(vaIB ze-RI#X@C`1E33_UE#g~P0`a#gAek^);R{^~gT<}^8s8jsz%8KrGBmh;7*V=- znM(W$)$}4rnPHP6Ge{s+C|?^H!3`4J_=*Ig=CpmHOEsO4{6eB7$E)Z{#4cIHZf4iZ{PKk#Ts&eMkC`Y=YO{gSP>UGE)Fl=R&ljg_2^ z*-j(>Yu<#lC0SDTrOz&1dcS=8?aucm6XgT3l7X0QV9iyzQnh8ZYU}$|TN72=6RsWK z8vmmMtNRAt-#3uhcRJDeblmmyf|Yq}G%s71ULlW-%4B8ZVhuTLv}7DMAY!rM>sL*U zaZ_WmsCdEfR~3y5y-Az%THn>auT>`DCUiMo)|7PBEtw$m;Qi+L8|Rlgm%9>Wok`yF zw*5_e%zN;?J$HNmqW^pSvFCp&HaeB)n~t3d#k#`@ekNJfuyih7wLR(H^oISGeJOam z45Cq{W~^IC@Mn^nTHmgDvnJNj{oeM(repV=1(l8;a|QN7NV1hpkYtO>u8%Iv#mw%H zEv4&Pjn%v6-F|z2!h2}d+Z*@x#@bI}ZUucBOwJ0?N0(3ZDa7V6s-6Pfl`nMUXZ;l_ za$MtonbB!f%za9@A_5Wl{<6gDq>Xun}7X%a0Ytf`WkoWVBXe2(X3h;1#>0DxBM%eJEH%qy)F2;_I7uAZ&l$i zvq)<1;2-)SGD4L7M8OeuU0pu9t*kG&fW7jX9@q3wDX~E>c8rv7Igi>q*8d4`oo$N9 z+Xi4a^v?jh_J2VLRn2Kw&6&p_H18?^LLHAmVIiO}Z|Nsru^NB!PN-%X{)w%g_!ogR z%#qIlEWGA3z9?r8Fmp0(Bz-2!d}17KQrb~?U=>2?d{hHUjh|$kP)d`siQJc-0MkyA zLxf_VFf!I8;X^^2tb# zL(}hnd>kHEbWhr4iQOs(lbPi#!4yqt2?UHiW8wLyeGD~Az|kMgPQ!h)l%^RD9|ra%2-V4zwh&M!-#FWY 
z$YzdbDp|mK5#2Y#B0u(|Z=XQ}K!#HJ_8Ggx^^S(I=~1JAu35|oMSi7T%F(8vh?)Lxi8OsV~c$A?3dk@n&{Rv%TJ%R;r;kP06Zw^u0C{q zB*w9?Po#y9E*@W{2}J9f*=phfKdJoa69*)tD3VDRKM-v=opzR&rHTAaM~wn?(3~{0Q(VOoh)D#Gt)iGhYR86UD(-wMyyU#O_se_V3jd?{SICd}xY=3jbKMvqm3h*O8^JZMnBo%D)rEs$sMn7i9 zA~8o6mWZ^AOwPPSF&_maPo=6yR4ADZ1z(&Ej4-JPpGcY-m&8FTN1|X#E6N`@n1LFT z$%QEcJ6p`IDjO|-j|ww6j&zfBPY!qHBxcq0n3zgaB+uHEfl2se6Aa9Vcd61JQSk2( zJm$44lHU*u9zg_?YQWILi^yF$f9#&g{`prv|H}L;bUoqxYv&hwk|pk&`)=+dH%$69>6y3Bzj^+HlI_XT%A5T+ z`jeHLmKxt~eY16`dim__@oxve8N59VX{uY(mTcSl?#XvfF7L|w!CRs ze0h0W%+>xQliu!lXo1--?&@4_e0R$?wj^Agt0kTBlFsxblCGM?z#A8DU0ggD^E?+T zeg2_N>nw&gwxTvOmHjvO-`EeGYgy%*yYdaoS1e0AS2ypDZ{Gc(d(T4?#__19~64k0pRA}8_(T(Zt+yC zx#zv{|8((>FTQsUt1zT#UTfa6eC*x+cluY`4#Z)fY&(=_?v6DbhG4Nliusq5y+-#WP%S*_aCXEmbhKW|0T!^6tEJ8H(&i6KJ!B$W zvzDd($P!k5x#Zo-cPihl{=MpSKZ|`Z>=~BN-`R1e5lix+t((T&N(yhs=U@HYtBX?! zT#|wE*?!d?b9B}miNhk3) zS=+c2c>Cg;7njc6cHe3EuID?RJ5^Y`9?kyb{-^Hjz)IB|()>iHtEhayA^6!kV5jnD z5BGEK%9~eiTtTAKr9vk@s_=Hjn-xpe*p`!bx828nwLQLBT*0)X zRXGLH?%3#oZytc#^au*c+Cfsb;Ue|;I8zM5&xTPI!sOxylx2}%G)!NDE@JUaTax%O zisW8x6F;KL6dWVM`Whna3R`vES9i^yT-(&}+hs9*=?{v^*pdyFggWn0>8NREDRi0e2ol2UE$v|{Ha&zv++=_V>+*svStSX46v3S6^5#5}v$1mM~ZT2&%j-Yy}n9tVtRYskxb+ zm+tk_MFaMNL7qj%qXJ^#>9a)V#s7)-XcIF646CVaOp(kAWJS;8y1$>=rLoD%-~UCi z=P6*ZVKqk0vT};9m{fIHB^i6f+F>QBHU=kAAvS)NN->X3W@!kgFn6>^a7PpQm-zv3 z#_~<2zo)F1C}6QGYK$65>8T(_p6QX6KpyYs)S~Kou>}#=kpaGtje>f{*uFw#*>-1A zjm*C)n)St|?$F$M@!#etDjS<+&#z1a7Fm(b7KdrmKXE~$ty%jdfUT2l>*v#Z#DuJL z()lzzW_8Hc*v!yMw1{nWRuhb6loy4tP1Yu`r7&h~!hfgyEflbMu>9>Q)5yqpc#OHZ zk4((c2Qx=TM4Vw5?#W&cd%O5I?2XRDRB%-Gcr(IBp0d~ie39~<>^n*9<49ieIGmDX zXF3yg$bg`LiWO%SlTA31Y{IZoZUHV0_mPW($m|v3A5-NS3SOjO9zn{=K38#eqBtev^XNDY!|&_b7Om0>-a1{7x7{{?y1eigh4JnfGJq z@S)fP;y>Y)aHmv)y#ODi(WJP8f6dkZfOGs6SMb-I`>#3I4>;?86Ubk@7y&6RB*~^kexWHP9oOWu2 z)XrPiOD%@o>pD(Twq9#DG*isI-e5IsrJE3)tao>ea959ICS4? 
zEHmDBaPG}Xd##e&PPvwN#S!GXDc7|rX|Gmtw@|J*UeS$Q7v;ir%Z@A1<$sy};=sn{ zOQ{GP1MM{%ie&duPDRq=%W5UNBMh=vv!zu#?}!yOJ>a%zwCgWuxWe+Jz4X38SC91T z4setj?L(VIQ?y>sl^oT~7bZ)5F|H_Cb}YsfKQy*#n%1A!6l;o;POJ}p@7c=M4fw@+ z7gemAC}!py-2`^kJ)0V|=)^ zP*eP{;gkl^b6Pis*x9BrvI*grtpR@1Yd|r14Jd||wm!5X_V5KwuLetTR_oMwlh&$r p9e!C={Gx)baJ_(Ha@Q1VF+j4UdDCz6F7|uRQ1A~1yfe!AzX2D%f~Wuh literal 0 HcmV?d00001 diff --git a/cross_eval/__pycache__/models.cpython-312.pyc b/cross_eval/__pycache__/models.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e8726630799570730fd2b12a7c5213965fd5af10 GIT binary patch literal 5585 zcma)AO>7&-6<$)y|6j=z^+WwomK2L}M8&pZG>Kzd)=#X&rd>-(6DG=b%^k{HFS+c@ z%CUqV2)G4=o^%V)Q;rGZgA3%KWA44EAW@K81W4LL;hPE_3i#5#H?yQNt*ZflIHu?E#~~zxkoacNOyHBw(hwTCO%qceRunAy$PqE!w$xPq&#G-1c z>5{1tgQbfmO&6%i*bI538tIB&A%< zIx)0wT(*2ywI0Aw|D6gBQ;mYNg8ZvX7_LJD8ed98%Dcc@?hnC7`7X^(WUcE8FuWyW z%PkQP@jmr@3!cR*@6)_6%Ew3f^L{M=R?x8m+zN3koDXWSvRV|aEq^9vhvqj(*;+8m zMSWv?c##zKvW}0KHj8OB&GhnyLDDLY$=Y4aqPARCOT_jS%u-1$<9~~3q^NEhmQqv; zmPxl>1Pq;7pTgL6TUO}?%e{!S{pZ+;Z_PM!eU#?wtU|- z4LiszO)o3xhD^xzD+=CPQS6|CeVYch!;13TP1SH1pQ31H0lH&~a?C4=LUA|PGCmpY zh9*6OkU&TRd?OtKN-ySTKV*cm*$Z%8zVPPkB4H0Lvod>+R7|GhzneW$f@mrzidi9* zY-Nk$hU5@{274Fa57JNaxgUd(Co7Fe{K>6mGWGRhJ$a$JxRa^L7aHT|s<(IFt;y#a zuTAgEiQQ67o^JFFRo~y0>+%pCY0cMJGJgd6|B(zAkB5r@1?6qGTB7kB6HO@Z<>SLR z3dH6k{-=?)*a8}CKP|>*@PnXGY;m-AkXqiS~{EH|KM&Z2O(j+h0+sY8b@G zWkR$MAFLl?0AUazh431}IRv34oR((Lo<sShnb~asl)<==K`8p<{RS6A zZ+}hhZ^X|$xzk7tRNvcKtH}e6)J*lx?rKe*X`Xw1Z>@gr2FjvblW#P72dg)Cw7NVf z?u0k#EQ>W@UM&kJe#mW?`ZP~l6h4R_ey{@E!NGihTReVjg) zpo<}7LLks*L`N%7rl&}DBb%n=kxsstDe8|i*(3K0AH{jdI8x5K?LpbF=eV2fF1>8o z(USUD(ZQ`E&_2h|$9>62Vbjtd5%4-=xlHiY9z?&yFP28XJ--G!Pw)EF8@KNafou<#Df@RE0A=zz<0_y`{U zq<6q4vgZE1^t7w(6^|wpuO>FF7h=2*EI;>30DOd&i18utQ5dWU=c2GBV_@+uiL;W> zu{pz9H*h^T@)Q@*&H$HabJ1hJi)p>UB|CC)&h>TVx;fY1k?YY0V9vpg+!-we+z@iw za6ZBR(MW5gHmZf5rdzo?->Z$m*t4%-liE126Uc6jW+v_ET_=G%5*}IJG>qd39^wNe z!%ZQy>`NZ(f=S@xI>Z3+rDUN9`K7s9UR#wi?wsuvVT*c!Js~gKf5lKs_citEcHhg7 
zl5G*FOy&ouO5d&RO?~&d5EO

H}y;MNOyA@tNoa1TN@H$VW?%H;*>UvB-JfWW;up zGT(u=-%!gNn-GAvdyk?d)+*`WiD2o~%Mg9pcJhcmRxqsFULL#|jpJRt1<2Ht*#WWD zvFT%^tece4EDrFuLc(@8-|)vG2~5SK27_5PnPryjP=yjkES~gpP*?e!Is-BVsn8eE zc1?lLyAFkb@EZpI2ivhWTQp47TDK!Dx@74klIiOhC3t{%8<&zte7>Hj8;WV1UPCyK za2;VDVF=*{!UDn~!V4>u8^?$=93x!(VIl z{!7*6ot2t=shLVYOVm@Bt1CNOHTiOLVB~A7J}?J4=#!c}*BpE8nN=UVQoXq!9jVDz zn(4`BT0Q*^PBvVV-)Z!XRzKK1QsV(Cr}au`CzU+JAkQ3G(B8Rc3g9!g96FNENhT< zJRJqaWppCD+`;=Hpkf#n3+VRgAH%UHA2eb;PjaFh;H_k;E~mu$;V(L`p9Re6^~=Ml zfj|a9{c3I~=+JW35poGyP7w&X1T9erx*?aKCGw0LatT_Z7I5c7+~QehE0hV_k$XI; z-zFB-3vB!RU6nGTrAxwgR=#zL5K$p5B4d`~SyHUKzd+&4QBV!rQ&N9pQuxx#@D;}j zO(EqCIBgLM=L6esmUPD8cwz_mZl@)p!GTL>*iE!tCfv~tzrtH_Uwn+MPZ075pCWvQ zu#RxTQy*a;LJmIcE&zJ!%>%F0n-rU_x6$2GUD~+>PHQ!Q9UkF@KusQQBzm2J(4Fjq zy~b*CvH?GbR$WdC*WxWYyY^4OoOUfpo*xK*u8cdyVZk7`%ZGplM|Vve3IpI|w0O0z z1;EK@iDDv<4|9tj7=epGB5qatflRj@TWM8fFK;?HHS=B9@_joOWu2|o?%)OBsr({l zgQ(8;9_1UF(J5FCr%R}a7YWhstA&Slkf#!{Wd|W!5I9L_in_5u$p$o) zFI0-h&F#K+mUv0W>vmLVRdIxziFT9~R}{|~JS(X9B==j7L(!vDLm&u6gcAZ&un*za z@L?!H5ejhcH_yNEeWHGTxq55AXR0PIL!8}9)W@%iD0{UrJO-h9y(W(}M$Ur9;M91w zk)D8@5vj=&jW_1@<&m$TIoHTutmgI;=j-xCp*Fl^ymbc zrkmxg&`vL}B~A!&-43=T#_Z(n`Hz*AwWWLWYb$qhtIEBl`GuQHi=qgMz&j7sP@Kc# z1}qQ1=qh~KQ43z}7)mSP=jaF6>w~*1P<)BHC-rKHu~$SSt{PrE=rEs%O%Jh!uYhO? 
zp^R`sFb}W~0ZFz1fP$I*X=-Lqt4}Rd-`|f<*5rl8zy-*?m+O5nK67(l9{p;dF5eWgz*%#d9)94(H!Q_XYm>qo7TN`b9csVSB8Z|!74;*}--C={3(rBK*x{*CXhfo}gS36fJgP*zJ!~Owq z(B*Qun$q+S(%FAWQ$I-kKS(JEsqWB!+afdp{a1?B-albi^-QU?-%L!ddt39oD9 zKmu?GG1*NV8!zunm6UicqW-E~U< literal 0 HcmV?d00001 diff --git a/cross_eval/__pycache__/models.cpython-313.pyc b/cross_eval/__pycache__/models.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..80cdbff836a59a49a2980bf5a226f5f257b08577 GIT binary patch literal 5737 zcma)A&2JmW6<>M=Z&fU9B3lh?LEWT+&;UyX?%; zv524`;6r`rL6`KLV{&tW9CXZ|AcBBEZV@194~1?jbSU6U``*lwk|jCq0{n5_d$T*U z^M3Q*n>~p{f&yIs`SoA*zl8nzX_K(+UURRC zL|pX{50k|;@2ZdZSlhGaUk#9eBy5Sj!iXSy`vuu|tBpVNmNNzkI#xee0~f3z#~K7{ z=z=xuSi@kAT(CwQYZR=p3)ZM(jf1uQf;Hw?JHXm`!5Vk039xouu(s#AQ$6LeMb%QJ zf|=3sRLYt}N)waPY3-Ssmx_8(%jIK^g`~-V`_BT`` zboM%2x3AxtUDW7P%Ph`rXhoCixD>Oe?BPuXi8RaUHB;OrxQQ9y^3x9h{v`Y)PW%{* zyjZD6;x8UFx|825R=clP7GI`n;`RFIMCIY$2Q_h`{?62anAj`S#Ho7EKxK7LJQN4W zaPxVW)9lY+{(ou42PXXq9tKGr{iy|(;7(108Tf(GRn+#6^227rV} z&~_$u00}!&2|FlTTG8Pku6?=hk)cUIkD{a-lJ!J8-F5^5oj@W0wudp&7KxeH>=@^f z0+cG|RZFvjX2~j+Eaj=TyG**VzwenwGJ4u7$39uwSX^1y{M8nndOR=7cFeITMewpl z3=)I>ucO!*$$AZ@0z8hVO~cZjTZ#$YiJsAHe^DiBKCk5&A=Xn7(ucF_M@S+JAPgc5 zAxt4$Mc}l<0mwAUGYAp_Qj5EA1Y2WGz;&Sy0gxP{oE)L(i`6=#Nmp-8?5)Q;Up%TO z`YIp4+^mUx_2hKr(VkorryCR3_BX2&_mC_`O}tm{>aW~?nK=~u`A9g(1@*iayO1 z&B!ZFYBRYRiD=Js?ThKG{ya5vN(%0|I14_fq}sMUNG5uOTg`UshGjT^X0XYa(5K7j>JN5p7%KF|yO&q8X45IhahvFdj89w0+pW!6na-G~`8UJD)+44Xp zcSGJ2!B<|{gZYDb#|JsY4^n_hLGT<(JcoqAa}kiD%oc;~8wZJPUwqzHm_02bcYwAN z`<&56i;_cTU?W_rnvC=d=lV0JMXsm4`?do9C5g zA6b&a^6;HD(k+ibuh9$o9(fG(ikq}Nv|P&P&sGEw6(Dz( zIFV-FaETjkfS>CGDe#r>DUys`vg?-99urb+>$aB%E8+#~IlkEb_w#CDC!^lAd)}PS zOw(`!?xKExityd${^WO`afVrjaKoQeeUgH|N3 zqpj)#=erFB1NR$-{3qpD%bLxbsq| z0XI7N7~wtwixw-`T18kxcz}Q*o;*U35jGJdgmYo!J=7r}E9oHshLOpidIqY~3)P-Q ztOjc0V!c1fBS`X0>oi3qdLa>{A$K&!Qu|}ou@5WD2jStG_+evwYJaXe{t?O}HSwcH z@8CC?YVXa;^2?Q)c(aj|UL~r@+m)4UuCM&d-$-yn)qJ5=SpR5ud^mzX^c+3O6)IHNAFc0?9D)Gf^lEX?Q7NX 
zMW}H;JQNo>m+?i@@M~R2W4K%=xM)A0p&-(EoJed2?YXF?mhAp;5>_N9e|~ z#04n}#j2!2X2Mz+Qq(gu&GLtnf$lSL8qEh81U((y|3oWi4t#3Skq!9y$ei9b!? zRFhWowx^)}-X!p+8}Jv$dQQ=d9Gn(30w)mLZx(b);Q(X@Sd<_IEdwVzouXRHuytQg zMc?kC&v8)ZCly;e2)M)~gP7n_F#vk;*0ERU>gJoZtKQyGS$cUB z9NKJvb#RCk2sLrAp6GHqg1Wm0_9(51-F3JfT8CmccRLR8hTBC@z~wsm3*hB$hpoa6 zqL&waXS7&Ccv=NTP!55_i@zW^9VK>3kb~fKlz4d&BoQX@BWjRDA&H-!6=Jzys?(0G zG>bN8gMJRPZEjFFuCpE5GK+#iM$soyz~wvz}1{f^UO&!H7ATzQ-e4z2JY3UOa2r{__Ll#Ch9r7l*ZoMY#< z9c)pK+T9Q5f2*u)E^W+jt~^?om5rtOh5Jj3ytswA=&Hgkwy`QO0E>uE?QpVI`Nx}NG_XK4K3lPy^#0v~*c8w_@+^|4K(1j83USNpK0%j-M5`1M-5 zMc8%3k9PL0Wei*F7{-by%zrH4vQ5BV|lm= z*OKd4Ky*w@G0AK`*TAs=;8;*hFhkVUb1VQj1{6n_A?Uh!EC4tL6#Jb=t-vro*9srz KUv1cAUidGbMuhhO literal 0 HcmV?d00001 diff --git a/cross_eval/__pycache__/pipeline.cpython-312.pyc b/cross_eval/__pycache__/pipeline.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4f80d03cc0071f9237322eee4e264121a79ebc87 GIT binary patch literal 26546 zcmc(I33Oc7dER@o@B4z4cmS~t0S-ulnR5(R;HLlPhs`ptmLXvT(h z6l-8J5ojmjsFJFY-9%u?R!~kF!RO@E^od)^?MaOxAY(L!PneiJZF<@UGU(Wr)3o1z z-@KV2G!P}HJ$>NZ_rJURcfa@F?&W{9T1_0D|I+#Pz}KGUxS!F7@~CBiU%sc}xNDrq z^>89Ds)qPsRga1%SUsffQM0$EN5kIQ9xZ$8dUWiq@6qF}88QqTdyK=T9urHa9WoD_ zd&~^h4Oxb*Jyr(mhit?49y@~#Lylo*kCVa1A=j|G$35)n@eF%=yu-d8->|>O&%#VY zWy9q?<--*{6~mQ1mCVmPR5e`PQ_XWKE~FmZsZ^-&4%OV?<*y#W$kCrzC%=nbajj^5 zLEBR+){C|mG(EW(rD8G+4Wb>f*UA_^A(jKN>cvLU37if0<<}&-;MXWNi*CG|#DM6* zyIE9=-dFkJn4b<7*NMItbUgv2w}}2%Ri7@^YO%x?u?*PO{kEl1@QSTsIq`vyjY^8@EYLjxlrA#^F!KNcAn9T7q!XW$Y1TN*_HK5On985xcA zQBb%mpvt_@v+g-p$?X2RLWZV!oB^Y!{>)Wk&uWq2C**^iVO^g0BauWi=4&V zGB9#s^jxU7?@VYUf^TO;y4)*;hDQ5Dq#Zdua3-M5n)guX6QS_fP$X+V6bVTz zv0P~%W%0B5?_9X)cqDXQ4hyKWb}2L>hNRv|2n9oVvJNQ}9v!+6D)?B!p-Asg-^iIU zl#0rDYHVOgM5)e?N|F8jn!>c=^nkH!@XJF0u5ppvB;&Xkr%brGCdLh7L0S1o_J~&T zi*YecjE|{e>Ki$~<6MMrv!_&OfR8rF4IL69XG6m2v7sTMUm6XE*M~0j4dr_<7|>*` zy~BNIVrguoR~(SCy7OoI!l7{1!+h9>*sJt-&NnLsGbHBc81rx3ujGh0jGs&$m5+b_@?hMy1Pbg#yc9 
zFO|JbDNFGD<*bG9Djw1rAg{tN{4ktx?w*6QS0(q)+18F9xTCke7=J##XkVM!KXqXG z_)OESZFBY`vxXy!=9-jlNzPWP~4HFy6y(z z&@-6tK>3?okU*SYmjDLlDO&v9JTwE1X4dT7>!Q5Bh)ZEAo zC!UL`j&lKRbkn1ld=ZQU^hP++H+()M3TQY1-5u(umKMfn&{Ac$9qO22m9^4sdZ3(wHMoC7P(lNOCD( zg}11V;zJoD`?yK|tgznWpotE7_*C}bIq9W zCqz@+$l5wsz@$LuK!;0fA25o{tEDcC4W1j>^#ClE2V;q;!>URC$}11RWG&PZ`SKd> z)6M0WAqxH@?uu+wY9G_bjFI;ImrGRV`+|>|Z`u@sAnr!VtdTAGnAlxn?n3QjZtMj1 zXp=$|^a=7FE;Nsfj;xn=`9Ogz_yrAN1PP}@A@NjS|2g4r#(!TpJs^c6!hk|x1+u=* zJzwlS)ZOvKp6)}(x{mig(Xr>zgB|<+mbT95rd^-;i8=)#PrnHbVq1e4P_Z2%YtHQ` zlskwPW4;twOBe(k$S)%Q!nrRwayYBM5R$}!evppR29)U|jUY6HL0#@SemvSN2=o^E zimHS>h34UbkzIIW&+I=7%CvTjIHuB(TZL0&5djH$^W_OsT52s&M3I4!v4N2@(I!D? z22Hdco9+%F3@Yb5e1*Q#s7s+ag^hvh+N`O7C~G}EFw!^Fo2x_C76C~;)GLn3^sU@3 zm7qIma~DA)pOHdm0QO$&lSYt1I8Z4Om6%nHhO@dep$KR*iS~!AK_OqWmR?3n4~$^r zN5inq+c-Q5;(c^vpg*gXK<9?Cw&A`@MKzR+gv5?EKaFNT+nYL@_vZC z?MkENg2F@>fk^t;BU4SZ{YvBz&hSR8?&I8&gEN@NpIfxKet*-ohps;Kt%v7qH3{`y zjbqYxt>$XYyry>1TbpW}^9IJd?$|etAIvnb8$Z10^j=$YbmS>-&u_} zQ{OnN@!hq$CO1x=UhuV~eJxXcX)yjg#JSvXK`n?>`WY)w`|H-)+|^W(w2tQnR!ca z5xI9wbS1Z@w#{2w?$&IW_PtZ{cFnEUxte1M^Sq%d<6g7iZc4kGZoAv3wkHf3gKNQ1 zo;H*xTjmXocg(KI)3?o4nX0-(XU6WB+`r%nq&piX7=~_0Spj4zI6Gvvt_RL%MeqeDfo#Z*& z#(U55oUh{AGgqIP^EOPG(%wzuU3Y!;8FzKYw|3EAm3(IE@i~86#^KM@Z@p*M`E}#R zK6DT|^wSrTr&FWzhOKwZ6|+qUa(t@z^t%r*Rb@aCF5K>?g<6#%zOm8n=J}zO>-1 zc;HK{(TYc*3PJ0Ba$ZHHLL1mc9Vky?d)OoDOXn%-SMUa^lb~DU&X@@ySU=;YBHFef z!uTv)LJ{?QAD8gSwJ{XvsRz{CxJny4OV>DNQYfegwrFt~8kCj<)Lj)VwS-?Wvk=?VVJ*U@8+rALQK($xCOqY zK<$?91(y;RWytfMG4J5k0!9VO@h$qeKBg;KMsJ~vF^_0te9I^}7n1#_y=EXk{)Y=8$S1kf*D3o=Q&u zPs;-^xJzSzZ;4g1h{Lf)h-_ldC$I15GjLhdgW%;8|92!+ntX|+*)~2UCP0&d7~Q(+}bdhn*o_m%X4W$ zb0~Nw2tMV)KrFv_rT6{MXeoiQa}> zvL?o}GNEEX%a}WvqGq%;Ege!19B_}s_c2)7Oc1RZ(VAR#jQ?w9d}eWt0>m4#@;o{^ z0`BWfNUSB!u&C0Ebqs}r-O<`yBMD6n;U+}MDKvHnj~?s#e8&?9I`&1|SBrusE%1M6 zVpLLLJ0)7A@I?%qL<3ABE|jrv5{)qF5prk*NuPt0HHJpUz=?qUlSq&!?IWjyLNp|J zmG%&(l&A^we$ozjEyqOUC!9eY(;^A%JEdar!M!hu9v!z zBx{t1!GL%vt7Tx;wX%-TW!U9d2$6d;t2)1u3zI%i=&W+YTwVocO!@*s!?gZG4I-p160Id3qrf6-Ew^u1p5 
za?QLYuxP1F?oV~iTiWl+3`^25Z>h~OFB!+yjKh1)bk&sHo6;@_o72MP>2oND)rN9d z-NfLq_;)w$q=;aYatxruHQBM?X-<2Zr`AqgoY7~>YNnpLb>iKg@ArTWY`3K?ZHc4F z)+s&My~=f&iiS*efc}-$WGaQ%cf7mCn2-l@5& z#}l24z70x-PfzW-bwo*}Rr_s;10NbVFi;PDZqhtYHKou-nI)sn(9kCnr0n!ISw)7*^=>;UpsvD@T{Oij2&DuaGu5~??PaA zI1u5GiBAUn_f1hIu`18r0aJew%3Q)O)ZnhKJb*^X=nucpV@t2 zHkZbD&b{TsOFU<(%yX6x&z$&P&v$#?w;as%UB=O&^jF71)rNG{2G&CygB$mzzw|Dz19VJyG?4mpRR`Xqa0ZM+zLwFMj!7 z0Jz41%_Ef>g{*i6bEHkN_l?}gKJFxT2*&EE;_4!ut2&Zb&MBL?q+>#V6|KZqs$%Lv zS~yocB7ak*kcyVUi``v@|+QU(jg>Khlmx zPOsm79ZK=gNdKq^VIJvNb8T<|odJxxFM1SSj# zb>jG<**n?)YWuvoCR5jt=t{QVw$$9IYj|`08|!EMsr7SpdvcyjM$WsA1_(q7<+4a2 zN3tV}qF(zOd_bh|+w>TAi{GZl?(1CxD-9f_cf%vcaW z@gg;ADaE9Sxu^6jQq&;@6n3S0caXCel9=UaVaA8^+y!FIp!It zx6>i0$ooU#{Dh_n**gltLmCi60_Ncu=*Fn=>F1iCzSt6YI?kx(fF5%=6k!zFeF?HU zIYm|_g&^D>7!gTYoK^RUVpe+w((LolhG>Xl%W6c(+NB7Z4{G+lNdMXU$3oWE3mrf& z)0QA6*}07L4}`+f^GLCVh6fR0P_=u@zI9@JU&ij1<*f_$mbAU)$M*FJ^`h5*?WwCz zrK+aFKlZjKIx-f=O9v+oPM%=C&Tl>QzNP*{HRs#Bq~;vKB}i8t<3~O|ENC5K!{Pk_`(6}pIjV|NRc`u{$G%-ynTZ{0fm`bWf$I@o7;=usO4Mv{wD2=~#O3j(P zod&vnPmxAHFIS6ep&?dBk;6YJe^tqn=W~HBwd6K~)&LK?@+s8!$}1SAD#`iamSGzc zAN{)nG?wmTabWDx2kpubG*%c`SiK^n!YLY87{4YH(Y=orxWHVbivy9ff|77ow=@T<) zib>vTVh&MFs=BzdXj8{%CnGu7W{~ZMK4ySE-x@PQR%wP+lugEk`MjZMK07{Tn!}h+ zv=(9ig*@C@YRdt6;DChFEay|~B7RPyTkI?5tz@9_w<{r#?K%e?%2(m7Y-VwH#HIKS z605)B5p_sU4CspIK5GEmxH{&J+pe6AxkWvbgz;BKL_K`IgnCk|*-ooLuo|L!@-iq` z5Go4+remlexM)5>CsC*@7j@;tD>=0Kb1WswBf0*;AIWSuYxkY&Yi6+vy}&}d(w&6U0ezpPd+8Lb3a zoxC>WEE94?d4MvPB%NR1eaBsvbY9(_*tcjY&p14jr;{U7XEToSo3&}4}UHYV$OXZxae7PA50b5UA+WC#08E@@^w;}CqNQrNbzA-v2 z&3Sh~Bv9EhwSRiw!sdhN%?IZy4<(M~xy;rn@vVXD12a|gmi-yKFY%R3MdMX{##x>6 z%{j5230L(?PUNuU;UHJ2N?n{a2WK@wMo2)SN3EXKerap~77fZ|Ba$M&)O=f3R@%Z_ zi5QZfQam7NM6Q%cAAIw?pOT&ysZx`^NFk?`g<_VN?l*GF9Sp<@lU;@7i7i-p(Xa!l zOH6;s7jY|PiLDYRzeG5SjQQ+XJaZH_7A}Jnj#(uwj7hC9CRIV`t;XV$SI~%LjZAI1 z;(__879k~COAaXUO2mW`&jt0Bb;Q1EI33hcL&dpP!HJm7 z3S4-vl4psS_vKkST+Y*9l&7x*es|1QN?j}3MElLc3@$FYza$p_r@-RJ8lV%Mv<8T- 
z*VJqkC==bWGJJbVk1d3F5mIsm5PdN{!u>@f&8Lhi_?N|eqFF4j;AADMSOLGvnEzvb zRj=9O<&jS@_K?2%H5C+Qxp9dVC*O6Yl(*uV(shdI3o5sQk)>F(YCQ#3&)U-1P^!9^ z8SdJc1#bPSu^L{>n}NUw0AI5LFo?&sv(C&cR2DA zVuiyAAsu8tqf$6(T`#O#_ZVqZ*R9(D1g5BSt-ua13ElER8Ivh$p-5CbiW9Y&=##}^ z-wyVji_Lu8VUy`G@XZ4_))S71D520CwUR(l3I}$Wo_+RN)4(hUqNt8ceM5n$i}~T$ z6OMdHa)hXs`DJyztfXV4Mb5Qo_o2>?V<)q+9~a}5&J%e82Vx|-N2 z69_Viw`T^fQ8T(O!g@}i2tlEPnnTFrh^pEyWcjveqnvwiSPTl?IFzz}JsDRLq76dx z*vQbpFhpNsAZk|{jp0F)=}fHnW3`216Eo4VuT)t-s>{_Zud0RKHMsF~xRt4RS+fPI zR3Qqb&*JX8L z2hM~M=f($ZPleM)> zQM)QvMC+HVf?vTO(Y(kS^39~rwGR%Ou?EEXAes?4QFkgdghPRJmM?ddHC$mhpq|u~%mdzN95}cG@`e$g)~x*Nq>5+QVR3 z&{y5oS1r1r9-Q91(0(x8esIopD4|ChTXph_sf%;gw(4aX2Aar%mtVj7a3*UMin zPnuI(XKm}|4K0~>!8x3lZ&u9WA^ ziZ?3K?$+_6OR7e#ZP8hs0jt%Rs(&-^Mj+#GC%-V&0ZpIFl+Z0b#&ZoVZ+5=XIev7~ zLuSKiO>L%rUD4NHn;m&w^9B+MADTEs_N&3PKlrEW>HY6?zTG*uVJ{Fu-E85szR5Gm z=ce|(b?Ew`=?m|~-;Tep>G;(>n6pANYOYV4>zBDQt!c4w!?g3AvbW3re9L^}p2^y4 z%~zYh+4ge(d;E_KKQ#OY+idseVQu`WB0+a``qj|9xjqBU$H}WFQ`@E=nmjq@-331N z6T&{&wr4V)>Pa1)R;9h|x1@yWj;%IhFHa0*Y!%7PuRrwiL&=sUj&FpWukiZemk%d* z;;VU0W=%u#NkC0Ci#0Hde&xC3Fuqzn(5xl5B==2r-uA3deR0+ugl}7QrW$;Fsylh| zcJ=*Cab7NA z>eN)_ly&;?>Bu{m-@ZJ34nceQbxUfMwPjhYv744SjnVX*dr;+-{hTvsZKax{hDL8u zYovOoyWi<~yJ!07E#rH&@44SQu%za#CvtUxslkKlvVErhdx7s_*6qs}tiM@uBHxd~ z!~*=FqVfRGy~}%!+0^fPJ9c+^)PL!*;NvfaJ%M9J^^c5NeB>npBs|PZ1ZF`gzR16* zeo^zH_C?)``WI_)fS(`CapZ`Ze?-*_Owe~D=cCZE5Lf27=+fFLQC%cd%$vr>Yy}Dv zyf-LEh!7!3f($?rl5#Qo1jLpT>Iuz+c0xBnVoN5nBr)85V#{ji6SYN02J`BTuh&l0 zmeNYbj7sTw(vCaub3zf~5_O_}NIz_dyXi^*hyV?x$_{!F#p4{oXG|3 zMwsRe5?#0AQR>2sbSvVO@v2xAqy`lve~48=M#G>h%^JE+K}&y18PG}%3bIevUNJA3 zEQle=Ib%FIJ?B%e{)q^iq{{}^E@#!E1y5)ukp~KxKM5GU#Y>y z@msPIW*c2`; z8G4W7(A`K#ikgcO;J~4rfX>6ynK|%Inm{lZyswRjB*&u1FYiUE{y<-B$N;uCzbznE z?oqln&KI;(6Z!#e!Z=}?C|KyLkh+9G1)|=<=CDc}WhsGH+pOeDnykFPdO}x92#p+b zdN3swz1c#iDLNlRr<+Tj9-lBwSeUU(uH4XqbAz5@TrwQfmRkN}TBcP}M>l~1VM-)o zPFi6#5gC(4gl3t&?u8b$HOE;}7#-{k1iPgqP(wjP+?VtQIo~GdGCAKNhgd4Uyx(=NP3>E#-=41DKIeWYVOVqt8B102 
zRH}LE(DawVYtZ2}5c3wtf?2q278ZT$r_M}A7PcKtZ#z2Y>x4qrDWuA#w#_-)6S_P8 z>TB_<@l^MlJ#X}+{TmXNQsh1vgEDd-mX4m6zB2KZlTWP zXNG*FU@I-r;tiI0>=XnF5*d!L&d(Y6--qQ<-F3n8Wy9GS!w12(nhTCMv=&>l~QHD zk8rxBgKYG1*N*=57+RVH1u{^W67skw!-K3(On$s1-NHLu2S<2TfHdjZ zXXOE3!u_0VSonL0E0OcxX%X+mxc*0IFY7tC|61T`0A|G3Ps~-e&$-&ik1T3TFCO{o zky%IU&Hm|z8-p_~^P2sCt+)NeQ~v3dmtd2;6)>7G+QDI$RVBOT{4KK@Pe$We&{U>1 zmC2*?ngC`CY#Tc>twA_zgz^2q&~e&zAJ}{ullS-glMUZuywaxWho{%iL}m^96owwL zEOdj&3&$2+zH7m&!8upm_>o^~?Aoo%9GrV~oT=)jdaC1w>3w7S^ra6v4$rxceE-Bu z_vH3#yRYs}ZJ+9%b8nj7mv(Qzbt0jkbsbss)up!m*w>QSpK(_tw_e?y*tew8T5B`w z+uu5Q{p5|_v}4_*2{zRGGp(B@t;s=H8t0(fj=+az&foe=Eoa(%&xd-yTK{_R<=|Y! zx~Zqq729WO=R7;dJO5g5B?}GW4gKq;c1^X-xMsaO=MB4XVanv=69WnR9o&m@^s7fF zAAU8Q+VV;?IW!fVsh+Xll4hNU=QT%`b+Urj=6Y##Vl<)7R0XEirmKPwCmhES*Oi}^ z^5ojIWerXN%O`s?mb?_;OAG!$+8?;tKJ}%yE?vKLW0x!h$TNbPcZ_cvXEuIs`**j$ zZ`k*t8~OhF=S`gZDF3T_HqKN1bB;&Lz{J{m)tc;Ea0s^@!f%MBto`VhPBcQ=*#1%Y zAO_oCG;0r4s{dkr#lfKN2YN5yA5^b7*sl4(!z#e<1u68scH^Nk{d+r|hur!fT6OUG zp<4@|AC{>JuGAiC(BF423^9@T@yn9{n29m0zDlu;g*#I~AeW9&WKjky72NVdq;Zjr zOJ4DbFx85%ygeAg)D#xe!a73}7538IQ_S*75wOV?#g`(mBkhj_SC}$Dd;af{ni-J7 zs_Aipnc!~4=0xSCq|nw_q%c1=Dquc@OecL?4&uMsj8 zuwhCZPHX&&m1~lhU%`z$i6d!E*`lyE)io!yj~|$|RHZf5Mc48GcmAQZnKeHVqW6*^t*f*wY%{AHdjg#a1Gd0bxf92(`{J3UA+E58z`k6&v zLuzFD%$)BL@Ydn4o3S_w8}FE%PCLN?&@EGl zvhrbePO7V_D{ipSYEYoSAwRgF44CK<`JA0rY5PjGs3MxP6!lxU;p8}3ZGsX-7h)?$ zX-eZ#eKHoZ(?p5uU@@7es#y`41A_-uDN9N)&{~9)FylaoE~bU?M3KS6mB%%ls2`-h zUh%~8Gxs=Jl`)B`i?FL2`yL`bDqmhvxMW2(XoR6v&PqsT=W@oD$1(1};S~o7!ry%D zn}XDLQBh1a(_JmNYJ$$H4a3@r46t$rkDYXakXXcG1kmOH0pgy*?R}!atT!1YBAf-g z2omqUaW0!SqZqOYWszOiH&C0Xb?sW=33eD!ICWWws=!OB0;Z_mL>f1fDPUlhTGF$K zE4@U{?~rqt97eD|P0$bEZ@VSh?e7xf))`|oVQ>6uGZQ5Czs(rKdjn-)uqqDPSC!Mo8 zI&n0)3%U5O`=^Z4uIWAR?0RJujyzq9EWi;rKGi&utB7t4j7{k?_;;afb)}YhXEUryV(ZEDCn$f zZnQ(K$X4KjB_iY-Wx3OeJ60xQ+TX4$|5Q|UQiyJN;$z5h%?d*<-%W$raNos1QIF6Z z26Zmz zNOqLXzm#pg5~l1_*zQ%)4P8oy6$YEKrqWI?4@Fp`)3rrl5QAA>z&1~dRaQGRdJ!iw zwBhqZ19Z_8bzYb%lDp3EuK-Hqyhhz20Khpbv+bqr6Wb^EUpspB=(oD$bB%MJb#rEL 
zE-fG5scDj;p8PTmn_@z!l=KO5pgd_K@t66_DjYo9q8wIXXP!|dV%6m{D)PY02?}wA zROk|3Y`gGWekq8zJ;xO7 z#KFg5!jR=JMAdkAOLah3IL(DSW_8%TpyZYU2*{c)O2ENR?SV|JqPVmeWQ}=jIBg)G z5zK1P*k@M~h&L$-IoGMNY2#xj2rTZ&FD0QGp6rD>bt*7bIsNEt)AqD^J5Kg!48(LV zXuN5SHz`ha<8G&U%_j0b_SIvP2U2IokFl#V?aJYu;whHzqCY`a#Y6Fa@gsE|yQ*}R zgFB_VKvapUoB0dws0#*ZzgzL3qck^*btxpCWlt&7dk||y7ouL)!=%0S00h!>QeL(w zqW)!fM2|0HK0IUR76j%HFLk~@+ljBsSl@|z6RKi1KYQF3zrj5@j-0A`i797_AfIcor{ zfUA7AYJJ+Z9%sZg_63bEt??~rz+F|Px~KQeYO3Zn53zuXv<6~_Dcp2hF|XO27s1q} zj=Zngz~*E?-L=09I~6OSh4Q7%{Uh^ZD3%Ztx**SErtn$KDctDH%%46o!IDJ@`wL1! zvQg}f1su$mIut_Of-f8LAe?bsg?n-}84zs%dB|)p>{K!VEZeDzN63t&g|2U=n zm4YCvp|TeXAsODN0cY+=-KCbycbe)iJmZ^)s>l`ncIRJYIsg$8QNr|1Fn5pJN9id#JTkSs^NzcpH! zAV6-NV!0|4wdRta#6=T~@UT%153Yb*>`z+(+Oy;?*ht0p70Op!9(nC!1L6L9O8-FV z|9~7eB-lXsH9iAtWZ~G16lG4_abrz`=fCUHiGE}1H6h!*g=Kw} zgHkwGW+QI{Y!}HIakA;Ce6yyZmtA;+3bVar4W)&2OTPD!-8v#)3-c)XJx|UAIs3`^ zU2=YpoNtr!9dd|TV4MP@Pi4C8Ciy)E=X%bXm5d~^hMlP4*ih&Z3IAmUs3Yt^F;D^^ z?0NnJuHmO#-Oo7NPdFp{=llt0{wr?xPr2%!a%KOS3;dL8`=v_9tA4@3`K68HkMU`) z>!;khJI0EHf5BLhhE>q^Ib;2}4hD2(Rk-y62b`9S7XBcgu~aOn3AkhQENKaVB;V#* z(lgM&*~;X2NT~BHo5^>1jh1&W`3i{4Pw;#tLqxzGt8+<901%nHOL_(Zk!#7wKnT~$ zmROP{3*odf-}9;>d}9he0vJ95a`+f1<71$VkAX5i0+%nT4SdCt&j5=nRx$>(?pT8N zUjB~OxU3~$d5w;DEW2t^A4r|o)O}dFnQzOOE0@%G-$B!8@kYMhwM%+}44fU+Ajrfy z+)HMHES$G&$x4uoGuf8x1UaaVPJ&!iaW_Feg8T%PaW3ytIYAYivvP^@T&m)9yO*m8 zI;uiJGA{pzwf#K5pU;>)OKJwO@-Qe@A{my;B*Sv4WLPej49g{xVYzHFESFA(J)FCC z$t%M?&K+3t%WwsiyHbX$sN~f$yoSnNBg4dDF3IIxs^fH~<+ZXu*ab9d%`-gT!Lw#% z5UU7-autzbxgs(wS44*8ipa2B5gC>%BExb;WLU0<49gXfVYwnQELTK^<%-C#K!vZB z;lpYh@6LGYmgs%QB`j(2Mu{;5^aMeX>s~SvWTGKpCP*HE1l1XMSH_Bwg!dhL^%BOL z3yCq{^#u8xd;@C{ycur1DcHEICulRZo3)&GusC?LICvwDt!hb6klaQDS*R;0Hin?i zv1|vlyj{h&E}JyS%4Px@OiDH0cTjjO-av!G>j}bCaxNJO!W1_xn*rUc(D54?rTTvW DCRg0$ literal 0 HcmV?d00001 diff --git a/cross_eval/__pycache__/pipeline.cpython-313.pyc b/cross_eval/__pycache__/pipeline.cpython-313.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..1d67873082ec1a1ef7f872b86e33a24fc472d3cc GIT binary patch literal 26827 zcmd6Pd2n3EdFPw^zGpDF@o-~6U;q-lMM~lYk^lix7(5vgL_tGfKtcis@Ed@~$Xdg$ z#0J;N3Y4Sb$Z?{Py}N>JXANx*(N(T8ontNkkr)%e!sr=RxG^`WRHbS`lk%bLdiVGB zoA+i24Mo|>R&A5$_f6l&*WKUMeaGRja&Z6H_vXasp5nNl&=1AwWI%JL1<=b}gcG@l zCZg@t^k_vbPq406C+gT&FY4LXAR5@$C>q(+8Au? zwf8tg2ZK$$&K{TOVz9Z_-Qy8G47T)odwim=$1nPO0%D-2L@enEia{1Q^dncZ@j1e=7^VtHbq(@J_a>MLJ>cM zS|YkgAgn1&|MB2RYovrqcvdThP=-Ac3~N7L&ikc`Y>AZSN^Jd2Tcwm8X^WKQ%G#oY zZT?2uBjs!i+6rSL;YbAwZNDe9BT~si!ynbMNN1!fS8GRPYowax=!|TO)G%mkVTlSH z>5A|yc3Wh7q?SQlXLX?+$(p|CM4X?Vn2t?O#AAHyQfzc~W@0MN$Kq!Z5dJ5^ypTq+ zMdR_QnJ7gi`a@dDJ9tivMaND}O-&wQd8S0k9!-plPMx2gjLpQxkjFe0or%p%oR0z4 zpNh_$!`D6$zcBS&Y$SR%7N5ay&y09^M2t;NMaPggK0a|aWRPqJD0U#0n4O%FT*qf( zBC9wT;5?OU?ID(4Qh(>-t&y47v>X@GNiH!KAB%}2Gchy_?UCGKEHO2CA(juZCt@=r zlhOFuS+t7UcxHBDaty7So)TxeNA>x|$e93RgVEfH19+Jm&Mi!i6F6m&#`OX>pe@Lg z16y+6sDtB%4NClwe#k56cI19HaKlssbDLvAnq;Fq)G>bMT#O%|ot)%H#i>N1J$4~F zsm@_Iq?a5c=c5>8aW+0OHX%yJ>2uLUEFt+>2>UTMqRe?NG%-OFr-ot{jPiU)y3vbc zl4*?QX*MpIXhNrEXCzau^aQn?Cnp{15E3yl(Xkb7=hht^hhvH7W~Qb)1~8I|iJ2+! 
za!0d&OBlcl4&o@%a}%|H8Vf zKHa?(xv^`-)w^u&UANVwjZ41k7guaYm-R>QAR9NTP7{7mbJ-Mr4FIMPzU+!kS{=7T ziB+deNzdU_Q72u4NjK(W6bPYa16K>O5v=dUl3pKAdzkKGAr-acrdV zSYuyfMAD25zM+*&(^0I7nS^AHU7ASDj7&Wj!WoGAC0%^#qGV3Yh~rd=WX%^P*`~2@ zuu$SrL>SILhvGvzv6OOYXA&ZtB#bMef>?#G=vTZg01_wR&2hIZo);_sedQOb*DQR- z!l!FiEL-OeW}W^uXHCXglYV@yaaX2s*K)(|>zeCz*Zs?N4`-YY&mGO`ZJ+P^Okb*D z(LdLh(TB4(?_9rFgEB`8=f#BpHn%siXkW&-jRJr2i*#6BmIH;08kxxD=USarZGJfx zpGhSp&kd_HSg;jl%F z$ne6*lTsFQCwKM~)LFV4=Be_0E&KVHYlOtoZ zGIcADO9iM-I^@SdA)ggvX912}jEZqokO))@nsf1)a8=C=LCejHZnHAzyvX3xI zdLj-200YAgam)EBknU6QiBZWQg366a&hybr1w9lkREP^>K8|5OHXEWqR> zm89UQWL6>Pz0)FsjONgxDbPIVb|MOj9(93Y5BV8Y!p2i9Ln^VHJUX!?C+iz=8Z$%y ziG38ai(;&xsKl8O8ZmJ<1$goka1KNg$ScX44^rkt(oBfks1RD@BCT!7MpK?cV6m5i z_fW7)8MItfg6JafSR(N;T?tSs(m6NH* zmxi)-$C|4l<7!BcE*;JKD{h; zCfIh@p!0ZtZbcKX?4suMUg=#fJ+Nv&_^#b^`?Q90ZoT`AhASz*_Vi0nuLK$wt(m~K zx&B)v4Ow4RwxoVNSebfy@v)U)N7fz8Htf7@GX{-wCx7NffixwbNKdVrciyy>FYC)c zObFAqfFn+G=gZhJ{(m5ZLC!+R_UjVD5nbF0V%Ub<_B=6MoD^0LY7oJ?W7r&Kv1F#- z8>Usp+<;2bDj^_S3Q5+FPH7OF11eFQF9D&nyKHg@C4Oj=+mT~BKtgbX^Zp)^_HOiqIeszqvoIx zR0!fIu`59#FsZH*!9Ji8u1Y!K0s}kc z5(@nfu`8yyNiY^|Z6M!T!5?ulc4Z2^0At72gzEt-(N%o(hJN+Y>$`Y7V}&}n2d#d0 z@j4M&Y)=8sk%KD16L*6}@toAso>9O?AjMH_A80&}l$0gK}pCc25akKXYymteGLZpWyvx;|iCK_Bre;yejuP~3tzH^==3OkC;k zLuy-$VB+?xze*z}Wh?{@!~lM*tFnU8CnoL@B{i71vVtC#fr$$wx8|9+^Wb?VWcE#E z-uRYS_-q*L%Y}*9#rE-uOQEnxJiJJ&U!(<=-1lp9W(66NVempEAKnXI3fxkTH2~Yw zMm$e~DFi@VPe&8P3x&Ex;_{Q*+J9}AY~cCf#>DB(JR#q5c5>=WbaF3$0p-7;lPPyB z6j0@im1Q!=kb&`hGNH{#ZrXIjC*gsqBzBO&;&y^)w@KFIs$*INlweLmp^o}|M4ROM~@s%cHJ)thBVI> zVu;a6g&P%Vd&4i{>qJ5^k(g7))ro{-;=|+-{)msllPt0LEEp7Ug(6A##KYtrp%^{M zWyOO8x(j;1rcFFR$%xAn50TeRsSJ#l6B!3*gvCtrm}QAY`!ILr_09EW_dUFaSN4KCs&LNtZaBHBU2ByGGL;8zG=8u3yRC1ZS*d(%zHhyxQ>p5y z#r-#Wm0Sj0&^dqfXJ!s8)}BWfY^&7vx;1ww;|?u0ue!GpzvsN_OkGG{Ty=Ncs^7jO zyfyshF!;#&lf-uN*q?%S-JPJ)m zGw$YPcU_Ky%=&j^{bkpBuJ$bR+t#-CWVZJ(hH=Bp`J2{4k7PoRtoR;T_m^Mmz1o{C zJ(w-6de!=hHGO2QVQ;2kFVY4|khXct!pV32Wg8C8|ELDyfhMZrA6+F^E`lk&J*(k- 
zJAQUa!`UlT4)mex!grqh){{T9AIr^M)_p{ouOn-foter`Hdhq&U*-U}b6UECP6$10zUU zHYK*;JAhQ1okDN=xMLt=XK_wW*XPx)f^L}fUCb%Rw@MTtW@xLRC1HIZ4^l!mKwIY~ z2TKv{9?g)KrNT)>M1`Qifv!t7o`5!+XObq!SSMq2qA+n2KQ2z4r(aNWhd(gP&WyM3 zegk^)SbTJ93=%$4y5@v$1_+VPCm=pdOvE9XijT%59n^A?g|zNaU0<4cLoaW;w8cug z=VF&79aO0a;xTEnlJ_Zge2}Ndz$R$7p5tydHqZ5EfmwZ5`_}x;8GrLT{?HZkyl&kV zSQ!0s*P5*+W2?#5HKw{&>@_#*8eeaJt^Im1-M(CRV7?zgw=G_2g^G*FW{uPXRMFW5iDhBVMBF_YB%_=|(uy&N#V6upBxja%(Xlbfa29g#Y3Nb(MAS+8F-Ymf84Ms4 z_0gHpbNAwfR5AkX!3fi{ASKzy#78G$3GuVYQB9~qn=F*|fzmGubBD7opDeLmb8X4E zw!GtN$5s&tUK@UCI9<7zcqh;`el<4U&Q)^YCe z1_ZV4x!w;W2yIpjL6%VK%N)df8vJSV8hd5ZF?Ad!@eNF(UeFh_)KHfx$V)-1{E9+5 zj7cwoPC$+vs`6BC@j8p=)OVcx`hJT}$Xp8nit&P9WDTT2D zYrNUzd*1i;93tmO%=2+ZjGc6vTrNcnKmz=)}&Q*-~JH zbsn6XJd$joCd4a9)=Z1@j{#sGcXG`k7{>5*b4S-3n!kEvSs(m~J20=ua((fI7hibK zRRYBUBVIweHfPH#7fkC-p|8f4-St_!k7-8^r*^K|>)y5d?;1IGQ+jO0wFUWYt~Fbu zk_76L`fN$*{L%Hg#;-OmgB~!uUUXh@F3gY;#ayvwuF05d()!n3ueshaciyh&JS9JC z;>@mWF5Sgda}CrfIcps#Zi!(ZRR}69e$IxPj?_ONoFi*E8T& ze&xT)2^aSb(>O9W5Ky4}H)2G7Vpuo1dyER4Wa}vS;?6$M7=kZiVlp+NlCTOQCWL=V z?*GtV3(oC25WD8&0abbhb3|p$V+F?YB9At8eNQAwQH~{x6%LcY#Kf7&Sh$~^drXog zK7r;*T+*8p%;2mfbw5d_Bz^aR<0nFPwhqK+D4k;@#!OFRY=uZc^{2XW*iw|WvO4S| z;%`$nH`1}=TGhn6a`j-z&EQOp=fx@_5lv7i)5^}1+p;jDlfn*?$CS(!1d;3@FTRom zQrjqOD|t*{^*KP2L2ems0+LbQ7ILNzxu!foJKRmhk#~}Ig#QTuI|9!&G|ipJZfS>C zUOjj8ma{&6WX0J!cl0No(s{$W%{f2&g}o`y7aqCkD@}Q>?w&uqZZFHa{R`uZ=d$jy z#jfkYth*J1to9Y}dq>$?(b937A7DON^)r zoMQ5((q?8%h*MpXJF(CaDZ65vBM3*Xn0+8ZX{+Ak5Q-W$y8>08(kfzviY#~KEQa-& zy4Z(8L)!&|qTY(Tk+aCsS9%o1vL5y+EfDULTHPqz1-5}MY@1l#VUnf7GiWAwxyt)2jOJwOyj-D!NS#mm?Gola%v~h8Bv&5&wSH1{#r;?j~2f2kc1kRdd6jEoBE4pQm^W zam+ZOYBR?Df`5P{)0^D=+a&mh()VkxkjG|-h97{x6iC5q6^n5w zLY-J7&t`RE#BtL9F;nWGgsoR{FwB?#XKwx~#+`?Toeq}0WO1uZK1h}yBE2qnc7 z&k<+D6>;y;-qW|>14;^hWJ$q--2{6=#6!DD#M`A~`$=iUCzRs1zc~INCV-ftdr723 zFd;rzfVU+Ijv~BND2do2WqR&IBU+Bw3L*Frv6Wq}c-im+u^Z)7b!nm2%k6^Lx767$ zrjm|S7w@HD$}0x*HlmT5`}Uk?J+CcZ8d_B+*x=U-cK8kVP1V??TDZnE5nDWPATX{O 
zCX$6Y6BiX}-92FbnttQ?YZd}%UuBo&|H)Xl6z{jPH}G7HQ~Es4?bp?CGm&Z+`(O5WHkt1W-WvBg2b0|(t z#u$_|@k|S~U;F|_I%%^)oytd0@LoP?=3C?}NkfQ7c+yOfkkjx<1O2upOY`=puq2b`wGLqVk z3zDWI*(BEb3>qs3LuRzH=FisVn@udt zk;BE>`axZ;XR2x$dgAcb@kAR_NwZ-KRc>NTBm+YeNl#cM5{U>MC{zs7uwzxOyE#YX z+iXk+0lw!lr3#L~^mg*HYzJE)loW{sg^E>#*hXfSjI4%)Xr>5jV)hKmhfFgeIdijp z5$OW7h11U>JI+{a5#={jZn5R-W;-&7IiLHW{4jo~kU=%}wcs*F&#`vTonv zk=q8H*E(<9IHlnlx4ho>THo9WvU$$vYqJflD*O*Nx2h5EB&qcAE5{cutOdiFVE9jT zOWkkvz1g?Yc@Qd2D8=oZp=9Cg;^A)|f8+Slg|}XK^MxPkkNn~;jB}yIwKZgH4R^Ux zgLS>BbIJ2o>6@kB-m%tnAk%bUq4rwK)s|}=snK7&t@(lZ`{o}wmj@q*sr1KA2u<7g zmt%4rS?FUhxOG&c$=fmAjTMZn#VisF`(~#e3J!S;|wuCc?|5 zE!Q7k?mT>BV%2<#M&EwLz8pAoWAJ-VeD{gvQ=eEh53M`Pv(CW6*tKV`KD+SLa&6br z(WO1td*9ao!18^|jptEeL?gE^eP*#@(XsT{(#%_z-@Lr^9HI_tS~qlB$Cf)fz0116 z=`GfOxeM)M=^f5&a26Yk8p7X#u}F_B4Zii{n@=vCxM6v__HEzWM>ljDhmh+DOd~$J zC%dmVd?)lRthmEjv*TYjJgD`91Th+axg~hi&3(@uIOWrQud%zM->>@{zXL!1ruIPl zDW`7PX}}LvEz>v>ZFt)FwCU+uyjq|c$Z@|&rMif=9I^wS zdJ|QlZy_Gd@$R@)NKEM_7ts~yB~*i7!I`Hs3pjr8UrfUS&K{BoMLY{b$w8>m26cn_ zLBpVN&;+6Bm%!&aazay(86*xpYbdz=sOnYD*ACVeQ>+OVr3XCe^xgRLKm+5A7$c@$ zQ;#|B;~{*60?%Bm{h$_MC=B1)Wl+jt+_fqJ$q&Ph90;1+uY?v8f)?Dr3`D#zp-u$c{{1A~<%oS@H_Ty>>+sP%g5EDZ>19Qw^u^W>x z&vhxZsNfpW_Tf~c@r;*Z4)W54Ql+0L4dM!z7nTZTObX$u=7h3=8l_eFuhR2b;99&q zUJTCPpid5&Q>8%J$5z-b;boc{J4kLAl7y)u(myga}tancv6$^w3txGyN{$b15d zaxLspQK0H*^XsCmbT@!{R6w>Sm~ha+Q=S1eSsu8+y=;J{2U{@Pw76Y5uzQ+*=%ijn zB0OrL4R=bKM1l%dlyR1Qp!{N&rp1S)*8Aue!uuwtMqzpLa9Bf{D3gVsTtDwEbCcM*wb<`f`O^KbjfYtQJ8j%_?I?VdCbg; zalS?7z(=6jZObvA6h~KBL*YR&h2o*PA_h%-jl8drN1T}WRq}}I68{r58Xo-gq-ub?t?#FQf-wfAX~_ zGr`VzyUY`)96{<6i-#4S00x}?7e9UF(`n=Dj@KNE7nWT+md!h49TDEHA)EG#Q{r=Y zn}q2alfHs=l;g=;lG;Ql=~UL#7-=wD#6Lv0LV9`Z3GImYqZvDe=oHi(4nY}zHukxQeoX?m4MwoEul$zTLDH7(-~sV~KEl1J>c_zifWI`Jif z31j{>95kK;VoWjFoy+D7l7nr8ieyarnUjl&U!n?G?@S4KKa*@s4GQ%wb@tB@PcM?t zg#)}MWE!8vNRt#n2J(wR#`r&@#d*f{Y>w&yX_mJ zOO0QD_WG7pefK|@oImoH<@KqlIMb-69$xHPGHueT!XzqC4)YC*3+#7kWvYKAxMf-I z&+7ea`ihLcB6VU_AHo`enPy+MEex-kpX+|#$QfGSb(UnUf!`ZVHGYw?QQMXtT57*O 
zvur-BaQjGQr*Tt(9@I+4in)o_^!>*GDq$)&*;-JKdda+qU3HJqzRM9JJyN-L`STwx1g~>-M`P z==YZ!UJZXKyi(q}IGicpeZ6+Yzi+PZA50E17a_JW*t)oXvE#aTIk0clydMw4EIf8) zV%~KV@4uY*%!!4EzMM$!`2A#Raxr|p>bmQOxa{d!)%V^p$~tJL_r<9zQ}eoPWoWTJ zQyHGOFGO&sck@qsS*ku`uf{EB`R;Pou8ImixfTp%f}wA8Eq?Nwm)^Ma_5HG>Ajc!> z-?F@ExxV!~yT7&j9rNLz`cUgHf7;CXPH28{*U9;-?r<863v9+6R~@P7nwwv7^Z!D; zX6*+*_h1O#v2=ZqI0iKPi&n$&R^4B8R2_TB_?Pwo;J>V^KF;a?iqivr`yq<`K4(4N zZ2G>{bG**<{R$I>)EOwGSx0cI_4ppsJ+I&BP%!>9cL1A`XqmlUe#i@SBlh4HjKVB1FP53 zneZm73MlvAMy<@;6}DB65xkAOO~)CXhMR4uib?*DI=1mwz8`4$b#w^rh>3eY$^z@0vThY_H7ds|p?- zLdmM59hnXNIRN((St>z=C!dO$8z`(AC|3|HvnX|Xd!F5ZhU#fB`#`cV3nY3H zN|Su(0LgKcam*W+o`-xFBq3~1A<~xL+>$Sl@rlt`JSt91CBk^U>3b+qwk#$R1UIZ` zFeQs(qneO+TDGe?nf0kESI)osC2ydNjv?<14LHpc9_zAlPVYkV7f#Q0XKPws{q!rJ zey650W3GVtXh~x_zEbk=yzvV*Y=`{Tn>No}A3Lu{3x|~`NExGn#WH#SxHh5fkU49} zDyHfNkYuw{s8D8p1xi%Vp3?T^WEF88=uMOzlFh&4 z(m=MPdO?TDd>q^qO*v)S1PD^2sW=UnCJigb1(5?^rE7{YN5Aj@U#Y9!aEOREV2)S8D<|{izFVgFf&v> z$c%Jn@J`_v&+JGUMZ%v0zX($A9^99WvC)LEwZPnrsoos*_51+4UCE!h%qO+rqqHGw z(qtu-oYfjK%XVAOpts_S)TRozYnT4L@@hpz<#DZ3IY;T!Up3PAFDD=Oh>@ z_Msh%_9&6O^R$47&Y$CMd%4=C6@S-P45_W(Ec>IfYmdEVnCn~icdeVO^ZQp!WvRzj zOx0Oe#hR-oICtV#oapgp6|Dxxc!8d}7mL>1ffw#Kf>|WY^z3cjg>*tosj=p0* zcH6``D{z6pUbbed%Gjz>u@zgxvcBP$KlN~d?FmSLxZn04((3=f2yawbWVE17_+tzC zp8@3f^f(p>9Z(3_?64Rw;@K$V?4#vdbl*fyB8oS;h-&j6Hx`IlRAk3ZaZ};ag78u| z#BN?jd0hKxrqvoZ3wk9@6bluLOiv4nnJeHHv0!0~NCzJRQTXOABQ1;uZfKieqFtIw zWikd`$dY{MFnb*b{Gj|g4p@xb%RaamLYI1T*bjXq+o|)0lMs58=B{P#-JFT3jr%3d za8i4kPj(J`sBkw zAplpK84Bg8boppGMn#bK8Cn^C0AP;0<5i3f<=aZj_4`+B`(eqPNAiZ(J6`Kp+P31^ z3B^nR55TMhcF$YibCtd83ubMB6&nvj$4u##`Qz_xeJJ%L4MPR-l%^@$)R7)u(t?`vZ2zeaw;8bzYFZZXc1^{*F5m4MvQXr4&p|%L z3aKtaQ-=JsQ`}F=4{3z5V31023UN^E6j9<70s{nrHrd&)OjRDul--qR0T~HlDNheJ zfo%*+8XklIZC{dMa_S;(lo%4zlN0pB7EN1%I+D9g_TK@Dp)jm;OkZ90Z%~>t(+3irS-LMPZ z=p3=~@@*R#Idc+3Jfju61hFEs?G3ZL;o|S0dQh0p;{&!j78OU&;U_~>ypG9I7Rmud zl5a5YE7W&T9_afmljZZ5KYMv0xMr%&n4p0RWNjX~%hXN9(U!ud{{jHo`(R*7kAG_E z@lQdE{X;9m>}rdWtKeG1hXggto2 
zv*W_^>K^by$MZ}zPi*}QFnW+Q7m_-B2gN!xDSt5yPwN_SoIx!vh7ciHFN!FG-An`> zSxISW7mzG!X}Ay}--DF&7~yl9Dadb77V_Sp5k3KcU2?Gd7CxDRLU~~X>e$85V#U&- z<>uWP+iqM;(wm7BU(*LN`ao)IaS)GMW%S!9_~d6!E*wptojb|i3Uw)Wj|x|qIxYVZ zRTYx#g)&H*EcQn0eQq5U8~TVA|8(-u575E4$zf{r5d*~!o0X_y*8@PO=@sH12Z{;C z_H9QYjHcWFvegmID0>Q+-j&03Lh-q=sf%%1<9G%woOH4G=5WZcpC6vWq#LIOQpz?j zBzNuy3FAeZ#Q#Yi&8xUU-eK~Nz*DA{36K610VEKkG9;V)B@_A&P3A`GOrU#b}FB7zfMU-dskUuQqSzkcg zRXI#uy7!0rPPXnsy8iBd97(KscB+*$b|2VIp;7#&_*^_V|SYhj; z#QhazA<-xf!F(AklqM1~+X7Kq)}dQ`-wAk*wUL8CM0VfYXSgI|?7TC-GL@a0JWz5W8l4T25EOf;Y#VhBXc4 z68{%@#p-6f_B6$I!jp7l7j%$*{%`VVaOI~f^4-l3L4JTb)ErL&`T$SuiG?Gc+#?H9 z?;VXcCKZrJr%<@c7<$swxh7t@B{slin)v^TkV~+a2rqSa6q3Z896LJgl?Xt^K7S`o_=kLXV&8=a% zDF?8ZqzCeVNh(wrpU1U)=`5Zw42!E+{NE!gS+G!mfv4ryUKk2KNfohyW(5(1r9kJn zd`~)Gys;qkpNqs>3xI` zM(~p6h%C__84>9~l1%i_CRkJX0hs40_kSku5sIXTPIC7L!#RmKQwE3zN&@Z#`DxFc zoH!#d#qU!%9W0WkK(~NW@aTX@B7}Rw&%-WnG9eP{CXyCTWV?EnCyrg?$4_vZ$ROE+l4pG== z$-6>cH+jEH9!WH1q25;sBIv zC#N~7$#DHY=2~xB+-sKdzlGh{O4hK(0In%H=UoE{}n7c?90Qq%&*EH%iQ~ z$zm;IK-=C81RvDgG+6E$2)J8q)VS|@Ytf%)HC%Pw?TYQ1j;yU>Lx=B842=O_)ElVZ zFcD*erg zbu_3oPir(sG;CNI#5%&DTt{SB?uZP_9g$(VBQh*^M26*#$gtcI8J0UD!*WMtSnh}n z%N>znxg#>nQ{!u8xJT#I__F@G4f@{n@*4(x(PE&0i6H25eH#{ntb_tKf(QfeItZ#W zYrI(pFbUtAuBr{-n-`f8Z6c_|qiJL#f-fr@Uy8QeH4(I(#?4WtakDh|vNZT2jk9vY zM36j21legSC^b;f=ziY?=LD2O=@=|fK_y) zsakvY$A0JD?#6?ZB4<*mnkmCF(0%*%>3h#T=ey^edv5bT7Z%zDeBSJQ&pY53gkRH% z{>kMBrd$8dBnan)rvypp5hRmj_L&CEJ!TUfTYQ!t3x2IW>p)IV4lB#?Ls(yeHiROVhLmE){}oi$3!#;iS+(gvv#zg5zGQZ;_7 z^M%F#NlnsnTw9)fMQWB-pnSy=*N)3ja)0OoV4_;lti?2 z6>Dj|T<10Qh^%Bam6!#2mAvwByUzC1@4MRJyWl^4SWWR>Y7U`x&uXa(PhG>Fx>34c zT0550v&Jm!6r^=yg0xl9_Rf8_Oef4T_Kg#VV z=;{bNZ`Ci`PZ%}%l}@=&R%E|N7TW@z5Isk=pO69`m0#Yk$Va__kSccKS3V~0kY(wh z+jGQzqE9>G^*VyG!dPJMKIBzJ`lpwOCyc`pG0-QT7;rz|YZR#?yu}@UHK>F`>E+#t39Z%h9IR(NCJLYa@ikd)D{te{P=@z zub++10Pb)rBxi0_Z0qRS)6vz{-bQ`tbNj%*!GP)wdXLIYV#m(C`#RgvxB!Z&{i^ss zM$b}XkBf)gM=)5H;vpmF2`DsFF8gMexGxkO3~^?ApjSRQ2qOAuFfO_5p`ItVgqPw43AN*5D{!ZCXB8~pElG1Gy4OJke)p}(=(XUtQCEfgLN~7sihUkA) 
zx$g`4d<+(t%+vTnY@29o{17;|FN7}88HWy?e@q9cbTNQHv;#+>Gxi;j1ljEY0+D!$ zeF11(lj!leRn^-^a?J5*`@ZfcMU`aR8$2$y>}?Z0cw|4cbc@~IrI-6~;8VBQo5c+q zT2a&Mar-uG*dlIMWcLwO>ACphEZ17Oy|31f&o&5RdbO z*yE0dd^Ac2y||Cmm(kzjJ{(Ypc&Gfm5Fmr}LLBg_&`-~yCQNA z6(ykXhoG9ur44$R(JGC$*UxD~D0n^pQNm`RG4vI)1`>_PLvA_IpOVGmcU=ZXJ>T?+5EBpw7% zYGce>zfWrpQEt)eG4}Lf$}=hxw+(1QDWDRNq_c(%MM{%7l4UEj9bjg_4_9LJfX^L7 z43K`*D-l?bv^9yxG4t(tstdrYV&qj#@BlTM$n4v@qoZ?AdmFX98-3z32|@sUC;%R% z(kSb=8Up;_lvI-UJcVx!ndsK2Q|$A(`}@%#j?b9$J;4wr@Z%uDuuL43l@y0G9uw~{ zY3%2{5&0D8_L2I40o18JFD58H(`gh3q)C=6`}aMz%lAPk*gv!z%MET$>Ai#@G_ z31U5;e>Ieb(nC$iW`Gf)&q4a6FSfA3p-$Ty_W429>-YFVtVc8yKzvdJZ`#lsV}1;i zj6UeDM(Am)KBQa3XBp2WS@kGhW+@$D^9vtZOwW9<56TP{#Ag8+YQTR0H10gAcO<%K zSQn9P_zs{&7VGzJY^kTd_yaT|nX|E&)g8t!&QHwtV77fX%n@VU6__dzE$E{Glnpux zLPB81G?JV@tCt;kHX92Lh;1qMhz*KiwHZbLA$giW5le~9&oVRwK~44l8H5P>{@H{W zryyT&~>%@~@J)hVte2O-4AAmoQIG-UE+vMz&BkaZ?Jj1jE;OJ6p|9Ojcg=+M^Y zKwmR|CZhrnrm={Ja*DG=AqvWYb^1`Ti^V-b(n?FpBO(uUlDkiU7oqu6E@w4gVs~6{Ay_R_A zW4fN02`48%PP{!SCVzQ0@x~-8{$P4OJT`wWoOt!DcynelF*BKbYa;p9%ZXFtiBlIj zpc%B&{h+L8Eb@kuGuLju9}yGN*U+eRKPY+OYBDk=qIHR1%!u=6Kc=S4yqE0;p|y$k zKM<3VbBXt_+R@15Thogip7}RVi^+>;(*2^umGR`IYa+irt0T!vqsfn^$S0aVI|fE1 zBNxCPF>&^6B64#6l?b&7yqbS$BKguuFeQ0@ni!XS`>M{eD`56ja8EyYWsrJ~lBZQFGMK-;O1H2{ykG%6-8jq5Fd zqm(r~$qQk$V=D1RM4UhO^Tc0XPF@%T>C4Ht-%D#bJf}O14vNL(TO<;R%U40-DkP89 zeRZ6Ago2w_X3{9ZYtkqg0}NI`5>g5%Cc+aWb;c3W7AhrW)No+ z8i*(U>Kg8yPRxJ_*NC+miJMnOlRvwVn84_bCtv+Q7i73k$sdnF@`!(Y1UL^S6Onfe z;_#nJ5aPNwy<)O)adfd#T^WTifhKj2%X#ugSF>c%5RpHMQlZm$q?kB83SrTNb!m_4 z4g;L6ei}gt7HP~~)g^i9g=FMRrq+PZkibP+leqLw;=(V9@AI10Op1xuFE)wE$uZ*Q z9U&}()g*B#2%CTHB$Ov@1nmH^n^&fp!p(pgq_E?ph{;paH)mde3NeiQ=<58-r*FQm zD?Mq)87S+AqtJST8XPa^u_7q zE2kMzb{|SGX}mOwhcfV{*4dM3%;IbS7A18MBD+5|c*N-Qudw-qwFd-K5C%w3zV&u; z>^-h~1ba*$ulzFk<_vE#Z+-^NjoD~M&v*+sKbnG54PFu-MNoY6y_v*|Ks@ly(D`(( zQkRyVhER+-XY%IzA8^2?b^&=IM>-&3>cZ6Y2H}*LC3~8L=;k=}L7TX^xH2I;KTRO? 
z7Np{BU;>F6LqR5`oCAi9A~tYvWS*75VKKcYOx;giM3dRXZr~wrtxLT5i;syJnAK1v z9Y(+i4AL(p0REj3_$YcZVrD-v7W$flJuB8N>^#UV4_8yZ&R#!~C z@m?eKu|7ps)5?0({0P!Wj7hvd2GyiqKx?3vh7NKgB>5o$HTZchdFleU$+RJy*G&j* zIP4AC7nT}SJd%WG{xyuiRO(sT{uJjUnt2h^ICrLi5BdmlXt{ui2uG6oYP+E}X8%?THH?gNM+PGZ>5X?lF5ig^={5SI1Kz zCnm=yF-@m@s>IaC$;fCU8IM=}GhMT$0Rxf&+02xM(MLj`-n@;xRp42QC)utR5G)b-0mep5 zo_m$EEv*XowhsT^sJannqEOkNGQ4gCm$V>w16)!bK1=tDmRc`r%(nV83ST9Rekh;> zLj8vj!CJhMlo3&1Dw5v*-HxvAj=eisc+ue5y@b^D^*xHxY6}MxM22HMfN*l(aUM}d z%u>{AxZ8C4{tjF`A5}v5a-1Z_+Z`?m*xOq9lmcv*02R zb?VVGt%ZD{iLH!jt8FTUkJ+OULK>~1 zcs<@ANYVq@mnmLuPQXk;Wj+g30VzBRKu_YRK2CAr#X6gI7cTg)atlaDDzd zxXm&JJZ#}~QMmo<;5J*hOb=x1>kjE<1`NXl`_~lQ{zsTw89}(T7%Y!w-&?2%p8Es~ zr2GkEab%yT0^Ep;!X6bHvU7Fr7`k3$l-=a9J=mvz#86y%m5}wcrf=2AKBbnq~yA$?ajG-Hhl+YQZ9XJSpf4!=A#~kr&eAt4j?5 zbBnSfJE>3(v*Y+}JQzVCh|CZobr55L;IiRQ^H}tyvE(JhB0PTiDJ!w$8vg<`Q&VwWHA>Luf3f3 z8A6UPph22T#Yi&-80}{^URrPqVHg8Z5PZrCbTo=u$Ue=y(?T1F{!Wq`1s3t;jp`AhY_AZx+4W> z1`u`eBD)n)lFT>^rB#F!Kh;5Y1m*rB=0s`RJmV3GTcQM~LPhEnUo_bN$4cIAMRK$k zNtCjdt%}TUWIghtUY?+#rFnOFHtF zak;M&kPE;+KuWyAlMs>JY2@!|x!mZ5i!%TG%;ss6r4YqinDc~y76BD1tmBpMpAA{LO^B_p>tz*5_|!>fh{DWQ&MZ(;nH z1CvXpP6AyR%UB6EX^H$Y3iLufabGm$n2 zu*e*Bg4(RG>}*63k?UhTTMO?|GCLBj^)#Q%EFYwFg9cCYG17l1;LOE$AJ6wr&m?T+ zV~fV{FaoU$=s8n&$|z%-AB==b)_oPRKXgy$bAzfSVzud9N{KHXup=Wnc|dFhy9b!< z%M%E}O_s%sAz73Z*r2$P#6%f;ltoPXjA8vQTk2`iVz7NQjOkSM;4A{LBxeYqYDxZS z!i>zlr3o*QfEGMN5}hFhdgfURPqgfyEG`P=g1;Vx^6Jqj7CvR;E2@VAA)ka8=n zv-ePo&1z|RZN|89#o|jQDRxtGE1?C-#jLzq=GyBlVZ1^KF_1FhzEbj=uN&rn;@?c`iCWx`B?F(Y$-uy;wHC%6=2~g)9M&Aqy-iEHTB;fHw|-_U zaw84u)Hrfs{zO?Z^FJBGS~8qAa={bRAKwwQZzA*UeIqB0*Iyd9H9wB~fp^YcyBB9Y zBh4%$Pat8WnNjN3ODt1w{>SIw%FzND5@tjsVqsB_B~grr=YPbK)wJ|82r)R0+&b_% z@y11#-~s=UQYMf;LU|2o2?uOE&s|1(iSBMfx{)k~{25wPOT30>z*l7%Y~)O44XS<< z63z_!9jpJD>4zv_?mMysc+Q`elRzu|um2)3^Am(MG8=_;`NWG;6kpK#1n}oK@wLq# z+?*LFT3Gp}i z(pH&~0mJf0(2fr$lUI;gqe~di#NbS3UHR$9NQWZgJTib4d{%-1y_$UUG7p%XTAZ&! 
zESQWS=70djwV5rc2WWEz=Q*YDC4PE(DWVw)1Zk&SLlC7<63#`%|NYWZq6uvx}f zxz8Bjloyfeo;DU}3!7*75V83sgTLDR^3~Sp4Y_2Ki>7ZzA)8)6sRb7L};|18oYcN--0^R=!;wo(=9u*dH1zW)$}{uyWDfTJaapwXuBM(?wfP=M{WJFs+w7A#Vy<~WQ>&+ z1#GO=;ef{KaQaA6b$n>g`McNxOrI{H(K7U_F1Mdq@W?QrHEzFqfHvVNBDTiSj!lE% zN8O5-wi#oKBKPTf!8MrTB0k`U&rLh#4jmtaAENRtdu%fh_BFs#71aQvtinGyB8%&> zy$}0&nwdVYXC$ycCV(evSJ*B`Wj(nlUbZz*1cSZI)h6=N&B{ug|Bl9>0tXm_0->PzwXG+&hArW)ImfD~ zb(K<$D_`c5aF9+N$lx<2A23YBi114Ci@He-u%3oIv|;c3LzsFkC}vR*fgBO zOOG_5?>4HLI#)`EEr%VPUi$ZNj{ba;aQg8()q&<)bb8QaOWHl(u?oW$NSbx%-*w6q zpuko-5+ibGePA%c5zT#YlSR@&CU_}1q|_4{;5NevXQPU$@Se%PWP6(EPHa5$9D?4G z9|{K+n}TI*DHi?MX|7=Br5frZ-#}&5j6I0hNESflfQLgMT)u$+m|MZVW_oDGn9~S7 zNI3Wa4t5D9re6n&Fb-+sIBe4U{Ug(P6ZlnP6^;u(w;VHFHets;lRJWJ*ZoQz$fzVP zqNs`IR>J=02bFC@YkEU|@ApEoA+V}zt1p0!tm-zGQOksznf(55;s1;ZQL*)-r)Ih@ zR!=>2Y2`;xeW6l&U#zG!{g#ANsliucZrni}=bPu$cwQ^*43d?&L*lO^plKs+q1~^f zAMsp-I*|5)+j1DwJ}KA~n$}*WVRvTqqPumMs(m07iE)rT3)kB2AM_(%)5uVs-QL{XY_~UB;+7%rpmHCc7`Gk{c>RsGcrNBo?5U31iBK<>1xIQI zP~$m4q(I5>9Bcv&;*3>~=Nq--_O!}2P$nUv4;Pd&ME=kTMp)9KfXd^jR!kD!j{_jI zQ>a=#+J2+qzNsBE-}-CE-#6@@wXGS?89yH05VIA&R&cT)<}8_5J5d=nhaV2FkCa9_ zB9AQyri~@Bk`)sL;q40+Q=#}z7E8W;!6G#7F)i4Ig6ivz+F3_!Se|pNiCWk2;ZGkl zvKDY|x^)u=G@+`F)bLA&vcp_4Wn3}qL;0d9%|HTlOI8NB7AZ%{oz8n#8{o7$wqd;f zh1Ic1`I3FuTp&z4-qogpely_TthfNeONd4qm4acjL8VY~N<~t!RH9Rcu$5l0NM)1e z>GF3q!%T0li9>b8kZT8R7Ukx)ozXjZZ|X_yMGe+hrhBQXKS7~6P9dE9vRH$uEryNk)Q==)Ex^1ZRE_7b`6F&s0uag~ zZiw~jR*XV3p+fKJ`O>KqmKa(VVly}b zptyxXcVAZN6tnI0czH%e?GfEW^-#v0`imVHe->>^8gigcC!-Igt)?xx^eiZDMsr6l zo8u*mg6?rAucSxUFX!E{Q_4>2SurL=da*+<1-li*k{c~b8TFg=mcatNlw4-DsquVH z9QJWMkM|GSPin8S=_jS)<(i6f8R_LBqI83#NIL!j4n~E~Z9+jQ*}&CpbI$gtt^HF+ zIXm4s=X^41d-6B7Qs{E5v?^S3rjd?nT(Qdaw;j3p_S;S&zbsZ#9V=ZCZaLFLM>S2c z%5|}-l@q&e7v@3fONt9~N83NI7i>iuVB9EJ7OPkjTUHx(pE(?>sEvtRVk-_LnT~7j_jb>0i?$qyuKTmkb1e0xw~HKw&fAtkkgE|2N=A1tMvPK)g&~F$9ZNS2 zhYd4MG9AG+#gV43X?OvQu#o=iSTSN9v7}t8Vau?!->Cf@=+I;n)U+4|Uk*(y7tF&J 
zDObvqZ2U~hpSJ55l2I4a59(UAy6o%@bsYxX0;y2a&InnCX$mX8VDXxye1=vf?~2u|p+;f4RKL65iYe5rc!u*Rw>udkzCng*?J|zK)l@99F+c zg*num-tNY`z@3fW?f}El7q@GebMSaC!%(FSB$OxcHI#dT5mLxLZnWJQX77pSY>x5G zFP)h9_KSbcP;A$nb9dCXn~c>}C%-Y#7ykAc@96hNTW%EA!6w}lJ~~&p?u8wr+sDlW zbY(T+&Tw0_WL2ybz{aqyx-P7QrHSEKMKy!8+rHC)wyQYU#q`XeyuZl{}a*L7CeJvXGuQLwVXZZZub+;E=A-v-M{e2T86#5P4@u4 zziHeWt6m;%3FpREtci3++M?BsQ_svOGg9=v$D&nRV=F`$ch+#tRK=7sB}Im#RS(6g zR^Q6c*{}=_#riGA-D@KUBi+&J4O54&m0q((n;wr=ZHui~vb0LX{p*)~UQ|dRUqK*W ziFV(A2aqqpSTm6S6~-E7VW#nyW)`L(KIIuoX7aw3%3-rwkpgyj&${8qj6*4J+NMKx zzp0Bn4@(O4Qq(e{048_`1QPhb8)=3&f-{;RmiF4yo}RA(bdE$%*Ut!`^DuR2(@V-o zJiQS%XG^zjgZogE8WTtd{bbH ztiImgXTWh}&4kJ?n_IQmDQ&soQA>KS$r0*KkRmTEPN%iar96iRreSFwhtUI6N|?yp zc;!JlA~%r(YJL`=G6>ZzYFVMEn;N(2OpTkBBg$sF`4N1nG{KXx$UfXDtk$4b>o!Hd zb)-Fl+UE#qA7n^NW@80I*xhr^JyF}9-{jfHzwy%d!zF*4xBTr#Zs!Vxm1Ny5TNyE( z@i9B^;n<1|u=9YTh0X;_ezAQ)AegEWR<44NS-v*noh!e8bkB{d72%G^$lq5ze6nEN zHc_8uH&${i{SH*ZF{e9syeX9~w1(Pi7x9(e~-^}9FL zeIr)2Dzatj#I+}HS2LPD`3JYHld_Qmhk3d^>A5nr}VrJ%;I%muBL)?bwu zwANTXz%t`w7_do>ZGV8v%dpCznSoV?+X|gc$Ev0btkUqtXcG=nu}&Z*Hy?8(E3isC zBNG;gxfrQjKq^7XCzF~Pl!%Y=Grtvx%gb#`V3bBz!?FA&aV)cr9fr7Jdct+Lq_x=r ze6j+$98zHhZsiZ#C8vQ~MZm3M9re@hE*Z{~N~BV$j0{|_X~{av*b^$HD)6CNr_pbr zcb7`br4`dP`cr5&SrP?nhgv%s$WT6sZt)=3W-#nouHwzVuey#Mii|FCL;gl9 zH;~EDj5{;mQ_6gBdJh}D6SH>^TMipF2xgXS3V0b9lf-ifc4R?tyg&m_=8-FhsG^s? 
z4%0*P!6UxLLhDm3h{3}qC24nQ!M;+hC@qYqlB8>hO+Os9t1?lW9-;aAB!~Z%Ks#iB zHuv?PoO5hz!9tm%Hean>hn(4A{boZwZd~>eqiT9ePwuc{yJP=v)X5CD4)LId(dg9XupO~v| zdGGP5&hSn$n%{KIJRY?!i&nQ_g2^wBmDI*c>uwY;k5%6nTfQzZ)Q|H&4-*j2w4FK*erTRZ99+p?#<%R@@4e@L^K z5&a`=qFH*eQa3u4ek^AilZsA+mbfH~ZuIgvz=%1h`SOCW)WUO+^oD&>^?0;Yr$bTJ zVGEYG8`1zB69?iHxoPliIgI6>?9aPef!=P!J&eUU#EC(frlKw(NKOR%q#{Ejn39wV zEF6^S*HXVaT{VZM&LNBuYFiJ}DoobLsOb#Sx+nWcSdO3wjNU4=#Cn(-lJ${n)5bCc zA{O?c|K6!3v!<39U?=Ktaz=i!7dyaN%n&QbSSW=_sV&*@v`oBll;ELRtk(@(JqA%P zHgCgXh1VBs#(cB0eQ$dwG82pxh3wln1l1F*wA(t{pYCY?&Z3(J@nYH)X2 z%R!qju8|bS9bUB;?_mx2LP0s6%if}(#!Ggf8+!vmwn>)fF(4M}mJsEBKG>KOFVLgi zn0@-rQdjDMrd7?m1_}RLQLKzH!Tq>J8VcU9;7$Pc}aB z$@Z>j_jhNvKQmX|GwzHzoD=?8$C?|ZHL=pBsjjJZtl2zJccZR8vhDin&9kdF&#iuR zuI?Mr+AUbXDcO3g%eKbBAbeV2Z{>FJh7J5v0gTGla;f?)P$&7cqQ@#pV-`mDlQ`@kbI`o7l zJOrkEM>l)lFs)qcZ@HzlYg$v&vv_rJL%Ksqc4a^&!5@Q6T9-PHOpaj_Ajy1zR@W_i zhpY`qupY8w1%|)!w(+un#b&J5`udbpxFch^+0ompwV=0ms4BhDuCy!LX{-sUMuk{O z#e`?#sqltq(b}kOE#uH$*fB&|At0obC0gERC*XvwmzfW}y>UAzB4OMop&N^O=E-uX>30Km? 
z??Geg(IIfeJZxTS87<|nU@>gT2?T4tn6l^tzMF=|JL%;R^qLA?&om(9IWQ;mw3!jg z==aEn0zTB%6C3s*-xk+^La%EgP8xw18*yJObk>( zH!=ZH&fzF--HZ32>c%j&n-y~jvMTTj1w}MU>8R1JJWK_2%p(Zwh{6cG>`^+(qNGK8 zSQyWIn+XT>Ao1OAD`B^#{jG%q%4(29 zxmHH+sxfBgc;!;1RK2=o=g!XdoizVv5sTN{@e*$26KGLyZ`xnR085I|A(m&y5?Oe> ziV_OgilcInj*ie5IVH+4eT~ppYUceuozh&VG}0H-st6rrB2?VAmE+nr<*#s+KtZ(v zXfRcoOmX3%e-P?_EtLL)Q1y?(+W#dyyx?3>VT}qU3tM)X%B-VB3(uLWt58tBuz81R z84gMpI?e4SUbNTTVp@T+%7q=~2WxRqy>Qs{pooK-g+Y^0u`X7+Dpq-4tmfg*^K%@z zNRX&#jFqm5Rl+-XlocVGeZLuxi6hx^msP{>f@0ck zGUc-8vIDBe4yYm>e3rB9Rt_C3>^2KUt7GM^SXtBOdDceoqBvT*_Lg7;FPuV7Nwjps z0s>~{QD?Mx^)10{G9#9_a&>g|gV$F-It#mF^_IE1$3Bw&dfWBZr)OKAo@@Q?+_qz)CjiN-nMncL1PQ`7CuR@1tP$F7%cm@V6I8^qV$cIKIy8Q}eY$gbxi literal 0 HcmV?d00001 diff --git a/cross_eval/__pycache__/prompts.cpython-313.pyc b/cross_eval/__pycache__/prompts.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e6edd0afd93fcf6f5089327b0f396a810e81e209 GIT binary patch literal 26738 zcmeHweQ*@lm2c07q|rzkeG3VYXe2%)kVaq(4j71qB(Th9)RKc2$kZ@ui7~^B+&zPh zh!fdhM;KCw7{wDuc*h$YM2fAIF?R5-^J*(^|9JnrSEE%bOr}WIwMo5KVMbLV@chbt zRj=Of+}qvrA>rU%SE_c)V>5I6_U-#|?m54E&bjBd?iCf;1$^G@e%E`8s*+tbNmUDlXRXQtX_d5qrK+XXQVmPh z)CqI{lbWT4?C!$sds2(Eh}~H<&z;5W&frFhtN{SCtvIP$)4N^5)UB+5% zmDWhhsb9~Ulx5XIy&$bvC`c<0TDqS~wWp`nTX;%YA+41f*=SY{SehOga(2l5?vO7i z4l03TgF!JU9~<^tIC<@SKT-#ZYuOF?;%r$e8DEuRU5M7JV~ zLAQET?h~K)29JndbV5ER`-5KpfanSI$>L#D&?g4`qFY4&@PpoYj>w*)KCc>d+U*Su z;y&ctPtx5N*>$^q+kVn0$?tT_{jwtaJ+jyl@Pw!x)qb)s;8FSQeTsbC8wjalH-6>k z#qF}(ci8PYYCqYprFglnpsX+w*n5t6RgwPJbHtNI@_IUffvg!mC#65d^M0Z~wsthV_v0ruv zQHk1&7X7kER#i7T;69A%-MqWaVn89TIK~Q!0ZjHW?+|AQr+uAM+#c}x0?&)`aT!ly zpEYjGh132hy4X*d7&EAc>NwqXGB;C~&<&OMpSrH~+Jo|H2wcjTB;c3(oc8ss zwmFr6fBZqW*Ux6>7@lw|#Aj|*?C6qqcS%xb2aToQ?F0P=1FARZJuWwkT|4&d?e0Y9 z0?4NRtKz>7U!=~S5RbTzgCk|xFJg2=Lsp$9pwLt~?Hiop-cWEb#EI>JT>0c6D$!?y zQORi!{XTzw?k6(H&+GFlx(BHD!M_CO?~wh((il%W30LgW=r15Qack96EIRF&UuTxc~yz4jhNf*tdfd 
zWVZ(hMC>K@2Ox3HqQ~b}Rc}A>F~_HEdwZS~RpM=L@Pyd5r$h9h$pJ{|Cc9nI^LiQ5#}eN+|uLw+VpZXYT^Bkqt2hVF9*-J&m`LiW%XuRo{+ z`a+OEH0KGi=8lJaG)srQc#g!E)xX>QY(SyPyXEIXfDDog@t9YIe0q*Fi>i0P4@UL4 zk?RW#cs-)5C;^2xf?_J?HppQ{ue7?oeqJ}2f|v6jCu{~9Lte33&?*4(E8gQ?w@(bP z`dAi5paxI)NV-W{LEv^yC-j@sWiM#hBKrrh$bgvs0U841JxD%CWZ=*o?BQbd0@0ta zOX0)r6AuF@wK?X!->Y?pYHriZG4k|d$ukNQwGC)OE}#;Sq?3kCMM{!6l4T3D9bjg_ z4;NyLfUg}?86f?*w~xSrxUE?{ft7D}uLR&#G4ra%cz_y>W%lmb-qpRkvx9oxgE4WQ zL`4972mqQ=X_j?d4FP^|Ng~Ndp2D}rOmu6~YwY*A2L{j~j?Y;0J;4wb@Dr$nVVO86 zD=7+TG$z_%(b&gFBl0EC?IZC40ccSDUMx_2%Edq^sAAAYEv+dctDeB1EH=u{0VfTn z#aPY`hX%k;2E#+ZJ+eY9hVJwSlw)pMx zylGQ!%=z@PlK|BD+Py_x$ zsN=!2`oK!(OzWI#8@~x?k;VEwt!?!*7Jqliqf;wW%y8=}O ztOa>AfU;3XL2wAvm?q-$1A5+}1KC(`NbE?FM{HCKt<6vZ2+7kJibzUiKETir6>6gY z@2H5N@84Y!qtrZXXxFI6mKj4*vIfPa}{c`*4AObw}gnWW1g6nLF64glTTUzHAgy=ol2J6R2CCw@u=nW|0w`r>I4wP$GLifhr+? zP`js1O?qCMbf8Lu&r_8Fn#AV-Pyl`K`nK+_&h26w6gWxR-MOdd5bxpBveM`E1PP0% zdzN#L0`|XATzCQllXLJcSl!y&n9snTDR}T zdZRq=g_=0${`R)6UG|U!<12CY!{pgW^3t$)=jP+Rel}AJP59WH>qgQR3}!G5L#&iPy(j_WRdn!l!4(!im=|igzZ*6O-e~w?>n1 zy^=UPk~n*r1Da8Hx*e3&#vE%XIeGKWdl4~l?It>vZU-eV-AG1Gi|AeA=ab^h#gC{f zlkaAmLFjGbz4ygrWGwOC4LdrSeCygAgJ<_paeX9t<)+9V&l*Vb%5d_- z39^Z1E}jMw>62B<3D zdH-7S?MYVp+_~h~D^&XQ&E%UGnTcdjEcwQ$nE2T%$+P2$(Q)z4^;3x}=jd5ZbnOW- z`N73IlWYzXuf5YOCO#OM89R--X2vjAA6!hHdTHiI7n{X9?|qawkI6#j`{+AmvJRl> zE5l;q%81?r7)n{wle`p0KPD2dN5q-2zfJt?YVy))RDCu1_Pc34hvoEu*+I6Le2Z8j zarFkOxB<>%WnUYi5h3Hw^~toFU^Qtq850arfD=*zC?>+A#C6H=WgK*hICVxJW9Q#EwGRin^Jc17D+yQuujB5swZfrsF30_`CH~yBuAbIJd zGB0kF!3*8*A)ycd^zzBZp@6mk6sdus5^;~(;%`|r*32sy)IrAlm9T0xPCeL zN|-1Pj6|g`UrWAvj#bK@Lk<>=Q^RPKfj9Neo=hVaCj+o3se=&d{i(?#Lcj4UTTfVj zP+9j7s9 z3m0cs#)NNNBM^EET=6zAfmn^9AmdU_0YgU-8#y>K%gUgzm_8CF?q@Hf%WPpc@R0Y` zDcW%o#Py3Lk%`G4>SQ%UiWc<9cq5*rWifI6^qrsIOffne z1!ny8yNnZASuyeYyG=C4`c$==RMw;9hu}scOya%M5KS5dqy}Q9O`V4YtxrBuX zsqqNZM^5i7i%OeC+@7zxd1h};b@LDDjaO;y!Gc_`ugJ)`bgmwcCoX*i8bV4gU@p>! 
z$Mo$KLQ+ewjif+MOpc6WnNHbMiHVPrk>MsbFE3-FZe-dB2hxGykT$Ivxx~ghH-;0D zi4-`@VKki{lQ*GFr5jls1-_IDxv}*nRmjO_h=(@dhZ7}`nj=>7Cm%rtn;FA`eT6|U zpM^}yX3{dv?Bazr8rvJ^2ov^-v~Q`kaGd!rYeP6i!`#uHmIA z2s1wbijR=oOrB1>^**fy98)#3Yv$Z2EC9Qvx@Ii{1|$QrnJNpVkC;Bad#$`zpjnD0 z*`^i{ED`x0=0;48y~fFwR)kwy&;D7jy5VOcSJ_|Ey>2*{G$(i?Oj7PXOSg;W+b?R$ z*7~#>zK9$BNI(gM29CgkHFqZ|!=pZ*C%tojm(X9sgH8Z>(tm%6^cM_yWO=}J>(lrY-dc{ZBxm8%$kOid%kB`$d;f8n~PC@Md4a)d} zpO-t0$uG$^(r6FG>+uFrCGBu&TUKyD8p16$W;z#}Xb#den9Pwifzn^?hJ3XfLN48Z z^KQt0@XdfOF3(>%w^^ishi#nBakqcv+-4h>>5goD-yuEEfMKX$|CyZI{{nR@!wHw> zgXP}rhjSIdavx=ml)s=aj`Z`CgByNP=%Zp|cC5|=UDtDzvOC05gb-pQJW>J=BC*{gvdK`a@J0r*ik?BID0b(xTTsG`!?u)*1I(Y>?d1gr` zFWqE@1b1q)3#QFw^oY8Wc=h$snK$KZAGBDxh`l1x7gg;j(UKb1jr1jYWs=R{%KJmL|7TVfxK3KgMK zeA8gxmleF-j^Jo7f+%GzS{0GqhH+mg!gI<3R5`^!L=Y2hDG#Cm5u5N$9*N6y$dz`?`+|%> zT51@<%-pd@;ba;C=pJ+SQ6BnA!9kQqf$l7yd#(^8NtQ~bQ9cOiMjbrN#|Zx+ zhcjp2y*%DKJ(94U&n-H`-3YWVpvO!-D58uVelP+m+3;2P{xCe9&W)-Th}EG}DaF2M zV0(sl@{rgLavx*5FApFDHCYrhresb~V58zj5EDi0Q4}%BGp2Pv+v;i4e6amAjp?=O z&RIBMiO=9b)q?!fm>H3K^H)4i09x<}adZY3=#gh_JkYX>qPWPF3;KE#imOMbnERB? zuc#gggnWJQfgY8~H~SE!*s7Km*JjKc7c9PIk|H++w-Q<)U(AZDWv0DO5=JZJ5CJI? 
z?h6I487xK-X5^4K?_L5<=kqMr-_R#}D`wk-C-ZNs-c=`Ep zTeIW19r)nrwTCg*Gs4U=;sg>#m>GqBonn!CGd~)GDMuS*2$&HOh=ol(7DO=;p7|jQ zR@1`Iz{H?9V(UQX#Os$?fCubH3YkFs2*ov|1st&X9J`9}65ZSccOzH~@iVlimN<_# zV5_nSHZmr&CRKk30cVE(j@|#v@Iz!U^BqwFJmyb}NuZtnAN)Kq`D3^>GCPHR`NYc; zsD%t%J=lyz+HqQH(Ndbl zp7`K2hRNbG*f{m35y@uoIK`S#_+*OJM-WJZus8~1dlS8l(C#?`C%6dU=QR2=^U63) zRN~4xiW<>Igm5(8Ve&B$Q@u9A$ENSw8lp+E3KAmRh41wnH%MmjD8=z*#jpW;w9Jfn z3ErUATIRx`?~Z5pN!w*c1PqHKK|ekiPhLl4jm}{_5`z<&4do{vAsmV-=bi!V;IkY| z=#Au?SGmLF?A&-2BEfhBJ_k4`Zcc7WHK45(oaPk5m-xxK`6|s2AaFay8lqBiCGqIQ z#OTBWs;6KU3Jk+c@OT6~m)b2B>mPatodI8J)@eO4ct>X%1cU)XvGq?OUf1C%WF8ud z(|F?*T3Wu?18kKscJ4DKIAukohNsO1`oh*3K1FPO$)K;czI?GgdV??7;-bl$k;xXA zCTrZLpYn?5a^>T*0{+`Vs^U-yPL3$@0F==do{e*!X4xM%9}NuEPyqG;92RVbGN-3- zdFW8Qw4-x-TkozOS5N2eeY@IvIwhqLM@r)s+>V#JG$q&tYhWPc9*{lSIazjk6`p3n zA_%5?K_te6Uf}@Crmx3Q&fP2s2XLQ%BvY^85G3;f*e?8&D&3u@=cRtLPv$y=byoe9 zu8g{#v*Nt0b*NsFwPSsnh68m3IxW-?z01y_s$PFegVwXfm7h6QI>ol*ZZA6^OEZ_F zIM9K({b?LolRN1^R@_P|YutKNKA|clxJhSgsS1?{A;!10_NvhBTi4>VVeR@>9FRB~ z3=Fn*;{*&&F5&=MD>DOH=olPNx>yps;yB)`ItNe03vrUyAh1%6djBUXS2rUuEX?K# zw!&YPFPqL=cH2=MU9@&`*<{t^cmApGS33{h+UcI!>7L%{ncm(P?UbW6{nL(tsBIut zy?Dx6c@NJE8S`aD2Ai)HNYH$Br_Ux8$A?a%ug5W9`YaQgEJGhiZa>rF5oJL8++~mS^pQ zmZl4qMy*Sg8r=CTpBRE<=}-o3iGvw!ov33hO%l2_TH?O(!P#)rLeRfl$mVjtDNQP9 zzOp{`!U63a0)n7RgWudL_$^f1lf8oAw;mw5WS?F`V*EM#mv1p|F_Z8~=9VIi)1sQ` z-1`1Xp0{bgAoSUAy*zd8E*FGeOLqaUE&aPUM{ivec5iubDM-A9*9iHwq&@RZK`0PJ z)D9lA4*gBHOzsIBq$36*bJhnk!x_=i40>OH;y?;K=`hT1Hm@iKyO}IYcAANv z!jUu25lAappfFu=99YI-V$qK?<_gYSs-b?e3RJss@({601mLKTb)KJ6CPHpP{AL;!BMgO z?|UbEF4s&vdSyvu$=~;Ws#3RKu3l*RBlS>OjIXBLcmWZcpPEzSdF^y0NLJznef&KH zv|hw5bnKM`B%TW&2jX4uSPr9kJU58f4*1l#RmJgZg=B{fBW@*^iAuaGvJW3n>6}q0 zYf5$o;Z>Tt!`?~Yhww!*XaQTLhRv-Vg)DI|C8biv|?^=Bi|D9j;PKd3)I~q z^CtWg8;2~|KaHDS9LlrXTUuJ|_9jc*GUOdpu#5|F>$3r`zsVNQ#bSv=)NwmiS=73WPks8Nky?Oj z)4l(P1j%|Me(BQ(Y)mnMWXL--6w?FBnDyzEO=;Q@@LRGnV=trzcu zVRvm(zGUw;>vgXdq=JpPt!CiAk>_5`@OU1ENvpB2*KE{SBsrvFsYEK(TLBxEt+Pnw 
zPP0_8F;DNWhSrrs&h2zslMi3V4j3?uekT(`D;=e8o06zAsxvVy28LHvv^fx;&`ph|(G*o~x-K>5HaSffxrM4i*8Qp#H zsySXd$Lk(<@ItzK{c7F=2cz)Z;zTd*Qlz2xG~yG42V_Sw>sqTJ!mPwE9*u?8Zy3#($4%VJfvVfTe+W0kcraZ_y3 zS}4{ZH|>cvuAA(->3(srT~0D>LMb<)ih)1S$F-7V_ed5JY#l8I+f(+`{zqGlk|DG(`1Aq8u99+S zp;>3{wGg&QHhx7|vN1QK6c!?svTCK+)tpiVMqPzck)&M_hOlLY{~Ft4k`|Xlu1%DK*NtzXclPbsL&4NTPkx+9?OdF=|PL zQiW70KpJIbWu>Yu-_Ac`j)kE$E7J>p4#Ym9wZi~YA=N*g zuVbW(L+KNgnM2)i^)!7B#M$VT4p16>al3{yhflZ|ZYrIqLTSU-Q0_@qkpewgXtF&R zTK3Tse??yeGM`tH?klO{U35wB)>3xjE1qxcG{$qg84N1B=;mI0L0`4_pr10dB8@Wz zSxmb?tKXp9Zu*k&6*sx#ro(Zw`)J&J_^8r@ER|4>V2eH+x)y5C#b!Xmc^$w!D7}*a z?7u>Iq1W03TfxYNQzu8iaq7Daxpq!Fc13NwpsJ28J+pDNKm3gg-r?tN7u7{f;p5Xq zD_+_@yp3S3d~vut+z~BZ8Y=^gF=VT$3oGHi(Qn5pYZ!ptFCmC~_=2_J!{MH2X+5iv z!C_59WLxCX=z_*ropW;K&H9@aH@l;2o`}}Ap$(3drTM_p3f9o)Jx{1QUnO_x`Ps*o zGo;;RdI+ffL(}G1&BAbQY|*kvccdd)(=_qSq%zqTUHy2pdUI@%2<6TiUOZ7bp-l8e zz8$T8G*;bkFF$8h6^w|Lo3v+_M-E4NqBW}~p1oOi(;98w60P1ETQqNOwTP!zR^2Tw zA~>%kIA4H1uXzBRmtuyQQuwc!VJL+UQuxx8!W5{dOhd^`rnei;g%rH?VG&3HJM05p z{nL~~$p)p*w2W*JOA7E(b*4K3_E+fc9M~mJ*d=UfnBS~VYcpR1=^TmL)UONx21w^& z@kmn+cdyY*dZUc{8H~wVEw(`g5ur$J?;`^d&q-wBsa=Pmr{WA}4~q6MVq zRV|5_F8G+nw?4LL6*N8|Y>{Kul3!w<6$rwrg(XX2eO4@wc&97Y4DY^Oy(rxE>+1Dq z3P)^d+T#L_(cg+y)oHcWG8hYQjV*2%FNoFD-YvEjIX^~Di;v~tUD)@+%s+IqccqCUKUaPRf+yjwI<5Utvp*5m^e*8R6v zY>ZVeoj7^($$J(v@GU2Y@U4jOtr!Cm8NPk`r{zM`)=%SATh&64?w2LS?JKRnTwdJX zZ1otbC~g=^>?a)cz9dtaA(_!m29gx`@pErlS-vBshm_`DCU`Wt5}^2q-tPf z4b~J3z1CD(C@qo}Z?w`1GcOX>4z+hPprP;*T^B@jkiG*2PXLd}AfLbsnm!Agk-f5y zUK#2O1O}Z$Ejk{e(C5M-Q;?zQaxriDbH#Q1Q2H?#j-E17+k2D&${-{)6j#(0Pi>uP zIvm$3k5FL)2DnyDm!TqMyvnDcrK>~Ql9{Kuo^%K`yCboqii#!rp_T##DTcXy=X!R4n zS@*jP~Wn=Fn3m1 zo|F6OpYnv7Hq)o^nl_I7*5dZH)?d~Xw{Nh*1)R(MGDC&wW~M@OI#iz8Ox9mBd?;SB zuWMiDuC6_uu6^B|QfH4eq}hlhwsFV4C)@CXSN&eRq;1zO?ZUOEZFi^SA?egVBvM!t zO!q#})|&Y@EOqxtqxPH|VvfyCV>QvMqpdH=qHDw4HQ+ZN&}Qbk zM(;0NQr2$T7|r@fa8z%^`q5+``tRLZ&}(XE0!pO*#%IJKdvQ{nxfZdzjEz<3o7mjr zp_}Zjr1iSitGg@e#g?tu+wl5=Em(kdbnfZwM&yDK!hjeW;}MQW^}wI>N4h(o?&|zz z#shAi`YHgWx&^rfMNz5b(qIuowl 
zHg;jel?v%hLp42mWCE)YuMNbOCRA!TZgWDx{+6#{gW86OUG`4brXt#5p)C{&L51TK zfyO}|onRS|gDez*E8G>RoA0|3AIA&4stfOC4fsMqIiAbjtf0nAw_^x<0zr0gmd7=~ zJ?rj+W>%cSE>syyN+9+l*b(u^I-VNvE4`HQi(YkvU7Jx37q2; zb!wDT&ZEFG$f5dMB(QT+BG@W!<<(8))rE&{<*k~^TNQIGyyXz59AczA;+}RiPLxhL zR*zV2*Q~l#Ov=h7HpVUz@Jm7_Hrejh@oY_be75pJ0IoZxc%Dri$yMwt8T3VaaIWbV2QK z$M5ng!acWYTc&DTZq>F<)wafp7f%&8U^lU#`26E%9zVb3%$D1ZCDHoFZ+6_=5EZvg zJKCeRcDlRu%+|537Z} z_Z@1fk~@&olou~_b?{Bs4s5!5zYe+$J)vgjj%cHjCj9|*^xU~l=4ppxZ zJW_BIg#xe((8T6qng!ahqgsTP`(&gGT`qd>Bn=x^#V!}ydZIVu`vb_xbGh*Lc$Z5V zqZ%t+E{GD|_Uv*8fi8G+r>x>0HO)Ig;HF&AzliQC?E$?i{Kop!|FN2KR`6m}Vt#V= z;ZC|g|A4WG=`iUx_nPP3!Zn;jA}v|-7&6#a#^ORBOpuQ?SHtL{F0?PbN0aFbmSJry1XjtgWd@$E!#2c5?zgyttGi#p$V? z@(G3;PCF)xk%~`NXpoL}#OkF6y=(D2ZN~_TBgNThS1Kq>xWQ4Q7^f`I#}sH> z#@+2S31o}Fb7~hV#LJfH@@2!jV%Cyd)&*161>wA3S;bgE`K^Mwse-!jx2Fr5qt<3N zPv{_pKrjYU-asmD-Geu(>UuQwn`LtjvntezA{x1rYO*UED1*}60l`w_h2VJ`DV0@| z=0avAyN!~Pk)o~)m9Xd`3EN=p?5{5z(ml_g|5zz|EbXs098wxk9c+@krYN9e}XVj47l;#PbOGecr>0 zm9BDwC+N(J!ps$l_P{jy@FT(QwC9I!yEUL6dh0@tgJ5A-*;o{kY=9+3`RLpMJZmL3} zY_{9nY2sOX%x$Jc$XhVG-Mp?AiJIAGP3uG?7S9fvgvu4MvZb*Ft7D7T-_6e{$VKEu zWmBwdS!@C9i?6XP_>tC_(Ok5=`JPZ@GS7;_!a7`!uA3HW@8{;2YVTL)n(FVDm6=Lr zR~4C-#|kTFE%^Poq-Hh;zX)(FTQr+TDY)p0XY(m#7jpCO7f?!UG?lVP=(m3Ltc89z ze4YJ%oKtx}2iLPtn|7GAVk|-BSb_>tf|aDiY>#<^R+4_HApKG~{IX*BomEWROr~7c zE=y23mY{-^_&BHPehwvOcbSFahFFC&R^EI!&)Ni9ltjyx-xI8$g+s_Gjh3yNh1bnI z?1+{$+!M?u^Ie;;q#@d{?pDLsrl71eY?`in{KLL~+8bXor`z{WZ+&K}?io}x o+B7XZ^NFL{)O>$~)wE*t@muAqrpi~{N98O2wzir93{0S7s*p{(nFyF*LYz*eZHVGyq;iKKMOE?Bwc1kh?wLIgb znY2lmL3GF^29=$_LBzV+(8`|i8%^WJygefoQ=)kMKF+Wp?Z zGW86_Qimy)W>tgqkm|6C#;|%&eOL`|&7fvTdss{2w1c`K{bBu(;jm%Ic-S~(I&2y; zA2!p}0LAK9{TnoEV2uDxtQnw%wF0!Uc>wcSJ3t3p0I-lP0_bFm0hX|(0L$2NfEDZ# zfJ@nB02y{Uz)E%nz$$hnz-qPzU@hxn>sULximhiGSU20qu4dP;P3&5B9lQRj?T0ks zWCLIt*o^>9>?VK~b~8X5y9HoA`vgD-+YGRf-3rjjZUb1tZU3i{b~^9*8ef zi2o+Um!DC)27~3D9_}PNeA>rw-jQK0!1Q~4Ud|I381}ipz_IC~d9%mo8xD{-e;0h~ z`?=xKk=`>dEuXh1AYvcz`bP%?yzQxh5%1uD&%0yTcYL5f`z@wh4|q=vcu#kF1KdEL 
zpEq>}ydxs2HZTwv^akxaxM9D)Vdp8&Aaj85#OwW|Lqi_!46pAN1DX+Tc!W1R1<=a{ z&hTo$=Cy}BgQH&F6aZ!idj^2BVE#c2GZ`-Zy!H5i4^mHgId-5g5VY>XPv(&D$s5>F zq4K<;EfX5@yd+Swb$VWs)3S`b;PHF?J%gTJ?;x*xihS`#ESadA(FO%kXGmWHivz_kxG>2Oay#7gPH!-xx!pT)~Q6UN770={w0B zALbaJ_oV=n;bKd$0CF(H$C>UvXmneErM-iDgcEIgB0k zu?!UIA7H&q0PsczJ-)`i7rcEZ2M7EC#!m?K0#y&7pN7%*0s{jL=(vOB(DhzVz{@f% z{r#M`AA$^Y6$^>}U~6un9top?BZCXego^rcNDDMPeK@AFmGQfSds+quq23ZNP|qPh z^)J~m-uG2+Y$TEzf!kn{RrXy8Z;TEyuf^qsn^vBhPB86oz15B4~393J-f zkg3QUFvxoSeSAUAtPztfLi>teX7P*Q8<54GA%8z_KJFP9>>=&JiXZ*~Y!lu(BrF1A z{({UWGAtD#MdCQ*3#Bh{hLdXWCMnkE87+nt>&p+rbMYAS<@@7vKf1=u{^;ht3pek* zHaV(;w7KtHntSuFN$}pAKbigUjZrn`h<|q!1@YVmfFF;{{_I2M-uJH0O+-em82jPX zxxe@m=HB`6T=Y#L=GF5+h?#r+>X-4e63*=TaWV6#5pv!8v&h`F59WR}K?)P73c=T7 zU;gaur~@fqj?caNr@|z@_vTrkDR3r~l!q8Uzd9TJA(^CeKM1= z@KurS+z7tS=y&kJT1>X!gb>=)>6`{bZCxN?%a#OHiLt)I@QZi#>wl z``()$%)b7SSb3Lf~^+6b0lFe}JoJvLX09c8oXlbRRshr>niY546_Iqlb{0 zqUih65V%YQXelK`G}?D@s=+g+4rFqj&Of2IaE|gdjgk-1rMtpphBv*=|7_ZPuoqoG(l&(H^2lqkIz3yq!Y9U$}8Rg zqOE__)9>Zh0g)Sk3Tl!Ul%@d3ZGaeF4PU$tM7Bf&DRcdJ+Z~(p!r-~V$hx;n zVpVS~P1#nDx2Fwx7tH6(;T=~?qE%Ox&Xlf7madBJh!>>Iv|b{iV9>#hhp#0 zAu6B|a#7R?gA~fXr4je8$U%ti$xwXkQdGZ3DVeeK*rlj{k5aHL$@AEyXnv1UGVS`< zrD%VTQcz_6ZgtUxGV6k@f5}c82|YHqdPO?0jtNhQhIwxY?s*E7W-n-BxO)*5OeQeQ zfX2jm`aqZUf<8A8I8)E`!Me~7v$3A>fcgt+kk{`Y^7;ax^Nsofps87o9@zn?1AU&s zqsM~AqenVDCx$sOIN-zJ=t?O3=#l*k6wsr`LPw9t1s7nV#siq5T1qekX$Ay~j`r@@ z-?eMcfzF+6TaNly)ox#YlywDH9qnz~)7`nJyL)Hb(caqakW_cH_Q*F}$Lfx{T-)I* zi|n#+H~@I{AW8+!%j-cCWI^%dO?%qA_8-`}qosQ%Z|D;%#A}DJGVsER@cF1h_sCRe zP$@-Dc_Uc?xgprM7})_oMtfNhKRQ8s=k=mm=f}mLVYnU0M&Sr{Lyyo6Jt78o5R!cG z=l@p#u%_9{UmqTCf1smE%C9zE-*RP3bU1$MWA2l&kH%8vovGrk@%Cv=;hnf) zz5!n$7V&S=?}|$=EDfq)^^W18Uf3ft;&5b+bHhVS;PkMxT9CcI+ok9BL1wN4K_`MP z1p5&@h2UubE(4ivkAtb=X%A2L=1dPqPe!lKOpfP(?5ptS|26;=_$6t(<8s->vWVyU zi7O|P_S&>9??V5%{s?_te?_0PEnDXRZauE$j!R1vKS4HV}%x{^K zj#cRe-w*N(7Tr8$(CUPNE7u87CX<2tHxRfCx+3TRnRKN=8bZkrxWc0}EG5Z5rV6S2 z^cjFBGSX7Y2X;WUBsorGJu4xusEI7oT4}^Lk+`0J6Zy)?-Zp18a-~79OOK!ABDZ?7q>=BWkvUzyUAh2b+g?Xe4k(OkoC& 
zqqRmd^KfX>2zUknYuGnYlJ;jjhoOJ#YGFs0}%saN1;xkMOAU3aPrl< z>$baUw{Jb_gWRE`hh04N1r8CHn%DTCAhK^3isBAqypG!qFJ9;Odbqw9cwO86gDo8$ zyq1_Q(fq~@jPP2pT6sB_hQn6-a3=AtQp^V+nk@y zv!`tZ7fzl#8Cm|iZ^l-cv{j~TRiJO{ZQmJtdF9e?6E3El#lr*T{6(<-@5YCcSv_p_n+8lLLfy|`IjfYjQu61-^kR`)NvT-qBqs_4; zi$f@91Attu#SH-NllDL=>c^bNY}2F;s@ zMXAT<8S>tMEvtJquNvTT)d$M6$lnWoYeC*9*5d7*sHuWpc5*l`OYLD*>Z_1cO{d{4MKwU6#iG^fh z1zVRG>r!)DffBESxroMc6BvvI)0p26J&A)%7-JSW!`vxIdK>=y>jA(DZZAw0ZjT+B zY1lT^uq{=%J!RiMzW0}U`<=WM(Ot1!sl2rb!`eIMWl>#BmohgdG>wau1zhjQ;5i5# za+&Ith$gYKCEm%2K#uGG&sHwtLL@M`bo?7Y9ZJ$!63p4!9yc$HFT|lGSEgH z`yq6^<`E@pq(MrX@!CZjqqd z?G~1OE3fm6jDQ3qb`HL%NBHUy50H8Sp58$(_abBx2o_5SnF-xPv^Cx!F~u7s^Ek|D zk(zwr=PRVx+)as46knE270x2E8d{dS9OF8#IWauob7{#^!J(Q1athQ-Kkty2iziY; za5RLB!}5abi5~|ktRdnm!uhe92z~;8{-rRvK`t4nlCtrgcQwZEbiCXVZo1SPaYl<% zPFJF+E>@YaH%x2XcQmDul~HHRIjvcJcXd-jp< zPjvQ78@_qh?3^(#OPQCYE%pl?=Q<)yS9_t@WvQi`6HB&Cu9!SIv+Y1~+kr&)vx$Sx zCbm77*!q0p`J;)_W7C#zNO>HeSt{o3REg_BA*C&i6h(E3+AY(XC+=vNXi-cT*G+3S z-q9?Lj7E>gj!$dW=Ty#|cVY9n&EaQK=JJH5eDNxYCU~++Hbc|?Ay!FNb5r{+N*8!w zm7|nb$qVW})c|bvjsZWgma3IjwvhS_)p@%HR?{&xthid)%BHySVRdJzn|f(AYNNvR zv0s8E0i?m0ZcHD*nv*A_m*(`C0oJR``XZ&ndZK%Y_M@kQA%kgr#A;=fFdn(0n{rl5 zv&0{)hL(_S;p!bSa;I7AiOkBD<%zX{3S>M{n2nJ_3M!CEncEyr@^Uya0Vnx6>Gqs- z;|Zr!UiQ5xk7Gd|T;diyq{bFms~I_qa=mtc8W7C1hcXf^c8})QsFl;6j04w&-LV;MTIvkOg=zR>p#ql51Mv z1M8wd<>XPIVGY^XQc|AHr)SH?Y(5Rvge`}B%Gy9hPLC{+weC`=+0w-|{yr75krviv zTR5;>!Xq-z8Zw8pGU;zUQ7NTo-yr>e`UswuV5P(vsex9e1JvZC$aMhMBU4sADg|@^g_$SU0s3H7=u?fUPgAay!OaTV zfT#&bvKAEprq=83cZ;TzhT{V-x!l}Kzyi?;xHAws2rei(A!p9c0+m-_h`x`r0CyEi zyNPa!djY8p0C3UV5WM`DQB=GQ&4ZqyUe>cMSgoYiyR&--5pMGNImTdtL;-};HgH1Wp1|+d z5MU(;|9n2|{3RC~;&j8E#k5KUxLk4HM(`&HUPbU71QQ6(0dVDIRX6T+h~ViF-be;) z&yZ(?JCBGsHu-!Z5>+@q@i5}evKU@)FX|z7@_yckiYf8NAe>50LBtP;`%@%m1!Fs& z)C&GceiY$E8_WW-yW}`z0KwnEpZ~u?NBxd^;iTp>&DOhBb?8;Mli(&g^ z=S644ez7##Ild>6-;^ebHajzCBe1M)lnjdqkD-PC-%lc%(m7P@QnpM7fwNRvzC$A`?s9 zse;CYV|82=Z=PA(m0a7EXxe|vFXR|X71Spj4Y31p%gpM|^r)R)zQ7E?yC{%+&5o*6zGTPu1>8*R7AAj34`S`KKK-ZJuPC 
zC$Xb9(c71(Wh2&fd3CIQrmj6%*FII=p004k4katrUg^AByYY(kZsm&bzI0V%xbv={ zUZ)+-OI)Nnx++!dPB}w8DEjgTanH$x@@^7-IL`rn>&-6J8!YK z{E5xoiA@JlX)k;Z2-+R@+>eRI72W}SXz;rrR&FE%d5tU7f=AK=#xydMm`X`2XXzm1&){nx10O6P8GKzx z%Nasi&^}b6UaRqG#E@zX&LovA(yT_V)w5ceHldT-VjhuOpOf1l%Wcd_G09TQIa-J^ zS91MS86&r3tBk%eq=7bgoKkXW*km;Fpf44Uc7SgFV{o5KSFXn!L;6L$sk#56d@#iQ zoOLMej@SZXj8(81DeOcHkJWR9vbq~XhDAIo9$bO`-wV3C@v%xMl9k{D9UW)Uf(J^? z9W}+WJSC7vVebWbN+CsgZ&a2u2FqpC%-5{aya7Fz)lk0r$ErnzjD`_u}TO{&eCFCS!4V7&_M;gfl*%oSBuOK}H z`PdG5AT-9Q2R6!46h1UlRGloUPT6b5_ujGD!__m6nxvy9)|7NKq-<_DJd#CLCLQ%D zTf=z!?Zyq^{_yjW-e_^u7_%n}R!yFs9QgFb$ZtRL}imzRu`O)#^ zGfl0@rq;xo9iKL(3kqinRwfHpCLGn#-kXltu6Sq4)tabE95|Pfa#&z6bfcX+!9I;t_(zJu@R*uq$cLMe~0B8+!b-> zu<$tn*b5(UX_PBMVzRW5jp=|AC?pG?$EU<*&Iv(X(Scb?r(~7{vy}D(PKyN(7!!3| zJ}N(t5Z!RNa(!A=B~9!4@2JE|-#JG|iB+Qi{*M$BkedjQ@-f%_GLqX_sxWUr722V$UA)CN_47x#T& zienHE0sB1+p)pOcqaf=+I8ZYUJOCjTG&YN5w?R-8@7uU4KmyYn?F*Wl8>LuN&|+$C zl+aChw2dB;U9JM&LJSt-(G;<>@FqCH!YfGV^eiL`5pWRa~o19&xxV+}V9XM0|ZBu1Kp5aMmQr*hnLSP6?v=A-3AF#Qm_41<|00y6^Z z#5sifPl$#Z7q5m(F(UJlhazr1vSMF4ej`RA1WhRT{{{dE{(RU6PiWG`l{3YS$>PSD z;^!lD1dO*j-7xYsTJ?v^T`qpAO#`p5#*Y)(PD&S2u@`&J@*8 z71e)Q_229MbKQjL_SQY&0};zi$@*l;`b6=DcyF>`^DS<|AlXdx>h&ig1ykk+h&-}X28@7WXW~R`cEOf^$;Hv3%koaHi01Jat zkkfKWi&`su)Ax6QB$XQ%hzE|Mfk9!@2M2oSq9!vSz?ENV>jKUZl%ngJ+&MxSLvE6w zbDbj=tmhdoAE=M`u!bCda@-bjQZlO*$O+4W?Y0tKxx9Kr3myB6XgXRYeFF3rMSVg% zg0vA`;?ek105I7Ca&~T)Y-M zVv2&agOt2$1zDMR{`&7-cQnL+OUl>6-125Lc+#TO)A?DhKsv&{XW`REUC{Qa`te}9 z`q4ekl_>Wb#NxTjg6FvEqUwAt9D^I+2s|tOGY4g?9S;}7G*|Not{pj}yFi+U&?KuZ9q> z6ED^0OZ-YV&v}EyHgC*u%^NdJyYjgaB=sVKHUuvtAl*i^WTFu7!?+Fvod~)R>__kv zf~OInQH_UN1jM`vw*~PgHrg{6naOxF`mTzjkyrUV{siL-&qiuZOZL<^35kEJYCw7H9QpV-u zt&#<$cv@2`8I=UHS7WNUDdAijZ%WuVfb$nYJ^x zQ}HvY`j*71)_F?3gRTN-T=_t))#~OgVy=YUH?0{4B6&qKwq?_{WfQ8j#WrJElC&&= zTaMt~z4zSSDa(?`iV6C**^*It3&DN(=*6SquIP%WC)zzv(XOI&k@NE5i-*Ggo;W5q;k+H9RMSHLDGt-0)0U2pLhpIwy_3r)dc(^v*IlfOym)sG72_j5kkNp3K1y zZ@Il{epsyxv%a!C~GJE)qMkHZ4q9Z$;TD}`f;27-zrvh 
zP?}rnrVg#`Z;d4#X4~JcFX^z`{%(0mN3reittA~xY<=>b4EkD-oeU2F@SRbCKBK;% z5%n3+?uZVd#tYb>Ul~Ea(vHCwODno%tKi5>tFY4n-JMnA+7E9HzE8pe$f;TFnBJGa zC=J#zNCRg?pNiF!BT+fWkQ*P?5TZdh11*c_t-^s_#&$+iXDp&L2DeCXL$U^Uiqh4? z;7@1_!Vi-W`H71hoZOs*`#KHcZ<}mkHZTp_m?i|PnOfG{2fxFCnOrsQ#pf;>U+lh8HGspWaI1m1?trBIYeIqH8=xP%VB55vADO?qP@nX%CzBFCPoHM44)`{M8C9nqCFSbv?ihz-K3o9m! zlJIB>d&AGd`ZyZtj~<8_qu+`zPuSOerdj{heLH2TB>n}jT3TvA$p@aK2e%+(;!^azh9|{tG zTS&S&2iMQwDh^!01|7rQCDRmZpjib?5$wRCdPVdfJWv#_38=EBL~yjnaVs1Eq6v_< zW|Zz87_=Mk%*i_G!az2}SOa zk3!>DdTPb`#YU^lQvW}Icj6|g)H9id>XBnrl`}#z1}hjFc%KKC!v)WxQKN{(>d<`8 zRXoNR&v>tAN6{IZ@=YV|N!Fm~QDY?D8DPF!_$Y`k_>Bgod7aY^rjYukS=yTC@b&e1 zEpiVCB}0`&@1|9$1h{dtTsC*er4mMT36$=ex@Ggk1yvSSbh53&QOwmNcmu#6U>`|* z3b>mW8P^8xx%g!2E)~z2wN>Erc;g z7u>*p8Q#JM32iJguK<+0#O(lXgJ#!*0LH!#AomU^aq}eLACMgsh~Pmf?HIy~W`3e_ zW2$&l!nt|UF}Z7|`Ke^{Q;8>@PCVV6upgY(JaboXoza&k_2rRMQ~H(SbSB5urc}|Y z&kXf<&GrjhU)!24TYkO%N`10=XZ+MJ#@-)GRqsrd?F7d{!M~5V<8@B1nl9b-nWbH{ zn%~LC-*?&i!P@w#nT;LEjUB1AozwYUzd@V(;Y%j*X|)PNX#{8+fSb+b62a`9 zu^x$Fb|fzvu)JPE!bKRBD?zcEK+*_4$3goF1jwMZIA+WZQ|5->D!SaM;x!3pQygxH zt)JFxkc+OdRAED+z#RiukTug9@R%r_DOf*Uuzo@>2_`O56DbXU<1<6myqYT9Ann{U zz7L=}Zbja36&C#F$!y$?pBTbJMZAA5D96HWZ4&1GH=-B-z zSGHg>*Kq;}R7pC$OEi2uG@>a3e+=uGVwMY#kJt1L4-b;uyasQ-OXrOLibS&okrPLB z_aMiP!F)ow8xC`Kke`G)MjXT+7Ln+{DEw015%6!S$6NQujtRVd0hvX8n_ED9K`jEUn!33u_y z?_vg^iTuO~pSP2rt0KQqA?Xa5|A$duTOp`8eJ0N@SHz|82NcN;ekpSYg+{|PEF zucm35r$A|EAH=s^3rm<||g1b?`G;2|EmM2;Mc=!<*-|5CXH2F)wVI z*JBuW^b@D&jTkmj#-fC?cHWF17Rp!>SvGIQF!&G^g!j(pVK^Vc;37Y7#}5Z(ERGoF z3ou+r84JTl=8G`wq>TCD;`w3>mk8lf43|;H(nML^d^vtpP)28>xM_X~ek?_vbo0wF z%uq%L@VXqsl~6*Wq<(${epFFqo6<#f^BUS(1+`!r(k%!nKid-F{j!EI%NpW2Swob| z8lq%A(-4tHc{{91Auu zB}|&BPdir%?eMF8^d7oKHEu|lR@|rd(6s9Qb1FS;y&KsA}MQL;4yc*tjfsPj5 zz^S!-UXLL#6WIw4j5(d@z8T}%=vB0XOVSU#`Ekb#0bwMGoV&l9=l%^0#^ zZYze4(tByTlw`*M$&UfCP!cpCwvhx6t8DZJf(`GxX4||L-iTW{ug8#pilIuINEY6N 
zD!c&^H~_@b3}WegrGvH+itr|g@D@wQkU$Ybbyl%$;7t(WEh1tF5g!;KM5h1$2kI;-Ovp*Pe~;}KqF`n2ua9#81z679yXi0#hGHp3!6*+Z4k%eAjegZZ6{~```k?xNhSiWT%|Yz}9jiN_XY~gRtl@x>H6Adr zCYoxb7%ih4P%wJN0IrcSfoo9q zYywI>vl(0?vjto;(+aMY*$Qqh^ANapW*fNq%yw`c%nopin4RF35Puo-F!)!KSej`A ze>n-MWZJ=B#q0uiH?xP?%RIz%F#DMO%mL;g)5&x(JDF~#hnIwT#HD1-%%lAlrned@ zobfPB3H6kkwK9XuA>zwnh8Q>Tt%zdI2BNHn!}#4ke4$CLD;c@QIKph_YnH&w3GqPa%|KGu_wIj@aRx*(y|vn>BIad zrymaSh3E9U)4p-fGdwqIm**KNFZ0Bco`5$nFy4Xw;I0twt_8#&@*L=-)nkGTc$+Ep(@9>~! z=s11M&(c2cGeJ5n#n#C@NJ0CL(aaE3I=w#-2zdjOhYvmJ1?sUe|H)C`2t7(d>62a- zLqon{8Zr%x4twb!&`pead<{cSdWVjWjRt~rfG`>at{xyi3BB)08af*AaZQ#&(|bKZ z?=amqGQxUCz)M3@4MSvLvOPOfk4RC^lFo&BLO}!ArFkBnA?#C`!USBC``gCGpxmM? zP|ooH9e@T192@lxyXbB|?T0F?_xeVlH+xwu9aM^c9Eu2oBR~&(gWe$!&^5V3?DI6= zAD~Wo&uJ$;I_NJTxwm4_JtO0+_H~ z4@jC}f&BpG@rXr$@>uzSMGGTq(js{1L#)>aT?2dKq*)jaeA}tI8`_*!7!g9N4`7RP z2ERWrK!zfxNAIvVFvR6$jT#}^%-65*WfHy!y&gpz7!QnaremJbu>n#a!@@^k6zhbu zjPsL#kUlT{i2{pRh=JG-xqR_U7~!NCoKXz*c|wISV}1JL@M3h9{`C6P;v28h3vXPz zeeT-r7v@4*h+BN+!s5lhBi`E=f4K10)sPBPME@p)YI^YG;><-pH1EjkBoyg*=?<~GCO)}%T z^4=F`KmFS?Av;;U!q#mi@IU%X7;zVP0{cP}qQ-`+|JvvBCjPRc=61__ah50 zT%bRli{5_a+lyx|H!NO%=k{~gprRIEcpWN=&ZNYIZ%!?|3Aw#-ZQ-X;dhzx1!13~@ z*RL&vFAIDZzyH?@FHVD0u22bzM=B7bGb49}0Q8D6`|0)Qr+@q2r_m{T@zS-0E7!#G zT?&JU^uoE97hiqv_QlAj*FhGjfEOX`8i2Y+xfHoB1t=Wa|&d|$C#kV)T z_>-%Puf6S}Av0{)ndff5e4$}s`pm+se+M}~4>juwxq-{=mtF#?Z(o3j_ZDBBqtopv z9o>sBpIP|NFV)fTUYsF~0G(!G=AFe$VG#Mu!i(2HWT-)U;jN$DzBua&^-5tyjVA}Mf z1D}ba6nAcd=OWdo5M$f~@3!DjgU6@pOQ-79@L^KdEs1B8NYEnC`_yi=Yzjq&BcDQ! 
z>WEF`Bffnax3Wga+5-d9t?9){V%bwpF{(z&t=z5Hq<|EfO3JC8baZ%wbddG<0%Jr= zLA{{f;`E^6MnawuFS`-gTm@2qOI`p^L6+SFA)E@nI4$VcaZk{x<+Pv)5GSdQdrxuN z<0n1rNPtr#6F@zLXMj@#0w@+u9)c z&W=SkzFst2`P%A)wQ;H=rO!EMI%^8=x?D6<`SR*y@%ni2`q^EH;+BMd(^T6>I!nrI zIk)eHeR1>Z@Ck6H6)BT>YTq5;PYtE}0(@XUfxe))^Al)SX+LQ}Qh0odKB2ErRG*p; zqp16|jlwOZ(TmiXx7hDm1R30lv2?Hd68R|qf_&1w=}Y9J`U~>G8YRz{$VdGbz)}5XwBrQ$W<;dzwNUW3P1T!1|xD_x1=qL9-0x;SO*bLV*0_3EF zei|?o>lp%c>ji{28a!1;55WvE0^_od_5dITVB`%1#=X8EpumtX2-wZs-?s}$M~6IP z{YNJa{e4}Yr~Ito9rd9%v<@=w@9S9xN&AmJ+utWuWk87-2q*=hN+@{!1`G@O2Y2;! z@7>?qwP*L%{=oX0oooAtos;YP2Y2shy7n{7p56U}H9H}yw!fzDE6$^}{Z8jj_{tDF z%`A2UPBn&lg7tDbz=p#BMLFaCj_#h`J-gbNJ)C|>C=jO^$HKr13&Q0hs2-4jY5y6YsGkA`+GhOj|)0+h(PmD)R$ zlGY*nGD-F+efiAZYwbVky4nRqWk^&+i1P0msp7KB?N<)Gav)Z{^L7QIZUKyS{_i9V2DCRCueid305 z(947(J)>yV!K9#^tlH%tAB6QHEp$Zs80#OWgD3ssJVDm|E~kz?2+7z^bh^;#MyCgz zN6>i`94A=*(Ek9GN>1V76oXj(8&BQM_q-%@nt< zPc8fYEJttFE>U`omhZAsY=D}XblHCf&qb(MzyjdoAGz*XP94dWP{?#sQUCjt8MHSH zd5*w$Z(4JTc|c>TME%j5)8=!+|Ueguqb?Daxguz+~SCxWMhPL?9h{(c6ZOAw$4sYLyVYc zh+{`5I1SjZysT5r;sTA+O#oi^vOd&ZK}e&+!hpgKVDCZ{&a0k+0YTRffPxzE9~&SB zE!GP}--Z7H9K=)9l9kGNOfl7Q(`-Gr|AqbGk=NNbPrY_3Vcsyc=Oa^YD#w#dSStZ<>#W~8{oLtqemz#wn9wyMo%}41LXja81dnuD-6v}) zfU-5?QeX>sDkvJ{cT;)PK3Kc@ZP*sepA*zY4NY~yV$B59Bh+TqF~wCy_f-XFBsQi2 zpJ&{A6&A6sMou}(X3dC(ZD`znW`fLa0;4 zZU;`B76u~P&W&I{<}GD`05mA}G`^#mrP8t|A?kJbAJ_m6Oyaiuc>d1W!^!#`AY3AU zXTr8~>cH=HwwpN>GkX&`%`tuRP1BkgZNk(LQ#Y(c2{_4-{&N^w<092SVPELQF}3Vr zB+D{b(ksP&VTCEPIX!98(!N8x72@{WSR`T z2g$S&$#h^ckU#VW`KhrvM+x%c{?k z)gY18D9@KMF=jOd5?h)j65ofrHB+XpYy2qbL)#{Kj?hpF>{{?ZJLT~{ih^)SJM|UZ zCmdH6OfEQ^1hBKh z3?j@SEGh@${17~WJQ&`fn)Fmr$<&@(YQwiWpX&@aT^NiwW(pGyXRM%hwmfF5pI5tX zs*5A*W*zhD##@a|F}34X;i_a|&3s|av?-;xoHL&_haa2QuM+)%%T1AEGshF9O|g>Z z=!R%tvgJU$TGn(12{k-Sho>sa?Z7$Z4J8< zrqYf)k(03k z2r~4dE?@S@fFPs1R9Q0RWXWU%nR2t@(^h7PF+}E;* zWy%hVSosq6n4n+bp3JhxXJYbvX15+%U$GD3d=^IUv$`$4Wy=WRGhM^5$dadE^0&ge z($fL)W~QLt0{o!d>2!eXKsuQcF$G&MKsq2~9Bwm|uTY*2Vv3qIKI@TR%5mBH9`Jy% 
z>9#V(ByVevjw$h3d(>D)rWDF0-_y#nT5Xl21y(~jSFfxIE9601L7QoXzO^D9z=nTu zYa|fC(pxUZW!_?~5Qy+TNG&hhnk?C9iEL{@HW|KndWS%S$_Ic5>2XR!`**?YhBHTp zvJ1dH`uf(1@`9SzNZ>-XBwPv?oc9S|FCPwYL1vsv;X*)3!<^?+9iyD;5ZfRIJ^RwP(bMVZDXGC!C}u1u1XI7yD}>bfi8&*^x#ZUv*kakf2*FT9^kwK z!U%j}zlOeN(LurPDYw#pgnmxZzkU91=&S<3-2LxgM3LgbGD?jLD5sJg!;IU(kuITF zw5f8{f2N4yD(e=ePA{S=_kGaP5mmf-_>IHh`JA1ASQaOodn)=_n#BRfu0aQj!>RFP z3yX%G5F&H>w^JAReexPk(ZDI3Ay8TndilF#`#lqlA-JF+A$!~QcivNjoq$J->}iN# z{|X%hjqKOK>1L2KAU$?q+%v&` z8wp`uL2Ihw?oBq1DsE@^<6(?>W1f>?5lgGef#CL#xj*zX`a%L)GY6Mv8? zfGB_<$BaYPp=qVP~DQrO{m^Ft*y2$ZLq%8>7nTwq$d6 zytzBp)N>=irx;J<)y3@fv%OJsvau`P*cEH&zA?C@QQCSHcU5YO^-pH1x;bo4=9Yhu zTRx}#dCt2z)0$h3!fDgZ(yHl$sj}76U8$UsWKKE!rSeuM^Qz)`RkKxzyv9`inwbsp z{L1jbj|wYh&B>ZQ@tQq16p5PssoIw4@#xX_*S_DG-0g|)_QZA##s-ICHNz20s~Jq zJ{>JhZrB^&us7D&al`Wd*JFo|#Bv|Mt5RC5&?uJ5nLW{mVjYjqo1eIaGFQM~Jf|X+ zTX4yI-kfq2UA9N|&h#XTH^z!uqRd=rvb8JT+I3_2Mj+P8#I_tlRG)tY_}T1BTFU0Y zWRc!vE`4_cRaX6(6MDeie#Ku3>p%a)xPr1ZlO@s590xY4hj1{957?4;i2o#nO6_N$ zoN+E%_K3>gr_R{OfKtY)!6@n>eVShL0QA5mO|NxpM7ZDqP2tu6_)rQUSly!*d`cf| zSIQZx8MSl~z-S~OLMye_+$XgzE45yd+K?4vl*E{_z=yn4QV1(gk)5+e!r$OlLmfOw zF4;V+5*|6wRsfinKfpKl0i@67D~0+7w{C@Os&xDP653C8_b5#<2Qb(!x6oqph!t06 z=SgM(qko{*%$L-M!L45*x2&YF1RVlsZ+M^_3M4r=0M_FeTlPS%**&RHlBP(0K?7-u zAx3^lRg%>&OC{V)%@%P~f$iAK!`z;_2P#RKgpUE*P-Y>yqWUkN8`M5|bq zT}}d-MLbyvfIEOiAV&G}w(HwxJ#TJ$W7GBR0-D5rLBKK#3ril75zIHKtfMED&(J)+ zMC&F&*xhiH0amR7s#A^n;P^U!vQ#_;E&+s?FM9NT)UnmF`I~^cHzi&9enML+sUSvh&hOj%pk{UvT=W^(Uc$9$BT&#o~&?{5aT`OfT z2p>)sRK*Lb61M8812?Uo+N?gaeH0DT0hnCQNyP2Nci!{ z;7s9+Vb&JUTR(SlZuI@9=enjkMSEZ~fdI}#VO`7tNG4`$oL6s1Wf=<{2}ez=z&V?D ztv9Nj(7Gha!RY>X8XY%*tg9{}RMdHw#Bt?<&S z3#+Ev1TdJg=U?cJXlHbZf|^*qbJjlF_M^R5_s(vKHZOyOis;&8Q+vFrJ+@)j`%S65 z{AAv`c;330y=rFgntgU}v@79kkJUgNLc$KkpAZuMK}Ffuz;>a*O3bRfU{54U8e_#9 zqIuE7D@>_kL)kx6=kumZTp)@MP7Nwt*X|{_#;qd8K3GfQvBzGKO7iXn>g5j{iIid` zAzGZtd|G+r^WdDA^zI_y7;Rb`qDf0DXY2%%mZld6$g&5_l$q49dx%KsK6)PZs(KV~ zumCk#+TfXC2e>J1?sO}%aTDZ?^T4tP=6@y8qB^$${Q738nNuUDG1BFP{XeKd8J&`* 
zr~?XlISpBIN^uyriVV-?da5tIpOZ27GG=0w8@_L@5>0v^xvW`o<@BZZhUD@SdlHk| zssj0J%aEx|`($LL?^iQ+kt!`09+IqF3|31gIV@4d1&|?hEW4%v>(Bq`6`hh z!`)|e8`EtG@nB4sE0a>r;OI7POz)wAS#GTicN4U9xfq^#4}l5f$n~*wKL+{!4dh!V zQpl%rr_Jqdr6jGg3*;ud?fK4v$tnT0piu$uz`*Se{y++xy~tc(;dkHwj?_#l15Wn) zkTFZ6LlpC?=$k}`7-O&ildywl9K8$5ZPKt!V7mjn0w&nORssJJM?C_8LSN(Hr!qz^m{e9!M zaDTF(?t_B5_bdOQ_7}C&#*en|5BEmQ$)c8cQA@0FQ*@$P0o$oSbJmNjjb{FW3hrG^ZLhc!B*V1bK4>ZlVuI?C+_O; zJAuWC)y=V08>8*fuO&Ahh;Kd++jQ^-DC|e(&5wR;vQD3P@u5gw!c-=vDxEj4ODkjSazK89QfOis&7r^)_z>S6YM(4d{;c*6*a@5P?tSyg781=0@HKbE; zmghpYGBHL*JAwo`&3PFe+V$4Iqx5L^fzvU6*3HiGcf8)ZPo3vxWCMK&E2`xWY`<#) z$_7}ng6+d~|K5-uLBOL6YEQl`AymIS2a4LLd_>vR$jj-|!^V9EL!`Iq7#nVKb|^eN z7L@DH_N>v6CyraFFP(Ih^{o*lfKyWWxB-0Fa9A6FBLk$2z&`|KnE2k2@5%KxXlI$8 zRu3#8My97>3@IPf9pZ<8%BS@iBpZrSX$n?I102)R$n;ji8N&Z8XU7+Emg(&e%(?Ij zIm?We2j*P#g`8zZ+ZS>E^ZkEBIieIcT{Xv`FNkgR<^9g$$!!8cgq!*G>629A`FHVL zJa6d2`!GSg?n?`Z&h;N;hJuaN{|3F0Jr2%F3si$>fy#z!{HZM&L(~XDD(pDK5LK}I zgQi`x?MX-VuN>9j<&9C@Y%^w<*~D$YPXcG2^ub_iKa`GS;<-|Kc;3XG;@Qs&d z;4R+!L9cLK2E6I3Jm62?-vJ9?pJ4VAt$+3P8DH8f28nO57|MI4!S&B_ihA~kDE1TJ ze1fwkkM3a(gAG&RR{?TGdBv;qoL&^0Go+>F3~8aAx$M)(ibZEPI^RHtv>Db9zk7NR zeVyoZq0@~{4?2&a^C&uK&*R|g12L__HAlQkj<(bl7CO#^r)`Dq$SHjuvZv1}#yQ0a z&MbsO(m)W-5eEZuHo0lc5S_1Le`rz(Hn}`UykK{flEv2kdEn zY%opl4Rp<4xhbx>&Sl@fNp+L}63R(Hw1x**3wsH2|j?BOcI88O<;ChP4rE z!crYG*UYM}wa=c2o=ViU#n!hkQL0^vN^lM9?y58z?UGqY6|?!~)qY@-Q;@W-nYXT) zR;J9>q@u+&QC~X?wHdwT_wA*|+P69+=$*QCmpj zzVm(Iu9>`WHrcET!@DD@NPA>sBrwx714UI- z1J86QeACkJ2#dTc2=C7dsT_K#QmB!z9qxK98L7PDq`l$;d&M1%I$!%+d&P8n%4A8J z%04ue-O*5n+G+c_va@Aj7S21y3u_X3=dbM*OBzaJTcRKVT-4H=rne<@MUl37UHSkd zS@OiHw>Uf+IGUjh{y-tz062j|V5|Ml6oz0twx!ArI0Ya>rzIr;L* zxV>uD_M^h9g>idh!m{DrXFo7^-qolK#=CmTPzp(7dFy`NymQ|C@bzt1*Ugqs4<_xE zaeHOLvhM9|ADAD`q785T_4>{8<}Gh+du`nekYY9o%c`qDxFwtLp zOcM~ot(}Br_0`S~%&lLJ5KfW`R?FTG^+ikdlz#Q+e=tEUeSXJIS-KRT-_cW+HvZL0 zKDP31pccl@e|HshmaG3kR|f74y1BF3`pfF#&U))V<`;K1TmMN>+}UdVCs%RjPU{d1 zvh;)JJm6b#=eOX2gF67z4yz8U({KiW5&;k!-D&_?K#fqrr}3$LYDOV@j9dv@Z5o+n 
z62N^%g|=0AtMR=Rz=;R)W;8xsk8MTl1tC@ou}VfqwrHi20|$HkVc>6~n1cI16Q&+#{$~4~@V7F$TY?LX;;?aIj-^9Ih1A3qQ=VmEJ_x@1UE| zX{2k0y+iOLBACco?OM6tqXuC>UCCX;fcsZ4yYUYk5*Pe(*D%NlB2cuy0Ald~(YxSZ zsc+Y^qQGM#~nt-pZe8yPOw!^ok?mcq^aRvY-4LJp73vW2TI7A;Tw@7cTw~ ztZ`3SPxIENIgp7(6I3@yNj7s(*T8MNm*54*tN9=Efy?vo`!<|nfSt!+wJ!t*tTi5% z^`FGssVsj>iyYw+jW7HmnHZ3>==>1n|0Z~*sL#w)>DqbI+E=R1SBBTV*!FVO)b6-x zZOWX7Mu&Kj>uP?Y$dxd=ruL+Crs?+Qo(WfmpFUrm&=sfh>9dBE!7@E~wg@Ib+xdg{cbF*fdsX(SI?ral>3I8Boqh0%(4 z{lL;#?>|=0|KySID@o!_Ke##vw}{~4IDidTw*)d)L&frt5!;2wS-7g;53(TugC0zP z5<(`pC4fDcH!OY|e4Hf>zXzcEvcnKYIs)mid~f|0raPlbJHF2DBpf zt(6ExJQf5SeNFS~W`XcRd-z0TGLc^!%UeGS;KensZupn-jdA_zX^VJ2fVT*KEm77S zE8Q4vi;hPJ-fxc;bj<7bi6qkN1iin~7yUs;RkW-$fTd^ue+J3Pu}!(QNzYjK?W@YH zK9V+hLGQrpNNT8&WzUL!BTL0-$*E{K8~6adTqll@OivoI%3qZdN2q5z>JQn2KL&DO zMCL$}jAnyhn~)p9S#@G`s~A(OAxGc0*GIGd$7+N@? zDBg~BNk)ki+J~Rn$?EDq;b$E>|Ix=9SlIKC(QfJU3|TjMWE9*@9CBzkAk;jT6^T3+EZofC38j3Kj-zDCoz-3*B&iJU!~knGVUpgkK;r9!!ACteNQ`grPqH zC;JL3Id)2%|ADf+0|aoQQ(QTOR}B1E`Q}97mYAb;&OWy{x$Tkownt(QJsNwIiP;X# ztKGMBmZYvUt}Bh4Na)rHB44v(o_$BU#Q1iiNXyrM^hB;y0y%!H%ax_=|p~gEYCFyXHz!JtKkSoaWb!EKCfk3 zC+aCSQXMG{f8|&D%Fk3({w8rfpFWHMh!W;#ip7bTKi2;X^zB3keYjhR7wLJx%wIhx ze)cw!=)r-@>xV@@`&inSiC>Wl8AGZ}#$@EJhFbNnYOO-~M+%&!vI>(H ze)TJ6t6Wln_mepx;Ot3Sy3$B@=#_DML|Y&5{NE zaHuISd|)XD{kh+LdyY{$Y;-Q|&{S)I@E%CbmdwLNBR_ zkhCV8_o<1VOihG)Bo$$jRKyWUMU+Y^qG%~y5s?OIJ*=11gI-b(Tcq{iOxMGXR6z~a z!$zSVHl_+sr1GH>e5u0Ysp9piH5*bzbc(J^Io9#@@YzAdenqu%N*^;;+@ba>6w13t zlsbjwZdrjM?{1?}p}kvT1z)*V(YRzUQNV6|$&w1+l}+S*OQXN5f$ycQJB3JmVCpkk_mlgNPX9WzJA34g`$`wM-NGl9wAfW)g!fmc%M*O z6`Ke(yl bool: + """Check if the agent CLI supports --system-prompt flag.""" + return any(name in command for name in _SYSTEM_PROMPT_AGENTS) + + +def _supports_reasoning_effort(command: str) -> bool: + """Check if the agent CLI supports reasoning effort overrides.""" + return any(name in command for name in _REASONING_EFFORT_AGENTS) + + +class _Spinner: + """Animated spinner for long-running agent calls.""" + + FRAMES = "⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏" + 
_CLEAR_LINE = "\r" + (" " * 160) + "\r" + + def __init__(self, message: str) -> None: + self.message = message + self._running = False + self._thread: Optional[threading.Thread] = None + self._start_time = 0.0 + + def start(self) -> None: + self._running = True + self._start_time = time.monotonic() + self._thread = threading.Thread(target=self._spin, daemon=True) + self._thread.start() + + def _spin(self) -> None: + for frame in itertools.cycle(self.FRAMES): + if not self._running: + break + elapsed = int(time.monotonic() - self._start_time) + line = f"\r {frame} {self.message} ({elapsed}s)" + sys.stderr.write(line) + sys.stderr.flush() + time.sleep(0.1) + + def stop(self, final: str) -> None: + self._running = False + if self._thread: + self._thread.join(timeout=1) + elapsed = round(time.monotonic() - self._start_time, 1) + sys.stderr.write(self._CLEAR_LINE) + sys.stderr.write(f" \u2713 {final} ({elapsed}s)\n") + sys.stderr.flush() + + +def invoke_agent( + agent: AgentConfig, + prompt: str, + step_name: str, + cwd: Optional[Path] = None, + timeout: int | None = None, + quiet: bool = False, +) -> AgentResult: + """Invoke an agent CLI with the given prompt. + + Args: + quiet: If True, suppress spinner (for parallel execution). 
+ """ + cmd = [agent.command] + if agent.reasoning_effort and _supports_reasoning_effort(agent.command): + cmd.extend(["-c", f'model_reasoning_effort="{agent.reasoning_effort}"']) + cmd.extend(agent.args) + + # Build the full prompt (system prompt + user prompt) + if agent.system_prompt and _supports_system_prompt_flag(agent.command): + # claude: --system-prompt flag supported natively + cmd.extend(["--system-prompt", agent.system_prompt]) + input_data = prompt + elif agent.system_prompt: + # codex, others: no --system-prompt flag, prepend to prompt + input_data = ( + f"\n{agent.system_prompt}\n\n\n" + f"{prompt}" + ) + else: + input_data = prompt + + logger.debug("Invoking agent '%s': %s", agent.name, " ".join(cmd[:5]) + " ...") + + spinner: Optional[_Spinner] = None + if not quiet: + logger.info(" cmd: %s", " ".join(cmd[:6])) + spinner = _Spinner(f"[{step_name}] {agent.name} running...") + spinner.start() + + try: + start = time.monotonic() + result = subprocess.run( + cmd, + input=input_data, + capture_output=True, + text=True, + timeout=timeout, + cwd=cwd, + ) + duration = time.monotonic() - start + except subprocess.TimeoutExpired: + if spinner: + spinner.stop(f"[{step_name}] TIMEOUT after {timeout}s") + raise + except Exception: + if spinner: + spinner.stop(f"[{step_name}] ERROR") + raise + + output = result.stdout.strip() + chars = len(output) + + if result.returncode != 0: + if spinner: + spinner.stop(f"[{step_name}] FAILED (exit {result.returncode})") + err_detail = result.stderr.strip() or result.stdout.strip() + if err_detail and len(err_detail) > 500: + err_detail = err_detail[:500] + "..." 
+ cmd_preview = " ".join(cmd[:6]) + raise RuntimeError( + f"Agent '{agent.name}' failed (exit code {result.returncode}) " + f"at step '{step_name}':\n" + f" cmd: {cmd_preview}\n" + f" error: {err_detail or '(no output)'}" + ) + + if spinner: + spinner.stop(f"[{step_name}] done — {chars} chars") + + if not output: + logger.warning( + "Agent '%s' produced empty output at step '%s'", + agent.name, step_name, + ) + + return AgentResult( + output=output, + exit_code=result.returncode, + agent_name=agent.name, + step_name=step_name, + duration_seconds=round(duration, 1), + ) diff --git a/cross_eval/cli.py b/cross_eval/cli.py new file mode 100644 index 0000000..68dc75e --- /dev/null +++ b/cross_eval/cli.py @@ -0,0 +1,701 @@ +"""CLI entry point with argparse subcommands.""" +from __future__ import annotations + +import argparse +import logging +import sys +from pathlib import Path + +from cross_eval import __version__ +from cross_eval.config import REASONING_EFFORT_CHOICES + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Scaffolding templates for `cross-eval init` +# --------------------------------------------------------------------------- + +DEFAULT_CONFIG_YAML = """\ +# ─── cross-eval 설정 ─────────────────────────────────────────── +# +# 기본 제공 에이전트 (별도 정의 없이 바로 사용 가능): +# claude-coder, claude-reviewer (Claude, opus 모델) +# claude-senior (Claude, opus 모델) +# codex-coder, codex-reviewer (Codex, gpt-5.4 모델) +# codex-senior (Codex, gpt-5.4 모델) +# +# CLI에서 --coder claude --reviewer codex --senior codex 같이 축약해서 지정 가능 +# ──────────────────────────────────────────────────────────────── + +# 입력 파일 (이 파일 기준 상대경로) +inputs: + plan: plan.md + checklist: checklist.md + +# 에이전트 역할 지정 +coders: [claude-coder] +reviewers: [claude-reviewer] +# seniors: [codex-senior] + +# 파이프라인 종류: simple | cross-review | review-only | review-fix +pipeline: preset:{preset} + +# 반복 설정 +max_iterations: 3 +# min_iterations: 1 # 
PASS여도 최소 이만큼 반복 + +# 프롬프트 언어 +language: {language} + +# 결과 저장 경로 +output_dir: output + +# ─── 커스텀 에이전트 (선택) ──────────────────────────────────── +# 기본 제공 에이전트를 덮어쓰거나 새 에이전트를 정의할 수 있습니다. +# +# agents: +# my-reviewer: +# command: my-tool +# args: ["--flag"] +# system_prompt: "..." +# ──────────────────────────────────────────────────────────────── +""" + +PLAN_SAMPLE_EN = """\ +# Project Plan + +## Objective +[Describe what you want to build] + +## Requirements +1. [Requirement 1] +2. [Requirement 2] + +## Constraints +- [Constraint 1] +- [Constraint 2] + +## Out of Scope +- [Explicitly list what should NOT be implemented] +""" + +PLAN_SAMPLE_KO = """\ +# 프로젝트 기획서 + +## 목표 +[구현할 내용을 설명하세요] + +## 요구사항 +1. [요구사항 1] +2. [요구사항 2] + +## 제약조건 +- [제약조건 1] +- [제약조건 2] + +## 범위 밖 (구현하지 않을 것) +- [명시적으로 구현하지 않을 항목 나열] +""" + +CHECKLIST_SAMPLE_EN = """\ +# Implementation Checklist + +## Functional Requirements +- [ ] [Item 1] +- [ ] [Item 2] + +## Code Quality +- [ ] No unused imports or dead code +- [ ] Error handling for edge cases +- [ ] Follows project coding conventions + +## Constraints +- [ ] Does NOT add features beyond the plan +- [ ] Does NOT introduce unnecessary abstractions +""" + +CHECKLIST_SAMPLE_KO = """\ +# 구현 체크리스트 + +## 기능 요구사항 +- [ ] [항목 1] +- [ ] [항목 2] + +## 코드 품질 +- [ ] 사용하지 않는 import나 죽은 코드 없음 +- [ ] 엣지 케이스에 대한 에러 처리 +- [ ] 프로젝트 코딩 컨벤션 준수 + +## 제약 +- [ ] 기획서 범위를 넘는 기능을 추가하지 않음 +- [ ] 불필요한 추상화를 도입하지 않음 +""" + + +# --------------------------------------------------------------------------- +# CLI entry point +# --------------------------------------------------------------------------- + +def main(argv: list[str] | None = None) -> int: + """Main CLI entry point.""" + parser = argparse.ArgumentParser( + prog="cross-eval", + description=( + "AI 코딩 에이전트의 결과물을 자동으로 검증하는 CLI 도구.\n" + "\n" + "동작 방식:\n" + " 1. 기획서(plan)를 바탕으로 Coder 에이전트가 코드를 생성\n" + " 2. Reviewer 에이전트가 기획서 대비 코드를 검토하고 PASS/FAIL 판정\n" + " 3. 
FAIL이면 피드백을 반영해서 1~2를 반복 (최대 N회)\n" + "\n" + "빠른 시작:\n" + " cross-eval init 설정 파일 생성\n" + " cross-eval run --plan plan.md 기획서로 바로 실행\n" + " cross-eval run .cross-eval/config.yaml 기반 실행\n" + "\n" + "자세한 사용법: cross-eval --help" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "-v", "--version", + action="version", + version=f"%(prog)s {__version__}", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="상세 로그 출력", + ) + + subparsers = parser.add_subparsers(dest="command") + + # --- init --- + init_parser = subparsers.add_parser( + "init", + help="설정 파일 생성 (config.yaml, plan.md, checklist.md)", + description=( + "현재 디렉토리에 .cross-eval/ 폴더를 만들고 템플릿을 생성합니다.\n" + "이미 있는 파일은 건드리지 않습니다.\n" + "\n" + "생성되는 파일:\n" + " .cross-eval/config.yaml 에이전트, 파이프라인 설정\n" + " .cross-eval/plan.md 기획서 템플릿\n" + " .cross-eval/checklist.md 체크리스트 템플릿" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + init_parser.add_argument( + "--dir", + type=Path, + default=Path("."), + help="초기화할 디렉토리 (기본: 현재 디렉토리)", + ) + init_parser.add_argument( + "--preset", + default="simple", + choices=["simple", "cross-review", "review-only", "review-fix"], + help=( + "파이프라인 종류 (기본: simple). " + "simple=코딩+리뷰, cross-review=교차리뷰, " + "review-only=리뷰만, review-fix=리뷰수렴+자동수정" + ), + ) + init_parser.add_argument( + "--lang", + default="ko", + choices=["en", "ko"], + help="프롬프트 언어 (기본: ko)", + ) + + # --- run --- + run_parser = subparsers.add_parser( + "run", + help="검증 파이프라인 실행", + description=( + "기획서(plan)를 기반으로 AI 에이전트가 코드 생성과 리뷰를 반복합니다.\n" + "\n" + "설정 파일 없이 바로 실행할 수 있고, config.yaml로도 실행할 수 있습니다.\n" + "CLI 옵션이 config.yaml보다 우선합니다." 
+ ), + epilog=( + "파이프라인 종류 (--preset):\n" + " ┌──────────────┬─────────────────────────────────────────────────────┐\n" + " │ simple │ Coder가 코드 생성 → Reviewer가 리뷰 │\n" + " │ (기본값) │ FAIL이면 피드백 반영해서 재생성, PASS까지 반복 │\n" + " ├──────────────┼─────────────────────────────────────────────────────┤\n" + " │ review-fix │ 2단계 파이프라인: │\n" + " │ │ Reviewer N명 병렬 리뷰 → 취합 → 수정 → 재검증 │\n" + " ├──────────────┼─────────────────────────────────────────────────────┤\n" + " │ review-only │ 코드 생성 없이 Reviewer N명이 기존 코드만 검토 │\n" + " │ │ (이미 작성된 코드의 품질 감사용) │\n" + " ├──────────────┼─────────────────────────────────────────────────────┤\n" + " │ cross-review │ Coder 2명이 각각 구현 → 상대방 코드를 교차 리뷰 │\n" + " │ │ (서로 다른 에이전트의 구현 비교용) │\n" + " └──────────────┴─────────────────────────────────────────────────────┘\n" + "\n" + "기본 제공 에이전트:\n" + " ┌──────────────────┬─────────┬───────────┬──────────────────────────┐\n" + " │ 이름 │ CLI │ 기본 모델 │ 역할 │\n" + " ├──────────────────┼─────────┼───────────┼──────────────────────────┤\n" + " │ claude-coder │ claude │ opus │ 코드 생성 │\n" + " │ claude-reviewer │ claude │ opus │ 코드 리뷰 │\n" + " │ claude-senior │ claude │ opus │ 리뷰 취합/판정 │\n" + " │ codex-coder │ codex │ gpt-5.4 │ 코드 생성 │\n" + " │ codex-reviewer │ codex │ gpt-5.4 │ 코드 리뷰 │\n" + " │ codex-senior │ codex │ gpt-5.4 │ 리뷰 취합/판정 │\n" + " └──────────────────┴─────────┴───────────┴──────────────────────────┘\n" + " --coder, --reviewer, --senior에서 축약 가능: claude → claude-\n" + "\n" + "사용 예시:\n" + "\n" + " 기본 실행 (Claude가 코딩하고 Claude가 리뷰):\n" + " cross-eval run --plan plan.md\n" + "\n" + " Codex가 코딩, Claude가 리뷰:\n" + " cross-eval run --plan plan.md --coder codex --reviewer claude\n" + "\n" + " 리뷰어 2명 (Claude + Codex):\n" + " cross-eval run --plan plan.md --reviewer claude --reviewer codex\n" + "\n" + " 리뷰 취합용 Senior 추가:\n" + " cross-eval run --plan plan.md --preset review-fix \\\n" + " --reviewer claude --reviewer codex --senior codex\n" + "\n" + " 리뷰 수렴 후 자동 수정 (review-fix):\n" + " cross-eval run --plan plan.md 
--preset review-fix \\\n" + " --reviewer claude --reviewer codex\n" + "\n" + " 기존 코드 리뷰만 (review-only):\n" + " cross-eval run --plan plan.md --preset review-only \\\n" + " --reviewer claude --reviewer codex\n" + "\n" + " 모델 변경:\n" + " cross-eval run --plan plan.md --model sonnet\n" + "\n" + " config.yaml 기반 실행:\n" + " cross-eval run\n" + " cross-eval run -c my-config.yaml" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + # -- 입력 파일 -- + input_group = run_parser.add_argument_group("입력 파일") + input_group.add_argument( + "--plan", type=Path, default=None, + help="기획서 파일 경로 (필수)", + ) + input_group.add_argument( + "--checklist", type=Path, default=None, + help="체크리스트 파일 경로 (선택)", + ) + input_group.add_argument( + "--docs", type=Path, default=None, + help="참고 문서 폴더. 폴더 안 모든 파일을 에이전트에게 전달", + ) + input_group.add_argument( + "--input", action="append", dest="inputs", metavar="KEY=PATH", + help="추가 입력 파일 (예: --input spec=./api-spec.md)", + ) + + # -- 에이전트 설정 -- + agent_group = run_parser.add_argument_group( + "에이전트 설정", + "축약 가능: claude → claude-, codex → codex-", + ) + agent_group.add_argument( + "--coder", action="append", dest="coders", metavar="NAME", + help="코드를 생성할 에이전트 (여러 개 가능, 기본: claude)", + ) + agent_group.add_argument( + "--reviewer", action="append", dest="reviewers", metavar="NAME", + help="코드를 리뷰할 에이전트 (여러 개 가능, 기본: claude)", + ) + agent_group.add_argument( + "--senior", action="append", dest="seniors", metavar="NAME", + help="리뷰를 취합하고 최종 판정할 시니어 에이전트 (선택)", + ) + agent_group.add_argument( + "--reasoning-effort", default=None, metavar="LEVEL", + choices=REASONING_EFFORT_CHOICES + ("extra-high", "extra_high", "x-high"), + help="모든 역할의 reasoning effort (minimal|low|medium|high|xhigh)", + ) + agent_group.add_argument( + "--coder-effort", default=None, metavar="LEVEL", + choices=REASONING_EFFORT_CHOICES + ("extra-high", "extra_high", "x-high"), + help="Coder용 reasoning effort", + ) + agent_group.add_argument( + "--reviewer-effort", 
default=None, metavar="LEVEL", + choices=REASONING_EFFORT_CHOICES + ("extra-high", "extra_high", "x-high"), + help="Reviewer용 reasoning effort", + ) + agent_group.add_argument( + "--senior-effort", default=None, metavar="LEVEL", + choices=REASONING_EFFORT_CHOICES + ("extra-high", "extra_high", "x-high"), + help="Senior용 reasoning effort", + ) + agent_group.add_argument( + "--model", default=None, metavar="MODEL", + help="모든 에이전트의 모델을 한번에 변경 (예: sonnet, opus)", + ) + agent_group.add_argument( + "--generator-model", default=None, metavar="MODEL", + help="Coder 에이전트 모델만 변경", + ) + agent_group.add_argument( + "--reviewer-model", default=None, metavar="MODEL", + help="Reviewer 에이전트 모델만 변경", + ) + + # -- 파이프라인 -- + pipe_group = run_parser.add_argument_group("파이프라인") + pipe_group.add_argument( + "--preset", default=None, + choices=["simple", "cross-review", "review-only", "review-fix"], + help="파이프라인 종류 (기본: simple). 각 종류 설명은 아래 참조", + ) + pipe_group.add_argument( + "--max-iter", type=int, default=None, + help="최대 반복 횟수 (기본: 3)", + ) + pipe_group.add_argument( + "--min-iter", type=int, default=None, + help="최소 반복 횟수. PASS여도 이 횟수까지 반복 (기본: 1)", + ) + pipe_group.add_argument( + "--timeout", type=int, default=None, metavar="SEC", + help="에이전트 1회 호출 제한 시간(초). 
0=무제한 (기본: 무제한)", + ) + pipe_group.add_argument( + "--lang", default=None, choices=["en", "ko"], + help="프롬프트 언어 (기본: ko)", + ) + + # -- 기타 -- + etc_group = run_parser.add_argument_group("기타") + etc_group.add_argument( + "-c", "--config", type=Path, default=None, + help="설정 파일 경로 (기본: .cross-eval/config.yaml)", + ) + etc_group.add_argument( + "--output-dir", type=Path, default=None, + help="결과 저장 디렉토리 (기본: output/)", + ) + etc_group.add_argument( + "--dry-run", action="store_true", + help="실제 실행 없이 에이전트에게 보낼 프롬프트만 미리보기", + ) + + args = parser.parse_args(argv) + + # Setup logging + level = logging.DEBUG if args.verbose else logging.INFO + logging.basicConfig( + level=level, + format="%(asctime)s [%(levelname)s] %(message)s", + datefmt="%H:%M:%S", + ) + + if args.command == "init": + return cmd_init(args) + elif args.command == "run": + return cmd_run(args) + else: + parser.print_help() + return 0 + + +def cmd_init(args: argparse.Namespace) -> int: + """Scaffold a new cross-eval project.""" + target = args.dir.resolve() + ce_dir = target / ".cross-eval" + ce_dir.mkdir(parents=True, exist_ok=True) + + lang = args.lang + plan_sample = PLAN_SAMPLE_KO if lang == "ko" else PLAN_SAMPLE_EN + checklist_sample = CHECKLIST_SAMPLE_KO if lang == "ko" else CHECKLIST_SAMPLE_EN + + files = { + ".cross-eval/config.yaml": DEFAULT_CONFIG_YAML.format( + preset=args.preset, language=lang, + ), + ".cross-eval/plan.md": plan_sample, + ".cross-eval/checklist.md": checklist_sample, + } + + created = [] + skipped = [] + for name, content in files.items(): + path = target / name + if path.exists(): + skipped.append(name) + else: + path.write_text(content, encoding="utf-8") + created.append(name) + + if created: + print(f" 생성: {', '.join(created)}") + if skipped: + print(f" 이미 존재 (건너뜀): {', '.join(skipped)}") + + print(f"\n 파이프라인: {args.preset}") + print(f" 언어: {lang}") + print("") + print("다음 단계:") + print(" 1. .cross-eval/plan.md 에 기획서 작성") + print(" 2. 
def cmd_run(args: argparse.Namespace) -> int:
    """Load the configuration, apply CLI overrides, validate, and run the pipeline.

    The configuration comes from ``args.config`` when given, otherwise from
    ``.cross-eval/config.yaml`` under the current directory when that file
    exists, otherwise from ``default_config()``.  CLI options are applied on
    top of whichever configuration was loaded, and validation runs only after
    every override has been applied.

    Args:
        args: Parsed arguments of the ``run`` subcommand.

    Returns:
        ``0`` when the final verdict is ``PASS``; ``130`` when the run is
        interrupted with Ctrl-C; ``1`` for any other verdict and for
        configuration, input, or pipeline errors (which are printed to
        stderr).
    """
    from cross_eval.config import (
        _default_seniors_for_preset,
        _infer_roles,
        _resolve_agents,
        apply_input_overrides,
        apply_reasoning_effort_settings,
        default_config,
        load_config,
        resolve_agent_shorthand,
        validate_config,
    )
    from cross_eval.pipeline import run_pipeline
    from cross_eval.prompts import PHASED_PRESETS, PIPELINE_PRESETS

    # 1. Load config: explicit --config, else the default location, else
    #    built-in defaults.
    if args.config is not None:
        config_path = args.config.resolve()
        if not config_path.exists():
            print(f"Config file not found: {config_path}", file=sys.stderr)
            return 1
    else:
        default_path = Path(".cross-eval/config.yaml").resolve()
        config_path = default_path if default_path.exists() else None

    if config_path is None:
        config = default_config()
        config_source = "defaults"
    else:
        try:
            config = load_config(config_path)
        except (ValueError, OSError) as e:
            # OSError covers FileNotFoundError plus unreadable files and
            # directories passed by mistake.
            print(f"Config error: {e}", file=sys.stderr)
            return 1
        config_source = config_path.name

    # 2. Apply CLI overrides
    if args.max_iter is not None:
        config.max_iterations = args.max_iter
    if args.min_iter is not None:
        config.min_iterations = args.min_iter
    if args.output_dir is not None:
        config.output_dir = args.output_dir
    if args.lang is not None:
        config.language = args.lang

    # --coder / --reviewer / --senior: resolve shorthands and override roles
    roles_overridden = bool(args.coders or args.reviewers or args.seniors)
    if roles_overridden:
        coders = [resolve_agent_shorthand(c, "coder") for c in (args.coders or [])]
        reviewers = [resolve_agent_shorthand(r, "reviewer") for r in (args.reviewers or [])]
        seniors = [resolve_agent_shorthand(s, "senior") for s in (args.seniors or [])]
        # Roles not given on the command line keep their configured value.
        if not coders:
            coders = config.coders or ["claude-coder"]
        if not reviewers:
            reviewers = config.reviewers or ["claude-reviewer"]
        if not seniors:
            seniors = config.seniors
        config.coders = coders
        config.reviewers = reviewers
        config.seniors = seniors
        # Auto-merge built-in agents
        config.agents = _resolve_agents(config.agents, coders, reviewers, seniors)

    # --preset (or any role override): rebuild the pipeline from a preset
    if args.preset is not None or roles_overridden:
        if args.preset is not None:
            preset = args.preset
        elif config.phases:
            preset = "review-fix"  # only phased preset currently
        else:
            # Keep the preset the loaded config was built from, so that a
            # role override alone does not turn e.g. cross-review into simple.
            configured = getattr(config, "preset_name", None)
            is_known = configured in PIPELINE_PRESETS or configured in PHASED_PRESETS
            preset = configured if is_known else "simple"

        inferred_coders, inferred_reviewers, _ = _infer_roles(
            list(config.agents.keys())
        )
        coders = config.coders or inferred_coders
        reviewers = config.reviewers or inferred_reviewers
        seniors = config.seniors or []
        if not seniors:
            seniors = _default_seniors_for_preset(
                f"preset:{preset}",
                reviewers,
                config.agents,
            )
        config.agents = _resolve_agents(config.agents, coders, reviewers, seniors)
        config.coders = coders
        config.reviewers = reviewers
        config.seniors = seniors
        config.preset_name = preset
        if preset in PHASED_PRESETS:
            config.phases = PHASED_PRESETS[preset](coders, reviewers, seniors)
            config.pipeline = []
        elif preset in PIPELINE_PRESETS:
            config.pipeline = PIPELINE_PRESETS[preset](coders, reviewers, seniors)
            config.phases = []
        # Only a review-only preset chosen on the command line forces a single
        # pass; an iteration count coming from the config file is left alone.
        if args.preset == "review-only" and args.max_iter is None and args.min_iter is None:
            config.max_iterations = 1

    apply_reasoning_effort_settings(
        config,
        reasoning_effort=args.reasoning_effort,
        coder_effort=args.coder_effort,
        reviewer_effort=args.reviewer_effort,
        senior_effort=args.senior_effort,
    )

    # --model: apply to ALL agents
    if args.model is not None:
        for agent_name in config.agents:
            _apply_model_override(config, agent_name, args.model)
    # --generator-model / --reviewer-model: apply by role (after --model, so
    # the role-specific value wins)
    if args.generator_model is not None:
        for coder_name in config.coders:
            _apply_model_override(config, coder_name, args.generator_model)
    if args.reviewer_model is not None:
        for reviewer_name in config.reviewers:
            _apply_model_override(config, reviewer_name, args.reviewer_model)

    # --plan / --checklist shortcuts
    for key, val in [("plan", args.plan), ("checklist", args.checklist)]:
        if val is not None:
            p = val.resolve()
            if not p.exists():
                print(f"File not found: {p}", file=sys.stderr)
                return 1
            config.inputs[key] = p

    # --docs: read all files in directory, inject as {docs}
    if args.docs is not None:
        docs_dir = args.docs.resolve()
        if not docs_dir.is_dir():
            print(f"Not a directory: {docs_dir}", file=sys.stderr)
            return 1
        docs_content = _read_docs_dir(docs_dir)
        if not docs_content:
            print(f"No files found in: {docs_dir}", file=sys.stderr)
            return 1
        config.inputs["docs"] = docs_content

    # --input KEY=PATH (repeatable)
    if args.inputs:
        overrides = {}
        for item in args.inputs:
            if "=" not in item:
                print(
                    f"Invalid --input format: '{item}'. Use KEY=PATH.",
                    file=sys.stderr,
                )
                return 1
            key, path = item.split("=", 1)
            overrides[key] = path
        apply_input_overrides(config, overrides)

    # 3. Validate after all overrides
    errors = validate_config(config)
    if errors:
        print("Config error:\n  " + "\n  ".join(errors), file=sys.stderr)
        return 1

    # 4. Run pipeline
    logger.info("Config: %s", config_source)
    logger.info(
        "Agents: %s",
        ", ".join(f"{n} ({a.command})" for n, a in config.agents.items()),
    )
    if config.coders or config.reviewers or config.seniors:
        logger.info("Coders: %s", config.coders)
        logger.info("Reviewers: %s", config.reviewers)
        logger.info("Seniors: %s", config.seniors)
    if config.phases:
        phase_desc = " → ".join(
            f"{p.name}(max {p.max_iterations}, {p.consecutive_pass}xPASS)"
            for p in config.phases
        )
        logger.info("Pipeline: phased [%s], lang=%s", phase_desc, config.language)
    else:
        iter_info = f"max {config.max_iterations}"
        if config.min_iterations > 1:
            iter_info = f"min {config.min_iterations}, max {config.max_iterations}"
        logger.info(
            "Pipeline: %d steps, %s iterations, lang=%s",
            len(config.pipeline), iter_info, config.language,
        )

    # A missing or zero --timeout means "no limit", passed on as None.
    agent_timeout = args.timeout or None
    try:
        result = run_pipeline(config, dry_run=args.dry_run, timeout=agent_timeout)
    except KeyboardInterrupt:
        print("\nInterrupted by user.", file=sys.stderr)
        return 130
    except RuntimeError as e:
        print(f"Pipeline error: {e}", file=sys.stderr)
        return 1

    # 5. Print summary
    print(f"\nResult: {result.final_verdict}")
    print(f"Iterations: {len(result.iterations)}")
    if not args.dry_run and result.run_dir:
        print(f"Output: {result.run_dir}/")

    return 0 if result.final_verdict == "PASS" else 1
If previous review feedback is provided, fix ONLY the specific issues mentioned. " + "Do NOT refactor unrelated code.\n" + "5. Ignore any items from previous feedback that were marked as DISMISSED or false positive.\n" + "6. When in doubt about scope, do LESS, not more." +) + +_REVIEWER_SYSTEM_PROMPT = ( + "You are a code reviewer. You MUST NOT create, modify, or delete any files.\n" + "Rules:\n" + "1. Explore the project directory to understand the full codebase context.\n" + "2. Compare the implementation against the plan and checklist ONLY.\n" + "3. Classify every issue with BOTH severity AND category:\n" + " - Severity: Critical (breaks functionality/security) > Major (requirement mismatch) > Minor (convention/style)\n" + " - Category: Over-engineering / Omission\n" + "4. When reviewing with previous feedback, mark items as CONFIRMED (still an issue) " + "or DISMISSED (false positive) with rationale.\n" + "5. Report out-of-scope issues separately — problems found outside plan/checklist scope.\n" + "6. Order issues by severity (Critical first).\n" + "7. Do NOT suggest improvements beyond the plan scope.\n" + "8. End with VERDICT: PASS (all requirements met, no over-engineering) " + "or VERDICT: FAIL (issues found)." +) + +_SENIOR_SYSTEM_PROMPT = ( + "You are a senior technical reviewer coordinating a review-fix-verification loop.\n" + "Rules:\n" + "1. Explore the project directory to understand the full codebase context.\n" + "2. In aggregation mode, deduplicate overlaps, resolve disagreements, and keep only " + "evidence-backed issues. Categorize dismissed findings as [False positive] or [Already fixed].\n" + "3. In verification mode, judge the current implementation directly against ONLY the " + "plan and checklist.\n" + "4. Be skeptical of false positives, but do not lower the bar on real requirement " + "gaps.\n" + "5. When issues remain, produce a concise prioritized action list the coder can act on.\n" + "6. 
Do NOT invent new requirements beyond the plan and checklist.\n" + "7. End with VERDICT: PASS or VERDICT: FAIL." +) + +BUILTIN_AGENTS: dict[str, AgentConfig] = { + "claude-coder": AgentConfig( + name="claude-coder", + command="claude", + args=["-p", "--model", "opus", "--permission-mode", "auto"], + system_prompt=_CODER_SYSTEM_PROMPT, + reasoning_effort=DEFAULT_ROLE_REASONING_EFFORTS["coder"], + ), + "claude-reviewer": AgentConfig( + name="claude-reviewer", + command="claude", + args=["-p", "--model", "opus", "--permission-mode", "auto"], + system_prompt=_REVIEWER_SYSTEM_PROMPT, + reasoning_effort=DEFAULT_ROLE_REASONING_EFFORTS["reviewer"], + ), + "claude-senior": AgentConfig( + name="claude-senior", + command="claude", + args=["-p", "--model", "opus", "--permission-mode", "auto"], + system_prompt=_SENIOR_SYSTEM_PROMPT, + reasoning_effort=DEFAULT_ROLE_REASONING_EFFORTS["senior"], + ), + "codex-coder": AgentConfig( + name="codex-coder", + command="codex", + args=list(_CODEX_ARGS), + system_prompt=_CODER_SYSTEM_PROMPT, + reasoning_effort=DEFAULT_ROLE_REASONING_EFFORTS["coder"], + ), + "codex-reviewer": AgentConfig( + name="codex-reviewer", + command="codex", + args=list(_CODEX_ARGS), + system_prompt=_REVIEWER_SYSTEM_PROMPT, + reasoning_effort=DEFAULT_ROLE_REASONING_EFFORTS["reviewer"], + ), + "codex-senior": AgentConfig( + name="codex-senior", + command="codex", + args=list(_CODEX_ARGS), + system_prompt=_SENIOR_SYSTEM_PROMPT, + reasoning_effort=DEFAULT_ROLE_REASONING_EFFORTS["senior"], + ), +} + +# Shorthand aliases: "claude" → "claude-coder"/"claude-reviewer", "codex" → same +_AGENT_ALIASES: dict[str, str] = { + "claude": "claude", + "codex": "codex", +} + + +def resolve_agent_shorthand(name: str, role: str) -> str: + """Resolve shorthand agent name to full builtin name. 
+ + Examples: + resolve_agent_shorthand("claude", "coder") → "claude-coder" + resolve_agent_shorthand("codex", "reviewer") → "codex-reviewer" + resolve_agent_shorthand("claude-coder", "coder") → "claude-coder" (unchanged) + """ + if name in _AGENT_ALIASES: + return f"{_AGENT_ALIASES[name]}-{role}" + return name + + +# --------------------------------------------------------------------------- +# Role inference (backward compatibility) +# --------------------------------------------------------------------------- + +_CODER_PATTERNS = ("gen", "coder", "implement", "develop", "write") +_SENIOR_PATTERNS = ("senior", "lead", "principal", "aggregate", "adjudicat", "synth") +_REVIEWER_PATTERNS = ("review", "audit", "check", "verify", "inspect") + + +def _infer_roles(agent_names: list[str]) -> tuple[list[str], list[str], list[str]]: + """Infer coder/reviewer/senior roles from agent names. + + Heuristic: + - Names containing 'gen', 'coder', etc. → coder + - Names containing 'senior', 'lead', etc. → senior + - Names containing 'review', 'audit', etc. 
→ reviewer + - If no matches: first agent → coder, rest → reviewers + """ + coders: list[str] = [] + reviewers: list[str] = [] + seniors: list[str] = [] + unclassified: list[str] = [] + + for name in agent_names: + lower = name.lower() + if any(p in lower for p in _CODER_PATTERNS): + coders.append(name) + elif any(p in lower for p in _SENIOR_PATTERNS): + seniors.append(name) + elif any(p in lower for p in _REVIEWER_PATTERNS): + reviewers.append(name) + else: + unclassified.append(name) + + # Fallback: if no classification worked, use positional convention + if not coders and not reviewers: + if len(agent_names) >= 2: + coders = [agent_names[0]] + reviewers = list(agent_names[1:]) + elif agent_names: + # Single agent: treat as reviewer (for review-only) + reviewers = list(agent_names) + elif not coders and unclassified: + coders = [unclassified.pop(0)] + elif not reviewers and unclassified: + reviewers = list(unclassified) + unclassified = [] + + # Any remaining unclassified go to reviewers + reviewers.extend(unclassified) + + return coders, reviewers, seniors + + +def _resolve_agents( + user_agents: dict[str, AgentConfig], + coders: list[str], + reviewers: list[str], + seniors: list[str], +) -> dict[str, AgentConfig]: + """Ensure all referenced agents exist by merging built-in definitions. + + If a coder or reviewer name references an agent not in user_agents + but present in BUILTIN_AGENTS, the built-in definition is added. 
+ """ + all_referenced = set(coders) | set(reviewers) | set(seniors) + result = dict(user_agents) + + for name in all_referenced: + if name not in result and name in BUILTIN_AGENTS: + result[name] = BUILTIN_AGENTS[name] + + return result + + +def _default_seniors_for_preset( + pipeline_raw: Any, + reviewers: list[str], + agents: dict[str, AgentConfig], +) -> list[str]: + """Infer a default senior agent for presets that benefit from adjudication.""" + if not ( + isinstance(pipeline_raw, str) + and pipeline_raw == "preset:review-fix" + and reviewers + ): + return [] + + first_reviewer = reviewers[0] + if first_reviewer.startswith("codex-"): + return ["codex-senior"] + if first_reviewer.startswith("claude-"): + return ["claude-senior"] + + reviewer_agent = agents.get(first_reviewer) + if reviewer_agent is None: + return [] + + command = reviewer_agent.command.lower() + if "codex" in command: + return ["codex-senior"] + if "claude" in command: + return ["claude-senior"] + return [] + + +def normalize_reasoning_effort(effort: str) -> str: + """Normalize user-facing reasoning effort aliases.""" + normalized = REASONING_EFFORT_ALIASES.get(effort, effort) + if normalized not in REASONING_EFFORT_CHOICES: + raise ValueError( + f"Unsupported reasoning effort '{effort}'. 
" + f"Use one of: {REASONING_EFFORT_CHOICES}" + ) + return normalized + + +def apply_reasoning_effort_settings( + config: PipelineConfig, + *, + reasoning_effort: str | None = None, + coder_effort: str | None = None, + reviewer_effort: str | None = None, + senior_effort: str | None = None, +) -> None: + """Apply default and override reasoning effort settings by role.""" + shared_effort = normalize_reasoning_effort(reasoning_effort) if reasoning_effort else None + role_efforts = { + "coder": normalize_reasoning_effort(coder_effort) if coder_effort else shared_effort, + "reviewer": normalize_reasoning_effort(reviewer_effort) if reviewer_effort else shared_effort, + "senior": normalize_reasoning_effort(senior_effort) if senior_effort else shared_effort, + } + + _apply_role_effort(config.agents, config.coders, role_efforts["coder"], "coder") + _apply_role_effort(config.agents, config.reviewers, role_efforts["reviewer"], "reviewer") + _apply_role_effort(config.agents, config.seniors, role_efforts["senior"], "senior") + + +def _apply_role_effort( + agents: dict[str, AgentConfig], + agent_names: list[str], + override_effort: str | None, + role: str, +) -> None: + """Set reasoning effort on agents for a specific role.""" + for agent_name in agent_names: + agent = agents.get(agent_name) + if agent is None: + continue + if override_effort is not None: + agent.reasoning_effort = override_effort + elif agent.reasoning_effort is None: + agent.reasoning_effort = DEFAULT_ROLE_REASONING_EFFORTS[role] + + +# --------------------------------------------------------------------------- +# Default config (no YAML) +# --------------------------------------------------------------------------- + +def default_config() -> PipelineConfig: + """Return a PipelineConfig with sensible defaults (no YAML needed).""" + agents = dict(BUILTIN_AGENTS) + coders = ["claude-coder"] + reviewers = ["claude-reviewer"] + seniors: list[str] = [] + pipeline = PIPELINE_PRESETS["simple"](coders, reviewers, 
def load_config(path: Path) -> PipelineConfig:
    """Load and validate a YAML config file, returning PipelineConfig.

    Args:
        path: Location of the YAML file; it is resolved to an absolute path
            before being opened.

    Returns:
        The parsed configuration, which has already passed
        ``validate_config``.

    Raises:
        ValueError: If the file is not well-formed YAML, if its top level is
            not a mapping (an empty file parses to ``None`` and is rejected
            here too), or if validation reports any error.
        OSError: If the file cannot be opened (``FileNotFoundError`` when it
            does not exist).
    """
    path = path.resolve()
    try:
        with open(path, encoding="utf-8") as f:
            raw = yaml.safe_load(f)
    except yaml.YAMLError as exc:
        # Report syntax errors through the same exception type as every other
        # configuration problem, so callers that handle ValueError print a
        # message instead of letting a parser traceback escape.
        raise ValueError(f"Invalid YAML in {path}: {exc}") from exc

    if not isinstance(raw, dict):
        raise ValueError(f"Config file must be a YAML mapping, got {type(raw).__name__}")

    config = _parse_raw(raw, path)

    errors = validate_config(config)
    if errors:
        raise ValueError("Config validation failed:\n  " + "\n  ".join(errors))

    return config
def try_reload_config(config: PipelineConfig) -> PipelineConfig:
    """Reload the config when its file has a newer modification time.

    Args:
        config: The active configuration. ``_config_path`` / ``_config_mtime``
            record where it was loaded from and the mtime seen at load time.

    Returns:
        A freshly loaded ``PipelineConfig`` when the file changed and loaded
        cleanly; otherwise the very same ``config`` object that was passed in.

    Reload problems are logged as warnings and never propagate, so a bad edit
    made while the pipeline is running cannot abort the run.
    """
    if config._config_path is None or config._config_mtime is None:
        return config

    try:
        current_mtime = config._config_path.stat().st_mtime
    except OSError:
        # File vanished or is unreadable: keep running with what we have.
        return config

    if current_mtime <= config._config_mtime:
        return config

    # NOTE(review): a reloaded config is built from the file alone, so entries
    # written by apply_input_overrides() are not carried over -- confirm
    # whether callers are expected to re-apply CLI overrides after a reload.
    logger.info("Config file changed, reloading: %s", config._config_path.name)
    try:
        new_config = load_config(config._config_path)
    except (
        ValueError,        # validation failure / non-mapping document
        OSError,           # missing, unreadable or otherwise inaccessible file
        KeyError,          # step entry without name / agent / output_key
        TypeError,         # wrong YAML node type for a field
        AttributeError,    # e.g. "agents: null" -> None has no .items()
        yaml.YAMLError,    # syntactically broken YAML
    ) as e:
        logger.warning("Config reload failed, keeping previous config: %s", e)
        return config

    logger.info("Config reloaded successfully")
    return new_config


def _resolve_pipeline(
    pipeline_raw: Any,
    coders: list[str],
    reviewers: list[str],
    seniors: list[str],
) -> tuple[list[StepConfig], list[PhaseConfig]]:
    """Resolve the ``pipeline`` config value into steps or phases.

    Args:
        pipeline_raw: Either a ``"preset:<name>"`` string or a list of step
            mappings taken from the YAML document.
        coders: Agent names passed to the preset builders.
        reviewers: Agent names passed to the preset builders.
        seniors: Agent names passed to the preset builders.

    Returns:
        A ``(steps, phases)`` tuple of which only one is populated: presets in
        ``PIPELINE_PRESETS`` and explicit step lists fill ``steps``; presets in
        ``PHASED_PRESETS`` fill ``phases``.

    Raises:
        ValueError: For an unknown preset name or an unsupported value type.
        KeyError: If an explicit step lacks ``name``, ``agent`` or
            ``output_key``.
    """
    # Preset: "preset:simple" or "preset:review-fix"
    if isinstance(pipeline_raw, str) and pipeline_raw.startswith("preset:"):
        preset_name = pipeline_raw.split(":", 1)[1]
        if preset_name in PIPELINE_PRESETS:
            return PIPELINE_PRESETS[preset_name](coders, reviewers, seniors), []
        if preset_name in PHASED_PRESETS:
            return [], PHASED_PRESETS[preset_name](coders, reviewers, seniors)
        all_presets = list(PIPELINE_PRESETS.keys()) + list(PHASED_PRESETS.keys())
        raise ValueError(
            f"Unknown pipeline preset '{preset_name}'. "
            f"Available: {all_presets}"
        )

    # Explicit step list
    if isinstance(pipeline_raw, list):
        steps = [
            StepConfig(
                name=step_data["name"],
                agent=step_data["agent"],
                role=step_data.get("role", "generate"),
                prompt_template=step_data.get(
                    "prompt_template",
                    f"default:{step_data.get('role', 'generate')}",
                ),
                output_key=step_data["output_key"],
                verdict=step_data.get("verdict", False),
                verdict_pattern=step_data.get("verdict_pattern", r"VERDICT:\s*PASS"),
                context_override=step_data.get("context_override", {}),
                # Previously dropped: without this a hand-written pipeline
                # could never opt a step into parallel execution.
                parallel=step_data.get("parallel", False),
            )
            for step_data in pipeline_raw
        ]
        return steps, []

    raise ValueError(
        f"'pipeline' must be a preset string (e.g. 'preset:simple') "
        f"or a list of step definitions, got {type(pipeline_raw).__name__}"
    )


def _validate_step_collection(
    steps: list[StepConfig],
    config: PipelineConfig,
    errors: list[str],
    *,
    scope: str,
    step_label: str,
    empty_message: str,
) -> None:
    """Run the per-collection checks shared by phases and the flat pipeline.

    Appends to ``errors`` in a fixed order: empty collection, undefined
    agents, duplicate names/output keys, missing verdict step, and finally
    verdict patterns that do not compile.
    """
    if not steps:
        errors.append(empty_message)

    for step in steps:
        if step.agent not in config.agents:
            errors.append(
                f"{step_label} '{step.name}' references undefined agent '{step.agent}'. "
                f"Defined agents: {list(config.agents.keys())}"
            )

    _validate_unique_step_fields(steps, errors, scope=scope)

    if not any(s.verdict for s in steps):
        errors.append(f"{scope} must have at least one step with verdict: true")

    for step in steps:
        if not step.verdict:
            continue
        try:
            re.compile(step.verdict_pattern)
        except re.error as e:
            errors.append(
                f"{step_label} '{step.name}' has invalid verdict_pattern: {e}"
            )


def validate_config(config: PipelineConfig) -> list[str]:
    """Return a list of validation error strings (empty list means valid).

    When ``config.phases`` is non-empty each phase is validated on its own;
    otherwise ``config.pipeline`` is validated as a single collection. Input
    paths and the language code are checked in both cases.
    """
    errors: list[str] = []

    if config.phases:
        for phase in config.phases:
            phase_scope = f"Phase '{phase.name}'"
            _validate_step_collection(
                phase.steps,
                config,
                errors,
                scope=phase_scope,
                step_label=f"{phase_scope} step",
                empty_message=f"{phase_scope} has no steps",
            )
    else:
        _validate_step_collection(
            config.pipeline,
            config,
            errors,
            scope="Pipeline",
            step_label="Step",
            empty_message="Pipeline must have at least one step",
        )

    # --- Common validation ---
    for key, val in config.inputs.items():
        # Only Path values are checked; plain strings are skipped here.
        if isinstance(val, Path) and not val.exists():
            errors.append(f"Input file '{key}' not found: {val}")

    if config.language not in ("en", "ko"):
        errors.append(f"Unsupported language '{config.language}'. Use 'en' or 'ko'.")

    return errors


def _validate_unique_step_fields(
    steps: list[StepConfig],
    errors: list[str],
    *,
    scope: str,
) -> None:
    """Append an error for every repeated step name or output key in ``steps``.

    ``scope`` is the message prefix (e.g. ``"Pipeline"`` or ``"Phase 'x'"``).
    """
    seen_names: set[str] = set()
    seen_output_keys: set[str] = set()

    for step in steps:
        if step.name in seen_names:
            errors.append(f"{scope} has duplicate step name '{step.name}'")
        seen_names.add(step.name)

        if step.output_key in seen_output_keys:
            errors.append(f"{scope} has duplicate output_key '{step.output_key}'")
        seen_output_keys.add(step.output_key)


def apply_input_overrides(
    config: PipelineConfig, overrides: dict[str, str]
) -> None:
    """Apply CLI ``--input`` overrides to ``config.inputs`` in place.

    Each override value is stored as a ``Path`` built from the given string,
    replacing any existing entry with the same key.
    """
    for key, path_str in overrides.items():
        config.inputs[key] = Path(path_str)
@dataclass
class PipelineConfig:
    """Top-level configuration for one cross-eval run."""

    # Where run directories are created and how many rounds may be attempted.
    output_dir: Path = field(default_factory=lambda: Path("output"))
    max_iterations: int = 3
    min_iterations: int = 1
    verbose: bool = False
    language: str = "en"  # "en" or "ko"

    # Named inputs: a Path is read from disk, a plain string is used verbatim.
    inputs: dict[str, Path | str] = field(default_factory=dict)

    # Agent definitions keyed by name, plus the names assigned to each role.
    agents: dict[str, AgentConfig] = field(default_factory=dict)
    coders: list[str] = field(default_factory=list)
    reviewers: list[str] = field(default_factory=list)
    seniors: list[str] = field(default_factory=list)

    # Flat step list and phase list (see _resolve_pipeline for which is set).
    pipeline: list[StepConfig] = field(default_factory=list)
    phases: list[PhaseConfig] = field(default_factory=list)
    preset_name: str = "custom"

    # Bookkeeping for hot reload; kept out of repr().
    _config_path: Path | None = field(default=None, repr=False)
    _config_mtime: float | None = field(default=None, repr=False)


@dataclass
class AgentResult:
    """Outcome of invoking one agent for one step."""

    output: str
    exit_code: int
    agent_name: str
    step_name: str
    duration_seconds: float


@dataclass
class ReviewMetrics:
    """Counters parsed out of a single review output."""

    # Issues by severity
    critical: int = 0
    major: int = 0
    minor: int = 0

    # Issues by category
    over_engineering: int = 0
    omission: int = 0

    # Assessment of previous feedback
    confirmed: int = 0
    dismissed: int = 0


@dataclass
class IterationResult:
    """Everything recorded for one iteration of the pipeline."""

    iteration: int
    step_results: dict[str, AgentResult] = field(default_factory=dict)
    step_outputs: dict[str, str] = field(default_factory=dict)
    verdict: str | None = None
    feedback: str | None = None
    phase_name: str | None = None
    repeated_aggregate_warning: str | None = None
    review_metrics: ReviewMetrics | None = None
def run_pipeline(
    config: PipelineConfig,
    cwd: Path | None = None,
    dry_run: bool = False,
    timeout: int | None = None,
) -> PipelineResult:
    """Execute the full cross-eval pipeline.

    A run directory is created first; the phased runner is used when
    ``config.phases`` is non-empty, the simple runner otherwise.
    """
    # Create run directory: output/{preset}_{datetime}/
    run_dir = _make_run_dir(config)

    if config.phases:
        return _run_phased_pipeline(config, run_dir, cwd, dry_run, timeout)
    return _run_simple_pipeline(config, run_dir, cwd, dry_run, timeout)


def _make_run_dir(config: PipelineConfig) -> Path:
    """Create and return ``output_dir/{preset_name}_{YYYYmmdd_HHMMSS}``.

    Missing parents are created, and an already existing directory is reused
    rather than treated as an error.
    """
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_dir = config.output_dir / f"{config.preset_name}_{ts}"
    run_dir.mkdir(parents=True, exist_ok=True)
    return run_dir


def _run_simple_pipeline(
    config: PipelineConfig,
    run_dir: Path,
    cwd: Path | None = None,
    dry_run: bool = False,
    timeout: int | None = None,
) -> PipelineResult:
    """Execute a simple (non-phased) pipeline.

    Runs ``config.pipeline`` up to ``config.max_iterations`` times, feeding
    each iteration's collected review feedback into the next one, and stops
    early on a PASS once ``config.min_iterations`` has been reached.

    Args:
        config: Active configuration; re-read from disk before each iteration.
        run_dir: Directory that receives step outputs and the final report.
        cwd: Working directory for agents; defaults to the current directory.
        dry_run: Render prompts only, stop after one iteration, write no report.
        timeout: Per-agent timeout forwarded to the step runner.

    Returns:
        A ``PipelineResult`` whose ``final_verdict`` is ``"PASS"`` only when
        the last executed iteration passed, else ``"MAX_ITERATIONS_REACHED"``.
    """
    if cwd is None:
        cwd = Path(os.getcwd())

    set_language(config.language)
    input_contents = _load_inputs(config)

    feedback = "(no feedback — first iteration)"
    iterations: list[IterationResult] = []
    start_time = time.monotonic()
    final_verdict = "MAX_ITERATIONS_REACHED"
    aggregate_history: dict[str, int] = {}
    aggregate_warnings: list[str] = []

    for i in range(1, config.max_iterations + 1):
        # Pick up edits made to the config or input files while running.
        config = try_reload_config(config)
        set_language(config.language)
        _refresh_inputs(config, input_contents)

        logger.info("=" * 50)
        logger.info(" Iteration %d/%d", i, config.max_iterations)
        logger.info("=" * 50)

        step_outputs, step_results, verdict = _run_steps(
            config.pipeline, config, input_contents, feedback,
            i, config.max_iterations, cwd, timeout, dry_run,
            run_dir=run_dir, output_iter=i,
        )

        iter_result = IterationResult(
            iteration=i,
            step_results=step_results,
            step_outputs=step_outputs,
            verdict=verdict,
        )
        warning = _detect_repeated_aggregate(
            config.pipeline, step_outputs, aggregate_history, iteration=i,
        )
        if warning:
            iter_result.repeated_aggregate_warning = warning
            aggregate_warnings.append(warning)
            logger.warning(" %s", warning)

        iter_result.feedback = _collect_feedback(config.pipeline, step_outputs)
        # Keep the previous feedback when this iteration produced none.
        feedback = iter_result.feedback or feedback
        iterations.append(iter_result)

        # The final verdict mirrors the most recent iteration. Previously a
        # PASS seen before min_iterations stuck even when every later
        # iteration failed, so such runs were reported as PASS.
        passed = verdict == "PASS"
        final_verdict = "PASS" if passed else "MAX_ITERATIONS_REACHED"

        if passed:
            if i >= config.min_iterations:
                logger.info(" PASS at iteration %d (min=%d reached)!", i, config.min_iterations)
                break
            logger.info(
                " PASS at iteration %d, but min_iterations=%d — continuing",
                i, config.min_iterations,
            )

        if dry_run:
            logger.info(" (dry-run: stopping after iteration 1)")
            break

    total_duration = time.monotonic() - start_time

    pipeline_result = PipelineResult(
        iterations=iterations,
        final_verdict=final_verdict,
        total_duration=round(total_duration, 1),
        run_dir=run_dir,
        repeated_aggregate_warnings=aggregate_warnings,
    )

    if not dry_run:
        _save_report(run_dir, config, pipeline_result)

    return pipeline_result
def _load_inputs(config: PipelineConfig) -> dict[str, str]:
    """Return the text of every configured input, keyed by input name.

    String values are used as-is; any other value is read as a UTF-8 file.
    """
    return {
        name: source if isinstance(source, str) else source.read_text(encoding="utf-8")
        for name, source in config.inputs.items()
    }


def _refresh_inputs(
    config: PipelineConfig, input_contents: dict[str, str],
) -> None:
    """Update ``input_contents`` in place from the current ``config.inputs``.

    Inline strings are copied over; Path inputs are re-read only when the file
    exists, so a missing file leaves the previously loaded text untouched.
    """
    for name, source in config.inputs.items():
        if isinstance(source, str):
            input_contents[name] = source
            continue
        if isinstance(source, Path) and source.exists():
            input_contents[name] = source.read_text(encoding="utf-8")


# ---------------------------------------------------------------------------
# Parallel step grouping
# ---------------------------------------------------------------------------

def _get_step_dependencies(step: StepConfig) -> set[str]:
    """Collect every ``{word}`` placeholder name used in context_override values."""
    referenced: set[str] = set()
    for template in step.context_override.values():
        referenced.update(m.group(1) for m in re.finditer(r"\{(\w+)\}", template))
    return referenced


def _group_parallel_steps(steps: list[StepConfig]) -> list[list[StepConfig]]:
    """Split ``steps`` into ordered batches.

    A non-parallel step always forms a batch of its own. Consecutive steps
    with ``parallel=True`` share a batch, except that a step referencing the
    output_key of a step already in the open batch closes that batch and
    starts a new one.
    """
    grouped: list[list[StepConfig]] = []
    pending: list[StepConfig] = []
    pending_keys: set[str] = set()

    for candidate in steps:
        if candidate.parallel:
            # A dependency on the open batch forces a batch boundary.
            if _get_step_dependencies(candidate) & pending_keys:
                grouped.append(pending)
                pending, pending_keys = [], set()
            pending.append(candidate)
            pending_keys.add(candidate.output_key)
        else:
            if pending:
                grouped.append(pending)
                pending, pending_keys = [], set()
            grouped.append([candidate])

    if pending:
        grouped.append(pending)

    return grouped
def _decode_process_stream(stream: bytes | str | None) -> str:
    """Return captured process output as text (``None`` becomes ``""``)."""
    if stream is None:
        return ""
    if isinstance(stream, bytes):
        return stream.decode("utf-8", errors="replace")
    return stream


def _execute_step(
    step: StepConfig,
    config: PipelineConfig,
    input_contents: dict[str, str],
    feedback: str,
    iteration: int,
    max_iterations: int,
    cwd: Path,
    timeout: int | None,
    dry_run: bool,
    step_outputs: dict[str, str],
    step_results: dict[str, AgentResult],
    *,
    run_dir: Path,
    output_iter: int,
    phase_name: str | None = None,
    quiet: bool = False,
) -> None:
    """Execute a single step, updating step_outputs and step_results in place.

    The prompt is rendered from the step's template and the current context.
    In dry-run mode the prompt is printed and a placeholder output is stored.
    Otherwise the agent is invoked, its output is stored under
    ``step.output_key`` and written to ``run_dir/v{output_iter}/{name}.md``.

    Raises:
        RuntimeError: When the agent times out (chained to the original
            ``subprocess.TimeoutExpired``) or when ``invoke_agent`` raises
            ``RuntimeError``. In both cases an ``{name}_error.md`` file is
            written before the exception propagates.
    """
    if not quiet:
        logger.info(" [%s] agent='%s' role='%s'", step.name, step.agent, step.role)

    # 1. Resolve template
    template = resolve_template(step.prompt_template)

    # 2. Build context
    context = _build_context(
        input_contents, step_outputs, feedback, iteration, max_iterations,
    )

    # 3. Apply context overrides
    if step.context_override:
        context = _apply_context_override(context, step.context_override)

    # 4. Render prompt
    prompt = render_template(template, context)

    # 5. Dry run: print and skip
    if dry_run:
        phase_label = f" phase={phase_name}" if phase_name else ""
        print(f"\n--- Step: {step.name} (agent={step.agent}{phase_label}) ---")
        print(prompt)
        print(f"--- end {step.name} ---\n")
        step_outputs[step.output_key] = f"(dry-run: no output for {step.output_key})"
        return

    # 6. Invoke agent
    agent_config = config.agents[step.agent]
    try:
        result = invoke_agent(
            agent_config, prompt, step.name,
            cwd=cwd, timeout=timeout, quiet=quiet,
        )
    except subprocess.TimeoutExpired as e:
        # Captured output may be bytes, str or None depending on how the
        # subprocess was started; normalise before slicing it into the report.
        stdout = _decode_process_stream(e.stdout)
        stderr = _decode_process_stream(e.stderr)
        phase_info = f"- **Phase**: {phase_name}\n" if phase_name else ""
        error_msg = (
            f"# Agent Timeout\n\n"
            f"{phase_info}"
            f"- **Step**: {step.name}\n"
            f"- **Agent**: {step.agent}\n"
            f"- **Timeout**: {timeout}s\n\n"
            f"Partial stdout ({len(stdout)} chars):\n"
            f"```\n{stdout[:2000] or '(none)'}\n```\n\n"
            f"Stderr:\n```\n{stderr[:2000] or '(none)'}\n```\n"
        )
        _save_step_output(run_dir, output_iter, f"{step.name}_error", error_msg)
        logger.error(" [%s] TIMEOUT after %ss — saved to output", step.name, timeout)
        raise RuntimeError(
            f"Agent '{step.agent}' timed out after {timeout}s at step '{step.name}'. "
            f"Error saved to {run_dir}/v{output_iter}/{step.name}_error.md. "
            f"Try --timeout 0 (unlimited)"
        ) from e
    except RuntimeError as e:
        phase_info = f"- **Phase**: {phase_name}\n" if phase_name else ""
        error_msg = (
            f"# Agent Error\n\n{phase_info}"
            f"- **Step**: {step.name}\n- **Agent**: {step.agent}\n\n```\n{e}\n```\n"
        )
        _save_step_output(run_dir, output_iter, f"{step.name}_error", error_msg)
        logger.error(" [%s] FAILED — saved to output", step.name)
        raise

    # 7. Store output
    step_outputs[step.output_key] = result.output
    step_results[step.output_key] = result

    if not quiet:
        logger.info(
            " [%s] completed (%.1fs, %d chars)",
            step.name, result.duration_seconds, len(result.output),
        )

    # 8. Save to disk
    _save_step_output(run_dir, output_iter, step.name, result.output)


def _execute_parallel_batch(
    batch: list[StepConfig],
    config: PipelineConfig,
    input_contents: dict[str, str],
    feedback: str,
    iteration: int,
    max_iterations: int,
    cwd: Path,
    timeout: int | None,
    dry_run: bool,
    step_outputs: dict[str, str],
    step_results: dict[str, AgentResult],
    *,
    run_dir: Path,
    output_iter: int,
    phase_name: str | None = None,
) -> None:
    """Execute multiple steps in parallel using threads.

    Every step in the batch renders its prompt from the same snapshot of
    inputs and earlier step outputs, so steps in one batch never see each
    other's results. On success all outputs are merged into ``step_outputs``
    and ``step_results`` and written to disk in batch order.

    Raises:
        Exception: The first step failure observed (in completion order) is
            re-raised unchanged. Before that, every failed step is logged and
            gets an ``{name}_error.md`` file, and steps that did finish have
            their output written to disk so it is not lost.
    """
    agent_names = ", ".join(s.agent for s in batch)
    logger.info(" [parallel] %d agents: %s", len(batch), agent_names)

    if dry_run:
        # Nothing is invoked in dry-run mode, so run the steps one by one.
        for step in batch:
            _execute_step(
                step, config, input_contents, feedback,
                iteration, max_iterations, cwd, timeout, dry_run,
                step_outputs, step_results,
                run_dir=run_dir, output_iter=output_iter, phase_name=phase_name,
            )
        return

    # Snapshot context before parallel execution (all steps see same state)
    context_snapshot = dict(input_contents)
    context_snapshot.update(step_outputs)

    # Collect results from parallel threads
    local_outputs: dict[str, str] = {}
    local_results: dict[str, AgentResult] = {}
    failures: list[tuple[StepConfig, Exception]] = []

    # Show a single spinner for the batch
    from cross_eval.agent import _Spinner
    spinner = _Spinner(
        f"[parallel] {len(batch)} agents running ({agent_names})..."
    )
    spinner.start()
    batch_start = time.monotonic()

    def _run_one(step: StepConfig) -> tuple[str, str, AgentResult]:
        """Run one step, return (output_key, output, result)."""
        template = resolve_template(step.prompt_template)
        context = _build_context(
            context_snapshot, {}, feedback, iteration, max_iterations,
        )
        if step.context_override:
            context = _apply_context_override(context, step.context_override)
        prompt = render_template(template, context)

        agent_config = config.agents[step.agent]
        result = invoke_agent(
            agent_config, prompt, step.name,
            cwd=cwd, timeout=timeout, quiet=True,
        )
        return step.output_key, result.output, result

    try:
        with ThreadPoolExecutor(max_workers=len(batch)) as executor:
            futures = {executor.submit(_run_one, step): step for step in batch}
            for future in as_completed(futures):
                step = futures[future]
                try:
                    output_key, output, result = future.result()
                except Exception as e:
                    # Keep going so the other agents' work can still be saved.
                    failures.append((step, e))
                else:
                    local_results[output_key] = result
                    local_outputs[output_key] = output
    except BaseException:
        # Anything not collected above (e.g. KeyboardInterrupt) must still
        # stop the spinner before it propagates.
        spinner.stop(f"[parallel] FAILED ({round(time.monotonic() - batch_start, 1)}s)")
        raise

    batch_elapsed = round(time.monotonic() - batch_start, 1)

    if failures:
        spinner.stop(f"[parallel] FAILED ({batch_elapsed}s)")
        phase_info = f"- **Phase**: {phase_name}\n" if phase_name else ""
        for failed_step, exc in failures:
            # Same error-file layout as the sequential path in _execute_step.
            error_msg = (
                f"# Agent Error\n\n{phase_info}"
                f"- **Step**: {failed_step.name}\n- **Agent**: {failed_step.agent}\n\n```\n{exc}\n```\n"
            )
            _save_step_output(run_dir, output_iter, f"{failed_step.name}_error", error_msg)
            logger.error(" [%s] FAILED — saved to output", failed_step.name)
        for step in batch:
            if step.output_key in local_outputs:
                _save_step_output(
                    run_dir, output_iter, step.name, local_outputs[step.output_key],
                )
        raise failures[0][1]

    spinner.stop(f"[parallel] {len(batch)} agents done ({batch_elapsed}s)")

    # Merge results
    for step in batch:
        key = step.output_key
        step_outputs[key] = local_outputs[key]
        step_results[key] = local_results[key]
        r = local_results[key]
        logger.info(
            " [%s] completed (%.1fs, %d chars)",
            step.name, r.duration_seconds, len(r.output),
        )
        _save_step_output(run_dir, output_iter, step.name, r.output)
def _apply_context_override(
    context: dict[str, str],
    overrides: dict[str, str],
) -> dict[str, str]:
    """Return a copy of ``context`` with each override rendered and applied.

    Every override template is rendered against the original ``context``, so
    one override never sees the value produced by another.
    """
    rendered = {
        key: render_template(value_template, context)
        for key, value_template in overrides.items()
    }
    return {**context, **rendered}


def _collect_feedback(
    steps: list[StepConfig],
    step_outputs: dict[str, str],
) -> str:
    """Build the feedback text from the outputs of all verdict steps.

    With exactly one verdict step its raw output is returned. Otherwise each
    non-empty output gets a "Review by" header and the sections are joined
    with a horizontal rule.
    """
    reviewers = [s for s in steps if s.verdict]
    if len(reviewers) == 1:
        return step_outputs.get(reviewers[0].output_key, "")

    sections: list[str] = []
    for reviewer in reviewers:
        text = step_outputs.get(reviewer.output_key, "")
        if not text:
            continue
        sections.append(f"## Review by {reviewer.agent} ({reviewer.name})\n{text}")
    return "\n\n---\n\n".join(sections)


def _detect_repeated_aggregate(
    steps: list[StepConfig],
    step_outputs: dict[str, str],
    history: dict[str, int],
    *,
    iteration: int,
    phase_name: str | None = None,
) -> str | None:
    """Return a warning when the aggregate review repeats an earlier iteration.

    Only the first step using the ``default:aggregate-review`` template is
    considered. Its normalized output is looked up in ``history``: a new
    output is recorded with the current iteration and ``None`` is returned; a
    known one yields a warning naming the iteration it was first seen in.
    """
    aggregate = next(
        (s for s in steps if s.prompt_template == "default:aggregate-review"),
        None,
    )
    if aggregate is None:
        return None

    fingerprint = _normalize_aggregate_output(
        step_outputs.get(aggregate.output_key, "")
    )
    if not fingerprint:
        return None

    if fingerprint not in history:
        history[fingerprint] = iteration
        return None

    first_seen = history[fingerprint]
    label = f"[{phase_name}] " if phase_name else ""
    return (
        f"{label}Repeated aggregate_review detected at iteration {iteration} "
        f"(same as iteration {first_seen})."
    )


def _normalize_aggregate_output(output: str) -> str:
    """Lower-case ``output`` and collapse all whitespace runs to single spaces."""
    words = output.lower().split()
    return " ".join(words)


def _extract_verdict(output: str, pattern: str) -> str:
    """Return "PASS" when ``pattern`` matches anywhere in ``output``, else "FAIL"."""
    return "PASS" if re.search(pattern, output) else "FAIL"


def _save_step_output(
    run_dir: Path,
    iteration: int,
    step_name: str,
    content: str,
) -> Path:
    """Write ``content`` to run_dir/v{iteration}/{step_name}.md and return the path."""
    destination = run_dir / f"v{iteration}" / f"{step_name}.md"
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(content, encoding="utf-8")
    return destination


def _save_report(run_dir: Path, config: PipelineConfig, result: PipelineResult) -> None:
    """Build the markdown report and write it to run_dir/final-report.md."""
    report_path = run_dir / "final-report.md"
    report = build_report(config, result)
    report_path.parent.mkdir(parents=True, exist_ok=True)
    report_path.write_text(report, encoding="utf-8")
    logger.info("Report saved: %s", report_path)
+ +## Plan +{plan} + +## Checklist +{checklist} + +## Reference Documents +{docs} + +## Previous Review Feedback +{feedback} + +## Iteration +This is iteration {iteration} of {max_iterations}. + +## Instructions +1. Explore the project directory to understand the existing codebase structure. +2. Implement ONLY what the plan specifies. Do NOT add extra features, \ +unnecessary abstractions, or premature optimizations. +3. Follow every item in the checklist. +4. If there is previous feedback, address ONLY the specific issues mentioned. +5. If previous feedback contains items marked as DISMISSED or false positive, \ +IGNORE those items — they have been verified as correct. +6. Output the complete implementation. +""" + +REVIEW_TEMPLATE = """\ +You are tasked with reviewing code against a plan and checklist. + +## Plan +{plan} + +## Checklist +{checklist} + +## Reference Documents +{docs} + +## Generated Code / Previous Step Output +{generated_code} + +## Previous Review Feedback +{feedback} + +## Review Instructions +Explore the project directory to understand the full codebase context, \ +then evaluate the code against ONLY the plan and checklist above. + +For each issue found, classify it with BOTH severity AND category: + +Severity levels: +- **Critical**: Breaks functionality, causes data loss, or introduces security vulnerabilities. +- **Major**: Requirement mismatch, significant logic errors, or missing core functionality. +- **Minor**: Coding convention violations, trivial omissions, or style issues. + +Categories: +- **Over-engineering**: Code adds features, abstractions, or complexity \ +NOT required by the plan. +- **Omission**: A requirement from the plan or checklist that is missing or \ +incomplete in the implementation. + +If previous review feedback is provided above, you MUST assess each item: +- **CONFIRMED**: The issue is still present in the current code. 
+- **DISMISSED (false positive)**: The flagged item is actually correct per \ +the plan requirements. Provide rationale. + +If you find issues outside the plan/checklist scope (e.g. pre-existing bugs, \ +security concerns, performance problems), report them separately under \ +"Out of Scope Issues". + +## Output Format + +### Previous Feedback Assessment +(Only include this section if previous feedback was provided.) +- CONFIRMED: [item description] — still an issue because [reason] +- DISMISSED (false positive): [item description] — actually correct because [reason] +(Write "N/A" if no previous feedback was provided.) + +### Issues Found +List issues ordered by severity (Critical first): +- [Critical][Over-engineering] Description (reference specific plan/checklist item) +- [Major][Omission] Description (reference specific plan/checklist item) +- [Minor][Omission] Description (reference specific plan/checklist item) + +### Out of Scope Issues +Issues found outside plan/checklist scope but worth noting: +- [Critical] Description of issue +- [Minor] Description of issue +(Write "None" if no out-of-scope issues found.) + +### Summary +- Critical: N, Major: N, Minor: N +- Over-engineering count: N +- Omission count: N +- CONFIRMED: N, DISMISSED: N +- Overall quality: [BRIEF ASSESSMENT] + +### Verdict +If all checklist items are satisfied and there is no over-engineering or \ +omission, output: VERDICT: PASS +Otherwise output: VERDICT: FAIL +""" + + +GENERATE_TEMPLATE_KO = """\ +당신은 기획서와 체크리스트를 기반으로 코드를 구현하는 개발자입니다. + +## 기획서 +{plan} + +## 체크리스트 +{checklist} + +## 참고 문서 +{docs} + +## 이전 리뷰 피드백 +{feedback} + +## 반복 정보 +현재 {max_iterations}회 중 {iteration}번째 반복입니다. + +## 지침 +1. 프로젝트 디렉토리를 탐색하여 기존 코드베이스 구조를 파악하세요. +2. 기획서에 명시된 것만 구현하세요. 추가 기능, 불필요한 추상화, 과도한 최적화를 하지 마세요. +3. 체크리스트의 모든 항목을 충족하세요. +4. 이전 리뷰 피드백이 있다면 해당 이슈만 해결하세요. +5. 이전 피드백에서 DISMISSED 또는 오탐으로 표시된 항목은 무시하세요 — 이미 올바른 것으로 검증되었습니다. +6. 완전한 구현을 출력하세요. 
+""" + +REVIEW_TEMPLATE_KO = """\ +당신은 기획서와 체크리스트 기준으로 코드를 검토하는 리뷰어입니다. + +## 기획서 +{plan} + +## 체크리스트 +{checklist} + +## 참고 문서 +{docs} + +## 검토 대상 코드 +{generated_code} + +## 이전 리뷰 피드백 +{feedback} + +## 검토 지침 +프로젝트 디렉토리를 직접 탐색하여 전체 코드베이스 맥락을 파악한 뒤, \ +위 기획서와 체크리스트 기준으로만 코드를 평가하세요. + +발견된 각 이슈에 심각도와 카테고리를 모두 부여하세요: + +심각도: +- **Critical**: 기능 장애, 데이터 손실, 보안 취약점을 유발하는 문제. +- **Major**: 요구사항 불일치, 중대한 로직 오류, 핵심 기능 누락. +- **Minor**: 코딩 컨벤션 위반, 사소한 누락, 스타일 문제. + +카테고리: +- **과최적화**: 기획서에 없는 기능, 추상화, 복잡성을 추가한 경우. +- **누락**: 기획서/체크리스트에 있지만 구현에서 빠지거나 불완전한 요구사항. + +이전 리뷰 피드백이 제공된 경우, 각 항목을 반드시 평가하세요: +- **CONFIRMED**: 현재 코드에 여전히 존재하는 이슈. +- **DISMISSED (오탐)**: 기획서 요구사항상 실제로 올바른 항목. 근거를 제시하세요. + +기획서/체크리스트 범위 밖에서 발견된 문제(기존 버그, 보안 이슈, 성능 문제 등)는 \ +"범위 밖 이슈" 섹션에 별도로 보고하세요. + +## 출력 형식 + +### 이전 피드백 평가 +(이전 피드백이 제공된 경우에만 포함하세요.) +- CONFIRMED: [항목 설명] — 여전히 이슈인 이유: [근거] +- DISMISSED (오탐): [항목 설명] — 실제로 올바른 이유: [근거] +(이전 피드백이 없으면 "해당 없음"이라고 작성하세요.) + +### 발견된 이슈 +심각도 순서(Critical 먼저)로 나열: +- [Critical][과최적화] 이슈 설명 (관련 기획서/체크리스트 항목 참조) +- [Major][누락] 이슈 설명 (관련 기획서/체크리스트 항목 참조) +- [Minor][누락] 이슈 설명 (관련 기획서/체크리스트 항목 참조) + +### 범위 밖 이슈 +기획서/체크리스트 범위 밖이지만 주목할 만한 이슈: +- [Critical] 이슈 설명 +- [Minor] 이슈 설명 +(범위 밖 이슈가 없으면 "없음"이라고 작성하세요.) + +### 요약 +- Critical: N, Major: N, Minor: N +- 과최적화 수: N +- 누락 수: N +- CONFIRMED: N, DISMISSED: N +- 전체 품질: [간략한 평가] + +### 판정 +모든 체크리스트 항목이 충족되고 과최적화/누락이 없으면: VERDICT: PASS +그렇지 않으면: VERDICT: FAIL +""" + + +REVIEW_ONLY_TEMPLATE = """\ +You are tasked with reviewing existing code against a plan and checklist. + +## Plan +{plan} + +## Checklist +{checklist} + +## Reference Documents +{docs} + +## Previous Review (iteration {iteration} of {max_iterations}) +{feedback} + +## Review Instructions +Explore the project directory thoroughly to understand the full codebase, \ +then evaluate the EXISTING code against ONLY the plan and checklist above. + +You are NOT generating or modifying code. You are auditing what already exists. 
+ +If previous review results are provided above, you MUST: +1. Verify each previously reported issue — is it a real issue or a false positive? +2. Look for issues the previous review MISSED. +3. Do NOT simply repeat the previous review. Provide your own independent assessment. +4. Explicitly mark items as CONFIRMED (still an issue) or DISMISSED (false positive). + +For each issue found, classify it with BOTH severity AND category: + +Severity levels: +- **Critical**: Breaks functionality, causes data loss, or introduces security vulnerabilities. +- **Major**: Requirement mismatch, significant logic errors, or missing core functionality. +- **Minor**: Coding convention violations, trivial omissions, or style issues. + +Categories: +- **Over-engineering**: Code adds features, abstractions, or complexity \ +NOT required by the plan. +- **Omission**: A requirement from the plan or checklist that is missing or \ +incomplete in the implementation. + +If you find issues outside the plan/checklist scope (e.g. pre-existing bugs, \ +security concerns, performance problems), report them separately under \ +"Out of Scope Issues". + +## Output Format + +### Issues Found +List issues ordered by severity (Critical first): +- [Critical][Over-engineering] Description (reference specific plan/checklist item) +- [Major][Omission] Description (reference specific plan/checklist item) +- [Minor][Omission] Description (reference specific plan/checklist item) + +### Out of Scope Issues +Issues found outside plan/checklist scope but worth noting: +- [Critical] Description of issue +- [Minor] Description of issue +(Write "None" if no out-of-scope issues found.) 
+ +### Summary +- Critical: N, Major: N, Minor: N +- Over-engineering count: N +- Omission count: N +- CONFIRMED: N, DISMISSED: N +- Overall quality: [BRIEF ASSESSMENT] + +### Verdict +If all checklist items are satisfied and there is no over-engineering or \ +omission, output: VERDICT: PASS +Otherwise output: VERDICT: FAIL +""" + +REVIEW_ONLY_TEMPLATE_KO = """\ +당신은 기존 코드를 기획서와 체크리스트 기준으로 감사하는 리뷰어입니다. + +## 기획서 +{plan} + +## 체크리스트 +{checklist} + +## 참고 문서 +{docs} + +## 이전 리뷰 결과 ({max_iterations}회 중 {iteration}번째) +{feedback} + +## 검토 지침 +프로젝트 디렉토리를 직접 탐색하여 전체 코드베이스를 파악한 뒤, \ +위 기획서와 체크리스트 기준으로 **기존 코드**를 평가하세요. + +코드를 생성하거나 수정하지 마세요. 이미 존재하는 코드를 감사하는 것이 목적입니다. + +이전 리뷰 결과가 제공된 경우 반드시: +1. 이전에 보고된 각 이슈를 검증하세요 — 진짜 이슈인지 오탐인지? +2. 이전 리뷰가 놓친 새로운 이슈를 찾으세요. +3. 이전 리뷰를 그대로 반복하지 마세요. 독립적인 평가를 제공하세요. +4. 각 항목에 CONFIRMED (여전히 이슈) 또는 DISMISSED (오탐) 태그를 명시하세요. + +발견된 각 이슈에 심각도와 카테고리를 모두 부여하세요: + +심각도: +- **Critical**: 기능 장애, 데이터 손실, 보안 취약점을 유발하는 문제. +- **Major**: 요구사항 불일치, 중대한 로직 오류, 핵심 기능 누락. +- **Minor**: 코딩 컨벤션 위반, 사소한 누락, 스타일 문제. + +카테고리: +- **과최적화**: 기획서에 없는 기능, 추상화, 복잡성을 추가한 경우. +- **누락**: 기획서/체크리스트에 있지만 구현에서 빠지거나 불완전한 요구사항. + +기획서/체크리스트 범위 밖에서 발견된 문제(기존 버그, 보안 이슈, 성능 문제 등)는 \ +"범위 밖 이슈" 섹션에 별도로 보고하세요. + +## 출력 형식 + +### 발견된 이슈 +심각도 순서(Critical 먼저)로 나열: +- [Critical][과최적화] 이슈 설명 (관련 기획서/체크리스트 항목 참조) +- [Major][누락] 이슈 설명 (관련 기획서/체크리스트 항목 참조) +- [Minor][누락] 이슈 설명 (관련 기획서/체크리스트 항목 참조) + +### 범위 밖 이슈 +기획서/체크리스트 범위 밖이지만 주목할 만한 이슈: +- [Critical] 이슈 설명 +- [Minor] 이슈 설명 +(범위 밖 이슈가 없으면 "없음"이라고 작성하세요.) + +### 요약 +- Critical: N, Major: N, Minor: N +- 과최적화 수: N +- 누락 수: N +- CONFIRMED: N, DISMISSED: N +- 전체 품질: [간략한 평가] + +### 판정 +모든 체크리스트 항목이 충족되고 과최적화/누락이 없으면: VERDICT: PASS +그렇지 않으면: VERDICT: FAIL +""" + +AGGREGATE_REVIEW_TEMPLATE = """\ +You are adjudicating multiple review results and turning them into an actionable decision. 
+ +## Plan +{plan} + +## Checklist +{checklist} + +## Reference Documents +{docs} + +## Candidate Outputs +{candidate_outputs} + +## Reviewer Findings +{reviews_bundle} + +## Previous Verification Feedback +{feedback} + +## Instructions +Explore the project directory to confirm the current codebase state. Then: +1. Deduplicate overlapping issues across reviewers. +2. Resolve disagreements explicitly. +3. Keep only issues supported by the plan, checklist, code, or reviewer evidence. +4. When evidence is mixed, explain what was confirmed, what was dismissed, and what still needs follow-up. +5. Produce a prioritized action list for the coder. +6. If no confirmed issue remains, output VERDICT: PASS. Otherwise VERDICT: FAIL. + +## Output Format + +### Confirmed Issues +- [Critical][Omission] Description with rationale and source reviewer(s) + +### Dismissed Findings +- [False positive] Claim — reason why it is actually correct (raised by: Reviewer X) +- [Already fixed] Claim — already resolved in the current code (raised by: Reviewer X) +(Write "None" if nothing was dismissed.) + +### Action Items +1. Concrete fix the coder should make +2. Concrete fix the coder should make + +### Summary +- Confirmed issues: N +- Dismissed findings: N (false positive: N, already fixed: N) +- Overall quality: [BRIEF ASSESSMENT] + +### Verdict +VERDICT: PASS or VERDICT: FAIL +""" + +AGGREGATE_REVIEW_TEMPLATE_KO = """\ +당신은 여러 리뷰 결과를 판정하고 coder가 수정할 액션으로 정리하는 시니어 리뷰어입니다. + +## 기획서 +{plan} + +## 체크리스트 +{checklist} + +## 참고 문서 +{docs} + +## 후보 결과물 +{candidate_outputs} + +## 개별 리뷰 결과 +{reviews_bundle} + +## 이전 검증 피드백 +{feedback} + +## 지침 +프로젝트 디렉토리를 탐색하여 현재 코드베이스 상태를 확인한 뒤 다음을 수행하세요. +1. 리뷰어들 사이에 중복되는 이슈를 합치세요. +2. 의견 충돌은 명시적으로 정리하세요. +3. 기획서, 체크리스트, 코드, 리뷰 근거로 뒷받침되는 이슈만 남기세요. +4. 근거가 엇갈리면 무엇이 확정이고 무엇이 기각 또는 추가확인 대상인지 분명히 적으세요. +5. coder가 바로 수정할 수 있는 우선순위 액션 아이템을 만드세요. +6. 확정된 이슈가 없으면 VERDICT: PASS, 있으면 VERDICT: FAIL 을 출력하세요. 
def _safe_key(name: str) -> str:
    """Sanitize an agent name for use as a template variable / output_key.

    Hyphens are replaced with underscores, so a name like 'claude-coder'
    becomes 'claude_coder', which is usable as a plain ``{field}`` name in
    ``str.format_map()``.
    """
    return name.replace("-", "_")


def _unique_safe_keys(names: list[str]) -> list[str]:
    """Return stable, collision-free keys for agent names.

    The first occurrence of every sanitized name keeps its key unchanged.
    Later occurrences receive a numeric suffix starting at the occurrence
    number (``name_2``, ``name_3``, ...). If that suffixed key is already
    claimed — e.g. names ``["a", "a", "a_2"]`` — the suffix is bumped until
    a free key is found, so the returned keys are always pairwise distinct.

    Args:
        names: Agent names, possibly containing duplicates or hyphens.

    Returns:
        One key per input name, in the same order.
    """
    bases = [_safe_key(name) for name in names]

    # Reserve every base key up front: a generated "a_2" must never steal
    # the key that a later agent literally named "a_2" is entitled to.
    taken: set[str] = set(bases)
    occurrences: dict[str, int] = {}
    keys: list[str] = []

    for base in bases:
        count = occurrences.get(base, 0) + 1
        occurrences[base] = count
        if count == 1:
            keys.append(base)
            continue

        suffix = count
        candidate = f"{base}_{suffix}"
        while candidate in taken:
            suffix += 1
            candidate = f"{base}_{suffix}"
        taken.add(candidate)
        keys.append(candidate)

    return keys
def _build_review_only_preset(
    coders: list[str], reviewers: list[str], seniors: list[str],
) -> list[StepConfig]:
    """Build the 'review-only' pipeline: reviewers audit the existing code.

    With exactly one reviewer and no senior, a single ``review`` step with
    the legacy ``review_result`` output key is returned. Otherwise every
    reviewer gets its own parallel ``review_<key>`` step; when a senior is
    configured, the reviewers stop carrying the verdict and a final
    ``senior_review`` step aggregates their outputs and decides instead.

    Args:
        coders: Unused by this preset (kept for the shared builder signature).
        reviewers: Reviewer agent names; at least one is required.
        seniors: Senior agent names; only the first one is used.

    Raises:
        ValueError: If ``reviewers`` is empty.
    """
    if not reviewers:
        raise ValueError("'review-only' preset requires at least 1 reviewer")

    has_senior = bool(seniors)

    # A lone reviewer without a senior keeps the original step/key names.
    if len(reviewers) == 1 and not has_senior:
        return [
            StepConfig(
                name="review",
                agent=reviewers[0],
                role="review",
                prompt_template="default:review-only",
                output_key="review_result",
                verdict=True,
            ),
        ]

    # Step name and output key are the same string for every reviewer.
    review_ids = [f"review_{key}" for key in _unique_safe_keys(reviewers)]
    pipeline: list[StepConfig] = [
        StepConfig(
            name=review_id,
            agent=agent,
            role="review",
            prompt_template="default:review-only",
            output_key=review_id,
            verdict=not has_senior,
            parallel=True,
        )
        for agent, review_id in zip(reviewers, review_ids)
    ]
    if not has_senior:
        return pipeline

    pipeline.append(
        StepConfig(
            name="senior_review",
            agent=seniors[0],
            role="review",
            prompt_template="default:aggregate-review",
            output_key="senior_review_result",
            verdict=True,
            context_override={
                "candidate_outputs": "Current repository working tree under review.",
                "reviews_bundle": _build_named_bundle(
                    reviewers, review_ids, review_ids, "Review",
                ),
            },
        ),
    )
    return pipeline
def resolve_template(template_ref: str, templates_dir: Optional[Path] = None) -> str:
    """Resolve a template reference to its content string.

    Formats:
      - "default:<key>"   -> built-in template for the current language
                             (e.g. "default:generate", "default:review");
                             an unknown language falls back to the "en" table
      - "path/to/file.md" -> contents of that file, read as UTF-8; relative
                             paths are resolved against ``templates_dir``
                             when it is given

    Raises:
        ValueError: If a "default:" reference names an unknown template key.
        FileNotFoundError: If the referenced path is not an existing file.
    """
    if template_ref.startswith("default:"):
        key = template_ref.split(":", 1)[1]
        lang_templates = DEFAULT_TEMPLATES.get(_current_language, DEFAULT_TEMPLATES["en"])
        if key not in lang_templates:
            raise ValueError(
                f"Unknown default template '{key}'. "
                f"Available: {list(lang_templates.keys())}"
            )
        return lang_templates[key]

    # Treat as file path
    path = Path(template_ref)
    if templates_dir and not path.is_absolute():
        path = templates_dir / path
    # is_file() rather than exists(): a directory must be reported as a
    # missing template instead of failing later inside read_text().
    if not path.is_file():
        raise FileNotFoundError(f"Template file not found: {path}")
    return path.read_text(encoding="utf-8")


class _DefaultDict(collections.defaultdict):
    """defaultdict whose default value mentions the missing key's name.

    ``__missing__`` is overridden, so the ``default_factory`` passed to the
    constructor is never called and missing keys are not inserted.
    """

    def __missing__(self, key: str) -> str:
        return f"(no {key} provided)"


def render_template(template: str, context: dict[str, str]) -> str:
    """Render a template string with {variable} placeholders.

    Placeholders whose key is absent from ``context`` render as
    "(no <key> provided)" instead of raising KeyError.
    """
    safe_context = _DefaultDict(str)
    safe_context.update(context)
    return template.format_map(safe_context)
"Feedback for next iteration:", + "oos_title": "Out of Scope Issues", + "oos_desc": ( + "The following issues were found outside the plan/checklist scope " + "but are worth noting." + ), + "final_verdict_title": "Final Verdict", + "repeat_title": "Repeated Aggregate Findings", + "repeat_desc": "The following aggregate-review outputs repeated across iterations.", + "pass_msg": "All checklist items satisfied. No over-engineering or omissions detected.", + "fail_phased": "Pipeline phases ({phases}) completed without full convergence.", + "fail_simple": "Maximum iterations ({max_iter}) reached without passing all checks.", + "metrics_title": "Review Metrics", + "metrics_trend_title": "Metrics Trend", + "metrics_iter": "Iter", + "metrics_total_issues": "Total Issues", + "metrics_na": "N/A", + }, + "ko": { + "title": "교차 검증 리포트", + "summary": "요약", + "prop": "항목", + "val": "값", + "total_iter": "총 반복 횟수", + "final_verdict": "최종 판정", + "duration": "소요 시간", + "max_iter": "최대 반복", + "phases_label": "페이즈", + "iteration": "반복", + "phase": "페이즈", + "steps": "단계", + "max_iterations": "최대 반복", + "consec_pass": "연속 PASS 필요", + "step": "단계", + "verdict": "판정", + "output_chars": "출력 ({n}자)", + "feedback_next": "다음 반복을 위한 피드백:", + "oos_title": "범위 밖 이슈", + "oos_desc": ( + "아래는 기획서/체크리스트 범위 밖이지만 " + "리뷰 중 발견된 이슈입니다." + ), + "final_verdict_title": "최종 판정", + "repeat_title": "반복된 Aggregate 이슈", + "repeat_desc": "아래 aggregate-review 결과가 여러 반복에서 동일하게 다시 나타났습니다.", + "pass_msg": "모든 체크리스트 항목 충족. 
def parse_review_metrics(output: str) -> ReviewMetrics:
    """Parse review output to extract severity, category, and assessment counts.

    Counts are plain occurrence counts of tags anywhere in ``output``:
      - severity tags ``[Critical]`` / ``[Major]`` / ``[Minor]`` (case-insensitive)
      - category tags ``[Over-engineering]`` / ``[Omission]`` and their Korean
        variants ``[과최적화]`` / ``[누락]`` (case-insensitive)
      - assessment lines ``CONFIRMED: <text>`` and ``DISMISSED: <text>`` /
        ``DISMISSED (...): <text>`` (case-sensitive)

    Summary lines such as ``CONFIRMED: 3, DISMISSED: 1`` are not counted as
    assessments, because the text after the colon starts with a digit.
    """
    def count(pattern: str, flags: int = 0) -> int:
        return len(re.findall(pattern, output, flags))

    metrics = ReviewMetrics()

    # Severity: count tagged issue lines (e.g. "[Critical]", "[Major]", "[Minor]")
    metrics.critical = count(r"\[Critical\]", re.IGNORECASE)
    metrics.major = count(r"\[Major\]", re.IGNORECASE)
    metrics.minor = count(r"\[Minor\]", re.IGNORECASE)

    # Categories (EN and KO variants)
    metrics.over_engineering = count(r"\[Over-engineering\]|\[과최적화\]", re.IGNORECASE)
    metrics.omission = count(r"\[Omission\]|\[누락\]", re.IGNORECASE)

    # Assessments — match "CONFIRMED: <text>" but not the summary "CONFIRMED: N".
    # The lookahead rejects whitespace as well as digits: with a bare (?!\d),
    # "CONFIRMED:  3" still matched because \s+ could backtrack to one space
    # and the lookahead then only saw the second space.
    metrics.confirmed = count(r"\bCONFIRMED:\s+(?![\s\d])")
    metrics.dismissed = count(r"\bDISMISSED\b(?:\s*\([^)]*\))?\s*:\s+(?![\s\d])")

    return metrics
def _extract_out_of_scope(output: str) -> str:
    """Extract the 'Out of Scope Issues' section from review output.

    Looks for a '### Out of Scope Issues' or '### 범위 밖 이슈' heading and
    captures the text up to the next line starting with '###' or the end of
    the string.

    Returns:
        The stripped section body, or an empty string when the heading is
        absent, the section is empty, or it only says 'None' / '없음'
        (optionally written as a bullet or with trailing punctuation, e.g.
        '- None' or 'None.').
    """
    # The heading line may only be followed by horizontal whitespace; the
    # body then ends at the next heading that starts a line. Anchoring the
    # terminator with ^ (MULTILINE) instead of "\n###" keeps an empty
    # section from swallowing the heading and body of the section after it.
    pattern = (
        r"###\s*(?:Out of Scope Issues|범위 밖 이슈)[^\S\n]*\n"
        r"(.*?)(?=^###|\Z)"
    )
    match = re.search(pattern, output, re.DOTALL | re.MULTILINE)
    if not match:
        return ""

    content = match.group(1).strip()
    # Ignore bullet markers / punctuation around a lone "None" placeholder.
    placeholder = content.strip(" \t-*.()").lower()
    if placeholder in ("none", "없음", ""):
        return ""
    return content
_build_phased_report( + config: PipelineConfig, result: PipelineResult, +) -> str: + """Build report for a phased pipeline run (e.g. review-fix).""" + lines: list[str] = [] + + lines.append(f"# {_t(config, 'title')}\n") + _append_summary_table(lines, config, result, phased=True) + + phase_map = {p.name: p for p in config.phases} + out_of_scope_items: list[tuple[int, str]] = [] + + for phase_name, phase_iters_iter in groupby( + result.iterations, key=lambda ir: ir.phase_name, + ): + phase_iters = list(phase_iters_iter) + phase_config = phase_map.get(phase_name or "") + + lines.append("---\n") + lines.append(f"## {_t(config, 'phase')}: {phase_name}\n") + + if phase_config: + step_desc = " → ".join(s.name for s in phase_config.steps) + lines.append( + f"{_t(config, 'steps')}: {step_desc} | " + f"{_t(config, 'max_iterations')}: {phase_config.max_iterations} | " + f"{_t(config, 'consec_pass')}: {phase_config.consecutive_pass}\n" + ) + + steps = phase_config.steps if phase_config else config.pipeline + + consecutive = 0 + for iter_result in phase_iters: + verdict_label = "" + if iter_result.verdict: + if iter_result.verdict == "PASS": + consecutive += 1 + if phase_config and phase_config.consecutive_pass > 1: + verdict_label = f" — PASS ({consecutive}/{phase_config.consecutive_pass})" + if consecutive >= phase_config.consecutive_pass: + verdict_label += " ✓" + else: + verdict_label = " — PASS ✓" + else: + consecutive = 0 + verdict_label = " — FAIL" + + lines.append( + f"### {_t(config, 'iteration')} {iter_result.iteration}{verdict_label}\n" + ) + _append_iteration_steps(lines, config, iter_result, steps, out_of_scope_items) + + if iter_result.feedback: + lines.append( + f"**{_t(config, 'feedback_next')}** {iter_result.feedback[:200]}..." 
def _append_summary_table(
    lines: list[str],
    config: PipelineConfig,
    result: PipelineResult,
    phased: bool = False,
) -> None:
    """Append the Markdown summary table for a run to ``lines``.

    The table always lists the number of iterations, the final verdict and
    the total duration (shown as "<m>m <s>s", or just "<s>s" when under a
    minute). For a phased run with configured phases it adds the phase chain
    and one row per phase; otherwise it adds the max-iterations row. A blank
    entry terminates the table.
    """
    elapsed = result.total_duration
    whole_minutes = int(elapsed // 60)
    leftover_seconds = int(elapsed % 60)
    if whole_minutes:
        duration_str = f"{whole_minutes}m {leftover_seconds}s"
    else:
        duration_str = f"{leftover_seconds}s"

    rows: list[str] = [
        f"## {_t(config, 'summary')}\n",
        f"| {_t(config, 'prop')} | {_t(config, 'val')} |",
        "|----------|-------|",
        f"| {_t(config, 'total_iter')} | {len(result.iterations)} |",
        f"| {_t(config, 'final_verdict')} | **{result.final_verdict}** |",
        f"| {_t(config, 'duration')} | {duration_str} |",
    ]

    if phased and config.phases:
        chain = " → ".join(phase.name for phase in config.phases)
        rows.append(f"| {_t(config, 'phases_label')} | {chain} |")
        rows.extend(
            f"| {_t(config, 'phase')}: {phase.name} | "
            f"{_t(config, 'max_iterations')} {phase.max_iterations}, "
            f"{phase.consecutive_pass}x {_t(config, 'consec_pass')} |"
            for phase in config.phases
        )
    else:
        rows.append(f"| {_t(config, 'max_iter')} | {config.max_iterations} |")

    rows.append("")
    lines.extend(rows)
iter_result.step_outputs.get(step.output_key, "") + + agent_name = agent_result.agent_name if agent_result else step.agent + duration = f" ({agent_result.duration_seconds}s)" if agent_result else "" + + lines.append(f"### {_t(config, 'step')}: {step.name} ({agent_name}){duration}\n") + + if step.verdict and iter_result.verdict: + lines.append(f"**{_t(config, 'verdict')}: {iter_result.verdict}**\n") + + if len(output) > 500: + lines.append("

") + lines.append( + f"{_t(config, 'output_chars', n=str(len(output)))}\n" + ) + lines.append(output) + lines.append("\n
\n") + else: + lines.append(output) + lines.append("") + + if step.role == "review": + oos = _extract_out_of_scope(output) + if oos: + out_of_scope_items.append((iter_result.iteration, oos)) + + # Parse and accumulate review metrics for this iteration + step_metrics = parse_review_metrics(output) + if iter_result.review_metrics is None: + iter_result.review_metrics = step_metrics + else: + iter_result.review_metrics = _aggregate_metrics( + iter_result.review_metrics, step_metrics, + ) + + +def _append_review_metrics_table( + lines: list[str], + config: PipelineConfig, + result: PipelineResult, +) -> None: + """Append per-iteration review metrics table and trend summary.""" + # Only include if at least one iteration has metrics + has_metrics = any(ir.review_metrics for ir in result.iterations) + if not has_metrics: + return + + na = _t(config, "metrics_na") + + lines.append("---\n") + lines.append(f"## {_t(config, 'metrics_title')}\n") + + # Table header + lines.append( + f"| {_t(config, 'metrics_iter')} | {_t(config, 'verdict')} " + f"| Critical | Major | Minor " + f"| Over-eng | Omission " + f"| CONFIRMED | DISMISSED |" + ) + lines.append("|------|---------|----------|-------|-------|----------|----------|-----------|-----------|") + + # Table rows + for ir in result.iterations: + m = ir.review_metrics + v = ir.verdict or "-" + if m: + lines.append( + f"| {ir.iteration} | {v} " + f"| {m.critical} | {m.major} | {m.minor} " + f"| {m.over_engineering} | {m.omission} " + f"| {m.confirmed} | {m.dismissed} |" + ) + else: + lines.append( + f"| {ir.iteration} | {v} " + f"| {na} | {na} | {na} " + f"| {na} | {na} " + f"| {na} | {na} |" + ) + + lines.append("") + + # Trend summary + metrics_list = [ + (ir.iteration, ir.review_metrics) + for ir in result.iterations + if ir.review_metrics + ] + if len(metrics_list) >= 2: + lines.append(f"### {_t(config, 'metrics_trend_title')}\n") + _append_trend_line( + lines, "Issues", + [(it, m.critical + m.major + m.minor) for it, m in 
def _append_trend_line(
    lines: list[str],
    label: str,
    values: list[tuple[int, int]],
) -> None:
    """Append a single trend line like '- Issues: 6 → 2 → 0 (decreasing)'.

    Args:
        lines: Report lines to append to (mutated in place).
        label: Name of the metric shown before the colon.
        values: ``(iteration, value)`` pairs in iteration order; only the
            values are rendered.

    The direction compares the last value with the first one only
    ("decreasing", "increasing" or "stable"). Nothing is appended when
    ``values`` is empty, since there is no trend to describe.
    """
    if not values:
        return

    nums = [value for _, value in values]
    first, last = nums[0], nums[-1]
    if last < first:
        direction = "decreasing"
    elif last > first:
        direction = "increasing"
    else:
        direction = "stable"

    arrow = " → ".join(str(n) for n in nums)
    lines.append(f"- {label}: {arrow} ({direction})")
_append_repeated_aggregate( + lines: list[str], + config: PipelineConfig, + result: PipelineResult, +) -> None: + """Append repeated aggregate warnings if any exist.""" + if not result.repeated_aggregate_warnings: + return + lines.append("---\n") + lines.append(f"## {_t(config, 'repeat_title')}\n") + lines.append(f"{_t(config, 'repeat_desc')}\n") + for warning in result.repeated_aggregate_warnings: + lines.append(f"- {warning}") + lines.append("") diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..24d3f55 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,18 @@ +[build-system] +requires = ["setuptools>=68.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "cross-eval" +version = "0.1.0" +description = "AI agent cross-evaluation CLI tool" +requires-python = ">=3.9" +dependencies = [ + "pyyaml>=6.0", +] + +[project.scripts] +cross-eval = "cross_eval.cli:main" + +[tool.setuptools.packages.find] +include = ["cross_eval*"] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..3aecde9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +pyyaml>=6.0 diff --git a/tests/__pycache__/test_config.cpython-312.pyc b/tests/__pycache__/test_config.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b2ce054f8e591de52b319d29733f0565d1543435 GIT binary patch literal 22178 zcmd^ndvH|Ondj~I?N+zck{Y3wkaYF1r4~rM-!@o0jST_=Cb1mS)#$cFLqFtuTL$&O z6iLX8?0Cw!Vh#IxD_thsB^({&?^fe?J`x+BXeN73WPe?TPH78p7S`w{&t%|<#uvn*;!8XMy<#8=^vaA-?pjoz7Q+vL&@^{lcML_+{YMe0 z3(LbpLn$dOhoxvlP9!Ekh704d1z#*)vcjz`0h!Duoaawz3np5C?l;I7_q&(7_= z2M;LqVKF)o8IGsJax@uBNpct!!$VS3j;58S$k0%HWJa?nnogq^S*c5=q(meh%S2}s zQYxNDh8Rgl!~Ln`Kx{DNQp$R06kFM=Qn4!?mDu;}kIEQ;Qnv4CM2>2?<@;ho(ReI5 z<6SH4+aG;C7Co^inwDbyvf??Ajt;3+*_4RUbhJN>K@CMQ0ui4-C`AVm4SAJHeEIhM zJqNdk4{qPHZ#N=JU`A5-$-PR&{_Tf$ZU0KXSXEI16$)r+dwX|3lP|fjR4Nq@_Uzob zfBR1Ay{Iu|(X3fiu`YaMI2ISf{ZdM%m8P%P*Qr*^(^3awC-t0QVNNQE<Li 
z#85n{rG~7ESM3K=2MkgILlH@))n)x9)amZmk(ENL`eK=S6_N8?lg#t>=7R((`)`WitqjI_*+|77gh=KMkQ$IxnJkPR?JNy{O zowl6fj+x786;4?$Ti@2wS92B)r8Lf=6<~pAOWXDLQww$n9xM$Flg?nkwN!v5r2}{V{d&%}$S$|55p6ntheO9i%sOyDyullYM zH?a-I2r>)0y5!@rp{~JLx{Fr2tN&=U|9Hr*xa8p@G&Kqzk>#kA-j$SSVibEs8kB(@ zb~zd!P&}-aa59pJ$~1OC_^fB?(uXXkCt_Q_{#%hPqu* z@P(ymbRjdSafj}q5v7Y#@s6)yxQPW*`Ia1}!G1c_`TFCLVKLgJF1>_JhYMLH`d+r8mn>2j;xdgukd^`}0-HIt zeDMNAPsZeQX6;;4K1aos)KeqC87{|rU-rG=JC}O%4=4FG`djkNi=Rj0R%pf^xPklsdo~TOCrKuGgBh)Y2ihb5r9Qm+RkG~&$ z^y-Q0{LYyR$fISpfMGoOD9p-e=`MOQC3|d4x{vCss z%dWRI%0F#MYl;xZowA6$=)UaH)dRNS)6Q7*oU$CF5f?ogUt$VZVU}2?kNW{zRJlv3 z7WvQFqJ#0PQZ2gkEdo8g8ozR|o%#}F0+It17jckCQq1!&=^=b8WC|yf7G%Xq6(m`4 z>(Yq~aYNNQq;)7NJxb-RN`;=UsWFOA+genuMj{pxa!M6c#Y@0VfKkGBifsS@Y0&mY zeUkE_Nh#_+x1CYOeRP}fFlZysCh=imDFrz>^?X#4Vj}3~Yje@f7j3-@Qh4fPmpxE^*TXGnIk!2tusvJT zkz3H34YW;Hx~n{+&gmM?-I4QFY6`w+1;MIUe4`PIqX=?8x0p4#!pWKSKkQk zm4!P*Q)Dq2O`ASc>?4EA@~X)r&lX<}W_b_}*c+K!1aT1B_$5z8)n z^_tA)QlQ0P*Db*+U$BWjVmZ)w%Ii3cpyJ0CGozWI^|WbPkH5EIizjYfPm0?zL|(HH zZlP%l?MuusV?7H~mEQ1atCc%t)r~9%z@B;)TYp0AM``2Hrp7W=izJguYk@jzg(_?7 zbBx$Sj?WTvaR9U7B|JnJH`729PD7a?eW&}8c&Z=Rx2Zck5P`Wy8cv#$RGWuHYC*<# zX#4(cyS5(OAnfZoa6sXDLr#ScGgT}cRy^TwA|(!kPlXk4I7|W~F=xt=qUm8NNxdmf zIW4A!(<}rxyTmc)QCx9DSzfQJjze|LS>yGZ>QCU+C3z?Nl?o{aWqTwPt5&1gL18XQV@t?pi>Si0Cw zh0ooJg3>kuO9;@OlPQPtJS3(1qq5wM6@r?SBt6yO~{I8md2wQOPL)T9we^QGVLayBnqhT;-x%Wpgf2uT-weHMCB<9hII@ z$8LnZM=lsk6OT=jds574Q&)S-yjTnWan-vZZJf?V{zf<85vC8MK_X z#JJNY1_|lVB06K1Q&!OhMoH6+=d>-)LRkqnE8#Jfu!A+0*waoK3dO8aq1`MwGO0}Q z8&*yr4cY6M7AXy4D4AN#%)7xT1G^8E2{}|$CS+0UB;qI)DxQYZQCJ3HGgUlb7DL17 z@bTzKrs4qji4c?J;iwRYP2fXfmcV3pOgHCd0)imt3kj^4@FC+~kHZ|qEV6(6Lla!}e;nD!Dh6jH-0tP{luo47)JI%%d17 zOi+qrASS`M<&>fCCPkU19(+mA3?M5RVa#ef4uiKPV=Jb`{wedt1cdNM8hell=_&amn%531_Y{&X+ z^@giEuRS+yvof-Aa#c(8yiM8a&A(chJ@_;-Nglk*Gnxrb@WHW%CV9b-CrC#09%5uO zQ=TBCX7a?JB2q{qc}pnd3SwjIfmC6+Z0NWk7X#5xfl!PM*;ZOQ5FH=_!WlfX5rOD5 zmjMyDdKGJw_%0uhBnO9~cVr$I<5rczxRt;-i&78kW060B;RN|YGW7;(_vp?BVMi<} 
zvdrq4NW2m#%v2aZzEBbfka6yk#=)ONoOTN48bQcBJQ^R@&{egcs#)P!vOf-UAITUq z#t3XDzfoq#T&Oz#cFtjF@)H0|H@O&?tn40jLQ4Ve+_hOB^-Octsxqh@JXh5?HZVT$ zer4CF^S7$qFI%(n${|&TB{N{QX7R;C*ZEa5(aqqkANx z7*^3^LRt%(Zp ztpLdQv?{9O1`-3)e97v-Y=blOmsO0bsSSn6H>7O^u&OiVJ3IifMB~Cy39?xt79Ytx zIXA8_chGzr`##1dp9L_Ph;Dk@bN*KB_}SJ~S^w&r{>CYn9SqLHl`n!wHtlmkOZ6L@ zyI#B{>u)ueZOD2XN|tS+ru{9Zvc8H>snMCJM$!}GY(EBS6mqsf3-l7}j{$Xbf#>dfJo4uQ`Tcdr$rC69D5zNjeFVFZyn_Nt%u7=KsaS9zKs4q>itprH2)ueC;{N&M= zp$Mgts5BUb`;6ofLXW)Ev2F%^4iCj+{piXVwj3Uero+R@*mJ|tFf~D2Wc|F!OI2e#+qMQKg#E2hStSzZTiusD=k-+PqeOn z-@68IY}M$&m!J90GuhVF6W%qSSV7m*jzW6Qg_i$ipj!Al z;cRH*g!hrs)wJH4ue8k4o8n!=1eo7{(!}}J{{9m`=X;dFwM>!ydG%H+|K8&2tuFp= z+p4#E`2sb--~@k!Nhsz7Pz0=lpj{IuC{6zsP`TAa<<`<1!Js*&U|LV`@J$q;SVn~{ti0}$tl6q$YVz_JHw z@$YDKdjWuEeihAfzJ_em(yVuxifl~A&6_BH9qNAHyY$im9YHQGnD8#m`P#A%Z_auj zQ2jDD+-R3fI9iJhOsYnXpJx zgoo|)s#8SML)3H?vfba%X!1EQ0XRuID7CMPt^yHquG%H`Xwg28+Wf42o18ea7#*yl zXV%(vzUF+ANJ}<(n%K@SQ5tR5Oa_of5&Mu> zg7gAF$fa)Aj6bqHdn;uz8yWE;#f2SOjP<8MH_U|gB4tzQLXw#wO*21{E=l9;T;8xj zJ|k6Dn71@NpT_0=q}2}t5Lgz z8#Rx|pg&*9x-Le4pRcY;`E%BV%IVg2o;>}KPWj>g!DF~tRS}uF8>AuRf^_n-Mj_Qm z-$TB%j|!;{SCwLj9Cf4GvKf-ciCFrmu6ssAQ8tkb$+nrDb8Rk~{LTc9`!w`V7==6r zV3sosLV=dq075AeH9#BEh2R^6x=oY(dNWE>deeobi%Tc@4M^8e`p5^v8i*w$@nXb$d@jU%ke158;N(BRxCk@bn4+Z#J39X^Ld##bcD(Nm{m9U)#-Uh+ z6fYe_gqcLO=h8_km9?7@uLS4){U>hD*U60Er6%k$9$r+8I-YWjA@Bx1XO&meiJ>9>C2&BeK3D7UB;#M4@J*X9V6 z!^;ToW3csu+Ll~%SFUA6t~r!j(vxdno@?uXc`;Zw?dO86v*g{auB@+_b_6QXSPdGh zXk@2u9=oPByfhnw+Q?RfpY}T6CgvZ^ag<+5qOKh*9p+M zN#-1uS}8`L6aVtF0B5-=tJ6_6<+nQ4Oz}3ye#?}{=6D=1K=CD3$L1-%&f%F_w-8{3 z+p&78rWUbP^#B_@0JRkWEj~x*RAZTA@l>7H(K)@S&2h-0_Dj9>P>9$+7l*PIUdi zs>=xAeb!RKrpW8G4Vnyi3ulmYb+7YQqFON>n-+R+b{^`>x%Lg8_?S|%yulB!-R_B0Ssal|7>aM&7%lsDBC2+aB?%Hd=R zt{I$93+q!E){!-{btb;6w;;4#Vm^&*1$xOp!8p!v(@w6uA=^mixSPJFZ1d`@cg;;7 z95pyAv*D(0cAIiL7I|KHa&+6dhj5y(s)k*g7^Z6zUBC4QZuo@>zmN-bTx`oNTy$zRXJ)#{#iO#s_YDcLu4s#iZM)=Fud1(ho$vK0Rp3+FkmD#eth%j_wiY4W= zG~AySw(jlSv1|XH?c0QMU4xH1fQ5>rcX+hOiiu6F4vu6*E2aM%3Aw_$r}kKL%17O{W|-11aHtHY25W 
zmuKww#Z{C3Wrc6|92%OrQT-k&=F#lZLp?#qdqGD}OCg#OPiJWMHAH4R&-y@jdCXpC z1CekA)Rx)2Q}aFLd7{4b=_ptnv(yUdln@)ldHtwq>2Z)I0si?fF~(jL+M$4R$0u7?P4cTXv4~#RUgy`% zRF92Ge+SjSlu0*j)p<$;^=kBvZj8r#m+!u1?fFeVUv|V1a%OhskB1n1Szk?<|Ac1? zx2nS+T8E?}d6w7YSzeQ8c}Re2V=GrJx;;tIKyT6F`^E3aU6N_aKbgL%T)8;N7JT3cI% zU3C5uQf+EDDSEnuXSYhRG>#0!4?nvX9*3@Iau8SNq3|RJ4+}jaZuJSn$z-%22a_Yx z2!!qaqsS8Ck&&oG)%Qe>r6km)Z5ju~4hwrIf)cSL&V34+4vriFzYZXPOG)7v>vz_Q z%wPcXMN-5i{y|o4+ZJ>jkISsX7&a<%hrKBQ6bVK`rP0sO@DZGW5PJ6Q@_6Wj4-DgS zku-w3`loIXmM;-l{~LrAh*673Ly6)k!2UR#Olx`CC+cY{sYP9C+8``jlHWY>=E++a zRi6@3a>y+;BVe{h9PQ9%UU4L75)sfq6hGY)3Y*qJ;Zs`IiW@p7&Y?myQrw1Yr0WGv zm3kFRqEOUoVtlyNZT#?&h#c$BtecHFs6yK8H6`L}KfxEB;qF?!Rd$@*tK)oRx$t(3 zTEJJedqS0O)Lp0>e`=C%H>WMUuyDM2l5e|D+Ku4iiQwXksJ+x&vQ&%p7wX3&lYED{ z980^-cg!IYB>y=uGSeZBSqc5010$QyI7=}yQ#~*wTkmkE#*l7ZdHtiGU%qwjb>a38 z-nsqaC=t0%sfVBa=&ycx`xmd@9{%y(^ zQL6Z-?+CZwef6U&XQ;l$TyBk?z4e2Cby#OR!tEb^`_}lM2_OCZ%96 z^TSc$)?Z%z=ogpojGe_G45diuQ01u52I0=P&R`6}ogcn*=f$hq09X?rUA=PWdw+8K z?CaK*-(#C#m0e5Z-@KOiOckwybo8Naf1}PhGAzjxdYv(;fYz;T~e(zQ?3uW<%PL8y;pX* zLf5Zx!^AM1&$wYy%E0Y6-yXqsU8+pwJjztfqfGTY$^_<7W`Vg3sK2IH%3#`6pZGTs z%aj*7Clwzt1^iZ&dX?m$^k*n^$twK;0)-#aer1cHf9BMEREv`IMwF(j0W)-u?esGi zLgWYpd(b#I1{3DtilLKM9^)$Oj-Fk+6+1zQ!OzjiyzG>-Dhtc1AD{gE!#4qFl%JE; z(MGQ_Irc5d!TYeP8RKW{3aS;UR6}qW;ewp{O~q_VpOVsZMxRh}7T^T*7`NYK#iyP_ zz`W9geO9jS?SjTb6kZpxq^p=0X6DQIv`pck1fPTA5u^R|>o3r&beAu4^pd(M(J7?- z)g$RNUW*Q=w=n)zd^bolEripNBk^cvGh}>ne2EMdCy2@}`#v|zEH7d| zI5EN4&nSkU@oIOLm^nJ**ZGic5?0Ff7=FbLr<_$*Bu%2K%6{B(3P&3#OR>}CcIgIX z{h9!g{aon#Gb&U9ebqxg8V16HR)Dx++Fik7v}+dZh=QDXVqVxhueSb?kb49}D?skn zTwMpw8sys6!@am{sp z1;e;r)e7n^vVuzgnkMbP5csbICJ6ikf!`4*GK0QHX#`UAQB8=D8MJH4zQ7Th>h=S) zRX94PTGl$2OiPwJ$4b^Ut3kg{kci+{sKTLc`h|jlVX_s&`bwL;!sA*?nz)SO(_`I< zRR3|wPPu;dFgg8bTsQj}8TNx6?83WJY06_Jl|l6a94Xl9Z^Dr6j~!_#D9i5E%j)%c z#sS&!7c!4aj4M`BjF_ZSZR)|;Dyu1{%ACVa8^|WdO5S^B_^Tzd9IGbwMHFLqhZ-qH zb6GGx6bTGwYi#SV< zW!ibbZaHKb`|6E`?umx(X^y>4$N43eot812e(anGc20BjHvM(WKC5Nr*vT7B%O;wZ nO>^{icfiisy`!1GvjubZ$}^Au@t$w&`S#wsHqKthaOeL5PKkCK 
literal 0 HcmV?d00001 diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..7a4cb1a --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,515 @@ +from __future__ import annotations + +import unittest +from unittest.mock import patch + +from cross_eval.agent import _supports_reasoning_effort +from cross_eval.agent import invoke_agent +from cross_eval.config import ( + BUILTIN_AGENTS, + _default_seniors_for_preset, + apply_reasoning_effort_settings, + normalize_reasoning_effort, + validate_config, +) +from cross_eval.models import ( + AgentConfig, + IterationResult, + PhaseConfig, + PipelineConfig, + PipelineResult, + ReviewMetrics, + StepConfig, +) +from cross_eval.pipeline import _detect_repeated_aggregate +from cross_eval.prompts import ( + GENERATE_TEMPLATE, + GENERATE_TEMPLATE_KO, + REVIEW_TEMPLATE, + REVIEW_TEMPLATE_KO, + REVIEW_ONLY_TEMPLATE, + REVIEW_ONLY_TEMPLATE_KO, + AGGREGATE_REVIEW_TEMPLATE, + AGGREGATE_REVIEW_TEMPLATE_KO, + _build_cross_review_preset, + _build_review_fix_preset, + _build_review_only_preset, + _build_simple_preset, +) +from cross_eval.report import build_report, parse_review_metrics + + +class BuiltinAgentConfigTest(unittest.TestCase): + def test_codex_builtin_agents_skip_git_repo_check(self) -> None: + for agent_name in ("codex-coder", "codex-reviewer", "codex-senior"): + with self.subTest(agent=agent_name): + self.assertIn( + "--skip-git-repo-check", + BUILTIN_AGENTS[agent_name].args, + ) + + def test_senior_builtin_agents_exist(self) -> None: + self.assertIn("claude-senior", BUILTIN_AGENTS) + self.assertIn("codex-senior", BUILTIN_AGENTS) + + def test_builtin_reasoning_effort_defaults_match_recommended_levels(self) -> None: + self.assertEqual(BUILTIN_AGENTS["codex-coder"].reasoning_effort, "medium") + self.assertEqual(BUILTIN_AGENTS["codex-reviewer"].reasoning_effort, "medium") + self.assertEqual(BUILTIN_AGENTS["codex-senior"].reasoning_effort, "high") + + def 
test_normalize_reasoning_effort_aliases(self) -> None: + self.assertEqual(normalize_reasoning_effort("extra-high"), "xhigh") + self.assertEqual(normalize_reasoning_effort("extra_high"), "xhigh") + self.assertEqual(normalize_reasoning_effort("x-high"), "xhigh") + + def test_apply_reasoning_effort_settings_uses_defaults_and_role_overrides(self) -> None: + config = PipelineConfig( + agents={ + "codex-coder": AgentConfig(name="codex-coder", command="codex"), + "codex-reviewer": AgentConfig(name="codex-reviewer", command="codex"), + "codex-senior": AgentConfig(name="codex-senior", command="codex"), + }, + coders=["codex-coder"], + reviewers=["codex-reviewer"], + seniors=["codex-senior"], + ) + + apply_reasoning_effort_settings( + config, + reviewer_effort="high", + senior_effort="xhigh", + ) + + self.assertEqual(config.agents["codex-coder"].reasoning_effort, "medium") + self.assertEqual(config.agents["codex-reviewer"].reasoning_effort, "high") + self.assertEqual(config.agents["codex-senior"].reasoning_effort, "xhigh") + + def test_codex_supports_reasoning_effort_override(self) -> None: + self.assertTrue(_supports_reasoning_effort("codex")) + self.assertFalse(_supports_reasoning_effort("claude")) + + def test_invoke_agent_passes_reasoning_effort_to_codex(self) -> None: + captured: dict[str, list[str]] = {} + + def _fake_run(cmd, **kwargs): + captured["cmd"] = cmd + + class _Result: + returncode = 0 + stdout = "VERDICT: PASS" + stderr = "" + + return _Result() + + agent = AgentConfig( + name="codex-reviewer", + command="codex", + args=["exec", "--model", "gpt-5.4", "-"], + reasoning_effort="high", + ) + + with patch("subprocess.run", side_effect=_fake_run): + invoke_agent(agent, "prompt", "review", quiet=True) + + self.assertEqual( + captured["cmd"][:3], + ["codex", "-c", 'model_reasoning_effort="high"'], + ) + + def test_detect_repeated_aggregate_warns_on_same_output(self) -> None: + steps = [ + StepConfig( + name="aggregate_review", + agent="codex-senior", + 
role="review", + prompt_template="default:aggregate-review", + output_key="aggregate_review", + ), + ] + history: dict[str, int] = {} + + first = _detect_repeated_aggregate( + steps, {"aggregate_review": "Same issue list"}, history, iteration=1, + ) + second = _detect_repeated_aggregate( + steps, {"aggregate_review": " same issue list "}, history, iteration=2, + ) + + self.assertIsNone(first) + self.assertEqual( + second, + "Repeated aggregate_review detected at iteration 2 (same as iteration 1).", + ) + + def test_report_includes_repeated_aggregate_section(self) -> None: + config = PipelineConfig(language="en") + result = PipelineResult(repeated_aggregate_warnings=[ + "Repeated aggregate_review detected at iteration 4 (same as iteration 3).", + ]) + + report = build_report(config, result) + + self.assertIn("Repeated Aggregate Findings", report) + self.assertIn("same as iteration 3", report) + + def test_review_fix_defaults_senior_from_reviewer_family(self) -> None: + self.assertEqual( + _default_seniors_for_preset( + "preset:review-fix", + ["codex-reviewer", "claude-reviewer"], + BUILTIN_AGENTS, + ), + ["codex-senior"], + ) + self.assertEqual( + _default_seniors_for_preset( + "preset:review-fix", + ["claude-reviewer"], + BUILTIN_AGENTS, + ), + ["claude-senior"], + ) + self.assertEqual( + _default_seniors_for_preset( + "preset:simple", + ["codex-reviewer"], + BUILTIN_AGENTS, + ), + [], + ) + + def test_review_fix_duplicate_reviewers_get_unique_step_keys(self) -> None: + phases = _build_review_fix_preset( + ["codex-coder"], + ["codex-reviewer", "codex-reviewer", "codex-reviewer"], + [], + ) + + converge = phases[0] + self.assertEqual( + [step.name for step in converge.steps[:3]], + [ + "review_codex_reviewer", + "review_codex_reviewer_2", + "review_codex_reviewer_3", + ], + ) + self.assertEqual( + [step.output_key for step in converge.steps[:3]], + [ + "review_codex_reviewer", + "review_codex_reviewer_2", + "review_codex_reviewer_3", + ], + ) + self.assertEqual( + 
[step.name for step in converge.steps[3:]], + ["aggregate_review", "generate", "verify"], + ) + + def test_review_only_duplicate_reviewers_get_unique_step_keys(self) -> None: + steps = _build_review_only_preset( + ["codex-coder"], + ["codex-reviewer", "codex-reviewer"], + [], + ) + + self.assertEqual( + [step.output_key for step in steps], + ["review_codex_reviewer", "review_codex_reviewer_2"], + ) + + def test_cross_review_duplicate_coders_get_unique_step_keys(self) -> None: + steps = _build_cross_review_preset( + ["codex-coder", "codex-coder"], + ["codex-reviewer"], + [], + ) + + self.assertEqual( + [step.output_key for step in steps], + [ + "code_codex_coder", + "code_codex_coder_2", + "review_by_codex_coder", + "review_by_codex_coder_2", + ], + ) + + def test_review_fix_uses_senior_for_aggregate_and_verify(self) -> None: + phases = _build_review_fix_preset( + ["codex-coder"], + ["claude-reviewer", "codex-reviewer"], + ["codex-senior"], + ) + + steps = phases[0].steps + self.assertEqual(steps[2].name, "aggregate_review") + self.assertEqual(steps[2].agent, "codex-senior") + self.assertEqual(steps[3].name, "generate") + self.assertEqual(steps[4].name, "verify") + self.assertEqual(steps[4].agent, "codex-senior") + self.assertTrue(steps[4].verdict) + + def test_review_only_with_senior_adds_aggregate_step(self) -> None: + steps = _build_review_only_preset( + ["codex-coder"], + ["claude-reviewer", "codex-reviewer"], + ["claude-senior"], + ) + + self.assertEqual(steps[-1].name, "senior_review") + self.assertEqual(steps[-1].agent, "claude-senior") + self.assertTrue(steps[-1].verdict) + self.assertFalse(steps[0].verdict) + self.assertFalse(steps[1].verdict) + + def test_simple_with_senior_adds_final_aggregate_step(self) -> None: + steps = _build_simple_preset( + ["codex-coder"], + ["codex-reviewer"], + ["codex-senior"], + ) + + self.assertEqual( + [step.name for step in steps], + ["generate", "review", "senior_review"], + ) + self.assertFalse(steps[1].verdict) + 
self.assertTrue(steps[2].verdict) + + def test_validate_config_rejects_duplicate_phase_step_names_and_output_keys(self) -> None: + config = PipelineConfig( + agents={ + "codex-reviewer": AgentConfig( + name="codex-reviewer", + command="codex", + ), + }, + phases=[ + PhaseConfig( + name="converge", + steps=[ + StepConfig( + name="review_dup", + agent="codex-reviewer", + role="review", + prompt_template="default:review-only", + output_key="same_key", + verdict=True, + ), + StepConfig( + name="review_dup", + agent="codex-reviewer", + role="review", + prompt_template="default:review-only", + output_key="same_key", + verdict=True, + ), + ], + ), + ], + ) + + errors = validate_config(config) + + self.assertIn("Phase 'converge' has duplicate step name 'review_dup'", errors) + self.assertIn("Phase 'converge' has duplicate output_key 'same_key'", errors) + + +class PromptTemplateTest(unittest.TestCase): + """Verify prompt template content after category/assessment refactor.""" + + def test_review_templates_no_false_positive_category(self) -> None: + """False positive should NOT appear as a category in review templates.""" + for tmpl, label in [ + (REVIEW_TEMPLATE, "REVIEW_TEMPLATE"), + (REVIEW_TEMPLATE_KO, "REVIEW_TEMPLATE_KO"), + (REVIEW_ONLY_TEMPLATE, "REVIEW_ONLY_TEMPLATE"), + (REVIEW_ONLY_TEMPLATE_KO, "REVIEW_ONLY_TEMPLATE_KO"), + ]: + with self.subTest(template=label): + # Should not contain "False positive" as a category bullet + self.assertNotIn( + "**False positive**", tmpl, + f"{label} still lists False positive as a category", + ) + # KO variant + if label.endswith("_KO"): + self.assertNotIn( + "**오탐**", tmpl, + f"{label} still lists 오탐 as a category", + ) + + def test_review_templates_have_confirmed_dismissed(self) -> None: + """Review templates should instruct CONFIRMED / DISMISSED assessment.""" + for tmpl, label in [ + (REVIEW_TEMPLATE, "REVIEW_TEMPLATE"), + (REVIEW_TEMPLATE_KO, "REVIEW_TEMPLATE_KO"), + ]: + with self.subTest(template=label): + 
self.assertIn("CONFIRMED", tmpl) + self.assertIn("DISMISSED", tmpl) + + def test_generate_templates_ignore_dismissed(self) -> None: + """Generate templates should tell coder to ignore DISMISSED items.""" + self.assertIn("DISMISSED", GENERATE_TEMPLATE) + self.assertIn("DISMISSED", GENERATE_TEMPLATE_KO) + + def test_aggregate_templates_dismissed_structure(self) -> None: + """Aggregate templates should use [False positive] / [Already fixed] tags.""" + self.assertIn("[False positive]", AGGREGATE_REVIEW_TEMPLATE) + self.assertIn("[Already fixed]", AGGREGATE_REVIEW_TEMPLATE) + self.assertIn("[오탐]", AGGREGATE_REVIEW_TEMPLATE_KO) + self.assertIn("[수정 완료]", AGGREGATE_REVIEW_TEMPLATE_KO) + + +class ReviewMetricsParsingTest(unittest.TestCase): + """Test review output metrics parsing.""" + + def test_parse_review_metrics_basic(self) -> None: + output = """\ +### Issues Found +- [Critical][Over-engineering] Added unnecessary caching layer +- [Major][Omission] Missing input validation for user_id +- [Major][Omission] Missing error handling for DB calls +- [Minor][Omission] No docstring on public API + +### Summary +- Critical: 1, Major: 2, Minor: 1 +- Over-engineering count: 1 +- Omission count: 3 +- CONFIRMED: 0, DISMISSED: 0 +""" + m = parse_review_metrics(output) + self.assertEqual(m.critical, 1) + self.assertEqual(m.major, 2) + self.assertEqual(m.minor, 1) + self.assertEqual(m.over_engineering, 1) + self.assertEqual(m.omission, 3) + self.assertEqual(m.confirmed, 0) + self.assertEqual(m.dismissed, 0) + + def test_parse_review_metrics_korean(self) -> None: + output = """\ +### 발견된 이슈 +- [Critical][과최적화] 불필요한 캐시 레이어 추가 +- [Major][누락] user_id 입력 검증 누락 + +### 이전 피드백 평가 +- CONFIRMED: DB 에러 핸들링 — 여전히 미구현 +- DISMISSED (오탐): 타입 힌트 누락 — 기획서에 없는 요구사항 +""" + m = parse_review_metrics(output) + self.assertEqual(m.critical, 1) + self.assertEqual(m.major, 1) + self.assertEqual(m.over_engineering, 1) + self.assertEqual(m.omission, 1) + self.assertEqual(m.confirmed, 1) + 
self.assertEqual(m.dismissed, 1) + + def test_parse_review_metrics_with_assessment(self) -> None: + output = """\ +### Previous Feedback Assessment +- CONFIRMED: Missing auth check — still not implemented +- CONFIRMED: SQL injection risk — still present +- DISMISSED (false positive): Unused import — actually used in tests + +### Issues Found +- [Critical][Omission] Missing auth check +- [Critical][Omission] SQL injection risk +""" + m = parse_review_metrics(output) + self.assertEqual(m.confirmed, 2) + self.assertEqual(m.dismissed, 1) + self.assertEqual(m.critical, 2) + self.assertEqual(m.omission, 2) + + def test_report_includes_metrics_table(self) -> None: + config = PipelineConfig( + language="en", + pipeline=[ + StepConfig( + name="review", + agent="claude-reviewer", + role="review", + prompt_template="default:review", + output_key="review_result", + verdict=True, + ), + ], + ) + result = PipelineResult( + iterations=[ + IterationResult( + iteration=1, + step_outputs={ + "review_result": ( + "### Issues Found\n" + "- [Critical][Omission] Missing auth\n" + "- [Major][Over-engineering] Extra abstraction\n" + "### Verdict\nVERDICT: FAIL" + ), + }, + verdict="FAIL", + ), + IterationResult( + iteration=2, + step_outputs={ + "review_result": ( + "### Previous Feedback Assessment\n" + "- CONFIRMED: Missing auth — still missing\n" + "- DISMISSED (false positive): Extra abstraction — needed per plan\n" + "### Issues Found\n" + "- [Major][Omission] Missing auth\n" + "### Verdict\nVERDICT: FAIL" + ), + }, + verdict="FAIL", + ), + ], + final_verdict="FAIL", + ) + + report = build_report(config, result) + + self.assertIn("Review Metrics", report) + # Check table headers + self.assertIn("Critical", report) + self.assertIn("CONFIRMED", report) + self.assertIn("DISMISSED", report) + # Check trend section + self.assertIn("Metrics Trend", report) + self.assertIn("decreasing", report) + + def test_report_no_metrics_table_without_review_steps(self) -> None: + config = 
PipelineConfig( + language="en", + pipeline=[ + StepConfig( + name="generate", + agent="claude-coder", + role="generate", + prompt_template="default:generate", + output_key="generated_code", + verdict=True, + ), + ], + ) + result = PipelineResult( + iterations=[ + IterationResult( + iteration=1, + step_outputs={"generated_code": "some code"}, + verdict="PASS", + ), + ], + final_verdict="PASS", + ) + + report = build_report(config, result) + self.assertNotIn("Review Metrics", report) + + +if __name__ == "__main__": + unittest.main()