# cross-eval/cross_eval/prompts.py

"""Default prompt templates and pipeline presets."""
from __future__ import annotations

import collections
from pathlib import Path
from typing import Callable, Optional

from cross_eval.models import PhaseConfig, StepConfig


# ---------------------------------------------------------------------------
# Default prompt templates
# ---------------------------------------------------------------------------

# English prompt for the code-generation role; registered under the
# "generate" key of DEFAULT_TEMPLATES["en"].
# Placeholders filled at render time: {plan}, {checklist}, {docs}, {feedback},
# {iteration}, {max_iterations}.
# A backslash at the end of a line inside the literal joins that line with the
# next one, so the rendered prompt contains no newline at that point.
GENERATE_TEMPLATE = """\
You are tasked with implementing code based on a plan and checklist.

## Plan
{plan}

## Checklist
{checklist}

## Reference Documents
{docs}

## Previous Review Feedback
{feedback}

## Iteration
This is iteration {iteration} of {max_iterations}.

## Instructions
1. Explore the project directory to understand the existing codebase structure.
2. Implement ONLY what the plan specifies. Do NOT add extra features, \
unnecessary abstractions, or premature optimizations.
3. Follow every item in the checklist.
4. If there is previous feedback, address ONLY the specific issues mentioned.
5. If previous feedback contains items marked as DISMISSED or false positive, \
IGNORE those items — they have been verified as correct.
6. Output the complete implementation.
"""

# English prompt for the review role; registered under the "review" key of
# DEFAULT_TEMPLATES["en"].
# Placeholders filled at render time: {plan}, {checklist}, {docs},
# {generated_code}, {feedback}.
# The "Previous Feedback Assessment" section is always requested (with "N/A"
# when there was no feedback) so the reply has the same shape on every
# iteration; the prompt previously asked both to omit the section and to write
# "N/A" in it, which contradicted each other.
# A backslash at the end of a line inside the literal joins that line with the
# next one, so the rendered prompt contains no newline at that point.
REVIEW_TEMPLATE = """\
You are tasked with reviewing code against a plan and checklist.

## Plan
{plan}

## Checklist
{checklist}

## Reference Documents
{docs}

## Generated Code / Previous Step Output
{generated_code}

## Previous Review Feedback
{feedback}

## Review Instructions
Explore the project directory to understand the full codebase context, \
then evaluate the code against ONLY the plan and checklist above.

For each issue found, classify it with BOTH severity AND category:

Severity levels:
- **Critical**: Breaks functionality, causes data loss, or introduces security vulnerabilities.
- **Major**: Requirement mismatch, significant logic errors, or missing core functionality.
- **Minor**: Coding convention violations, trivial omissions, or style issues.

Categories:
- **Over-engineering**: Code adds features, abstractions, or complexity \
NOT required by the plan.
- **Omission**: A requirement from the plan or checklist that is missing or \
incomplete in the implementation.

If previous review feedback is provided above, you MUST assess each item:
- **CONFIRMED**: The issue is still present in the current code.
- **DISMISSED (false positive)**: The flagged item is actually correct per \
the plan requirements. Provide rationale.

If you find issues outside the plan/checklist scope (e.g. pre-existing bugs, \
security concerns, performance problems), report them separately under \
"Out of Scope Issues".

## Output Format

### Previous Feedback Assessment
(Always include this section.)
- CONFIRMED: [item description] — still an issue because [reason]
- DISMISSED (false positive): [item description] — actually correct because [reason]
(Write "N/A" if no previous feedback was provided.)

### Issues Found
List issues ordered by severity (Critical first):
- [Critical][Over-engineering] Description (reference specific plan/checklist item)
- [Major][Omission] Description (reference specific plan/checklist item)
- [Minor][Omission] Description (reference specific plan/checklist item)

### Out of Scope Issues
Issues found outside plan/checklist scope but worth noting:
- [Critical] Description of issue
- [Minor] Description of issue
(Write "None" if no out-of-scope issues found.)

### Summary
- Critical: N, Major: N, Minor: N
- Over-engineering count: N
- Omission count: N
- CONFIRMED: N, DISMISSED: N
- Overall quality: [BRIEF ASSESSMENT]

### Verdict
If all checklist items are satisfied and there is no over-engineering or \
omission, output: VERDICT: PASS
Otherwise output: VERDICT: FAIL
"""


# Korean prompt for the code-generation role; registered under the "generate"
# key of DEFAULT_TEMPLATES["ko"].
# Placeholders filled at render time: {plan}, {checklist}, {docs}, {feedback},
# {iteration}, {max_iterations}.
GENERATE_TEMPLATE_KO = """\
당신은 기획서와 체크리스트를 기반으로 코드를 구현하는 개발자입니다.

## 기획서
{plan}

## 체크리스트
{checklist}

## 참고 문서
{docs}

## 이전 리뷰 피드백
{feedback}

## 반복 정보
현재 {max_iterations}회 중 {iteration}번째 반복입니다.

## 지침
1. 프로젝트 디렉토리를 탐색하여 기존 코드베이스 구조를 파악하세요.
2. 기획서에 명시된 것만 구현하세요. 추가 기능, 불필요한 추상화, 과도한 최적화를 하지 마세요.
3. 체크리스트의 모든 항목을 충족하세요.
4. 이전 리뷰 피드백이 있다면 해당 이슈만 해결하세요.
5. 이전 피드백에서 DISMISSED 또는 오탐으로 표시된 항목은 무시하세요 — 이미 올바른 것으로 검증되었습니다.
6. 완전한 구현을 출력하세요.
"""

# Korean prompt for the review role; registered under the "review" key of
# DEFAULT_TEMPLATES["ko"].
# Placeholders filled at render time: {plan}, {checklist}, {docs},
# {generated_code}, {feedback}.
# The previous-feedback assessment section is always requested (with the
# "not applicable" marker when there was no feedback) so the reply has the
# same shape on every iteration; the prompt previously asked both to omit the
# section and to write the marker in it, which contradicted each other.
# A backslash at the end of a line inside the literal joins that line with the
# next one, so the rendered prompt contains no newline at that point.
REVIEW_TEMPLATE_KO = """\
당신은 기획서와 체크리스트 기준으로 코드를 검토하는 리뷰어입니다.

## 기획서
{plan}

## 체크리스트
{checklist}

## 참고 문서
{docs}

## 검토 대상 코드
{generated_code}

## 이전 리뷰 피드백
{feedback}

## 검토 지침
프로젝트 디렉토리를 직접 탐색하여 전체 코드베이스 맥락을 파악한 뒤, \
위 기획서와 체크리스트 기준으로만 코드를 평가하세요.

발견된 각 이슈에 심각도와 카테고리를 모두 부여하세요:

심각도:
- **Critical**: 기능 장애, 데이터 손실, 보안 취약점을 유발하는 문제.
- **Major**: 요구사항 불일치, 중대한 로직 오류, 핵심 기능 누락.
- **Minor**: 코딩 컨벤션 위반, 사소한 누락, 스타일 문제.

카테고리:
- **과최적화**: 기획서에 없는 기능, 추상화, 복잡성을 추가한 경우.
- **누락**: 기획서/체크리스트에 있지만 구현에서 빠지거나 불완전한 요구사항.

이전 리뷰 피드백이 제공된 경우, 각 항목을 반드시 평가하세요:
- **CONFIRMED**: 현재 코드에 여전히 존재하는 이슈.
- **DISMISSED (오탐)**: 기획서 요구사항상 실제로 올바른 항목. 근거를 제시하세요.

기획서/체크리스트 범위 밖에서 발견된 문제(기존 버그, 보안 이슈, 성능 문제 등)는 \
"범위 밖 이슈" 섹션에 별도로 보고하세요.

## 출력 형식

### 이전 피드백 평가
(이 섹션은 항상 포함하세요.)
- CONFIRMED: [항목 설명] — 여전히 이슈인 이유: [근거]
- DISMISSED (오탐): [항목 설명] — 실제로 올바른 이유: [근거]
(이전 피드백이 없으면 "해당 없음"이라고 작성하세요.)

### 발견된 이슈
심각도 순서(Critical 먼저)로 나열:
- [Critical][과최적화] 이슈 설명 (관련 기획서/체크리스트 항목 참조)
- [Major][누락] 이슈 설명 (관련 기획서/체크리스트 항목 참조)
- [Minor][누락] 이슈 설명 (관련 기획서/체크리스트 항목 참조)

### 범위 밖 이슈
기획서/체크리스트 범위 밖이지만 주목할 만한 이슈:
- [Critical] 이슈 설명
- [Minor] 이슈 설명
(범위 밖 이슈가 없으면 "없음"이라고 작성하세요.)

### 요약
- Critical: N, Major: N, Minor: N
- 과최적화 수: N
- 누락 수: N
- CONFIRMED: N, DISMISSED: N
- 전체 품질: [간략한 평가]

### 판정
모든 체크리스트 항목이 충족되고 과최적화/누락이 없으면: VERDICT: PASS
그렇지 않으면: VERDICT: FAIL
"""


# English prompt for auditing code that already exists (no generation step);
# registered under the "review-only" key of DEFAULT_TEMPLATES["en"].
# Placeholders filled at render time: {plan}, {checklist}, {docs},
# {iteration}, {max_iterations}, {feedback}.
# NOTE(review): the Summary asks for CONFIRMED/DISMISSED counts and the
# instructions ask to mark items that way, but the Output Format has no
# dedicated section for listing them — confirm whether that is intended.
# A backslash at the end of a line inside the literal joins that line with the
# next one, so the rendered prompt contains no newline at that point.
REVIEW_ONLY_TEMPLATE = """\
You are tasked with reviewing existing code against a plan and checklist.

## Plan
{plan}

## Checklist
{checklist}

## Reference Documents
{docs}

## Previous Review (iteration {iteration} of {max_iterations})
{feedback}

## Review Instructions
Explore the project directory thoroughly to understand the full codebase, \
then evaluate the EXISTING code against ONLY the plan and checklist above.

You are NOT generating or modifying code. You are auditing what already exists.

If previous review results are provided above, you MUST:
1. Verify each previously reported issue — is it a real issue or a false positive?
2. Look for issues the previous review MISSED.
3. Do NOT simply repeat the previous review. Provide your own independent assessment.
4. Explicitly mark items as CONFIRMED (still an issue) or DISMISSED (false positive).

For each issue found, classify it with BOTH severity AND category:

Severity levels:
- **Critical**: Breaks functionality, causes data loss, or introduces security vulnerabilities.
- **Major**: Requirement mismatch, significant logic errors, or missing core functionality.
- **Minor**: Coding convention violations, trivial omissions, or style issues.

Categories:
- **Over-engineering**: Code adds features, abstractions, or complexity \
NOT required by the plan.
- **Omission**: A requirement from the plan or checklist that is missing or \
incomplete in the implementation.

If you find issues outside the plan/checklist scope (e.g. pre-existing bugs, \
security concerns, performance problems), report them separately under \
"Out of Scope Issues".

## Output Format

### Issues Found
List issues ordered by severity (Critical first):
- [Critical][Over-engineering] Description (reference specific plan/checklist item)
- [Major][Omission] Description (reference specific plan/checklist item)
- [Minor][Omission] Description (reference specific plan/checklist item)

### Out of Scope Issues
Issues found outside plan/checklist scope but worth noting:
- [Critical] Description of issue
- [Minor] Description of issue
(Write "None" if no out-of-scope issues found.)

### Summary
- Critical: N, Major: N, Minor: N
- Over-engineering count: N
- Omission count: N
- CONFIRMED: N, DISMISSED: N
- Overall quality: [BRIEF ASSESSMENT]

### Verdict
If all checklist items are satisfied and there is no over-engineering or \
omission, output: VERDICT: PASS
Otherwise output: VERDICT: FAIL
"""

# Korean prompt for auditing code that already exists (no generation step);
# registered under the "review-only" key of DEFAULT_TEMPLATES["ko"].
# Placeholders filled at render time: {plan}, {checklist}, {docs},
# {iteration}, {max_iterations}, {feedback}.
# NOTE(review): the summary asks for CONFIRMED/DISMISSED counts and the
# instructions ask to tag items that way, but the output format has no
# dedicated section for listing them — confirm whether that is intended.
# A backslash at the end of a line inside the literal joins that line with the
# next one, so the rendered prompt contains no newline at that point.
REVIEW_ONLY_TEMPLATE_KO = """\
당신은 기존 코드를 기획서와 체크리스트 기준으로 감사하는 리뷰어입니다.

## 기획서
{plan}

## 체크리스트
{checklist}

## 참고 문서
{docs}

## 이전 리뷰 결과 ({max_iterations}회 중 {iteration}번째)
{feedback}

## 검토 지침
프로젝트 디렉토리를 직접 탐색하여 전체 코드베이스를 파악한 뒤, \
위 기획서와 체크리스트 기준으로 **기존 코드**를 평가하세요.

코드를 생성하거나 수정하지 마세요. 이미 존재하는 코드를 감사하는 것이 목적입니다.

이전 리뷰 결과가 제공된 경우 반드시:
1. 이전에 보고된 각 이슈를 검증하세요 — 진짜 이슈인지 오탐인지?
2. 이전 리뷰가 놓친 새로운 이슈를 찾으세요.
3. 이전 리뷰를 그대로 반복하지 마세요. 독립적인 평가를 제공하세요.
4. 각 항목에 CONFIRMED (여전히 이슈) 또는 DISMISSED (오탐) 태그를 명시하세요.

발견된 각 이슈에 심각도와 카테고리를 모두 부여하세요:

심각도:
- **Critical**: 기능 장애, 데이터 손실, 보안 취약점을 유발하는 문제.
- **Major**: 요구사항 불일치, 중대한 로직 오류, 핵심 기능 누락.
- **Minor**: 코딩 컨벤션 위반, 사소한 누락, 스타일 문제.

카테고리:
- **과최적화**: 기획서에 없는 기능, 추상화, 복잡성을 추가한 경우.
- **누락**: 기획서/체크리스트에 있지만 구현에서 빠지거나 불완전한 요구사항.

기획서/체크리스트 범위 밖에서 발견된 문제(기존 버그, 보안 이슈, 성능 문제 등)는 \
"범위 밖 이슈" 섹션에 별도로 보고하세요.

## 출력 형식

### 발견된 이슈
심각도 순서(Critical 먼저)로 나열:
- [Critical][과최적화] 이슈 설명 (관련 기획서/체크리스트 항목 참조)
- [Major][누락] 이슈 설명 (관련 기획서/체크리스트 항목 참조)
- [Minor][누락] 이슈 설명 (관련 기획서/체크리스트 항목 참조)

### 범위 밖 이슈
기획서/체크리스트 범위 밖이지만 주목할 만한 이슈:
- [Critical] 이슈 설명
- [Minor] 이슈 설명
(범위 밖 이슈가 없으면 "없음"이라고 작성하세요.)

### 요약
- Critical: N, Major: N, Minor: N
- 과최적화 수: N
- 누락 수: N
- CONFIRMED: N, DISMISSED: N
- 전체 품질: [간략한 평가]

### 판정
모든 체크리스트 항목이 충족되고 과최적화/누락이 없으면: VERDICT: PASS
그렇지 않으면: VERDICT: FAIL
"""

# English prompt for adjudicating several reviews into one decision;
# registered under the "aggregate-review" key of DEFAULT_TEMPLATES["en"].
# Placeholders filled at render time: {plan}, {checklist}, {docs},
# {candidate_outputs}, {reviews_bundle}, {feedback}. The preset builders in
# this module supply {candidate_outputs} and {reviews_bundle} through each
# step's context_override.
AGGREGATE_REVIEW_TEMPLATE = """\
You are adjudicating multiple review results and turning them into an actionable decision.

## Plan
{plan}

## Checklist
{checklist}

## Reference Documents
{docs}

## Candidate Outputs
{candidate_outputs}

## Reviewer Findings
{reviews_bundle}

## Previous Verification Feedback
{feedback}

## Instructions
Explore the project directory to confirm the current codebase state. Then:
1. Deduplicate overlapping issues across reviewers.
2. Resolve disagreements explicitly.
3. Keep only issues supported by the plan, checklist, code, or reviewer evidence.
4. When evidence is mixed, explain what was confirmed, what was dismissed, and what still needs follow-up.
5. Produce a prioritized action list for the coder.
6. If no confirmed issue remains, output VERDICT: PASS. Otherwise VERDICT: FAIL.

## Output Format

### Confirmed Issues
- [Critical][Omission] Description with rationale and source reviewer(s)

### Dismissed Findings
- [False positive] Claim — reason why it is actually correct (raised by: Reviewer X)
- [Already fixed] Claim — already resolved in the current code (raised by: Reviewer X)
(Write "None" if nothing was dismissed.)

### Action Items
1. Concrete fix the coder should make
2. Concrete fix the coder should make

### Summary
- Confirmed issues: N
- Dismissed findings: N (false positive: N, already fixed: N)
- Overall quality: [BRIEF ASSESSMENT]

### Verdict
VERDICT: PASS or VERDICT: FAIL
"""

# Korean prompt for adjudicating several reviews into one decision;
# registered under the "aggregate-review" key of DEFAULT_TEMPLATES["ko"].
# Placeholders filled at render time: {plan}, {checklist}, {docs},
# {candidate_outputs}, {reviews_bundle}, {feedback}. The preset builders in
# this module supply {candidate_outputs} and {reviews_bundle} through each
# step's context_override.
AGGREGATE_REVIEW_TEMPLATE_KO = """\
당신은 여러 리뷰 결과를 판정하고 coder가 수정할 액션으로 정리하는 시니어 리뷰어입니다.

## 기획서
{plan}

## 체크리스트
{checklist}

## 참고 문서
{docs}

## 후보 결과물
{candidate_outputs}

## 개별 리뷰 결과
{reviews_bundle}

## 이전 검증 피드백
{feedback}

## 지침
프로젝트 디렉토리를 탐색하여 현재 코드베이스 상태를 확인한 뒤 다음을 수행하세요.
1. 리뷰어들 사이에 중복되는 이슈를 합치세요.
2. 의견 충돌은 명시적으로 정리하세요.
3. 기획서, 체크리스트, 코드, 리뷰 근거로 뒷받침되는 이슈만 남기세요.
4. 근거가 엇갈리면 무엇이 확정이고 무엇이 기각 또는 추가확인 대상인지 분명히 적으세요.
5. coder가 바로 수정할 수 있는 우선순위 액션 아이템을 만드세요.
6. 확정된 이슈가 없으면 VERDICT: PASS, 있으면 VERDICT: FAIL 을 출력하세요.

## 출력 형식

### 확정 이슈
- [Critical][누락] 확정된 이슈 설명, 근거, 출처 리뷰어

### 기각된 주장
- [오탐] 주장 내용 — 실제로 올바른 이유 (제기: 리뷰어 X)
- [수정 완료] 주장 내용 — 현재 코드에서 이미 해결됨 (제기: 리뷰어 X)
(기각된 항목이 없으면 "없음"이라고 작성하세요.)

### 액션 아이템
1. coder가 수정해야 할 구체적인 작업
2. coder가 수정해야 할 구체적인 작업

### 요약
- 확정 이슈 수: N
- 기각된 주장 수: N (오탐: N, 수정 완료: N)
- 전체 품질: [간략한 평가]

### 판정
VERDICT: PASS 또는 VERDICT: FAIL
"""


# Built-in templates, keyed first by language code and then by template name.
# Every language exposes the same four names in the same order; that order is
# what shows up when an error message lists the available templates.
DEFAULT_TEMPLATES: dict[str, dict[str, str]] = {
    lang: dict(
        zip(("generate", "review", "review-only", "aggregate-review"), bodies)
    )
    for lang, bodies in (
        (
            "en",
            (
                GENERATE_TEMPLATE,
                REVIEW_TEMPLATE,
                REVIEW_ONLY_TEMPLATE,
                AGGREGATE_REVIEW_TEMPLATE,
            ),
        ),
        (
            "ko",
            (
                GENERATE_TEMPLATE_KO,
                REVIEW_TEMPLATE_KO,
                REVIEW_ONLY_TEMPLATE_KO,
                AGGREGATE_REVIEW_TEMPLATE_KO,
            ),
        ),
    )
}

# Language whose built-in templates are served (set by the pipeline before a run).
_current_language: str = "en"


def set_language(lang: str) -> None:
    """Select the language used when resolving built-in templates.

    Raises:
        ValueError: If ``lang`` is not a key of ``DEFAULT_TEMPLATES``.
    """
    global _current_language
    if lang in DEFAULT_TEMPLATES:
        _current_language = lang
        return
    raise ValueError(
        f"Unsupported language '{lang}'. Available: {list(DEFAULT_TEMPLATES)}"
    )


# ---------------------------------------------------------------------------
# Pipeline presets
# ---------------------------------------------------------------------------

def _safe_key(name: str) -> str:
    """Sanitize agent name for use as template variable / output_key.

    Replaces hyphens with underscores so names like 'claude-coder'
    become 'claude_coder', which is valid in format_map().
    """
    return name.replace("-", "_")


def _unique_safe_keys(names: list[str]) -> list[str]:
    """Return stable, collision-free keys for agent names.

    Duplicate names keep the first key unchanged and receive numeric suffixes
    from the second occurrence onward.
    """
    totals = collections.Counter(_safe_key(name) for name in names)
    seen: collections.defaultdict[str, int] = collections.defaultdict(int)
    keys: list[str] = []

    for name in names:
        base = _safe_key(name)
        seen[base] += 1
        if totals[base] == 1 or seen[base] == 1:
            keys.append(base)
        else:
            keys.append(f"{base}_{seen[base]}")

    return keys


def _build_named_bundle(
    labels: list[str],
    step_names: list[str],
    output_keys: list[str],
    title: str,
) -> str:
    """Build a templated bundle from prior step outputs."""
    parts: list[str] = []
    for label, step_name, output_key in zip(labels, step_names, output_keys):
        parts.append(
            f"## {title}: {label} ({step_name})\n"
            f"{{{output_key}}}"
        )
    return "\n\n---\n\n".join(parts)


def _build_simple_preset(
    coders: list[str], reviewers: list[str], seniors: list[str],
) -> list[StepConfig]:
    """First coder generates, first reviewer reviews."""
    if not coders:
        raise ValueError("'simple' preset requires at least 1 coder")
    if not reviewers:
        raise ValueError("'simple' preset requires at least 1 reviewer")
    steps = [
        StepConfig(
            name="generate",
            agent=coders[0],
            role="generate",
            prompt_template="default:generate",
            output_key="generated_code",
        ),
        StepConfig(
            name="review",
            agent=reviewers[0],
            role="review",
            prompt_template="default:review",
            output_key="review_result",
            verdict=not seniors,
        ),
    ]
    if seniors:
        steps.append(
            StepConfig(
                name="senior_review",
                agent=seniors[0],
                role="review",
                prompt_template="default:aggregate-review",
                output_key="senior_review_result",
                verdict=True,
                context_override={
                    "candidate_outputs": "## Generated code\n{generated_code}",
                    "reviews_bundle": f"## Review: {reviewers[0]} (review)\n{{review_result}}",
                },
            ),
        )
    return steps


def _build_cross_review_preset(
    coders: list[str], reviewers: list[str], seniors: list[str],
) -> list[StepConfig]:
    """Both coders generate, then cross-review each other's output."""
    if len(coders) < 2:
        raise ValueError("'cross-review' preset requires at least 2 coders")
    a, b = coders[0], coders[1]
    ak, bk = _unique_safe_keys([a, b])
    steps = [
        StepConfig(
            name=f"generate_{ak}",
            agent=a,
            role="generate",
            prompt_template="default:generate",
            output_key=f"code_{ak}",
            parallel=True,
        ),
        StepConfig(
            name=f"generate_{bk}",
            agent=b,
            role="generate",
            prompt_template="default:generate",
            output_key=f"code_{bk}",
            parallel=True,
        ),
        StepConfig(
            name=f"review_by_{ak}",
            agent=a,
            role="review",
            prompt_template="default:review",
            output_key=f"review_by_{ak}",
            context_override={"generated_code": f"{{code_{bk}}}"},
            parallel=True,
            verdict=not seniors,
        ),
        StepConfig(
            name=f"review_by_{bk}",
            agent=b,
            role="review",
            prompt_template="default:review",
            output_key=f"review_by_{bk}",
            verdict=not seniors,
            context_override={"generated_code": f"{{code_{ak}}}"},
            parallel=True,
        ),
    ]
    if seniors:
        steps.append(
            StepConfig(
                name="senior_review",
                agent=seniors[0],
                role="review",
                prompt_template="default:aggregate-review",
                output_key="senior_review_result",
                verdict=True,
                context_override={
                    "candidate_outputs": _build_named_bundle(
                        [a, b],
                        [f"generate_{ak}", f"generate_{bk}"],
                        [f"code_{ak}", f"code_{bk}"],
                        "Candidate",
                    ),
                    "reviews_bundle": _build_named_bundle(
                        [a, b],
                        [f"review_by_{ak}", f"review_by_{bk}"],
                        [f"review_by_{ak}", f"review_by_{bk}"],
                        "Review",
                    ),
                },
            ),
        )
    return steps


def _build_review_only_preset(
    coders: list[str], reviewers: list[str], seniors: list[str],
) -> list[StepConfig]:
    """Review-only: all reviewers audit existing code independently."""
    if not reviewers:
        raise ValueError("'review-only' preset requires at least 1 reviewer")

    if len(reviewers) == 1 and not seniors:
        # Single reviewer — backward compatible
        return [
            StepConfig(
                name="review",
                agent=reviewers[0],
                role="review",
                prompt_template="default:review-only",
                output_key="review_result",
                verdict=True,
            ),
        ]

    # Multiple reviewers — each produces a separate review with verdict (parallel)
    steps: list[StepConfig] = []
    reviewer_keys = _unique_safe_keys(reviewers)
    for reviewer, rk in zip(reviewers, reviewer_keys):
        steps.append(
            StepConfig(
                name=f"review_{rk}",
                agent=reviewer,
                role="review",
                prompt_template="default:review-only",
                output_key=f"review_{rk}",
                verdict=not seniors,
                parallel=True,
            ),
        )
    if seniors:
        step_names = [f"review_{rk}" for rk in reviewer_keys]
        output_keys = [f"review_{rk}" for rk in reviewer_keys]
        steps.append(
            StepConfig(
                name="senior_review",
                agent=seniors[0],
                role="review",
                prompt_template="default:aggregate-review",
                output_key="senior_review_result",
                verdict=True,
                context_override={
                    "candidate_outputs": "Current repository working tree under review.",
                    "reviews_bundle": _build_named_bundle(
                        reviewers, step_names, output_keys, "Review",
                    ),
                },
            ),
        )
    return steps


def _build_review_fix_preset(
    coders: list[str], reviewers: list[str], seniors: list[str],
) -> list[PhaseConfig]:
    """Review in parallel, aggregate findings, fix, then verify in a loop."""
    if not coders:
        raise ValueError("'review-fix' preset requires at least 1 coder")
    if not reviewers:
        raise ValueError("'review-fix' preset requires at least 1 reviewer")

    review_steps: list[StepConfig] = []
    reviewer_keys = _unique_safe_keys(reviewers)
    for reviewer, rk in zip(reviewers, reviewer_keys):
        review_steps.append(
            StepConfig(
                name=f"review_{rk}",
                agent=reviewer,
                role="review",
                prompt_template="default:review-only",
                output_key=f"review_{rk}",
                verdict=False,
                parallel=True,
            ),
        )

    fix_coder = coders[0]
    senior_agent = seniors[0] if seniors else reviewers[0]
    review_step_names = [f"review_{rk}" for rk in reviewer_keys]
    review_output_keys = [f"review_{rk}" for rk in reviewer_keys]

    return [
        PhaseConfig(
            name="review_fix",
            steps=review_steps + [
                StepConfig(
                    name="aggregate_review",
                    agent=senior_agent,
                    role="review",
                    prompt_template="default:aggregate-review",
                    output_key="aggregate_review",
                    context_override={
                        "candidate_outputs": "Current repository working tree under review.",
                        "reviews_bundle": _build_named_bundle(
                            reviewers, review_step_names, review_output_keys, "Review",
                        ),
                    },
                ),
                StepConfig(
                    name="generate",
                    agent=fix_coder,
                    role="generate",
                    prompt_template="default:generate",
                    output_key="generated_code",
                    context_override={"feedback": "{aggregate_review}"},
                ),
                StepConfig(
                    name="verify",
                    agent=senior_agent,
                    role="review",
                    prompt_template="default:review",
                    output_key="verify_result",
                    verdict=True,
                ),
            ],
            max_iterations=5,
            consecutive_pass=1,
        ),
    ]


# Presets whose builder returns a flat list of StepConfig.
PIPELINE_PRESETS: dict[str, Callable] = {
    "simple": _build_simple_preset,
    "cross-review": _build_cross_review_preset,
    "review-only": _build_review_only_preset,
}

# Presets whose builder returns a list of PhaseConfig.
PHASED_PRESETS: dict[str, Callable] = {"review-fix": _build_review_fix_preset}

# Every preset name: flat presets first, then phased ones.
ALL_PRESET_NAMES: list[str] = [*PIPELINE_PRESETS, *PHASED_PRESETS]


# ---------------------------------------------------------------------------
# Template resolution and rendering
# ---------------------------------------------------------------------------

def resolve_template(template_ref: str, templates_dir: Optional[Path] = None) -> str:
    """Resolve a template reference to its content string.

    Formats:
    - "default:<name>"  -> built-in template ``<name>`` for the current
      language ("generate", "review", "review-only", "aggregate-review")
    - "path/to/file.md" -> contents of that file, read as UTF-8; a relative
      path is resolved against ``templates_dir`` when one is given

    Raises:
        ValueError: If ``<name>`` is not a built-in template.
        FileNotFoundError: If the path does not point to an existing file
            (this includes directories).
    """
    if template_ref.startswith("default:"):
        key = template_ref.split(":", 1)[1]
        # Fall back to the English set if the current language has none.
        lang_templates = DEFAULT_TEMPLATES.get(_current_language, DEFAULT_TEMPLATES["en"])
        if key not in lang_templates:
            raise ValueError(
                f"Unknown default template '{key}'. "
                f"Available: {list(lang_templates.keys())}"
            )
        return lang_templates[key]

    # Treat as file path
    path = Path(template_ref)
    if templates_dir and not path.is_absolute():
        path = templates_dir / path
    # is_file() rather than exists(): a directory (or an empty reference,
    # which resolves to ".") would pass exists() and then fail inside
    # read_text() with a less helpful IsADirectoryError / PermissionError.
    if not path.is_file():
        raise FileNotFoundError(f"Template file not found: {path}")
    return path.read_text(encoding="utf-8")


class _DefaultDict(collections.defaultdict):
    """defaultdict that uses the missing key name in the default value."""

    def __missing__(self, key: str) -> str:
        return f"(no {key} provided)"


def render_template(template: str, context: dict[str, str]) -> str:
    """Fill the ``{variable}`` placeholders of ``template`` from ``context``.

    A placeholder whose name is absent from ``context`` is rendered as
    "(no <key> provided)" rather than raising KeyError.
    """
    # Copy the context into the placeholder-producing mapping so the caller's
    # dict is left untouched.
    lenient_context = _DefaultDict(str, context)
    return template.format_map(lenient_context)