This commit is contained in:
이충영 에이닷서비스개발
2026-03-15 17:54:30 +09:00
parent 28efd5bb8f
commit 0bbe0f6f7b
14 changed files with 871 additions and 183 deletions

View File

@@ -38,7 +38,7 @@ coders: [claude-coder]
reviewers: [claude-reviewer]
# seniors: [codex-senior]
# 파이프라인 종류: simple | cross-review | plan-review | review-only | review-fix | coding-review-fix
# 파이프라인 종류: plan-review | coding-plan-review
pipeline: preset:{preset}
# 반복 설정
@@ -194,20 +194,12 @@ def main(argv: list[str] | None = None) -> int:
)
init_parser.add_argument(
"--preset",
default="simple",
choices=[
"simple",
"cross-review",
"plan-review",
"review-only",
"review-fix",
"coding-review-fix",
],
default="coding-plan-review",
choices=["plan-review", "coding-plan-review"],
help=(
"파이프라인 종류 (기본: simple). "
"simple=코딩+리뷰, cross-review=교차리뷰, plan-review=문서리뷰수정재검증, "
"review-only=리뷰만, review-fix=리뷰수렴+자동수정, "
"coding-review-fix=초기코딩후리뷰수렴"
"파이프라인 종류 (기본: coding-plan-review). "
"plan-review=문서리뷰수정재검증, "
"coding-plan-review=문서기반구현후 코드+문서 리뷰/수정/재검증"
),
)
init_parser.add_argument(
@@ -252,9 +244,9 @@ def main(argv: list[str] | None = None) -> int:
)
demo_parser.add_argument(
"--preset",
default="simple",
choices=["simple", "review-fix", "coding-review-fix"],
help="데모할 파이프라인 종류 (기본: simple)",
default="coding-plan-review",
choices=["plan-review", "coding-plan-review"],
help="데모할 파이프라인 종류 (기본: coding-plan-review)",
)
demo_parser.add_argument(
"--escalate",
@@ -281,25 +273,12 @@ def main(argv: list[str] | None = None) -> int:
),
epilog=(
"파이프라인 종류 (--preset):\n"
" ┌───────────────────────────────────────────────────────────────────┐\n"
"simple │ Coder가 코드 작성 → Reviewer가 리뷰 \n"
" │ (기본값) │ FAIL이면 피드백 반영해서 재코딩, PASS까지 반복\n"
" ├───────────────────────────────────────────────────────────────────┤\n"
" │ review-fix │ 2단계 파이프라인: \n"
" │ │ Reviewer N명 병렬 리뷰 → 취합 → 수정 → 재검증 │\n"
" ├──────────────┼─────────────────────────────────────────────────────┤\n"
" │ coding- │ 3단계 파이프라인: │\n"
" │ review-fix │ 초기 코딩 1회 → 리뷰 취합 → 수정 → 재검증 반복 │\n"
" ├──────────────┼─────────────────────────────────────────────────────┤\n"
" │ plan-review │ 구현 전 기획서/체크리스트/문서를 검토하고 │\n"
" │ │ 수정한 뒤 시니어가 재검증할 때까지 반복 │\n"
" ├──────────────┼─────────────────────────────────────────────────────┤\n"
" │ review-only │ 코드 작성 없이 Reviewer N명이 기존 코드만 검토 │\n"
" │ │ (이미 작성된 코드의 품질 감사용) │\n"
" ├──────────────┼─────────────────────────────────────────────────────┤\n"
" │ cross-review │ Coder 2명이 각각 구현 → 상대방 코드를 교차 리뷰 │\n"
" │ │ (서로 다른 에이전트의 구현 비교용) │\n"
" └──────────────┴─────────────────────────────────────────────────────┘\n"
" ┌───────────────────────────────────────────────────────────────────┐\n"
"coding-plan-review │ 입력 문서 기반 구현 → 코드+문서 리뷰/수정\n"
" │ (기본값) │ → 재검증 반복 \n"
" ├───────────────────────────────────────────────────────────────────┤\n"
"plan-review │ 구현 전 문서 리뷰 → 문서 수정 → 재검증 반복\n"
" └─────────────────────┴──────────────────────────────────────────────┘\n"
"\n"
"기본 제공 에이전트:\n"
" ┌──────────────────┬─────────┬───────────┬──────────────────────────┐\n"
@@ -316,34 +295,13 @@ def main(argv: list[str] | None = None) -> int:
"\n"
"사용 예시:\n"
"\n"
" 기본 실행 (Claude가 코딩하고 Claude가 리뷰):\n"
" cross-eval run --plan plan.md\n"
"\n"
" Codex가 코딩, Claude가 리뷰:\n"
" cross-eval run --plan plan.md --coder codex --reviewer claude\n"
"\n"
" 리뷰어 2명 (Claude + Codex):\n"
" cross-eval run --plan plan.md --reviewer claude --reviewer codex\n"
"\n"
" 리뷰 취합용 Senior 추가:\n"
" cross-eval run --plan plan.md --preset review-fix \\\n"
" --reviewer claude --reviewer codex --senior codex\n"
"\n"
" 리뷰 수렴 후 자동 수정 (review-fix):\n"
" cross-eval run --plan plan.md --preset review-fix \\\n"
" --reviewer claude --reviewer codex\n"
"\n"
" 초기 코딩 후 리뷰 수렴 + 자동 수정 (coding-review-fix):\n"
" cross-eval run --plan plan.md --preset coding-review-fix \\\n"
" --reviewer claude --reviewer codex\n"
"\n"
" 기존 코드 리뷰만 (review-only):\n"
" cross-eval run --plan plan.md --preset review-only \\\n"
" --reviewer claude --reviewer codex\n"
" 코드 + 문서 구현/리뷰 루프 (coding-plan-review):\n"
" cross-eval run --plan plan.md --preset coding-plan-review \\\n"
" --coder claude --reviewer codex --reviewer claude --senior codex\n"
"\n"
" 문서 리뷰 + 수정 + 재검증 반복 (plan-review):\n"
" cross-eval run --plan plan.md --preset plan-review \\\n"
" --coder codex --reviewer codex\n"
" --coder claude --reviewer codex --reviewer claude --senior codex\n"
"\n"
" 모델 변경:\n"
" cross-eval run --plan plan.md --model sonnet\n"
@@ -420,7 +378,11 @@ def main(argv: list[str] | None = None) -> int:
)
agent_group.add_argument(
"--agentic", action="store_true", default=False,
help="Coder를 agentic 모드로 실행 (worktree에서 파일 직접 수정, git diff로 결과 캡처)",
help="Coder를 agentic 모드로 실행 (파일 직접 수정, git diff로 결과 캡처)",
)
agent_group.add_argument(
"--worktree", action="store_true", default=False,
help="기본 direct mode 대신 isolated git worktree에서 실행",
)
agent_group.add_argument(
"--model", default=None, metavar="MODEL",
@@ -443,15 +405,8 @@ def main(argv: list[str] | None = None) -> int:
pipe_group = run_parser.add_argument_group("파이프라인")
pipe_group.add_argument(
"--preset", default=None,
choices=[
"simple",
"cross-review",
"plan-review",
"review-only",
"review-fix",
"coding-review-fix",
],
help="파이프라인 종류 (기본: simple). 각 종류 설명은 아래 참조",
choices=["plan-review", "coding-plan-review"],
help="파이프라인 종류 (기본: coding-plan-review). 각 종류 설명은 아래 참조",
)
pipe_group.add_argument(
"--max-iter", type=int, default=None,
@@ -560,18 +515,11 @@ def cmd_demo(args: argparse.Namespace) -> int:
# ---------------------------------------------------------------------------
_PRESET_DESCRIPTIONS = {
"simple": "코딩 + 리뷰 (가장 기본)",
"review-fix": "리뷰 → 취합 → 수정 → 재검증 반복",
"coding-review-fix": "초기 코딩 + 리뷰 수렴 반복",
"coding-plan-review": "입력 문서 기반 구현 후 코드+문서 리뷰/수정 반복",
"plan-review": "문서 리뷰 → 수정 → 재검증 반복",
"review-only": "기존 코드만 리뷰 (코딩 없음)",
"cross-review": "2명이 각각 구현 후 교차 리뷰",
}
_PRESET_ORDER = [
"simple", "review-fix", "coding-review-fix",
"plan-review", "review-only", "cross-review",
]
_PRESET_ORDER = ["coding-plan-review", "plan-review"]
def _prompt_choice(
@@ -640,7 +588,7 @@ def _run_guided_init(target: Path) -> dict:
coder = _prompt_text(" Coder 에이전트", default="claude")
reviewer = _prompt_text(" Reviewer 에이전트", default="claude")
needs_senior = preset in ("review-fix", "coding-review-fix")
needs_senior = preset in ("coding-plan-review", "plan-review")
senior = ""
if needs_senior:
senior = _prompt_text(" Senior 에이전트", default=reviewer)
@@ -899,10 +847,10 @@ def cmd_run(args: argparse.Namespace) -> int:
need_rebuild = args.preset is not None or args.coders or args.reviewers or args.seniors
if need_rebuild:
from cross_eval.prompts import PHASED_PRESETS
preset = args.preset or "simple"
preset = args.preset or "coding-plan-review"
# Determine which preset was configured (from YAML or defaults)
if args.preset is None and config.phases:
preset = config.preset_name if config.preset_name != "custom" else "review-fix"
preset = config.preset_name if config.preset_name != "custom" else "coding-plan-review"
elif args.preset is None and not args.coders and not args.reviewers and not args.seniors:
pass # no changes needed
inferred_coders, inferred_reviewers, inferred_seniors = _infer_roles(
@@ -929,8 +877,6 @@ def cmd_run(args: argparse.Namespace) -> int:
elif preset in PIPELINE_PRESETS:
config.pipeline = PIPELINE_PRESETS[preset](coders, reviewers, seniors)
config.phases = []
if preset == "review-only" and args.max_iter is None and args.min_iter is None:
config.max_iterations = 1
sync_phased_iterations(config)
if args.max_iter is not None:
@@ -951,6 +897,9 @@ def cmd_run(args: argparse.Namespace) -> int:
if coder_name in config.agents:
_make_agentic(config.agents[coder_name])
if args.worktree:
config.use_worktree = True
ensure_fix_preset_agentic(config)
# --model: apply to ALL agents
@@ -988,7 +937,7 @@ def cmd_run(args: argparse.Namespace) -> int:
print(f"No files found in: {docs_dir}", file=sys.stderr)
return 1
config.inputs["docs"] = docs_content
config.inputs["docs_ref"] = str(docs_dir)
config.inputs["docs_ref"] = docs_dir
if args.env_files:
for env_file in args.env_files:
@@ -1062,6 +1011,9 @@ def cmd_run(args: argparse.Namespace) -> int:
if not args.dry_run and result.run_dir:
print(f"Output: {result.run_dir}/")
if args.dry_run:
return 0
if result.final_verdict == "ESCALATE":
from cross_eval.report import print_escalation_report
print_escalation_report(config, result)

View File

@@ -31,7 +31,10 @@ DEFAULT_ROLE_REASONING_EFFORTS = {
"reviewer": "medium",
"senior": "high",
}
FIX_STYLE_PRESETS = {"plan-review", "review-fix", "coding-review-fix"}
FIX_STYLE_PRESETS = {
"plan-review",
"coding-plan-review",
}
# ---------------------------------------------------------------------------
@@ -298,8 +301,7 @@ def _default_seniors_for_preset(
isinstance(pipeline_raw, str)
and pipeline_raw in {
"preset:plan-review",
"preset:review-fix",
"preset:coding-review-fix",
"preset:coding-plan-review",
}
and reviewers
):
@@ -382,9 +384,11 @@ def default_config() -> PipelineConfig:
coders = ["claude-coder"]
reviewers = ["claude-reviewer"]
seniors: list[str] = []
pipeline = PIPELINE_PRESETS["simple"](coders, reviewers, seniors)
pipeline: list[StepConfig] = []
phases = PHASED_PRESETS["coding-plan-review"](coders, reviewers, seniors)
return PipelineConfig(
output_dir=Path(".cross-eval/output"),
use_worktree=False,
max_iterations=3,
language="ko",
execution=ExecutionConfig(),
@@ -394,6 +398,8 @@ def default_config() -> PipelineConfig:
reviewers=reviewers,
seniors=seniors,
pipeline=pipeline,
phases=phases,
preset_name="coding-plan-review",
)
@@ -437,7 +443,7 @@ def _parse_raw(raw: dict[str, Any], config_path: Path) -> PipelineConfig:
)
# --- roles: explicit or inferred ---
pipeline_raw = raw.get("pipeline", "preset:simple")
pipeline_raw = raw.get("pipeline", "preset:coding-plan-review")
coders_raw = raw.get("coders")
reviewers_raw = raw.get("reviewers")
seniors_raw = raw.get("seniors")
@@ -498,6 +504,7 @@ def _parse_raw(raw: dict[str, Any], config_path: Path) -> PipelineConfig:
config = PipelineConfig(
output_dir=output_dir,
use_worktree=bool(raw.get("use_worktree", False)),
max_iterations=int(raw.get("max_iterations", 3)),
min_iterations=int(raw.get("min_iterations", 1)),
verbose=bool(raw.get("verbose", False)),
@@ -555,10 +562,10 @@ def _resolve_pipeline(
"""Resolve pipeline from preset string or explicit step list.
Returns (steps, phases) tuple. Only one will be non-empty.
- Simple/cross-review/plan-review/review-only → steps populated, phases empty.
- Phased presets (review-fix) → steps empty, phases populated.
- plan-review → steps populated, phases empty.
- coding-plan-review → steps empty, phases populated.
"""
# Preset: "preset:simple" or "preset:review-fix"
# Preset: "preset:plan-review" or "preset:coding-plan-review"
if isinstance(pipeline_raw, str) and pipeline_raw.startswith("preset:"):
preset_name = pipeline_raw.split(":", 1)[1]
if preset_name in PIPELINE_PRESETS:
@@ -592,7 +599,7 @@ def _resolve_pipeline(
return steps, []
raise ValueError(
f"'pipeline' must be a preset string (e.g. 'preset:simple') "
f"'pipeline' must be a preset string (e.g. 'preset:plan-review') "
f"or a list of step definitions, got {type(pipeline_raw).__name__}"
)

View File

@@ -165,7 +165,7 @@ CYAN = "\033[36m"
RESET = "\033[0m"
def run_mock_demo(preset: str = "simple", show_escalate: bool = False) -> None:
def run_mock_demo(preset: str = "coding-plan-review", show_escalate: bool = False) -> None:
"""Run a simulated demo showing the full pipeline lifecycle."""
steps = _MOCK_ESCALATE_STEPS if show_escalate else _MOCK_STEPS
@@ -229,7 +229,7 @@ def run_mock_demo(preset: str = "simple", show_escalate: bool = False) -> None:
def run_live_demo(
preset: str = "simple",
preset: str = "coding-plan-review",
timeout: int | None = None,
) -> PipelineResult:
"""Run a live demo with real agents using the built-in plan."""
@@ -255,8 +255,9 @@ def run_live_demo(
pipeline = []
phases = PHASED_PRESETS[preset](coders, reviewers, seniors)
else:
pipeline = PIPELINE_PRESETS["simple"](coders, reviewers, seniors)
phases = []
pipeline = []
phases = PHASED_PRESETS["coding-plan-review"](coders, reviewers, seniors)
with tempfile.TemporaryDirectory() as tmpdir:
plan_path = Path(tmpdir) / "plan.md"

View File

@@ -62,6 +62,7 @@ class PipelineConfig:
"""Full cross-eval configuration."""
output_dir: Path = field(default_factory=lambda: Path(".cross-eval/output"))
use_worktree: bool = False
max_iterations: int = 3
min_iterations: int = 1
verbose: bool = False

View File

@@ -4,6 +4,7 @@ from __future__ import annotations
import logging
import os
import re
import shutil
import subprocess
import time
from hashlib import sha256
@@ -34,6 +35,19 @@ from cross_eval.runtime_env import (
logger = logging.getLogger(__name__)
def _get_current_head(cwd: Path) -> str | None:
    """Return the commit SHA that ``HEAD`` points at for the repository at *cwd*.

    Args:
        cwd: Directory in which ``git rev-parse HEAD`` is executed.

    Returns:
        The stripped SHA printed by git, or ``None`` when the command cannot be
        started (for example the ``git`` executable or *cwd* does not exist),
        exits with a non-zero status, or prints nothing.
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            cwd=cwd,
            capture_output=True,
            text=True,
        )
    except OSError:
        # Not being able to launch git is reported the same way as a failed
        # lookup, so callers only ever have to handle the ``None`` case.
        return None
    if result.returncode != 0:
        return None
    return result.stdout.strip() or None
def run_pipeline(
config: PipelineConfig,
cwd: Path | None = None,
@@ -124,8 +138,6 @@ def _copy_inputs_to_worktree(
Updates ``config.inputs`` in-place so subsequent reference refreshes use
worktree-local paths.
"""
import shutil
base_root = base_cwd.resolve()
track_external_inputs = config.preset_name == "plan-review"
inputs_dir = worktree_path / ".cross-eval-inputs"
@@ -134,7 +146,7 @@ def _copy_inputs_to_worktree(
# Exclude read-only input copies from git so they don't pollute code diffs.
(inputs_dir / ".gitignore").write_text("*\n", encoding="utf-8")
for key, val in list(config.inputs.items()):
if key.endswith("_ref") or not isinstance(val, Path):
if not isinstance(val, Path):
continue
if not val.exists():
continue
@@ -143,17 +155,71 @@ def _copy_inputs_to_worktree(
rel_path = resolved.relative_to(base_root)
except ValueError:
dest = inputs_dir / val.name
shutil.copy2(resolved, dest)
_copy_path(resolved, dest)
config.inputs[key] = dest
continue
worktree_target = worktree_path / rel_path
if not worktree_target.exists():
worktree_target.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(resolved, worktree_target)
_copy_path(resolved, worktree_target)
config.inputs[key] = worktree_target
def _snapshot_input_paths(config: PipelineConfig) -> dict[str, Path]:
    """Collect the path-valued inputs of *config* into a fresh mapping.

    Entries of ``config.inputs`` whose value is not a ``Path`` instance are
    left out. The returned dict is a new object, so later reassignment of
    ``config.inputs[key]`` does not affect the snapshot.
    """
    path_inputs: dict[str, Path] = {}
    for name, value in config.inputs.items():
        if isinstance(value, Path):
            path_inputs[name] = value
    return path_inputs
def _apply_worktree_inputs_to_base(
    config: PipelineConfig,
    original_inputs: dict[str, Path],
    *,
    cwd: Path,
) -> list[Path]:
    """Copy remapped inputs back over the paths they were originally loaded from.

    Args:
        config: Configuration whose ``inputs`` hold the current input locations.
        original_inputs: Mapping of input key to the path recorded before the
            inputs were remapped.
        cwd: Accepted for the caller's convenience; not used by this function.

    Returns:
        The original paths that were overwritten, in iteration order. An input
        is skipped when its current value is not an existing ``Path`` or when
        it resolves to the same location as the original.
    """
    written_back: list[Path] = []
    for name, destination in original_inputs.items():
        source = config.inputs.get(name)
        usable = isinstance(source, Path) and source.exists()
        if usable and source.resolve() != destination.resolve():
            _copy_path(source, destination)
            written_back.append(destination)
    return written_back
def _commit_base_repo_paths(cwd: Path, paths: list[Path], message: str) -> bool:
    """Stage and commit the given paths in the repository rooted at *cwd*.

    Args:
        cwd: Repository directory in which the git commands are executed.
        paths: Candidate paths; those that do not resolve to a location under
            *cwd* are ignored.
        message: Commit message.

    Returns:
        ``True`` when ``git commit`` exits with status 0, ``False`` when no
        path lies under *cwd* or the commit command fails (for example because
        there is nothing to commit).

    Raises:
        subprocess.CalledProcessError: If ``git add`` fails.
    """
    base_root = cwd.resolve()
    rel_paths: list[str] = []
    for path in paths:
        try:
            rel_paths.append(str(path.resolve().relative_to(base_root)))
        except ValueError:
            # Path is outside the repository root; nothing to commit for it.
            continue
    if not rel_paths:
        return False
    subprocess.run(
        ["git", "add", "--", *rel_paths],
        cwd=cwd,
        capture_output=True,
        check=True,
    )
    # Restrict the commit to the restored paths so that changes the user had
    # already staged for other files are not swept into this commit.
    result = subprocess.run(
        ["git", "commit", "-m", message, "--", *rel_paths],
        cwd=cwd,
        capture_output=True,
        text=True,
    )
    return result.returncode == 0
def _snapshot_repo_state(cwd: Path) -> dict[str, str]:
"""Capture the base repository working-tree state.
@@ -344,18 +410,26 @@ def _run_simple_pipeline(
# Setup shared worktree for agentic mode
worktree_path: Path | None = None
agent_execution_path: Path | None = None
agentic_branch_name: str | None = None
agentic_base_commit: str | None = None
original_input_paths: dict[str, Path] = {}
base_repo_state: dict[str, str] | None = None
base_repo_status: str | None = None
if not dry_run and _has_agentic_steps(config, config.pipeline):
worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
cwd, run_dir, config.preset_name,
)
_copy_inputs_to_worktree(config, worktree_path, base_cwd=cwd)
_refresh_input_references(config, input_contents)
base_repo_state = _snapshot_repo_state(cwd)
base_repo_status = _snapshot_repo_status(cwd)
if config.use_worktree:
worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
cwd, run_dir, config.preset_name,
)
original_input_paths = _snapshot_input_paths(config)
_copy_inputs_to_worktree(config, worktree_path, base_cwd=cwd)
_refresh_input_references(config, input_contents)
base_repo_state = _snapshot_repo_state(cwd)
base_repo_status = _snapshot_repo_status(cwd)
agent_execution_path = worktree_path
else:
agent_execution_path = cwd
agentic_base_commit = _get_current_head(cwd)
feedback = "(no feedback — first iteration)"
iterations: list[IterationResult] = []
@@ -381,7 +455,7 @@ def _run_simple_pipeline(
config.pipeline, config, input_contents, feedback,
i, config.max_iterations, cwd, timeout, dry_run,
run_dir=run_dir, output_iter=i,
worktree_path=worktree_path,
worktree_path=agent_execution_path,
runtime_env=runtime_env,
base_repo_state=base_repo_state,
base_repo_status=base_repo_status,
@@ -389,7 +463,7 @@ def _run_simple_pipeline(
)
# Intermediate commit so next iteration's diff only shows new changes
if worktree_path is not None:
if config.use_worktree and worktree_path is not None:
agentic_base_commit = _commit_iteration(worktree_path, config.preset_name, i, verdict)
iter_result = IterationResult(
@@ -480,8 +554,25 @@ def _run_simple_pipeline(
break
finally:
if config.use_worktree and worktree_path is not None and original_input_paths:
restored_paths = _apply_worktree_inputs_to_base(
config, original_input_paths, cwd=cwd,
)
if restored_paths:
try:
committed = _commit_base_repo_paths(
cwd,
restored_paths,
f"cross-eval: {config.preset_name} ({final_verdict})",
)
if committed:
logger.info(" Applied and committed final input changes in base repo.")
else:
logger.info(" Applied final input changes in base repo (no commit created).")
except Exception:
logger.warning(" Failed to commit final input changes in base repo", exc_info=True)
agentic_branch: str | None = None
if worktree_path is not None and agentic_branch_name is not None:
if config.use_worktree and worktree_path is not None and agentic_branch_name is not None:
agentic_branch = _finalize_worktree(
cwd, worktree_path, agentic_branch_name,
config.preset_name, final_verdict,
@@ -523,18 +614,26 @@ def _run_phased_pipeline(
# Setup shared worktree for agentic mode
all_phase_steps = [s for p in config.phases for s in p.steps]
worktree_path: Path | None = None
agent_execution_path: Path | None = None
agentic_branch_name: str | None = None
agentic_base_commit: str | None = None
original_input_paths: dict[str, Path] = {}
base_repo_state: dict[str, str] | None = None
base_repo_status: str | None = None
if not dry_run and _has_agentic_steps(config, all_phase_steps):
worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
cwd, run_dir, config.preset_name,
)
_copy_inputs_to_worktree(config, worktree_path, base_cwd=cwd)
_refresh_input_references(config, input_contents)
base_repo_state = _snapshot_repo_state(cwd)
base_repo_status = _snapshot_repo_status(cwd)
if config.use_worktree:
worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
cwd, run_dir, config.preset_name,
)
original_input_paths = _snapshot_input_paths(config)
_copy_inputs_to_worktree(config, worktree_path, base_cwd=cwd)
_refresh_input_references(config, input_contents)
base_repo_state = _snapshot_repo_state(cwd)
base_repo_status = _snapshot_repo_status(cwd)
agent_execution_path = worktree_path
else:
agent_execution_path = cwd
agentic_base_commit = _get_current_head(cwd)
iterations: list[IterationResult] = []
feedback = "(no feedback — first iteration)"
@@ -581,7 +680,7 @@ def _run_phased_pipeline(
phase.steps, config, input_contents, feedback,
pi, phase.max_iterations, cwd, timeout, dry_run,
run_dir=run_dir, output_iter=global_iter, phase_name=phase.name,
worktree_path=worktree_path,
worktree_path=agent_execution_path,
runtime_env=runtime_env,
base_repo_state=base_repo_state,
base_repo_status=base_repo_status,
@@ -589,7 +688,7 @@ def _run_phased_pipeline(
)
# Intermediate commit so next iteration's diff only shows new changes
if worktree_path is not None:
if config.use_worktree and worktree_path is not None:
agentic_base_commit = _commit_iteration(
worktree_path, f"{config.preset_name}/{phase.name}",
global_iter, verdict,
@@ -717,8 +816,25 @@ def _run_phased_pipeline(
final_verdict = "PASS" if phase_converged else "MAX_ITERATIONS_REACHED"
finally:
if config.use_worktree and worktree_path is not None and original_input_paths:
restored_paths = _apply_worktree_inputs_to_base(
config, original_input_paths, cwd=cwd,
)
if restored_paths:
try:
committed = _commit_base_repo_paths(
cwd,
restored_paths,
f"cross-eval: {config.preset_name} ({final_verdict})",
)
if committed:
logger.info(" Applied and committed final input changes in base repo.")
else:
logger.info(" Applied final input changes in base repo (no commit created).")
except Exception:
logger.warning(" Failed to commit final input changes in base repo", exc_info=True)
agentic_branch: str | None = None
if worktree_path is not None and agentic_branch_name is not None:
if config.use_worktree and worktree_path is not None and agentic_branch_name is not None:
agentic_branch = _finalize_worktree(
cwd, worktree_path, agentic_branch_name,
config.preset_name, final_verdict,
@@ -752,6 +868,8 @@ def _load_inputs(config: PipelineConfig) -> dict[str, str]:
for key, val in config.inputs.items():
if key.endswith("_ref"):
input_contents[key] = str(val)
elif key == "docs":
input_contents[key] = _load_docs_input(config, current_value=val)
elif isinstance(val, str):
input_contents[key] = val
else:
@@ -767,6 +885,8 @@ def _refresh_inputs(
for key, val in config.inputs.items():
if key.endswith("_ref"):
input_contents[key] = str(val)
elif key == "docs":
input_contents[key] = _load_docs_input(config, current_value=val)
elif isinstance(val, str):
input_contents[key] = val
elif isinstance(val, Path) and val.exists():
@@ -774,6 +894,40 @@ def _refresh_inputs(
_refresh_input_references(config, input_contents)
def _read_docs_text_or_empty(path: Path) -> str:
    """Return the UTF-8 text of *path*, or an empty string if it cannot be read."""
    try:
        return path.read_text(encoding="utf-8")
    except (UnicodeDecodeError, OSError):
        return ""


def _load_docs_input(config: PipelineConfig, *, current_value: Path | str) -> str:
    """Load the ``docs`` input, preferring the on-disk ``docs_ref`` location.

    Reading from ``docs_ref`` on every call means edits made to the documents
    become visible the next time inputs are loaded.

    Args:
        config: Configuration whose ``inputs`` may contain a ``docs_ref`` entry.
        current_value: The value currently stored under ``inputs["docs"]``;
            used only when ``docs_ref`` is not an existing ``Path``.

    Returns:
        The concatenated docs tree when ``docs_ref`` is a directory, the file
        text when it is a file, otherwise *current_value* itself (string) or
        the text of the file it points at. Unreadable or undecodable files and
        missing paths yield an empty string.
    """
    docs_ref = config.inputs.get("docs_ref")
    if isinstance(docs_ref, Path) and docs_ref.exists():
        if docs_ref.is_dir():
            return _read_docs_tree(docs_ref)
        return _read_docs_text_or_empty(docs_ref)
    if isinstance(current_value, str):
        return current_value
    if current_value.is_file():
        # Same tolerance as the docs_ref branch: a bad file must not abort the run.
        return _read_docs_text_or_empty(current_value)
    return ""
def _read_docs_tree(docs_dir: Path) -> str:
    """Concatenate every visible UTF-8 text file found below *docs_dir*.

    Files are visited in sorted path order. A file is skipped when any
    component of its path relative to *docs_dir* starts with a dot, or when it
    cannot be read or decoded as UTF-8. Each file contributes a section of the
    form ``### <relative posix path>`` followed by its content; sections are
    separated by a blank line.
    """
    sections: list[str] = []
    for candidate in sorted(docs_dir.rglob("*")):
        if not candidate.is_file():
            continue
        relative = candidate.relative_to(docs_dir)
        if any(part.startswith(".") for part in relative.parts):
            continue
        try:
            text = candidate.read_text(encoding="utf-8")
        except (UnicodeDecodeError, OSError):
            continue
        sections.append(f"### {relative.as_posix()}\n{text}")
    return "\n\n".join(sections)
def _refresh_input_references(
config: PipelineConfig,
input_contents: dict[str, str],
@@ -1703,3 +1857,12 @@ def _save_report(run_dir: Path, config: PipelineConfig, result: PipelineResult)
report_path.parent.mkdir(parents=True, exist_ok=True)
report_path.write_text(report, encoding="utf-8")
logger.info("Report saved: %s", report_path)
def _copy_path(src: Path, dest: Path) -> None:
    """Copy *src* to *dest*, whether it is a single file or a directory tree.

    A directory is copied recursively and merged into *dest* if that already
    exists. For anything else the parent directory of *dest* is created first
    and the file is copied together with its metadata.
    """
    if not src.is_dir():
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dest)
        return
    shutil.copytree(src, dest, dirs_exist_ok=True)

View File

@@ -512,6 +512,218 @@ PLAN_FIX_TEMPLATE_KO = """\
8. 수정이 끝나면 무엇을 바꿨는지와 아직 사람 판단이 필요한 blocker가 있는지 짧게 정리하세요.
"""
# English prompt for re-verifying a planning package after document-only
# revisions; registered under the "plan-verify" key of DEFAULT_TEMPLATES.
# Placeholders filled in at render time: {plan}, {checklist}, {docs},
# {iteration}, {max_iterations}, {feedback}, {execution_evidence}.
# The prompt tells the verifier to end with "VERDICT: PASS" or "VERDICT: FAIL".
PLAN_VERIFY_TEMPLATE = """\
You are verifying the latest planning package after plan-only revisions.
## Plan
{plan}
## Checklist
{checklist}
## Reference Documents
{docs}
## Previous Review (iteration {iteration} of {max_iterations})
{feedback}
## Execution Evidence
{execution_evidence}
## Verify Instructions
Review the latest planning package itself: the plan, checklist, and reference documents.
You MAY inspect the current repository to confirm that the documents describe the current reality accurately enough.
Do NOT require production code, scripts, infrastructure, or external environments to already be fixed.
For `plan-review`, PASS means the documents are now clear enough to execute without further document edits.
A known implementation gap, repo mismatch, legacy script problem, external dependency, or environment blocker is NOT a FAIL by itself if:
- the issue is described accurately in the planning package,
- the affected scope or gate is documented clearly,
- the required follow-up action or non-go condition is documented clearly, and
- the package does not misrepresent unresolved work as already complete.
Only mark FAIL when the planning package still needs correction, such as:
- unresolved ambiguity or contradiction in the documents,
- missing prerequisite, dependency, gate, ownership, or evidence rule,
- a known blocker that is still described inaccurately or misleadingly,
- conflicting source-of-truth rules across the planning documents,
- checklist or status criteria that would cause an operator to make the wrong decision.
Report implementation/repository problems that are already documented correctly under "Out of Scope Issues" or note them as documented risks, not as FAIL reasons.
## Output Format
### Remaining Document Issues
- [Major][Omission] Description (reference specific plan/checklist/doc item)
(Write "None" if no document issue remains.)
### Documented Risks / Out of Scope
- Description of a real implementation/repository/environment risk that is already documented correctly
(Write "None" if nothing notable remains.)
### Summary
- Remaining document issues: N
- Documented risks / out-of-scope items: N
- Overall quality: [BRIEF ASSESSMENT]
### Verdict
If the planning package no longer needs document changes, output: VERDICT: PASS
Otherwise output: VERDICT: FAIL
"""
# Korean-language counterpart of the plan-verify prompt; registered under the
# "plan-verify" key of DEFAULT_TEMPLATES. Uses the placeholders {plan},
# {checklist}, {docs}, {iteration}, {max_iterations}, {feedback} and
# {execution_evidence}, and asks for a final "VERDICT: PASS" / "VERDICT: FAIL".
PLAN_VERIFY_TEMPLATE_KO = """\
당신은 plan-only 수정 이후 최신 기획 패키지를 재검증하는 검토자입니다.
## 기획서
{plan}
## 체크리스트
{checklist}
## 참고 문서
{docs}
## 이전 리뷰 결과 ({max_iterations}회 중 {iteration}번째)
{feedback}
## 실행 증거
{execution_evidence}
## 검증 지침
최신 기획 패키지 자체를 다시 검토하세요: 기획서, 체크리스트, 참고 문서를 함께 봅니다.
현재 저장소를 살펴보며 문서가 현실을 정확히 설명하는지 확인할 수는 있지만, 프로덕션 코드, 스크립트, 인프라, 외부 환경이 이미 수정되어 있을 것을 요구하면 안 됩니다.
`plan-review`에서 PASS의 뜻은 "이제 문서를 더 고칠 필요 없이 이 계획을 실행할 수 있다"입니다.
즉 구현 공백, 저장소 불일치, legacy 스크립트 문제, 외부 의존성, 환경 blocker가 남아 있어도 아래 조건을 만족하면 FAIL 사유가 아닙니다.
- 그 문제가 기획 패키지에 정확히 기록되어 있고
- 어떤 범위/게이트에 영향을 주는지 분명히 적혀 있고
- 필요한 후속 조치나 non-go 조건이 명확히 적혀 있고
- 아직 해결되지 않은 일을 이미 해결된 것처럼 오해하게 만들지 않는 경우
반대로 아래와 같은 경우에만 FAIL로 판정하세요.
- 문서 안에 아직 모호성이나 모순이 남아 있는 경우
- 선행조건, 의존성, 게이트, 담당 주체, evidence 규칙이 빠진 경우
- 알려진 blocker가 여전히 부정확하거나 오해를 부르는 방식으로 서술된 경우
- 기획 문서들 사이에서 source-of-truth 규칙이 충돌하는 경우
- 체크리스트나 상태 판정 기준 때문에 실행자가 잘못된 결정을 내릴 수 있는 경우
이미 문서에 정확히 기록된 구현/저장소 문제는 "범위 밖 이슈" 또는 "문서화된 리스크"로만 남기고, 그 자체를 FAIL 사유로 삼지 마세요.
## 출력 형식
### 남은 문서 이슈
- [Major][누락] 이슈 설명 (관련 기획서/체크리스트/참고 문서 항목 참조)
(남은 문서 이슈가 없으면 "없음"이라고 작성하세요.)
### 문서화된 리스크 / 범위 밖 이슈
- 실제 구현/저장소/환경 리스크이지만 문서에는 이미 정확히 반영된 항목
(해당 사항이 없으면 "없음"이라고 작성하세요.)
### 요약
- 남은 문서 이슈 수: N
- 문서화된 리스크 / 범위 밖 항목 수: N
- 전체 품질: [간략한 평가]
### 판정
기획 패키지를 더 수정할 필요가 없으면: VERDICT: PASS
그렇지 않으면: VERDICT: FAIL
"""
# English prompt for reviewing the implementation and the planning package
# together; registered under the "coding-plan-review" key of DEFAULT_TEMPLATES.
# Placeholders: {artifact_references}, {execution_evidence}.
# NOTE(review): the text refers to previous review feedback "provided above",
# but this template has no {feedback} placeholder — presumably prior reviews
# reach the agent through {artifact_references}; confirm against the renderer.
CODING_PLAN_REVIEW_TEMPLATE = """\
You are reviewing both the implementation and the planning package together.
## Artifact References
{artifact_references}
## Execution Evidence
{execution_evidence}
## Review Instructions
Read the referenced plan/checklist/docs/review artifacts directly from disk. \
Inspect the current repository and evaluate BOTH:
1. whether the implementation matches the plan/checklist/docs, and
2. whether the planning package still accurately describes the implementation target and constraints.
Report only issues that matter to delivering the original plan correctly. \
Do not invent new scope. Distinguish between code issues, document issues, and consistency gaps between them.
For each issue found, classify it with BOTH severity AND category:
- Severity: Critical / Major / Minor
- Category: Over-engineering / Omission
If previous review feedback is provided above, mark each prior item as CONFIRMED or DISMISSED.
If you find issues outside the original plan scope, report them separately under "Out of Scope Issues".
### Verdict
If the implementation satisfies the plan/checklist and the planning package no longer needs correction, output: VERDICT: PASS
Otherwise output: VERDICT: FAIL
"""
# Korean-language counterpart of the combined code + planning-package review
# prompt; registered under the "coding-plan-review" key of DEFAULT_TEMPLATES.
# Placeholders: {artifact_references}, {execution_evidence}. The prompt asks
# for a final "VERDICT: PASS" or "VERDICT: FAIL" line.
CODING_PLAN_REVIEW_TEMPLATE_KO = """\
당신은 구현 결과와 기획 문서 패키지를 함께 검토하는 리뷰어입니다.
## 참조 아티팩트
{artifact_references}
## 실행 증거
{execution_evidence}
## 검토 지침
참조된 plan/checklist/docs/review markdown를 직접 읽고 현재 저장소를 확인한 뒤, 아래 두 가지를 함께 평가하세요.
1. 현재 구현이 plan/checklist/docs와 일치하는가
2. 기획 문서 패키지가 현재 구현 목표와 제약을 여전히 정확하게 설명하는가
원래 계획을 제대로 완수하는 데 필요한 이슈만 보고하세요. 새로운 범위를 만들지 마세요.
코드 이슈, 문서 이슈, 코드-문서 불일치를 구분해서 적으세요.
발견된 각 이슈에 심각도와 카테고리를 모두 부여하세요.
- 심각도: Critical / Major / Minor
- 카테고리: 과최적화 / 누락
이전 리뷰 피드백이 있으면 각 항목을 CONFIRMED 또는 DISMISSED로 판정하세요.
원래 계획 범위 밖 이슈는 "범위 밖 이슈"로 별도 분리하세요.
### 판정
구현이 plan/checklist를 충족하고 기획 문서 패키지도 더 이상 수정할 필요가 없으면: VERDICT: PASS
그렇지 않으면: VERDICT: FAIL
"""
# English prompt for the fix step that applies confirmed review issues to both
# the code and the planning artifacts; registered under the "coding-plan-fix"
# key of DEFAULT_TEMPLATES. Placeholders: {artifact_references}, {feedback}.
CODING_PLAN_FIX_TEMPLATE = """\
You are fixing confirmed issues in both the implementation and the planning package.
## Artifact References
{artifact_references}
## Current Review Feedback
{feedback}
## Instructions
1. Read the referenced plan/checklist/docs/review artifacts directly from disk.
2. Fix ONLY the confirmed issues from the current review feedback.
3. You may update both implementation files and planning artifacts when needed.
4. Preserve the original plan intent and scope. Do not silently broaden requirements.
5. Keep code, plan, checklist, and supporting docs consistent after edits.
6. After editing, briefly summarize what you changed and any blocker that still needs human input.
"""
# Korean-language counterpart of the combined code + planning-package fix
# prompt; registered under the "coding-plan-fix" key of DEFAULT_TEMPLATES.
# Placeholders: {artifact_references}, {feedback}.
CODING_PLAN_FIX_TEMPLATE_KO = """\
당신은 현재 리뷰에서 확정된 이슈를 코드와 기획 문서 패키지에 함께 반영하는 수정 담당자입니다.
## 참조 아티팩트
{artifact_references}
## 현재 리뷰 피드백
{feedback}
## 지침
1. 참조된 plan/checklist/docs/review markdown를 직접 읽으세요.
2. 현재 리뷰 피드백에서 확정된 이슈만 수정하세요.
3. 필요하면 코드와 기획 문서를 모두 수정할 수 있습니다.
4. 최초 plan의 의도와 범위를 유지하세요. 요구사항을 몰래 넓히지 마세요.
5. 수정 후 코드, plan, checklist, 참고 문서가 서로 모순되지 않게 유지하세요.
6. 수정이 끝나면 무엇을 바꿨는지와 아직 사람 판단이 필요한 blocker가 있는지 짧게 정리하세요.
"""
AGGREGATE_REVIEW_TEMPLATE = """\
You are adjudicating multiple review results and turning them into an actionable decision.
@@ -645,6 +857,9 @@ DEFAULT_TEMPLATES: dict[str, dict[str, str]] = {
"review": REVIEW_TEMPLATE,
"plan-review": PLAN_REVIEW_TEMPLATE,
"plan-fix": PLAN_FIX_TEMPLATE,
"plan-verify": PLAN_VERIFY_TEMPLATE,
"coding-plan-review": CODING_PLAN_REVIEW_TEMPLATE,
"coding-plan-fix": CODING_PLAN_FIX_TEMPLATE,
"review-only": REVIEW_ONLY_TEMPLATE,
"aggregate-review": AGGREGATE_REVIEW_TEMPLATE,
},
@@ -653,6 +868,9 @@ DEFAULT_TEMPLATES: dict[str, dict[str, str]] = {
"review": REVIEW_TEMPLATE_KO,
"plan-review": PLAN_REVIEW_TEMPLATE_KO,
"plan-fix": PLAN_FIX_TEMPLATE_KO,
"plan-verify": PLAN_VERIFY_TEMPLATE_KO,
"coding-plan-review": CODING_PLAN_REVIEW_TEMPLATE_KO,
"coding-plan-fix": CODING_PLAN_FIX_TEMPLATE_KO,
"review-only": REVIEW_ONLY_TEMPLATE_KO,
"aggregate-review": AGGREGATE_REVIEW_TEMPLATE_KO,
},
@@ -961,7 +1179,7 @@ def _build_plan_review_preset(
name="verify",
agent=senior_agent,
role="review",
prompt_template="default:plan-review",
prompt_template="default:plan-verify",
output_key="verify_result",
verdict=True,
),
@@ -1065,16 +1283,97 @@ def _build_coding_review_fix_preset(
]
def _build_coding_plan_review_preset(
    coders: list[str], reviewers: list[str], seniors: list[str],
) -> list[PhaseConfig]:
    """Build the two-phase ``coding-plan-review`` pipeline.

    Phase ``initial_coding`` runs the first coder once. Phase
    ``coding_plan_review`` runs one parallel review step per reviewer, then an
    aggregate review, a combined code + document fix by the first coder, and a
    verdict-producing verify step.

    Args:
        coders: Agent names for coding; only the first one is used.
        reviewers: Agent names that each get a parallel review step.
        seniors: Agent names for aggregation and verification; the first one is
            used, falling back to the first reviewer when the list is empty.

    Returns:
        The ``initial_coding`` phase followed by the ``coding_plan_review`` phase.

    Raises:
        ValueError: If *coders* or *reviewers* is empty.
    """
    if not coders:
        raise ValueError("'coding-plan-review' preset requires at least 1 coder")
    if not reviewers:
        raise ValueError("'coding-plan-review' preset requires at least 1 reviewer")

    reviewer_keys = _unique_safe_keys(reviewers)
    # One non-verdict review step per reviewer, all flagged to run in parallel.
    review_steps: list[StepConfig] = [
        StepConfig(
            name=f"review_{key}",
            agent=reviewer_agent,
            role="review",
            prompt_template="default:coding-plan-review",
            output_key=f"review_{key}",
            verdict=False,
            parallel=True,
        )
        for reviewer_agent, key in zip(reviewers, reviewer_keys)
    ]

    lead_coder = coders[0]
    senior_agent = seniors[0] if seniors else reviewers[0]
    # Step names and output keys share the same "review_<key>" spelling.
    review_step_names = [f"review_{key}" for key in reviewer_keys]
    review_output_keys = list(review_step_names)

    initial_phase = PhaseConfig(
        name="initial_coding",
        steps=[
            StepConfig(
                name="coding",
                agent=lead_coder,
                role="coding",
                prompt_template="default:coding",
                output_key="coding_output",
            ),
        ],
        max_iterations=1,
        consecutive_pass=1,
    )

    aggregate_step = StepConfig(
        name="aggregate_review",
        agent=senior_agent,
        role="review",
        prompt_template="default:aggregate-review",
        output_key="aggregate_review",
        context_override={
            "candidate_outputs": (
                "Current implementation and planning package under review "
                "(code + plan/checklist/reference docs)."
            ),
            "reviews_bundle": _build_named_bundle(
                reviewers, review_step_names, review_output_keys, "Review",
            ),
        },
    )
    fix_step = StepConfig(
        name="coding_plan_fix",
        agent=lead_coder,
        role="coding",
        prompt_template="default:coding-plan-fix",
        output_key="coding_plan_fix_output",
        # The fix prompt receives the aggregated review as its feedback.
        context_override={"feedback": "{aggregate_review}"},
    )
    verify_step = StepConfig(
        name="verify",
        agent=senior_agent,
        role="review",
        prompt_template="default:coding-plan-review",
        output_key="verify_result",
        verdict=True,
    )
    review_phase = PhaseConfig(
        name="coding_plan_review",
        steps=review_steps + [aggregate_step, fix_step, verify_step],
        max_iterations=5,
        consecutive_pass=1,
    )

    return [initial_phase, review_phase]
PIPELINE_PRESETS: dict[str, Callable] = {
"simple": _build_simple_preset,
"cross-review": _build_cross_review_preset,
"plan-review": _build_plan_review_preset,
"review-only": _build_review_only_preset,
}
PHASED_PRESETS: dict[str, Callable] = {
"review-fix": _build_review_fix_preset,
"coding-review-fix": _build_coding_review_fix_preset,
"coding-plan-review": _build_coding_plan_review_preset,
}
ALL_PRESET_NAMES: list[str] = list(PIPELINE_PRESETS.keys()) + list(PHASED_PRESETS.keys())