Compare commits
2 Commits
7b95233edf
...
0858675076
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0858675076 | ||
|
|
cc8d583914 |
@@ -434,6 +434,10 @@ def main(argv: list[str] | None = None) -> int:
|
|||||||
"--reviewer-model", default=None, metavar="MODEL",
|
"--reviewer-model", default=None, metavar="MODEL",
|
||||||
help="Reviewer 에이전트 모델만 변경",
|
help="Reviewer 에이전트 모델만 변경",
|
||||||
)
|
)
|
||||||
|
agent_group.add_argument(
|
||||||
|
"--senior-model", default=None, metavar="MODEL",
|
||||||
|
help="Senior 에이전트 모델만 변경",
|
||||||
|
)
|
||||||
|
|
||||||
# -- 파이프라인 --
|
# -- 파이프라인 --
|
||||||
pipe_group = run_parser.add_argument_group("파이프라인")
|
pipe_group = run_parser.add_argument_group("파이프라인")
|
||||||
@@ -474,7 +478,7 @@ def main(argv: list[str] | None = None) -> int:
|
|||||||
)
|
)
|
||||||
etc_group.add_argument(
|
etc_group.add_argument(
|
||||||
"--output-dir", type=Path, default=None,
|
"--output-dir", type=Path, default=None,
|
||||||
help="결과 저장 디렉토리 (기본: output/)",
|
help="결과 저장 디렉토리 (기본: .cross-eval/output/)",
|
||||||
)
|
)
|
||||||
etc_group.add_argument(
|
etc_group.add_argument(
|
||||||
"--dry-run", action="store_true",
|
"--dry-run", action="store_true",
|
||||||
@@ -953,13 +957,16 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|||||||
if args.model is not None:
|
if args.model is not None:
|
||||||
for agent_name in config.agents:
|
for agent_name in config.agents:
|
||||||
_apply_model_override(config, agent_name, args.model)
|
_apply_model_override(config, agent_name, args.model)
|
||||||
# --coder-model / --reviewer-model: apply by role
|
# --coder-model / --reviewer-model / --senior-model: apply by role
|
||||||
if args.coder_model is not None:
|
if args.coder_model is not None:
|
||||||
for coder_name in config.coders:
|
for coder_name in config.coders:
|
||||||
_apply_model_override(config, coder_name, args.coder_model)
|
_apply_model_override(config, coder_name, args.coder_model)
|
||||||
if args.reviewer_model is not None:
|
if args.reviewer_model is not None:
|
||||||
for reviewer_name in config.reviewers:
|
for reviewer_name in config.reviewers:
|
||||||
_apply_model_override(config, reviewer_name, args.reviewer_model)
|
_apply_model_override(config, reviewer_name, args.reviewer_model)
|
||||||
|
if args.senior_model is not None:
|
||||||
|
for senior_name in config.seniors:
|
||||||
|
_apply_model_override(config, senior_name, args.senior_model)
|
||||||
|
|
||||||
# --plan / --checklist shortcuts
|
# --plan / --checklist shortcuts
|
||||||
for key, val in [("plan", args.plan), ("checklist", args.checklist)]:
|
for key, val in [("plan", args.plan), ("checklist", args.checklist)]:
|
||||||
|
|||||||
@@ -62,15 +62,7 @@ _CLAUDE_CODER_ARGS = list(_CLAUDE_BASE_ARGS) + [
|
|||||||
"bypassPermissions",
|
"bypassPermissions",
|
||||||
]
|
]
|
||||||
|
|
||||||
_CLAUDE_REVIEW_ARGS = [
|
_CLAUDE_REVIEW_ARGS = list(_CLAUDE_BASE_ARGS)
|
||||||
"--setting-sources",
|
|
||||||
"user",
|
|
||||||
"--disable-slash-commands",
|
|
||||||
"--model",
|
|
||||||
"opus",
|
|
||||||
"--permission-mode",
|
|
||||||
"plan",
|
|
||||||
]
|
|
||||||
|
|
||||||
_CODER_SYSTEM_PROMPT = (
|
_CODER_SYSTEM_PROMPT = (
|
||||||
"You are a senior software engineer implementing code changes.\n"
|
"You are a senior software engineer implementing code changes.\n"
|
||||||
@@ -425,6 +417,8 @@ def load_config(path: Path) -> PipelineConfig:
|
|||||||
|
|
||||||
def _parse_raw(raw: dict[str, Any], config_path: Path) -> PipelineConfig:
|
def _parse_raw(raw: dict[str, Any], config_path: Path) -> PipelineConfig:
|
||||||
"""Parse raw YAML dict into PipelineConfig."""
|
"""Parse raw YAML dict into PipelineConfig."""
|
||||||
|
project_root = config_path.parent.parent if config_path.parent.name == ".cross-eval" else config_path.parent
|
||||||
|
|
||||||
# --- agents ---
|
# --- agents ---
|
||||||
agents: dict[str, AgentConfig] = {}
|
agents: dict[str, AgentConfig] = {}
|
||||||
for name, agent_data in raw.get("agents", {}).items():
|
for name, agent_data in raw.get("agents", {}).items():
|
||||||
@@ -494,8 +488,12 @@ def _parse_raw(raw: dict[str, Any], config_path: Path) -> PipelineConfig:
|
|||||||
if isinstance(pipeline_raw, str) and pipeline_raw.startswith("preset:"):
|
if isinstance(pipeline_raw, str) and pipeline_raw.startswith("preset:"):
|
||||||
preset_name = pipeline_raw.split(":", 1)[1]
|
preset_name = pipeline_raw.split(":", 1)[1]
|
||||||
|
|
||||||
|
output_dir = Path(raw.get("output_dir", ".cross-eval/output"))
|
||||||
|
if not output_dir.is_absolute():
|
||||||
|
output_dir = project_root / output_dir
|
||||||
|
|
||||||
config = PipelineConfig(
|
config = PipelineConfig(
|
||||||
output_dir=Path(raw.get("output_dir", ".cross-eval/output")),
|
output_dir=output_dir,
|
||||||
max_iterations=int(raw.get("max_iterations", 3)),
|
max_iterations=int(raw.get("max_iterations", 3)),
|
||||||
min_iterations=int(raw.get("min_iterations", 1)),
|
min_iterations=int(raw.get("min_iterations", 1)),
|
||||||
verbose=bool(raw.get("verbose", False)),
|
verbose=bool(raw.get("verbose", False)),
|
||||||
|
|||||||
@@ -104,45 +104,73 @@ def _setup_worktree(cwd: Path, run_dir: Path, preset_name: str) -> tuple[Path, s
|
|||||||
return worktree_path, branch_name
|
return worktree_path, branch_name
|
||||||
|
|
||||||
|
|
||||||
def _snapshot_repo_state(cwd: Path) -> str:
|
def _copy_inputs_to_worktree(
|
||||||
|
config: PipelineConfig,
|
||||||
|
worktree_path: Path,
|
||||||
|
) -> None:
|
||||||
|
"""Copy input files (plan, checklist, etc.) into the worktree.
|
||||||
|
|
||||||
|
This ensures agents running in plan/read-only mode within the worktree
|
||||||
|
can access these files, even though the originals live in the base repo.
|
||||||
|
Updates config.inputs in-place so subsequent reference refreshes use
|
||||||
|
worktree-local paths.
|
||||||
|
"""
|
||||||
|
import shutil
|
||||||
|
inputs_dir = worktree_path / ".cross-eval-inputs"
|
||||||
|
inputs_dir.mkdir(exist_ok=True)
|
||||||
|
# Exclude from git so these don't pollute agentic diffs
|
||||||
|
(inputs_dir / ".gitignore").write_text("*\n", encoding="utf-8")
|
||||||
|
for key, val in list(config.inputs.items()):
|
||||||
|
if key.endswith("_ref") or not isinstance(val, Path):
|
||||||
|
continue
|
||||||
|
if not val.exists():
|
||||||
|
continue
|
||||||
|
dest = inputs_dir / val.name
|
||||||
|
shutil.copy2(val, dest)
|
||||||
|
config.inputs[key] = dest
|
||||||
|
|
||||||
|
|
||||||
|
def _snapshot_repo_state(cwd: Path) -> dict[str, str]:
|
||||||
"""Capture the base repository working-tree state.
|
"""Capture the base repository working-tree state.
|
||||||
|
|
||||||
This is used to detect agentic runs that accidentally modify the original
|
This is used to detect agentic runs that accidentally modify the original
|
||||||
checkout instead of the isolated worktree.
|
checkout instead of the isolated worktree.
|
||||||
|
|
||||||
|
We intentionally use only two components:
|
||||||
|
- ``diff``: ``git diff HEAD`` — all tracked changes vs HEAD, combining
|
||||||
|
staged and unstaged so that staging-state changes don't cause false
|
||||||
|
positives.
|
||||||
|
- ``untracked``: SHA-256 hashes of untracked files — detects new or
|
||||||
|
modified untracked files appearing in the base repo.
|
||||||
|
|
||||||
|
``git status --short`` and ``git diff --cached`` are NOT used because
|
||||||
|
external tools (IDEs, git hooks) can change staging state during a
|
||||||
|
long-running pipeline, causing spurious failures.
|
||||||
"""
|
"""
|
||||||
status = subprocess.run(
|
# Refresh index stat cache to prevent false positives from mtime drift
|
||||||
["git", "status", "--short", "--untracked-files=all"],
|
subprocess.run(
|
||||||
|
["git", "update-index", "--refresh", "-q"],
|
||||||
cwd=cwd,
|
cwd=cwd,
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
text=True,
|
|
||||||
)
|
)
|
||||||
if status.returncode != 0:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
# git diff HEAD: all changes (staged + unstaged) vs HEAD
|
||||||
diff = subprocess.run(
|
diff = subprocess.run(
|
||||||
["git", "diff", "--no-ext-diff", "--binary", "HEAD"],
|
["git", "diff", "--no-ext-diff", "--binary", "HEAD"],
|
||||||
cwd=cwd,
|
cwd=cwd,
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
text=True,
|
text=True,
|
||||||
)
|
)
|
||||||
cached_diff = subprocess.run(
|
if diff.returncode != 0:
|
||||||
["git", "diff", "--no-ext-diff", "--binary", "--cached"],
|
return {}
|
||||||
cwd=cwd,
|
|
||||||
capture_output=True,
|
|
||||||
text=True,
|
|
||||||
)
|
|
||||||
untracked = subprocess.run(
|
untracked = subprocess.run(
|
||||||
["git", "ls-files", "--others", "--exclude-standard", "-z"],
|
["git", "ls-files", "--others", "--exclude-standard", "-z"],
|
||||||
cwd=cwd,
|
cwd=cwd,
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
parts = [
|
untracked_parts: list[str] = []
|
||||||
status.stdout,
|
|
||||||
diff.stdout,
|
|
||||||
cached_diff.stdout,
|
|
||||||
]
|
|
||||||
|
|
||||||
if untracked.returncode == 0 and untracked.stdout:
|
if untracked.returncode == 0 and untracked.stdout:
|
||||||
for rel_path in untracked.stdout.decode("utf-8", errors="replace").split("\0"):
|
for rel_path in untracked.stdout.decode("utf-8", errors="replace").split("\0"):
|
||||||
if not rel_path:
|
if not rel_path:
|
||||||
@@ -150,11 +178,14 @@ def _snapshot_repo_state(cwd: Path) -> str:
|
|||||||
file_path = cwd / rel_path
|
file_path = cwd / rel_path
|
||||||
if file_path.is_file():
|
if file_path.is_file():
|
||||||
digest = sha256(file_path.read_bytes()).hexdigest()
|
digest = sha256(file_path.read_bytes()).hexdigest()
|
||||||
parts.append(f"UNTRACKED {rel_path} {digest}")
|
untracked_parts.append(f"UNTRACKED {rel_path} {digest}")
|
||||||
else:
|
else:
|
||||||
parts.append(f"UNTRACKED {rel_path} (non-file)")
|
untracked_parts.append(f"UNTRACKED {rel_path} (non-file)")
|
||||||
|
|
||||||
return "\n".join(parts)
|
return {
|
||||||
|
"diff": diff.stdout,
|
||||||
|
"untracked": "\n".join(untracked_parts),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def _snapshot_repo_status(cwd: Path) -> str:
|
def _snapshot_repo_status(cwd: Path) -> str:
|
||||||
@@ -172,7 +203,7 @@ def _snapshot_repo_status(cwd: Path) -> str:
|
|||||||
|
|
||||||
def _assert_base_repo_isolation(
|
def _assert_base_repo_isolation(
|
||||||
cwd: Path,
|
cwd: Path,
|
||||||
baseline_state: str,
|
baseline_state: dict[str, str],
|
||||||
*,
|
*,
|
||||||
step_name: str,
|
step_name: str,
|
||||||
agent_name: str,
|
agent_name: str,
|
||||||
@@ -184,6 +215,18 @@ def _assert_base_repo_isolation(
|
|||||||
if current_state == baseline_state:
|
if current_state == baseline_state:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Identify which component(s) actually changed
|
||||||
|
changed: list[str] = []
|
||||||
|
for key in ("diff", "untracked"):
|
||||||
|
if baseline_state.get(key, "") != current_state.get(key, ""):
|
||||||
|
changed.append(key)
|
||||||
|
|
||||||
|
if not changed:
|
||||||
|
# State dicts differ only in keys we no longer track — benign.
|
||||||
|
return
|
||||||
|
|
||||||
|
# untracked-only change: new files appeared — real leak
|
||||||
|
# diff-only change: tracked file content changed — real leak
|
||||||
current_status = _snapshot_repo_status(cwd)
|
current_status = _snapshot_repo_status(cwd)
|
||||||
before = baseline_status or "(clean)"
|
before = baseline_status or "(clean)"
|
||||||
after = current_status or "(clean)"
|
after = current_status or "(clean)"
|
||||||
@@ -191,7 +234,8 @@ def _assert_base_repo_isolation(
|
|||||||
"Agent modified the base repository instead of the isolated worktree.\n\n"
|
"Agent modified the base repository instead of the isolated worktree.\n\n"
|
||||||
f"Step: {step_name}\n"
|
f"Step: {step_name}\n"
|
||||||
f"Agent: {agent_name}\n"
|
f"Agent: {agent_name}\n"
|
||||||
f"Worktree: {worktree_path}\n\n"
|
f"Worktree: {worktree_path}\n"
|
||||||
|
f"Changed components: {', '.join(changed)}\n\n"
|
||||||
f"Baseline status:\n{before}\n\n"
|
f"Baseline status:\n{before}\n\n"
|
||||||
f"Current status:\n{after}"
|
f"Current status:\n{after}"
|
||||||
)
|
)
|
||||||
@@ -268,12 +312,14 @@ def _run_simple_pipeline(
|
|||||||
# Setup shared worktree for agentic mode
|
# Setup shared worktree for agentic mode
|
||||||
worktree_path: Path | None = None
|
worktree_path: Path | None = None
|
||||||
agentic_branch_name: str | None = None
|
agentic_branch_name: str | None = None
|
||||||
base_repo_state: str | None = None
|
base_repo_state: dict[str, str] | None = None
|
||||||
base_repo_status: str | None = None
|
base_repo_status: str | None = None
|
||||||
if not dry_run and _has_agentic_steps(config, config.pipeline):
|
if not dry_run and _has_agentic_steps(config, config.pipeline):
|
||||||
worktree_path, agentic_branch_name = _setup_worktree(
|
worktree_path, agentic_branch_name = _setup_worktree(
|
||||||
cwd, run_dir, config.preset_name,
|
cwd, run_dir, config.preset_name,
|
||||||
)
|
)
|
||||||
|
_copy_inputs_to_worktree(config, worktree_path)
|
||||||
|
_refresh_input_references(config, input_contents)
|
||||||
base_repo_state = _snapshot_repo_state(cwd)
|
base_repo_state = _snapshot_repo_state(cwd)
|
||||||
base_repo_status = _snapshot_repo_status(cwd)
|
base_repo_status = _snapshot_repo_status(cwd)
|
||||||
|
|
||||||
@@ -443,12 +489,14 @@ def _run_phased_pipeline(
|
|||||||
all_phase_steps = [s for p in config.phases for s in p.steps]
|
all_phase_steps = [s for p in config.phases for s in p.steps]
|
||||||
worktree_path: Path | None = None
|
worktree_path: Path | None = None
|
||||||
agentic_branch_name: str | None = None
|
agentic_branch_name: str | None = None
|
||||||
base_repo_state: str | None = None
|
base_repo_state: dict[str, str] | None = None
|
||||||
base_repo_status: str | None = None
|
base_repo_status: str | None = None
|
||||||
if not dry_run and _has_agentic_steps(config, all_phase_steps):
|
if not dry_run and _has_agentic_steps(config, all_phase_steps):
|
||||||
worktree_path, agentic_branch_name = _setup_worktree(
|
worktree_path, agentic_branch_name = _setup_worktree(
|
||||||
cwd, run_dir, config.preset_name,
|
cwd, run_dir, config.preset_name,
|
||||||
)
|
)
|
||||||
|
_copy_inputs_to_worktree(config, worktree_path)
|
||||||
|
_refresh_input_references(config, input_contents)
|
||||||
base_repo_state = _snapshot_repo_state(cwd)
|
base_repo_state = _snapshot_repo_state(cwd)
|
||||||
base_repo_status = _snapshot_repo_status(cwd)
|
base_repo_status = _snapshot_repo_status(cwd)
|
||||||
|
|
||||||
@@ -844,7 +892,7 @@ def _run_steps(
|
|||||||
phase_name: str | None = None,
|
phase_name: str | None = None,
|
||||||
worktree_path: Path | None = None,
|
worktree_path: Path | None = None,
|
||||||
runtime_env: dict[str, str] | None = None,
|
runtime_env: dict[str, str] | None = None,
|
||||||
base_repo_state: str | None = None,
|
base_repo_state: dict[str, str] | None = None,
|
||||||
base_repo_status: str | None = None,
|
base_repo_status: str | None = None,
|
||||||
) -> tuple[dict[str, str], dict[str, AgentResult], str | None]:
|
) -> tuple[dict[str, str], dict[str, AgentResult], str | None]:
|
||||||
"""Execute all steps in one iteration, parallelizing where possible."""
|
"""Execute all steps in one iteration, parallelizing where possible."""
|
||||||
@@ -933,7 +981,7 @@ def _execute_step(
|
|||||||
quiet: bool = False,
|
quiet: bool = False,
|
||||||
worktree_path: Path | None = None,
|
worktree_path: Path | None = None,
|
||||||
runtime_env: dict[str, str] | None = None,
|
runtime_env: dict[str, str] | None = None,
|
||||||
base_repo_state: str | None = None,
|
base_repo_state: dict[str, str] | None = None,
|
||||||
base_repo_status: str | None = None,
|
base_repo_status: str | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Execute a single step, updating step_outputs and step_results in place."""
|
"""Execute a single step, updating step_outputs and step_results in place."""
|
||||||
@@ -1066,7 +1114,7 @@ def _execute_parallel_batch(
|
|||||||
phase_name: str | None = None,
|
phase_name: str | None = None,
|
||||||
worktree_path: Path | None = None,
|
worktree_path: Path | None = None,
|
||||||
runtime_env: dict[str, str] | None = None,
|
runtime_env: dict[str, str] | None = None,
|
||||||
base_repo_state: str | None = None,
|
base_repo_state: dict[str, str] | None = None,
|
||||||
base_repo_status: str | None = None,
|
base_repo_status: str | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Execute multiple steps in parallel using threads."""
|
"""Execute multiple steps in parallel using threads."""
|
||||||
|
|||||||
@@ -206,21 +206,28 @@ class TestMakeWorktreeDir(unittest.TestCase):
|
|||||||
class TestBaseRepoIsolation(unittest.TestCase):
|
class TestBaseRepoIsolation(unittest.TestCase):
|
||||||
"""Base repo mutations should fail fast during agentic execution."""
|
"""Base repo mutations should fail fast during agentic execution."""
|
||||||
|
|
||||||
def test_raises_when_base_repo_status_changes(self) -> None:
|
def test_raises_when_base_repo_state_changes(self) -> None:
|
||||||
with tempfile.TemporaryDirectory() as td:
|
with tempfile.TemporaryDirectory() as td:
|
||||||
base = Path(td) / "repo"
|
base = Path(td) / "repo"
|
||||||
worktree = Path(td) / "worktree"
|
worktree = Path(td) / "worktree"
|
||||||
base.mkdir()
|
base.mkdir()
|
||||||
worktree.mkdir()
|
worktree.mkdir()
|
||||||
|
|
||||||
|
# Baseline has a diff that won't match a non-git directory
|
||||||
|
# (which returns {}), triggering the isolation error.
|
||||||
|
baseline_state = {
|
||||||
|
"diff": "diff --git a/file.py ...\n",
|
||||||
|
"untracked": "",
|
||||||
|
}
|
||||||
|
|
||||||
with self.assertRaises(RuntimeError) as ctx:
|
with self.assertRaises(RuntimeError) as ctx:
|
||||||
_assert_base_repo_isolation(
|
_assert_base_repo_isolation(
|
||||||
base,
|
base,
|
||||||
"M cross_eval/agent.py",
|
baseline_state,
|
||||||
step_name="coding",
|
step_name="coding",
|
||||||
agent_name="claude-coder",
|
agent_name="claude-coder",
|
||||||
worktree_path=worktree,
|
worktree_path=worktree,
|
||||||
baseline_status="M cross_eval/agent.py",
|
baseline_status="M file.py",
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIn("base repository", str(ctx.exception))
|
self.assertIn("base repository", str(ctx.exception))
|
||||||
|
|||||||
@@ -1030,6 +1030,60 @@ class FixPresetBehaviorTest(unittest.TestCase):
|
|||||||
self.assertTrue(captured["agentic"])
|
self.assertTrue(captured["agentic"])
|
||||||
self.assertEqual(captured["phase_max"], 3)
|
self.assertEqual(captured["phase_max"], 3)
|
||||||
|
|
||||||
|
def test_run_senior_model_override_applies_only_to_seniors(self) -> None:
|
||||||
|
captured: dict[str, list[str]] = {}
|
||||||
|
|
||||||
|
def _fake_run_pipeline(config, **kwargs):
|
||||||
|
captured["coder_args"] = list(config.agents[config.coders[0]].args)
|
||||||
|
captured["reviewer_args"] = list(config.agents[config.reviewers[0]].args)
|
||||||
|
captured["senior_args"] = list(config.agents[config.seniors[0]].args)
|
||||||
|
return PipelineResult(
|
||||||
|
iterations=[],
|
||||||
|
final_verdict="PASS",
|
||||||
|
run_dir=Path(".cross-eval/output"),
|
||||||
|
)
|
||||||
|
|
||||||
|
with patch("cross_eval.pipeline.run_pipeline", side_effect=_fake_run_pipeline):
|
||||||
|
exit_code = main([
|
||||||
|
"run",
|
||||||
|
"--preset", "review-fix",
|
||||||
|
"--coder", "claude",
|
||||||
|
"--reviewer", "claude",
|
||||||
|
"--senior", "claude",
|
||||||
|
"--senior-model", "sonnet",
|
||||||
|
"--dry-run",
|
||||||
|
])
|
||||||
|
|
||||||
|
self.assertEqual(exit_code, 0)
|
||||||
|
self.assertIn("opus", captured["coder_args"])
|
||||||
|
self.assertIn("opus", captured["reviewer_args"])
|
||||||
|
self.assertIn("sonnet", captured["senior_args"])
|
||||||
|
|
||||||
|
|
||||||
|
class OutputDirectoryResolutionTest(unittest.TestCase):
|
||||||
|
def test_load_config_resolves_output_dir_from_project_root(self) -> None:
|
||||||
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
|
root = Path(tmpdir)
|
||||||
|
ce_dir = root / ".cross-eval"
|
||||||
|
ce_dir.mkdir()
|
||||||
|
(ce_dir / "plan.md").write_text("# plan\n", encoding="utf-8")
|
||||||
|
config_path = ce_dir / "config.yaml"
|
||||||
|
config_path.write_text(
|
||||||
|
(
|
||||||
|
"inputs:\n"
|
||||||
|
" plan: plan.md\n"
|
||||||
|
"coders: [claude-coder]\n"
|
||||||
|
"reviewers: [claude-reviewer]\n"
|
||||||
|
"pipeline: preset:simple\n"
|
||||||
|
"output_dir: .cross-eval/output\n"
|
||||||
|
),
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
|
||||||
|
config = load_config(config_path)
|
||||||
|
|
||||||
|
self.assertEqual(config.output_dir.resolve(), (root / ".cross-eval" / "output").resolve())
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -390,7 +390,7 @@ class TestPipelineHelpers(unittest.TestCase):
|
|||||||
|
|
||||||
snapshot = _snapshot_repo_state(repo)
|
snapshot = _snapshot_repo_state(repo)
|
||||||
|
|
||||||
self.assertIn("UNTRACKED scratch.txt", snapshot)
|
self.assertIn("UNTRACKED scratch.txt", snapshot["untracked"])
|
||||||
|
|
||||||
def test_finalize_worktree_deletes_empty_branch(self) -> None:
|
def test_finalize_worktree_deletes_empty_branch(self) -> None:
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
|
|||||||
Reference in New Issue
Block a user