Fix plan-review worktree document tracking

2026-03-15 00:35:42 +09:00
parent a85a490a9b
commit bf64d19123
4 changed files with 89 additions and 10 deletions
--- a/cross_eval/agent.py
+++ b/cross_eval/agent.py
@@ -34,6 +34,12 @@ _NO_CHANGE_ACK_MARKERS = (
    "code is correct as-is",
    "already correct",
    "no action required",
    "변경 없음",
    "수정 없음",
    "수정할 필요 없음",
    "변경할 필요 없음",
    "이미 올바름",
    "조치 불필요",
 )
 _CHANGE_CLAIM_MARKERS = (
    "summary of all changes made",
@@ -73,6 +79,15 @@ _CHANGE_CLAIM_MARKERS = (
    "completed the implementation",
    "all changes have been made",
    "changes are complete",
    "수정 완료",
    "모든 수정이 완료",
    "변경 요약",
    "변경 파일",
    "신규 생성",
    "기획서 수정",
    "체크리스트 수정",
    "문서를 수정",
    "문서 수정",
 )
--- a/cross_eval/pipeline.py
+++ b/cross_eval/pipeline.py
@@ -108,27 +108,48 @@ def _setup_worktree(cwd: Path, run_dir: Path, preset_name: str) -> tuple[Path, s
 def _copy_inputs_to_worktree(
     config: PipelineConfig,
     worktree_path: Path,
     *,
     base_cwd: Path,
 ) -> None:
     """Copy input files (plan, checklist, etc.) into the worktree.

     Repo-local inputs (paths under ``base_cwd``) are remapped to the
     corresponding path inside the worktree so agentic edits produce a real
     git diff; the file is copied only when the worktree does not already
     contain it. External inputs are copied into ``.cross-eval-inputs``. For
     ``plan-review`` these external copies remain tracked so document edits
     can survive on the branch; other presets keep them ignored to avoid
     polluting code diffs.

     External inputs that share a file name but come from different
     locations get distinct destination names (``name-2.ext``,
     ``name-3.ext``, ...) instead of overwriting one another.

     Updates ``config.inputs`` in-place so subsequent reference refreshes use
     worktree-local paths. Keys ending in ``_ref``, non-``Path`` values and
     paths that do not exist are left untouched.

     Args:
         config: Pipeline configuration; ``preset_name`` is read and entries
             of ``inputs`` are rewritten.
         worktree_path: Root directory of the worktree.
         base_cwd: Root of the base repository the inputs may belong to.
     """
     import shutil

     base_root = base_cwd.resolve()
     track_external_inputs = config.preset_name == "plan-review"
     inputs_dir = worktree_path / ".cross-eval-inputs"
     inputs_dir.mkdir(exist_ok=True)

     # Destination file name -> resolved source that owns it. Used to keep two
     # different external files with the same basename from clobbering each other.
     claimed: dict[str, Path] = {}
     if not track_external_inputs:
         # Exclude read-only input copies from git so they don't pollute code diffs.
         ignore_file = inputs_dir / ".gitignore"
         ignore_file.write_text("*\n", encoding="utf-8")
         # Reserve the name so an input called ".gitignore" cannot replace it.
         claimed[ignore_file.name] = ignore_file

     for key, val in list(config.inputs.items()):
         if key.endswith("_ref") or not isinstance(val, Path):
             continue
         if not val.exists():
             continue
         resolved = val.resolve()
         try:
             rel_path = resolved.relative_to(base_root)
         except ValueError:
             # Not under the base repo: treat as an external input.
             rel_path = None

         if rel_path is None:
             dest_name = val.name
             stem, suffix = Path(dest_name).stem, Path(dest_name).suffix
             serial = 1
             # The same source reached through two keys reuses one copy; a
             # different source gets the next free "-N" name.
             while claimed.get(dest_name, resolved) != resolved:
                 serial += 1
                 dest_name = f"{stem}-{serial}{suffix}"
             claimed[dest_name] = resolved
             dest = inputs_dir / dest_name
             shutil.copy2(resolved, dest)
             config.inputs[key] = dest
             continue

         worktree_target = worktree_path / rel_path
         # NOTE(review): an existing worktree file wins over the base copy, so
         # uncommitted edits to a tracked input in the base repo are presumably
         # not visible to agents -- confirm this is intended.
         if not worktree_target.exists():
             worktree_target.parent.mkdir(parents=True, exist_ok=True)
             shutil.copy2(resolved, worktree_target)
         config.inputs[key] = worktree_target
 def _snapshot_repo_state(cwd: Path) -> dict[str, str]:
@@ -329,7 +350,7 @@ def _run_simple_pipeline(
        worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
            cwd, run_dir, config.preset_name,
        )
-        _copy_inputs_to_worktree(config, worktree_path)
+        _copy_inputs_to_worktree(config, worktree_path, base_cwd=cwd)
        _refresh_input_references(config, input_contents)
        base_repo_state = _snapshot_repo_state(cwd)
        base_repo_status = _snapshot_repo_status(cwd)
@@ -508,7 +529,7 @@ def _run_phased_pipeline(
        worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
            cwd, run_dir, config.preset_name,
        )
-        _copy_inputs_to_worktree(config, worktree_path)
+        _copy_inputs_to_worktree(config, worktree_path, base_cwd=cwd)
        _refresh_input_references(config, input_contents)
        base_repo_state = _snapshot_repo_state(cwd)
        base_repo_status = _snapshot_repo_status(cwd)
--- a/tests/test_evidence.py
+++ b/tests/test_evidence.py
@@ -465,6 +465,9 @@ class TestExpandedClaimMarkers(unittest.TestCase):
    def test_changes_are_complete(self) -> None:
        """A response stating that changes are complete is detected as a change claim."""
        response = "All changes are complete"
        self.assertTrue(_claims_file_changes(response))
    def test_korean_change_summary_triggers(self) -> None:
        """A Korean completion message with a change summary is detected as a change claim."""
        response = "모든 수정이 완료되었습니다. 아래는 변경 요약입니다."
        self.assertTrue(_claims_file_changes(response))
 class TestExpandedNoChangeMarkers(unittest.TestCase):
    """New no-change markers prevent false positives."""
@@ -484,6 +487,9 @@ class TestExpandedNoChangeMarkers(unittest.TestCase):
    def test_no_action_required(self) -> None:
        """A bare "No action required" reply is not reported as a change claim."""
        response = "No action required"
        self.assertFalse(_claims_file_changes(response))
    def test_korean_no_change_marker(self) -> None:
        """A Korean "no need to change" reply is not reported as a change claim."""
        response = "변경할 필요 없음"
        self.assertFalse(_claims_file_changes(response))
 # ---------------------------------------------------------------------------
 # 6. Cross-iteration evidence propagation
--- a/tests/test_runtime_misc.py
+++ b/tests/test_runtime_misc.py
@@ -16,6 +16,7 @@ from cross_eval.agent import (
 )
 from cross_eval.models import AgentConfig, AgentResult, ExecutionConfig, PipelineConfig, StepConfig
 from cross_eval.pipeline import (
    _copy_inputs_to_worktree,
    _commit_iteration,
    _execute_parallel_batch,
    _execute_step,
@@ -118,6 +119,42 @@ class TestInvokeAgentRuntime(unittest.TestCase):
        self.assertEqual(ctx.exception.failure_type, "API_ERROR")
        self.assertIn("backend down", ctx.exception.raw_error)
 class TestWorktreeInputMapping(unittest.TestCase):
     """Checks how pipeline input paths are remapped into a worktree."""

     def test_repo_local_plan_input_maps_to_tracked_worktree_path(self) -> None:
         """A plan file committed in the repo maps to the same relative path in the worktree."""
         with tempfile.TemporaryDirectory() as scratch:
             scratch_root = Path(scratch)
             repo = scratch_root / "repo"
             repo.mkdir()
             _init_git_repo(repo)

             plan_file = repo / "plan.md"
             plan_file.write_text("plan v1\n", encoding="utf-8")
             # Commit the plan so it is a tracked file of the repository.
             for git_args in (
                 ["git", "add", "plan.md"],
                 ["git", "commit", "-m", "add plan"],
             ):
                 subprocess.run(git_args, cwd=repo, capture_output=True, check=True)

             branch = "cross-eval/test-plan-review"
             worktree_path, _ = create_worktree(repo, scratch_root / "wt", branch)
             try:
                 config = PipelineConfig(
                     inputs={"plan": plan_file},
                     preset_name="plan-review",
                 )
                 _copy_inputs_to_worktree(config, worktree_path, base_cwd=repo)
                 expected = worktree_path / "plan.md"
                 self.assertEqual(config.inputs["plan"], expected)
             finally:
                 remove_worktree(base_cwd=repo, work_dir=worktree_path)
                 # Best-effort branch cleanup: without check=True a non-zero
                 # exit status from git does not raise.
                 subprocess.run(
                     ["git", "branch", "-D", branch],
                     cwd=repo,
                     capture_output=True,
                 )
    def test_classify_unknown_failure(self) -> None:
        failure_type, suggested_action = _classify_agent_failure("weird crash")
        self.assertEqual(failure_type, "UNKNOWN")