Fix plan-review worktree document tracking

2026-03-15 00:35:42 +09:00
parent a85a490a9b
commit bf64d19123
4 changed files with 89 additions and 10 deletions
--- a/cross_eval/agent.py
+++ b/cross_eval/agent.py
@@ -34,6 +34,12 @@ _NO_CHANGE_ACK_MARKERS = (
    "code is correct as-is",
    "already correct",
    "no action required",
+    "변경 없음",
+    "수정 없음",
+    "수정할 필요 없음",
+    "변경할 필요 없음",
+    "이미 올바름",
+    "조치 불필요",
 )
 _CHANGE_CLAIM_MARKERS = (
    "summary of all changes made",
@@ -73,6 +79,15 @@ _CHANGE_CLAIM_MARKERS = (
    "completed the implementation",
    "all changes have been made",
    "changes are complete",
+    "수정 완료",
+    "모든 수정이 완료",
+    "변경 요약",
+    "변경 파일",
+    "신규 생성",
+    "기획서 수정",
+    "체크리스트 수정",
+    "문서를 수정",
+    "문서 수정",
 )


--- a/cross_eval/pipeline.py
+++ b/cross_eval/pipeline.py
@@ -108,27 +108,48 @@ def _setup_worktree(cwd: Path, run_dir: Path, preset_name: str) -> tuple[Path, s
 def _copy_inputs_to_worktree(
    config: PipelineConfig,
    worktree_path: Path,
+    *,
+    base_cwd: Path,
 ) -> None:
    """Copy input files (plan, checklist, etc.) into the worktree.

-    This ensures agents running in plan/read-only mode within the worktree
-    can access these files, even though the originals live in the base repo.
-    Updates config.inputs in-place so subsequent reference refreshes use
+    Repo-local inputs are remapped to the same relative path inside the worktree
+    so agentic edits produce a real git diff; they are copied only if missing
+    there. External inputs are copied by basename into ``.cross-eval-inputs``.
+    For ``plan-review`` that directory is left un-ignored so document edits can
+    be committed on the branch; other presets gitignore it to keep diffs clean.
+
+    Updates ``config.inputs`` in-place so subsequent reference refreshes use
    worktree-local paths.
    """
    import shutil
+
+    base_root = base_cwd.resolve()
+    track_external_inputs = config.preset_name == "plan-review"
    inputs_dir = worktree_path / ".cross-eval-inputs"
    inputs_dir.mkdir(exist_ok=True)
-    # Exclude from git so these don't pollute agentic diffs
-    (inputs_dir / ".gitignore").write_text("*\n", encoding="utf-8")
+    if not track_external_inputs:
+        # Exclude read-only input copies from git so they don't pollute code diffs.
+        (inputs_dir / ".gitignore").write_text("*\n", encoding="utf-8")
    for key, val in list(config.inputs.items()):
        if key.endswith("_ref") or not isinstance(val, Path):
            continue
        if not val.exists():
            continue
-        dest = inputs_dir / val.name
-        shutil.copy2(val, dest)
-        config.inputs[key] = dest
+        resolved = val.resolve()
+        try:
+            rel_path = resolved.relative_to(base_root)
+        except ValueError:  # input lives outside the base repo
+            dest = inputs_dir / val.name  # NOTE(review): same basename => later copy wins
+            shutil.copy2(resolved, dest)
+            config.inputs[key] = dest
+            continue
+
+        worktree_target = worktree_path / rel_path
+        if not worktree_target.exists():  # NOTE(review): never overwrites the worktree copy
+            worktree_target.parent.mkdir(parents=True, exist_ok=True)
+            shutil.copy2(resolved, worktree_target)
+        config.inputs[key] = worktree_target


 def _snapshot_repo_state(cwd: Path) -> dict[str, str]:
@@ -329,7 +350,7 @@ def _run_simple_pipeline(
        worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
            cwd, run_dir, config.preset_name,
        )
-        _copy_inputs_to_worktree(config, worktree_path)
+        _copy_inputs_to_worktree(config, worktree_path, base_cwd=cwd)
        _refresh_input_references(config, input_contents)
        base_repo_state = _snapshot_repo_state(cwd)
        base_repo_status = _snapshot_repo_status(cwd)
@@ -508,7 +529,7 @@ def _run_phased_pipeline(
        worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
            cwd, run_dir, config.preset_name,
        )
-        _copy_inputs_to_worktree(config, worktree_path)
+        _copy_inputs_to_worktree(config, worktree_path, base_cwd=cwd)
        _refresh_input_references(config, input_contents)
        base_repo_state = _snapshot_repo_state(cwd)
        base_repo_status = _snapshot_repo_status(cwd)
--- a/tests/test_evidence.py
+++ b/tests/test_evidence.py
@@ -465,6 +465,9 @@ class TestExpandedClaimMarkers(unittest.TestCase):
    def test_changes_are_complete(self) -> None:
        self.assertTrue(_claims_file_changes("All changes are complete"))

+    def test_korean_change_summary_triggers(self) -> None:  # ko: "All edits are done. Change summary below."
+        self.assertTrue(_claims_file_changes("모든 수정이 완료되었습니다. 아래는 변경 요약입니다."))
+

 class TestExpandedNoChangeMarkers(unittest.TestCase):
    """New no-change markers prevent false positives."""
@@ -484,6 +487,9 @@ class TestExpandedNoChangeMarkers(unittest.TestCase):
    def test_no_action_required(self) -> None:
        self.assertFalse(_claims_file_changes("No action required"))

+    def test_korean_no_change_marker(self) -> None:  # ko: "No need to change."
+        self.assertFalse(_claims_file_changes("변경할 필요 없음"))
+

 # ---------------------------------------------------------------------------
 # 6. Cross-iteration evidence propagation
--- a/tests/test_runtime_misc.py
+++ b/tests/test_runtime_misc.py
@@ -16,6 +16,7 @@ from cross_eval.agent import (
 )
 from cross_eval.models import AgentConfig, AgentResult, ExecutionConfig, PipelineConfig, StepConfig
 from cross_eval.pipeline import (
+    _copy_inputs_to_worktree,
    _commit_iteration,
    _execute_parallel_batch,
    _execute_step,
@@ -118,6 +119,42 @@ class TestInvokeAgentRuntime(unittest.TestCase):
        self.assertEqual(ctx.exception.failure_type, "API_ERROR")
        self.assertIn("backend down", ctx.exception.raw_error)

+
+class TestWorktreeInputMapping(unittest.TestCase):  # NOTE(review): the method after this hunk now lands in this class — confirm
+    def test_repo_local_plan_input_maps_to_tracked_worktree_path(self) -> None:  # repo-local input -> worktree path
+        with tempfile.TemporaryDirectory() as tmpdir:
+            repo = Path(tmpdir) / "repo"
+            repo.mkdir()
+            _init_git_repo(repo)
+            (repo / "plan.md").write_text("plan v1\n", encoding="utf-8")
+            subprocess.run(["git", "add", "plan.md"], cwd=repo, capture_output=True, check=True)
+            subprocess.run(  # presumably committed so the worktree checkout contains plan.md — verify
+                ["git", "commit", "-m", "add plan"],
+                cwd=repo,
+                capture_output=True,
+                check=True,
+            )
+
+            worktree_dir = Path(tmpdir) / "wt"
+            branch = "cross-eval/test-plan-review"
+            worktree_path, _ = create_worktree(repo, worktree_dir, branch)
+            try:
+                config = PipelineConfig(
+                    inputs={"plan": repo / "plan.md"},
+                    preset_name="plan-review",
+                )
+
+                _copy_inputs_to_worktree(config, worktree_path, base_cwd=repo)
+
+                self.assertEqual(config.inputs["plan"], worktree_path / "plan.md")  # not under .cross-eval-inputs
+            finally:  # clean up the worktree, then its branch
+                remove_worktree(base_cwd=repo, work_dir=worktree_path)
+                subprocess.run(  # best-effort: no check=True, so a failed delete is ignored
+                    ["git", "branch", "-D", branch],
+                    cwd=repo,
+                    capture_output=True,
+                )
+
    def test_classify_unknown_failure(self) -> None:
        failure_type, suggested_action = _classify_agent_failure("weird crash")
        self.assertEqual(failure_type, "UNKNOWN")