# Review notes for this patch. These lines precede the first "diff --git"
# header and are not part of any hunk.
#
# What the patch does
#   * cross_eval/agent.py: adds Korean phrases to _NO_CHANGE_ACK_MARKERS and
#     _CHANGE_CLAIM_MARKERS.
#   * cross_eval/pipeline.py: _copy_inputs_to_worktree gains a keyword-only
#     base_cwd parameter. Inputs that resolve under base_cwd are remapped to
#     the same relative path inside the worktree; all other inputs are still
#     copied into <worktree>/.cross-eval-inputs/. The "*" .gitignore for that
#     directory is now written only when preset_name != "plan-review".
#     Both call sites pass base_cwd=cwd.
#   * tests: one test per new marker tuple, plus a worktree-mapping test.
#
# NOTE(review) - issues to resolve before merging
#   1. tests/test_runtime_misc.py: the new TestWorktreeInputMapping class is
#      inserted directly above the existing context line
#      "def test_classify_unknown_failure", which the hunk header places under
#      class TestInvokeAgentRuntime. After applying, that existing method
#      follows the new class body and therefore belongs to
#      TestWorktreeInputMapping. Move the new class below the end of the
#      existing class (needs file context that this patch does not show).
#   2. _copy_inputs_to_worktree: a repo-local input is copied only when the
#      corresponding worktree path does not exist. When it already exists the
#      worktree file is used as-is, so content that differs in the base
#      checkout is not carried over - confirm this is intended.
#   3. _copy_inputs_to_worktree: external inputs are still copied to
#      inputs_dir / val.name, so two external inputs with the same basename
#      overwrite each other.
#   4. The new test calls _init_git_repo, create_worktree, remove_worktree,
#      tempfile, subprocess and Path; the only import this patch adds is
#      _copy_inputs_to_worktree - verify the others are already imported.
#
# NOTE(review) - layout reconstruction
#   The patch was received with newlines and indentation collapsed. Line
#   breaks, blank lines and indentation below were rebuilt from the "@@" hunk
#   counts and Python syntax. Known guesses:
#     * first pipeline.py hunk: the counts (27 old / 48 new) allow one blank
#       context line fewer than the conventional layout; this copy keeps the
#       blank line after the docstring summary and omits one before the
#       "for key, val" loop.
#     * the _run_simple_pipeline / _run_phased_pipeline hunks assume the
#       context is indented 8 spaces.
#   Check against the target tree, or apply with a whitespace-tolerant option.

diff --git a/cross_eval/agent.py b/cross_eval/agent.py
index 779d282..ef51f7d 100644
--- a/cross_eval/agent.py
+++ b/cross_eval/agent.py
@@ -34,6 +34,12 @@ _NO_CHANGE_ACK_MARKERS = (
     "code is correct as-is",
     "already correct",
     "no action required",
+    "변경 없음",
+    "수정 없음",
+    "수정할 필요 없음",
+    "변경할 필요 없음",
+    "이미 올바름",
+    "조치 불필요",
 )
 _CHANGE_CLAIM_MARKERS = (
     "summary of all changes made",
@@ -73,6 +79,15 @@ _CHANGE_CLAIM_MARKERS = (
     "completed the implementation",
     "all changes have been made",
     "changes are complete",
+    "수정 완료",
+    "모든 수정이 완료",
+    "변경 요약",
+    "변경 파일",
+    "신규 생성",
+    "기획서 수정",
+    "체크리스트 수정",
+    "문서를 수정",
+    "문서 수정",
 )
 
 
diff --git a/cross_eval/pipeline.py b/cross_eval/pipeline.py
index 7080a61..3b2ff8e 100644
--- a/cross_eval/pipeline.py
+++ b/cross_eval/pipeline.py
@@ -108,27 +108,48 @@ def _setup_worktree(cwd: Path, run_dir: Path, preset_name: str) -> tuple[Path, s
 def _copy_inputs_to_worktree(
     config: PipelineConfig,
     worktree_path: Path,
+    *,
+    base_cwd: Path,
 ) -> None:
     """Copy input files (plan, checklist, etc.) into the worktree.
 
-    This ensures agents running in plan/read-only mode within the worktree
-    can access these files, even though the originals live in the base repo.
-    Updates config.inputs in-place so subsequent reference refreshes use
+    Repo-local inputs are remapped to the corresponding path inside the worktree
+    so agentic edits produce a real git diff. External inputs are copied into a
+    dedicated inputs directory. For ``plan-review`` these external copies remain
+    tracked so document edits can survive on the branch; other presets keep them
+    ignored to avoid polluting code diffs.
+
+    Updates ``config.inputs`` in-place so subsequent reference refreshes use
     worktree-local paths.
     """
     import shutil
+
+    base_root = base_cwd.resolve()
+    track_external_inputs = config.preset_name == "plan-review"
     inputs_dir = worktree_path / ".cross-eval-inputs"
     inputs_dir.mkdir(exist_ok=True)
-    # Exclude from git so these don't pollute agentic diffs
-    (inputs_dir / ".gitignore").write_text("*\n", encoding="utf-8")
+    if not track_external_inputs:
+        # Exclude read-only input copies from git so they don't pollute code diffs.
+        (inputs_dir / ".gitignore").write_text("*\n", encoding="utf-8")
     for key, val in list(config.inputs.items()):
         if key.endswith("_ref") or not isinstance(val, Path):
             continue
         if not val.exists():
             continue
-        dest = inputs_dir / val.name
-        shutil.copy2(val, dest)
-        config.inputs[key] = dest
+        resolved = val.resolve()
+        try:
+            rel_path = resolved.relative_to(base_root)
+        except ValueError:
+            dest = inputs_dir / val.name
+            shutil.copy2(resolved, dest)
+            config.inputs[key] = dest
+            continue
+
+        worktree_target = worktree_path / rel_path
+        if not worktree_target.exists():
+            worktree_target.parent.mkdir(parents=True, exist_ok=True)
+            shutil.copy2(resolved, worktree_target)
+        config.inputs[key] = worktree_target
 
 
 def _snapshot_repo_state(cwd: Path) -> dict[str, str]:
@@ -329,7 +350,7 @@ def _run_simple_pipeline(
         worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
             cwd, run_dir, config.preset_name,
         )
-        _copy_inputs_to_worktree(config, worktree_path)
+        _copy_inputs_to_worktree(config, worktree_path, base_cwd=cwd)
         _refresh_input_references(config, input_contents)
         base_repo_state = _snapshot_repo_state(cwd)
         base_repo_status = _snapshot_repo_status(cwd)
@@ -508,7 +529,7 @@ def _run_phased_pipeline(
         worktree_path, agentic_branch_name, agentic_base_commit = _setup_worktree(
             cwd, run_dir, config.preset_name,
         )
-        _copy_inputs_to_worktree(config, worktree_path)
+        _copy_inputs_to_worktree(config, worktree_path, base_cwd=cwd)
         _refresh_input_references(config, input_contents)
         base_repo_state = _snapshot_repo_state(cwd)
         base_repo_status = _snapshot_repo_status(cwd)
diff --git a/tests/test_evidence.py b/tests/test_evidence.py
index 9e87503..ca11ffc 100644
--- a/tests/test_evidence.py
+++ b/tests/test_evidence.py
@@ -465,6 +465,9 @@ class TestExpandedClaimMarkers(unittest.TestCase):
     def test_changes_are_complete(self) -> None:
         self.assertTrue(_claims_file_changes("All changes are complete"))
 
+    def test_korean_change_summary_triggers(self) -> None:
+        self.assertTrue(_claims_file_changes("모든 수정이 완료되었습니다. 아래는 변경 요약입니다."))
+
 
 class TestExpandedNoChangeMarkers(unittest.TestCase):
     """New no-change markers prevent false positives."""
@@ -484,6 +487,9 @@ class TestExpandedNoChangeMarkers(unittest.TestCase):
     def test_no_action_required(self) -> None:
         self.assertFalse(_claims_file_changes("No action required"))
 
+    def test_korean_no_change_marker(self) -> None:
+        self.assertFalse(_claims_file_changes("변경할 필요 없음"))
+
 
 # ---------------------------------------------------------------------------
 # 6. Cross-iteration evidence propagation
diff --git a/tests/test_runtime_misc.py b/tests/test_runtime_misc.py
index acdce9c..3f7c39d 100644
--- a/tests/test_runtime_misc.py
+++ b/tests/test_runtime_misc.py
@@ -16,6 +16,7 @@ from cross_eval.agent import (
 )
 from cross_eval.models import AgentConfig, AgentResult, ExecutionConfig, PipelineConfig, StepConfig
 from cross_eval.pipeline import (
+    _copy_inputs_to_worktree,
     _commit_iteration,
     _execute_parallel_batch,
     _execute_step,
@@ -118,6 +119,42 @@ class TestInvokeAgentRuntime(unittest.TestCase):
         self.assertEqual(ctx.exception.failure_type, "API_ERROR")
         self.assertIn("backend down", ctx.exception.raw_error)
 
+
+class TestWorktreeInputMapping(unittest.TestCase):
+    def test_repo_local_plan_input_maps_to_tracked_worktree_path(self) -> None:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            repo = Path(tmpdir) / "repo"
+            repo.mkdir()
+            _init_git_repo(repo)
+            (repo / "plan.md").write_text("plan v1\n", encoding="utf-8")
+            subprocess.run(["git", "add", "plan.md"], cwd=repo, capture_output=True, check=True)
+            subprocess.run(
+                ["git", "commit", "-m", "add plan"],
+                cwd=repo,
+                capture_output=True,
+                check=True,
+            )
+
+            worktree_dir = Path(tmpdir) / "wt"
+            branch = "cross-eval/test-plan-review"
+            worktree_path, _ = create_worktree(repo, worktree_dir, branch)
+            try:
+                config = PipelineConfig(
+                    inputs={"plan": repo / "plan.md"},
+                    preset_name="plan-review",
+                )
+
+                _copy_inputs_to_worktree(config, worktree_path, base_cwd=repo)
+
+                self.assertEqual(config.inputs["plan"], worktree_path / "plan.md")
+            finally:
+                remove_worktree(base_cwd=repo, work_dir=worktree_path)
+                subprocess.run(
+                    ["git", "branch", "-D", branch],
+                    cwd=repo,
+                    capture_output=True,
+                )
+
     def test_classify_unknown_failure(self) -> None:
         failure_type, suggested_action = _classify_agent_failure("weird crash")
         self.assertEqual(failure_type, "UNKNOWN")