feat: harden runtime evidence and claude agentic validation

This commit is contained in:
chungyeong
2026-03-13 22:29:22 +09:00
parent 28dd794f54
commit 3fb19e90c0
5 changed files with 655 additions and 59 deletions

View File

@@ -14,7 +14,7 @@ import unittest
from pathlib import Path
from unittest.mock import MagicMock, call, patch
from cross_eval.agent import invoke_agent_agentic
from cross_eval.agent import AgentInvocationError, invoke_agent_agentic
from cross_eval.config import BUILTIN_AGENTS, _make_agentic
from cross_eval.models import (
AgentConfig,
@@ -309,6 +309,74 @@ class TestTaskFileCleanup(unittest.TestCase):
self.assertFalse((wt / "CROSS_EVAL_TASK.md").exists())
class TestAgenticEmptyDiffDetection(unittest.TestCase):
    """Empty-diff handling for agentic coder invocations.

    Both tests patch ``cross_eval.worktree.capture_diff`` to return an empty
    string and patch ``subprocess.run`` with a successful (returncode 0)
    result, varying only what the agent prints on stdout.
    """

    @staticmethod
    def _claude_coder():
        """Return the agentic ``claude`` AgentConfig shared by both tests."""
        return AgentConfig(
            name="claude-coder",
            command="claude",
            args=["--setting-sources", "user"],
            agentic=True,
        )

    @patch("cross_eval.worktree.capture_diff", return_value="")
    @patch("subprocess.run")
    def test_claude_empty_diff_with_change_claim_fails(
        self, mock_run: MagicMock, mock_diff: MagicMock,
    ) -> None:
        """Stdout that claims changes plus an empty diff raises EMPTY_DIFF."""
        # The agent exits cleanly and *says* it edited files, yet the
        # patched capture_diff reports that nothing changed.
        claimed_output = (
            "All tests pass.\n"
            "Here's a summary of all changes made:\n"
            "- Updated discovery.py\n"
        )
        mock_run.return_value = MagicMock(
            returncode=0, stdout=claimed_output, stderr="",
        )
        coder = self._claude_coder()

        with tempfile.TemporaryDirectory() as tmp_dir:
            worktree = Path(tmp_dir)
            _init_git_repo(worktree)
            with self.assertRaises(AgentInvocationError) as raised:
                invoke_agent_agentic(
                    coder,
                    "implement feature X",
                    "coding",
                    worktree_path=worktree,
                    quiet=True,
                )

        error = raised.exception
        self.assertEqual(error.failure_type, "EMPTY_DIFF")
        # The agent's own claim must be carried in the raw error text.
        self.assertIn("summary of all changes made", error.raw_error.lower())

    @patch("cross_eval.worktree.capture_diff", return_value="")
    @patch("subprocess.run")
    def test_empty_diff_without_change_claim_is_allowed(
        self, mock_run: MagicMock, mock_diff: MagicMock,
    ) -> None:
        """An empty diff is accepted when stdout makes no claim of changes."""
        no_op_output = (
            "No changes were required; "
            "the current implementation already satisfies the task."
        )
        mock_run.return_value = MagicMock(
            returncode=0, stdout=no_op_output, stderr="",
        )
        coder = self._claude_coder()

        with tempfile.TemporaryDirectory() as tmp_dir:
            worktree = Path(tmp_dir)
            _init_git_repo(worktree)
            outcome = invoke_agent_agentic(
                coder,
                "check whether any fix is needed",
                "coding",
                worktree_path=worktree,
                quiet=True,
            )

        self.assertEqual(outcome.output, "(no changes)")
# ===================================================================
# 3. config.py tests
# ===================================================================