feat: tighten agentic runtime handoffs and quality gates

This commit is contained in:
chungyeong
2026-03-14 10:05:25 +09:00
parent 87bc0ffbfb
commit 7b95233edf
15 changed files with 1148 additions and 167 deletions

View File

@@ -12,10 +12,10 @@ import subprocess
import tempfile
import unittest
from pathlib import Path
from unittest.mock import MagicMock, call, patch
from unittest.mock import MagicMock, patch
from cross_eval.agent import AgentInvocationError, invoke_agent_agentic
from cross_eval.config import BUILTIN_AGENTS, _make_agentic
from cross_eval.config import _make_agentic
from cross_eval.models import (
AgentConfig,
AgentResult,
@@ -24,8 +24,6 @@ from cross_eval.models import (
)
from cross_eval.pipeline import (
_assert_base_repo_isolation,
_commit_iteration,
_finalize_worktree,
_has_agentic_steps,
_setup_worktree,
run_pipeline,
@@ -267,6 +265,7 @@ class TestInvokeAgentAgenticClaude(unittest.TestCase):
break
self.assertIsNotNone(agent_call, "Expected a subprocess.run call with 'claude'")
assert agent_call is not None
cmd = agent_call[0][0]
# No -p flag
@@ -274,6 +273,7 @@ class TestInvokeAgentAgenticClaude(unittest.TestCase):
# Prompt is delivered via stdin (input kwarg), not as a positional arg
input_data = agent_call[1].get("input")
self.assertIsNotNone(input_data)
assert input_data is not None
self.assertIn("implement feature X", input_data)
@@ -311,6 +311,7 @@ class TestInvokeAgentAgenticCodex(unittest.TestCase):
break
self.assertIsNotNone(agent_call, "Expected a subprocess.run call with 'codex'")
assert agent_call is not None
cmd = agent_call[0][0]
# Should have "-" sentinel at the end for stdin
@@ -318,6 +319,7 @@ class TestInvokeAgentAgenticCodex(unittest.TestCase):
# Stdin input should contain the prompt
input_data = agent_call[1].get("input")
self.assertIsNotNone(input_data)
assert input_data is not None
self.assertIn("implement feature Y", input_data)
@@ -435,6 +437,16 @@ class TestMakeAgenticClaude(unittest.TestCase):
self.assertNotIn("-p", agent.args)
self.assertIn("--setting-sources", agent.args)
def test_strips_dash_dash_print_alias(self) -> None:
    """Check that ``_make_agentic`` drops the long-form ``--print`` flag.

    Builds a ``claude`` agent whose args start with ``--print``, runs it
    through ``_make_agentic``, and then verifies that the agent is flagged
    as agentic and that ``--print`` no longer appears in its args.
    """
    initial_args = ["--print", "--setting-sources", "user"]
    agent = AgentConfig(name="claude-coder", command="claude", args=initial_args)

    # _make_agentic is exercised for its in-place effect on the config.
    _make_agentic(agent)

    self.assertTrue(agent.agentic)
    self.assertNotIn("--print", agent.args)
def test_idempotent_when_no_dash_p(self) -> None:
agent = AgentConfig(
name="claude-coder",