feat: propagate execution evidence across iterations and enhance reports
- Carry execution evidence forward so reviewer/senior prompts in
  subsequent iterations can inspect prior transcript and command data
- Add {execution_evidence} to REVIEW_ONLY templates (en/ko)
- Add evidence summary table to iteration reports
- Fix test_agentic to match stdin-based prompt delivery for Claude
- Add expanded claim/no-change marker tests and cross-iteration
  evidence propagation tests
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -233,11 +233,11 @@ class TestBaseRepoIsolation(unittest.TestCase):
|
||||
# ===================================================================
|
||||
|
||||
class TestInvokeAgentAgenticClaude(unittest.TestCase):
|
||||
"""invoke_agent_agentic builds correct cmd for claude (no -p, prompt as positional arg)."""
|
||||
"""invoke_agent_agentic builds correct cmd for claude (no -p, prompt via stdin)."""
|
||||
|
||||
@patch("cross_eval.worktree.capture_diff", return_value="diff --git a/file ...")
|
||||
@patch("subprocess.run")
|
||||
def test_claude_cmd_has_no_dash_p_and_prompt_as_positional(
|
||||
def test_claude_cmd_has_no_dash_p_and_prompt_via_stdin(
|
||||
self, mock_run: MagicMock, mock_diff: MagicMock,
|
||||
) -> None:
|
||||
mock_run.return_value = MagicMock(returncode=0, stdout="ok", stderr="")
|
||||
@@ -271,8 +271,10 @@ class TestInvokeAgentAgenticClaude(unittest.TestCase):
|
||||
|
||||
# No -p flag
|
||||
self.assertNotIn("-p", cmd)
|
||||
# Last arg is a task file reference (not raw prompt — avoids arg length limits)
|
||||
self.assertIn("task file", cmd[-1].lower())
|
||||
# Prompt is delivered via stdin (input kwarg), not as a positional arg
|
||||
input_data = agent_call[1].get("input")
|
||||
self.assertIsNotNone(input_data)
|
||||
self.assertIn("implement feature X", input_data)
|
||||
|
||||
|
||||
class TestInvokeAgentAgenticCodex(unittest.TestCase):
|
||||
|
||||
Reference in New Issue
Block a user