feat: tighten agentic runtime handoffs and quality gates
This commit is contained in:
@@ -26,10 +26,9 @@ from cross_eval.models import (
|
||||
IterationResult,
|
||||
PipelineConfig,
|
||||
PipelineResult,
|
||||
ReviewMetrics,
|
||||
StepConfig,
|
||||
)
|
||||
from cross_eval.pipeline import _format_execution_evidence, run_pipeline
|
||||
from cross_eval.pipeline import _build_artifact_references, _format_execution_evidence, run_pipeline
|
||||
from cross_eval.report import build_report
|
||||
|
||||
|
||||
@@ -59,7 +58,7 @@ class TestFormatExecutionEvidence(unittest.TestCase):
|
||||
self.assertIn("Exit code: 0", evidence)
|
||||
self.assertIn("12.3s", evidence)
|
||||
self.assertIn("claude --setting-sources user", evidence)
|
||||
self.assertIn("Transcript excerpt", evidence)
|
||||
self.assertNotIn("Transcript excerpt", evidence)
|
||||
|
||||
def test_multiple_results_separated(self) -> None:
|
||||
r1 = AgentResult(
|
||||
@@ -88,10 +87,60 @@ class TestFormatExecutionEvidence(unittest.TestCase):
|
||||
transcript=long_transcript,
|
||||
)
|
||||
evidence = _format_execution_evidence({"key": result})
|
||||
self.assertIn("truncated", evidence)
|
||||
# The full 3000-char transcript should NOT appear
|
||||
self.assertNotIn("x" * 3000, evidence)
|
||||
|
||||
def test_artifact_paths_included_when_run_dir_provided(self) -> None:
    """Check that evidence built with a run directory names the iteration's artifact files.

    With ``iteration=2`` and a step named ``coding``, the rendered evidence
    is expected to mention both ``v2/coding.md`` and
    ``v2/coding_transcript.md``.
    """
    coder_result = AgentResult(
        output="diff",
        exit_code=0,
        agent_name="coder",
        step_name="coding",
        duration_seconds=1.2,
        transcript="stdout",
        command_preview="claude ...",
    )
    with tempfile.TemporaryDirectory() as scratch_dir:
        rendered = _format_execution_evidence(
            {"coding_output": coder_result},
            run_dir=Path(scratch_dir),
            iteration=2,
        )
        # Both the step output file and its transcript must be referenced.
        for expected_path in ("v2/coding.md", "v2/coding_transcript.md"):
            self.assertIn(expected_path, rendered)
|
||||
|
||||
|
||||
class TestArtifactReferences(unittest.TestCase):
    """Artifact references should prefer file paths and git state over inline text."""

    def test_contains_input_refs_and_git_context(self) -> None:
        """Check the references built for a freshly committed repo.

        A throwaway git repository holding ``plan.md`` and ``checklist.md``
        is created and committed; the resulting reference text is expected
        to contain the plan label, a git commit label, and the suggested
        git commands section.
        """
        import subprocess

        with tempfile.TemporaryDirectory() as tmpdir:
            repo = Path(tmpdir) / "repo"
            repo.mkdir()

            plan_file = repo / "plan.md"
            checklist_file = repo / "checklist.md"
            plan_file.write_text("plan", encoding="utf-8")
            checklist_file.write_text("checklist", encoding="utf-8")

            # Initialise the repo and create one commit so git state exists.
            git_steps = (
                ["git", "init"],
                ["git", "config", "user.email", "test@test.com"],
                ["git", "config", "user.name", "Test"],
                ["git", "add", "."],
                ["git", "commit", "-m", "init"],
            )
            for argv in git_steps:
                subprocess.run(argv, cwd=repo, capture_output=True, check=True)

            input_refs = {
                "plan_ref": str(plan_file.resolve()),
                "checklist_ref": str(checklist_file.resolve()),
                "docs_ref": "(none)",
            }
            refs = _build_artifact_references(
                input_refs,
                cwd=repo,
                run_dir=repo / ".cross-eval" / "output" / "run",
                iteration=1,
                worktree_path=None,
            )

            for marker in ("Plan:", "Git commit:", "Suggested git commands"):
                self.assertIn(marker, refs)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 2. Evidence in reviewer prompts (integration)
|
||||
@@ -162,7 +211,7 @@ class TestEvidenceInReviewerPrompt(unittest.TestCase):
|
||||
]
|
||||
self.assertTrue(len(review_prompts) >= 1)
|
||||
review_prompt = review_prompts[0]["prompt"]
|
||||
# Evidence section should reference the coding step's command
|
||||
self.assertIn("Artifact References", review_prompt)
|
||||
self.assertIn("Execution Evidence", review_prompt)
|
||||
self.assertIn("claude-coder", review_prompt)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user