feat: ESCALATE verdict, issue tracker, onboarding commands

Add 3-verdict system (PASS/FAIL/ESCALATE) with priority handling across simple and phased pipelines. Senior reviewers can now escalate issues requiring human intervention, immediately breaking the review loop.

- ESCALATE verdict extraction with highest priority over PASS/FAIL
- Issue Tracker tables (ISS-NNN) carried across iterations
- Auto-escalate heuristic using (file, keyword) composite fingerprints
- Report restructuring: executive view first (verdict → tracker → metrics)
- Onboarding: `doctor`, `demo`, `init --guided` commands
- Exit codes: PASS=0, FAIL=1, ESCALATE=2
- 87 tests passing (54 config + 25 onboarding + 8 integration)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-13 18:19:05 +09:00
parent ee4f1a07ef
commit 204e071b74
15 changed files with 3032 additions and 156 deletions
--- a/cross_eval/agent.py
+++ b/cross_eval/agent.py
@@ -19,6 +19,34 @@ _SYSTEM_PROMPT_AGENTS = ("claude",)
 _REASONING_EFFORT_AGENTS = ("codex",)


+class AgentInvocationError(RuntimeError):
+    """Agent CLI failure carrying its failure bucket and a suggested remedy."""
+
+    def __init__(
+        self,
+        *,
+        agent_name: str,
+        step_name: str,
+        cmd_preview: str,
+        raw_error: str,
+        failure_type: str,
+        suggested_action: str,
+    ) -> None:
+        # Compose the multi-line message first, then keep every field as an attribute.
+        head = f"Agent '{agent_name}' failed (exit code != 0) at step '{step_name}':"
+        shown_error = raw_error or "(no output)"  # placeholder for empty output
+        super().__init__(
+            f"{head}\n  type: {failure_type}\n  cmd: {cmd_preview}\n"
+            f"  error: {shown_error}\n  action: {suggested_action}"
+        )
+        self.agent_name = agent_name
+        self.step_name = step_name
+        self.cmd_preview = cmd_preview
+        self.raw_error = raw_error
+        self.failure_type = failure_type
+        self.suggested_action = suggested_action
+
+
 def _supports_system_prompt_flag(command: str) -> bool:
    """Check if the agent CLI supports --system-prompt flag."""
    return any(name in command for name in _SYSTEM_PROMPT_AGENTS)
@@ -29,6 +57,53 @@ def _supports_reasoning_effort(command: str) -> bool:
    return any(name in command for name in _REASONING_EFFORT_AGENTS)


+def _classify_agent_failure(detail: str) -> tuple[str, str]:
+    """Map agent CLI error text to a (failure_type, suggested_action) pair."""
+    import re  # function-local so this block does not depend on module imports
+
+    normalized = detail.lower()
+    # Bare "auth" also hit author/authored/authority; authoriz(s)e forms still match.
+    auth_stem = re.compile(r"auth(?!or(?!i[sz]))")
+    auth_markers = (
+        "not logged in",
+        "please run /login",
+        "api key",
+        "unauthorized",
+        "forbidden",
+    )
+    usage_limit_markers = (
+        "quota",
+        "rate limit",
+        "credits",
+        "credit balance",
+        "budget",
+        "insufficient funds",
+        "usage limit",
+        "token limit",
+        "billing",
+    )
+
+    if auth_stem.search(normalized) or any(m in normalized for m in auth_markers):
+        return (
+            "AUTH",
+            "Agent CLI authentication is missing or expired. Re-authenticate the CLI, then rerun.",
+        )
+    if any(marker in normalized for marker in usage_limit_markers):
+        return (
+            "USAGE_LIMIT",
+            "Agent CLI hit a quota, billing, or token budget limit. Refill or raise the limit, then rerun.",
+        )
+    if "api error" in normalized:
+        return (
+            "API_ERROR",
+            "Agent CLI returned an API error. Inspect the saved error file for the raw response.",
+        )
+    return (
+        "UNKNOWN",
+        "Agent CLI failed for an unknown reason. Inspect the saved error file for details.",
+    )
+
+
 class _Spinner:
    """Animated spinner for long-running agent calls."""

@@ -137,11 +212,14 @@ def invoke_agent(
        if err_detail and len(err_detail) > 500:
            err_detail = err_detail[:500] + "..."
        cmd_preview = " ".join(cmd[:6])
-        raise RuntimeError(
-            f"Agent '{agent.name}' failed (exit code {result.returncode}) "
-            f"at step '{step_name}':\n"
-            f"  cmd: {cmd_preview}\n"
-            f"  error: {err_detail or '(no output)'}"
+        failure_type, suggested_action = _classify_agent_failure(err_detail or "")
+        raise AgentInvocationError(
+            agent_name=agent.name,
+            step_name=step_name,
+            cmd_preview=cmd_preview,
+            raw_error=err_detail or "(no output)",
+            failure_type=failure_type,
+            suggested_action=suggested_action,
        )

    if spinner: