feat(my-deepagent): v0.2 PR #2b — mydeepagent runs resume <id> real implementation

Closes the v0.1.0 KNOWN LIMIT where resume was an exit-2 stub. Builds on
v0.2 PR #2a's LangGraph wiring + the existing DB phase-state machine +
sweep_orphan_runs — no Temporal (per DR-3).

Highlights
- `WorkflowEngine.resume(run_id)` (new async method):
  - Loads RunRow, rejects terminal states with
    MyDeepAgentError("run_already_terminal").
  - Reloads worktree_root from `RunRow.worktree_root`, template via
    `_reload_template` (WorkflowTemplateRow JOIN + model_validate), and
    bindings via `_reload_bindings` (run_bindings ⨝ agent_personas).
  - **Does NOT call `bind_personas` again** — locks in the original
    binding so consent / persona-pool changes since the original run
    don't silently shift role assignment.
- `_execute_run` (extracted shared phase loop): `run()` and `resume()`
  both dispatch through it. Skips already-completed phases (emits
  `phase.skipped` event) and re-executes the rest.
- 4 new private helpers on WorkflowEngine: `_get_run_or_raise`,
  `_reload_template`, `_reload_bindings`, `_get_completed_phase_keys`.
- `RunEventType.RUN_RESUMED` and `PHASE_SKIPPED` are now actually
  emitted (the enum members existed already).
- `cli/runs.py _runs_resume_async`: stub → real impl. Validates the run
  exists + non-terminal, loads seed personas + artifact schemas from
  `docs/schemas/`, constructs WorkflowEngine with an
  "abort-on-new-approval" callback (resume should not silently re-prompt
  the user — original gates already passed; a new gate means the
  workflow has changed). Calls engine.resume(UUID(id)), prints final
  state + report. Catches MyDeepAgentError and exits 1 with red error.

Tests
- `tests/integration/test_resume.py` (new, 5 scenarios):
  1. 2-phase mock workflow: phase 1 succeeds, phase 2 fails first time,
     row flipped back to executing → resume → phase 2 completes.
     Asserts `phase.skipped` event for phase 1, `run.resumed` event,
     and exactly 1 mock invocation for phase 2 on resume.
  2. Terminal run → `MyDeepAgentError(code="run_already_terminal")`.
  3. Unknown run id → `MyDeepAgentError(code="run_not_found")`.
  4. RunBindingRow rows missing → `MyDeepAgentError(code="run_metadata_missing")`.
  5. Corrupt `workflow_templates.definition` →
     `MyDeepAgentError(code="template_load_failed")`.
  Mock pattern matches existing test_engine.py: patch
  `my_deepagent.engine.build_agent` to return a fake agent that writes
  the expected artifact and drives the watcher middleware.

Gates
- ruff check + ruff format --check + mypy --strict: PASS (103 source files)
- pytest non-E2E: 587 PASS (12.69 s) — +5 from new resume tests
- pytest E2E real OpenRouter on Postgres: PASS 78.52 s (baseline 71–122 s;
  within DR-3 acceptance threshold ≤+20%)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
chungyeong
2026-05-16 22:07:24 +09:00
parent 50aacd3382
commit 501292a5cd
4 changed files with 804 additions and 16 deletions

View File

@@ -4,6 +4,7 @@ from __future__ import annotations
import asyncio
import json
import logging
import signal
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager, suppress
@@ -58,6 +59,8 @@ ApprovalCallback = Any # Callable[[dict, list[str]], Awaitable[ApprovalDecision
# Default phase timeout, expressed in seconds (300 s == 5 minutes).
_DEFAULT_PHASE_TIMEOUT_SECONDS = 300 # 5 minutes
# Child logger ("<module>.resume") used by the resume helpers to warn about
# persona rows that fail validation while bindings are being reloaded.
_LOG_CORRUPT_PERSONA = logging.getLogger(__name__ + ".resume")
@dataclass(frozen=True)
class RunResult:
@@ -165,6 +168,11 @@ class WorkflowEngine:
requirements_md: str = "",
override: BindingOverride | None = None,
) -> RunResult:
"""Start a brand-new run. Allocates a new `run_id`, binds personas, persists
skeleton metadata, and dispatches to the shared `_execute_run` phase loop.
For resuming an existing non-terminal run, use :meth:`resume` instead.
"""
run_id = uuid4()
worktree_root = self._config.workspace_root / str(run_id)
worktree_root.mkdir(parents=True, exist_ok=True)
@@ -186,6 +194,59 @@ class WorkflowEngine:
await self._append_event(run_id, None, RunEventType.RUN_CREATED, {})
await self._append_event(run_id, None, RunEventType.RUN_STARTED, {})
return await self._execute_run(run_id, template, worktree_root, bindings)
async def resume(self, run_id: UUID) -> RunResult:
    """Resume a non-terminal run, skipping phases that already completed.

    Reloads worktree_root, template, and bindings from the DB — does **not**
    re-run `bind_personas`, so consent/pool changes since the original run
    do not silently shift the binding. Phases whose `RunPhaseRow.state` is
    already ``completed`` are skipped; the rest re-execute and (when a
    LangGraph saver is wired) replay deepagents from the last checkpoint
    for that phase's thread_id.

    Before any event is emitted or state is changed, every role required by
    a not-yet-completed phase is checked against the reloaded bindings.
    `_reload_bindings` drops bindings whose persona cannot be rebuilt, so a
    partially reloaded dict would otherwise fail later with a bare
    ``KeyError`` in the phase loop.

    Args:
        run_id: Identifier of the run to resume.

    Returns:
        The `RunResult` produced by the shared `_execute_run` phase loop.

    Raises:
        MyDeepAgentError: if the run is missing (``run_not_found``), already
            terminal (``run_already_terminal``), has no bindings or lacks a
            binding for a pending phase's role (``run_metadata_missing``),
            or its template cannot be reloaded (``template_load_failed``).
    """
    run_row = await self._get_run_or_raise(run_id)
    terminal_states = {
        RunState.COMPLETED.value,
        RunState.FAILED.value,
        RunState.ABORTED.value,
    }
    if run_row.state in terminal_states:
        raise MyDeepAgentError.human_required(
            "run_already_terminal",
            message=(
                f"run {run_id} is already {run_row.state}; start a fresh run "
                f"with `mydeepagent run`"
            ),
        )

    worktree_root = Path(run_row.worktree_root)
    template = await self._reload_template(run_row.template_id)
    bindings = await self._reload_bindings(run_id)
    if not bindings:
        raise MyDeepAgentError.human_required(
            "run_metadata_missing",
            message=(
                f"run {run_id} has no binding rows; cannot resume — start a fresh run instead"
            ),
        )

    # Only phases that will actually execute need a binding; completed phases
    # are skipped by the phase loop and never look their role up.
    completed_keys = await self._get_completed_phase_keys(run_id)
    missing_roles = sorted(
        {
            str(phase_def.role)
            for phase_def in template.phases
            if phase_def.key not in completed_keys and phase_def.role not in bindings
        }
    )
    if missing_roles:
        raise MyDeepAgentError.human_required(
            "run_metadata_missing",
            message=(
                f"run {run_id} has no usable binding for role(s) "
                f"{', '.join(missing_roles)}; cannot resume — start a fresh run instead"
            ),
        )

    await self._append_event(run_id, None, RunEventType.RUN_RESUMED, {})
    return await self._execute_run(run_id, template, worktree_root, bindings)
async def _execute_run(
self,
run_id: UUID,
template: WorkflowTemplate,
worktree_root: Path,
bindings: dict[str, Binding],
) -> RunResult:
"""Shared phase loop used by both `run` (new) and `resume`."""
await self._set_run_state(run_id, RunState.EXECUTING)
# Open the LangGraph AsyncPostgresSaver once per run; all phases share it.
@@ -195,8 +256,17 @@ class WorkflowEngine:
# checkpointer=None and runs without resume support.
async with self._maybe_open_saver() as saver:
self._saver = saver
completed_keys = await self._get_completed_phase_keys(run_id)
try:
for phase_def in template.phases:
if phase_def.key in completed_keys:
await self._append_event(
run_id,
None,
RunEventType.PHASE_SKIPPED,
{"phase_key": phase_def.key, "reason": "already_completed"},
)
continue
role_binding = bindings[phase_def.role]
await self._run_phase(run_id, worktree_root, template, phase_def, role_binding)
await self._set_run_state(run_id, RunState.COMPLETED)
@@ -933,6 +1003,90 @@ class WorkflowEngine:
except Exception:
await s.rollback()
# ------------------------------------------------------------------
# Resume helpers (used by `resume` to rehydrate state from DB)
# ------------------------------------------------------------------
async def _get_run_or_raise(self, run_id: UUID) -> RunRow:
    """Fetch the `RunRow` for *run_id*, or fail if no such row exists.

    Args:
        run_id: Identifier of the run; looked up by its string form.

    Returns:
        The matching `RunRow`.

    Raises:
        MyDeepAgentError: ``run_not_found`` (human-required) when the lookup
            returns nothing.
    """
    async with self._db.session() as session:
        found = await session.get(RunRow, str(run_id))
    if found is not None:
        return found
    raise MyDeepAgentError.human_required(
        "run_not_found",
        message=f"run {run_id} not found in DB",
    )
async def _reload_template(self, template_id: str) -> WorkflowTemplate:
    """Load a stored workflow template and validate it into a `WorkflowTemplate`.

    Args:
        template_id: Key of the `WorkflowTemplateRow` to load.

    Returns:
        The `WorkflowTemplate` built from the row's ``definition``.

    Raises:
        MyDeepAgentError: ``template_load_failed`` (fatal) when the row does
            not exist or its ``definition`` fails validation; in the latter
            case the validation error is chained as the cause.
    """
    async with self._db.session() as session:
        template_row = await session.get(WorkflowTemplateRow, template_id)

    if template_row is None:
        raise MyDeepAgentError.fatal(
            "template_load_failed",
            message=f"workflow_templates row {template_id} not found",
        )

    try:
        parsed = WorkflowTemplate.model_validate(template_row.definition)
    except Exception as exc:
        # Any validation failure is reported under the same error code as a
        # missing row, with the underlying error text included in the message.
        raise MyDeepAgentError.fatal(
            "template_load_failed",
            message=f"workflow_templates.definition for {template_id} is malformed: {exc}",
        ) from exc
    return parsed
async def _reload_bindings(self, run_id: UUID) -> dict[str, Binding]:
    """Rebuild the `{role_id: Binding}` dict from `run_bindings` + `agent_personas`.

    Empty result means the run was never fully persisted — caller raises
    `run_metadata_missing`. We do NOT re-run `bind_personas` here on purpose:
    consent / pool state could have shifted since the original run.

    A binding is left out of the result (with a warning logged) when its
    persona row no longer exists or when the persona's stored definition
    fails validation. The result can therefore be non-empty yet lack some
    roles.

    Args:
        run_id: Identifier of the run whose bindings are reloaded.

    Returns:
        Mapping of role id to the reconstructed `Binding`.
    """
    from .binding import Binding as _Binding  # local import to avoid cycle hint

    out: dict[str, Binding] = {}
    async with self._db.session() as s:
        binding_rows = (
            (await s.execute(select(RunBindingRow).where(RunBindingRow.run_id == str(run_id))))
            .scalars()
            .all()
        )
        # Single pass: every ORM attribute is read while the session is open.
        for br in binding_rows:
            pr = await s.get(AgentPersonaRow, br.persona_id)
            if pr is None:
                # Previously dropped silently; log it so a later
                # `run_metadata_missing` can be traced to its cause.
                _LOG_CORRUPT_PERSONA.warning(
                    "persona row %s (role %s) missing during resume",
                    br.persona_id,
                    br.role_id,
                )
                continue
            try:
                persona = Persona.model_validate(pr.definition)
            except Exception as e:
                # Corrupt persona JSON: skip the binding; an empty bindings dict
                # surfaces as `run_metadata_missing` in `resume`.
                _LOG_CORRUPT_PERSONA.warning("corrupt persona row %s during resume: %s", pr.id, e)
                continue
            out[br.role_id] = _Binding(
                role_id=br.role_id, persona=persona, binding_hash=br.binding_hash
            )
    return out
async def _get_completed_phase_keys(self, run_id: UUID) -> set[str]:
    """Collect the phase keys of *run_id* whose phase row is in the completed state.

    Args:
        run_id: Identifier of the run; matched by its string form.

    Returns:
        The set of `phase_key` values with state `RunPhaseState.COMPLETED`;
        empty when no phase has completed.
    """
    completed_stmt = (
        select(RunPhaseRow.phase_key)
        .where(RunPhaseRow.run_id == str(run_id))
        .where(RunPhaseRow.state == RunPhaseState.COMPLETED.value)
    )
    async with self._db.session() as session:
        result = await session.execute(completed_stmt)
        phase_keys = result.scalars().all()
    return set(phase_keys)
# ------------------------------------------------------------------
# Module-level helpers