feat(my-deepagent): v0.2 PR #2a — wire LangGraph AsyncPostgresSaver into engine

Foundation for `runs resume` (v0.2 PR #2b). v0.2 PR #1 added
langgraph-checkpoint-postgres as a dependency, but engine.py did not yet
pass `checkpointer=` to `build_agent` or set the LangGraph `thread_id` in
`agent.ainvoke` — meaning resume had no state to restore. This commit
actually wires the dependency.

Highlights
- `WorkflowEngine.__init__` accepts `checkpointer_url: str | None`
  (default = `config.database_url`).
- `_maybe_open_saver` async context: opens AsyncPostgresSaver for
  postgresql{,+asyncpg,+psycopg}:// URLs; yields None for any other URL —
  in practice `sqlite+aiosqlite://` (test affordance — production always
  Postgres per DR-2 / DR-3, no langgraph-checkpoint-sqlite in deps).
- `WorkflowEngine.run()` opens the saver **once per run** and shares it
  across all phases. Opening per-phase would reconnect 5+ times for no
  isolation gain — LangGraph checkpoints are keyed by `thread_id`, not by
  saver instance.
- `_invoke_agent_until_artifact` forwards `checkpointer=self._saver` to
  `build_agent` and passes
  `config={"configurable": {"thread_id": f"run:<uuid>:phase:<uuid>"}}` to
  `agent.ainvoke`. The thread_id format is already used by
  `LlmCallRow.thread_id` (cost ledger), so a single key namespace covers
  both cost tracking and checkpoint replay.

Tests
- `tests/integration/test_engine_checkpointer_wiring.py` (new, 2 tests):
  1. Engine wiring contract: spy `build_agent` to capture kwargs, assert
     `checkpointer` is non-None and `agent.ainvoke` receives the expected
     `config.configurable.thread_id` in run:<uuid>:phase:<uuid> format.
  2. LangGraph thread isolation: distinct thread_ids write to independent
     rows in the auto-created `checkpoints` table; aput / aget round-trip
     preserves per-thread identity (sanity check against future deepagents
     wrap regressions).
- `tests/integration/test_engine.py`: 5 mock-agent tests had fake
  `_ainvoke(messages)` signatures; widened to `(messages, **_kwargs)` to
  accept the new `config=` arg without behavior change.

Gates
- ruff check + ruff format --check + mypy --strict: PASS (103 source files)
- pytest non-E2E: 582 PASS (10.55 s) — was 576 before; net +6 after adding
  the new wiring tests and reshaping engine.py.
- pytest E2E real OpenRouter on Postgres: PASS 75.99 s (baseline 71–122 s;
  within DR-3 acceptance threshold ≤+20%).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
chungyeong
2026-05-16 21:56:34 +09:00
parent 711d61d245
commit 50aacd3382
4 changed files with 316 additions and 43 deletions

View File

@@ -5,7 +5,8 @@ from __future__ import annotations
import asyncio
import json
import signal
from contextlib import suppress
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager, suppress
from dataclasses import dataclass
from datetime import UTC, datetime
from pathlib import Path
@@ -32,6 +33,7 @@ from .middleware.artifact_watcher import ArtifactWatcherMiddleware
from .middleware.audit import AuditToolMiddleware
from .middleware.cost import CostMiddleware
from .monitoring.pricing import PricingCache
from .persistence.checkpointer import get_checkpointer_ctx
from .persistence.db import Database
from .persistence.models import (
AgentPersonaRow,
@@ -93,6 +95,7 @@ class WorkflowEngine:
approval_callback: ApprovalCallback,
budget_tracker: BudgetTracker | None = None,
pricing: PricingCache | None = None,
checkpointer_url: str | None = None,
) -> None:
self._db = db
self._config = config
@@ -105,6 +108,11 @@ class WorkflowEngine:
self._pricing = pricing or PricingCache()
self._shutdown_event: asyncio.Event = asyncio.Event()
self._inflight_tasks: set[asyncio.Task[Any]] = set()
# LangGraph checkpoint URL. None (or empty) → falls back to config.database_url, resolved here.
# The saver itself is opened inside `run()` (one ctx per run, shared across phases)
# and lives on `self._saver` for the duration of that run.
self._checkpointer_url: str = checkpointer_url or config.database_url
self._saver: Any | None = None
def install_signal_handlers(self) -> None:
"""Attach SIGTERM/SIGINT handlers to the running event loop.
@@ -132,6 +140,22 @@ class WorkflowEngine:
def shutdown_requested(self) -> bool:
return self._shutdown_event.is_set()
@asynccontextmanager
async def _maybe_open_saver(self) -> AsyncIterator[Any | None]:
    """Provide the checkpointer for one run, or None when the URL is not Postgres.

    For URLs beginning with ``postgresql://``, ``postgresql+asyncpg://`` or
    ``postgresql+psycopg://`` the saver context returned by
    ``get_checkpointer_ctx`` is entered and its saver is yielded; the
    context is exited when the caller leaves this one.

    Any other URL yields None. This is intended as a test-only affordance
    for SQLite, where durable resume is never wired; production runs are
    expected to use Postgres (DR-2 / DR-3).
    NOTE(review): the original author states that handing a SQLite URL to
    AsyncPostgresSaver raises psycopg.ProgrammingError — not verifiable
    from this file.
    """
    postgres_schemes = (
        "postgresql://",
        "postgresql+asyncpg://",
        "postgresql+psycopg://",
    )
    checkpointer_url = self._checkpointer_url
    if not checkpointer_url.startswith(postgres_schemes):
        # Non-Postgres URL: the run proceeds without a checkpointer.
        yield None
        return
    async with get_checkpointer_ctx(checkpointer_url) as saver:
        yield saver
async def run(
self,
template: WorkflowTemplate,
@@ -164,42 +188,55 @@ class WorkflowEngine:
await self._append_event(run_id, None, RunEventType.RUN_STARTED, {})
await self._set_run_state(run_id, RunState.EXECUTING)
try:
for phase_def in template.phases:
role_binding = bindings[phase_def.role]
await self._run_phase(run_id, worktree_root, template, phase_def, role_binding)
await self._set_run_state(run_id, RunState.COMPLETED)
await self._append_event(run_id, None, RunEventType.RUN_COMPLETED, {})
report_path = await self._compose_final_report(
run_id, worktree_root, RunState.COMPLETED
)
return RunResult(run_id=run_id, state=RunState.COMPLETED, final_report_path=report_path)
except _PhaseAbortedError as e:
await self._set_run_state(run_id, RunState.ABORTED)
await self._append_event(run_id, None, RunEventType.RUN_ABORTED, {"reason": e.reason})
report_path = await self._compose_final_report(
run_id, worktree_root, RunState.ABORTED, error=e.reason
)
return RunResult(
run_id=run_id,
state=RunState.ABORTED,
final_report_path=report_path,
error=e.reason,
)
except MyDeepAgentError as e:
await self._set_run_state(run_id, RunState.FAILED)
await self._append_event(
run_id, None, RunEventType.RUN_FAILED, {"code": e.code, "message": str(e)}
)
report_path = await self._compose_final_report(
run_id, worktree_root, RunState.FAILED, error=str(e)
)
return RunResult(
run_id=run_id,
state=RunState.FAILED,
final_report_path=report_path,
error=str(e),
)
# Open the LangGraph AsyncPostgresSaver once per run; all phases share it.
# Opening per-phase would re-connect to Postgres 5+ times per run for no
# gain — checkpoints are isolated by `thread_id` not by saver instance.
# SQLite URLs (test-only) skip the saver entirely — deepagents accepts
# checkpointer=None and runs without resume support.
async with self._maybe_open_saver() as saver:
self._saver = saver
try:
for phase_def in template.phases:
role_binding = bindings[phase_def.role]
await self._run_phase(run_id, worktree_root, template, phase_def, role_binding)
await self._set_run_state(run_id, RunState.COMPLETED)
await self._append_event(run_id, None, RunEventType.RUN_COMPLETED, {})
report_path = await self._compose_final_report(
run_id, worktree_root, RunState.COMPLETED
)
return RunResult(
run_id=run_id, state=RunState.COMPLETED, final_report_path=report_path
)
except _PhaseAbortedError as e:
await self._set_run_state(run_id, RunState.ABORTED)
await self._append_event(
run_id, None, RunEventType.RUN_ABORTED, {"reason": e.reason}
)
report_path = await self._compose_final_report(
run_id, worktree_root, RunState.ABORTED, error=e.reason
)
return RunResult(
run_id=run_id,
state=RunState.ABORTED,
final_report_path=report_path,
error=e.reason,
)
except MyDeepAgentError as e:
await self._set_run_state(run_id, RunState.FAILED)
await self._append_event(
run_id, None, RunEventType.RUN_FAILED, {"code": e.code, "message": str(e)}
)
report_path = await self._compose_final_report(
run_id, worktree_root, RunState.FAILED, error=str(e)
)
return RunResult(
run_id=run_id,
state=RunState.FAILED,
final_report_path=report_path,
error=str(e),
)
finally:
self._saver = None
# ------------------------------------------------------------------
# Phase execution
@@ -400,6 +437,7 @@ class WorkflowEngine:
self._config,
root_dir=worktree_root,
middleware=[watcher, cost_mw, audit_mw],
checkpointer=self._saver,
)
envelope = self._build_envelope(run_id, phase_id, phase_def, attempt, expected_path)
@@ -409,10 +447,17 @@ class WorkflowEngine:
event_type = RunEventType.PROMPT_REPAIRED if attempt > 1 else RunEventType.PROMPT_SENT
await self._append_event(run_id, phase_id, event_type, {"attempt": attempt})
# thread_id matches the format already used by LlmCallRow.thread_id
# (engine.py _record_llm_call) so a single namespace covers both
# cost ledger and LangGraph checkpoint replay.
thread_id = f"run:{run_id}:phase:{phase_id}"
timeout = float(phase_def.timeout_seconds or _DEFAULT_PHASE_TIMEOUT_SECONDS)
try:
invoke_task: asyncio.Task[Any] = asyncio.create_task(
agent.ainvoke({"messages": [{"role": "user", "content": envelope}]})
agent.ainvoke(
{"messages": [{"role": "user", "content": envelope}]},
config={"configurable": {"thread_id": thread_id}},
)
)
self._inflight_tasks.add(invoke_task)
try: