feat(my-deepagent): v0.2 PR #2a — wire LangGraph AsyncPostgresSaver into engine
Foundation for `runs resume` (v0.2 PR #2b). v0.2 PR #1 added langgraph-checkpoint-postgres as a dependency, but engine.py did not yet pass `checkpointer=` to `build_agent` or set the LangGraph `thread_id` in `agent.ainvoke` — meaning resume had no state to restore. This commit actually wires the dependency.

Highlights
- `WorkflowEngine.__init__` accepts `checkpointer_url: str | None` (default = `config.database_url`).
- `_maybe_open_saver` async context: opens AsyncPostgresSaver for postgresql{,+asyncpg,+psycopg}:// URLs; yields None for any other URL, in practice `sqlite+aiosqlite://` (test affordance — production always Postgres per DR-2 / DR-3, no langgraph-checkpoint-sqlite in deps).
- `WorkflowEngine.run()` opens the saver **once per run** and shares it across all phases. Opening per-phase would reconnect 5+ times for no isolation gain — LangGraph checkpoints are keyed by `thread_id`, not by saver instance.
- `_invoke_agent_until_artifact` forwards `checkpointer=self._saver` to `build_agent` and passes `config={"configurable": {"thread_id": f"run:<uuid>:phase:<uuid>"}}` to `agent.ainvoke`. The thread_id format is already used by `LlmCallRow.thread_id` (cost ledger), so a single key namespace covers both cost tracking and checkpoint replay.

Tests
- `tests/integration/test_engine_checkpointer_wiring.py` (new, 2 tests):
  1. Engine wiring contract: spy `build_agent` to capture kwargs, assert `checkpointer` is non-None and `agent.ainvoke` receives the expected `config.configurable.thread_id` in run:<uuid>:phase:<uuid> format.
  2. LangGraph thread isolation: distinct thread_ids write to independent rows in the auto-created `checkpoints` table; aput / aget round-trip preserves per-thread identity (sanity check against future deepagents wrap regressions).
- `tests/integration/test_engine.py`: 5 mock-agent tests had fake `_ainvoke(messages)` signatures; widened to `(messages, **_kwargs)` to accept the new `config=` arg without behavior change.
Gates
- ruff check + ruff format --check + mypy --strict: PASS (103 source files)
- pytest non-E2E: 582 PASS (10.55 s) — was 576 before; +7 from new wiring tests, −1 from the engine.py reshape, 582 net.
- pytest E2E real OpenRouter on Postgres: PASS 75.99 s (baseline 71–122 s; within DR-3 acceptance threshold ≤+20%).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -5,7 +5,8 @@ from __future__ import annotations
|
||||
import asyncio
|
||||
import json
|
||||
import signal
|
||||
from contextlib import suppress
|
||||
from collections.abc import AsyncIterator
|
||||
from contextlib import asynccontextmanager, suppress
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
@@ -32,6 +33,7 @@ from .middleware.artifact_watcher import ArtifactWatcherMiddleware
|
||||
from .middleware.audit import AuditToolMiddleware
|
||||
from .middleware.cost import CostMiddleware
|
||||
from .monitoring.pricing import PricingCache
|
||||
from .persistence.checkpointer import get_checkpointer_ctx
|
||||
from .persistence.db import Database
|
||||
from .persistence.models import (
|
||||
AgentPersonaRow,
|
||||
@@ -93,6 +95,7 @@ class WorkflowEngine:
|
||||
approval_callback: ApprovalCallback,
|
||||
budget_tracker: BudgetTracker | None = None,
|
||||
pricing: PricingCache | None = None,
|
||||
checkpointer_url: str | None = None,
|
||||
) -> None:
|
||||
self._db = db
|
||||
self._config = config
|
||||
@@ -105,6 +108,11 @@ class WorkflowEngine:
|
||||
self._pricing = pricing or PricingCache()
|
||||
self._shutdown_event: asyncio.Event = asyncio.Event()
|
||||
self._inflight_tasks: set[asyncio.Task[Any]] = set()
|
||||
# LangGraph checkpoint URL. None (or empty) → falls back to config.database_url, resolved here at construction.
|
||||
# The saver itself is opened inside `run()` (one ctx per run, shared across phases)
|
||||
# and lives on `self._saver` for the duration of that run.
|
||||
self._checkpointer_url: str = checkpointer_url or config.database_url
|
||||
self._saver: Any | None = None
|
||||
|
||||
def install_signal_handlers(self) -> None:
|
||||
"""Attach SIGTERM/SIGINT handlers to the running event loop.
|
||||
@@ -132,6 +140,22 @@ class WorkflowEngine:
|
||||
def shutdown_requested(self) -> bool:
|
||||
return self._shutdown_event.is_set()
|
||||
|
||||
@asynccontextmanager
|
||||
async def _maybe_open_saver(self) -> AsyncIterator[Any | None]:
|
||||
"""Yield an AsyncPostgresSaver for Postgres URLs; yield None for SQLite.
|
||||
|
||||
SQLite is supported for tests only and never wires durable resume.
|
||||
LangGraph's AsyncPostgresSaver requires a libpq DSN; passing a SQLite
|
||||
URL would raise psycopg.ProgrammingError. Production runs always use
|
||||
Postgres (see DR-2 / DR-3), so this is purely a test-affordance shim.
|
||||
"""
|
||||
url = self._checkpointer_url
|
||||
if url.startswith(("postgresql://", "postgresql+asyncpg://", "postgresql+psycopg://")):
|
||||
async with get_checkpointer_ctx(url) as saver:
|
||||
yield saver
|
||||
else:
|
||||
yield None
|
||||
|
||||
async def run(
|
||||
self,
|
||||
template: WorkflowTemplate,
|
||||
@@ -164,42 +188,55 @@ class WorkflowEngine:
|
||||
await self._append_event(run_id, None, RunEventType.RUN_STARTED, {})
|
||||
await self._set_run_state(run_id, RunState.EXECUTING)
|
||||
|
||||
try:
|
||||
for phase_def in template.phases:
|
||||
role_binding = bindings[phase_def.role]
|
||||
await self._run_phase(run_id, worktree_root, template, phase_def, role_binding)
|
||||
await self._set_run_state(run_id, RunState.COMPLETED)
|
||||
await self._append_event(run_id, None, RunEventType.RUN_COMPLETED, {})
|
||||
report_path = await self._compose_final_report(
|
||||
run_id, worktree_root, RunState.COMPLETED
|
||||
)
|
||||
return RunResult(run_id=run_id, state=RunState.COMPLETED, final_report_path=report_path)
|
||||
except _PhaseAbortedError as e:
|
||||
await self._set_run_state(run_id, RunState.ABORTED)
|
||||
await self._append_event(run_id, None, RunEventType.RUN_ABORTED, {"reason": e.reason})
|
||||
report_path = await self._compose_final_report(
|
||||
run_id, worktree_root, RunState.ABORTED, error=e.reason
|
||||
)
|
||||
return RunResult(
|
||||
run_id=run_id,
|
||||
state=RunState.ABORTED,
|
||||
final_report_path=report_path,
|
||||
error=e.reason,
|
||||
)
|
||||
except MyDeepAgentError as e:
|
||||
await self._set_run_state(run_id, RunState.FAILED)
|
||||
await self._append_event(
|
||||
run_id, None, RunEventType.RUN_FAILED, {"code": e.code, "message": str(e)}
|
||||
)
|
||||
report_path = await self._compose_final_report(
|
||||
run_id, worktree_root, RunState.FAILED, error=str(e)
|
||||
)
|
||||
return RunResult(
|
||||
run_id=run_id,
|
||||
state=RunState.FAILED,
|
||||
final_report_path=report_path,
|
||||
error=str(e),
|
||||
)
|
||||
# Open the LangGraph AsyncPostgresSaver once per run; all phases share it.
|
||||
# Opening per-phase would re-connect to Postgres 5+ times per run for no
|
||||
# gain — checkpoints are isolated by `thread_id` not by saver instance.
|
||||
# SQLite URLs (test-only) skip the saver entirely — deepagents accepts
|
||||
# checkpointer=None and runs without resume support.
|
||||
async with self._maybe_open_saver() as saver:
|
||||
self._saver = saver
|
||||
try:
|
||||
for phase_def in template.phases:
|
||||
role_binding = bindings[phase_def.role]
|
||||
await self._run_phase(run_id, worktree_root, template, phase_def, role_binding)
|
||||
await self._set_run_state(run_id, RunState.COMPLETED)
|
||||
await self._append_event(run_id, None, RunEventType.RUN_COMPLETED, {})
|
||||
report_path = await self._compose_final_report(
|
||||
run_id, worktree_root, RunState.COMPLETED
|
||||
)
|
||||
return RunResult(
|
||||
run_id=run_id, state=RunState.COMPLETED, final_report_path=report_path
|
||||
)
|
||||
except _PhaseAbortedError as e:
|
||||
await self._set_run_state(run_id, RunState.ABORTED)
|
||||
await self._append_event(
|
||||
run_id, None, RunEventType.RUN_ABORTED, {"reason": e.reason}
|
||||
)
|
||||
report_path = await self._compose_final_report(
|
||||
run_id, worktree_root, RunState.ABORTED, error=e.reason
|
||||
)
|
||||
return RunResult(
|
||||
run_id=run_id,
|
||||
state=RunState.ABORTED,
|
||||
final_report_path=report_path,
|
||||
error=e.reason,
|
||||
)
|
||||
except MyDeepAgentError as e:
|
||||
await self._set_run_state(run_id, RunState.FAILED)
|
||||
await self._append_event(
|
||||
run_id, None, RunEventType.RUN_FAILED, {"code": e.code, "message": str(e)}
|
||||
)
|
||||
report_path = await self._compose_final_report(
|
||||
run_id, worktree_root, RunState.FAILED, error=str(e)
|
||||
)
|
||||
return RunResult(
|
||||
run_id=run_id,
|
||||
state=RunState.FAILED,
|
||||
final_report_path=report_path,
|
||||
error=str(e),
|
||||
)
|
||||
finally:
|
||||
self._saver = None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Phase execution
|
||||
@@ -400,6 +437,7 @@ class WorkflowEngine:
|
||||
self._config,
|
||||
root_dir=worktree_root,
|
||||
middleware=[watcher, cost_mw, audit_mw],
|
||||
checkpointer=self._saver,
|
||||
)
|
||||
envelope = self._build_envelope(run_id, phase_id, phase_def, attempt, expected_path)
|
||||
|
||||
@@ -409,10 +447,17 @@ class WorkflowEngine:
|
||||
event_type = RunEventType.PROMPT_REPAIRED if attempt > 1 else RunEventType.PROMPT_SENT
|
||||
await self._append_event(run_id, phase_id, event_type, {"attempt": attempt})
|
||||
|
||||
# thread_id matches the format already used by LlmCallRow.thread_id
|
||||
# (engine.py _record_llm_call) so a single namespace covers both
|
||||
# cost ledger and LangGraph checkpoint replay.
|
||||
thread_id = f"run:{run_id}:phase:{phase_id}"
|
||||
timeout = float(phase_def.timeout_seconds or _DEFAULT_PHASE_TIMEOUT_SECONDS)
|
||||
try:
|
||||
invoke_task: asyncio.Task[Any] = asyncio.create_task(
|
||||
agent.ainvoke({"messages": [{"role": "user", "content": envelope}]})
|
||||
agent.ainvoke(
|
||||
{"messages": [{"role": "user", "content": envelope}]},
|
||||
config={"configurable": {"thread_id": thread_id}},
|
||||
)
|
||||
)
|
||||
self._inflight_tasks.add(invoke_task)
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user