feat(my-deepagent): v0.2 PR #2a — wire LangGraph AsyncPostgresSaver into engine

Foundation for `runs resume` (v0.2 PR #2b). v0.2 PR #1 added
langgraph-checkpoint-postgres as a dependency, but engine.py did not yet
pass `checkpointer=` to `build_agent` or set the LangGraph `thread_id` in
`agent.ainvoke` — meaning resume had no state to restore. This commit
actually wires the dependency.

Highlights
- `WorkflowEngine.__init__` accepts `checkpointer_url: str | None`
  (default = `config.database_url`).
- `_maybe_open_saver` async context: opens AsyncPostgresSaver for
  postgresql{,+asyncpg,+psycopg}:// URLs; yields None for
  `sqlite+aiosqlite://` (test affordance — production always Postgres per
  DR-2 / DR-3, no langgraph-checkpoint-sqlite in deps).
- `WorkflowEngine.run()` opens the saver **once per run** and shares it
  across all phases. Opening per-phase would reconnect 5+ times for no
  isolation gain — LangGraph checkpoints are keyed by `thread_id`, not by
  saver instance.
- `_invoke_agent_until_artifact` forwards `checkpointer=self._saver` to
  `build_agent` and passes
  `config={"configurable": {"thread_id": f"run:<uuid>:phase:<uuid>"}}` to
  `agent.ainvoke`. The thread_id format is already used by
  `LlmCallRow.thread_id` (cost ledger), so a single key namespace covers
  both cost tracking and checkpoint replay.

Tests
- `tests/integration/test_engine_checkpointer_wiring.py` (new, 2 tests):
  1. Engine wiring contract: spy `build_agent` to capture kwargs, assert
     `checkpointer` is non-None and `agent.ainvoke` receives the expected
     `config.configurable.thread_id` in run:<uuid>:phase:<uuid> format.
  2. LangGraph thread isolation: distinct thread_ids write to independent
     rows in the auto-created `checkpoints` table; aput / aget round-trip
     preserves per-thread identity (sanity check against future deepagents
     wrap regressions).
- `tests/integration/test_engine.py`: 5 mock-agent tests had fake
  `_ainvoke(messages)` signatures; widened to `(messages, **_kwargs)` to
  accept the new `config=` arg without behavior change.

Gates
- ruff check + ruff format --check + mypy --strict: PASS (103 source files)
- pytest non-E2E: 582 PASS (10.55 s) — was 576 before; net +6 (+7 from the
  new wiring tests, −1 from the engine.py reshape).
- pytest E2E real OpenRouter on Postgres: PASS 75.99 s (baseline 71–122 s;
  within DR-3 acceptance threshold ≤+20%).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
chungyeong
2026-05-16 21:56:34 +09:00
parent 711d61d245
commit 50aacd3382
4 changed files with 316 additions and 43 deletions

View File

@@ -0,0 +1,198 @@
"""LangGraph AsyncPostgresSaver wiring verification (v0.2 PR #2a).
Verifies two contracts:
1. **Engine wiring**: `WorkflowEngine.run` opens a saver context, passes the
saver to `build_agent(checkpointer=...)`, and passes
``config={"configurable": {"thread_id": "run:<uuid>:phase:<uuid>"}}`` to
``agent.ainvoke``.
2. **LangGraph thread isolation**: two distinct ``thread_id`` values write
independent checkpoint rows, and ``aget`` on each ``thread_id`` returns that
thread's own latest checkpoint (read back within the same saver context; no
re-open is exercised here). Library-level guarantee, but tested once here to
detect future regressions if deepagents wraps the runtime.
"""
from __future__ import annotations
from pathlib import Path
from typing import Any
from unittest.mock import MagicMock, patch
from uuid import uuid4
import pytest
from my_deepagent.artifact_schema import ArtifactSchemaRegistry
from my_deepagent.binding import (
BackendAvailability,
PersonaConsentStore,
)
from my_deepagent.config import load_config
from my_deepagent.engine import WorkflowEngine
from my_deepagent.enums import Backend
from my_deepagent.persistence.checkpointer import get_checkpointer_ctx
from my_deepagent.persistence.db import Database
from my_deepagent.persona import load_personas_from_dir
from my_deepagent.workflow import load_workflow_yaml
# Both tests below depend on the ``pg_db_url`` fixture, so the whole module
# carries the ``integration`` marker.
pytestmark = [pytest.mark.integration]

# Seed data root: workflows/, personas/ and artifacts/ are loaded from here.
_SEED_ROOT = Path(__file__).resolve().parents[2].joinpath("docs", "schemas")
# ---------------------------------------------------------------------------
# Contract 1: engine wires saver + thread_id correctly
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_engine_passes_saver_and_thread_id_to_agent(tmp_path: Path, pg_db_url: str) -> None:
    """`build_agent` receives `checkpointer=saver`; `ainvoke` receives matching thread_id.

    ``my_deepagent.engine.build_agent`` is patched with a spy that records the
    keyword arguments it was built with and returns a fake agent. The fake
    agent's ``ainvoke`` records the ``config`` it is called with and writes a
    canned ``artifacts/spec.json`` under the ``root_dir`` it was built with.

    Args:
        tmp_path: pytest temp dir; hosts the workspace, data/state dirs and
            the consent store file.
        pg_db_url: database URL fixture, used as ``config.database_url``.
    """
    # Local import: the module header only brings in ``uuid4``.
    from uuid import UUID

    captured_build: dict[str, Any] = {}
    captured_invoke_configs: list[dict[str, Any]] = []

    def fake_build_agent(*args: Any, **kwargs: Any) -> Any:
        """Spy replacement for ``build_agent``: record kwargs, return a fake agent."""
        captured_build.update(kwargs)
        fake_agent = MagicMock()

        async def _ainvoke(
            _payload: dict[str, Any],
            *,
            config: dict[str, Any] | None = None,
        ) -> dict[str, Any]:
            captured_invoke_configs.append(config or {})
            # Pretend the agent wrote the expected artifact.
            root_dir: Path = kwargs["root_dir"]
            artifact_path = root_dir / "artifacts" / "spec.json"
            artifact_path.parent.mkdir(parents=True, exist_ok=True)
            artifact_path.write_text(
                '{"runId": "00000000-0000-0000-0000-000000000000", '
                '"workflowId": "spec-and-review", "phaseKey": "spec", '
                '"persona": "test", "summary": "fake", "decisions": [], '
                '"openQuestions": []}',
                encoding="utf-8",
            )
            return {"messages": []}

        fake_agent.ainvoke = _ainvoke
        return fake_agent

    ws_root = tmp_path / "ws"
    ws_root.mkdir()
    config = load_config(
        workspace_root=ws_root,
        data_dir=tmp_path / "data",
        state_dir=tmp_path / "state",
        database_url=pg_db_url,
    )
    template = load_workflow_yaml(_SEED_ROOT / "workflows" / "spec-and-review@1.yaml")
    personas = load_personas_from_dir(_SEED_ROOT / "personas")
    registry = ArtifactSchemaRegistry(roots=[_SEED_ROOT / "artifacts"])
    consent = PersonaConsentStore(tmp_path / "consents.json")
    backends = BackendAvailability(available_backends=frozenset(Backend))

    async def _auto_approve(_payload: dict[str, Any], _gates: list[str]) -> Any:
        """Approval callback that approves every request."""
        from my_deepagent.enums import ApprovalDecisionAction

        return ApprovalDecisionAction.APPROVE

    db = Database(config.database_url)
    # The try starts right after the Database is created so that a failure in
    # init_schema() or engine construction still disposes it.
    try:
        await db.init_schema()
        engine = WorkflowEngine(
            db=db,
            config=config,
            persona_pool=personas,
            artifact_registry=registry,
            consent_store=consent,
            available_backends=backends,
            approval_callback=_auto_approve,
        )
        with patch("my_deepagent.engine.build_agent", side_effect=fake_build_agent):
            await engine.run(
                template,
                repo_path=tmp_path / "fake-repo",
                base_branch="main",
                requirements_md="test",
            )
    finally:
        await db.dispose()

    # Contract 1.a: build_agent received a checkpointer (not None)
    assert "checkpointer" in captured_build
    assert captured_build["checkpointer"] is not None, "engine must forward saver to build_agent"

    # Contract 1.b: every ainvoke received a config whose thread_id matches the
    # run:<uuid>:phase:<uuid> format. A prefix/substring check alone would
    # accept degenerate values such as "run::phase:", so both id segments are
    # parsed as UUIDs.
    assert captured_invoke_configs, "ainvoke must have been called at least once"
    for invoke_config in captured_invoke_configs:
        assert "configurable" in invoke_config
        thread_id = invoke_config["configurable"].get("thread_id")
        assert thread_id is not None, "thread_id must be set in agent.ainvoke config"
        parts = thread_id.split(":")
        assert len(parts) == 4, f"unexpected thread_id format: {thread_id!r}"
        assert parts[0] == "run", f"unexpected thread_id format: {thread_id!r}"
        assert parts[2] == "phase", f"unexpected thread_id format: {thread_id!r}"
        for id_segment in (parts[1], parts[3]):
            try:
                UUID(id_segment)
            except ValueError:
                pytest.fail(f"thread_id segment {id_segment!r} is not a UUID: {thread_id!r}")
# ---------------------------------------------------------------------------
# Contract 2: AsyncPostgresSaver thread isolation + round-trip
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_async_postgres_saver_round_trip_isolated_threads(pg_db_url: str) -> None:
    """Two different thread_ids write to different rows; same thread_id reads back.

    One synthetic checkpoint is written per thread through the saver yielded
    by ``get_checkpointer_ctx``; each thread must then read back exactly the
    checkpoint that was written to it.

    Args:
        pg_db_url: database URL fixture handed to ``get_checkpointer_ctx``.
    """
    import re

    from langgraph.checkpoint.base import empty_checkpoint
    from psycopg import connect

    thread_a = f"run:{uuid4()}:phase:{uuid4()}"
    thread_b = f"run:{uuid4()}:phase:{uuid4()}"

    # psycopg takes a plain ``postgresql://`` URI, so drop any SQLAlchemy
    # "+driver" suffix (``+asyncpg``, ``+psycopg``, ...) rather than only
    # the asyncpg one.
    conn_url = re.sub(r"^postgresql\+\w+://", "postgresql://", pg_db_url)

    # First open: setup() runs the LangGraph DDL.
    async with get_checkpointer_ctx(pg_db_url) as saver:
        # Verify LangGraph created its own tables alongside the alembic schema.
        with connect(conn_url, autocommit=True) as conn:
            with conn.cursor() as cur:
                cur.execute(
                    """
SELECT tablename FROM pg_tables
WHERE schemaname='public'
AND tablename LIKE 'checkpoint%%'
"""
                )
                lg_tables = {row[0] for row in cur.fetchall()}
        assert "checkpoints" in lg_tables, (
            f"AsyncPostgresSaver did not create the `checkpoints` table; got {lg_tables}"
        )

        # Write a synthetic checkpoint to thread_a.
        ck_a = empty_checkpoint()
        ck_a["channel_values"] = {"messages": ["hello from a"]}
        # AsyncPostgresSaver requires both thread_id AND checkpoint_ns in
        # configurable; LangGraph's prebuilt graphs default checkpoint_ns to
        # "" so we replicate that here. new_versions advertises that the
        # "messages" channel is at version 1. RunnableConfig is a TypedDict
        # so we cast through Any for mypy.
        config_a: Any = {"configurable": {"thread_id": thread_a, "checkpoint_ns": ""}}
        await saver.aput(config_a, ck_a, {"source": "input", "step": 1}, {"messages": "1"})

        # And one to thread_b
        ck_b = empty_checkpoint()
        ck_b["channel_values"] = {"messages": ["hello from b"]}
        config_b: Any = {"configurable": {"thread_id": thread_b, "checkpoint_ns": ""}}
        await saver.aput(config_b, ck_b, {"source": "input", "step": 1}, {"messages": "1"})

        # Each thread must read back its own latest checkpoint and not the other's.
        # LangGraph's internal serialization shape is opaque, so only the
        # checkpoint ``id`` is compared, not the channel payload.
        latest_a = await saver.aget(config_a)
        assert latest_a is not None, "thread_a checkpoint must persist across aget"
        latest_b = await saver.aget(config_b)
        assert latest_b is not None, "thread_b checkpoint must persist across aget"

        # Round-trip identity: "ids differ" alone cannot catch swapped reads,
        # because the two written checkpoints already carry different ids.
        # Pin each read to the id that was written to that thread.
        assert latest_a["id"] == ck_a["id"], "thread_a must read back its own checkpoint"
        assert latest_b["id"] == ck_b["id"], "thread_b must read back its own checkpoint"
        # Sanity: the two checkpoint IDs are distinct (proves thread isolation).
        assert latest_a["id"] != latest_b["id"]