chore: my-deepagent-seed (BudgetTracker PoC + v0.1.0 seed assets)

Pre-flight assets prepared on the main machine before the new-machine rewrite of my-deepagent in Python.

- poc/: BudgetTracker + CostMiddleware + MockChatModel PoC. Validates wrap_model_call pattern, SQLite WAL + ON CONFLICT upsert, per-scope cap accounting. 5/5 pytest PASS in isolated uv venv.
- schemas/: 10 personas (Anthropic Sonnet/Opus/Haiku + DeepSeek mix), 3 workflows (spec-and-review, bug-fix-with-reproduction, code-investigation), 4 artifact JSON Schemas (dev/spec@1, dev/phase-plan@1, dev/review-finding-batch@1, common/final-report@1).
- schemas/validate.py: pydantic + Draft202012 cross-validation. 18/18 assets verified.
- README.md: new-machine bootstrap instructions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 19:39:37 +09:00
parent c9fed71cc9
commit 1fe59d16ca
42 changed files with 3173 additions and 0 deletions
--- a/my-deepagent-seed/poc/src/poc.egg-info/PKG-INFO
+++ b/my-deepagent-seed/poc/src/poc.egg-info/PKG-INFO
@@ -0,0 +1,9 @@
+Metadata-Version: 2.4
+Name: poc
+Version: 0.1.0
+Summary: Add your description here
+Requires-Python: >=3.12
+Requires-Dist: aiosqlite>=0.20
+Requires-Dist: langchain<2.0.0,>=1.3.0
+Requires-Dist: langchain-core<2.0.0,>=1.4.0
+Requires-Dist: pydantic>=2.9
--- a/my-deepagent-seed/poc/src/poc.egg-info/SOURCES.txt
+++ b/my-deepagent-seed/poc/src/poc.egg-info/SOURCES.txt
@@ -0,0 +1,12 @@
+pyproject.toml
+src/poc/__init__.py
+src/poc/budget.py
+src/poc/middleware.py
+src/poc/mock_model.py
+src/poc/pricing.py
+src/poc.egg-info/PKG-INFO
+src/poc.egg-info/SOURCES.txt
+src/poc.egg-info/dependency_links.txt
+src/poc.egg-info/requires.txt
+src/poc.egg-info/top_level.txt
+tests/test_budget.py
--- a/my-deepagent-seed/poc/src/poc.egg-info/dependency_links.txt
+++ b/my-deepagent-seed/poc/src/poc.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
--- a/my-deepagent-seed/poc/src/poc.egg-info/requires.txt
+++ b/my-deepagent-seed/poc/src/poc.egg-info/requires.txt
@@ -0,0 +1,4 @@
+aiosqlite>=0.20
+langchain<2.0.0,>=1.3.0
+langchain-core<2.0.0,>=1.4.0
+pydantic>=2.9
--- a/my-deepagent-seed/poc/src/poc.egg-info/top_level.txt
+++ b/my-deepagent-seed/poc/src/poc.egg-info/top_level.txt
@@ -0,0 +1 @@
+poc
--- a/my-deepagent-seed/poc/src/poc/__init__.py
+++ b/my-deepagent-seed/poc/src/poc/__init__.py
@@ -0,0 +1,14 @@
+"""PoC package: budget tracking for my-deepagent v0.1.0."""
+
+from poc.budget import BudgetExhausted, BudgetTracker
+from poc.middleware import CostMiddleware
+from poc.mock_model import MockChatModel
+from poc.pricing import compute_cost
+
+__all__ = [  # names re-exported from the submodules imported above
+    "BudgetExhausted",
+    "BudgetTracker",
+    "CostMiddleware",
+    "MockChatModel",
+    "compute_cost",
+]
--- a/my-deepagent-seed/poc/src/poc/__pycache__/__init__.cpython-312.pyc
+++ b/my-deepagent-seed/poc/src/poc/__pycache__/__init__.cpython-312.pyc
--- a/my-deepagent-seed/poc/src/poc/__pycache__/budget.cpython-312.pyc
+++ b/my-deepagent-seed/poc/src/poc/__pycache__/budget.cpython-312.pyc
--- a/my-deepagent-seed/poc/src/poc/__pycache__/middleware.cpython-312.pyc
+++ b/my-deepagent-seed/poc/src/poc/__pycache__/middleware.cpython-312.pyc
--- a/my-deepagent-seed/poc/src/poc/__pycache__/mock_model.cpython-312.pyc
+++ b/my-deepagent-seed/poc/src/poc/__pycache__/mock_model.cpython-312.pyc
--- a/my-deepagent-seed/poc/src/poc/__pycache__/pricing.cpython-312.pyc
+++ b/my-deepagent-seed/poc/src/poc/__pycache__/pricing.cpython-312.pyc
--- a/my-deepagent-seed/poc/src/poc/budget.py
+++ b/my-deepagent-seed/poc/src/poc/budget.py
@@ -0,0 +1,198 @@
+"""Budget tracking with SQLite persistence for my-deepagent PoC."""
+
+import logging
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Literal
+
+import aiosqlite
+
+logger = logging.getLogger(__name__)
+
+_CREATE_TABLE_SQL = """
+CREATE TABLE IF NOT EXISTS budget_ledger (
+    scope TEXT PRIMARY KEY,
+    spent_usd REAL NOT NULL DEFAULT 0,
+    cap_usd REAL,
+    last_updated TEXT NOT NULL
+);
+"""  # one row per scope key; cap_usd is nullable (NULL = no cap stored yet)
+
+_UPSERT_CAP_SQL = """
+INSERT INTO budget_ledger (scope, spent_usd, cap_usd, last_updated)
+VALUES (?, 0, ?, ?)
+ON CONFLICT(scope) DO UPDATE SET
+    cap_usd = excluded.cap_usd
+WHERE cap_usd IS NULL;
+"""  # params (scope, cap_usd, last_updated); an existing row only gets a cap if it had none
+
+_UPSERT_SPENT_SQL = """
+INSERT INTO budget_ledger (scope, spent_usd, cap_usd, last_updated)
+VALUES (?, ?, NULL, ?)
+ON CONFLICT(scope) DO UPDATE SET
+    spent_usd = spent_usd + excluded.spent_usd,
+    last_updated = excluded.last_updated;
+"""  # params (scope, amount, last_updated); adds amount to spent_usd, leaves an existing cap alone
+
+_SELECT_ROW_SQL = "SELECT spent_usd, cap_usd FROM budget_ledger WHERE scope = ?;"
+
+
+class BudgetExhausted(Exception):
+    """Raised when a projected cost would exceed the cap for a scope."""
+
+    def __init__(self, scope: str, projected_usd: float, cap_usd: float) -> None:
+        self.scope = scope
+        self.projected_usd = projected_usd
+        self.cap_usd = cap_usd
+        super().__init__(
+            f"Budget exhausted for scope '{scope}': "
+            f"projected ${projected_usd:.6f} exceeds cap ${cap_usd:.6f}"
+        )
+
+
+def _utc_today() -> str:
+    return datetime.now(timezone.utc).strftime("%Y-%m-%d")
+
+
+def _utc_now_iso() -> str:
+    return datetime.now(timezone.utc).isoformat()
+
+
+class BudgetTracker:
+    """Async budget tracker backed by SQLite WAL.
+
+    Tracks spend across three overlapping scopes:
+    - ``day:YYYY-MM-DD`` — UTC calendar-day cap
+    - ``run:<uuid>`` — per-run cap
+    - ``persona:<name>:day:YYYY-MM-DD`` — per-persona daily cap (shares daily_cap_usd)
+    """
+
+    def __init__(
+        self,
+        db_path: Path | str,
+        daily_cap_usd: float,
+        run_cap_usd: float,
+        on_hit: Literal["block", "warn_continue", "prompt"] = "block",
+    ) -> None:
+        self._db_path = Path(db_path)
+        self._daily_cap_usd = daily_cap_usd
+        self._run_cap_usd = run_cap_usd
+        self._on_hit = on_hit
+
+    async def init(self) -> None:
+        """Create the budget_ledger table and enable WAL mode."""
+        async with aiosqlite.connect(self._db_path) as db:
+            await db.execute("PRAGMA journal_mode=WAL;")
+            await db.execute(_CREATE_TABLE_SQL)
+            await db.commit()
+
+    def _day_scope(self) -> str:
+        return f"day:{_utc_today()}"
+
+    def _run_scope(self, run_id: str) -> str:
+        return f"run:{run_id}"
+
+    def _persona_day_scope(self, persona_name: str) -> str:
+        return f"persona:{persona_name}:day:{_utc_today()}"
+
+    async def _ensure_caps(
+        self, db: aiosqlite.Connection, run_id: str | None, persona_name: str
+    ) -> None:
+        """Insert cap rows if they don't exist yet (won't overwrite existing caps)."""
+        now = _utc_now_iso()
+        await db.execute(_UPSERT_CAP_SQL, (self._day_scope(), self._daily_cap_usd, now))
+        if run_id is not None:
+            await db.execute(
+                _UPSERT_CAP_SQL, (self._run_scope(run_id), self._run_cap_usd, now)
+            )
+        await db.execute(
+            _UPSERT_CAP_SQL,
+            (self._persona_day_scope(persona_name), self._daily_cap_usd, now),
+        )
+        await db.commit()
+
+    async def _get_row(
+        self, db: aiosqlite.Connection, scope: str
+    ) -> tuple[float, float | None]:
+        """Return (spent_usd, cap_usd) for a scope. Returns (0, None) if not found."""
+        async with db.execute(_SELECT_ROW_SQL, (scope,)) as cursor:
+            row = await cursor.fetchone()
+        if row is None:
+            return 0.0, None
+        spent: float = row[0]
+        cap: float | None = row[1]
+        return spent, cap
+
+    async def assert_can_call(
+        self,
+        run_id: str | None,
+        persona_name: str,
+        estimated_cost_usd: float,
+    ) -> None:
+        """Check all applicable scopes against their caps.
+
+        Raises:
+            BudgetExhausted: if ``on_hit == 'block'`` (or 'prompt') and any
+                scope would be exceeded.
+        """
+        scopes_to_check: list[tuple[str, float]] = [
+            (self._day_scope(), self._daily_cap_usd),
+            (self._persona_day_scope(persona_name), self._daily_cap_usd),
+        ]
+        if run_id is not None:
+            scopes_to_check.append((self._run_scope(run_id), self._run_cap_usd))
+
+        async with aiosqlite.connect(self._db_path) as db:
+            for scope, cap in scopes_to_check:
+                spent, _ = await self._get_row(db, scope)
+                projected = spent + estimated_cost_usd
+                if projected > cap:
+                    if self._on_hit in ("block", "prompt"):
+                        raise BudgetExhausted(
+                            scope=scope, projected_usd=projected, cap_usd=cap
+                        )
+                    else:  # warn_continue
+                        logger.warning(
+                            "Budget cap approaching for scope '%s': "
+                            "projected $%.6f > cap $%.6f — continuing (warn_continue mode)",
+                            scope,
+                            projected,
+                            cap,
+                        )
+
+    async def record(
+        self,
+        run_id: str | None,
+        persona_name: str,
+        actual_cost_usd: float,
+    ) -> None:
+        """Add actual_cost_usd to all applicable scope ledgers."""
+        now = _utc_now_iso()
+        async with aiosqlite.connect(self._db_path) as db:
+            await self._ensure_caps(db, run_id, persona_name)
+            await db.execute(
+                _UPSERT_SPENT_SQL, (self._day_scope(), actual_cost_usd, now)
+            )
+            if run_id is not None:
+                await db.execute(
+                    _UPSERT_SPENT_SQL, (self._run_scope(run_id), actual_cost_usd, now)
+                )
+            await db.execute(
+                _UPSERT_SPENT_SQL,
+                (self._persona_day_scope(persona_name), actual_cost_usd, now),
+            )
+            await db.commit()
+
+    async def get_spent(self, scope: str) -> float:
+        """Return total spent USD for the given scope key."""
+        async with aiosqlite.connect(self._db_path) as db:
+            spent, _ = await self._get_row(db, scope)
+        return spent
+
+    async def get_remaining(self, scope: str) -> float | None:
+        """Return remaining budget for a scope, or None if no cap is set."""
+        async with aiosqlite.connect(self._db_path) as db:
+            spent, cap = await self._get_row(db, scope)
+        if cap is None:
+            return None
+        return max(0.0, cap - spent)
--- a/my-deepagent-seed/poc/src/poc/middleware.py
+++ b/my-deepagent-seed/poc/src/poc/middleware.py
@@ -0,0 +1,82 @@
+"""Cost tracking middleware for my-deepagent PoC.
+
+Uses langchain.agents.middleware.AgentMiddleware (langchain 1.x) to intercept
+model calls and record budget usage via BudgetTracker.
+
+Import path confirmed: from langchain.agents.middleware import AgentMiddleware
+"""
+
+import logging
+from collections.abc import Awaitable, Callable
+from typing import Any
+
+from langchain.agents.middleware import AgentMiddleware, ModelRequest, ModelResponse
+
+from poc.budget import BudgetTracker
+from poc.pricing import compute_cost
+
+logger = logging.getLogger(__name__)
+
+_WORST_CASE_INPUT_TOKENS = 4096  # input-token count assumed for the pre-call estimate
+_WORST_CASE_OUTPUT_TOKENS = 2048  # output-token count assumed for the pre-call estimate
+
+
+class CostMiddleware(AgentMiddleware):  # type: ignore[type-arg]
+    """Middleware that checks budget before model call and records cost after.
+
+    Pre-call: estimates cost with a conservative worst-case token count and calls
+    ``tracker.assert_can_call``. If the tracker raises ``BudgetExhausted``,
+    the exception propagates and the model is never called.
+
+    Post-call: extracts actual ``usage_metadata`` from the first AIMessage in
+    the response and records the real cost via ``tracker.record``.
+    """
+
+    def __init__(
+        self,
+        tracker: BudgetTracker,
+        run_id: str | None,
+        persona_name: str,
+        model_name: str,
+    ) -> None:
+        self._tracker = tracker
+        self._run_id = run_id
+        self._persona_name = persona_name
+        self._model_name = model_name
+
+    async def awrap_model_call(
+        self,
+        request: ModelRequest[Any],
+        handler: Callable[[ModelRequest[Any]], Awaitable[ModelResponse[Any]]],
+    ) -> ModelResponse[Any]:
+        """Check budget, call model, record actual cost."""
+        estimated = compute_cost(
+            self._model_name,
+            _WORST_CASE_INPUT_TOKENS,
+            _WORST_CASE_OUTPUT_TOKENS,
+        )
+        await self._tracker.assert_can_call(self._run_id, self._persona_name, estimated)
+
+        response = await handler(request)
+
+        usage: dict[str, Any] = {}
+        if response.result:
+            first_msg = response.result[0]
+            usage = getattr(first_msg, "usage_metadata", None) or {}
+
+        actual = compute_cost(
+            self._model_name,
+            int(usage.get("input_tokens", 0)),
+            int(usage.get("output_tokens", 0)),
+        )
+        await self._tracker.record(self._run_id, self._persona_name, actual)
+
+        logger.debug(
+            "CostMiddleware: model=%s persona=%s estimated=$%.6f actual=$%.6f",
+            self._model_name,
+            self._persona_name,
+            estimated,
+            actual,
+        )
+
+        return response
--- a/my-deepagent-seed/poc/src/poc/mock_model.py
+++ b/my-deepagent-seed/poc/src/poc/mock_model.py
@@ -0,0 +1,36 @@
+"""Fake BaseChatModel for testing — no real API calls."""
+
+from typing import Any
+
+from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_core.messages import AIMessage, BaseMessage
+from langchain_core.outputs import ChatGeneration, ChatResult
+
+
+class MockChatModel(BaseChatModel):
+    """A fake chat model that returns a canned response with configurable token counts."""
+
+    model: str
+    input_tokens: int = 100
+    output_tokens: int = 50
+
+    @property
+    def _llm_type(self) -> str:
+        return "mock"
+
+    def _generate(
+        self,
+        messages: list[BaseMessage],
+        stop: list[str] | None = None,
+        run_manager: Any | None = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        message = AIMessage(
+            content="mock response",
+            usage_metadata={
+                "input_tokens": self.input_tokens,
+                "output_tokens": self.output_tokens,
+                "total_tokens": self.input_tokens + self.output_tokens,
+            },
+        )
+        return ChatResult(generations=[ChatGeneration(message=message)])
--- a/my-deepagent-seed/poc/src/poc/pricing.py
+++ b/my-deepagent-seed/poc/src/poc/pricing.py
@@ -0,0 +1,26 @@
+"""Static pricing matrix for PoC. Real implementation will fetch OpenRouter /api/v1/models."""
+
+from typing import TypedDict
+
+
+class ModelPrice(TypedDict):  # price pair for one model, per 1,000 tokens
+    input_per_1k: float  # USD per 1,000 input tokens
+    output_per_1k: float  # USD per 1,000 output tokens
+
+
+PRICING: dict[str, ModelPrice] = {  # static price table keyed by model id
+    "anthropic/claude-sonnet-4-6": {"input_per_1k": 0.003, "output_per_1k": 0.015},
+    "anthropic/claude-haiku-4-5": {"input_per_1k": 0.001, "output_per_1k": 0.005},
+    "anthropic/claude-opus-4-1": {"input_per_1k": 0.015, "output_per_1k": 0.075},
+    "deepseek/deepseek-chat": {"input_per_1k": 0.00028, "output_per_1k": 0.00112},
+}
+
+
+def compute_cost(model: str, input_tokens: int, output_tokens: int) -> float:
+    """Return USD cost. Unknown model returns 0 (logged separately)."""
+    price = PRICING.get(model.removeprefix("openrouter:"))
+    if price is None:
+        return 0.0
+    return (input_tokens / 1000.0) * price["input_per_1k"] + (
+        output_tokens / 1000.0
+    ) * price["output_per_1k"]