Files
dev-puppeteer/my-deepagent-seed/poc/tests/test_budget.py
chungyeong 1fe59d16ca chore: my-deepagent-seed (BudgetTracker PoC + v0.1.0 seed assets)
Pre-flight assets prepared on the main machine before the new-machine
rewrite of my-deepagent in Python.

- poc/: BudgetTracker + CostMiddleware + MockChatModel PoC.
  Validates wrap_model_call pattern, SQLite WAL + ON CONFLICT upsert,
  per-scope cap accounting. 5/5 pytest PASS in isolated uv venv.
- schemas/: 10 personas (Anthropic Sonnet/Opus/Haiku + DeepSeek mix),
  3 workflows (spec-and-review, bug-fix-with-reproduction,
  code-investigation), 4 artifact JSON Schemas (dev/spec@1,
  dev/phase-plan@1, dev/review-finding-batch@1, common/final-report@1).
- schemas/validate.py: pydantic + Draft202012 cross-validation.
  18/18 assets verified.
- README.md: new-machine bootstrap instructions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 19:39:37 +09:00

102 lines
3.2 KiB
Python

"""Unit tests for BudgetTracker and cost middleware flow."""
from datetime import datetime, timezone
from pathlib import Path
from uuid import uuid4
import pytest
from poc.budget import BudgetExhausted, BudgetTracker
@pytest.fixture
async def tracker(tmp_path: Path) -> BudgetTracker:
    """Provide an initialised BudgetTracker with small caps for the tests.

    The tracker's ``db_path`` lives under pytest's per-test ``tmp_path``.
    It is configured with a 1.0 USD daily cap, a 0.5 USD per-run cap and
    ``on_hit="block"``; ``init()`` is awaited before it is handed out.
    """
    # NOTE(review): an async fixture declared with plain ``@pytest.fixture``
    # presumably relies on an async pytest plugin running in auto mode —
    # confirm against the project's pytest configuration.
    budget = BudgetTracker(
        db_path=tmp_path / "budget.db",
        daily_cap_usd=1.0,
        run_cap_usd=0.5,
        on_hit="block",
    )
    await budget.init()
    return budget
async def test_record_updates_ledger(tracker: BudgetTracker) -> None:
    """A single ``record`` call shows up under both the day and run scopes."""
    run_id = str(uuid4())
    amount = 0.10
    await tracker.record(run_id, "persona-a", amount)

    # The day scope key is built from the current UTC date.
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    day_scope = f"day:{today}"
    run_scope = f"run:{run_id}"

    assert await tracker.get_spent(day_scope) == pytest.approx(amount)
    assert await tracker.get_spent(run_scope) == pytest.approx(amount)
async def test_assert_can_call_under_cap_ok(tracker: BudgetTracker) -> None:
    """An estimate below every cap passes the pre-call check without raising."""
    # The fixture sets a 1.0 daily cap and a 0.5 run cap; an estimate of 0.3
    # on a brand-new run id stays under both.
    await tracker.assert_can_call(str(uuid4()), "p", 0.3)
async def test_assert_can_call_over_run_cap_raises(tracker: BudgetTracker) -> None:
    """Exceeding the per-run cap raises BudgetExhausted with a ``run:`` scope."""
    run_id = str(uuid4())
    already_spent = 0.40
    next_estimate = 0.15
    await tracker.record(run_id, "p", already_spent)

    # 0.40 + 0.15 = 0.55, which is above the fixture's 0.5 run cap.
    with pytest.raises(BudgetExhausted) as caught:
        await tracker.assert_can_call(run_id, "p", next_estimate)

    # Inspect the exception only after the ``with`` block has exited;
    # statements placed after the raising call inside it would never run.
    assert caught.value.scope.startswith("run:")
async def test_per_run_and_per_day_independent(tracker: BudgetTracker) -> None:
    """Run caps are tracked per run id while the day cap accumulates across runs."""
    first_run, second_run, third_run = (str(uuid4()) for _ in range(3))

    # Run 1 spends 0.40.
    await tracker.record(first_run, "p", 0.40)

    # Run 2 starts with an untouched run budget (0.45 < 0.5), and the day
    # total 0.40 + 0.45 = 0.85 is still within the 1.0 daily cap.
    await tracker.assert_can_call(second_run, "p", 0.45)
    await tracker.record(second_run, "p", 0.45)

    # Run 3 is itself under the run cap, but the day total would become
    # 0.85 + 0.20 = 1.05 > 1.0, so the day scope must be the one reported.
    with pytest.raises(BudgetExhausted) as caught:
        await tracker.assert_can_call(third_run, "p", 0.20)

    assert caught.value.scope.startswith("day:")
async def test_cost_middleware_records_after_invoke(tmp_path: Path) -> None:
    """Walk the estimate -> invoke -> record sequence using the mock model.

    No real API is called: the model is a ``MockChatModel`` and costs come
    from ``compute_cost``.
    """
    from poc.mock_model import MockChatModel
    from poc.pricing import compute_cost

    model_name = "deepseek/deepseek-chat"

    # Caps are set well above anything this test spends.
    tracker = BudgetTracker(
        db_path=tmp_path / "b.db",
        daily_cap_usd=10,
        run_cap_usd=5,
        on_hit="block",
    )
    await tracker.init()

    mock_llm = MockChatModel(
        model=model_name,
        input_tokens=1000,
        output_tokens=500,
    )
    run_id = str(uuid4())

    # Step 1: budget check before the call, using a deliberately higher
    # output-token figure (2000) than the mock is configured with (500).
    estimate = compute_cost(model_name, 1000, 2000)
    await tracker.assert_can_call(run_id, "log-analyzer", estimate)

    # Step 2: invoke the mock model.
    messages = [{"role": "user", "content": "hi"}]
    reply = mock_llm.invoke(messages)
    assert reply.content == "mock response"

    # Step 3: record the cost computed from the token counts the mock was
    # configured with.
    actual = compute_cost(
        model_name, mock_llm.input_tokens, mock_llm.output_tokens
    )
    await tracker.record(run_id, "log-analyzer", actual)

    # The day scope should now hold exactly that non-zero amount.
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    spent = await tracker.get_spent(f"day:{today}")
    assert spent == pytest.approx(actual)
    assert spent > 0