# dev-puppeteer/my-deepagent-seed/poc/tests/test_budget.py

"""Unit tests for BudgetTracker and cost middleware flow."""

from datetime import datetime, timezone
from pathlib import Path
from uuid import uuid4

import pytest

from poc.budget import BudgetExhausted, BudgetTracker


@pytest.fixture
async def tracker(tmp_path: Path) -> BudgetTracker:
    """Provide an initialised BudgetTracker for a single test.

    The tracker's ``db_path`` points at ``budget.db`` inside pytest's
    per-test ``tmp_path``. It is configured with a 1.0 USD daily cap,
    a 0.5 USD run cap and ``on_hit="block"``, and ``init()`` has already
    been awaited when the test receives it.
    """
    ledger_file = tmp_path / "budget.db"
    budget = BudgetTracker(
        db_path=ledger_file,
        daily_cap_usd=1.0,
        run_cap_usd=0.5,
        on_hit="block",
    )
    await budget.init()
    return budget


async def test_record_updates_ledger(tracker: BudgetTracker) -> None:
    """A single recorded cost is reported under both the day and run scopes."""
    rid = str(uuid4())
    await tracker.record(rid, "persona-a", 0.10)

    # Scope keys queried here: "day:<UTC date as YYYY-MM-DD>" and "run:<run id>".
    utc_day = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    day_total = await tracker.get_spent(f"day:{utc_day}")
    assert day_total == pytest.approx(0.10)

    run_total = await tracker.get_spent(f"run:{rid}")
    assert run_total == pytest.approx(0.10)


async def test_assert_can_call_under_cap_ok(tracker: BudgetTracker) -> None:
    """An estimate below both caps passes the pre-call check without raising."""
    # Nothing has been recorded for this run; the 0.3 estimate is under the
    # fixture's run cap (0.5) and daily cap (1.0), so no exception is expected.
    fresh_run = str(uuid4())
    await tracker.assert_can_call(fresh_run, "p", 0.3)


async def test_assert_can_call_over_run_cap_raises(tracker: BudgetTracker) -> None:
    """Exceeding the run cap raises BudgetExhausted with a ``run:`` scope."""
    rid = str(uuid4())
    await tracker.record(rid, "p", 0.40)

    # 0.40 already recorded + 0.15 estimated = 0.55, above the 0.5 run cap.
    with pytest.raises(BudgetExhausted) as caught:
        await tracker.assert_can_call(rid, "p", 0.15)

    reported_scope = caught.value.scope
    assert reported_scope.startswith("run:")


async def test_per_run_and_per_day_independent(tracker: BudgetTracker) -> None:
    """Run totals are tracked per run id while the day total spans all runs."""
    first_run, second_run, third_run = (str(uuid4()) for _ in range(3))

    # Run 1 spends 0.40.
    await tracker.record(first_run, "p", 0.40)

    # Run 2 has nothing recorded yet, so a 0.45 estimate fits under the 0.5
    # run cap; the day total would be 0.40 + 0.45 = 0.85, within the 1.0 cap.
    await tracker.assert_can_call(second_run, "p", 0.45)
    await tracker.record(second_run, "p", 0.45)

    # Run 3 is also new (0.20 is under the run cap), but the day total would
    # reach 0.85 + 0.20 = 1.05 > 1.0, so the day scope is expected to raise.
    with pytest.raises(BudgetExhausted) as caught:
        await tracker.assert_can_call(third_run, "p", 0.20)

    reported_scope = caught.value.scope
    assert reported_scope.startswith("day:")


async def test_cost_middleware_records_after_invoke(tmp_path: Path) -> None:
    """Walk the estimate → invoke → record sequence against a mock model.

    No real API is called: the model is a MockChatModel, and each step of
    the flow (budget check, invocation, cost recording) is driven directly
    by this test.
    """
    from poc.mock_model import MockChatModel
    from poc.pricing import compute_cost

    model_name = "deepseek/deepseek-chat"
    persona = "log-analyzer"

    # Caps are set well above anything this test spends.
    budget = BudgetTracker(
        db_path=tmp_path / "b.db",
        daily_cap_usd=10,
        run_cap_usd=5,
        on_hit="block",
    )
    await budget.init()

    fake_llm = MockChatModel(
        model=model_name,
        input_tokens=1000,
        output_tokens=500,
    )
    rid = str(uuid4())

    # Step 1: pre-call check, using a larger output-token figure (2000) than
    # the mock is configured with (500).
    estimated_cost = compute_cost(model_name, 1000, 2000)
    await budget.assert_can_call(rid, persona, estimated_cost)

    # Step 2: invoke the mock model.
    reply = fake_llm.invoke([{"role": "user", "content": "hi"}])
    assert reply.content == "mock response"

    # Step 3: record the cost computed from the token counts held on the mock.
    actual_cost = compute_cost(
        model_name, fake_llm.input_tokens, fake_llm.output_tokens
    )
    await budget.record(rid, persona, actual_cost)

    # The day scope should now hold exactly the recorded, non-zero cost.
    utc_day = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    day_total = await budget.get_spent(f"day:{utc_day}")
    assert day_total == pytest.approx(actual_cost)
    assert day_total > 0