"""Unit tests for BudgetTracker and cost middleware flow.""" from datetime import datetime, timezone from pathlib import Path from uuid import uuid4 import pytest from poc.budget import BudgetExhausted, BudgetTracker @pytest.fixture async def tracker(tmp_path: Path) -> BudgetTracker: t = BudgetTracker( db_path=tmp_path / "budget.db", daily_cap_usd=1.0, run_cap_usd=0.5, on_hit="block", ) await t.init() return t async def test_record_updates_ledger(tracker: BudgetTracker) -> None: run_id = str(uuid4()) await tracker.record(run_id, "persona-a", 0.10) today = datetime.now(timezone.utc).strftime("%Y-%m-%d") assert await tracker.get_spent(f"day:{today}") == pytest.approx(0.10) assert await tracker.get_spent(f"run:{run_id}") == pytest.approx(0.10) async def test_assert_can_call_under_cap_ok(tracker: BudgetTracker) -> None: run_id = str(uuid4()) # daily cap 1.0, run cap 0.5. estimated 0.3 → all pass await tracker.assert_can_call(run_id, "p", 0.3) async def test_assert_can_call_over_run_cap_raises(tracker: BudgetTracker) -> None: run_id = str(uuid4()) await tracker.record(run_id, "p", 0.40) with pytest.raises(BudgetExhausted) as exc: # 0.40 + 0.15 = 0.55 > run cap 0.5 await tracker.assert_can_call(run_id, "p", 0.15) assert exc.value.scope.startswith("run:") async def test_per_run_and_per_day_independent(tracker: BudgetTracker) -> None: # First run: spend 0.40 r1 = str(uuid4()) await tracker.record(r1, "p", 0.40) # Second run: run cap is fresh (0.40 < 0.5), day total 0.40 + 0.45 = 0.85 <= 1.0 r2 = str(uuid4()) await tracker.assert_can_call(r2, "p", 0.45) await tracker.record(r2, "p", 0.45) # Third run: day total 0.85 + 0.20 = 1.05 > 1.0 → day scope must raise r3 = str(uuid4()) with pytest.raises(BudgetExhausted) as exc: await tracker.assert_can_call(r3, "p", 0.20) assert exc.value.scope.startswith("day:") async def test_cost_middleware_records_after_invoke(tmp_path: Path) -> None: """Verifies the full estimate → invoke → record flow without real API calls.""" from poc.mock_model import MockChatModel from poc.pricing import compute_cost tracker = BudgetTracker( db_path=tmp_path / "b.db", daily_cap_usd=10, run_cap_usd=5, on_hit="block", ) await tracker.init() model = MockChatModel( model="deepseek/deepseek-chat", input_tokens=1000, output_tokens=500, ) run_id = str(uuid4()) # Pre-call budget check with conservative estimate est = compute_cost("deepseek/deepseek-chat", 1000, 2000) await tracker.assert_can_call(run_id, "log-analyzer", est) # Actual model invocation result = model.invoke([{"role": "user", "content": "hi"}]) assert result.content == "mock response" # Record actual cost from usage metadata actual = compute_cost( "deepseek/deepseek-chat", model.input_tokens, model.output_tokens ) await tracker.record(run_id, "log-analyzer", actual) today = datetime.now(timezone.utc).strftime("%Y-%m-%d") spent = await tracker.get_spent(f"day:{today}") assert spent == pytest.approx(actual) assert spent > 0