chore: my-deepagent-seed (BudgetTracker PoC + v0.1.0 seed assets)
Pre-flight assets prepared on the main machine before the new-machine rewrite of my-deepagent in Python.

- poc/: BudgetTracker + CostMiddleware + MockChatModel PoC. Validates the wrap_model_call pattern, SQLite WAL + ON CONFLICT upsert, and per-scope cap accounting. 5/5 pytest PASS in an isolated uv venv.
- schemas/: 10 personas (Anthropic Sonnet/Opus/Haiku + DeepSeek mix), 3 workflows (spec-and-review, bug-fix-with-reproduction, code-investigation), 4 artifact JSON Schemas (dev/spec@1, dev/phase-plan@1, dev/review-finding-batch@1, common/final-report@1).
- schemas/validate.py: pydantic + Draft202012 cross-validation. 18/18 assets verified.
- README.md: new-machine bootstrap instructions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
0
my-deepagent-seed/poc/tests/__init__.py
Normal file
0
my-deepagent-seed/poc/tests/__init__.py
Normal file
BIN
my-deepagent-seed/poc/tests/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
my-deepagent-seed/poc/tests/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
Binary file not shown.
101
my-deepagent-seed/poc/tests/test_budget.py
Normal file
101
my-deepagent-seed/poc/tests/test_budget.py
Normal file
@@ -0,0 +1,101 @@
|
||||
"""Unit tests for BudgetTracker and cost middleware flow."""
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from poc.budget import BudgetExhausted, BudgetTracker
|
||||
|
||||
|
||||
@pytest.fixture
async def tracker(tmp_path: Path) -> BudgetTracker:
    """Provide an initialised BudgetTracker stored under the test's tmp dir.

    The tracker is built with a 1.0 USD daily cap, a 0.5 USD run cap and
    ``on_hit="block"``; ``init()`` is awaited before it is handed to the test.
    """
    settings = {
        "db_path": tmp_path / "budget.db",
        "daily_cap_usd": 1.0,
        "run_cap_usd": 0.5,
        "on_hit": "block",
    }
    budget = BudgetTracker(**settings)
    await budget.init()
    return budget
|
||||
|
||||
|
||||
async def test_record_updates_ledger(tracker: BudgetTracker) -> None:
    """A recorded cost is reported under both the day scope and the run scope."""
    rid = str(uuid4())
    await tracker.record(rid, "persona-a", 0.10)

    utc_day = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    # Day scope is checked first, then the run scope; both must equal the
    # single amount recorded above.
    for scope in (f"day:{utc_day}", f"run:{rid}"):
        assert await tracker.get_spent(scope) == pytest.approx(0.10)
|
||||
|
||||
|
||||
async def test_assert_can_call_under_cap_ok(tracker: BudgetTracker) -> None:
    """An estimate below both caps passes the pre-call check without raising."""
    # Fixture caps are 1.0 per day and 0.5 per run; a 0.3 estimate fits both.
    fresh_run = str(uuid4())
    await tracker.assert_can_call(fresh_run, "p", 0.3)
|
||||
|
||||
|
||||
async def test_assert_can_call_over_run_cap_raises(tracker: BudgetTracker) -> None:
    """A call that would push a run past its cap raises with a ``run:`` scope."""
    rid = str(uuid4())
    await tracker.record(rid, "p", 0.40)

    # 0.40 already spent; a further 0.15 totals 0.55, above the 0.5 run cap.
    with pytest.raises(BudgetExhausted) as raised:
        await tracker.assert_can_call(rid, "p", 0.15)

    failing_scope = raised.value.scope
    assert failing_scope.startswith("run:")
|
||||
|
||||
|
||||
async def test_per_run_and_per_day_independent(tracker: BudgetTracker) -> None:
    """Each run gets its own run-cap budget while the day total accumulates."""
    first_run, second_run, third_run = (str(uuid4()) for _ in range(3))

    # Run 1 spends 0.40.
    await tracker.record(first_run, "p", 0.40)

    # Run 2 starts from zero, so 0.45 fits under the 0.5 run cap, and the day
    # total becomes 0.40 + 0.45 = 0.85, still within the 1.0 daily cap.
    await tracker.assert_can_call(second_run, "p", 0.45)
    await tracker.record(second_run, "p", 0.45)

    # Run 3 would bring the day total to 0.85 + 0.20 = 1.05, over the 1.0
    # daily cap, so the failure must be attributed to the day scope.
    with pytest.raises(BudgetExhausted) as raised:
        await tracker.assert_can_call(third_run, "p", 0.20)

    assert raised.value.scope.startswith("day:")
|
||||
|
||||
|
||||
async def test_cost_middleware_records_after_invoke(tmp_path: Path) -> None:
    """Walk the estimate → invoke → record flow using a mock model only."""
    from poc.mock_model import MockChatModel
    from poc.pricing import compute_cost

    model_name = "deepseek/deepseek-chat"
    persona = "log-analyzer"

    ledger = BudgetTracker(
        db_path=tmp_path / "b.db",
        daily_cap_usd=10,
        run_cap_usd=5,
        on_hit="block",
    )
    await ledger.init()

    mock = MockChatModel(model=model_name, input_tokens=1000, output_tokens=500)
    rid = str(uuid4())

    # Budget check before the call, priced with a larger output-token count
    # (2000) than the mock will report (500).
    estimated = compute_cost(model_name, 1000, 2000)
    await ledger.assert_can_call(rid, persona, estimated)

    # The model invocation itself.
    reply = mock.invoke([{"role": "user", "content": "hi"}])
    assert reply.content == "mock response"

    # Price the call from the token counts held on the mock, then record it.
    actual_cost = compute_cost(model_name, mock.input_tokens, mock.output_tokens)
    await ledger.record(rid, persona, actual_cost)

    utc_day = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    day_total = await ledger.get_spent(f"day:{utc_day}")
    assert day_total == pytest.approx(actual_cost)
    assert day_total > 0
|
||||
Reference in New Issue
Block a user