1차 v0.3 구현 후 plan-v0.3 와 대조해 발견된 18건 누락/명세 위반을 보강. 자기 리뷰 3 라운드 (누락·미완 / 오류·엣지케이스 / 과최적화) 모두 PASS. PR #5 plan-mode (3건): - BLOCKED_TOOLS_IN_PLAN_MODE 에 write_todos 추가 - /plan 시 system message inject (_PLAN_MODE_SYSTEM_PROMPT) - /approve 시 마지막 assistant 메시지를 "approved plan" system 으로 inject - InteractiveSession._pending_system_messages 인프라 신설 PR #2 compaction (1건): - CompactionResult.summary_text 추가, 다음 thread 첫 ainvoke 에 inject PR #3 auto-memory (6건): - global memory dir + bootstrap - frontmatter name/description/type 정식 도입 + MemoryEntry/MemoryType - _infer_memory_type (keyword heuristic, no LLM) - _scrub_secrets (OpenRouter/Anthropic/OpenAI/AWS/Bearer redaction) - /memory show <name> 서브명령 - /remember [--global] / /forget [--global] 스코프 토글 PR #4 skills (3건): - project_skills_dir + 두 스코프 (global / project) merge with last-wins - /skill <name> 본문 inject (queue_system_message) — 이전엔 REPL 출력만 - /skills show <name> 별도 서브명령 PR #6 sub-agent (4건): - budget.py `session:<uuid>` scope + CostMiddleware 자동 전달 - resolve_root_session_id walk-up (cycle guard) + sub-agent root 에 charge - run_subagent_to_completion 실제 ainvoke + 결과 push to parent - /agents 서브명령 구조 (list / spawn / show) + spawn 시 parent system msg PR #7 governance (1건): - bootstrap_user_dirs — instructions + global/memory + skills + projects 한 호출로 idempotent 부트스트랩 PR #8 Web GUI (1건): - index.html → 세션 목록, runs.html (신설) → workflow archive - conversation.html ?session=<id> deep-link PR #9 workflow integration (2건): - /workflow 백그라운드 WorkflowEngine.run + 진행 메시지 stream 누적 - /binding show <workflow-name[@version]> 인자 지원 테스트 (+17, 685 → 702 passed): - test_plan_mode: write_todos 차단 + blocklist sanity - test_memory: scrub + type 추론 + override - test_skills: project override + find_skill + resolve_skill_sources(pk) - test_subagents: resolve_root_session_id chain + missing fallback - test_budget: session: scope accumulation - test_instructions: governance bootstrap + idempotency - test_api_static: runs.html 신설 + index.html 재구성 게이트: - ruff check / format --check / mypy: PASS (141 source files) - pytest -q --ignore=tests/integration/test_e2e_workflow.py --ignore=tests/integration/test_openrouter_smoke.py: 702 passed Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
304 lines
10 KiB
Python
304 lines
10 KiB
Python
"""Integration tests for src/my_deepagent/budget.py (BudgetTracker)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from uuid import UUID, uuid4
|
|
|
|
import pytest
|
|
import pytest_asyncio
|
|
|
|
from my_deepagent.budget import BudgetOnHit, BudgetTracker
|
|
from my_deepagent.errors import BudgetExhaustedError
|
|
from my_deepagent.persistence.db import Database
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fixtures
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_RUN_ID = UUID("00000000-0000-0000-0000-000000000001")
|
|
|
|
|
|
@pytest_asyncio.fixture
|
|
async def db(tmp_path: object) -> Database:
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
p = Path(tempfile.mkdtemp()) / "test_budget.sqlite3"
|
|
database = Database(f"sqlite+aiosqlite:///{p}")
|
|
await database.init_schema()
|
|
return database
|
|
|
|
|
|
def _make_tracker(
|
|
db: Database,
|
|
daily_cap: float = 5.0,
|
|
run_cap: float = 1.0,
|
|
on_hit: BudgetOnHit = BudgetOnHit.BLOCK,
|
|
prompt_callback: object = None,
|
|
) -> BudgetTracker:
|
|
return BudgetTracker(
|
|
db=db,
|
|
daily_cap_usd=daily_cap,
|
|
run_cap_usd=run_cap,
|
|
daily_warn_usd=3.0,
|
|
run_warn_usd=0.5,
|
|
on_hit=on_hit,
|
|
prompt_callback=prompt_callback, # type: ignore[arg-type]
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# init()
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_init_creates_day_scope_row(db: Database) -> None:
|
|
tracker = _make_tracker(db)
|
|
await tracker.init()
|
|
spent = await tracker.get_spent(f"day:{_today()}")
|
|
assert spent == 0.0
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_init_is_idempotent(db: Database) -> None:
|
|
tracker = _make_tracker(db)
|
|
await tracker.init()
|
|
await tracker.init() # second call should not error or double-insert
|
|
spent = await tracker.get_spent(f"day:{_today()}")
|
|
assert spent == 0.0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# assert_can_call — under cap
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_assert_can_call_under_cap_returns_ok(db: Database) -> None:
|
|
tracker = _make_tracker(db, daily_cap=5.0, run_cap=1.0)
|
|
result = await tracker.assert_can_call(
|
|
run_id=_RUN_ID,
|
|
persona_name="researcher",
|
|
estimated_cost_usd=0.5,
|
|
)
|
|
assert result.ok is True
|
|
assert result.blocked_scope is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# assert_can_call — over run cap (on_hit=block)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_assert_can_call_over_run_cap_raises(db: Database) -> None:
|
|
tracker = _make_tracker(db, run_cap=0.01, on_hit=BudgetOnHit.BLOCK)
|
|
with pytest.raises(BudgetExhaustedError) as exc_info:
|
|
await tracker.assert_can_call(
|
|
run_id=_RUN_ID,
|
|
persona_name=None,
|
|
estimated_cost_usd=1.0,
|
|
)
|
|
err = exc_info.value
|
|
assert err.scope.startswith("run:")
|
|
assert err.projected_usd > 0.01
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# assert_can_call — over day cap (on_hit=block)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_assert_can_call_over_day_cap_raises(db: Database) -> None:
|
|
tracker = _make_tracker(db, daily_cap=0.001, run_cap=999.0, on_hit=BudgetOnHit.BLOCK)
|
|
with pytest.raises(BudgetExhaustedError) as exc_info:
|
|
await tracker.assert_can_call(
|
|
run_id=_RUN_ID,
|
|
persona_name=None,
|
|
estimated_cost_usd=1.0,
|
|
)
|
|
err = exc_info.value
|
|
assert err.scope.startswith("day:")
|
|
assert err.cap_usd == pytest.approx(0.001)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# record() — accumulates spend
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_record_accumulates_spend(db: Database) -> None:
|
|
tracker = _make_tracker(db)
|
|
run_id = uuid4()
|
|
await tracker.record(run_id=run_id, persona_name=None, actual_cost_usd=0.10)
|
|
await tracker.record(run_id=run_id, persona_name=None, actual_cost_usd=0.05)
|
|
|
|
day_spent = await tracker.get_spent(f"day:{_today()}")
|
|
run_spent = await tracker.get_spent(f"run:{run_id}")
|
|
assert day_spent == pytest.approx(0.15)
|
|
assert run_spent == pytest.approx(0.15)
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_record_zero_is_noop(db: Database) -> None:
|
|
tracker = _make_tracker(db)
|
|
run_id = uuid4()
|
|
await tracker.record(run_id=run_id, persona_name=None, actual_cost_usd=0.0)
|
|
run_spent = await tracker.get_spent(f"run:{run_id}")
|
|
assert run_spent == 0.0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# on_hit=warn_continue
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_warn_continue_over_cap_returns_ok_no_raise(db: Database) -> None:
|
|
tracker = _make_tracker(db, run_cap=0.001, on_hit=BudgetOnHit.WARN_CONTINUE)
|
|
result = await tracker.assert_can_call(
|
|
run_id=_RUN_ID,
|
|
persona_name=None,
|
|
estimated_cost_usd=1.0,
|
|
)
|
|
# WARN_CONTINUE: blocked=False, no raise
|
|
assert result.ok is True
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# on_hit=prompt
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_prompt_callback_returns_true_proceeds(db: Database) -> None:
|
|
async def _allow(scope: str, projected: float, cap: float) -> bool:
|
|
return True
|
|
|
|
tracker = _make_tracker(db, run_cap=0.001, on_hit=BudgetOnHit.PROMPT, prompt_callback=_allow)
|
|
result = await tracker.assert_can_call(
|
|
run_id=_RUN_ID,
|
|
persona_name=None,
|
|
estimated_cost_usd=1.0,
|
|
)
|
|
assert result.ok is True
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_prompt_callback_returns_false_raises(db: Database) -> None:
|
|
async def _deny(scope: str, projected: float, cap: float) -> bool:
|
|
return False
|
|
|
|
tracker = _make_tracker(db, run_cap=0.001, on_hit=BudgetOnHit.PROMPT, prompt_callback=_deny)
|
|
with pytest.raises(BudgetExhaustedError):
|
|
await tracker.assert_can_call(
|
|
run_id=_RUN_ID,
|
|
persona_name=None,
|
|
estimated_cost_usd=1.0,
|
|
)
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_prompt_callback_none_raises_like_block(db: Database) -> None:
|
|
tracker = _make_tracker(db, run_cap=0.001, on_hit=BudgetOnHit.PROMPT, prompt_callback=None)
|
|
with pytest.raises(BudgetExhaustedError):
|
|
await tracker.assert_can_call(
|
|
run_id=_RUN_ID,
|
|
persona_name=None,
|
|
estimated_cost_usd=1.0,
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# persona scope
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_persona_scope_accumulates_separately(db: Database) -> None:
|
|
tracker = _make_tracker(db)
|
|
await tracker.record(run_id=None, persona_name="researcher", actual_cost_usd=0.20)
|
|
|
|
persona_spent = await tracker.get_spent(f"persona:researcher:day:{_today()}")
|
|
day_spent = await tracker.get_spent(f"day:{_today()}")
|
|
assert persona_spent == pytest.approx(0.20)
|
|
assert day_spent == pytest.approx(0.20)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# get_remaining()
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_get_remaining_with_no_spend(db: Database) -> None:
|
|
tracker = _make_tracker(db, daily_cap=5.0)
|
|
remaining = await tracker.get_remaining(f"day:{_today()}")
|
|
assert remaining == pytest.approx(5.0)
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_get_remaining_after_spend(db: Database) -> None:
|
|
tracker = _make_tracker(db, daily_cap=5.0)
|
|
await tracker.record(run_id=None, persona_name=None, actual_cost_usd=1.5)
|
|
remaining = await tracker.get_remaining(f"day:{_today()}")
|
|
assert remaining == pytest.approx(3.5)
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_get_remaining_unknown_scope_returns_none(db: Database) -> None:
|
|
tracker = _make_tracker(db)
|
|
# "unknown:xyz" has no cap in _cap_for_scope
|
|
remaining = await tracker.get_remaining("unknown:xyz")
|
|
assert remaining is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# session: scope (v0.3 PR #6) — sub-agent rollup to root session
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_session_scope_accumulates_cost(db: Database) -> None:
|
|
import uuid as _uuid
|
|
|
|
tracker = _make_tracker(db, run_cap=2.0)
|
|
session_id = _uuid.uuid4()
|
|
await tracker.record(
|
|
run_id=None, persona_name=None, actual_cost_usd=0.30, session_id=session_id
|
|
)
|
|
await tracker.record(
|
|
run_id=None, persona_name=None, actual_cost_usd=0.20, session_id=session_id
|
|
)
|
|
spent = await tracker.get_spent(f"session:{session_id}")
|
|
assert spent == pytest.approx(0.50)
|
|
remaining = await tracker.get_remaining(f"session:{session_id}")
|
|
assert remaining == pytest.approx(1.50)
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_session_scope_omitted_when_no_session_id(db: Database) -> None:
|
|
"""Calls without ``session_id`` must NOT create a session: ledger row."""
|
|
import uuid as _uuid
|
|
|
|
tracker = _make_tracker(db)
|
|
# Drive a record without session_id.
|
|
await tracker.record(run_id=None, persona_name=None, actual_cost_usd=0.10)
|
|
# Querying any session scope should yield 0 spent.
|
|
sid = _uuid.uuid4()
|
|
assert (await tracker.get_spent(f"session:{sid}")) == pytest.approx(0.0)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _today() -> str:
|
|
from datetime import UTC, datetime
|
|
|
|
return datetime.now(UTC).strftime("%Y-%m-%d")
|