"""v0.3 PR #2 — Context compaction tests. 4 scenarios from the plan: 1. Manual `/compact` (via compact_session()) — happy path: inserts summary, archives originals to negative seq band, bumps token counters. 2. should_compact() threshold logic: under 70% → False, over 70% → True. 3. Insufficient messages (< MIN_COMPACTABLE) → CompactionResult(compacted=False). 4. Per-session asyncio.Lock serializes concurrent compactions — second caller waits for first to release. All scenarios stub the summariser LLM (no OpenRouter calls). The DB layer is exercised end-to-end via aiosqlite tmp_path. """ from __future__ import annotations import asyncio import uuid from collections.abc import AsyncIterator from datetime import UTC, datetime from pathlib import Path from typing import Any import pytest from sqlalchemy import select from my_deepagent import compaction as compaction_mod from my_deepagent.compaction import ( KEEP_RECENT_K, MIN_COMPACTABLE, CompactionResult, compact_session, should_compact, ) from my_deepagent.config import load_config from my_deepagent.persistence.db import Database from my_deepagent.persistence.models import ( AgentPersonaRow, InteractiveSessionRow, MessageRow, ) def _now() -> str: return datetime.now(UTC).isoformat(timespec="seconds") @pytest.fixture async def db_with_session(tmp_path: Path) -> AsyncIterator[tuple[Database, str, Any]]: """Yield (db, session_id, config) with one persona + one interactive session. Caller is responsible for seeding MessageRow rows. """ db_url = f"sqlite+aiosqlite:///{tmp_path / 'compact.sqlite3'}" cfg = load_config( workspace_root=tmp_path, data_dir=tmp_path / "data", database_url=db_url, ) db = Database(db_url) await db.init_schema() persona_id = str(uuid.uuid4()) session_id = str(uuid.uuid4()) async with db.session() as s: s.add( AgentPersonaRow( id=persona_id, name="test-persona", version=1, hash="hash-test", definition={"name": "test-persona", "version": 1}, created_at=_now(), ) ) s.add( InteractiveSessionRow( id=session_id, persona_id=persona_id, persona_hash="hash-test", started_at=_now(), last_message_at=_now(), state="active", total_input_tokens=0, total_output_tokens=0, model="openrouter:deepseek/deepseek-chat", project_key="testproj0000abcd", title="test session", plan_mode=False, parent_session_id=None, depth=0, ) ) await s.commit() try: yield (db, session_id, cfg) finally: await db.dispose() async def _seed_messages( db: Database, session_id: str, n: int, *, start_seq: int = 1, role_alternation: bool = True ) -> None: """Insert n non-system, non-archived, non-summary messages.""" async with db.session() as s: for i in range(n): role = "user" if (role_alternation and i % 2 == 0) else "assistant" s.add( MessageRow( session_id=session_id, seq=start_seq + i, role=role, content=f"message {start_seq + i} body text repeated to add tokens", tool_calls=None, token_count=20, is_summary=False, archived=False, ts=_now(), ) ) await s.commit() # --------------------------------------------------------------------------- # Scenario 2: should_compact threshold logic # --------------------------------------------------------------------------- def test_should_compact_below_threshold() -> None: row = InteractiveSessionRow( id="x", persona_id="p", persona_hash="h", state="active", total_input_tokens=10_000, total_output_tokens=10_000, model="openrouter:deepseek/deepseek-chat", # 64k window → 70% = 44_800 ) assert should_compact(row) is False def test_should_compact_at_threshold() -> None: row = InteractiveSessionRow( id="x", persona_id="p", persona_hash="h", state="active", total_input_tokens=40_000, total_output_tokens=10_000, # 50_000 > 44_800 model="openrouter:deepseek/deepseek-chat", ) assert should_compact(row) is True def test_should_compact_unknown_model_uses_default_limit() -> None: # Default 32_000 → 70% = 22_400. row = InteractiveSessionRow( id="x", persona_id="p", persona_hash="h", state="active", total_input_tokens=20_000, total_output_tokens=3_000, # 23_000 > 22_400 model="some-unknown/model", ) assert should_compact(row) is True # --------------------------------------------------------------------------- # Scenario 3: insufficient messages → no-op # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_compact_session_rejects_insufficient_messages( db_with_session: tuple[Database, str, Any], monkeypatch: pytest.MonkeyPatch, ) -> None: db, sid, cfg = db_with_session # Seed MIN_COMPACTABLE + KEEP_RECENT_K - 1 messages so to_compact is short. await _seed_messages(db, sid, n=KEEP_RECENT_K + MIN_COMPACTABLE - 1) # Stub the summariser so an accidental call would still pass — but assert it's # never invoked (length gate triggers before the LLM call). called = {"n": 0} async def fake_summary(*_a: Any, **_k: Any) -> str: called["n"] += 1 return "should-not-be-called" monkeypatch.setattr(compaction_mod, "_run_summary_llm", fake_summary) result = await compact_session(db, cfg, sid) assert result.compacted is False assert "insufficient_messages" in result.reason assert called["n"] == 0 # --------------------------------------------------------------------------- # Scenario 1: happy path — summary inserted, originals archived to negative seq # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_compact_session_happy_path( db_with_session: tuple[Database, str, Any], monkeypatch: pytest.MonkeyPatch, ) -> None: db, sid, cfg = db_with_session # 14 messages: oldest 4 should be compacted (14 - KEEP_RECENT_K(10) = 4). await _seed_messages(db, sid, n=14) async def fake_summary(*_a: Any, **_k: Any) -> str: return "요약: 사용자가 wordcount CLI를 만들고 있고 일부 코드를 작성했습니다." monkeypatch.setattr(compaction_mod, "_run_summary_llm", fake_summary) # Pre-condition: input tokens currently 0 on the row; bump to a non-zero so we # can verify the subtract-archived-add-summary arithmetic. async with db.session() as s: row = await s.get(InteractiveSessionRow, sid) assert row is not None row.total_input_tokens = 1000 # arbitrary baseline await s.commit() result = await compact_session(db, cfg, sid) assert result.compacted is True, f"got {result!r}" assert result.archived == 4 assert result.summary_tokens > 0 async with db.session() as s: # The 4 archived messages should now be at negative seq and archived=True. archived_rows = ( ( await s.execute( select(MessageRow) .where(MessageRow.session_id == sid) .where(MessageRow.archived.is_(True)) .order_by(MessageRow.seq) ) ) .scalars() .all() ) assert len(archived_rows) == 4 for r in archived_rows: assert r.seq < 0 assert r.archived is True assert r.is_summary is False # Exactly one new summary row, role=system, is_summary=True, archived=False. summary_rows = ( ( await s.execute( select(MessageRow) .where(MessageRow.session_id == sid) .where(MessageRow.is_summary.is_(True)) ) ) .scalars() .all() ) assert len(summary_rows) == 1 summary_row = summary_rows[0] assert summary_row.role == "system" assert summary_row.archived is False assert summary_row.seq == 1 # smallest of the original to_compact seqs # The 10 most recent messages remain non-archived at their original seqs. live_rows = ( ( await s.execute( select(MessageRow) .where(MessageRow.session_id == sid) .where(MessageRow.archived.is_(False)) .where(MessageRow.is_summary.is_(False)) .order_by(MessageRow.seq) ) ) .scalars() .all() ) assert len(live_rows) == KEEP_RECENT_K assert [r.seq for r in live_rows] == list(range(5, 15)) # Token counter arithmetic: 1000 - (4*20) + summary_tokens. sess = await s.get(InteractiveSessionRow, sid) assert sess is not None assert sess.total_input_tokens == 1000 - 80 + result.summary_tokens # --------------------------------------------------------------------------- # Scenario 4: per-session Lock serializes concurrent compactions # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_compact_session_lock_serializes_concurrent_calls( db_with_session: tuple[Database, str, Any], monkeypatch: pytest.MonkeyPatch, ) -> None: db, sid, cfg = db_with_session await _seed_messages(db, sid, n=14) # Slow summariser: lets us observe lock serialization (second caller starts # only after the first finishes). call_starts: list[float] = [] call_ends: list[float] = [] async def slow_summary(*_a: Any, **_k: Any) -> str: call_starts.append(asyncio.get_event_loop().time()) await asyncio.sleep(0.25) call_ends.append(asyncio.get_event_loop().time()) return "요약 ok" monkeypatch.setattr(compaction_mod, "_run_summary_llm", slow_summary) # Two concurrent compactions on the same session_id. r1, r2 = await asyncio.gather( compact_session(db, cfg, sid), compact_session(db, cfg, sid), ) # First call should compact; second call sees no compactable messages left. compacted_count = sum(1 for r in (r1, r2) for _ in [r] if r.compacted) assert compacted_count == 1, f"expected exactly 1 compaction, got r1={r1!r} r2={r2!r}" # If the lock works, the slow_summary was either called once (second caller # short-circuits on length gate) or twice with non-overlapping windows. if len(call_starts) == 2: # Second LLM call should start after first finishes. assert call_starts[1] >= call_ends[0], "lock failed to serialize summariser calls" @pytest.mark.asyncio async def test_compact_session_missing_session_returns_not_found( db_with_session: tuple[Database, str, Any], monkeypatch: pytest.MonkeyPatch, ) -> None: db, _sid, cfg = db_with_session bogus = str(uuid.uuid4()) async def fake_summary(*_a: Any, **_k: Any) -> str: return "should-not-be-called" monkeypatch.setattr(compaction_mod, "_run_summary_llm", fake_summary) result: CompactionResult = await compact_session(db, cfg, bogus) assert result.compacted is False assert result.reason == "session_not_found"