feat(my-deepagent): v0.3 PR #2 — context compaction (auto + manual /compact)
Claude Code의 auto-compact + `/compact` 슬래시 등가.

핵심 동작:
- 세션 누적 토큰 (`total_input_tokens + total_output_tokens`)이 활성 모델 컨텍스트 윈도우의 70%를 넘으면 자동으로 가장 오래된 비-system / 비-archived 메시지를 cheap 모델 (`openrouter:deepseek/deepseek-chat` 기본)로 1회 요약 → `MessageRow(is_summary=True, role=system)` 1줄 삽입 + 원본은 `archived=True` + negative seq band (-(original.seq + 1))으로 옮김.
- LangGraph thread는 `thread_suffix` bump로 새 컨텍스트 시작 (재인입 비용 회피). 세션 자체는 살아있음 — `sessions show <id> --all`로 archived 메시지 조회 가능.
- 수동 `/compact` 슬래시도 동일 함수 호출. 메시지가 부족하면 (`< MIN_COMPACTABLE`) 사유 출력하고 no-op.

데이터·라이브러리:
- `monitoring/token_budget.py` (신규): `tiktoken cl100k_base`로 추정 (DeepSeek/Anthropic 모델 정확한 토크나이저가 없으므로 보수적 over-count). `MODEL_CONTEXT_LIMITS` (DeepSeek 64k, Claude Sonnet/Haiku/Opus 200k, GPT-4o 128k), 미등록 모델은 32k 기본값. `COMPACTION_THRESHOLD = 0.7`.
- `compaction.py` (신규): `should_compact()` / `compact_session()` / `CompactionResult`. `_SESSION_LOCKS: dict[str, asyncio.Lock]` 세션별 직렬화 — 동시 compaction은 두 번째가 첫 번째를 기다림. `KEEP_RECENT_K = 10`, `MIN_COMPACTABLE = 4`. LLM 호출은 DB session 바깥 (asyncpg connection 점유 회피).
- `pyproject.toml`: `tiktoken>=0.7` 명시 (이전엔 langchain-openai 경유 transitive).

REPL 통합 (`cli/interactive.py`):
- `_approx_token_count`를 tiktoken-based로 교체.
- 매 ainvoke 후 `should_compact(session_row)` → 임계 초과 시 자동 `compact_session()` → 성공 시 `clear_agent_cache()`로 thread bump + 한 줄 알림.
- `/compact` 슬래시 등록 (`_register_compaction_slash`).

테스트 (`tests/integration/test_compaction.py`, 7 케이스):
1. `should_compact` 70% 임계 아래/위/미등록 모델 (3개)
2. `MIN_COMPACTABLE` 미만 → LLM 호출 없이 거부
3. Happy path: 14 메시지 → 4 archive(negative seq) + summary at seq=1 + 10 live 유지 + 토큰 카운터 산술 검증
4. 동일 session_id 동시 호출 2개 → Lock 직렬화 검증
5. 없는 session_id → `session_not_found`

게이트:
- ruff check / format --check / mypy: PASS
- pytest -q --ignore=tests/integration/test_e2e_workflow.py --ignore=tests/integration/test_openrouter_smoke.py: 611 passed (7 신규 포함)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
351
my-deepagent/tests/integration/test_compaction.py
Normal file
351
my-deepagent/tests/integration/test_compaction.py
Normal file
@@ -0,0 +1,351 @@
|
||||
"""v0.3 PR #2 — Context compaction tests.
|
||||
|
||||
4 scenarios from the plan (a fifth test at the end of this module covers an unknown session id):
|
||||
1. Manual `/compact` (via compact_session()) — happy path: inserts summary,
|
||||
archives originals to negative seq band, bumps token counters.
|
||||
2. should_compact() threshold logic: under 70% → False, over 70% → True.
|
||||
3. Insufficient messages (< MIN_COMPACTABLE) → CompactionResult(compacted=False).
|
||||
4. Per-session asyncio.Lock serializes concurrent compactions — second caller
|
||||
waits for first to release.
|
||||
|
||||
All scenarios stub the summariser LLM (no OpenRouter calls). The DB layer is
|
||||
exercised end-to-end via aiosqlite tmp_path.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import uuid
|
||||
from collections.abc import AsyncIterator
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import select
|
||||
|
||||
from my_deepagent import compaction as compaction_mod
|
||||
from my_deepagent.compaction import (
|
||||
KEEP_RECENT_K,
|
||||
MIN_COMPACTABLE,
|
||||
CompactionResult,
|
||||
compact_session,
|
||||
should_compact,
|
||||
)
|
||||
from my_deepagent.config import load_config
|
||||
from my_deepagent.persistence.db import Database
|
||||
from my_deepagent.persistence.models import (
|
||||
AgentPersonaRow,
|
||||
InteractiveSessionRow,
|
||||
MessageRow,
|
||||
)
|
||||
|
||||
|
||||
def _now() -> str:
|
||||
return datetime.now(UTC).isoformat(timespec="seconds")
|
||||
|
||||
|
||||
@pytest.fixture
async def db_with_session(tmp_path: Path) -> AsyncIterator[tuple[Database, str, Any]]:
    """Yield (db, session_id, config) with one persona + one interactive session.

    A fresh SQLite database file is created under ``tmp_path`` and seeded with
    a single ``AgentPersonaRow`` and a single ``InteractiveSessionRow`` whose
    token counters start at zero.

    Caller is responsible for seeding MessageRow rows.
    """
    # NOTE(review): an ``async def`` fixture under plain ``@pytest.fixture``
    # presumably relies on pytest-asyncio's auto mode — confirm project config.
    db_url = f"sqlite+aiosqlite:///{tmp_path / 'compact.sqlite3'}"
    cfg = load_config(
        workspace_root=tmp_path,
        data_dir=tmp_path / "data",
        database_url=db_url,
    )
    db = Database(db_url)

    # Everything after construction sits inside try/finally so the database is
    # disposed even when schema creation or seeding fails.
    try:
        await db.init_schema()

        persona_id = str(uuid.uuid4())
        session_id = str(uuid.uuid4())
        # One timestamp for all seeded rows keeps them mutually consistent.
        seeded_at = _now()

        async with db.session() as s:
            s.add(
                AgentPersonaRow(
                    id=persona_id,
                    name="test-persona",
                    version=1,
                    hash="hash-test",
                    definition={"name": "test-persona", "version": 1},
                    created_at=seeded_at,
                )
            )
            s.add(
                InteractiveSessionRow(
                    id=session_id,
                    persona_id=persona_id,
                    persona_hash="hash-test",
                    started_at=seeded_at,
                    last_message_at=seeded_at,
                    state="active",
                    total_input_tokens=0,
                    total_output_tokens=0,
                    model="openrouter:deepseek/deepseek-chat",
                    project_key="testproj0000abcd",
                    title="test session",
                    plan_mode=False,
                    parent_session_id=None,
                    depth=0,
                )
            )
            await s.commit()

        yield (db, session_id, cfg)
    finally:
        await db.dispose()
|
||||
|
||||
|
||||
async def _seed_messages(
    db: Database, session_id: str, n: int, *, start_seq: int = 1, role_alternation: bool = True
) -> None:
    """Insert n non-system, non-archived, non-summary messages.

    Rows get consecutive ``seq`` values starting at ``start_seq`` and a fixed
    ``token_count`` of 20 each. With ``role_alternation`` enabled the roles
    alternate user/assistant starting with "user"; when it is disabled every
    row is written with role "assistant".
    """
    async with db.session() as s:
        for i in range(n):
            seq = start_seq + i
            role = "user" if (role_alternation and i % 2 == 0) else "assistant"
            s.add(
                MessageRow(
                    session_id=session_id,
                    seq=seq,
                    role=role,
                    content=f"message {seq} body text repeated to add tokens",
                    tool_calls=None,
                    token_count=20,
                    is_summary=False,
                    archived=False,
                    ts=_now(),
                )
            )
        await s.commit()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario 2: should_compact threshold logic
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_should_compact_below_threshold() -> None:
    """A session whose token total is under 70% of the window is not compacted."""
    row = InteractiveSessionRow(
        id="x",
        persona_id="p",
        persona_hash="h",
        state="active",
        total_input_tokens=10_000,
        total_output_tokens=10_000,  # 20_000 total, below the cut-off
        model="openrouter:deepseek/deepseek-chat",  # 64k window → 70% = 44_800
    )
    assert should_compact(row) is False
|
||||
|
||||
|
||||
def test_should_compact_at_threshold() -> None:
    """A session whose token total exceeds 70% of the window is compacted.

    Despite the name, the values used here are above the cut-off
    (50_000 > 44_800), not exactly on it.
    """
    row = InteractiveSessionRow(
        id="x",
        persona_id="p",
        persona_hash="h",
        state="active",
        total_input_tokens=40_000,
        total_output_tokens=10_000,  # 50_000 > 44_800
        model="openrouter:deepseek/deepseek-chat",
    )
    assert should_compact(row) is True
|
||||
|
||||
|
||||
def test_should_compact_unknown_model_uses_default_limit() -> None:
    """An unregistered model name falls back to the default context limit."""
    # Default 32_000 → 70% = 22_400.
    row = InteractiveSessionRow(
        id="x",
        persona_id="p",
        persona_hash="h",
        state="active",
        total_input_tokens=20_000,
        total_output_tokens=3_000,  # 23_000 > 22_400
        model="some-unknown/model",
    )
    assert should_compact(row) is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario 3: insufficient messages → no-op
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_compact_session_rejects_insufficient_messages(
    db_with_session: tuple[Database, str, Any],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Too few messages yields a non-compacted result and no summariser call."""
    db, sid, cfg = db_with_session
    # Seed MIN_COMPACTABLE + KEEP_RECENT_K - 1 messages so to_compact is short.
    await _seed_messages(db, sid, n=KEEP_RECENT_K + MIN_COMPACTABLE - 1)

    # Stub the summariser so an accidental call would still pass — but assert it's
    # never invoked (length gate triggers before the LLM call).
    summariser_calls: list[tuple[Any, ...]] = []

    async def fake_summary(*_a: Any, **_k: Any) -> str:
        summariser_calls.append(_a)
        return "should-not-be-called"

    monkeypatch.setattr(compaction_mod, "_run_summary_llm", fake_summary)

    result = await compact_session(db, cfg, sid)
    assert result.compacted is False
    assert "insufficient_messages" in result.reason
    assert len(summariser_calls) == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario 1: happy path — summary inserted, originals archived to negative seq
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_compact_session_happy_path(
    db_with_session: tuple[Database, str, Any],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Compaction archives the oldest rows, inserts one summary, fixes counters."""
    db, sid, cfg = db_with_session

    # Derived from the module constants so the test tracks them; with
    # KEEP_RECENT_K = 10 and MIN_COMPACTABLE = 4 this is 14 messages of which
    # the oldest 4 are compacted.
    expected_archived = MIN_COMPACTABLE
    total_messages = KEEP_RECENT_K + expected_archived
    tokens_per_message = 20  # matches the token_count written by _seed_messages
    baseline_input_tokens = 1000  # arbitrary baseline
    await _seed_messages(db, sid, n=total_messages)

    async def fake_summary(*_a: Any, **_k: Any) -> str:
        return "요약: 사용자가 wordcount CLI를 만들고 있고 일부 코드를 작성했습니다."

    monkeypatch.setattr(compaction_mod, "_run_summary_llm", fake_summary)

    # Pre-condition: input tokens currently 0 on the row; bump to a non-zero so we
    # can verify the subtract-archived-add-summary arithmetic.
    async with db.session() as s:
        row = await s.get(InteractiveSessionRow, sid)
        assert row is not None
        row.total_input_tokens = baseline_input_tokens
        await s.commit()

    result = await compact_session(db, cfg, sid)
    assert result.compacted is True, f"got {result!r}"
    assert result.archived == expected_archived
    assert result.summary_tokens > 0

    async with db.session() as s:
        # The archived messages should now be at negative seq and archived=True.
        archived_query = (
            select(MessageRow)
            .where(MessageRow.session_id == sid)
            .where(MessageRow.archived.is_(True))
            .order_by(MessageRow.seq)
        )
        archived_rows = (await s.execute(archived_query)).scalars().all()
        assert len(archived_rows) == expected_archived
        for r in archived_rows:
            assert r.seq < 0
            assert r.archived is True
            assert r.is_summary is False

        # Exactly one new summary row, role=system, is_summary=True, archived=False.
        summary_query = (
            select(MessageRow)
            .where(MessageRow.session_id == sid)
            .where(MessageRow.is_summary.is_(True))
        )
        summary_rows = (await s.execute(summary_query)).scalars().all()
        assert len(summary_rows) == 1
        summary_row = summary_rows[0]
        assert summary_row.role == "system"
        assert summary_row.archived is False
        assert summary_row.seq == 1  # smallest of the original to_compact seqs

        # The most recent messages remain non-archived at their original seqs.
        live_query = (
            select(MessageRow)
            .where(MessageRow.session_id == sid)
            .where(MessageRow.archived.is_(False))
            .where(MessageRow.is_summary.is_(False))
            .order_by(MessageRow.seq)
        )
        live_rows = (await s.execute(live_query)).scalars().all()
        assert len(live_rows) == KEEP_RECENT_K
        assert [r.seq for r in live_rows] == list(
            range(expected_archived + 1, total_messages + 1)
        )

        # Token counter arithmetic: baseline - archived tokens + summary_tokens.
        sess = await s.get(InteractiveSessionRow, sid)
        assert sess is not None
        assert sess.total_input_tokens == (
            baseline_input_tokens
            - expected_archived * tokens_per_message
            + result.summary_tokens
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario 4: per-session Lock serializes concurrent compactions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_compact_session_lock_serializes_concurrent_calls(
    db_with_session: tuple[Database, str, Any],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Two concurrent compactions of one session yield exactly one compaction."""
    db, sid, cfg = db_with_session
    await _seed_messages(db, sid, n=14)

    # Slow summariser: lets us observe lock serialization (second caller starts
    # only after the first finishes).
    call_starts: list[float] = []
    call_ends: list[float] = []

    async def slow_summary(*_a: Any, **_k: Any) -> str:
        # Inside a coroutine the running loop is the correct clock source.
        loop = asyncio.get_running_loop()
        call_starts.append(loop.time())
        await asyncio.sleep(0.25)
        call_ends.append(loop.time())
        return "요약 ok"

    monkeypatch.setattr(compaction_mod, "_run_summary_llm", slow_summary)

    # Two concurrent compactions on the same session_id.
    r1, r2 = await asyncio.gather(
        compact_session(db, cfg, sid),
        compact_session(db, cfg, sid),
    )

    # First call should compact; second call sees no compactable messages left.
    compacted_count = sum(1 for r in (r1, r2) if r.compacted)
    assert compacted_count == 1, f"expected exactly 1 compaction, got r1={r1!r} r2={r2!r}"

    # The compaction that succeeded must have gone through the stub.
    assert call_starts, "summariser stub was never invoked"

    # If the lock works, the slow_summary was either called once (second caller
    # short-circuits on length gate) or twice with non-overlapping windows.
    if len(call_starts) == 2:
        # Second LLM call should start after first finishes.
        assert call_starts[1] >= call_ends[0], "lock failed to serialize summariser calls"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_compact_session_missing_session_returns_not_found(
    db_with_session: tuple[Database, str, Any],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An unknown session id is reported as ``session_not_found``."""
    db, _sid, cfg = db_with_session
    bogus = str(uuid.uuid4())

    # Record invocations so the "should not be called" expectation is checked.
    summariser_calls: list[tuple[Any, ...]] = []

    async def fake_summary(*_a: Any, **_k: Any) -> str:
        summariser_calls.append(_a)
        return "should-not-be-called"

    monkeypatch.setattr(compaction_mod, "_run_summary_llm", fake_summary)

    result: CompactionResult = await compact_session(db, cfg, bogus)
    assert result.compacted is False
    assert result.reason == "session_not_found"
    assert not summariser_calls, "summariser must not run for an unknown session"
|
||||
Reference in New Issue
Block a user