"""v0.3 PR #2 — Context compaction tests.

4 scenarios from the plan:
1. Manual `/compact` (via compact_session()) — happy path: inserts summary,
   archives originals to negative seq band, bumps token counters.
2. should_compact() threshold logic: under 70% → False, over 70% → True.
3. Insufficient messages (< MIN_COMPACTABLE) → CompactionResult(compacted=False).
4. Per-session asyncio.Lock serializes concurrent compactions — second caller
   waits for first to release.

All scenarios stub the summariser LLM (no OpenRouter calls). The DB layer is
exercised end-to-end via aiosqlite tmp_path.
"""

from __future__ import annotations

import asyncio
import uuid
from collections.abc import AsyncIterator
from datetime import UTC, datetime
from pathlib import Path
from typing import Any

import pytest
from sqlalchemy import select

from my_deepagent import compaction as compaction_mod
from my_deepagent.compaction import (
    KEEP_RECENT_K,
    MIN_COMPACTABLE,
    CompactionResult,
    compact_session,
    should_compact,
)
from my_deepagent.config import load_config
from my_deepagent.persistence.db import Database
from my_deepagent.persistence.models import (
    AgentPersonaRow,
    InteractiveSessionRow,
    MessageRow,
)


def _now() -> str:
    return datetime.now(UTC).isoformat(timespec="seconds")


@pytest.fixture
async def db_with_session(tmp_path: Path) -> AsyncIterator[tuple[Database, str, Any]]:
    """Provide a fresh SQLite database holding one persona and one interactive session.

    Yields a ``(db, session_id, config)`` tuple. No ``MessageRow`` rows are
    created here; each test seeds the messages it needs. The database is
    disposed once the consuming test is done, even if it failed.
    """
    sqlite_file = tmp_path / "compact.sqlite3"
    db_url = f"sqlite+aiosqlite:///{sqlite_file}"
    cfg = load_config(
        workspace_root=tmp_path,
        data_dir=tmp_path / "data",
        database_url=db_url,
    )
    db = Database(db_url)
    await db.init_schema()

    persona_id = str(uuid.uuid4())
    session_id = str(uuid.uuid4())

    # Build both rows up front; they are only attached to a DB session below.
    persona = AgentPersonaRow(
        id=persona_id,
        name="test-persona",
        version=1,
        hash="hash-test",
        definition={"name": "test-persona", "version": 1},
        created_at=_now(),
    )
    interactive_session = InteractiveSessionRow(
        id=session_id,
        persona_id=persona_id,
        persona_hash="hash-test",
        started_at=_now(),
        last_message_at=_now(),
        state="active",
        total_input_tokens=0,
        total_output_tokens=0,
        model="openrouter:deepseek/deepseek-chat",
        project_key="testproj0000abcd",
        title="test session",
        plan_mode=False,
        parent_session_id=None,
        depth=0,
    )

    async with db.session() as s:
        s.add(persona)
        s.add(interactive_session)
        await s.commit()

    try:
        yield db, session_id, cfg
    finally:
        # Always release the engine so the temporary SQLite file is not held open.
        await db.dispose()


async def _seed_messages(
    db: Database, session_id: str, n: int, *, start_seq: int = 1, role_alternation: bool = True
) -> None:
    """Insert ``n`` live messages with consecutive seqs starting at ``start_seq``.

    Every row is non-archived, non-summary and carries ``token_count=20``. With
    ``role_alternation`` enabled, even offsets become ``user`` rows and odd
    offsets ``assistant`` rows; with it disabled every row is ``assistant``.
    All rows are committed in a single transaction.
    """
    async with db.session() as s:
        for seq in range(start_seq, start_seq + n):
            offset = seq - start_seq
            if role_alternation and offset % 2 == 0:
                role = "user"
            else:
                role = "assistant"
            message = MessageRow(
                session_id=session_id,
                seq=seq,
                role=role,
                content=f"message {seq} body text repeated to add tokens",
                tool_calls=None,
                token_count=20,
                is_summary=False,
                archived=False,
                ts=_now(),
            )
            s.add(message)
        await s.commit()


# ---------------------------------------------------------------------------
# Scenario 2: should_compact threshold logic
# ---------------------------------------------------------------------------


def test_should_compact_below_threshold() -> None:
    """A session well under the 70% mark must not be flagged for compaction."""
    # deepseek-chat: 64k window → 70% = 44_800; 10_000 + 10_000 stays far below.
    session_fields: dict[str, Any] = {
        "id": "x",
        "persona_id": "p",
        "persona_hash": "h",
        "state": "active",
        "total_input_tokens": 10_000,
        "total_output_tokens": 10_000,
        "model": "openrouter:deepseek/deepseek-chat",
    }
    session = InteractiveSessionRow(**session_fields)
    verdict = should_compact(session)
    assert verdict is False


def test_should_compact_at_threshold() -> None:
    """A session past the 70% mark must be flagged for compaction.

    Despite the test name, the seeded total (50_000) sits above the 44_800
    threshold rather than exactly on it.
    """
    session_fields: dict[str, Any] = {
        "id": "x",
        "persona_id": "p",
        "persona_hash": "h",
        "state": "active",
        "total_input_tokens": 40_000,
        "total_output_tokens": 10_000,  # 40_000 + 10_000 = 50_000 > 44_800
        "model": "openrouter:deepseek/deepseek-chat",
    }
    session = InteractiveSessionRow(**session_fields)
    verdict = should_compact(session)
    assert verdict is True


def test_should_compact_unknown_model_uses_default_limit() -> None:
    """An unrecognised model name falls back to the default context limit."""
    # Default 32_000 → 70% = 22_400; 20_000 + 3_000 = 23_000 exceeds it.
    session_fields: dict[str, Any] = {
        "id": "x",
        "persona_id": "p",
        "persona_hash": "h",
        "state": "active",
        "total_input_tokens": 20_000,
        "total_output_tokens": 3_000,
        "model": "some-unknown/model",
    }
    session = InteractiveSessionRow(**session_fields)
    verdict = should_compact(session)
    assert verdict is True


# ---------------------------------------------------------------------------
# Scenario 3: insufficient messages → no-op
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_compact_session_rejects_insufficient_messages(
    db_with_session: tuple[Database, str, Any],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Scenario 3: compaction is a no-op when too few messages are compactable."""
    db, sid, cfg = db_with_session

    # One message short of KEEP_RECENT_K + MIN_COMPACTABLE, so to_compact is short.
    seeded = KEEP_RECENT_K + MIN_COMPACTABLE - 1
    await _seed_messages(db, sid, n=seeded)

    # The stub keeps an accidental call harmless, while recording it so the test
    # can prove the length gate fired before any LLM call.
    summariser_calls: list[tuple[Any, ...]] = []

    async def fake_summary(*_a: Any, **_k: Any) -> str:
        summariser_calls.append(_a)
        return "should-not-be-called"

    monkeypatch.setattr(compaction_mod, "_run_summary_llm", fake_summary)

    outcome = await compact_session(db, cfg, sid)

    assert outcome.compacted is False
    assert "insufficient_messages" in outcome.reason
    assert len(summariser_calls) == 0


# ---------------------------------------------------------------------------
# Scenario 1: happy path — summary inserted, originals archived to negative seq
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_compact_session_happy_path(
    db_with_session: tuple[Database, str, Any],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Scenario 1: a successful compaction archives old rows and inserts a summary.

    After one ``compact_session`` call with a stubbed summariser, checks that:

    * the oldest messages are flagged ``archived`` and moved to negative seqs,
    * exactly one non-archived ``system`` summary row exists at seq 1,
    * the ``KEEP_RECENT_K`` newest messages keep their original seqs,
    * ``total_input_tokens`` drops by the archived tokens and grows by the
      summary tokens.

    Sizes are derived from ``KEEP_RECENT_K`` / ``MIN_COMPACTABLE`` rather than
    hard-coded, so the expectations stay consistent if those constants are
    retuned.
    """
    db, sid, cfg = db_with_session

    tokens_per_message = 20  # token_count that _seed_messages writes on every row
    baseline_input_tokens = 1000  # arbitrary non-zero starting counter
    # Everything older than the KEEP_RECENT_K newest messages is expected to be
    # compacted; keep that older slice at 4 rows unless MIN_COMPACTABLE needs more.
    expected_archived = max(MIN_COMPACTABLE, 4)
    total_messages = KEEP_RECENT_K + expected_archived
    await _seed_messages(db, sid, n=total_messages)

    async def fake_summary(*_a: Any, **_k: Any) -> str:
        return "요약: 사용자가 wordcount CLI를 만들고 있고 일부 코드를 작성했습니다."

    monkeypatch.setattr(compaction_mod, "_run_summary_llm", fake_summary)

    # Pre-condition: the fixture leaves input tokens at 0; bump to a non-zero
    # baseline so the subtract-archived-add-summary arithmetic is observable.
    async with db.session() as s:
        row = await s.get(InteractiveSessionRow, sid)
        assert row is not None
        row.total_input_tokens = baseline_input_tokens
        await s.commit()

    result = await compact_session(db, cfg, sid)
    assert result.compacted is True, f"got {result!r}"
    assert result.archived == expected_archived
    assert result.summary_tokens > 0

    async with db.session() as s:
        # The archived messages should now be at negative seq and archived=True.
        archived_stmt = (
            select(MessageRow)
            .where(MessageRow.session_id == sid)
            .where(MessageRow.archived.is_(True))
            .order_by(MessageRow.seq)
        )
        archived_rows = (await s.execute(archived_stmt)).scalars().all()
        assert len(archived_rows) == expected_archived
        for r in archived_rows:
            assert r.seq < 0
            assert r.archived is True
            assert r.is_summary is False

        # Exactly one new summary row, role=system, is_summary=True, archived=False.
        summary_stmt = (
            select(MessageRow)
            .where(MessageRow.session_id == sid)
            .where(MessageRow.is_summary.is_(True))
        )
        summary_rows = (await s.execute(summary_stmt)).scalars().all()
        assert len(summary_rows) == 1
        summary_row = summary_rows[0]
        assert summary_row.role == "system"
        assert summary_row.archived is False
        assert summary_row.seq == 1  # smallest of the original to_compact seqs

        # The most recent messages remain non-archived at their original seqs.
        live_stmt = (
            select(MessageRow)
            .where(MessageRow.session_id == sid)
            .where(MessageRow.archived.is_(False))
            .where(MessageRow.is_summary.is_(False))
            .order_by(MessageRow.seq)
        )
        live_rows = (await s.execute(live_stmt)).scalars().all()
        assert len(live_rows) == KEEP_RECENT_K
        expected_live_seqs = list(range(expected_archived + 1, total_messages + 1))
        assert [r.seq for r in live_rows] == expected_live_seqs

        # Token counter arithmetic: baseline - archived tokens + summary tokens.
        sess = await s.get(InteractiveSessionRow, sid)
        assert sess is not None
        archived_tokens = expected_archived * tokens_per_message
        assert sess.total_input_tokens == (
            baseline_input_tokens - archived_tokens + result.summary_tokens
        )


# ---------------------------------------------------------------------------
# Scenario 4: per-session Lock serializes concurrent compactions
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_compact_session_lock_serializes_concurrent_calls(
    db_with_session: tuple[Database, str, Any],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Scenario 4: two concurrent compactions of one session do not overlap.

    Both calls are launched together with a deliberately slow summariser stub.
    Exactly one of them must report ``compacted``; if the summariser ran twice,
    the second run must have started only after the first one finished.
    """
    db, sid, cfg = db_with_session
    await _seed_messages(db, sid, n=14)

    # Slow summariser: lets us observe lock serialization (second caller starts
    # only after the first finishes). Timestamps come from the loop's clock.
    call_starts: list[float] = []
    call_ends: list[float] = []

    async def slow_summary(*_a: Any, **_k: Any) -> str:
        # get_running_loop() is the supported way to reach the loop from a coroutine.
        loop = asyncio.get_running_loop()
        call_starts.append(loop.time())
        await asyncio.sleep(0.25)
        call_ends.append(loop.time())
        return "요약 ok"

    monkeypatch.setattr(compaction_mod, "_run_summary_llm", slow_summary)

    # Two concurrent compactions on the same session_id.
    r1, r2 = await asyncio.gather(
        compact_session(db, cfg, sid),
        compact_session(db, cfg, sid),
    )

    # First call should compact; second call sees no compactable messages left.
    compacted_count = sum(1 for r in (r1, r2) if r.compacted)
    assert compacted_count == 1, f"expected exactly 1 compaction, got r1={r1!r} r2={r2!r}"

    # If the lock works, the slow_summary was either called once (second caller
    # short-circuits on length gate) or twice with non-overlapping windows.
    if len(call_starts) == 2:
        # Second LLM call should start after first finishes.
        assert call_starts[1] >= call_ends[0], "lock failed to serialize summariser calls"


@pytest.mark.asyncio
async def test_compact_session_missing_session_returns_not_found(
    db_with_session: tuple[Database, str, Any],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Compacting an unknown session id reports ``session_not_found``.

    The summariser is stubbed and its invocations are recorded, so the test
    also proves that no LLM call is attempted for a session that does not
    exist.
    """
    db, _sid, cfg = db_with_session
    bogus = str(uuid.uuid4())  # random id that was never inserted

    summariser_calls: list[tuple[Any, ...]] = []

    async def fake_summary(*_a: Any, **_k: Any) -> str:
        summariser_calls.append(_a)
        return "should-not-be-called"

    monkeypatch.setattr(compaction_mod, "_run_summary_llm", fake_summary)

    result: CompactionResult = await compact_session(db, cfg, bogus)
    assert result.compacted is False
    assert result.reason == "session_not_found"
    assert not summariser_calls, "summariser must not run for a missing session"