# dev-puppeteer/my-deepagent/src/my_deepagent/cli/interactive.py

"""mydeepagent (no subcommand) — interactive REPL.

v0.3 PR #1 changes:
- LangGraph `AsyncPostgresSaver` is now wired per REPL lifetime — checkpoints
  survive ^C and a later `mydeepagent --session <id>` resumes the thread.
- Every user/assistant turn is mirrored into the `messages` table for fast
  GUI/CLI listing.  LangGraph checkpoints remain the source of truth.
- `InteractiveSessionRow` is now persisted at REPL start (or loaded when
  `--session <id>` is given) — sessions are addressable by short id.
- `/model <name>` issues a fresh LangGraph thread suffix so the deepagents
  context restarts on model switch (compaction-style pattern).
- `_resolve_session_arg` accepts a full UUID or a 6+ char prefix.

PR #2 hooks compaction triggers + tiktoken-accurate token counts onto
the same `MessageRow` + `InteractiveSessionRow` foundation.
"""

from __future__ import annotations

import asyncio
import re
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
from uuid import UUID, uuid4

import typer
from prompt_toolkit import PromptSession
from prompt_toolkit.completion import WordCompleter
from prompt_toolkit.history import FileHistory
from rich.console import Console
from sqlalchemy import desc, select

from ..audit import make_audit_recorder
from ..budget import make_budget_tracker_from_config
from ..compaction import compact_session, should_compact
from ..config import Config, load_config
from ..governance import require_consent
from ..middleware.audit import AuditToolMiddleware
from ..middleware.cost import CostMiddleware
from ..monitoring.pricing import ModelPrice, PricingCache
from ..monitoring.token_budget import count_tokens
from ..persistence.checkpointer import get_checkpointer_ctx
from ..persistence.db import Database
from ..persistence.models import InteractiveSessionRow, MessageRow
from ..persona import Persona, load_personas_from_dir
from ..session import build_agent
from ..slash import SlashParsed, SlashRegistry, parse_slash

# Shared Rich console; every message this module prints goes through it.
_CONSOLE = Console()
# Matches an ``@path`` token.  The ``@`` must not directly follow a word
# character, ``.`` or ``/`` (so ``user@host`` is not treated as a reference);
# group 1 captures the path: word characters, ``.``, ``/`` and ``-``.
_FILE_REF_PATTERN = re.compile(r"(?<![\w./])@([\w./\-]+)")


def _seed_root() -> Path:
    return Path(__file__).resolve().parents[3] / "docs" / "schemas"


def _history_path(config: Config) -> Path:
    p = config.state_dir
    p.mkdir(parents=True, exist_ok=True)
    return p / "history.txt"


def _expand_file_refs(text: str, repo_root: Path) -> str:
    """Replace ``@path`` tokens with the file contents in fenced markdown blocks.

    Each token matched by ``_FILE_REF_PATTERN`` is resolved relative to
    ``repo_root``.  The token is left untouched when the path cannot be
    resolved, escapes the repo root, is not a regular file, or cannot be read.

    Args:
        text: Raw user input that may contain ``@path`` references.
        repo_root: Directory that references are resolved against and must
            stay inside.

    Returns:
        ``text`` with every readable reference replaced by a fenced block whose
        info string is the file suffix and whose first line is ``# <path>``.
    """
    # Resolve the root once instead of once per match.
    root = repo_root.resolve()

    def _replace(match: re.Match[str]) -> str:
        token = match.group(0)
        rel = match.group(1)
        try:
            target = (root / rel).resolve()
            # Raises ValueError when the resolved target is outside the root.
            target.relative_to(root)
        except (OSError, RuntimeError, ValueError):
            return token
        if not target.is_file():
            return token
        try:
            content = target.read_text(encoding="utf-8", errors="replace")
        except OSError:
            return token
        # A fenced block is closed by the first fence at least as long as the
        # opener, so make the fence longer than any backtick run in the file
        # (e.g. a markdown file that itself contains ``` fences).
        longest_run = max((len(run) for run in re.findall(r"`+", content)), default=0)
        fence = "`" * max(3, longest_run + 1)
        suffix = target.suffix.lstrip(".")
        return f"\n{fence}{suffix}\n# {rel}\n{content}\n{fence}\n"

    return _FILE_REF_PATTERN.sub(_replace, text)


def _static_pricing_seed() -> PricingCache:
    """Build a ``PricingCache`` pre-loaded with a small hard-coded price list.

    Minimal pricing matrix for v0.1.0 (full fetch is Step 12).

    Unit: USD per 1,000 tokens.
    """
    pricing = PricingCache()
    # Positional ModelPrice fields: model id, two per-1K prices, and a token
    # limit — presumably input price, output price, context window; confirm
    # against the ModelPrice definition.
    seed_prices = [
        ModelPrice("anthropic/claude-sonnet-4-6", 0.003, 0.015, 200_000),
        ModelPrice("anthropic/claude-haiku-4-5", 0.001, 0.005, 200_000),
        ModelPrice("anthropic/claude-opus-4-1", 0.015, 0.075, 200_000),
        ModelPrice("deepseek/deepseek-chat", 0.00028, 0.00112, 64_000),
    ]
    pricing.set(seed_prices)
    return pricing


def _now_iso() -> str:
    return datetime.now(UTC).isoformat(timespec="seconds")


def _truncate_title(text: str, max_chars: int = 50) -> str:
    one_line = re.sub(r"\s+", " ", text).strip()
    return one_line[: max_chars - 1] + "…" if len(one_line) > max_chars else one_line


class InteractiveSession:
    """Mutable state for one REPL lifetime.

    Bundles the config, the available personas, the database handle, the
    pricing cache, the repo root, the session id and the LangGraph saver,
    together with the currently selected persona, an optional model override
    and a lazily built agent.

    The LangGraph thread id is ``f"{session_id}:{suffix}"``.  The suffix is
    incremented by persona switches, model switches and ``clear_agent_cache``,
    so those events continue on a fresh thread while the session id stays the
    same.
    """

    def __init__(
        self,
        config: Config,
        personas: list[Persona],
        db: Database,
        pricing: PricingCache,
        repo_root: Path,
        session_id: UUID,
        saver: Any,
    ) -> None:
        self.config = config
        self.personas = personas
        self.db = db
        self.pricing = pricing
        self.repo_root = repo_root
        self.session_id = session_id
        self.saver = saver
        # Starts at 0; only ever incremented (see _start_new_thread).
        self._thread_suffix: int = 0
        # Built on demand by build_agent_if_needed().
        self._agent: Any | None = None
        self._model_override: str | None = None
        self._persona = self._default_persona()

    # -- read-only views ----------------------------------------------------

    @property
    def thread_id(self) -> str:
        """LangGraph thread id: session id and current suffix joined by ``:``."""
        return f"{self.session_id}:{self._thread_suffix}"

    @property
    def persona(self) -> Persona:
        """The currently selected persona."""
        return self._persona

    @property
    def model_override(self) -> str | None:
        """The explicit model override, or ``None`` when none is set."""
        return self._model_override

    @property
    def active_model(self) -> str:
        """The override when set (and non-empty), else the persona's model."""
        return self._model_override or self._persona.model

    # -- internals ----------------------------------------------------------

    def _default_persona(self) -> Persona:
        """Pick the persona named by ``config.default_persona``.

        Falls back to the first loaded persona when no name matches.

        Raises:
            RuntimeError: No personas are loaded at all.
        """
        wanted = self.config.default_persona
        by_name = next((p for p in self.personas if p.name == wanted), None)
        if by_name is not None:
            return by_name
        if self.personas:
            return self.personas[0]
        raise RuntimeError(
            "no personas seeded; run `mydeepagent init` or seed docs/schemas/personas/"
        )

    def _start_new_thread(self) -> None:
        """Drop the cached agent and advance to the next thread suffix."""
        self._agent = None
        self._thread_suffix += 1

    # -- mutators -----------------------------------------------------------

    def set_persona(self, name: str) -> Persona:
        """Switch persona by ``name`` or ``name@version`` and start a new thread.

        Returns:
            The first persona matching ``name``.

        Raises:
            ValueError: No loaded persona matches; state is left unchanged.
        """
        chosen = next(
            (p for p in self.personas if p.name == name or f"{p.name}@{p.version}" == name),
            None,
        )
        if chosen is None:
            raise ValueError(f"persona not found: {name!r}")
        self._persona = chosen
        self._start_new_thread()
        return chosen

    def set_model(self, model: str | None) -> None:
        """Set (or, with ``None``, clear) the model override and start a new thread."""
        self._model_override = model
        self._start_new_thread()

    def clear_agent_cache(self) -> None:
        """Flush the cached agent so the next call rebuilds with a fresh thread."""
        self._start_new_thread()

    def _build_middleware(self) -> list[Any]:
        """Create the cost and audit middleware for the current persona/model."""
        tracker = make_budget_tracker_from_config(self.db, self.config)
        cost = CostMiddleware(
            pricing=self.pricing,
            model_name=self.active_model,
            interactive_session_id=self.session_id,
            persona_name=self._persona.name,
            budget_tracker=tracker,
        )
        audit = AuditToolMiddleware(
            interactive_session_id=self.session_id,
            file_recorder=make_audit_recorder(self.config.state_dir),
        )
        return [cost, audit]

    def build_agent_if_needed(self) -> Any:
        """Return the cached agent, building and caching it on first use."""
        if self._agent is None:
            self._agent = build_agent(
                self._persona,
                self.config,
                root_dir=self.repo_root,
                middleware=self._build_middleware(),
                model_override=self._model_override,
                checkpointer=self.saver,
            )
        return self._agent


# ---------------------------------------------------------------------------
# DB helpers (session + message persistence)
# ---------------------------------------------------------------------------


async def _load_or_create_session_row(
    db: Database,
    session_id: UUID,
    persona: Persona,
    repo_root: Path,
    *,
    create: bool,
) -> InteractiveSessionRow:
    """Fetch the session row for ``session_id``, optionally inserting it.

    When the row is missing and ``create`` is true, the persona row matching
    ``persona.compute_hash()`` is looked up (and inserted if absent), then a
    new ``"active"`` session row is written with zeroed token totals and a
    ``project_key`` derived from the resolved ``repo_root``.

    Raises:
        RuntimeError: The row does not exist and ``create`` is false.

    NOTE(review): the row is returned after its DB session context has exited;
    confirm callers do not depend on lazily loaded attributes.
    """
    from ..persistence.models import AgentPersonaRow

    sid = str(session_id)
    async with db.session() as db_sess:
        found = await db_sess.get(InteractiveSessionRow, sid)
        if found is not None:
            return found
        if not create:
            raise RuntimeError(f"session {session_id} not found")

        # The session row has a persona_id FK, so make sure a persona row
        # exists for this exact persona definition (keyed by its hash).
        persona_hash = persona.compute_hash()
        lookup = select(AgentPersonaRow).where(AgentPersonaRow.hash == persona_hash)
        persona_row = (await db_sess.execute(lookup)).scalar_one_or_none()
        if persona_row is None:
            persona_row = AgentPersonaRow(
                id=str(uuid4()),
                name=persona.name,
                version=persona.version,
                hash=persona_hash,
                definition=persona.model_dump(by_alias=True),
                created_at=_now_iso(),
            )
            db_sess.add(persona_row)
            await db_sess.flush()

        # project_key: first 16 characters of the hash of the resolved repo root.
        from ..hash import sha256

        resolved_root = str(repo_root.resolve())
        project_key = sha256(resolved_root)[:16]

        new_row = InteractiveSessionRow(
            id=sid,
            persona_id=persona_row.id,
            persona_hash=persona_hash,
            started_at=_now_iso(),
            last_message_at=None,
            state="active",
            total_input_tokens=0,
            total_output_tokens=0,
            model=persona.model,
            project_key=project_key,
            title=None,
            plan_mode=False,
            parent_session_id=None,
            depth=0,
        )
        db_sess.add(new_row)
        await db_sess.commit()
        return new_row


async def _next_message_seq(db: Database, session_id: UUID) -> int:
    """Return the next ``seq`` for the session: highest stored seq + 1, or 1."""
    newest_seq = (
        select(MessageRow.seq)
        .where(MessageRow.session_id == str(session_id))
        .order_by(desc(MessageRow.seq))
        .limit(1)
    )
    async with db.session() as db_sess:
        highest = (await db_sess.execute(newest_seq)).scalar_one_or_none()
    # No rows yet (None) and a stored 0 both start the sequence at 1.
    if not highest:
        return 1
    return highest + 1


async def _append_message(
    db: Database,
    session_id: UUID,
    role: str,
    content: str,
    *,
    tool_calls: dict[str, Any] | None = None,
    token_count: int = 0,
) -> None:
    """Store one message and refresh the owning session row's bookkeeping.

    The message gets the next sequence number for the session.  If the session
    row exists, its ``last_message_at`` is updated, its title is set from the
    first user message, and ``token_count`` is added to the input total (user)
    or output total (assistant).  Other roles do not affect the totals.

    NOTE(review): the sequence number is read in a separate DB session before
    the insert, so two concurrent writers could pick the same ``seq``.
    """
    seq = await _next_message_seq(db, session_id)
    stamp = _now_iso()
    sid = str(session_id)
    message = MessageRow(
        session_id=sid,
        seq=seq,
        role=role,
        content=content,
        tool_calls=tool_calls,
        token_count=token_count,
        is_summary=False,
        archived=False,
        ts=stamp,
    )
    from_user = role == "user"
    async with db.session() as db_sess:
        db_sess.add(message)
        session_row = await db_sess.get(InteractiveSessionRow, sid)
        if session_row is not None:
            session_row.last_message_at = stamp
            if from_user:
                if session_row.title is None:
                    session_row.title = _truncate_title(content)
                session_row.total_input_tokens += token_count
            elif role == "assistant":
                session_row.total_output_tokens += token_count
        await db_sess.commit()


async def _archive_messages(db: Database, session_id: UUID) -> int:
    """Flag every not-yet-archived message of the session as archived.

    Returns:
        The number of rows the UPDATE reports as affected (0 when the result
        exposes no ``rowcount``).
    """
    from sqlalchemy import update

    archive_stmt = (
        update(MessageRow)
        .where(
            MessageRow.session_id == str(session_id),
            MessageRow.archived.is_(False),
        )
        .values(archived=True)
    )
    async with db.session() as db_sess:
        outcome = await db_sess.execute(archive_stmt)
        await db_sess.commit()
    # getattr keeps the type checker happy: rowcount lives on CursorResult.
    touched = getattr(outcome, "rowcount", 0)
    return int(touched or 0)


async def _mark_session_ended(db: Database, session_id: UUID) -> None:
    """Set the session's state to ``"ended"`` and stamp ``ended_at``.

    Does nothing when the row is missing or already ended.
    """
    async with db.session() as db_sess:
        session_row = await db_sess.get(InteractiveSessionRow, str(session_id))
        if session_row is None or session_row.state == "ended":
            return
        session_row.state = "ended"
        session_row.ended_at = _now_iso()
        await db_sess.commit()


# ---------------------------------------------------------------------------
# Slash commands
# ---------------------------------------------------------------------------


def _register_navigation_slash(reg: SlashRegistry, sess: InteractiveSession) -> None:
    """Register /quit, /exit, /help, /clear slash handlers."""

    async def _quit(_: SlashParsed) -> bool:
        return True

    async def _help(_: SlashParsed) -> bool:
        _CONSOLE.print("[bold]Slash commands:[/]")
        for name, help_text in reg.all_help():
            _CONSOLE.print(f"  /{name:14s}  {help_text}")
        return False

    async def _clear(_: SlashParsed) -> bool:
        # v0.3 PR #1: /clear archives the current session's messages and bumps
        # the LangGraph thread suffix so the next turn starts with a fresh
        # context.  The session row stays — only the message history is
        # archived (still inspectable via `sessions show <id> --all`).
        count = await _archive_messages(sess.db, sess.session_id)
        sess.clear_agent_cache()
        _CONSOLE.print(f"[dim]context cleared ({count} messages archived, new thread)[/]")
        return False

    reg.register("quit", _quit, help="exit the REPL")
    reg.register("exit", _quit, help="alias for /quit")
    reg.register("help", _help, help="show slash commands")
    reg.register("clear", _clear, help="archive messages + start a fresh thread")


def _register_persona_slash(reg: SlashRegistry, sess: InteractiveSession) -> None:
    """Register /agent and /model slash handlers."""

    async def _agent_cmd(cmd: SlashParsed) -> bool:
        if not cmd.args:
            _CONSOLE.print(f"current: [cyan]{sess.persona.name}@{sess.persona.version}[/]")
            for p in sess.personas:
                _CONSOLE.print(f"  - {p.name}@{p.version}  ({p.backend.value})")
            return False
        try:
            new = sess.set_persona(cmd.args[0])
            _CONSOLE.print(f"[green]switched persona → {new.name}@{new.version}[/]")
        except ValueError as e:
            _CONSOLE.print(f"[red]{e}[/]")
        return False

    async def _model_cmd(cmd: SlashParsed) -> bool:
        if not cmd.args:
            _CONSOLE.print(f"current model: [cyan]{sess.active_model}[/]")
            return False
        if cmd.args[0] in ("-", "reset"):
            sess.set_model(None)
            new_model = sess.active_model
            _CONSOLE.print(f"[green]model override cleared → {new_model} (new thread)[/]")
        else:
            sess.set_model(cmd.args[0])
            _CONSOLE.print(f"[green]model → {cmd.args[0]} (new thread)[/]")
        # Persist the new active model on the session row.
        async with sess.db.session() as s:
            row = await s.get(InteractiveSessionRow, str(sess.session_id))
            if row is not None:
                row.model = sess.active_model
                await s.commit()
        return False

    reg.register("agent", _agent_cmd, help="list or switch persona: /agent [name]")
    reg.register("model", _model_cmd, help="override model: /model <id> | reset")


def _register_telemetry_slash(reg: SlashRegistry) -> None:
    """Register /stats, /budget, /runs, /sessions slash handlers."""

    async def _stats(_: SlashParsed) -> bool:
        from .stats import stats_command

        stats_command(by="model", since_days=1)
        return False

    async def _budget(_: SlashParsed) -> bool:
        from .stats import budget_command

        budget_command()
        return False

    async def _runs(_: SlashParsed) -> bool:
        from .runs import runs_list_command

        runs_list_command(limit=10, state_filter=None)
        return False

    async def _sessions(_: SlashParsed) -> bool:
        from .sessions import sessions_list_command

        sessions_list_command(limit=10)
        return False

    reg.register("stats", _stats, help="LLM-call stats (last 24h)")
    reg.register("budget", _budget, help="budget ledger")
    reg.register("runs", _runs, help="list recent workflow runs")
    reg.register("sessions", _sessions, help="list recent interactive sessions")


def _register_compaction_slash(reg: SlashRegistry, sess: InteractiveSession) -> None:
    """Register /compact slash handler (v0.3 PR #2)."""

    async def _compact(_: SlashParsed) -> bool:
        result = await compact_session(sess.db, sess.config, str(sess.session_id))
        if result.compacted:
            sess.clear_agent_cache()
            _CONSOLE.print(
                f"[green]compacted[/] — {result.archived} messages archived, "
                f"summary {result.summary_tokens} tokens (new thread started)"
            )
        else:
            _CONSOLE.print(f"[yellow]compaction skipped:[/] {result.reason}")
        return False

    reg.register("compact", _compact, help="manually compact the conversation history")


def _register_slash(reg: SlashRegistry, sess: InteractiveSession) -> None:
    """Install every slash-command group on ``reg``.

    Groups are registered in a fixed order: navigation, persona/model,
    telemetry, compaction.
    """
    for install in (_register_navigation_slash, _register_persona_slash):
        install(reg, sess)
    # Telemetry handlers need no session state.
    _register_telemetry_slash(reg)
    _register_compaction_slash(reg, sess)


def _completer(personas: list[Persona], slash_names: list[str]) -> WordCompleter:
    """Build a case-insensitive completer over slash commands and persona names.

    Slash command names are offered with a leading ``/`` and come first,
    followed by the persona names.
    """
    slash_words = [f"/{command}" for command in slash_names]
    persona_words = [persona.name for persona in personas]
    return WordCompleter(
        slash_words + persona_words,
        ignore_case=True,
        sentence=True,
    )


def _approx_token_count(text: str, model: str = "") -> int:
    """Return the token count of ``text`` as reported by ``count_tokens``.

    Thin wrapper (PR #2): per the original note, ``count_tokens`` uses
    tiktoken and falls back to a character-based heuristic when tiktoken
    fails.  The active model is passed through so model-specific tokenizers
    can be added later without touching call sites.
    """
    token_total = count_tokens(text, model)
    return token_total


async def _invoke_and_stream(
    agent: Any,
    user_text: str,
    sess: InteractiveSession,
) -> None:
    """Run one agent turn: persist the user message, invoke, print, persist.

    Steps:
      1. Store the user message first, so it is durable even if the agent
         call fails.
      2. Invoke the agent on the session's current LangGraph thread.
      3. Print the last returned message and store it as the assistant reply.
      4. Run auto-compaction if ``should_compact`` says so.

    Args:
        agent: Object exposing ``ainvoke(input, config=...)``.
        user_text: The (already expanded) user input.
        sess: Live REPL session providing db, ids and the active model.

    Raises:
        Exception: Whatever the agent call or message persistence raises; the
            user message is already stored by then.  Failures during
            auto-compaction are reported and swallowed.
    """
    # 1. Persist the user message first so it's durable even if ainvoke fails.
    await _append_message(
        sess.db,
        sess.session_id,
        "user",
        user_text,
        token_count=_approx_token_count(user_text, sess.active_model),
    )

    # 2. Invoke the agent. LangGraph thread_id includes the suffix so /model
    #    or /clear-induced switches start a fresh context.  Errors propagate to
    #    the caller unchanged.
    turn = await agent.ainvoke(
        {"messages": [{"role": "user", "content": user_text}]},
        config={"configurable": {"thread_id": sess.thread_id}},
    )

    messages = turn.get("messages", []) if isinstance(turn, dict) else []
    if not messages:
        return
    last = messages[-1]
    content: Any = getattr(last, "content", "") or ""
    if isinstance(content, list):
        # Multi-part content: use each dict part's "text", stringify the rest.
        content = "\n".join(
            (c.get("text", str(c)) if isinstance(c, dict) else str(c)) for c in content
        )
    content_str = str(content)
    # markup=False: model output is arbitrary text.  With markup parsing on,
    # Rich would treat things like "arr[i]" or "list[int]" as style tags (and
    # drop them) or raise MarkupError on a stray "[/...]".
    _CONSOLE.print(content_str, markup=False)

    # 3. Persist the assistant response.
    await _append_message(
        sess.db,
        sess.session_id,
        "assistant",
        content_str,
        token_count=_approx_token_count(content_str, sess.active_model),
    )

    # 4. Auto-compaction check.  The trigger lives in should_compact (per the
    #    original note: total used tokens crossing 70% of the active model's
    #    context window).  The turn itself already succeeded and is persisted,
    #    so a compaction failure is reported but non-fatal (next turn retries).
    try:
        async with sess.db.session() as s:
            session_row = await s.get(InteractiveSessionRow, str(sess.session_id))
        if session_row is not None and should_compact(session_row):
            compaction = await compact_session(sess.db, sess.config, str(sess.session_id))
            if compaction.compacted:
                sess.clear_agent_cache()  # bumps thread_suffix → fresh deepagents thread
                _CONSOLE.print(
                    f"[dim]context compacted — {compaction.archived} messages archived, "
                    f"summary {compaction.summary_tokens} tokens, new thread[/]"
                )
    except Exception as exc:
        _CONSOLE.print(
            f"auto-compaction failed: {type(exc).__name__}: {exc}",
            style="yellow",
            markup=False,
        )


async def _repl_loop(
    sess: InteractiveSession,
    reg: SlashRegistry,
    prompt_session: PromptSession[str],
) -> int:
    """Inner REPL loop: read a line, run a slash command or an agent turn.

    The loop ends when the prompt raises EOF / KeyboardInterrupt or when a
    slash handler returns a truthy value.  Errors raised by slash handlers, by
    building the agent, or by the agent turn are printed and the loop
    continues.

    Args:
        sess: Live REPL session.
        reg: Registry used to dispatch slash commands.
        prompt_session: prompt_toolkit session used to read input.

    Returns:
        Always 0; every exit path of the loop is a clean exit.
    """
    # Local import, matching the file's other function-scope imports.
    from rich.markup import escape

    while True:
        try:
            line = await prompt_session.prompt_async("» ")
        except (EOFError, KeyboardInterrupt):
            _CONSOLE.print()
            return 0
        line = (line or "").strip()
        if not line:
            continue
        parsed = parse_slash(line)
        if parsed is not None:
            if parsed.name == "":
                _CONSOLE.print("[dim]empty slash command; try /help[/]")
                continue
            try:
                done = await reg.dispatch(parsed)
            except typer.Exit:
                # A wrapped CLI command asked to exit; that must not end the REPL.
                continue
            except Exception as e:
                # Same policy as agent turns below: report and keep the REPL alive.
                detail = escape(f"{type(e).__name__}: {e}")
                _CONSOLE.print(f"[red]command error:[/] {detail}")
                continue
            if done:
                return 0
            if parsed.name not in reg.names:
                # The name is user input; escape it so brackets print literally.
                _CONSOLE.print(f"[yellow]unknown command: /{escape(parsed.name)}[/]")
            continue
        # Forward to agent.  Expanding file refs and building the agent are
        # inside the try so that, e.g., an invalid /model choice is reported
        # instead of terminating the REPL.
        try:
            expanded = _expand_file_refs(line, sess.repo_root)
            agent = sess.build_agent_if_needed()
            await _invoke_and_stream(agent, expanded, sess)
        except Exception as e:
            # Exception text often contains brackets; escape it so Rich does
            # not parse it as markup (dropping text or raising MarkupError).
            detail = escape(f"{type(e).__name__}: {e}")
            _CONSOLE.print(f"[red]agent error:[/] {detail}")


async def _resolve_session_arg(db: Database, prefix_or_full: str) -> UUID:
    """Accept full UUID or 6+ char prefix; return resolved UUID. Exit on miss."""
    try:
        return UUID(prefix_or_full)
    except ValueError:
        pass
    if len(prefix_or_full) < 6:
        _CONSOLE.print("[red]session prefix must be >=6 chars or a full UUID[/]")
        raise typer.Exit(code=2)
    async with db.session() as s:
        rows = (
            (
                await s.execute(
                    select(InteractiveSessionRow.id)
                    .where(InteractiveSessionRow.id.like(f"{prefix_or_full}%"))
                    .limit(2)
                )
            )
            .scalars()
            .all()
        )
    if not rows:
        _CONSOLE.print(f"[red]no session matches prefix:[/] {prefix_or_full}")
        raise typer.Exit(code=1)
    if len(rows) > 1:
        _CONSOLE.print(f"[red]ambiguous prefix matches >1 session:[/] {prefix_or_full}")
        raise typer.Exit(code=1)
    return UUID(rows[0])


async def _interactive_loop_async(
    persona_override: str | None,
    model_override: str | None,
    session_arg: str | None,
) -> int:
    """Set up and run one interactive REPL session.

    Loads config, checks consent, initialises the database schema, loads the
    personas, resolves (or creates) the session id, opens the LangGraph
    checkpointer, applies the persona/model overrides, persists the session
    row and runs the REPL loop.  The database is disposed on every exit path.

    Args:
        persona_override: Persona name (or ``name@version``) to start with.
        model_override: Model id to use instead of the persona's model.
        session_arg: Full UUID or id prefix of a session to resume; ``None``
            starts a new session.

    Returns:
        The REPL's exit code, or 1 when no personas are seeded, the requested
        session is missing or ended, or the persona override is unknown.

    NOTE(review): when resuming, the persona and thread suffix start from
    their defaults rather than from stored state — confirm this is intended.
    """
    config = load_config()
    require_consent(config.data_dir)
    db = Database(config.database_url)
    # One try/finally around everything that uses `db`, so early returns and
    # typer.Exit from session resolution also release the engine.
    try:
        await db.init_schema()
        personas = load_personas_from_dir(_seed_root() / "personas")
        if not personas:
            _CONSOLE.print("[red]no personas seeded; run `mydeepagent init`[/]")
            return 1
        pricing = _static_pricing_seed()

        # Resolve session id: --session given → existing; otherwise new uuid.
        if session_arg:
            session_id = await _resolve_session_arg(db, session_arg)
            async with db.session() as s:
                row = await s.get(InteractiveSessionRow, str(session_id))
                if row is None:
                    _CONSOLE.print(f"[red]session not found:[/] {session_arg}")
                    return 1
                if row.state == "ended":
                    _CONSOLE.print(
                        f"[yellow]session {row.id} is ended; start a new one with `mydeepagent`.[/]"
                    )
                    return 1
            creating = False
        else:
            session_id = uuid4()
            creating = True

        async with get_checkpointer_ctx(config.database_url) as saver:
            # Resolve initial persona (may be overridden below).
            sess = InteractiveSession(config, personas, db, pricing, Path.cwd(), session_id, saver)
            if persona_override:
                try:
                    sess.set_persona(persona_override)
                except ValueError as e:
                    _CONSOLE.print(f"[red]{e}[/]")
                    return 1
                # set_persona bumps thread_suffix; reset to 0 for new sessions so
                # initial thread_id is just "<session_id>:0" — clean.
                if creating:
                    sess._thread_suffix = 0
            if model_override:
                sess.set_model(model_override)
                if creating:
                    sess._thread_suffix = 0

            # Now persist the session row (or load existing).
            await _load_or_create_session_row(
                db, session_id, sess.persona, Path.cwd(), create=creating
            )
            # The row is created with the persona's model; record a --model
            # override on it too, the same way the /model command does.
            if model_override:
                async with db.session() as s:
                    row = await s.get(InteractiveSessionRow, str(session_id))
                    if row is not None and row.model != sess.active_model:
                        row.model = sess.active_model
                        await s.commit()

            reg = SlashRegistry()
            _register_slash(reg, sess)

            persona_label = f"{sess.persona.name}@{sess.persona.version}"
            mode_tag = "[bold green]resuming[/]" if not creating else "[bold cyan]new[/]"
            _CONSOLE.print(
                f"{mode_tag} session [dim]{str(session_id)[:8]}…[/] · "
                f"persona [cyan]{persona_label}[/] · model [dim]{sess.active_model}[/]"
            )
            _CONSOLE.print("[dim]type /help for commands, /quit to exit[/]")

            prompt_session: PromptSession[str] = PromptSession(
                history=FileHistory(str(_history_path(config))),
                completer=_completer(personas, reg.names),
            )
            code = await _repl_loop(sess, reg, prompt_session)
            # Leave the session "active" — user may resume via --session <id>.
            # Only explicit `/sessions end <id>` (or terminal state) marks it ended.
            return code
    finally:
        await db.dispose()


def interactive_command(
    persona: str | None = None,
    model: str | None = None,
    session: str | None = None,
) -> int:
    """Run the interactive REPL to completion and return its exit code.

    Synchronous entry point: starts an event loop for the async REPL, passing
    the optional persona override, model override and session id/prefix.
    """
    exit_code = asyncio.run(
        _interactive_loop_async(
            persona_override=persona,
            model_override=model,
            session_arg=session,
        )
    )
    return exit_code