fix(my-deepagent): v0.3 plan-conformance — 18-item gap fix across PR #2-#9

1차 v0.3 구현 후 plan-v0.3 와 대조해 발견된 18건 누락/명세 위반을 보강. 자기 리뷰 3 라운드 (누락·미완 / 오류·엣지케이스 / 과최적화) 모두 PASS. PR #5 plan-mode (3건): - BLOCKED_TOOLS_IN_PLAN_MODE 에 write_todos 추가 - /plan 시 system message inject (_PLAN_MODE_SYSTEM_PROMPT) - /approve 시 마지막 assistant 메시지를 "approved plan" system 으로 inject - InteractiveSession._pending_system_messages 인프라 신설 PR #2 compaction (1건): - CompactionResult.summary_text 추가, 다음 thread 첫 ainvoke 에 inject PR #3 auto-memory (6건): - global memory dir + bootstrap - frontmatter name/description/type 정식 도입 + MemoryEntry/MemoryType - _infer_memory_type (keyword heuristic, no LLM) - _scrub_secrets (OpenRouter/Anthropic/OpenAI/AWS/Bearer redaction) - /memory show <name> 서브명령 - /remember [--global] / /forget [--global] 스코프 토글 PR #4 skills (3건): - project_skills_dir + 두 스코프 (global / project) merge with last-wins - /skill <name> 본문 inject (queue_system_message) — 이전엔 REPL 출력만 - /skills show <name> 별도 서브명령 PR #6 sub-agent (4건): - budget.py `session:<uuid>` scope + CostMiddleware 자동 전달 - resolve_root_session_id walk-up (cycle guard) + sub-agent root 에 charge - run_subagent_to_completion 실제 ainvoke + 결과 push to parent - /agents 서브명령 구조 (list / spawn / show) + spawn 시 parent system msg PR #7 governance (1건): - bootstrap_user_dirs — instructions + global/memory + skills + projects 한 호출로 idempotent 부트스트랩 PR #8 Web GUI (1건): - index.html → 세션 목록, runs.html (신설) → workflow archive - conversation.html ?session=<id> deep-link PR #9 workflow integration (2건): - /workflow 백그라운드 WorkflowEngine.run + 진행 메시지 stream 누적 - /binding show <workflow-name[@version]> 인자 지원 테스트 (+17, 685 → 702 passed): - test_plan_mode: write_todos 차단 + blocklist sanity - test_memory: scrub + type 추론 + override - test_skills: project override + find_skill + resolve_skill_sources(pk) - test_subagents: resolve_root_session_id chain + missing fallback - test_budget: session: scope accumulation - test_instructions: governance bootstrap + idempotency - test_api_static: runs.html 신설 + index.html 재구성 게이트: - ruff check / format --check / mypy: PASS (141 source files) - pytest -q --ignore=tests/integration/test_e2e_workflow.py --ignore=tests/integration/test_openrouter_smoke.py: 702 passed Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-18 00:03:08 +09:00
parent 361d6d7636
commit 96c8849e2c
24 changed files with 1687 additions and 304 deletions
--- a/my-deepagent/src/my_deepagent/subagents.py
+++ b/my-deepagent/src/my_deepagent/subagents.py
@@ -1,34 +1,60 @@
-"""Sub-agent session linkage (v0.3 PR #6) — fork a session into a child.
+"""Sub-agent session linkage + runner (v0.3 PR #6).

 PR #1 already added `parent_session_id` + `depth` columns to
 `InteractiveSessionRow`.  This module provides:

 - :func:`spawn_subagent_session` — atomically creates a child row inheriting
-  ``project_key`` (so memory is shared) and ``persona_id`` from the parent
-  unless overridden, sets ``parent_session_id`` + ``depth = parent.depth + 1``,
-  and rejects when depth would exceed :data:`MAX_SUBAGENT_DEPTH`.
- :func:`list_subagents` — return all direct children of a session for
-  ``/agents`` and the Web GUI's session tree.
+  ``project_key`` from the parent, sets ``parent_session_id`` + ``depth =
+  parent.depth + 1``, rejects when depth would exceed
+  :data:`MAX_SUBAGENT_DEPTH`.
+- :func:`list_subagents` — direct children for ``/agents`` listings.
+- :func:`resolve_root_session_id` — walk the parent chain to find the root.
+- :func:`run_subagent_to_completion` — actually invoke a sub-agent's
+  ``ainvoke`` with isolation + LangGraph thread + summary push to parent.

-The deepagents ``task`` tool is *separate* from this concept — that tool
-spawns langchain-internal sub-agents that run inline and return a string.
-Those don't get InteractiveSessionRows.  Use ``spawn_subagent_session`` when
-you want a persisted, addressable session that the user can navigate to via
-``mydeepagent --session <id>`` or the Web GUI.
+Cost rollup: each sub-agent's CostMiddleware is wired with the ROOT session
+id so all LLM calls under that session tree charge a single ``session:<uuid>``
+scope — matches the plan ("sub-agent는 root session의 한도에 합산").
 """

 from __future__ import annotations

+import logging
 from collections.abc import Sequence
 from datetime import UTC, datetime
+from typing import Any
 from uuid import UUID, uuid4

-from sqlalchemy import select
+from sqlalchemy import desc, select

+from .audit import make_audit_recorder
+from .budget import make_budget_tracker_from_config
+from .compaction import compact_session
+from .config import Config
 from .errors import MyDeepAgentError
+from .memory import (
+    ensure_memory_initialized,
+    global_memory_dir,
+    list_memory_paths,
+    project_memory_dir,
+)
+from .middleware.audit import AuditToolMiddleware
+from .middleware.cost import CostMiddleware
+from .middleware.plan_mode import PlanModeMiddleware
+from .monitoring.pricing import ModelPrice, PricingCache
+from .monitoring.token_budget import count_tokens
 from .persistence.db import Database
-from .persistence.models import AgentPersonaRow, InteractiveSessionRow
+from .persistence.models import AgentPersonaRow, InteractiveSessionRow, MessageRow
 from .persona import Persona
+from .session import build_agent
+from .skills import (
+    ensure_skills_initialized,
+    project_skills_dir,
+    resolve_skill_sources,
+    user_skills_dir,
+)
+
+_LOG = logging.getLogger(__name__)

 #: Maximum sub-agent nesting depth.  Above this we refuse to spawn — Claude
 #: Code's `task` tool limits agent stacks to roughly 3 levels (Main → A → B)
@@ -147,3 +173,217 @@ async def list_subagents(db: Database, parent_session_id: UUID) -> list[Interact
            .all()
        )
        return list(rows)
+
+
+async def resolve_root_session_id(db: Database, session_id: UUID) -> UUID:
+    """Walk ``parent_session_id`` until we reach a session with ``parent=None``.
+
+    Guarded against cycles (would only happen if depth column lied — protective
+    cap = 1 + MAX_SUBAGENT_DEPTH iterations).  Returns the input id when the
+    session has no parent.
+    """
+    current = str(session_id)
+    for _ in range(MAX_SUBAGENT_DEPTH + 2):
+        async with db.session() as s:
+            row = await s.get(InteractiveSessionRow, current)
+        if row is None:
+            return session_id
+        if row.parent_session_id is None:
+            return UUID(row.id)
+        current = row.parent_session_id
+    # Cycle detected — return the latest hop as a graceful fallback.
+    return UUID(current)
+
+
+_SUBAGENT_SUMMARY_INSTRUCTION = (
+    "당신은 sub-agent 입니다.  사용자가 요청한 과제를 마치고 한 번의 응답 안에 "
+    "(1) 도달한 결론, (2) 변경한 파일/생성한 산출물, (3) 부모 세션에 전달할 핵심 "
+    "요약 (≤ 400 단어) 을 정리하세요.  추가 turn 은 일어나지 않습니다."
+)
+
+
+def _static_pricing_seed() -> PricingCache:
+    cache = PricingCache()
+    cache.set(
+        [
+            ModelPrice("anthropic/claude-sonnet-4-6", 0.003, 0.015, 200_000),
+            ModelPrice("anthropic/claude-haiku-4-5", 0.001, 0.005, 200_000),
+            ModelPrice("anthropic/claude-opus-4-1", 0.015, 0.075, 200_000),
+            ModelPrice("deepseek/deepseek-chat", 0.00028, 0.00112, 64_000),
+        ]
+    )
+    return cache
+
+
+def _flatten_assistant_content(msg: Any) -> str:
+    content = getattr(msg, "content", "") or ""
+    if isinstance(content, list):
+        return "\n".join(
+            (b.get("text", str(b)) if isinstance(b, dict) else str(b)) for b in content
+        )
+    return str(content)
+
+
+async def _persist_message(
+    db: Database, session_id: UUID, role: str, content: str, *, model: str
+) -> None:
+    """Insert one MessageRow + bump last_message_at + token totals.
+
+    Mirrors the REPL's ``_append_message`` but lives in subagents.py so the
+    runner stays self-contained.
+    """
+    token_count = count_tokens(content, model)
+    now = datetime.now(UTC).isoformat(timespec="seconds")
+    async with db.session() as s:
+        last_seq = (
+            await s.execute(
+                select(MessageRow.seq)
+                .where(MessageRow.session_id == str(session_id))
+                .order_by(desc(MessageRow.seq))
+                .limit(1)
+            )
+        ).scalar_one_or_none() or 0
+        s.add(
+            MessageRow(
+                session_id=str(session_id),
+                seq=last_seq + 1,
+                role=role,
+                content=content,
+                tool_calls=None,
+                token_count=token_count,
+                is_summary=False,
+                archived=False,
+                ts=now,
+            )
+        )
+        row = await s.get(InteractiveSessionRow, str(session_id))
+        if row is not None:
+            row.last_message_at = now
+            if role == "user":
+                row.total_input_tokens += token_count
+            elif role == "assistant":
+                row.total_output_tokens += token_count
+        await s.commit()
+
+
+async def run_subagent_to_completion(
+    db: Database,
+    config: Config,
+    parent_session_id: UUID,
+    sub_session_id: UUID,
+    persona: Persona,
+    prompt: str,
+    *,
+    saver: Any | None = None,
+) -> str:
+    """Invoke the sub-agent ONCE with the supplied prompt and return its summary.
+
+    - Loads the sub-session row to read ``project_key`` (inherited from parent)
+    - Resolves the root session id and wires CostMiddleware to charge that
+      single ``session:<root_uuid>`` scope so the whole agent tree shares one
+      budget envelope (per plan: "sub-agent는 root session의 한도에 합산").
+    - Persists user prompt + assistant summary to the sub-session.
+    - Pushes a "[sub-agent <id> result] …" system message to the parent so
+      the user sees the outcome in the main thread.
+    - Marks the sub-session ``ended`` on completion.
+
+    Failures are logged + propagated as a synthetic assistant message in the
+    sub-session, and an error system message in the parent.
+    """
+    async with db.session() as s:
+        sub_row = await s.get(InteractiveSessionRow, str(sub_session_id))
+    if sub_row is None:
+        raise MyDeepAgentError.fatal(
+            "subagent_session_missing",
+            message=f"sub-agent session {sub_session_id} not found",
+            recovery_hint="call spawn_subagent_session before run_subagent_to_completion",
+        )
+
+    project_key = sub_row.project_key or ""
+    root_session_id = await resolve_root_session_id(db, parent_session_id)
+
+    # Bootstrap shared memory + skills dirs (idempotent).
+    if project_key:
+        ensure_memory_initialized(project_memory_dir(config, project_key))
+        ensure_skills_initialized(project_skills_dir(config, project_key))
+    ensure_memory_initialized(global_memory_dir(config))
+    ensure_skills_initialized(user_skills_dir(config))
+
+    pricing = _static_pricing_seed()
+    budget = make_budget_tracker_from_config(db, config)
+    cost_mw = CostMiddleware(
+        pricing=pricing,
+        model_name=persona.model,
+        interactive_session_id=root_session_id,
+        persona_name=persona.name,
+        budget_tracker=budget,
+    )
+    audit_mw = AuditToolMiddleware(
+        interactive_session_id=sub_session_id,
+        file_recorder=make_audit_recorder(config.state_dir),
+    )
+    plan_mw = PlanModeMiddleware(is_active=lambda: False)
+
+    memory_paths = list_memory_paths(global_memory_dir(config))
+    if project_key:
+        memory_paths += list_memory_paths(project_memory_dir(config, project_key))
+    skill_sources = resolve_skill_sources(config, project_key or None)
+
+    agent = build_agent(
+        persona,
+        config,
+        root_dir=config.workspace_root,
+        middleware=[plan_mw, cost_mw, audit_mw],
+        checkpointer=saver,
+        memory_paths_override=memory_paths,
+        skills_sources_override=skill_sources,
+    )
+
+    full_prompt = f"{prompt.strip()}\n\n---\n\n{_SUBAGENT_SUMMARY_INSTRUCTION}"
+    await _persist_message(db, sub_session_id, "user", full_prompt, model=persona.model)
+
+    thread_id = f"{sub_session_id}:0"
+    try:
+        result = await agent.ainvoke(
+            {"messages": [{"role": "user", "content": full_prompt}]},
+            config={"configurable": {"thread_id": thread_id}},
+        )
+    except Exception as e:
+        _LOG.exception("sub-agent ainvoke failed for session %s", sub_session_id)
+        error_msg = f"sub-agent failed: {type(e).__name__}: {e}"
+        await _persist_message(db, sub_session_id, "assistant", error_msg, model=persona.model)
+        await _persist_message(
+            db,
+            parent_session_id,
+            "system",
+            f"[sub-agent {str(sub_session_id)[:8]} error] {error_msg}",
+            model=persona.model,
+        )
+        await _mark_session_ended(db, sub_session_id)
+        return error_msg
+
+    messages = result.get("messages", []) if isinstance(result, dict) else []
+    summary = _flatten_assistant_content(messages[-1]) if messages else "(empty response)"
+    await _persist_message(db, sub_session_id, "assistant", summary, model=persona.model)
+    await _persist_message(
+        db,
+        parent_session_id,
+        "system",
+        f"[sub-agent {str(sub_session_id)[:8]} result]\n{summary}",
+        model=persona.model,
+    )
+
+    # Compact the sub-session if it grew too big (rare for single-turn but
+    # the helper is idempotent + cheap to call).
+    await compact_session(db, config, str(sub_session_id))
+    await _mark_session_ended(db, sub_session_id)
+    return summary
+
+
+async def _mark_session_ended(db: Database, session_id: UUID) -> None:
+    async with db.session() as s:
+        row = await s.get(InteractiveSessionRow, str(session_id))
+        if row is not None and row.state != "ended":
+            row.state = "ended"
+            row.ended_at = datetime.now(UTC).isoformat(timespec="seconds")
+            await s.commit()