fix(my-deepagent): v0.3 plan-conformance — 18-item gap fix across PR #2-#9

1차 v0.3 구현 후 plan-v0.3 와 대조해 발견된 18건 누락/명세 위반을 보강.
자기 리뷰 3 라운드 (누락·미완 / 오류·엣지케이스 / 과최적화) 모두 PASS.

PR #5 plan-mode (3건):
- BLOCKED_TOOLS_IN_PLAN_MODE 에 write_todos 추가
- /plan 시 system message inject (_PLAN_MODE_SYSTEM_PROMPT)
- /approve 시 마지막 assistant 메시지를 "approved plan" system 으로 inject
- InteractiveSession._pending_system_messages 인프라 신설

PR #2 compaction (1건):
- CompactionResult.summary_text 추가, 다음 thread 첫 ainvoke 에 inject

PR #3 auto-memory (6건):
- global memory dir + bootstrap
- frontmatter name/description/type 정식 도입 + MemoryEntry/MemoryType
- _infer_memory_type (keyword heuristic, no LLM)
- _scrub_secrets (OpenRouter/Anthropic/OpenAI/AWS/Bearer redaction)
- /memory show <name> 서브명령
- /remember [--global] / /forget [--global] 스코프 토글

PR #4 skills (3건):
- project_skills_dir + 두 스코프 (global / project) merge with last-wins
- /skill <name> 본문 inject (queue_system_message) — 이전엔 REPL 출력만
- /skills show <name> 별도 서브명령

PR #6 sub-agent (4건):
- budget.py `session:<uuid>` scope + CostMiddleware 자동 전달
- resolve_root_session_id walk-up (cycle guard) + sub-agent root 에 charge
- run_subagent_to_completion 실제 ainvoke + 결과 push to parent
- /agents 서브명령 구조 (list / spawn / show) + spawn 시 parent system msg

PR #7 governance (1건):
- bootstrap_user_dirs — instructions + global/memory + skills + projects 한
  호출로 idempotent 부트스트랩

PR #8 Web GUI (1건):
- index.html → 세션 목록, runs.html (신설) → workflow archive
- conversation.html ?session=<id> deep-link

PR #9 workflow integration (2건):
- /workflow 백그라운드 WorkflowEngine.run + 진행 메시지 stream 누적
- /binding show <workflow-name[@version]> 인자 지원

테스트 (+17, 685 → 702 passed):
- test_plan_mode: write_todos 차단 + blocklist sanity
- test_memory: scrub + type 추론 + override
- test_skills: project override + find_skill + resolve_skill_sources(pk)
- test_subagents: resolve_root_session_id chain + missing fallback
- test_budget: session: scope accumulation
- test_instructions: governance bootstrap + idempotency
- test_api_static: runs.html 신설 + index.html 재구성

게이트:
- ruff check / format --check / mypy: PASS (141 source files)
- pytest -q --ignore=tests/integration/test_e2e_workflow.py
  --ignore=tests/integration/test_openrouter_smoke.py: 702 passed

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
chungyeong
2026-05-18 00:03:08 +09:00
parent 361d6d7636
commit 96c8849e2c
24 changed files with 1687 additions and 304 deletions

View File

@@ -33,12 +33,25 @@ async def app_client(tmp_path: Path) -> AsyncIterator[AsyncClient]:
@pytest.mark.asyncio
async def test_root_serves_index_html(app_client: AsyncClient) -> None:
"""`/` now renders the conversation-centric index (v0.3 PR #8 rewrite)."""
r = await app_client.get("/")
assert r.status_code == 200
assert r.headers["content-type"].startswith("text/html")
body = r.text
assert "<title>my-deepagent · runs</title>" in body
# Title became "대화"; data-page kept as "index" for back-compat.
assert 'data-page="index"' in body
assert "대화" in body
# Must NOT advertise itself as the Runs page anymore.
assert "my-deepagent · runs" not in body
@pytest.mark.asyncio
async def test_runs_html_served(app_client: AsyncClient) -> None:
    """The workflow runs archive page is reachable at `/runs.html`."""
    response = await app_client.get("/runs.html")
    assert response.status_code == 200
    # Both the page marker and the visible heading must be present in the HTML.
    page = response.text
    assert 'data-page="runs"' in page
    assert "Workflow Runs" in page
@pytest.mark.asyncio

View File

@@ -256,6 +256,42 @@ async def test_get_remaining_unknown_scope_returns_none(db: Database) -> None:
assert remaining is None
# ---------------------------------------------------------------------------
# session: scope (v0.3 PR #6) — sub-agent rollup to root session
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_session_scope_accumulates_cost(db: Database) -> None:
    """Costs recorded under one session id add up in its ``session:`` scope."""
    import uuid as _uuid

    tracker = _make_tracker(db, run_cap=2.0)
    session_id = _uuid.uuid4()
    scope = f"session:{session_id}"

    # Two records against the same session id.
    for cost in (0.30, 0.20):
        await tracker.record(
            run_id=None, persona_name=None, actual_cost_usd=cost, session_id=session_id
        )

    assert await tracker.get_spent(scope) == pytest.approx(0.50)
    assert await tracker.get_remaining(scope) == pytest.approx(1.50)
@pytest.mark.asyncio
async def test_session_scope_omitted_when_no_session_id(db: Database) -> None:
    """A record made without ``session_id`` leaves an arbitrary session scope at zero."""
    import uuid as _uuid

    tracker = _make_tracker(db)
    # Record a cost with no session attached.
    await tracker.record(run_id=None, persona_name=None, actual_cost_usd=0.10)

    # A freshly generated session scope must report nothing spent.
    unrelated_scope = f"session:{_uuid.uuid4()}"
    spent = await tracker.get_spent(unrelated_scope)
    assert spent == pytest.approx(0.0)
# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------

View File

@@ -18,6 +18,7 @@ from pathlib import Path
from typing import Any
import pytest
from fastapi import FastAPI
from httpx import ASGITransport, AsyncClient
from sqlalchemy import select
@@ -28,7 +29,9 @@ from my_deepagent.persistence.models import InteractiveSessionRow, MessageRow
@pytest.fixture
async def app_client(tmp_path: Path) -> AsyncIterator[tuple[AsyncClient, Database]]:
async def app_client(
tmp_path: Path,
) -> AsyncIterator[tuple[AsyncClient, Database, FastAPI]]:
db_url = f"sqlite+aiosqlite:///{tmp_path / 'conv.sqlite3'}"
cfg = load_config(
workspace_root=tmp_path,
@@ -44,7 +47,7 @@ async def app_client(tmp_path: Path) -> AsyncIterator[tuple[AsyncClient, Databas
# Tests get their own Database instance for direct row inspection.
external_db = Database(db_url)
async with AsyncClient(transport=transport, base_url="http://test", timeout=10.0) as client:
yield (client, external_db)
yield (client, external_db, app)
await external_db.dispose()
@@ -55,9 +58,9 @@ async def app_client(tmp_path: Path) -> AsyncIterator[tuple[AsyncClient, Databas
@pytest.mark.asyncio
async def test_conversation_page_served(
app_client: tuple[AsyncClient, Database],
app_client: tuple[AsyncClient, Database, FastAPI],
) -> None:
client, _ = app_client
client, _db, _app = app_client
r = await client.get("/conversation.html")
assert r.status_code == 200
assert 'data-page="conversation"' in r.text
@@ -71,9 +74,9 @@ async def test_conversation_page_served(
@pytest.mark.asyncio
async def test_post_message_returns_ack_and_persists_user_row(
app_client: tuple[AsyncClient, Database], monkeypatch: pytest.MonkeyPatch
app_client: tuple[AsyncClient, Database, FastAPI], monkeypatch: pytest.MonkeyPatch
) -> None:
client, db = app_client
client, db, _app = app_client
invocations: list[tuple[str, str]] = []
@@ -125,11 +128,11 @@ async def test_post_message_returns_ack_and_persists_user_row(
@pytest.mark.asyncio
async def test_post_message_holds_task_ref_on_app_state(
app_client: tuple[AsyncClient, Database], monkeypatch: pytest.MonkeyPatch
app_client: tuple[AsyncClient, Database, FastAPI], monkeypatch: pytest.MonkeyPatch
) -> None:
"""Background task must be held on app.state.pending_invocations so the
GC + RUF006 don't drop it before completion."""
client, _ = app_client
client, _db, app = app_client
started = asyncio.Event()
can_finish = asyncio.Event()
@@ -150,12 +153,12 @@ async def test_post_message_holds_task_ref_on_app_state(
# Wait for the task to start.
await asyncio.wait_for(started.wait(), timeout=2.0)
# The pending_invocations set on the app should hold a reference.
pending = client._transport.app.state.pending_invocations
pending = app.state.pending_invocations
assert len(pending) == 1
# Release the task and let the discard callback fire.
can_finish.set()
await asyncio.sleep(0.05)
assert len(client._transport.app.state.pending_invocations) == 0
assert len(app.state.pending_invocations) == 0
# ---------------------------------------------------------------------------
@@ -165,10 +168,10 @@ async def test_post_message_holds_task_ref_on_app_state(
@pytest.mark.asyncio
async def test_background_invocation_persists_assistant_row(
app_client: tuple[AsyncClient, Database], monkeypatch: pytest.MonkeyPatch
app_client: tuple[AsyncClient, Database, FastAPI], monkeypatch: pytest.MonkeyPatch
) -> None:
"""When the runner finishes, an assistant MessageRow should be visible."""
client, db = app_client
client, db, _app = app_client
async def fake_invoke(
passed_db: Any,
@@ -184,19 +187,17 @@ async def test_background_invocation_persists_assistant_row(
from sqlalchemy import desc
from my_deepagent.persistence.models import MessageRow as MR
async with passed_db.session() as s:
last = (
await s.execute(
select(MR.seq)
.where(MR.session_id == str(session_id))
.order_by(desc(MR.seq))
select(MessageRow.seq)
.where(MessageRow.session_id == str(session_id))
.order_by(desc(MessageRow.seq))
.limit(1)
)
).scalar_one_or_none() or 0
s.add(
MR(
MessageRow(
session_id=str(session_id),
seq=last + 1,
role="assistant",

View File

@@ -24,7 +24,6 @@ from my_deepagent.instructions import (
resolve_instruction_paths,
)
# ---------------------------------------------------------------------------
# Bootstrap (global only)
# ---------------------------------------------------------------------------
@@ -96,6 +95,38 @@ def test_global_instructions_path_under_data_dir(tmp_path: Path) -> None:
assert p.name == INSTRUCTION_FILENAME
def test_governance_bootstrap_creates_full_skeleton(tmp_path: Path) -> None:
    """`bootstrap_user_dirs` lays out the user-wide directory tree (PR #7)."""
    from my_deepagent.governance import bootstrap_user_dirs
    from my_deepagent.memory import INDEX_FILENAME as MEMORY_INDEX_FILENAME

    cfg = load_config(workspace_root=tmp_path, data_dir=tmp_path / "data")
    bootstrap_user_dirs(cfg)
    data_root = Path(cfg.data_dir)

    # The global instructions file exists.
    assert global_instructions_path(cfg).is_file()

    # The global memory directory and its index file exist.
    memory_root = data_root / "global" / "memory"
    assert memory_root.is_dir()
    assert (memory_root / MEMORY_INDEX_FILENAME).is_file()

    # The user skills directory and the projects parent directory exist.
    assert (data_root / "skills").is_dir()
    assert (data_root / "projects").is_dir()
def test_governance_bootstrap_is_idempotent(tmp_path: Path) -> None:
    """Re-running `bootstrap_user_dirs` keeps a user-edited instructions file."""
    from my_deepagent.governance import bootstrap_user_dirs

    cfg = load_config(workspace_root=tmp_path, data_dir=tmp_path / "data")
    bootstrap_user_dirs(cfg)

    edited = "custom edited content"
    instructions_file = global_instructions_path(cfg)
    instructions_file.write_text(edited, encoding="utf-8")

    # Bootstrapping again must preserve what the user wrote.
    bootstrap_user_dirs(cfg)
    assert instructions_file.read_text(encoding="utf-8") == edited
# ---------------------------------------------------------------------------
# Integration: instruction paths reach deepagents memory= kwarg
# ---------------------------------------------------------------------------

View File

@@ -71,23 +71,25 @@ def test_ensure_memory_initialized_is_idempotent(memory_dir: Path) -> None:
def test_add_memory_entry_writes_file_and_updates_index(memory_dir: Path) -> None:
path = add_memory_entry(memory_dir, "프로젝트 핵심: 위크닥 CLI MVP")
assert path.is_file()
body = path.read_text(encoding="utf-8")
result = add_memory_entry(memory_dir, "프로젝트 핵심: 위크닥 CLI MVP")
assert result.path.is_file()
body = result.path.read_text(encoding="utf-8")
assert "프로젝트 핵심" in body
assert body.startswith("---\nslug: ")
assert body.startswith("---\nname: ")
assert "type:" in body
assert result.scrubbed is False
index = (memory_dir / INDEX_FILENAME).read_text(encoding="utf-8")
assert path.name in index
assert result.path.name in index
assert "프로젝트 핵심" in index
def test_add_memory_entry_handles_slug_collision(memory_dir: Path) -> None:
p1 = add_memory_entry(memory_dir, "Same first line")
p2 = add_memory_entry(memory_dir, "Same first line\nsecond entry body")
p3 = add_memory_entry(memory_dir, "Same first line\nthird entry body")
r1 = add_memory_entry(memory_dir, "Same first line")
r2 = add_memory_entry(memory_dir, "Same first line\nsecond entry body")
r3 = add_memory_entry(memory_dir, "Same first line\nthird entry body")
p1, p2, p3 = r1.path, r2.path, r3.path
assert p1.name != p2.name != p3.name
# Auto-slugging should land on <slug>-2.md and <slug>-3.md.
stems = sorted([p1.stem, p2.stem, p3.stem])
assert stems[0] == "same-first-line"
assert stems[1] == "same-first-line-2"
@@ -100,8 +102,34 @@ def test_add_memory_entry_rejects_empty_content(memory_dir: Path) -> None:
def test_add_memory_entry_explicit_name_override(memory_dir: Path) -> None:
p = add_memory_entry(memory_dir, "Random body text", name="My Custom Slug!!")
assert p.stem == "my-custom-slug"
r = add_memory_entry(memory_dir, "Random body text", name="My Custom Slug!!")
assert r.path.stem == "my-custom-slug"
def test_add_memory_entry_scrubs_openrouter_key(memory_dir: Path) -> None:
    """An OpenRouter-style key in the content is redacted in the written file."""
    content = "save this for me: sk-or-v1-abcdefghijklmnop1234567890"
    result = add_memory_entry(memory_dir, content)

    written = result.path.read_text(encoding="utf-8")
    assert "sk-or-v1-abcdefghijklmnop" not in written
    assert "<redacted:openrouter-key>" in written
    assert result.scrubbed is True
def test_add_memory_entry_infers_user_type(memory_dir: Path) -> None:
    """A stated personal preference is typed as a ``user`` memory."""
    entry = add_memory_entry(memory_dir, "I prefer fish shell over bash")
    inferred = entry.memory_type
    assert inferred == "user"
def test_add_memory_entry_infers_feedback_type(memory_dir: Path) -> None:
    """A "don't do X" instruction is typed as a ``feedback`` memory."""
    entry = add_memory_entry(memory_dir, "don't mock the database in integration tests")
    inferred = entry.memory_type
    assert inferred == "feedback"
def test_add_memory_entry_explicit_type_overrides_heuristic(memory_dir: Path) -> None:
    """An explicit ``memory_type`` argument is what the result reports."""
    entry = add_memory_entry(
        memory_dir,
        "I prefer fish shell",
        memory_type="reference",
    )
    assert entry.memory_type == "reference"
# ---------------------------------------------------------------------------
@@ -110,17 +138,17 @@ def test_add_memory_entry_explicit_name_override(memory_dir: Path) -> None:
def test_remove_memory_entry_by_slug(memory_dir: Path) -> None:
p = add_memory_entry(memory_dir, "to be forgotten")
assert remove_memory_entry(memory_dir, p.stem) is True
assert not p.exists()
r = add_memory_entry(memory_dir, "to be forgotten")
assert remove_memory_entry(memory_dir, r.path.stem) is True
assert not r.path.exists()
index_body = (memory_dir / INDEX_FILENAME).read_text(encoding="utf-8")
assert p.name not in index_body
assert r.path.name not in index_body
def test_remove_memory_entry_by_filename(memory_dir: Path) -> None:
p = add_memory_entry(memory_dir, "to be forgotten by full filename")
assert remove_memory_entry(memory_dir, p.name) is True
assert not p.exists()
r = add_memory_entry(memory_dir, "to be forgotten by full filename")
assert remove_memory_entry(memory_dir, r.path.name) is True
assert not r.path.exists()
def test_remove_memory_entry_missing_returns_false(memory_dir: Path) -> None:

View File

@@ -72,9 +72,7 @@ async def test_plan_mode_active_blocks_write_file() -> None:
@pytest.mark.asyncio
async def test_plan_mode_active_blocks_execute() -> None:
mw = PlanModeMiddleware(is_active=lambda: True)
req = _FakeToolRequest(
tool_call={"name": "execute", "id": "exec1", "args": {"command": "ls"}}
)
req = _FakeToolRequest(tool_call={"name": "execute", "id": "exec1", "args": {"command": "ls"}})
result = await mw.awrap_tool_call(req, _passthrough_handler)
assert isinstance(result, ToolMessage)
assert result.status == "error"
@@ -84,9 +82,7 @@ async def test_plan_mode_active_blocks_execute() -> None:
@pytest.mark.asyncio
async def test_plan_mode_active_blocks_task_subagent_spawn() -> None:
mw = PlanModeMiddleware(is_active=lambda: True)
req = _FakeToolRequest(
tool_call={"name": "task", "id": "task1", "args": {"description": "x"}}
)
req = _FakeToolRequest(tool_call={"name": "task", "id": "task1", "args": {"description": "x"}})
result = await mw.awrap_tool_call(req, _passthrough_handler)
assert isinstance(result, ToolMessage)
assert result.status == "error"
@@ -101,13 +97,24 @@ async def test_plan_mode_active_blocks_task_subagent_spawn() -> None:
@pytest.mark.asyncio
async def test_plan_mode_active_allows_read_only_tools() -> None:
mw = PlanModeMiddleware(is_active=lambda: True)
for name in ["read_file", "glob", "grep", "ls", "write_todos"]:
for name in ["read_file", "glob", "grep", "ls"]:
req = _FakeToolRequest(tool_call={"name": name, "id": "t1", "args": {}})
result = await mw.awrap_tool_call(req, _passthrough_handler)
assert result.content == "EXECUTED", f"{name} should not be blocked"
assert result.status != "error"
@pytest.mark.asyncio
async def test_plan_mode_blocks_write_todos() -> None:
    """While plan mode is active, a `write_todos` call yields an error ToolMessage."""
    middleware = PlanModeMiddleware(is_active=lambda: True)
    tool_call = {"name": "write_todos", "id": "wt1", "args": {"todos": []}}
    request = _FakeToolRequest(tool_call=tool_call)

    outcome = await middleware.awrap_tool_call(request, _passthrough_handler)

    assert isinstance(outcome, ToolMessage)
    assert outcome.status == "error"
    # The error text names the tool that was refused.
    assert "write_todos" in outcome.content
# ---------------------------------------------------------------------------
# Closure-toggle behavior — flip without rebuild
# ---------------------------------------------------------------------------
@@ -165,6 +172,10 @@ def test_blocklist_includes_all_known_write_tools() -> None:
assert "task" in BLOCKED_TOOLS_IN_PLAN_MODE
def test_blocklist_excludes_read_only_and_planning_tools() -> None:
for name in ("read_file", "glob", "grep", "ls", "write_todos"):
def test_blocklist_excludes_read_only_tools() -> None:
for name in ("read_file", "glob", "grep", "ls"):
assert name not in BLOCKED_TOOLS_IN_PLAN_MODE
def test_blocklist_includes_write_todos() -> None:
    """`write_todos` is a member of the plan-mode blocklist."""
    blocked = BLOCKED_TOOLS_IN_PLAN_MODE
    assert "write_todos" in blocked

View File

@@ -201,6 +201,66 @@ def test_resolve_skill_sources_returns_user_dir(tmp_path: Path) -> None:
assert sources[0] == str(user_skills_dir(cfg).resolve())
def test_resolve_skill_sources_with_project_key_returns_both(tmp_path: Path) -> None:
    """With a project key, sources are the user dir followed by the project dir."""
    from my_deepagent.skills import project_skills_dir

    project_key = "proj1234abcdef00"
    cfg = load_config(workspace_root=tmp_path, data_dir=tmp_path / "data")

    sources = resolve_skill_sources(cfg, project_key=project_key)

    user_source = str(user_skills_dir(cfg).resolve())
    project_source = str(project_skills_dir(cfg, project_key).resolve())
    assert sources == [user_source, project_source]
def test_list_all_skills_project_overrides_global(tmp_path: Path) -> None:
    """A project skill shadows a same-named global one; unique skills are all listed."""
    from my_deepagent.skills import list_all_skills, project_skills_dir

    cfg = load_config(workspace_root=tmp_path, data_dir=tmp_path / "data")
    pk = "abc123def456ffff"
    global_dir = user_skills_dir(cfg)
    proj_dir = project_skills_dir(cfg, pk)
    for directory in (global_dir, proj_dir):
        directory.mkdir(parents=True)

    # One name present in both scopes, plus one name unique to each scope.
    fixtures = (
        (global_dir, "shared", "global-version"),
        (proj_dir, "shared", "project-version"),
        (global_dir, "global-only", "g"),
        (proj_dir, "project-only", "p"),
    )
    for directory, skill_name, description in fixtures:
        _make_skill(directory, skill_name, description=description)

    found = {skill.name: skill for skill in list_all_skills(cfg, pk)}
    assert set(found) == {"shared", "global-only", "project-only"}

    # The shared name must come from the project scope.
    shared = found["shared"]
    assert shared.scope == "project"
    assert shared.description == "project-version"
    assert found["global-only"].scope == "global"
    assert found["project-only"].scope == "project"
def test_find_skill_prefers_project_over_global(tmp_path: Path) -> None:
    """When both scopes define a skill name, `find_skill` returns the project one."""
    from my_deepagent.skills import find_skill, project_skills_dir

    cfg = load_config(workspace_root=tmp_path, data_dir=tmp_path / "data")
    pk = "f0f0f0f0f0f0f0f0"
    global_dir = user_skills_dir(cfg)
    proj_dir = project_skills_dir(cfg, pk)
    for directory in (global_dir, proj_dir):
        directory.mkdir(parents=True)

    # Same skill name in both scopes, told apart by description.
    for directory, description in ((global_dir, "g"), (proj_dir, "p")):
        _make_skill(directory, "dup", description=description)

    match = find_skill(cfg, pk, "dup")
    assert match is not None
    assert match.scope == "project"
    assert match.description == "p"
def test_find_skill_missing_returns_none(tmp_path: Path) -> None:
    """Looking up a skill name that was never created returns ``None``."""
    from my_deepagent.skills import find_skill

    cfg = load_config(workspace_root=tmp_path, data_dir=tmp_path / "data")
    missing = find_skill(cfg, "any-project-key", "nonexistent")
    assert missing is None
# ---------------------------------------------------------------------------
# Integration: build_agent threads skills sources to deepagents
# ---------------------------------------------------------------------------

View File

@@ -29,6 +29,7 @@ from my_deepagent.persona import Persona
from my_deepagent.subagents import (
MAX_SUBAGENT_DEPTH,
list_subagents,
resolve_root_session_id,
spawn_subagent_session,
)
@@ -243,6 +244,31 @@ async def test_list_subagents_no_children_returns_empty(
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_resolve_root_session_id_walks_to_root(
    db_with_root: tuple[Database, str],
) -> None:
    """Each session in a four-level spawn chain resolves to the same root id."""
    db, root_id = db_with_root
    persona = _make_persona()
    root = uuid.UUID(root_id)

    # Build root -> child -> grandchild -> great-grandchild.
    child = await spawn_subagent_session(db, parent_session_id=root, persona=persona)
    grand = await spawn_subagent_session(db, parent_session_id=child, persona=persona)
    great = await spawn_subagent_session(db, parent_session_id=grand, persona=persona)

    for session in (root, child, grand, great):
        assert (await resolve_root_session_id(db, session)) == root
@pytest.mark.asyncio
async def test_resolve_root_session_id_missing_returns_input(
    db_with_root: tuple[Database, str],
) -> None:
    """A freshly generated id, never spawned in this test, is returned unchanged."""
    db, _ = db_with_root
    unknown_id = uuid.uuid4()
    resolved = await resolve_root_session_id(db, unknown_id)
    assert resolved == unknown_id
@pytest.mark.asyncio
async def test_spawn_reuses_persona_row_for_same_hash(
db_with_root: tuple[Database, str],