fix(my-deepagent): v0.3 plan-conformance — 18-item gap fix across PR #2-#9

1차 v0.3 구현 후 plan-v0.3 와 대조해 발견된 18건 누락/명세 위반을 보강.
자기 리뷰 3 라운드 (누락·미완 / 오류·엣지케이스 / 과최적화) 모두 PASS.

PR #5 plan-mode (3건):
- BLOCKED_TOOLS_IN_PLAN_MODE 에 write_todos 추가
- /plan 시 system message inject (_PLAN_MODE_SYSTEM_PROMPT)
- /approve 시 마지막 assistant 메시지를 "approved plan" system 으로 inject
- InteractiveSession._pending_system_messages 인프라 신설

PR #2 compaction (1건):
- CompactionResult.summary_text 추가, 다음 thread 첫 ainvoke 에 inject

PR #3 auto-memory (6건):
- global memory dir + bootstrap
- frontmatter name/description/type 정식 도입 + MemoryEntry/MemoryType
- _infer_memory_type (keyword heuristic, no LLM)
- _scrub_secrets (OpenRouter/Anthropic/OpenAI/AWS/Bearer redaction)
- /memory show <name> 서브명령
- /remember [--global] / /forget [--global] 스코프 토글

PR #4 skills (3건):
- project_skills_dir + 두 스코프 (global / project) merge with last-wins
- /skill <name> 본문 inject (queue_system_message) — 이전엔 REPL 출력만
- /skills show <name> 별도 서브명령

PR #6 sub-agent (4건):
- budget.py `session:<uuid>` scope + CostMiddleware 자동 전달
- resolve_root_session_id walk-up (cycle guard) + sub-agent root 에 charge
- run_subagent_to_completion 실제 ainvoke + 결과 push to parent
- /agents 서브명령 구조 (list / spawn / show) + spawn 시 parent system msg

PR #7 governance (1건):
- bootstrap_user_dirs — instructions + global/memory + skills + projects 한 호출로 idempotent 부트스트랩

PR #8 Web GUI (1건):
- index.html → 세션 목록, runs.html (신설) → workflow archive
- conversation.html ?session=<id> deep-link

PR #9 workflow integration (2건):
- /workflow 백그라운드 WorkflowEngine.run + 진행 메시지 stream 누적
- /binding show <workflow-name[@version]> 인자 지원

테스트 (+17, 685 → 702 passed):
- test_plan_mode: write_todos 차단 + blocklist sanity
- test_memory: scrub + type 추론 + override
- test_skills: project override + find_skill + resolve_skill_sources(pk)
- test_subagents: resolve_root_session_id chain + missing fallback
- test_budget: session: scope accumulation
- test_instructions: governance bootstrap + idempotency
- test_api_static: runs.html 신설 + index.html 재구성
게이트:
- ruff check / format --check / mypy: PASS (141 source files)
- pytest -q --ignore=tests/integration/test_e2e_workflow.py --ignore=tests/integration/test_openrouter_smoke.py: 702 passed

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-18 00:03:08 +09:00
parent 361d6d7636
commit 96c8849e2c
24 changed files with 1687 additions and 304 deletions
--- a/my-deepagent/tests/integration/test_plan_mode.py
+++ b/my-deepagent/tests/integration/test_plan_mode.py
@@ -72,9 +72,7 @@ async def test_plan_mode_active_blocks_write_file() -> None:
@pytest.mark.asyncio
 async def test_plan_mode_active_blocks_execute() -> None:
    mw = PlanModeMiddleware(is_active=lambda: True)
-    req = _FakeToolRequest(
-        tool_call={"name": "execute", "id": "exec1", "args": {"command": "ls"}}
-    )
+    req = _FakeToolRequest(tool_call={"name": "execute", "id": "exec1", "args": {"command": "ls"}})
    result = await mw.awrap_tool_call(req, _passthrough_handler)
    assert isinstance(result, ToolMessage)
    assert result.status == "error"
@@ -84,9 +82,7 @@ async def test_plan_mode_active_blocks_execute() -> None:
@pytest.mark.asyncio
 async def test_plan_mode_active_blocks_task_subagent_spawn() -> None:
    mw = PlanModeMiddleware(is_active=lambda: True)
-    req = _FakeToolRequest(
-        tool_call={"name": "task", "id": "task1", "args": {"description": "x"}}
-    )
+    req = _FakeToolRequest(tool_call={"name": "task", "id": "task1", "args": {"description": "x"}})
    result = await mw.awrap_tool_call(req, _passthrough_handler)
    assert isinstance(result, ToolMessage)
    assert result.status == "error"
@@ -101,13 +97,24 @@ async def test_plan_mode_active_blocks_task_subagent_spawn() -> None:
@pytest.mark.asyncio
 async def test_plan_mode_active_allows_read_only_tools() -> None:
    mw = PlanModeMiddleware(is_active=lambda: True)
-    for name in ["read_file", "glob", "grep", "ls", "write_todos"]:
+    for name in ["read_file", "glob", "grep", "ls"]:
        req = _FakeToolRequest(tool_call={"name": name, "id": "t1", "args": {}})
        result = await mw.awrap_tool_call(req, _passthrough_handler)
        assert result.content == "EXECUTED", f"{name} should not be blocked"
        assert result.status != "error"


+@pytest.mark.asyncio
+async def test_plan_mode_blocks_write_todos() -> None:
+    """`write_todos` is part of the plan markdown — must be blocked."""
+    mw = PlanModeMiddleware(is_active=lambda: True)
+    req = _FakeToolRequest(tool_call={"name": "write_todos", "id": "wt1", "args": {"todos": []}})
+    result = await mw.awrap_tool_call(req, _passthrough_handler)
+    assert isinstance(result, ToolMessage)
+    assert result.status == "error"
+    assert "write_todos" in result.content
+
+
 # ---------------------------------------------------------------------------
 # Closure-toggle behavior — flip without rebuild
 # ---------------------------------------------------------------------------
@@ -165,6 +172,10 @@ def test_blocklist_includes_all_known_write_tools() -> None:
    assert "task" in BLOCKED_TOOLS_IN_PLAN_MODE


-def test_blocklist_excludes_read_only_and_planning_tools() -> None:
-    for name in ("read_file", "glob", "grep", "ls", "write_todos"):
+def test_blocklist_excludes_read_only_tools() -> None:
+    for name in ("read_file", "glob", "grep", "ls"):
        assert name not in BLOCKED_TOOLS_IN_PLAN_MODE
+
+
+def test_blocklist_includes_write_todos() -> None:
+    assert "write_todos" in BLOCKED_TOOLS_IN_PLAN_MODE