1차 v0.3 구현 후 plan-v0.3 와 대조해 발견된 18건 누락/명세 위반을 보강. 자기 리뷰 3 라운드 (누락·미완 / 오류·엣지케이스 / 과최적화) 모두 PASS. PR #5 plan-mode (3건): - BLOCKED_TOOLS_IN_PLAN_MODE 에 write_todos 추가 - /plan 시 system message inject (_PLAN_MODE_SYSTEM_PROMPT) - /approve 시 마지막 assistant 메시지를 "approved plan" system 으로 inject - InteractiveSession._pending_system_messages 인프라 신설 PR #2 compaction (1건): - CompactionResult.summary_text 추가, 다음 thread 첫 ainvoke 에 inject PR #3 auto-memory (6건): - global memory dir + bootstrap - frontmatter name/description/type 정식 도입 + MemoryEntry/MemoryType - _infer_memory_type (keyword heuristic, no LLM) - _scrub_secrets (OpenRouter/Anthropic/OpenAI/AWS/Bearer redaction) - /memory show <name> 서브명령 - /remember [--global] / /forget [--global] 스코프 토글 PR #4 skills (3건): - project_skills_dir + 두 스코프 (global / project) merge with last-wins - /skill <name> 본문 inject (queue_system_message) — 이전엔 REPL 출력만 - /skills show <name> 별도 서브명령 PR #6 sub-agent (4건): - budget.py `session:<uuid>` scope + CostMiddleware 자동 전달 - resolve_root_session_id walk-up (cycle guard) + sub-agent root 에 charge - run_subagent_to_completion 실제 ainvoke + 결과 push to parent - /agents 서브명령 구조 (list / spawn / show) + spawn 시 parent system msg PR #7 governance (1건): - bootstrap_user_dirs — instructions + global/memory + skills + projects 한 호출로 idempotent 부트스트랩 PR #8 Web GUI (1건): - index.html → 세션 목록, runs.html (신설) → workflow archive - conversation.html ?session=<id> deep-link PR #9 workflow integration (2건): - /workflow 백그라운드 WorkflowEngine.run + 진행 메시지 stream 누적 - /binding show <workflow-name[@version]> 인자 지원 테스트 (+17, 685 → 702 passed): - test_plan_mode: write_todos 차단 + blocklist sanity - test_memory: scrub + type 추론 + override - test_skills: project override + find_skill + resolve_skill_sources(pk) - test_subagents: resolve_root_session_id chain + missing fallback - test_budget: session: scope accumulation - test_instructions: governance bootstrap + idempotency - test_api_static: runs.html 신설 + index.html 재구성 
게이트: - ruff check / format --check / mypy: PASS (141 source files) - pytest -q --ignore=tests/integration/test_e2e_workflow.py --ignore=tests/integration/test_openrouter_smoke.py: 702 passed Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
182 lines
6.6 KiB
Python
182 lines
6.6 KiB
Python
"""v0.3 PR #5 — Plan mode tests.
|
|
|
|
Covers:
|
|
1. PlanModeMiddleware passes tool calls through when inactive.
|
|
2. PlanModeMiddleware blocks write_file / edit_file / execute / task / write_todos when active.
|
|
3. read_file / glob / grep / ls are allowed regardless.
|
|
4. Toggling the closure flag changes behavior without rebuilding the middleware.
|
|
5. The synthetic ToolMessage carries status="error" and a clear hint.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from typing import Any
|
|
|
|
import pytest
|
|
from langchain_core.messages import ToolMessage
|
|
|
|
from my_deepagent.middleware.plan_mode import (
|
|
BLOCKED_TOOLS_IN_PLAN_MODE,
|
|
PlanModeMiddleware,
|
|
)
|
|
|
|
|
|
@dataclass
|
|
class _FakeToolRequest:
|
|
"""Minimal stand-in for langchain ToolCallRequest in unit tests."""
|
|
|
|
tool_call: dict[str, Any]
|
|
|
|
|
|
async def _passthrough_handler(_: _FakeToolRequest) -> ToolMessage:
    """Stand-in downstream handler that always reports a completed tool run.

    The request argument is ignored. The fixed ``"EXECUTED"`` content lets the
    tests tell a passed-through call apart from one the middleware intercepted.
    """
    executed = ToolMessage(content="EXECUTED", tool_call_id="t1", name="stub")
    return executed
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Inactive plan-mode → all tools pass through
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_plan_mode_inactive_passes_through() -> None:
    """With plan-mode off, every tool call reaches the wrapped handler."""
    middleware = PlanModeMiddleware(is_active=lambda: False)
    # Mix of tools that are blocked while plan-mode is on and read-only tools.
    tool_names = ["write_file", "edit_file", "execute", "task", "read_file", "glob"]

    for tool_name in tool_names:
        request = _FakeToolRequest(tool_call={"name": tool_name, "id": "t1", "args": {}})
        outcome = await middleware.awrap_tool_call(request, _passthrough_handler)

        assert isinstance(outcome, ToolMessage)
        assert outcome.content == "EXECUTED"
        assert outcome.status != "error"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Active plan-mode → write tools blocked with status=error
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_plan_mode_active_blocks_write_file() -> None:
    """An active plan-mode answers write_file with an error ToolMessage."""
    blocked_call: dict[str, Any] = {
        "name": "write_file",
        "id": "abc123",
        "args": {"file_path": "/tmp/x"},
    }
    middleware = PlanModeMiddleware(is_active=lambda: True)

    outcome = await middleware.awrap_tool_call(
        _FakeToolRequest(tool_call=blocked_call), _passthrough_handler
    )

    assert isinstance(outcome, ToolMessage)
    assert outcome.status == "error"
    # The rejection must answer the original call id and name the tool in its hint.
    assert outcome.tool_call_id == "abc123"
    assert "Plan-mode" in outcome.content
    assert "write_file" in outcome.content
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_plan_mode_active_blocks_execute() -> None:
    """An active plan-mode rejects the ``execute`` tool and names it in the hint."""
    blocked_call: dict[str, Any] = {"name": "execute", "id": "exec1", "args": {"command": "ls"}}
    middleware = PlanModeMiddleware(is_active=lambda: True)

    outcome = await middleware.awrap_tool_call(
        _FakeToolRequest(tool_call=blocked_call), _passthrough_handler
    )

    assert isinstance(outcome, ToolMessage)
    assert outcome.status == "error"
    assert "execute" in outcome.content
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_plan_mode_active_blocks_task_subagent_spawn() -> None:
    """An active plan-mode rejects the ``task`` tool and names it in the hint."""
    blocked_call: dict[str, Any] = {"name": "task", "id": "task1", "args": {"description": "x"}}
    middleware = PlanModeMiddleware(is_active=lambda: True)

    outcome = await middleware.awrap_tool_call(
        _FakeToolRequest(tool_call=blocked_call), _passthrough_handler
    )

    assert isinstance(outcome, ToolMessage)
    assert outcome.status == "error"
    assert "task" in outcome.content
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Active plan-mode → read-only tools still pass through
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_plan_mode_active_allows_read_only_tools() -> None:
    """Read-only tools still reach the wrapped handler while plan-mode is on.

    Each of ``read_file``, ``glob``, ``grep`` and ``ls`` must come back with the
    stub handler's ``"EXECUTED"`` content and a non-error status.
    """
    mw = PlanModeMiddleware(is_active=lambda: True)
    for name in ["read_file", "glob", "grep", "ls"]:
        req = _FakeToolRequest(tool_call={"name": name, "id": "t1", "args": {}})
        result = await mw.awrap_tool_call(req, _passthrough_handler)
        # Narrow the result type before reading message attributes, as the
        # sibling tests do, so an unexpected return type fails with a clear
        # assertion instead of an AttributeError.
        assert isinstance(result, ToolMessage), f"{name} returned a non-ToolMessage"
        assert result.content == "EXECUTED", f"{name} should not be blocked"
        assert result.status != "error", f"{name} should not be blocked"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_plan_mode_blocks_write_todos() -> None:
    """`write_todos` belongs to the plan markdown, so plan-mode must reject it."""
    blocked_call: dict[str, Any] = {"name": "write_todos", "id": "wt1", "args": {"todos": []}}
    middleware = PlanModeMiddleware(is_active=lambda: True)

    outcome = await middleware.awrap_tool_call(
        _FakeToolRequest(tool_call=blocked_call), _passthrough_handler
    )

    assert isinstance(outcome, ToolMessage)
    assert outcome.status == "error"
    assert "write_todos" in outcome.content
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Closure-toggle behavior — flip without rebuild
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_plan_mode_closure_toggle_changes_behavior() -> None:
    """Flipping the flag read by ``is_active`` changes behavior on the same instance.

    One middleware and one ``write_file`` request are reused across three
    calls: flag off (passes), flag on (blocked), flag off again (passes).
    """
    # Mutable cell captured by the lambda; the middleware is never rebuilt.
    state = {"on": False}
    mw = PlanModeMiddleware(is_active=lambda: state["on"])

    req = _FakeToolRequest(tool_call={"name": "write_file", "id": "w", "args": {}})

    # Off → passes.
    r1 = await mw.awrap_tool_call(req, _passthrough_handler)
    # Narrow the type before reading ``status``, matching the sibling tests.
    assert isinstance(r1, ToolMessage)
    assert r1.status != "error"

    # Flip on → blocks.
    state["on"] = True
    r2 = await mw.awrap_tool_call(req, _passthrough_handler)
    assert isinstance(r2, ToolMessage)
    assert r2.status == "error"

    # Flip back off → passes again.
    state["on"] = False
    r3 = await mw.awrap_tool_call(req, _passthrough_handler)
    assert isinstance(r3, ToolMessage)
    assert r3.status != "error"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Sync path mirrors async path
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_plan_mode_sync_wrap_tool_call() -> None:
    """The synchronous ``wrap_tool_call`` entry point also blocks write_file."""

    def _sync_passthrough(_: _FakeToolRequest) -> ToolMessage:
        # Synchronous twin of the async stub handler used by the other tests.
        return ToolMessage(content="EXECUTED", tool_call_id="t1", name="stub")

    middleware = PlanModeMiddleware(is_active=lambda: True)
    blocked_call: dict[str, Any] = {"name": "write_file", "id": "s1", "args": {}}

    outcome = middleware.wrap_tool_call(_FakeToolRequest(tool_call=blocked_call), _sync_passthrough)

    assert isinstance(outcome, ToolMessage)
    assert outcome.status == "error"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Blocklist constant sanity
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_blocklist_includes_all_known_write_tools() -> None:
    """Every known mutating tool name appears in the plan-mode blocklist."""
    for tool in ("write_file", "edit_file", "execute", "bash", "task"):
        assert tool in BLOCKED_TOOLS_IN_PLAN_MODE
|
|
|
|
|
|
def test_blocklist_excludes_read_only_tools() -> None:
    """None of the read-only tool names may appear in the plan-mode blocklist."""
    read_only = ("read_file", "glob", "grep", "ls")
    wrongly_blocked = [tool for tool in read_only if tool in BLOCKED_TOOLS_IN_PLAN_MODE]
    assert not wrongly_blocked
|
|
|
|
|
|
def test_blocklist_includes_write_todos() -> None:
    """``write_todos`` must be listed in the plan-mode blocklist."""
    is_listed = "write_todos" in BLOCKED_TOOLS_IN_PLAN_MODE
    assert is_listed
|