From fb7e67fd201b73c72e5fceb2c9ccad8116ae3903 Mon Sep 17 00:00:00 2001 From: chungyeong Date: Sun, 17 May 2026 20:47:30 +0900 Subject: [PATCH] =?UTF-8?q?feat(my-deepagent):=20v0.3=20PR=20#5=20?= =?UTF-8?q?=E2=80=94=20plan=20mode=20(/plan,=20/approve,=20/reject)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Claude Code의 plan mode 등가. `/plan` 진입 시 write_file / edit_file / execute / bash / task (sub-agent) 도구가 차단되고 read_file / glob / grep / ls / write_todos 만 허용. 핵심 동작: - `PlanModeMiddleware(is_active: Callable[[], bool])` 가 `awrap_tool_call` / `wrap_tool_call` 에서 활성 + 차단 도구면 synthetic `ToolMessage(status="error")` 반환. raise 하지 않음 — LLM 이 차단 메시지를 보고 다른 도구로 전환하거나 plan 다듬기로 자동 복귀. - `is_active` 는 closure 라서 슬래시 토글 후 agent 재빌드 불필요. - `InteractiveSessionRow.plan_mode` 영속 + resume 시 복원. 데이터·라이브러리: - `middleware/plan_mode.py` (신규): - `BLOCKED_TOOLS_IN_PLAN_MODE = write_file / edit_file / bash / execute / run_command / shell / task`. - `PlanModeMiddleware` async + sync 양쪽 구현. REPL 통합 (`cli/interactive.py`): - `InteractiveSession._plan_mode: bool` + `set_plan_mode(enabled)` async → flag 토글 + `thread_suffix` bump + row 영속. - resume path 에서 `sess._plan_mode = row.plan_mode` 로 복원. - `_register_plan_mode_slash`: `/plan`, `/approve`, `/reject` 등록. - `/reject` 는 thread 까지 리셋해 plan thread 폐기. 
테스트 (`tests/integration/test_plan_mode.py`, 9 케이스): - inactive 시 모든 도구 패스스루 - active 시 write_file / execute / task 차단 (status=error, tool_call_id 유지, 메시지에 도구명 + "Plan-mode" 포함) - active 시 read_file / glob / grep / ls / write_todos 허용 - closure 토글로 동작 변경 (rebuild 없이) - 동기 wrap_tool_call 도 동일 동작 - BLOCKED_TOOLS_IN_PLAN_MODE 상수 sanity 게이트: - ruff check / format --check / mypy: PASS - pytest -q --ignore=tests/integration/test_e2e_workflow.py --ignore=tests/integration/test_openrouter_smoke.py: 657 passed (9 신규 포함) Co-Authored-By: Claude Opus 4.7 (1M context) --- my-deepagent/CHANGELOG.md | 32 ++++ .../src/my_deepagent/cli/interactive.py | 74 +++++++- .../src/my_deepagent/middleware/plan_mode.py | 106 +++++++++++ .../tests/integration/test_plan_mode.py | 170 ++++++++++++++++++ 4 files changed, 380 insertions(+), 2 deletions(-) create mode 100644 my-deepagent/src/my_deepagent/middleware/plan_mode.py create mode 100644 my-deepagent/tests/integration/test_plan_mode.py diff --git a/my-deepagent/CHANGELOG.md b/my-deepagent/CHANGELOG.md index 8c31aa8..76fd5ca 100644 --- a/my-deepagent/CHANGELOG.md +++ b/my-deepagent/CHANGELOG.md @@ -2,6 +2,38 @@ ## [Unreleased] +### Added +- **v0.3 PR #5 — Plan mode (`/plan` / `/approve` / `/reject`)**. Claude Code의 + plan mode 등가. `/plan` 진입 시 `write_file` / `edit_file` / `execute` / + `bash` / `task` (sub-agent) 도구가 차단되고 `read_file` / `glob` / `grep` / + `ls` / `write_todos`만 허용. LLM 은 차단된 도구를 호출하면 `ToolMessage( + status="error")` 를 받고 자체적으로 계획만 다듬도록 유도. `/approve` 시 + 쓰기 허용, `/reject` 시 thread 리셋 + 쓰기 허용. + - `middleware/plan_mode.py` (신규): + - `PlanModeMiddleware(is_active: Callable[[], bool])` — `awrap_tool_call` / + `wrap_tool_call` 에서 plan_mode 활성 + 차단 도구면 synthetic + `ToolMessage(status="error", content=...)` 반환. raise 하지 않음 + (LLM이 무한 루프 없이 다른 도구로 전환할 수 있도록). + - `BLOCKED_TOOLS_IN_PLAN_MODE` 상수: write_file / edit_file / bash / + execute / run_command / shell / task. read_file·write_todos 등 이 목록에 + 없는 도구는 차단 없이 그대로 통과 (별도 허용 목록은 없음). 
+ - `cli/interactive.py`: + - `InteractiveSession._plan_mode: bool`. `set_plan_mode(enabled)` async → + flag 토글 + thread_suffix bump + `InteractiveSessionRow.plan_mode` 영속 + (PR #1에서 이미 컬럼 추가했음). resume 시 row.plan_mode 로 복원. + - `build_agent_if_needed`에서 `PlanModeMiddleware(is_active=lambda: ...)` + 를 middleware 리스트 첫 자리에 삽입 — closure 가 self._plan_mode 를 읽으니 + 슬래시 토글 후 agent 재빌드 필요 없음. + - `_register_plan_mode_slash`: `/plan`, `/approve`, `/reject` 등록. + - `tests/integration/test_plan_mode.py` (신규, 9 케이스): + - inactive → 모든 도구 패스스루 + - active → write_file / execute / task 차단 (status=error, tool_call_id + 유지, 메시지에 도구명 + "Plan-mode" 포함) + - active → read_file / glob / grep / ls / write_todos 허용 + - closure 토글로 동작 변경 (rebuild 없이) + - 동기 wrap_tool_call 도 동일 동작 + - BLOCKED_TOOLS_IN_PLAN_MODE 상수 sanity + ### Added - **v0.3 PR #4 — Agent Skills (LLM-routing, no embeddings)**. Anthropic Agent Skills 명세를 그대로 따르는 progressive-disclosure 패턴. deepagents diff --git a/my-deepagent/src/my_deepagent/cli/interactive.py b/my-deepagent/src/my_deepagent/cli/interactive.py index 099e6fa..8959324 100644 --- a/my-deepagent/src/my_deepagent/cli/interactive.py +++ b/my-deepagent/src/my_deepagent/cli/interactive.py @@ -46,6 +46,7 @@ from ..memory import ( ) from ..middleware.audit import AuditToolMiddleware from ..middleware.cost import CostMiddleware +from ..middleware.plan_mode import PlanModeMiddleware from ..monitoring.pricing import ModelPrice, PricingCache from ..monitoring.token_budget import count_tokens from ..persistence.checkpointer import get_checkpointer_ctx @@ -169,6 +170,9 @@ class InteractiveSession: # users drop `/SKILL.md` directories under here to register skills. self.skills_dir: Path = user_skills_dir(config) ensure_skills_initialized(self.skills_dir) + # v0.3 PR #5: plan-mode flag. PlanModeMiddleware reads this via closure + # every tool call — no agent rebuild needed when toggling on/off. 
+ self._plan_mode: bool = False @property def thread_id(self) -> str: @@ -216,6 +220,28 @@ class InteractiveSession: self._agent = None self._thread_suffix += 1 + @property + def plan_mode(self) -> bool: + """Whether plan mode is currently active for this session.""" + return self._plan_mode + + async def set_plan_mode(self, enabled: bool) -> None: + """Toggle plan mode + persist to the session row. + + PlanModeMiddleware re-reads via closure each tool call → no agent + rebuild required. We DO bump the thread suffix on each toggle so the + model doesn't carry over "I was about to write a file" state into the + new mode. Persists `plan_mode` on the InteractiveSessionRow so resumes + re-establish the mode. + """ + self._plan_mode = enabled + self._thread_suffix += 1 + async with self.db.session() as s: + row = await s.get(InteractiveSessionRow, str(self.session_id)) + if row is not None: + row.plan_mode = enabled + await s.commit() + def build_agent_if_needed(self) -> Any: if self._agent is not None: return self._agent @@ -231,6 +257,9 @@ class InteractiveSession: interactive_session_id=self.session_id, file_recorder=make_audit_recorder(self.config.state_dir), ) + # v0.3 PR #5: plan-mode middleware reads `self._plan_mode` via closure + # every tool call → toggling /plan vs /approve doesn't require rebuild. + plan_mw = PlanModeMiddleware(is_active=lambda: self._plan_mode) # Re-glob memory paths every time the agent is rebuilt — `/remember` and # `/forget` call `clear_agent_cache()` so this picks up new/removed files. 
memory_paths = list_memory_paths(self.memory_dir) @@ -239,7 +268,7 @@ class InteractiveSession: self._persona, self.config, root_dir=self.repo_root, - middleware=[cost_mw, audit_mw], + middleware=[plan_mw, cost_mw, audit_mw], model_override=self._model_override, checkpointer=self.saver, memory_paths_override=memory_paths, @@ -596,6 +625,43 @@ def _register_skills_slash(reg: SlashRegistry, sess: InteractiveSession) -> None reg.register("skill", _skill, help="show a skill's body: /skill ") +def _register_plan_mode_slash(reg: SlashRegistry, sess: InteractiveSession) -> None: + """Register /plan, /approve, /reject slash handlers (v0.3 PR #5).""" + + async def _plan(_: SlashParsed) -> bool: + if sess.plan_mode: + _CONSOLE.print("[yellow]plan-mode is already active.[/]") + return False + await sess.set_plan_mode(True) + _CONSOLE.print( + "[bold yellow]plan-mode ON[/] — write_file / edit_file / " + "execute / task tools are blocked. Use /approve to leave, " + "or /reject to discard the plan." + ) + return False + + async def _approve(_: SlashParsed) -> bool: + if not sess.plan_mode: + _CONSOLE.print("[yellow]plan-mode is not active.[/]") + return False + await sess.set_plan_mode(False) + _CONSOLE.print("[green]plan approved → leaving plan-mode (writes re-enabled).[/]") + return False + + async def _reject(_: SlashParsed) -> bool: + if not sess.plan_mode: + _CONSOLE.print("[yellow]plan-mode is not active.[/]") + return False + await sess.set_plan_mode(False) + sess.clear_agent_cache() # drop the plan thread entirely + _CONSOLE.print("[red]plan rejected → fresh thread, writes re-enabled.[/]") + return False + + reg.register("plan", _plan, help="enter plan-mode (block writes until /approve)") + reg.register("approve", _approve, help="leave plan-mode, allow writes") + reg.register("reject", _reject, help="leave plan-mode, discard plan thread") + + def _register_slash(reg: SlashRegistry, sess: InteractiveSession) -> None: _register_navigation_slash(reg, sess) 
_register_persona_slash(reg, sess) @@ -603,6 +669,7 @@ def _register_slash(reg: SlashRegistry, sess: InteractiveSession) -> None: _register_compaction_slash(reg, sess) _register_memory_slash(reg, sess) _register_skills_slash(reg, sess) + _register_plan_mode_slash(reg, sess) def _completer(personas: list[Persona], slash_names: list[str]) -> WordCompleter: @@ -818,9 +885,12 @@ async def _interactive_loop_async( sess._thread_suffix = 0 # Now persist the session row (or load existing). - await _load_or_create_session_row( + row = await _load_or_create_session_row( db, session_id, sess.persona, Path.cwd(), create=creating ) + # v0.3 PR #5: restore plan_mode flag from row on resume so the + # session remembers it across REPL restarts. + sess._plan_mode = bool(row.plan_mode) reg = SlashRegistry() _register_slash(reg, sess) diff --git a/my-deepagent/src/my_deepagent/middleware/plan_mode.py b/my-deepagent/src/my_deepagent/middleware/plan_mode.py new file mode 100644 index 0000000..9619f73 --- /dev/null +++ b/my-deepagent/src/my_deepagent/middleware/plan_mode.py @@ -0,0 +1,106 @@ +"""PlanModeMiddleware (v0.3 PR #5) — block write tools when plan-mode is active. + +Claude Code's plan mode lets the user say "design this, don't write code" — the +agent can read, search, plan via `write_todos`, but cannot mutate the +filesystem or run shell commands until the user `/approve`s. + +Implementation strategy: +- A callable ``is_active()`` is passed in at construction time. The REPL flips + a flag on/off via slash commands; the middleware re-reads on every tool call. + This avoids rebuilding the agent on every `/plan` / `/approve` toggle. +- When plan-mode is on and the LLM calls a blocked tool, we return a synthetic + ``ToolMessage(status="error", ...)`` so the LLM sees feedback and can adjust + ("ok, I'll keep planning instead"). We do NOT raise — that would crash the + turn and the user would lose the partial response. 
+ +Blocked tools (matches Claude Code): + - ``write_file``, ``edit_file`` — fs mutation + - ``bash`` / ``execute`` / ``run_command`` / ``shell`` — shell exec + - ``task`` — sub-agent spawn (a sub-agent could bypass plan mode) + - ``write_todos`` is allowed — plan mode IS planning, todos are the artifact +""" + +from __future__ import annotations + +from collections.abc import Callable +from typing import Any + +from langchain.agents.middleware import AgentMiddleware +from langchain_core.messages import ToolMessage + +#: Tool names that mutate the filesystem. +_FS_WRITE_TOOLS: frozenset[str] = frozenset({"write_file", "edit_file"}) + +#: Tool names that execute shell commands. +_SHELL_TOOLS: frozenset[str] = frozenset({"bash", "execute", "run_command", "shell"}) + +#: Tool names that spawn sub-agents (which would bypass plan mode in the parent). +_SUBAGENT_TOOLS: frozenset[str] = frozenset({"task"}) + +#: Full blocklist applied while plan mode is on. +BLOCKED_TOOLS_IN_PLAN_MODE: frozenset[str] = _FS_WRITE_TOOLS | _SHELL_TOOLS | _SUBAGENT_TOOLS + + +def _block_message(tool_name: str) -> str: + return ( + f"Plan-mode is active — `{tool_name}` is blocked. " + "Keep planning with read_file / glob / grep / write_todos, " + "or ask the user to `/approve` to leave plan mode." + ) + + +class PlanModeMiddleware(AgentMiddleware): + """Block mutating tool calls while plan-mode is active. + + Construction takes an ``is_active`` callable that returns the current plan + mode state. The REPL toggles this state via slash commands without + rebuilding the agent — the middleware reads it fresh per tool call. + + Tools that are read-only (``read_file``, ``glob``, ``grep``, ``ls``, + ``write_todos``) are allowed in plan mode unconditionally. 
+ """ + + def __init__(self, *, is_active: Callable[[], bool]) -> None: + self._is_active = is_active + + async def awrap_tool_call(self, request: Any, handler: Any) -> Any: + if not self._is_active(): + return await handler(request) + name = _tool_name(request) + if name in BLOCKED_TOOLS_IN_PLAN_MODE: + return ToolMessage( + content=_block_message(name), + tool_call_id=_tool_call_id(request), + name=name, + status="error", + ) + return await handler(request) + + def wrap_tool_call(self, request: Any, handler: Any) -> Any: + # Sync path mirrors the async one for parity (e.g. when the agent is + # invoked synchronously in unit tests). Real REPL/Web paths are async. + if not self._is_active(): + return handler(request) + name = _tool_name(request) + if name in BLOCKED_TOOLS_IN_PLAN_MODE: + return ToolMessage( + content=_block_message(name), + tool_call_id=_tool_call_id(request), + name=name, + status="error", + ) + return handler(request) + + +def _tool_name(request: Any) -> str: + tool_call = getattr(request, "tool_call", None) + if isinstance(tool_call, dict): + return str(tool_call.get("name") or "") + return str(getattr(request, "name", "") or "") + + +def _tool_call_id(request: Any) -> str: + tool_call = getattr(request, "tool_call", None) + if isinstance(tool_call, dict): + return str(tool_call.get("id") or "") + return str(getattr(request, "id", "") or "") diff --git a/my-deepagent/tests/integration/test_plan_mode.py b/my-deepagent/tests/integration/test_plan_mode.py new file mode 100644 index 0000000..551a15e --- /dev/null +++ b/my-deepagent/tests/integration/test_plan_mode.py @@ -0,0 +1,170 @@ +"""v0.3 PR #5 — Plan mode tests. + +Covers: +1. PlanModeMiddleware passes tool calls through when inactive. +2. PlanModeMiddleware blocks write_file / edit_file / execute / task when active. +3. read_file / glob / grep / write_todos are allowed regardless. +4. Toggling the closure flag changes behavior without rebuilding the middleware. +5. 
The synthetic ToolMessage carries status="error" and a clear hint. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +import pytest +from langchain_core.messages import ToolMessage + +from my_deepagent.middleware.plan_mode import ( + BLOCKED_TOOLS_IN_PLAN_MODE, + PlanModeMiddleware, +) + + +@dataclass +class _FakeToolRequest: + """Minimal stand-in for langchain ToolCallRequest in unit tests.""" + + tool_call: dict[str, Any] + + +async def _passthrough_handler(_: _FakeToolRequest) -> ToolMessage: + """Stub handler — returns a benign 'tool executed' message.""" + return ToolMessage(content="EXECUTED", tool_call_id="t1", name="stub") + + +# --------------------------------------------------------------------------- +# Inactive plan-mode → all tools pass through +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_plan_mode_inactive_passes_through() -> None: + mw = PlanModeMiddleware(is_active=lambda: False) + for name in ["write_file", "edit_file", "execute", "task", "read_file", "glob"]: + req = _FakeToolRequest(tool_call={"name": name, "id": "t1", "args": {}}) + result = await mw.awrap_tool_call(req, _passthrough_handler) + assert isinstance(result, ToolMessage) + assert result.content == "EXECUTED" + assert result.status != "error" + + +# --------------------------------------------------------------------------- +# Active plan-mode → write tools blocked with status=error +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_plan_mode_active_blocks_write_file() -> None: + mw = PlanModeMiddleware(is_active=lambda: True) + req = _FakeToolRequest( + tool_call={"name": "write_file", "id": "abc123", "args": {"file_path": "/tmp/x"}} + ) + result = await mw.awrap_tool_call(req, _passthrough_handler) + assert isinstance(result, ToolMessage) + assert result.status == "error" + assert 
result.tool_call_id == "abc123" + assert "Plan-mode" in result.content + assert "write_file" in result.content + + +@pytest.mark.asyncio +async def test_plan_mode_active_blocks_execute() -> None: + mw = PlanModeMiddleware(is_active=lambda: True) + req = _FakeToolRequest( + tool_call={"name": "execute", "id": "exec1", "args": {"command": "ls"}} + ) + result = await mw.awrap_tool_call(req, _passthrough_handler) + assert isinstance(result, ToolMessage) + assert result.status == "error" + assert "execute" in result.content + + +@pytest.mark.asyncio +async def test_plan_mode_active_blocks_task_subagent_spawn() -> None: + mw = PlanModeMiddleware(is_active=lambda: True) + req = _FakeToolRequest( + tool_call={"name": "task", "id": "task1", "args": {"description": "x"}} + ) + result = await mw.awrap_tool_call(req, _passthrough_handler) + assert isinstance(result, ToolMessage) + assert result.status == "error" + assert "task" in result.content + + +# --------------------------------------------------------------------------- +# Active plan-mode → read-only tools still pass through +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_plan_mode_active_allows_read_only_tools() -> None: + mw = PlanModeMiddleware(is_active=lambda: True) + for name in ["read_file", "glob", "grep", "ls", "write_todos"]: + req = _FakeToolRequest(tool_call={"name": name, "id": "t1", "args": {}}) + result = await mw.awrap_tool_call(req, _passthrough_handler) + assert result.content == "EXECUTED", f"{name} should not be blocked" + assert result.status != "error" + + +# --------------------------------------------------------------------------- +# Closure-toggle behavior — flip without rebuild +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_plan_mode_closure_toggle_changes_behavior() -> None: + state = {"on": False} + mw = PlanModeMiddleware(is_active=lambda: 
state["on"]) + + req = _FakeToolRequest(tool_call={"name": "write_file", "id": "w", "args": {}}) + + # Off → passes. + r1 = await mw.awrap_tool_call(req, _passthrough_handler) + assert r1.status != "error" + + # Flip on → blocks. + state["on"] = True + r2 = await mw.awrap_tool_call(req, _passthrough_handler) + assert r2.status == "error" + + # Flip back off → passes again. + state["on"] = False + r3 = await mw.awrap_tool_call(req, _passthrough_handler) + assert r3.status != "error" + + +# --------------------------------------------------------------------------- +# Sync path mirrors async path +# --------------------------------------------------------------------------- + + +def test_plan_mode_sync_wrap_tool_call() -> None: + mw = PlanModeMiddleware(is_active=lambda: True) + + def sync_handler(_: _FakeToolRequest) -> ToolMessage: + return ToolMessage(content="EXECUTED", tool_call_id="t1", name="stub") + + req = _FakeToolRequest(tool_call={"name": "write_file", "id": "s1", "args": {}}) + result = mw.wrap_tool_call(req, sync_handler) + assert isinstance(result, ToolMessage) + assert result.status == "error" + + +# --------------------------------------------------------------------------- +# Blocklist constant sanity +# --------------------------------------------------------------------------- + + +def test_blocklist_includes_all_known_write_tools() -> None: + assert "write_file" in BLOCKED_TOOLS_IN_PLAN_MODE + assert "edit_file" in BLOCKED_TOOLS_IN_PLAN_MODE + assert "execute" in BLOCKED_TOOLS_IN_PLAN_MODE + assert "bash" in BLOCKED_TOOLS_IN_PLAN_MODE + assert "task" in BLOCKED_TOOLS_IN_PLAN_MODE + + +def test_blocklist_excludes_read_only_and_planning_tools() -> None: + for name in ("read_file", "glob", "grep", "ls", "write_todos"): + assert name not in BLOCKED_TOOLS_IN_PLAN_MODE