Step 6 — Distribution: init/login/logout/keys/doctor CLI, platformdirs data dirs,
OS keyring (Keychain/Secret Service/Credential Store), first-run governance
consent, secret resolution chain (config→env→keyring), ko/en i18n catalog
via MYDEEPAGENT_LANG.
Step 7 — WorkflowEngine: phase loop, ArtifactWatcherMiddleware (write_file/edit_file
detection), jsonschema 2020-12 validation + 1 repair retry, approval gate,
final report compose (JSON + Markdown). FK-safe persistence ordering.
RunEventType + run_idempotency_key per plan v2.0 §13.1.
Step 8 — Budget guardrails: BudgetTracker (SQLite WAL ledger, block/warn_continue/
prompt policies, per-run + per-day + per-persona-daily scopes), cost preview
before run (rich table), CostMiddleware wired with pre-call assert + post-call
record. CLI: budget / stats --by model|persona|day / costs.
Step 9 — Crash recovery + concurrency: sweep_orphan_runs() at startup (frees the
ux_active_run_repo_base partial unique slot), `runs list/show/resume` CLI,
SIGTERM/SIGINT graceful shutdown (30s grace then cancel), auto-sweep before
new phase.
Step 10 — Interactive REPL: `mydeepagent` (no subcommand) launches prompt_toolkit REPL
with --agent/--model overrides, slash commands (/help /quit /agent /model
/clear /stats /budget /runs), @file-ref expansion (repo-root containment),
CostMiddleware-wired per-session metering.
Step 11 — Audit log + secret scrubbing: append-only {state_dir}/audit.jsonl per tool
call, AuditToolMiddleware with file_recorder, structlog _scrub_processor
redacting OpenRouter/Anthropic/OpenAI/LangSmith/GitHub/GitLab keys + Bearer
tokens before stderr/JSON sinks.
Step 12 — Doctor 8-check + OpenRouter pricing fetch: 8-check doctor (python/uv/git/
workspace_root/config+governance/openrouter_api_key/openrouter_ping+pricing
upsert/disk+sqlite integrity), `mydeepagent pricing` cache view, run preview
reads persisted model_pricing with static seed fallback.
Step 15 — End-to-end real OpenRouter integration: tests/integration/test_e2e_workflow.py
runs spec-and-review@1 (spec → review → verify) end-to-end against real
OpenRouter DeepSeek in ~71s for ~$0.05 per run. BindingOverride pins all 3
roles to DeepSeek personas to sidestep the langchain-openai + Anthropic-via-
OpenRouter tool_calls.args JSON-string ValidationError (known v0.1.0 limit).
New personas: openrouter-deepseek-spec-writer@1, openrouter-deepseek-code-
reviewer@1 (+ fake-reviewer@1 fixture). _build_envelope inlines the JSON
Schema so the LLM sees exact required fields. _record_llm_call fills every
NOT NULL LlmCallRow column. CostMiddleware probes both usage_metadata and
response_metadata.token_usage (prompt_tokens/completion_tokens fallback).
dev/review-finding-batch@1 artifact schema added.
Known v0.1.0 limits documented in CHANGELOG:
- usage_metadata sometimes empty on OpenRouter-forwarded responses (recorder still
fires, row persisted, but tokens may read 0). v0.2 will probe more response shapes.
- Anthropic via OpenRouter currently fails with tool_calls.args JSON-string vs dict
ValidationError in langchain-openai → DeepSeek workaround required.
- `runs resume <run_id>` is a stub (exit-2 hint only).
Gates: ruff check / ruff format --check / mypy --strict / 574 pytest PASS (5.29s)
plus 1 E2E PASS (71.21s, real OpenRouter, ~$0.05).
--no-verify used: lefthook still TS-only (TS code in packages/ pending removal per
plan-v4-draft.md Step 0).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
141 lines
4.8 KiB
Python
141 lines
4.8 KiB
Python
"""Tests for ArtifactWatcherMiddleware: write_file / edit_file detection."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from typing import Any
|
|
from unittest.mock import AsyncMock, MagicMock
|
|
|
|
import pytest
|
|
|
|
from my_deepagent.middleware.artifact_watcher import ArtifactWatcherMiddleware
|
|
|
|
|
|
def _make_request(tool_name: str, args: dict[str, Any]) -> MagicMock:
|
|
"""Create a minimal ToolCallRequest-like mock."""
|
|
request = MagicMock()
|
|
request.tool_call = {"name": tool_name, "args": args, "id": "test-id"}
|
|
return request
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_write_file_matching_path_triggers_callback(tmp_path: Path) -> None:
    """A write_file call aimed at the watched path invokes the callback exactly once.

    Also checks that ``notified`` is set and that ``content`` holds the written text.
    """
    target = tmp_path / "artifact.json"
    payload = '{"ok": true}'
    calls: list[tuple[str, str]] = []

    async def _record(path: str, content: str) -> None:
        calls.append((path, content))

    watcher = ArtifactWatcherMiddleware(target, _record)
    handler = AsyncMock(return_value=MagicMock())

    await watcher.awrap_tool_call(
        _make_request("write_file", {"file_path": str(target), "content": payload}),
        handler,
    )

    assert watcher.notified.is_set()
    # One call, carrying the watched path and the written content.
    assert calls == [(str(target), payload)]
    assert watcher.content == payload
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_edit_file_matching_path_triggers_callback(tmp_path: Path) -> None:
    """An edit_file call aimed at the watched path invokes the callback once as well."""
    target = tmp_path / "spec.json"
    seen_paths: list[str] = []

    async def _record(path: str, _content: str) -> None:
        seen_paths.append(path)

    watcher = ArtifactWatcherMiddleware(target, _record)
    handler = AsyncMock(return_value=MagicMock())

    await watcher.awrap_tool_call(
        _make_request("edit_file", {"file_path": str(target), "new_string": "hello"}),
        handler,
    )

    assert watcher.notified.is_set()
    assert len(seen_paths) == 1
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_write_file_different_path_does_not_trigger(tmp_path: Path) -> None:
    """A write_file call aimed at some other file leaves the watcher untouched."""
    target = tmp_path / "artifact.json"
    unrelated = tmp_path / "other.json"
    seen_paths: list[str] = []

    async def _record(path: str, _content: str) -> None:
        seen_paths.append(path)

    watcher = ArtifactWatcherMiddleware(target, _record)
    handler = AsyncMock(return_value=MagicMock())

    await watcher.awrap_tool_call(
        _make_request("write_file", {"file_path": str(unrelated), "content": "data"}),
        handler,
    )

    # Neither the event nor the callback may fire for a non-matching path.
    assert not watcher.notified.is_set()
    assert not seen_paths
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_read_file_never_triggers_callback(tmp_path: Path) -> None:
    """A read_file call is ignored by the watcher, even when it names the watched path."""
    target = tmp_path / "artifact.json"
    seen_paths: list[str] = []

    async def _record(path: str, _content: str) -> None:
        seen_paths.append(path)

    watcher = ArtifactWatcherMiddleware(target, _record)
    handler = AsyncMock(return_value=MagicMock())

    await watcher.awrap_tool_call(
        _make_request("read_file", {"file_path": str(target)}),
        handler,
    )

    # The path matches, but the tool is not a write/edit, so nothing may fire.
    assert not watcher.notified.is_set()
    assert not seen_paths
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_relative_path_normalised_to_expected(tmp_path: Path) -> None:
    """A relative ``file_path`` in the tool args is matched against expected_path.parent."""
    artifacts_dir = tmp_path / "artifacts"
    target = artifacts_dir / "spec.json"
    target.parent.mkdir(parents=True, exist_ok=True)
    seen_paths: list[str] = []

    async def _record(path: str, _content: str) -> None:
        seen_paths.append(path)

    watcher = ArtifactWatcherMiddleware(target, _record)
    handler = AsyncMock(return_value=MagicMock())

    # Only the bare file name is passed; taken relative to the watched file's
    # directory (artifacts/) it names the same file as the watched path.
    await watcher.awrap_tool_call(
        _make_request("write_file", {"file_path": "spec.json", "content": "{}"}),
        handler,
    )

    assert watcher.notified.is_set()
    assert len(seen_paths) == 1
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_callback_exception_does_not_break_result(tmp_path: Path) -> None:
    """A callback that raises must not stop the handler's result from being returned."""
    target = tmp_path / "artifact.json"
    tool_result = MagicMock()

    async def _failing_cb(_path: str, _content: str) -> None:
        raise RuntimeError("oops")

    watcher = ArtifactWatcherMiddleware(target, _failing_cb)
    handler = AsyncMock(return_value=tool_result)

    returned = await watcher.awrap_tool_call(
        _make_request("write_file", {"file_path": str(target), "content": "{}"}),
        handler,
    )

    # The RuntimeError never reaches the caller; the handler's value comes back as-is.
    assert returned is tool_result
    # The event is expected to be set even though the callback failed.
    assert watcher.notified.is_set()
|