Python rewrite of the agent harness on top of deepagents 0.6.1 + langchain 1.x, replacing the abandoned TS attempt in packages/. 388 unit/integration tests pass.

Steps
-----
0. Scaffolding — uv workspace, ruff/mypy/pre-commit/alembic, src/tests/docs trees with docs/schemas/ seeded from my-deepagent-seed/.
1. Core — config (pydantic-settings with MYDEEPAGENT_ env prefix and TOML source), enums (Backend, Capability, RiskLevel, ApprovalDecisionAction, ApprovalState, RunState, RunPhaseState, SessionState, ErrorClass), errors (MyDeepAgentError + BudgetExhaustedError with PEP-3134 cause + context suppression), hash (canonical JSON + sha256).
2. Persona/Workflow/Binding — pydantic v2 schemas with tuple-based deep immutability (post-construction hash drift prevented), YAML loaders, deterministic auto-select (preferred_backends → version → name → hash), override resolution with ineligibility diagnostics, PersonaConsentStore with fcntl.flock + tmp+fsync+rename atomic write.
3. Artifact schema registry — Draft202012Validator, multi-root resolution, structured ValidationFinding output.
4. Persistence — 18 SQLAlchemy 2.0 async ORM models with FK CASCADE/RESTRICT, WAL + busy_timeout + foreign_keys PRAGMA, alembic baseline + ux_active_run_repo_base partial unique index, LangGraph SqliteSaver as context manager only (lifecycle safety).
5. DeepAgent session — build_agent wires Persona → create_deep_agent with LocalShellBackend / FilesystemBackend / StateBackend / CompositeBackend, ChatOpenAI(base_url=openrouter) for openrouter: model strings, and 4 middleware classes (cost / audit-tool / safety-shell / fallback-model).

Critical workarounds
--------------------
- deepagents 0.6.1 rejects FilesystemPermission together with backends that implement SandboxBackendProtocol (LocalShellBackend). SafetyShellMiddleware enforces destructive-command and secret-path policy at the tool layer instead, and build_agent strips the permissions kwarg when the persona's deepagents_backend is local_shell.
- FilesystemOperation in deepagents is Literal['read', 'write'] only; _map_operations collapses our richer schema (read/write/edit/ls) safely.

Real OpenRouter smoke
---------------------
test_openrouter_deepagents_local_shell_smoke calls DeepSeek via deepagents + LocalShellBackend + SafetyShellMiddleware end-to-end. PASS, ~$0.000001 cost, input=9 / output=1 tokens with content "OK".

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
336 lines
11 KiB
Python
336 lines
11 KiB
Python
"""Unit tests for src/my_deepagent/workflow.py."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
from pydantic import ValidationError
|
|
|
|
from my_deepagent.workflow import (
|
|
ExpectedArtifact,
|
|
WorkflowTemplate,
|
|
load_workflow_yaml,
|
|
load_workflows_from_dir,
|
|
)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Seed workflow YAML directory: docs/schemas/workflows under the third ancestor of this file's path.
WORKFLOWS_DIR = Path(__file__).parent.parent.parent / "docs" / "schemas" / "workflows"
|
|
|
|
|
|
def _minimal_role(**overrides: object) -> dict[str, object]:
|
|
base: dict[str, object] = {
|
|
"id": "spec_writer",
|
|
"required_capabilities": ["spec_write"],
|
|
}
|
|
base.update(overrides)
|
|
return base
|
|
|
|
|
|
def _minimal_phase(**overrides: object) -> dict[str, object]:
|
|
base: dict[str, object] = {
|
|
"key": "spec",
|
|
"title": "Write spec",
|
|
"risk": "low",
|
|
"role": "spec_writer",
|
|
"instructions": "Write the specification document for the feature.",
|
|
}
|
|
base.update(overrides)
|
|
return base
|
|
|
|
|
|
def _minimal_template(**overrides: object) -> dict[str, object]:
    """Return a fresh minimal workflow-template mapping with *overrides* applied.

    The defaults contain one role and one phase built by ``_minimal_role`` and
    ``_minimal_phase``; keyword arguments replace or extend the top-level keys.
    """
    defaults: dict[str, object] = {
        "name": "test-workflow",
        "version": 1,
        "roles": [_minimal_role()],
        "phases": [_minimal_phase()],
    }
    return {**defaults, **overrides}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Seed yaml: all 3 load successfully
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_all_seed_workflows_load() -> None:
    """The seed directory yields exactly three workflow templates."""
    loaded = load_workflows_from_dir(WORKFLOWS_DIR)
    seed_count = len(loaded)
    assert seed_count == 3
|
|
|
|
|
|
def test_seed_workflow_names() -> None:
    """The loaded seed workflows carry exactly the three expected names."""
    expected = {"spec-and-review", "bug-fix-with-reproduction", "code-investigation"}
    found: set[str] = set()
    for template in load_workflows_from_dir(WORKFLOWS_DIR):
        found.add(template.name)
    assert found == expected
|
|
|
|
|
|
def test_seed_workflow_roles_non_empty() -> None:
    """Every seed workflow declares at least one role.

    ``load_workflows_from_dir`` returns ``[]`` for a missing directory (see
    ``test_load_workflows_from_dir_missing_dir``), so the loop alone would pass
    without checking anything; the non-empty guard prevents that.
    """
    workflows = load_workflows_from_dir(WORKFLOWS_DIR)
    # Guard against a vacuous pass when nothing was loaded.
    assert workflows, f"no seed workflows loaded from {WORKFLOWS_DIR}"
    for w in workflows:
        assert len(w.roles) >= 1, f"{w.name}: no roles defined"
|
|
|
|
|
|
def test_seed_workflow_phases_non_empty() -> None:
    """Every seed workflow declares at least one phase.

    ``load_workflows_from_dir`` returns ``[]`` for a missing directory (see
    ``test_load_workflows_from_dir_missing_dir``), so the loop alone would pass
    without checking anything; the non-empty guard prevents that.
    """
    workflows = load_workflows_from_dir(WORKFLOWS_DIR)
    # Guard against a vacuous pass when nothing was loaded.
    assert workflows, f"no seed workflows loaded from {WORKFLOWS_DIR}"
    for w in workflows:
        assert len(w.phases) >= 1, f"{w.name}: no phases defined"
|
|
|
|
|
|
def test_seed_workflow_phase_keys_unique() -> None:
    """Phase keys are unique within each seed workflow.

    ``load_workflows_from_dir`` returns ``[]`` for a missing directory (see
    ``test_load_workflows_from_dir_missing_dir``), so the loop alone would pass
    without checking anything; the non-empty guard prevents that.
    """
    workflows = load_workflows_from_dir(WORKFLOWS_DIR)
    # Guard against a vacuous pass when nothing was loaded.
    assert workflows, f"no seed workflows loaded from {WORKFLOWS_DIR}"
    for w in workflows:
        keys = [ph.key for ph in w.phases]
        assert len(keys) == len(set(keys)), f"{w.name}: duplicate phase keys"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# WorkflowTemplate validators
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_phase_references_undefined_role_raises() -> None:
    """A phase naming a role absent from ``roles`` fails validation."""
    declared_roles = [_minimal_role(id="spec_writer")]
    dangling_phases = [_minimal_phase(role="nonexistent_role")]
    payload = _minimal_template(roles=declared_roles, phases=dangling_phases)
    with pytest.raises(ValidationError, match="unknown role"):
        WorkflowTemplate.model_validate(payload)
|
|
|
|
|
|
def test_duplicate_phase_keys_raises() -> None:
    """Two phases sharing the same key fail validation."""
    declared_roles = [_minimal_role(id="spec_writer")]
    clashing_phases = [_minimal_phase(key="spec") for _ in range(2)]
    payload = _minimal_template(roles=declared_roles, phases=clashing_phases)
    with pytest.raises(ValidationError, match="duplicate phase keys"):
        WorkflowTemplate.model_validate(payload)
|
|
|
|
|
|
def test_duplicate_role_ids_raises() -> None:
    """Two roles sharing the same id fail validation."""
    clashing_roles = [_minimal_role(id="spec_writer") for _ in range(2)]
    phases = [_minimal_phase(role="spec_writer")]
    payload = _minimal_template(roles=clashing_roles, phases=phases)
    with pytest.raises(ValidationError, match="duplicate role ids"):
        WorkflowTemplate.model_validate(payload)
|
|
|
|
|
|
def test_phase_key_uppercase_raises() -> None:
    """An upper-case phase key fails validation."""
    bad_phase = _minimal_phase(key="SPEC")
    payload = _minimal_template(phases=[bad_phase])
    with pytest.raises(ValidationError):
        WorkflowTemplate.model_validate(payload)
|
|
|
|
|
|
def test_phase_key_with_hyphen_raises() -> None:
    """A hyphenated phase key fails validation (only a-z, 0-9 and _ are allowed)."""
    bad_phase = _minimal_phase(key="spec-one")
    payload = _minimal_template(phases=[bad_phase])
    with pytest.raises(ValidationError):
        WorkflowTemplate.model_validate(payload)
|
|
|
|
|
|
def test_phase_key_leading_digit_raises() -> None:
    """A phase key that starts with a digit fails validation."""
    bad_phase = _minimal_phase(key="1spec")
    payload = _minimal_template(phases=[bad_phase])
    with pytest.raises(ValidationError):
        WorkflowTemplate.model_validate(payload)
|
|
|
|
|
|
def test_phase_key_snake_case_ok() -> None:
    """A snake_case phase key validates and is stored unchanged."""
    snake_key = "spec_write_phase"
    payload = _minimal_template(phases=[_minimal_phase(key=snake_key)])
    template = WorkflowTemplate.model_validate(payload)
    first_phase = template.phases[0]
    assert first_phase.key == "spec_write_phase"
|
|
|
|
|
|
def test_role_id_pattern_invalid_raises() -> None:
    """A template whose role id is ``Spec-Writer`` fails validation."""
    bad_roles = [_minimal_role(id="Spec-Writer")]
    phases = [_minimal_phase(role="spec_writer")]
    payload = _minimal_template(roles=bad_roles, phases=phases)
    with pytest.raises(ValidationError):
        WorkflowTemplate.model_validate(payload)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ExpectedArtifact: alias mapping
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_expected_artifact_schema_alias() -> None:
    """The input key ``schema`` (as used in YAML) populates the ``schema_id`` attribute."""
    raw = {"path": "artifacts/spec.json", "schema": "dev/spec@1"}
    artifact = ExpectedArtifact.model_validate(raw)
    assert artifact.schema_id == "dev/spec@1"
    assert artifact.path == "artifacts/spec.json"
|
|
|
|
|
|
def test_expected_artifact_extra_field_raises() -> None:
    """An unrecognised key on an expected artifact fails validation."""
    raw = {"path": "x.json", "schema": "dev/spec@1", "unknown": True}
    with pytest.raises(ValidationError):
        ExpectedArtifact.model_validate(raw)
|
|
|
|
|
|
def test_expected_artifact_missing_schema_raises() -> None:
    """An expected artifact without a ``schema`` entry fails validation."""
    raw = {"path": "x.json"}
    with pytest.raises(ValidationError):
        ExpectedArtifact.model_validate(raw)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# WorkflowTemplate frozen + extra="forbid"
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_template_frozen() -> None:
    """Assigning to a field of a validated template raises."""
    frozen_template = WorkflowTemplate.model_validate(_minimal_template())
    # Either exception type is accepted as evidence that assignment is blocked.
    with pytest.raises((TypeError, ValidationError)):
        frozen_template.name = "mutated"  # type: ignore[misc]
|
|
|
|
|
|
def test_template_extra_field_raises() -> None:
    """An unrecognised top-level key on a template fails validation."""
    payload = _minimal_template(extra_unknown_field="oops")
    with pytest.raises(ValidationError):
        WorkflowTemplate.model_validate(payload)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# compute_hash: determinism
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_compute_hash_deterministic() -> None:
    """Twenty consecutive ``compute_hash`` calls on one template all agree."""
    template = WorkflowTemplate.model_validate(_minimal_template())
    distinct = {template.compute_hash() for _ in range(20)}
    assert len(distinct) == 1
|
|
|
|
|
|
def test_compute_hash_returns_64_char_hex() -> None:
    """``compute_hash`` returns exactly 64 lower-case hexadecimal characters."""
    template = WorkflowTemplate.model_validate(_minimal_template())
    digest = template.compute_hash()
    assert re.fullmatch(r"[0-9a-f]{64}", digest) is not None
|
|
|
|
|
|
def test_compute_hash_different_templates_differ() -> None:
    """Templates that differ only by name produce different hashes."""
    first = WorkflowTemplate.model_validate(_minimal_template(name="wf1"))
    second = WorkflowTemplate.model_validate(_minimal_template(name="wf2"))
    first_hash = first.compute_hash()
    second_hash = second.compute_hash()
    assert first_hash != second_hash
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# load_workflow_yaml: file not found
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_load_workflow_yaml_missing_file(tmp_path: Path) -> None:
    """Loading a path that does not exist raises ``FileNotFoundError``."""
    absent = tmp_path / "no.yaml"
    with pytest.raises(FileNotFoundError):
        load_workflow_yaml(absent)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# load_workflows_from_dir: duplicate detection + missing dir
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_load_workflows_from_dir_duplicate_raises(tmp_path: Path) -> None:
    """Two files holding the same workflow definition make the directory load fail."""
    import yaml

    # Same serialized template under two different file names.
    serialized = yaml.dump(_minimal_template())
    (tmp_path / "wf-a@1.yaml").write_text(serialized, encoding="utf-8")
    (tmp_path / "wf-b@1.yaml").write_text(serialized, encoding="utf-8")

    with pytest.raises(ValueError, match="duplicate workflow"):
        load_workflows_from_dir(tmp_path)
|
|
|
|
|
|
def test_load_workflows_from_dir_missing_dir() -> None:
    """A non-existent directory loads as an empty list instead of raising."""
    missing_dir = Path("/nonexistent_wf_dir_xyz")
    assert load_workflows_from_dir(missing_dir) == []
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Snapshot: seed hashes are stable
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_spec_and_review_hash_prefix() -> None:
    """Snapshot: the seeded ``spec-and-review`` workflow hash keeps its known prefix."""
    workflows = load_workflows_from_dir(WORKFLOWS_DIR)
    # A default of None turns a missing seed into a readable assertion failure
    # rather than a bare StopIteration from next().
    w = next((x for x in workflows if x.name == "spec-and-review"), None)
    assert w is not None, f"seed workflow 'spec-and-review' not found in {WORKFLOWS_DIR}"
    assert w.compute_hash().startswith("1c94587647b16f0d")
|
|
|
|
|
|
def test_bug_fix_hash_prefix() -> None:
    """Snapshot: the seeded ``bug-fix-with-reproduction`` workflow hash keeps its known prefix."""
    workflows = load_workflows_from_dir(WORKFLOWS_DIR)
    # A default of None turns a missing seed into a readable assertion failure
    # rather than a bare StopIteration from next().
    w = next((x for x in workflows if x.name == "bug-fix-with-reproduction"), None)
    assert w is not None, (
        f"seed workflow 'bug-fix-with-reproduction' not found in {WORKFLOWS_DIR}"
    )
    assert w.compute_hash().startswith("a137c9656f10e88a")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Step 2 patch: Counter-based duplicate role ids report is sorted
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_workflow_duplicate_role_ids_reported_sorted() -> None:
    """When several role ids are duplicated, the error lists them in sorted order."""
    # Ids are declared as b, a, a, b, so first-seen order would put 'b' before 'a'.
    roles = [
        {"id": role_id, "required_capabilities": ["spec_write"]}
        for role_id in ("b", "a", "a", "b")
    ]
    phase = {
        "key": "x",
        "title": "x",
        "risk": "low",
        "role": "a",
        "instructions": "x" * 20,
    }
    payload = {"name": "x", "version": 1, "roles": roles, "phases": [phase]}
    with pytest.raises(ValidationError, match=r"duplicate role ids: \['a', 'b'\]"):
        WorkflowTemplate.model_validate(payload)
|
|
|
|
|
|
def test_code_investigation_hash_prefix() -> None:
    """Snapshot: the seeded ``code-investigation`` workflow hash keeps its known prefix."""
    workflows = load_workflows_from_dir(WORKFLOWS_DIR)
    # A default of None turns a missing seed into a readable assertion failure
    # rather than a bare StopIteration from next().
    w = next((x for x in workflows if x.name == "code-investigation"), None)
    assert w is not None, f"seed workflow 'code-investigation' not found in {WORKFLOWS_DIR}"
    assert w.compute_hash().startswith("5b80ea2e248d5232")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Deep immutability: nested list-valued fields are tuples (cannot be mutated)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_workflow_phases_immutable() -> None:
    """Calling ``.append()`` on ``phases`` raises AttributeError or TypeError."""
    template = WorkflowTemplate.model_validate(_minimal_template())
    with pytest.raises((AttributeError, TypeError)):
        template.phases.append(None)  # type: ignore[attr-defined]
|
|
|
|
|
|
def test_workflow_roles_immutable() -> None:
    """Calling ``.append()`` on ``roles`` raises AttributeError or TypeError."""
    template = WorkflowTemplate.model_validate(_minimal_template())
    with pytest.raises((AttributeError, TypeError)):
        template.roles.append(None)  # type: ignore[attr-defined]
|
|
|
|
|
|
def test_workflow_role_required_capabilities_immutable() -> None:
    """Calling ``.append()`` on ``required_capabilities`` raises AttributeError or TypeError."""
    from my_deepagent.workflow import WorkflowRole

    raw_role = {"id": "spec_writer", "required_capabilities": ["spec_write"]}
    validated_role = WorkflowRole.model_validate(raw_role)
    with pytest.raises((AttributeError, TypeError)):
        validated_role.required_capabilities.append(None)  # type: ignore[attr-defined]
|