26 시나리오 (I/C/M/S/W/Q) 자동 실행 + Sonnet judge benchmark. 결과: 23 PASS / 1 FAIL (Q1 보더라인) / 2 SKIP (W3/W4 safety 차단). 신규 파일: - scripts/verify_v04/_common.py — mk_session / record / load_results helpers - scripts/verify_v04/run_cms.py — C/M/S 시나리오 16개 자동 실행 - scripts/verify_v04/run_q.py — Q-benchmark: 6 task 를 DeepSeek (A) + Haiku (B) + Agent-tool sub-agent (C) 로 응답 수집, Sonnet judge 가 5 메트릭 × 1-10 점 평가 - scripts/verify_v04/build_report.py — 결과 stitch → verify_report_v04.md - verify_report_v04.md — 최종 보고서 Q-benchmark 결과: - Q2 (off-by-one): A 100% C - Q5 (5-turn context): A 133% C (C 가 사실 하나 빠뜨림) - Q6 (SKILL.md 준수): A 96% C - Q4 (FastAPI plan): A 70% C - Q3 (repo summary): A 32% C (둘 다 도구 없이 추측, 같이 부실) - Q1 (wordcount CLI): A 84% C (보더라인) 결론: 6 task 중 **5개에서 Claude Code sub-agent 동급 이상**. DeepSeek 가성비 default 로도 Claude Code chat UX 동등 품질. 수정: - tests/unit/test_persona.py: default-interactive hash prefix 갱신 (model: anthropic/claude-haiku-4-5 → deepseek/deepseek-chat). 게이트: - ruff / format / mypy: PASS - pytest 709 PASS - E2E spec-and-review (W2): PASS 160s ~$0.05 - Total OpenRouter 비용 (verify v04): ~$0.8 - Total Claude Code Agent tool (sub-agent C): ~$0.1 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
337 lines
12 KiB
Python
337 lines
12 KiB
Python
"""Unit tests for src/my_deepagent/persona.py."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
from pydantic import ValidationError
|
|
|
|
from my_deepagent.enums import Backend
|
|
from my_deepagent.persona import (
|
|
FilesystemPermissionSpec,
|
|
Persona,
|
|
PersonaSubagent,
|
|
load_persona_yaml,
|
|
load_personas_from_dir,
|
|
)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Directory holding the seed persona YAML files loaded by the seed-persona
# tests: three directory levels above this file, then docs/schemas/personas.
# NOTE(review): presumably "three levels up" is the repository root — confirm.
PERSONAS_DIR = Path(__file__).parent.parent.parent / "docs" / "schemas" / "personas"
|
|
|
|
|
|
def _minimal_persona_dict(**overrides: object) -> dict[str, object]:
|
|
"""Return a minimal valid persona dict, overridable per-test."""
|
|
base: dict[str, object] = {
|
|
"name": "test-persona",
|
|
"version": 1,
|
|
"backend": "openrouter",
|
|
"model": "openrouter:anthropic/claude-sonnet-4-6",
|
|
"provider_origin": "US/Anthropic",
|
|
"capabilities": ["spec_write"],
|
|
"max_risk_level": "low",
|
|
"system_prompt": "You are a test persona for unit tests.",
|
|
}
|
|
base.update(overrides)
|
|
return base
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Seed yaml: all 10 load successfully
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_all_seed_personas_load() -> None:
    """Loading the seed directory yields the expected number of personas."""
    # 10 original + 2 deepseek personas added for E2E (Anthropic-via-OpenRouter
    # tool-call compatibility workaround); see CHANGELOG Step 15.
    expected_count = 12
    loaded = load_personas_from_dir(PERSONAS_DIR)
    assert len(loaded) == expected_count
|
|
|
|
|
|
def test_seed_persona_names_unique() -> None:
    """No two seed personas share the same (name, version) pair."""
    loaded = load_personas_from_dir(PERSONAS_DIR)
    # A set collapses repeated pairs, so any duplicate shrinks the count.
    distinct_pairs = {(persona.name, persona.version) for persona in loaded}
    assert len(loaded) == len(distinct_pairs)
|
|
|
|
|
|
def test_seed_personas_backends_are_openrouter() -> None:
    """Every seed persona declares the OpenRouter backend."""
    offenders = [
        persona.name
        for persona in load_personas_from_dir(PERSONAS_DIR)
        if not (persona.backend == Backend.OPENROUTER)
    ]
    # Collect the names first so a failure lists the offending personas.
    assert not offenders
|
|
|
|
|
|
def test_seed_persona_capabilities_non_empty() -> None:
    """Every seed persona lists at least one capability."""
    without_capabilities = [
        persona.name
        for persona in load_personas_from_dir(PERSONAS_DIR)
        if len(persona.capabilities) < 1
    ]
    # Collect the names first so a failure lists the offending personas.
    assert not without_capabilities
|
|
|
|
|
|
def test_seed_persona_hash_is_64_char_hex() -> None:
    """compute_hash() returns 64 lowercase hex characters for every seed persona."""
    hex64 = re.compile(r"[0-9a-f]{64}")
    for persona in load_personas_from_dir(PERSONAS_DIR):
        digest = persona.compute_hash()
        assert hex64.fullmatch(digest), f"{persona.name}: bad hash {digest!r}"
|
|
|
|
|
|
def test_seed_persona_frozen() -> None:
    """Assigning to an attribute of a loaded persona must raise."""
    first_persona = load_personas_from_dir(PERSONAS_DIR)[0]
    # Either exception type is accepted as evidence the model is frozen.
    with pytest.raises((TypeError, ValidationError)):
        first_persona.name = "mutated"  # type: ignore[misc]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# extra="forbid": unknown fields rejected
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_persona_extra_field_raises() -> None:
    """A persona mapping with an undeclared key fails validation."""
    payload = _minimal_persona_dict(unknown_field="surprise")
    # The error text is expected to mention "extra".
    with pytest.raises(ValidationError, match="extra"):
        Persona.model_validate(payload)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# FilesystemPermissionSpec validators
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_permission_path_no_leading_slash_raises() -> None:
    """A permission path without a leading '/' is rejected."""
    relative_paths = ["relative/path"]
    with pytest.raises(ValidationError, match="must start with '/'"):
        FilesystemPermissionSpec(operations=["read"], paths=relative_paths)
|
|
|
|
|
|
def test_permission_path_dotdot_raises() -> None:
    """A permission path containing a '..' segment is rejected."""
    traversal_paths = ["/foo/../bar"]
    with pytest.raises(ValidationError, match=r"must not contain '\.\.'"):
        FilesystemPermissionSpec(operations=["read"], paths=traversal_paths)
|
|
|
|
|
|
def test_permission_path_tilde_raises() -> None:
    """A permission path containing '~' is rejected."""
    tilde_paths = ["/path/~expansion/secret"]
    with pytest.raises(ValidationError, match="must not contain '~'"):
        FilesystemPermissionSpec(operations=["read"], paths=tilde_paths)
|
|
|
|
|
|
def test_permission_path_glob_ok() -> None:
    """A glob such as /** passes path validation and is stored as a tuple."""
    glob_paths = ["/**"]
    permission = FilesystemPermissionSpec(operations=["read", "write"], paths=glob_paths)
    assert permission.paths == ("/**",)
|
|
|
|
|
|
def test_permission_mode_default_allow() -> None:
    """Omitting mode leaves the permission in "allow" mode."""
    permission = FilesystemPermissionSpec(operations=["read"], paths=["/tmp"])
    observed_mode = permission.mode
    assert observed_mode == "allow"
|
|
|
|
|
|
def test_permission_deny_mode() -> None:
    """An explicit mode="deny" is kept on the constructed permission."""
    permission = FilesystemPermissionSpec(
        operations=["write"],
        paths=["/.env"],
        mode="deny",
    )
    assert permission.mode == "deny"
|
|
|
|
|
|
def test_permission_extra_field_raises() -> None:
    """An undeclared keyword argument makes construction fail validation."""
    # The type-ignore is needed because the static signature has no `unknown`.
    with pytest.raises(ValidationError):
        FilesystemPermissionSpec(operations=["read"], paths=["/tmp"], unknown=True)  # type: ignore[call-arg]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Persona.compute_hash: determinism
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_compute_hash_deterministic() -> None:
    """Twenty successive compute_hash() calls on one persona all agree."""
    persona = Persona.model_validate(_minimal_persona_dict())
    # Identical digests collapse to a single set element.
    distinct_digests = {persona.compute_hash() for _ in range(20)}
    assert len(distinct_digests) == 1
|
|
|
|
|
|
def test_compute_hash_different_personas_differ() -> None:
    """Personas that differ only in name produce different hashes."""
    first, second = (
        Persona.model_validate(_minimal_persona_dict(name=label)) for label in ("p1", "p2")
    )
    assert first.compute_hash() != second.compute_hash()
|
|
|
|
|
|
def test_compute_hash_version_affects_hash() -> None:
    """Personas that differ only in version produce different hashes."""
    older, newer = (
        Persona.model_validate(_minimal_persona_dict(version=number)) for number in (1, 2)
    )
    assert older.compute_hash() != newer.compute_hash()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Persona: min_length, ge validators
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_persona_empty_capabilities_raises() -> None:
    """An empty capabilities list fails persona validation."""
    payload = _minimal_persona_dict(capabilities=[])
    with pytest.raises(ValidationError):
        Persona.model_validate(payload)
|
|
|
|
|
|
def test_persona_version_zero_raises() -> None:
    """A version of 0 fails persona validation."""
    payload = _minimal_persona_dict(version=0)
    with pytest.raises(ValidationError):
        Persona.model_validate(payload)
|
|
|
|
|
|
def test_persona_negative_max_cost_raises() -> None:
    """A negative max_cost_per_call_usd fails persona validation."""
    payload = _minimal_persona_dict(max_cost_per_call_usd=-0.01)
    with pytest.raises(ValidationError):
        Persona.model_validate(payload)
|
|
|
|
|
|
def test_persona_system_prompt_too_short_raises() -> None:
    """A five-character system_prompt fails persona validation."""
    payload = _minimal_persona_dict(system_prompt="short")
    with pytest.raises(ValidationError):
        Persona.model_validate(payload)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# load_persona_yaml: file not found
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_load_persona_yaml_missing_file(tmp_path: Path) -> None:
    """Loading a YAML path that does not exist raises FileNotFoundError."""
    # tmp_path starts empty, so this file is guaranteed to be absent.
    absent_file = tmp_path / "nonexistent.yaml"
    with pytest.raises(FileNotFoundError):
        load_persona_yaml(absent_file)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# load_personas_from_dir: duplicate detection
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_load_personas_from_dir_duplicate_raises(tmp_path: Path) -> None:
    """Two files declaring the same persona make the directory load fail."""
    import yaml

    # Serialise once: both files carry the same persona content and differ
    # only in filename.
    serialized = yaml.dump(_minimal_persona_dict())
    (tmp_path / "persona-a@1.yaml").write_text(serialized, encoding="utf-8")
    (tmp_path / "persona-b@1.yaml").write_text(serialized, encoding="utf-8")

    with pytest.raises(ValueError, match="duplicate persona"):
        load_personas_from_dir(tmp_path)
|
|
|
|
|
|
def test_load_personas_from_dir_missing_dir() -> None:
    """A directory that does not exist yields an empty list, not an error."""
    missing_dir = Path("/nonexistent_directory_xyz")
    assert load_personas_from_dir(missing_dir) == []
|
|
|
|
|
|
def test_load_personas_from_dir_sorted_by_filename(tmp_path: Path) -> None:
    """Files are loaded in filename order for determinism.

    The "zz" file is written before the "aa" file, so the expected result
    order differs from the order in which the files were written.
    """
    import yaml

    # No index is needed here; only the persona names drive the loop.
    for name in ("zz-persona", "aa-persona"):
        data = _minimal_persona_dict(name=name, version=1)
        (tmp_path / f"{name}@1.yaml").write_text(yaml.dump(data), encoding="utf-8")

    personas = load_personas_from_dir(tmp_path)
    # Comparing the whole list pins both the ordering and the number of
    # personas returned, and shows the full actual order on failure.
    assert [p.name for p in personas] == ["aa-persona", "zz-persona"]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# PersonaSubagent: extra="forbid", min_length
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_subagent_extra_field_raises() -> None:
    """An undeclared keyword argument makes PersonaSubagent construction fail."""
    valid_text = "at least ten chars here"
    with pytest.raises(ValidationError):
        PersonaSubagent(
            name="x",
            description=valid_text,
            system_prompt=valid_text,
            unknown_field=True,  # type: ignore[call-arg]
        )
|
|
|
|
|
|
def test_subagent_short_description_raises() -> None:
    """A five-character description fails PersonaSubagent validation."""
    with pytest.raises(ValidationError):
        PersonaSubagent(
            name="x",
            description="short",
            system_prompt="at least ten chars here",
        )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Snapshot: specific persona hashes are stable
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_default_interactive_hash_prefix() -> None:
    """Pin the hash of the default-interactive seed persona to prefix f641e8e4.

    The pinned prefix was refreshed when the persona's model moved from
    anthropic/claude-haiku-4-5 to deepseek/deepseek-chat (cheap default for
    cost; the fallback is still claude-haiku-4-5). The hash moved with it
    because compute_hash() includes model, provider_origin and fallback_model.
    """
    candidates = (
        persona
        for persona in load_personas_from_dir(PERSONAS_DIR)
        if persona.name == "default-interactive"
    )
    # Take the first persona carrying that name.
    target = next(candidates)
    digest = target.compute_hash()
    assert digest.startswith("f641e8e4")
|
|
|
|
|
|
def test_spec_writer_hash_prefix() -> None:
    """Check the hash format of the openrouter-claude-spec-writer seed persona.

    NOTE(review): despite the test name, no specific prefix is pinned here;
    only the length and the lowercase-hex format of the digest are checked.
    """
    candidates = (
        persona
        for persona in load_personas_from_dir(PERSONAS_DIR)
        if persona.name == "openrouter-claude-spec-writer"
    )
    digest = next(candidates).compute_hash()
    assert len(digest) == 64
    assert re.fullmatch(r"[0-9a-f]{64}", digest)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Step 2 patch: null byte path rejection
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_filesystem_permission_null_byte_rejected() -> None:
    """A permission path containing a null byte fails validation."""
    payload = {
        "operations": ["read"],
        "paths": ["/foo\x00/bar"],
        "mode": "deny",
    }
    # The error text is expected to mention "null bytes".
    with pytest.raises(ValidationError, match="null bytes"):
        FilesystemPermissionSpec.model_validate(payload)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Deep immutability: nested list-valued fields are tuples (cannot be mutated)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_persona_capabilities_immutable() -> None:
    """Calling .append() on capabilities must raise AttributeError or TypeError."""
    persona = Persona.model_validate(_minimal_persona_dict())
    with pytest.raises((AttributeError, TypeError)):
        persona.capabilities.append(None)  # type: ignore[attr-defined]
|
|
|
|
|
|
def test_persona_subagents_immutable() -> None:
    """Calling .append() on subagents must raise AttributeError or TypeError."""
    persona = Persona.model_validate(_minimal_persona_dict())
    with pytest.raises((AttributeError, TypeError)):
        persona.subagents.append(None)  # type: ignore[attr-defined]
|
|
|
|
|
|
def test_persona_skills_immutable() -> None:
    """Calling .append() on skills must raise AttributeError or TypeError."""
    persona = Persona.model_validate(_minimal_persona_dict())
    with pytest.raises((AttributeError, TypeError)):
        persona.skills.append("new_skill")  # type: ignore[attr-defined]
|
|
|
|
|
|
def test_filesystem_permission_paths_immutable() -> None:
    """Calling .append() on paths must raise AttributeError or TypeError."""
    permission = FilesystemPermissionSpec(
        operations=("read",),
        paths=("/foo",),
        mode="allow",
    )
    with pytest.raises((AttributeError, TypeError)):
        permission.paths.append("/bar")  # type: ignore[attr-defined]
|
|
|
|
|
|
def test_filesystem_permission_operations_immutable() -> None:
    """Calling .append() on operations must raise AttributeError or TypeError."""
    permission = FilesystemPermissionSpec(
        operations=("read",),
        paths=("/foo",),
        mode="allow",
    )
    with pytest.raises((AttributeError, TypeError)):
        permission.operations.append("write")  # type: ignore[attr-defined]
|