dev-puppeteer/my-deepagent/tests/unit/test_workflow.py

"""Unit tests for src/my_deepagent/workflow.py."""

from __future__ import annotations

import re
from pathlib import Path

import pytest
from pydantic import ValidationError

from my_deepagent.workflow import (
    ExpectedArtifact,
    WorkflowTemplate,
    load_workflow_yaml,
    load_workflows_from_dir,
)

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# Seed workflow definitions: three directory levels above this file, under docs/schemas/workflows.
WORKFLOWS_DIR = Path(__file__).parent.parent.parent.joinpath("docs", "schemas", "workflows")


def _minimal_role(**overrides: object) -> dict[str, object]:
    """Build a small role mapping; keyword arguments replace or add keys."""
    return {
        "id": "spec_writer",
        "required_capabilities": ["spec_write"],
        **overrides,
    }


def _minimal_phase(**overrides: object) -> dict[str, object]:
    """Build a small phase mapping; keyword arguments replace or add keys."""
    return {
        "key": "spec",
        "title": "Write spec",
        "risk": "low",
        "role": "spec_writer",
        "instructions": "Write the specification document for the feature.",
        **overrides,
    }


def _minimal_template(**overrides: object) -> dict[str, object]:
    """Build a small template mapping with one role and one phase.

    Keyword arguments replace or add top-level keys.
    """
    return {
        "name": "test-workflow",
        "version": 1,
        "roles": [_minimal_role()],
        "phases": [_minimal_phase()],
        **overrides,
    }


# ---------------------------------------------------------------------------
# Seed yaml: all 3 load successfully
# ---------------------------------------------------------------------------


def test_all_seed_workflows_load() -> None:
    """Loading the seed directory yields exactly three workflows."""
    loaded = load_workflows_from_dir(WORKFLOWS_DIR)
    seed_count = len(loaded)
    assert seed_count == 3


def test_seed_workflow_names() -> None:
    """The seed workflows carry exactly the three expected names."""
    expected_names = {"spec-and-review", "bug-fix-with-reproduction", "code-investigation"}
    loaded = load_workflows_from_dir(WORKFLOWS_DIR)
    assert {template.name for template in loaded} == expected_names


def test_seed_workflow_roles_non_empty() -> None:
    """Every seed workflow declares at least one role.

    ``load_workflows_from_dir`` returns an empty list for a missing directory (see
    ``test_load_workflows_from_dir_missing_dir``), so the loop alone would pass
    vacuously if ``WORKFLOWS_DIR`` pointed at the wrong place. The leading assertion
    rules that out.
    """
    workflows = load_workflows_from_dir(WORKFLOWS_DIR)
    assert workflows, f"no seed workflows loaded from {WORKFLOWS_DIR}"
    for w in workflows:
        assert len(w.roles) >= 1, f"{w.name}: no roles"


def test_seed_workflow_phases_non_empty() -> None:
    """Every seed workflow declares at least one phase.

    ``load_workflows_from_dir`` returns an empty list for a missing directory (see
    ``test_load_workflows_from_dir_missing_dir``), so the loop alone would pass
    vacuously if ``WORKFLOWS_DIR`` pointed at the wrong place. The leading assertion
    rules that out.
    """
    workflows = load_workflows_from_dir(WORKFLOWS_DIR)
    assert workflows, f"no seed workflows loaded from {WORKFLOWS_DIR}"
    for w in workflows:
        assert len(w.phases) >= 1, f"{w.name}: no phases"


def test_seed_workflow_phase_keys_unique() -> None:
    """Phase keys are unique within each seed workflow.

    ``load_workflows_from_dir`` returns an empty list for a missing directory (see
    ``test_load_workflows_from_dir_missing_dir``), so the loop alone would pass
    vacuously if ``WORKFLOWS_DIR`` pointed at the wrong place. The leading assertion
    rules that out.
    """
    workflows = load_workflows_from_dir(WORKFLOWS_DIR)
    assert workflows, f"no seed workflows loaded from {WORKFLOWS_DIR}"
    for w in workflows:
        keys = [ph.key for ph in w.phases]
        assert len(keys) == len(set(keys)), f"{w.name}: duplicate phase keys"


# ---------------------------------------------------------------------------
# WorkflowTemplate validators
# ---------------------------------------------------------------------------


def test_phase_references_undefined_role_raises() -> None:
    """A phase naming a role absent from ``roles`` fails with an "unknown role" error."""
    declared_roles = [_minimal_role(id="spec_writer")]
    dangling_phases = [_minimal_phase(role="nonexistent_role")]
    payload = _minimal_template(roles=declared_roles, phases=dangling_phases)
    with pytest.raises(ValidationError, match="unknown role"):
        WorkflowTemplate.model_validate(payload)


def test_duplicate_phase_keys_raises() -> None:
    """Two phases sharing the key ``spec`` fail with a "duplicate phase keys" error."""
    clashing_phases = [_minimal_phase(key="spec"), _minimal_phase(key="spec")]
    payload = _minimal_template(
        roles=[_minimal_role(id="spec_writer")],
        phases=clashing_phases,
    )
    with pytest.raises(ValidationError, match="duplicate phase keys"):
        WorkflowTemplate.model_validate(payload)


def test_duplicate_role_ids_raises() -> None:
    """Two roles sharing the id ``spec_writer`` fail with a "duplicate role ids" error."""
    clashing_roles = [_minimal_role(id="spec_writer"), _minimal_role(id="spec_writer")]
    payload = _minimal_template(
        roles=clashing_roles,
        phases=[_minimal_phase(role="spec_writer")],
    )
    with pytest.raises(ValidationError, match="duplicate role ids"):
        WorkflowTemplate.model_validate(payload)


def test_phase_key_uppercase_raises() -> None:
    """An all-uppercase phase key (``SPEC``) is rejected."""
    bad_phase = _minimal_phase(key="SPEC")
    payload = _minimal_template(phases=[bad_phase])
    with pytest.raises(ValidationError):
        WorkflowTemplate.model_validate(payload)


def test_phase_key_with_hyphen_raises() -> None:
    """A phase key containing a hyphen (``spec-one``) is rejected."""
    bad_phase = _minimal_phase(key="spec-one")
    payload = _minimal_template(phases=[bad_phase])
    with pytest.raises(ValidationError):
        WorkflowTemplate.model_validate(payload)


def test_phase_key_leading_digit_raises() -> None:
    """A phase key that starts with a digit (``1spec``) is rejected."""
    bad_phase = _minimal_phase(key="1spec")
    payload = _minimal_template(phases=[bad_phase])
    with pytest.raises(ValidationError):
        WorkflowTemplate.model_validate(payload)


def test_phase_key_snake_case_ok() -> None:
    """A lowercase snake_case phase key validates and is stored unchanged."""
    snake_key = "spec_write_phase"
    payload = _minimal_template(phases=[_minimal_phase(key="spec_write_phase")])
    template = WorkflowTemplate.model_validate(payload)
    first_phase = template.phases[0]
    assert first_phase.key == snake_key


def test_role_id_pattern_invalid_raises() -> None:
    """A role id such as ``Spec-Writer`` is rejected on the role itself.

    The phase below references ``spec_writer`` while the only declared role is
    ``Spec-Writer``. A bare ``ValidationError`` check could therefore also be
    satisfied by the unknown-role check exercised in
    ``test_phase_references_undefined_role_raises``. Inspecting the reported error
    locations pins the failure to the ``roles`` field.
    """
    data = _minimal_template(
        roles=[_minimal_role(id="Spec-Writer")],
        phases=[_minimal_phase(role="spec_writer")],
    )
    with pytest.raises(ValidationError) as exc_info:
        WorkflowTemplate.model_validate(data)
    # Each pydantic error entry carries a "loc" path; at least one must start at "roles".
    error_locs = [tuple(err["loc"]) for err in exc_info.value.errors()]
    assert any(loc[:1] == ("roles",) for loc in error_locs), error_locs


# ---------------------------------------------------------------------------
# ExpectedArtifact: alias mapping
# ---------------------------------------------------------------------------


def test_expected_artifact_schema_alias() -> None:
    """The input key ``schema`` populates the ``schema_id`` attribute."""
    raw = {"path": "artifacts/spec.json", "schema": "dev/spec@1"}
    artifact = ExpectedArtifact.model_validate(raw)
    assert artifact.schema_id == "dev/spec@1"
    assert artifact.path == "artifacts/spec.json"


def test_expected_artifact_extra_field_raises() -> None:
    """An unrecognised key on an expected artifact fails validation."""
    raw = {"path": "x.json", "schema": "dev/spec@1", "unknown": True}
    with pytest.raises(ValidationError):
        ExpectedArtifact.model_validate(raw)


def test_expected_artifact_missing_schema_raises() -> None:
    """An expected artifact given only ``path`` (no ``schema``) fails validation."""
    raw = {"path": "x.json"}
    with pytest.raises(ValidationError):
        ExpectedArtifact.model_validate(raw)


# ---------------------------------------------------------------------------
# WorkflowTemplate frozen + extra="forbid"
# ---------------------------------------------------------------------------


def test_template_frozen() -> None:
    """Assigning to ``name`` on a validated template raises."""
    template = WorkflowTemplate.model_validate(_minimal_template())
    rejected = (TypeError, ValidationError)
    with pytest.raises(rejected):
        setattr(template, "name", "mutated")


def test_template_extra_field_raises() -> None:
    """An unrecognised top-level key on a template fails validation."""
    payload = _minimal_template(extra_unknown_field="oops")
    with pytest.raises(ValidationError):
        WorkflowTemplate.model_validate(payload)


# ---------------------------------------------------------------------------
# compute_hash: determinism
# ---------------------------------------------------------------------------


def test_compute_hash_deterministic() -> None:
    """Twenty ``compute_hash`` calls on one template all return the same value."""
    template = WorkflowTemplate.model_validate(_minimal_template())
    distinct_hashes = {template.compute_hash() for _ in range(20)}
    assert len(distinct_hashes) == 1


def test_compute_hash_returns_64_char_hex() -> None:
    """``compute_hash`` returns exactly 64 lowercase hexadecimal characters."""
    template = WorkflowTemplate.model_validate(_minimal_template())
    digest = template.compute_hash()
    hex_match = re.fullmatch(r"[0-9a-f]{64}", digest)
    assert hex_match


def test_compute_hash_different_templates_differ() -> None:
    """Templates that differ only in ``name`` produce different hashes."""
    first = WorkflowTemplate.model_validate(_minimal_template(name="wf1"))
    second = WorkflowTemplate.model_validate(_minimal_template(name="wf2"))
    first_hash = first.compute_hash()
    second_hash = second.compute_hash()
    assert first_hash != second_hash


# ---------------------------------------------------------------------------
# load_workflow_yaml: file not found
# ---------------------------------------------------------------------------


def test_load_workflow_yaml_missing_file(tmp_path: Path) -> None:
    """Loading a path that was never created raises ``FileNotFoundError``."""
    absent_file = tmp_path / "no.yaml"
    with pytest.raises(FileNotFoundError):
        load_workflow_yaml(absent_file)


# ---------------------------------------------------------------------------
# load_workflows_from_dir: duplicate detection + missing dir
# ---------------------------------------------------------------------------


def test_load_workflows_from_dir_duplicate_raises(tmp_path: Path) -> None:
    """Two files holding the same template content make the directory loader raise."""
    import yaml

    # Both files receive identical YAML text built from the same template mapping.
    serialized = yaml.dump(_minimal_template())
    (tmp_path / "wf-a@1.yaml").write_text(serialized, encoding="utf-8")
    (tmp_path / "wf-b@1.yaml").write_text(serialized, encoding="utf-8")

    with pytest.raises(ValueError, match="duplicate workflow"):
        load_workflows_from_dir(tmp_path)


def test_load_workflows_from_dir_missing_dir() -> None:
    """A directory that does not exist yields an empty list rather than an error."""
    absent_dir = Path("/nonexistent_wf_dir_xyz")
    assert load_workflows_from_dir(absent_dir) == []


# ---------------------------------------------------------------------------
# Snapshot: seed hashes are stable
# ---------------------------------------------------------------------------


def test_spec_and_review_hash_prefix() -> None:
    """The ``spec-and-review`` seed hash starts with the recorded 16-character prefix."""
    loaded = load_workflows_from_dir(WORKFLOWS_DIR)
    target = next(tpl for tpl in loaded if tpl.name == "spec-and-review")
    digest = target.compute_hash()
    assert digest.startswith("1c94587647b16f0d")


def test_bug_fix_hash_prefix() -> None:
    """The ``bug-fix-with-reproduction`` seed hash starts with the recorded prefix."""
    loaded = load_workflows_from_dir(WORKFLOWS_DIR)
    target = next(tpl for tpl in loaded if tpl.name == "bug-fix-with-reproduction")
    digest = target.compute_hash()
    assert digest.startswith("a137c9656f10e88a")


# ---------------------------------------------------------------------------
# Step 2 patch: Counter-based duplicate role ids report is sorted
# ---------------------------------------------------------------------------


def test_workflow_duplicate_role_ids_reported_sorted() -> None:
    """Duplicated role ids given as b, a, a, b are reported as ``['a', 'b']``."""
    # Ids are deliberately out of order so the message can only match if it is sorted.
    unsorted_roles = [
        {"id": role_id, "required_capabilities": ["spec_write"]}
        for role_id in ("b", "a", "a", "b")
    ]
    only_phase = {
        "key": "x",
        "title": "x",
        "risk": "low",
        "role": "a",
        "instructions": "x" * 20,
    }
    payload = {
        "name": "x",
        "version": 1,
        "roles": unsorted_roles,
        "phases": [only_phase],
    }
    with pytest.raises(ValidationError, match=r"duplicate role ids: \['a', 'b'\]"):
        WorkflowTemplate.model_validate(payload)


def test_code_investigation_hash_prefix() -> None:
    """The ``code-investigation`` seed hash starts with the recorded 16-character prefix."""
    loaded = load_workflows_from_dir(WORKFLOWS_DIR)
    target = next(tpl for tpl in loaded if tpl.name == "code-investigation")
    digest = target.compute_hash()
    assert digest.startswith("5b80ea2e248d5232")


# ---------------------------------------------------------------------------
# Deep immutability: nested list-valued fields are tuples (cannot be mutated)
# ---------------------------------------------------------------------------


def test_workflow_phases_immutable() -> None:
    """Calling ``.append()`` on ``phases`` raises ``AttributeError`` or ``TypeError``."""
    template = WorkflowTemplate.model_validate(_minimal_template())
    rejected = (AttributeError, TypeError)
    with pytest.raises(rejected):
        template.phases.append(None)  # type: ignore[attr-defined]


def test_workflow_roles_immutable() -> None:
    """Calling ``.append()`` on ``roles`` raises ``AttributeError`` or ``TypeError``."""
    template = WorkflowTemplate.model_validate(_minimal_template())
    rejected = (AttributeError, TypeError)
    with pytest.raises(rejected):
        template.roles.append(None)  # type: ignore[attr-defined]


def test_workflow_role_required_capabilities_immutable() -> None:
    """Calling ``.append()`` on ``required_capabilities`` raises ``AttributeError`` or ``TypeError``."""
    from my_deepagent.workflow import WorkflowRole

    raw_role = {"id": "spec_writer", "required_capabilities": ["spec_write"]}
    validated = WorkflowRole.model_validate(raw_role)
    rejected = (AttributeError, TypeError)
    with pytest.raises(rejected):
        validated.required_capabilities.append(None)  # type: ignore[attr-defined]