"""Canonical JSON serialization + sha256 hashing for content-addressed identity.""" from __future__ import annotations import hashlib import json from typing import Any def canonicalize(value: Any) -> str: """Return canonical JSON: keys sorted, no insignificant whitespace, UTF-16 codepoint order. json.dumps with sort_keys=True uses Python's default dict key sort which is by Unicode codepoint. For ASCII keys this is equivalent to UTF-16 codepoint order which is what we want. For non-ASCII keys outside the BMP, this is a documented approximation. """ return json.dumps( value, sort_keys=True, ensure_ascii=False, separators=(",", ":"), allow_nan=False, ) def sha256(value: Any) -> str: """Return sha256 hex digest of canonical JSON of value.""" return hashlib.sha256(canonicalize(value).encode("utf-8")).hexdigest()