"""structlog configuration with built-in secret scrubbing.

Scrubs known API key patterns and bearer tokens from all log output (both
rich pretty-printed and JSON).

Call ``configure_logging(level=..., json_output=...)`` once at process start
(called from CLI entry points).
"""

from __future__ import annotations

import logging
import re
import sys
from typing import Any

import structlog

# Secret patterns. Order matters: more specific first.
_SECRET_PATTERNS: tuple[re.Pattern[str], ...] = tuple(
    re.compile(p)
    for p in (
        r"sk-or-[A-Za-z0-9_-]{20,}",  # OpenRouter
        r"sk-ant-[A-Za-z0-9_-]{20,}",  # Anthropic
        r"sk-proj-[A-Za-z0-9_-]{20,}",  # OpenAI project keys
        r"sk-[A-Za-z0-9_-]{30,}",  # OpenAI (general)
        r"lsv2_pt_[A-Za-z0-9_-]{20,}",  # LangSmith personal token
        r"lsv2_[A-Za-z0-9_-]{30,}",  # LangSmith (other)
        # Generic bearer credential. The scheme name is matched
        # case-insensitively ("bearer"/"BEARER" are equally valid), and the
        # token uses the RFC 6750 ``b64token`` alphabet, including ``~ + /``
        # and trailing ``=`` padding.
        r"(?i:Bearer)\s+[A-Za-z0-9._~+/-]{20,}=*",
        r"ghp_[A-Za-z0-9]{30,}",  # GitHub PAT
        r"glpat-[A-Za-z0-9-]{20,}",  # GitLab PAT
    )
)

_REDACTED = "[REDACTED]"


def scrub(text: str) -> str:
    """Replace secrets in ``text`` with ``[REDACTED]``.

    Every pattern in ``_SECRET_PATTERNS`` is applied in order, so the more
    specific patterns get the first chance to match.
    """
    for pat in _SECRET_PATTERNS:
        text = pat.sub(_REDACTED, text)
    return text


def scrub_value(value: Any) -> Any:
    """Recursively scrub strings inside dicts/lists/tuples/sets/frozensets.

    Dict keys are left untouched; only dict values are scrubbed. Tuples
    (including subclasses) come back as plain ``tuple`` objects. Any other
    non-string value passes through unchanged.
    """
    if isinstance(value, str):
        return scrub(value)
    if isinstance(value, dict):
        return {k: scrub_value(v) for k, v in value.items()}
    if isinstance(value, list):
        return [scrub_value(v) for v in value]
    if isinstance(value, tuple):
        return tuple(scrub_value(v) for v in value)
    if isinstance(value, frozenset):
        # Rebuild as a frozenset so the result stays hashable (it may itself
        # be an element of an enclosing set).
        return frozenset(scrub_value(v) for v in value)
    if isinstance(value, set):
        return {scrub_value(v) for v in value}
    return value


def _scrub_processor(_logger: Any, _method: str, event_dict: dict[str, Any]) -> dict[str, Any]:
    """structlog processor: scrub every value in the event dict.

    Returns a new dict; top-level keys are kept as-is.
    """
    return {k: scrub_value(v) for k, v in event_dict.items()}


def _scrub_rendered(_logger: Any, _method: str, rendered: Any) -> Any:
    """structlog processor: scrub the final rendered log line.

    Intended to run *after* the renderer. ``_scrub_processor`` only sees
    string values; anything the renderer turns into text itself (non-string
    values, exception information, keys) would otherwise reach the output
    unscrubbed. Non-string renderer output is passed through unchanged.
    """
    if isinstance(rendered, str):
        return scrub(rendered)
    return rendered


def configure_logging(level: str = "info", json_output: bool = False) -> None:
    """Configure structlog with secret-scrubbing on top of the chosen renderer.

    Args:
        level: Name of a stdlib ``logging`` level, case-insensitive
            (e.g. ``"debug"``). Unknown names fall back to ``INFO``.
        json_output: Render events with ``JSONRenderer`` when true, otherwise
            with the coloured ``ConsoleRenderer``.
    """
    # ``getattr`` alone could hand back a non-level attribute (for example
    # ``logging.BASIC_FORMAT`` is a string), so only accept integers.
    resolved = getattr(logging, level.upper(), None)
    log_level = resolved if isinstance(resolved, int) else logging.INFO

    logging.basicConfig(level=log_level, format="%(message)s", stream=sys.stderr)

    processors: list[Any] = [
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.TimeStamper(fmt="iso", utc=True),
        _scrub_processor,
    ]
    if json_output:
        processors.append(structlog.processors.JSONRenderer())
    else:
        processors.append(structlog.dev.ConsoleRenderer(colors=True))
    # Second pass over the rendered string: catches secrets that only become
    # text inside the renderer. Scrubbing is idempotent, so already-redacted
    # values are unaffected.
    processors.append(_scrub_rendered)

    structlog.configure(
        processors=processors,
        wrapper_class=structlog.make_filtering_bound_logger(log_level),
        logger_factory=structlog.PrintLoggerFactory(file=sys.stderr),
        cache_logger_on_first_use=True,
    )


def get_logger(name: str | None = None) -> Any:
    """Return a structlog logger, passing ``name`` through when it is non-empty."""
    return structlog.get_logger(name) if name else structlog.get_logger()