"""Repository/service discovery helpers for autonomous execution prompts.""" from __future__ import annotations import json from dataclasses import dataclass, field from pathlib import Path @dataclass class RepoDiscovery: languages: set[str] = field(default_factory=set) package_managers: set[str] = field(default_factory=set) databases: set[str] = field(default_factory=set) services: set[str] = field(default_factory=set) frameworks: set[str] = field(default_factory=set) hints: list[str] = field(default_factory=list) def _read_text(path: Path) -> str: try: return path.read_text(encoding="utf-8") except (OSError, UnicodeDecodeError): return "" def _add_if_contains(target: set[str], content: str, mapping: dict[str, str]) -> None: lowered = content.lower() for needle, name in mapping.items(): if needle in lowered: target.add(name) # Shared mapping for database signals found in manifest content _MANIFEST_DB_SIGNALS: dict[str, str] = { # PostgreSQL "psycopg": "postgresql", "asyncpg": "postgresql", "postgres": "postgresql", "pgx": "postgresql", # MySQL / MariaDB "mysql": "mysql", "mariadb": "mysql", "pymysql": "mysql", # MongoDB "pymongo": "mongodb", "mongodb": "mongodb", "mongoengine": "mongodb", "mongosh": "mongodb", # ClickHouse "clickhouse": "clickhouse", "clickhouse-driver": "clickhouse", "clickhouse_connect": "clickhouse", # Redis "redis": "redis", "ioredis": "redis", # SQLite "sqlite": "sqlite", "better-sqlite3": "sqlite", "aiosqlite": "sqlite", # Elasticsearch / OpenSearch "elasticsearch": "elasticsearch", "opensearch": "elasticsearch", # DynamoDB "dynamodb": "dynamodb", "boto3": "dynamodb", # broad but common signal # Cassandra "cassandra-driver": "cassandra", "cassandra": "cassandra", # RabbitMQ "amqplib": "rabbitmq", "pika": "rabbitmq", "rabbitmq": "rabbitmq", # Kafka "kafka": "kafka", "confluent-kafka": "kafka", "kafkajs": "kafka", # Neo4j "neo4j": "neo4j", } # Node package.json dependency → database mapping _NODE_DEP_DB_SIGNALS: dict[str, str] = { "pg": "postgresql", "mysql": "mysql", "mysql2": "mysql", "mongoose": "mongodb", "mongodb": "mongodb", "@clickhouse/client": "clickhouse", "redis": "redis", "ioredis": "redis", "prisma": "postgresql", "better-sqlite3": "sqlite", "sqlite3": "sqlite", "@elastic/elasticsearch": "elasticsearch", "@aws-sdk/client-dynamodb": "dynamodb", "kafkajs": "kafka", "amqplib": "rabbitmq", "neo4j-driver": "neo4j", "cassandra-driver": "cassandra", "typeorm": "postgresql", "sequelize": "postgresql", "knex": "postgresql", } # Docker compose service image → service name mapping _COMPOSE_SERVICE_SIGNALS: dict[str, str] = { "clickhouse": "clickhouse", "postgres": "postgresql", "mysql": "mysql", "mariadb": "mysql", "mongo": "mongodb", "redis": "redis", "elasticsearch": "elasticsearch", "opensearch": "elasticsearch", "rabbitmq": "rabbitmq", "kafka": "kafka", "zookeeper": "kafka", "cassandra": "cassandra", "neo4j": "neo4j", "minio": "s3", "localstack": "aws-local", "dynamodb": "dynamodb", "memcached": "memcached", "nginx": "nginx", } # Environment variable name patterns → database mapping _ENV_DB_PATTERNS: list[tuple[str, str]] = [ ("CLICKHOUSE", "clickhouse"), ("CH_", "clickhouse"), ("POSTGRES", "postgresql"), ("PG", "postgresql"), ("DATABASE_URL", "postgresql"), ("MYSQL", "mysql"), ("MARIADB", "mysql"), ("MONGO", "mongodb"), ("REDIS", "redis"), ("ELASTICSEARCH", "elasticsearch"), ("OPENSEARCH", "elasticsearch"), ("DYNAMO", "dynamodb"), ("CASSANDRA", "cassandra"), ("KAFKA", "kafka"), ("RABBIT", "rabbitmq"), ("AMQP", "rabbitmq"), ("NEO4J", "neo4j"), ("SQLITE", "sqlite"), ] def discover_repo(project_root: Path, env_names: set[str] | None = None) -> RepoDiscovery: """Infer runtime-relevant stack hints from common manifest/config files.""" discovery = RepoDiscovery() env_names = {name.upper() for name in (env_names or set())} file_map: dict[str, Path] = { "pyproject": project_root / "pyproject.toml", "requirements": project_root / "requirements.txt", "requirements_dev": project_root / "requirements-dev.txt", "setup_py": project_root / "setup.py", "setup_cfg": project_root / "setup.cfg", "package": project_root / "package.json", "go_mod": project_root / "go.mod", "cargo": project_root / "Cargo.toml", "gemfile": project_root / "Gemfile", "build_gradle": project_root / "build.gradle", "build_gradle_kts": project_root / "build.gradle.kts", "pom": project_root / "pom.xml", "composer": project_root / "composer.json", "mix": project_root / "mix.exs", "docker_compose": project_root / "docker-compose.yml", "docker_compose_alt": project_root / "docker-compose.yaml", "compose": project_root / "compose.yaml", "prisma": project_root / "prisma" / "schema.prisma", "dockerfile": project_root / "Dockerfile", } # ---- Language detection ---- if ( file_map["pyproject"].exists() or file_map["requirements"].exists() or file_map["requirements_dev"].exists() or file_map["setup_py"].exists() or file_map["setup_cfg"].exists() ): discovery.languages.add("python") if file_map["package"].exists(): discovery.languages.add("node") if file_map["go_mod"].exists(): discovery.languages.add("go") if file_map["cargo"].exists(): discovery.languages.add("rust") if file_map["gemfile"].exists(): discovery.languages.add("ruby") if file_map["build_gradle"].exists() or file_map["build_gradle_kts"].exists() or file_map["pom"].exists(): discovery.languages.add("java") if file_map["composer"].exists(): discovery.languages.add("php") if file_map["mix"].exists(): discovery.languages.add("elixir") # ---- Package manager detection ---- if file_map["pyproject"].exists() or file_map["requirements"].exists() or file_map["setup_py"].exists(): discovery.package_managers.add("pip") if file_map["package"].exists(): try: package_json = json.loads(_read_text(file_map["package"]) or "{}") except json.JSONDecodeError: package_json = {} pm = package_json.get("packageManager") if isinstance(pm, str) and pm: discovery.package_managers.add(pm.split("@", 1)[0]) else: # Check for lockfiles to distinguish npm/yarn/pnpm if (project_root / "pnpm-lock.yaml").exists(): discovery.package_managers.add("pnpm") elif (project_root / "yarn.lock").exists(): discovery.package_managers.add("yarn") else: discovery.package_managers.add("npm") if file_map["go_mod"].exists(): discovery.package_managers.add("go") if file_map["cargo"].exists(): discovery.package_managers.add("cargo") if file_map["gemfile"].exists(): discovery.package_managers.add("bundler") if file_map["build_gradle"].exists() or file_map["build_gradle_kts"].exists(): discovery.package_managers.add("gradle") if file_map["pom"].exists(): discovery.package_managers.add("maven") if file_map["composer"].exists(): discovery.package_managers.add("composer") if file_map["mix"].exists(): discovery.package_managers.add("mix") # ---- Gather manifest content ---- manifests = { name: _read_text(path) for name, path in file_map.items() if path.exists() } combined = "\n".join(manifests.values()) # ---- Database detection from manifest content ---- _add_if_contains(discovery.databases, combined, _MANIFEST_DB_SIGNALS) # ---- Node.js dependency-specific detection ---- if file_map["package"].exists(): try: package_json = json.loads(_read_text(file_map["package"]) or "{}") except json.JSONDecodeError: package_json = {} deps = { **(package_json.get("dependencies") or {}), **(package_json.get("devDependencies") or {}), } dep_blob = "\n".join(deps.keys()).lower() _add_if_contains(discovery.databases, dep_blob, _NODE_DEP_DB_SIGNALS) # ---- Framework detection from manifest content ---- _add_if_contains( discovery.frameworks, combined, { "fastapi": "fastapi", "django": "django", "flask": "flask", "express": "express", "nextjs": "next.js", "next": "next.js", "nestjs": "nestjs", "spring": "spring", "rails": "rails", "laravel": "laravel", "phoenix": "phoenix", "gin": "gin", "actix": "actix", }, ) # ---- Database detection from environment variable names ---- for env_name in env_names: for pattern, db_name in _ENV_DB_PATTERNS: if pattern in env_name or env_name.startswith(pattern): discovery.databases.add(db_name) break # ---- Docker compose service detection ---- compose_blob = "\n".join( manifests.get(key, "") for key in ("docker_compose", "docker_compose_alt", "compose") ).lower() _add_if_contains(discovery.services, compose_blob, _COMPOSE_SERVICE_SIGNALS) # ---- Hints from config files ---- if file_map["prisma"].exists(): discovery.hints.append("Prisma schema detected.") if (project_root / "alembic.ini").exists(): discovery.hints.append("Alembic migration config detected.") if (project_root / "knexfile.js").exists() or (project_root / "knexfile.ts").exists(): discovery.hints.append("Knex migration config detected.") if (project_root / "ormconfig.json").exists() or (project_root / "ormconfig.ts").exists(): discovery.hints.append("TypeORM config detected.") if (project_root / "drizzle.config.ts").exists(): discovery.hints.append("Drizzle ORM config detected.") if (project_root / "Makefile").exists(): discovery.hints.append("Makefile available for build/task automation.") if file_map["dockerfile"].exists() or (project_root / "docker").exists() or discovery.services: discovery.hints.append("Containerized services may be available for local verification.") return discovery def format_repo_discovery(discovery: RepoDiscovery) -> str: """Render discovery results into a compact prompt summary.""" lines: list[str] = [] if discovery.languages: lines.append("Detected languages: " + ", ".join(sorted(discovery.languages))) if discovery.package_managers: lines.append("Likely package managers: " + ", ".join(sorted(discovery.package_managers))) if discovery.databases: lines.append("Detected databases/services in code or env: " + ", ".join(sorted(discovery.databases))) if discovery.services: lines.append("Detected local service containers: " + ", ".join(sorted(discovery.services))) if discovery.frameworks: lines.append("Detected frameworks: " + ", ".join(sorted(discovery.frameworks))) if discovery.hints: lines.extend(discovery.hints) if not lines: return "No strong runtime/service signals were detected from repository manifests." return "\n".join(lines)