331 lines
12 KiB
Python
331 lines
12 KiB
Python
"""Repository/service discovery helpers for autonomous execution prompts."""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
|
|
|
|
@dataclass
class RepoDiscovery:
    """Stack signals collected while scanning a repository.

    Every field starts out empty, and each instance gets its own containers
    (built through ``default_factory``), so results are never shared between
    two discoveries.
    """

    # Language labels such as "python" or "node".
    languages: set[str] = field(default_factory=set)
    # Package manager labels such as "pip" or "npm".
    package_managers: set[str] = field(default_factory=set)
    # Data stores / brokers inferred from manifests, dependencies or env names.
    databases: set[str] = field(default_factory=set)
    # Services recognised in Docker Compose files.
    services: set[str] = field(default_factory=set)
    # Framework labels such as "django" or "express".
    frameworks: set[str] = field(default_factory=set)
    # Free-text notes, kept in the order they were added.
    hints: list[str] = field(default_factory=list)
|
|
|
|
|
|
def _read_text(path: Path) -> str:
    """Return the UTF-8 text stored at *path*, or ``""`` if it cannot be read.

    Filesystem errors (missing file, directory, permissions, ...) and invalid
    UTF-8 are both treated as "no content" rather than raised, because callers
    use this for best-effort scanning.
    """
    try:
        with path.open("r", encoding="utf-8") as handle:
            contents = handle.read()
    except (OSError, UnicodeDecodeError):
        contents = ""
    return contents
|
|
|
|
|
|
def _add_if_contains(target: set[str], content: str, mapping: dict[str, str]) -> None:
    """Add to *target* the label of every *mapping* key found inside *content*.

    *content* is lowercased before the substring test; the keys are used
    exactly as given, so they are expected to be lowercase already.
    *target* is mutated in place.
    """
    haystack = content.lower()
    target.update(label for needle, label in mapping.items() if needle in haystack)
|
|
|
|
|
|
# Shared mapping for database signals found in manifest content.
# Declared per database (label, needles) and flattened into needle -> label;
# message brokers are listed alongside the data stores.
_MANIFEST_DB_SIGNALS: dict[str, str] = {
    needle: database
    for database, needles in (
        ("postgresql", ("psycopg", "asyncpg", "postgres", "pgx")),
        ("mysql", ("mysql", "mariadb", "pymysql")),
        ("mongodb", ("pymongo", "mongodb", "mongoengine", "mongosh")),
        ("clickhouse", ("clickhouse", "clickhouse-driver", "clickhouse_connect")),
        ("redis", ("redis", "ioredis")),
        ("sqlite", ("sqlite", "better-sqlite3", "aiosqlite")),
        ("elasticsearch", ("elasticsearch", "opensearch")),
        ("dynamodb", ("dynamodb", "boto3")),  # boto3: broad but common signal
        ("cassandra", ("cassandra-driver", "cassandra")),
        ("rabbitmq", ("amqplib", "pika", "rabbitmq")),
        ("kafka", ("kafka", "confluent-kafka", "kafkajs")),
        ("neo4j", ("neo4j",)),
    )
    for needle in needles
}
|
|
|
|
# Node package.json dependency → database mapping.
_NODE_DEP_DB_SIGNALS: dict[str, str] = {
    # Database drivers and clients
    "pg": "postgresql",
    "mysql": "mysql",
    "mysql2": "mysql",
    "mongoose": "mongodb",
    "mongodb": "mongodb",
    "@clickhouse/client": "clickhouse",
    "redis": "redis",
    "ioredis": "redis",
    "prisma": "postgresql",
    "better-sqlite3": "sqlite",
    "sqlite3": "sqlite",
    "@elastic/elasticsearch": "elasticsearch",
    "@aws-sdk/client-dynamodb": "dynamodb",
    # Messaging, graph and wide-column clients
    "kafkajs": "kafka",
    "amqplib": "rabbitmq",
    "neo4j-driver": "neo4j",
    "cassandra-driver": "cassandra",
    # ORMs / query builders, all mapped to PostgreSQL
    "typeorm": "postgresql",
    "sequelize": "postgresql",
    "knex": "postgresql",
}
|
|
|
|
# Docker compose service image → service name mapping.
_COMPOSE_SERVICE_SIGNALS: dict[str, str] = {
    # Databases, caches and search engines
    "clickhouse": "clickhouse",
    "postgres": "postgresql",
    "mysql": "mysql",
    "mariadb": "mysql",
    "mongo": "mongodb",
    "redis": "redis",
    "elasticsearch": "elasticsearch",
    "opensearch": "elasticsearch",
    # Messaging (zookeeper is reported as kafka)
    "rabbitmq": "rabbitmq",
    "kafka": "kafka",
    "zookeeper": "kafka",
    "cassandra": "cassandra",
    "neo4j": "neo4j",
    # Local stand-ins for cloud services
    "minio": "s3",
    "localstack": "aws-local",
    "dynamodb": "dynamodb",
    # Supporting infrastructure
    "memcached": "memcached",
    "nginx": "nginx",
}
|
|
|
|
# Environment variable name patterns → database mapping.
#
# Order matters: discover_repo stops at the first pattern that matches a
# variable name. Specific product names therefore come first and the short,
# ambiguous prefixes ("CH_", "PG") come last. With the short prefixes first,
# names such as ELASTICSEARCH_URL (contains "CH_") or MYSQL_UPGRADE (contains
# "PG") were attributed to the wrong database.
_ENV_DB_PATTERNS: list[tuple[str, str]] = [
    ("CLICKHOUSE", "clickhouse"),
    ("POSTGRES", "postgresql"),
    ("DATABASE_URL", "postgresql"),
    ("MYSQL", "mysql"),
    ("MARIADB", "mysql"),
    ("MONGO", "mongodb"),
    ("REDIS", "redis"),
    ("ELASTICSEARCH", "elasticsearch"),
    ("OPENSEARCH", "elasticsearch"),
    ("DYNAMO", "dynamodb"),
    ("CASSANDRA", "cassandra"),
    ("KAFKA", "kafka"),
    ("RABBIT", "rabbitmq"),
    ("AMQP", "rabbitmq"),
    ("NEO4J", "neo4j"),
    ("SQLITE", "sqlite"),
    # Short prefixes: only consulted when nothing more specific matched.
    ("CH_", "clickhouse"),
    ("PG", "postgresql"),
]
|
|
|
|
|
|
# Framework names, looked up as whole words in the combined manifest text.
_FRAMEWORK_SIGNALS: dict[str, str] = {
    "fastapi": "fastapi",
    "django": "django",
    "flask": "flask",
    "express": "express",
    "nextjs": "next.js",
    "next": "next.js",
    "nestjs": "nestjs",
    "spring": "spring",
    "rails": "rails",
    "laravel": "laravel",
    "phoenix": "phoenix",
    "gin": "gin",
    "actix": "actix",
}


def _word_tokens(text: str) -> set[str]:
    """Return the lowercase alphanumeric words that occur in *text*."""
    normalized = "".join(ch if ch.isalnum() else " " for ch in text.lower())
    return set(normalized.split())


def _env_name_matches(env_name: str, pattern: str) -> bool:
    """Return True if *pattern* occurs at the start of *env_name* or right after a ``_``."""
    start = env_name.find(pattern)
    while start != -1:
        if start == 0 or env_name[start - 1] == "_":
            return True
        start = env_name.find(pattern, start + 1)
    return False


def _load_package_json(path: Path) -> dict:
    """Parse package.json at *path*; return ``{}`` if unreadable, invalid or not a JSON object."""
    try:
        parsed = json.loads(_read_text(path) or "{}")
    except json.JSONDecodeError:
        return {}
    return parsed if isinstance(parsed, dict) else {}


def _node_dependency_names(package_json: dict) -> set[str]:
    """Collect lowercased names from ``dependencies`` and ``devDependencies``."""
    names: set[str] = set()
    for section in ("dependencies", "devDependencies"):
        entries = package_json.get(section)
        # A malformed manifest may hold a list/string here; ignore it instead of crashing.
        if isinstance(entries, dict):
            names.update(str(name).lower() for name in entries)
    return names


def discover_repo(project_root: Path, env_names: set[str] | None = None) -> RepoDiscovery:
    """Infer runtime-relevant stack hints from common manifest/config files.

    Only well-known file locations under *project_root* are inspected; nothing
    is executed and unreadable or malformed files are treated as empty.

    Args:
        project_root: Repository root directory to inspect.
        env_names: Optional environment variable *names* used as additional
            database signals. They are compared case-insensitively.

    Returns:
        A ``RepoDiscovery`` holding the detected languages, package managers,
        databases, compose services, frameworks and free-text hints.
    """
    discovery = RepoDiscovery()
    upper_env_names = {name.upper() for name in (env_names or set())}

    file_map: dict[str, Path] = {
        "pyproject": project_root / "pyproject.toml",
        "requirements": project_root / "requirements.txt",
        "requirements_dev": project_root / "requirements-dev.txt",
        "setup_py": project_root / "setup.py",
        "setup_cfg": project_root / "setup.cfg",
        "package": project_root / "package.json",
        "go_mod": project_root / "go.mod",
        "cargo": project_root / "Cargo.toml",
        "gemfile": project_root / "Gemfile",
        "build_gradle": project_root / "build.gradle",
        "build_gradle_kts": project_root / "build.gradle.kts",
        "pom": project_root / "pom.xml",
        "composer": project_root / "composer.json",
        "mix": project_root / "mix.exs",
        "docker_compose": project_root / "docker-compose.yml",
        "docker_compose_alt": project_root / "docker-compose.yaml",
        "compose": project_root / "compose.yaml",
        "compose_alt": project_root / "compose.yml",
        "prisma": project_root / "prisma" / "schema.prisma",
        "dockerfile": project_root / "Dockerfile",
    }
    # Probe the filesystem once; every later check is a set lookup.
    present = {name for name, path in file_map.items() if path.exists()}

    # ---- Language detection ----
    if present & {"pyproject", "requirements", "requirements_dev", "setup_py", "setup_cfg"}:
        discovery.languages.add("python")
    if "package" in present:
        discovery.languages.add("node")
    if "go_mod" in present:
        discovery.languages.add("go")
    if "cargo" in present:
        discovery.languages.add("rust")
    if "gemfile" in present:
        discovery.languages.add("ruby")
    if present & {"build_gradle", "build_gradle_kts", "pom"}:
        discovery.languages.add("java")
    if "composer" in present:
        discovery.languages.add("php")
    if "mix" in present:
        discovery.languages.add("elixir")

    # package.json is parsed once and reused for package manager and dependency checks.
    package_json = _load_package_json(file_map["package"]) if "package" in present else {}

    # ---- Package manager detection ----
    # Any file that marks the repo as Python also implies pip.
    if "python" in discovery.languages:
        discovery.package_managers.add("pip")
    if "package" in present:
        declared = package_json.get("packageManager")
        # The field looks like "pnpm@8.6.0"; keep only the tool name.
        manager = declared.split("@", 1)[0] if isinstance(declared, str) else ""
        if manager:
            discovery.package_managers.add(manager)
        # Otherwise fall back to lockfiles to distinguish npm/yarn/pnpm.
        elif (project_root / "pnpm-lock.yaml").exists():
            discovery.package_managers.add("pnpm")
        elif (project_root / "yarn.lock").exists():
            discovery.package_managers.add("yarn")
        else:
            discovery.package_managers.add("npm")
    if "go_mod" in present:
        discovery.package_managers.add("go")
    if "cargo" in present:
        discovery.package_managers.add("cargo")
    if "gemfile" in present:
        discovery.package_managers.add("bundler")
    if present & {"build_gradle", "build_gradle_kts"}:
        discovery.package_managers.add("gradle")
    if "pom" in present:
        discovery.package_managers.add("maven")
    if "composer" in present:
        discovery.package_managers.add("composer")
    if "mix" in present:
        discovery.package_managers.add("mix")

    # ---- Gather manifest content ----
    manifests = {
        name: _read_text(path)
        for name, path in file_map.items()
        if name in present
    }
    combined = "\n".join(manifests.values())

    # ---- Database detection from manifest content ----
    # Deliberately substring-based so that e.g. "psycopg" also matches "psycopg2-binary".
    _add_if_contains(discovery.databases, combined, _MANIFEST_DB_SIGNALS)

    # ---- Node.js dependency-specific detection ----
    # Match a signal against the full dependency name or one of its words, so
    # "pg" matches "pg" and "pg-promise" but not "openpgp".
    for dep_name in _node_dependency_names(package_json):
        dep_words = _word_tokens(dep_name)
        for signal, db_name in _NODE_DEP_DB_SIGNALS.items():
            if signal == dep_name or signal in dep_words:
                discovery.databases.add(db_name)

    # ---- Framework detection from manifest content ----
    # Whole-word matching: a plain substring test would report "gin" for any
    # manifest mentioning "engines", "nginx" or "plugin".
    manifest_words = _word_tokens(combined)
    for needle, framework in _FRAMEWORK_SIGNALS.items():
        if needle in manifest_words:
            discovery.frameworks.add(framework)

    # ---- Database detection from environment variable names ----
    for env_name in upper_env_names:
        for pattern, db_name in _ENV_DB_PATTERNS:
            # Patterns must start the name or follow "_", so "UPGRADE_MODE"
            # is not mistaken for a "PG" variable.
            if _env_name_matches(env_name, pattern):
                discovery.databases.add(db_name)
                break

    # ---- Docker compose service detection ----
    compose_blob = "\n".join(
        manifests.get(key, "")
        for key in ("docker_compose", "docker_compose_alt", "compose", "compose_alt")
    )
    _add_if_contains(discovery.services, compose_blob, _COMPOSE_SERVICE_SIGNALS)

    # ---- Hints from config files ----
    if "prisma" in present:
        discovery.hints.append("Prisma schema detected.")
    if (project_root / "alembic.ini").exists():
        discovery.hints.append("Alembic migration config detected.")
    if (project_root / "knexfile.js").exists() or (project_root / "knexfile.ts").exists():
        discovery.hints.append("Knex migration config detected.")
    if (project_root / "ormconfig.json").exists() or (project_root / "ormconfig.ts").exists():
        discovery.hints.append("TypeORM config detected.")
    if (project_root / "drizzle.config.ts").exists():
        discovery.hints.append("Drizzle ORM config detected.")
    if (project_root / "Makefile").exists():
        discovery.hints.append("Makefile available for build/task automation.")
    if "dockerfile" in present or (project_root / "docker").exists() or discovery.services:
        discovery.hints.append("Containerized services may be available for local verification.")

    return discovery
|
|
|
|
|
|
def format_repo_discovery(discovery: RepoDiscovery) -> str:
    """Render discovery results into a compact prompt summary.

    Each non-empty category becomes one line with its values sorted and
    comma-separated; hints follow verbatim, one per line. When nothing at all
    was detected, a single fallback sentence is returned instead.
    """
    sections = (
        ("Detected languages: ", discovery.languages),
        ("Likely package managers: ", discovery.package_managers),
        ("Detected databases/services in code or env: ", discovery.databases),
        ("Detected local service containers: ", discovery.services),
        ("Detected frameworks: ", discovery.frameworks),
    )
    summary = [prefix + ", ".join(sorted(values)) for prefix, values in sections if values]
    summary.extend(discovery.hints or ())
    if summary:
        return "\n".join(summary)
    return "No strong runtime/service signals were detected from repository manifests."
|