Files
air-watcher/src/crawlerClient.js
2026-03-05 11:00:45 +09:00

442 lines
13 KiB
JavaScript

"use strict";
/**
 * Lookup table from a collapsed routing-strategy token (lowercased, with
 * whitespace, underscores and hyphens removed) to its canonical name.
 * Frozen so the shared table cannot be modified at runtime.
 */
const ROUTING_STRATEGY_MAP = Object.freeze({
  primaryonly: "primaryOnly",
  priorityfallback: "priorityFallback",
  parallelrace: "parallelRace",
});
/**
 * Resolve a setting from an explicit option, falling back to the environment.
 *
 * @param {*} optionValue - Caller-supplied value; only `undefined` triggers the fallback.
 * @param {string} envKey - Name of the environment variable to read.
 * @returns {*} `optionValue` when it is not `undefined`, otherwise `process.env[envKey]`.
 */
function pickOptionOrEnv(optionValue, envKey) {
  if (optionValue === undefined) {
    return process.env[envKey];
  }
  return optionValue;
}
/**
 * Canonicalize a provider name for comparisons and map keys.
 *
 * @param {*} provider - Candidate provider name.
 * @returns {string} The trimmed, lowercased name, or "" when `provider` is not a string.
 */
function normalizeProviderName(provider) {
  const isText = typeof provider === "string";
  return isText ? provider.trim().toLowerCase() : "";
}
/**
 * Build the name of the per-provider endpoint environment variable.
 *
 * @param {string} provider - Provider name; must be a string.
 * @returns {string} `CRAWLER_ENDPOINT_` followed by the trimmed, uppercased name
 *   with every run of characters outside A-Z / 0-9 replaced by one underscore.
 */
function toProviderEnvKey(provider) {
  const upperCased = provider.trim().toUpperCase();
  const suffix = upperCased.replace(/[^A-Z0-9]+/g, "_");
  return `CRAWLER_ENDPOINT_${suffix}`;
}
/**
 * Parse a provider list into unique, normalized provider names.
 *
 * Accepts either an array of names or a comma-separated string. Each entry is
 * passed through `normalizeProviderName`; empty results are dropped and
 * duplicates are removed while keeping first-seen order.
 *
 * @param {Array<*>|string|*} rawProviders - Array of names, a comma-separated
 *   string, or anything else (treated as "no providers").
 * @returns {string[]} Unique normalized provider names.
 */
function parseProviderList(rawProviders) {
  let candidates;
  if (Array.isArray(rawProviders)) {
    // null/undefined entries are skipped instead of being stringified into
    // providers literally named "null" / "undefined".
    candidates = rawProviders.map((item) => (item == null ? "" : String(item)));
  } else if (typeof rawProviders === "string") {
    candidates = rawProviders.split(",");
  } else {
    return [];
  }
  const names = candidates
    .map((item) => normalizeProviderName(item))
    .filter(Boolean);
  return [...new Set(names)];
}
/**
 * Convert a user-supplied routing strategy into its canonical name.
 *
 * Matching ignores case, surrounding whitespace, and any whitespace,
 * underscores or hyphens inside the value (so "parallel-race" and
 * "Parallel_Race" both resolve to "parallelRace").
 *
 * @param {string|null|undefined} rawStrategy - Strategy as configured.
 * @returns {string} One of the values of `ROUTING_STRATEGY_MAP`; defaults to
 *   "priorityFallback" when the input is undefined, null or "".
 * @throws {Error} When the value is not a string or is not a supported strategy.
 */
function normalizeRoutingStrategy(rawStrategy) {
  if (rawStrategy === undefined || rawStrategy === null || rawStrategy === "") {
    return "priorityFallback";
  }
  if (typeof rawStrategy !== "string") {
    throw new Error("routing strategy must be a string");
  }
  const collapsed = rawStrategy.trim().toLowerCase().replace(/[\s_-]+/g, "");
  // Own-property check: a plain bracket lookup would also find inherited
  // members such as "constructor" and return a function instead of failing.
  const normalized = Object.hasOwn(ROUTING_STRATEGY_MAP, collapsed)
    ? ROUTING_STRATEGY_MAP[collapsed]
    : undefined;
  if (!normalized) {
    throw new Error(
      `Unsupported routing strategy: ${rawStrategy}. Use primaryOnly|priorityFallback|parallelRace`
    );
  }
  return normalized;
}
/**
 * Validate one raw offer and convert it into the normalized offer shape.
 *
 * @param {*} offer - Raw offer; expected to be an object with a `price` and
 *   optionally `provider`, `currency` and `metadata`.
 * @param {string} defaultCurrency - Currency used when the offer has none.
 * @param {string} [fallbackProvider] - Provider used when the offer has none;
 *   "unknown-provider" is used if this is empty too.
 * @returns {{provider: string, price: number, currency: string, fetchedAt: string, metadata: *}|null}
 *   The normalized offer, or null when the offer is not an object or its
 *   price is not a finite number that rounds to a positive integer.
 */
function normalizeOffer(offer, defaultCurrency, fallbackProvider) {
  if (!offer || typeof offer !== "object") return null;
  const rawPrice = Number(offer.price);
  if (!Number.isFinite(rawPrice)) return null;
  // Validate the rounded value: a raw price such as 0.4 is positive but would
  // otherwise be emitted as an offer costing 0.
  const price = Math.round(rawPrice);
  if (price <= 0) return null;
  const provider =
    typeof offer.provider === "string" && offer.provider.trim()
      ? offer.provider.trim()
      : fallbackProvider || "unknown-provider";
  const currency =
    typeof offer.currency === "string" && offer.currency.trim()
      ? offer.currency.trim().toUpperCase()
      : defaultCurrency;
  return {
    provider,
    price,
    currency,
    // Timestamp of normalization, not of the upstream fetch.
    fetchedAt: new Date().toISOString(),
    metadata: offer.metadata || null,
  };
}
/**
 * Normalize a list of raw offers, discarding the ones that fail validation.
 *
 * @param {*} offers - Raw offers; anything that is not an array yields [].
 * @param {string} [defaultCurrency="KRW"] - Currency for offers without one.
 * @param {string} [fallbackProvider] - Provider for offers without one.
 * @returns {Array<object>} The offers `normalizeOffer` accepted, in input order.
 */
function normalizeOffers(offers, defaultCurrency = "KRW", fallbackProvider) {
  if (!Array.isArray(offers)) {
    return [];
  }
  return offers.reduce((accepted, candidate) => {
    const normalized = normalizeOffer(candidate, defaultCurrency, fallbackProvider);
    if (normalized) {
      accepted.push(normalized);
    }
    return accepted;
  }, []);
}
/**
 * Coerce a value to a positive integer, or use a fallback.
 *
 * @param {*} value - Value to convert with `Number(...)`.
 * @param {*} fallback - Returned when the converted value is not an integer greater than zero.
 * @returns {*} The converted positive integer, or `fallback`.
 */
function parsePositiveInt(value, fallback) {
  const candidate = Number(value);
  const isPositiveInteger = Number.isInteger(candidate) && candidate > 0;
  return isPositiveInteger ? candidate : fallback;
}
/**
 * Wait for a number of milliseconds.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Decide whether an HTTP status code is worth retrying.
 *
 * @param {number} statusCode - HTTP response status.
 * @returns {boolean} True for 408, 429 and any status of 500 or above.
 */
function shouldRetryStatus(statusCode) {
  if (statusCode >= 500) {
    return true;
  }
  return statusCode === 408 || statusCode === 429;
}
/**
 * Check whether an error message matches one of the keywords this module
 * treats as a transient (retriable) failure.
 *
 * @param {string} message - Error message to inspect.
 * @returns {boolean} True when the message matches, case-insensitively.
 */
function isTransientMessage(message) {
  const transientPattern =
    /timeout|timed out|network|fetch failed|socket|econn|enotfound|eai_again/i;
  return transientPattern.test(message);
}
/**
 * Tell whether a thrown value is named "AbortError".
 *
 * @param {*} error - Thrown value; may be null or undefined.
 * @returns {boolean} True when `error.name` is exactly "AbortError".
 */
function isAbortError(error) {
  if (error == null) {
    return false;
  }
  return error.name === "AbortError";
}
/**
 * Attach a `retriable` flag to an error.
 *
 * Objects are flagged in place and returned as-is; any other value is first
 * wrapped in a new Error whose message is `String(value)`.
 *
 * @param {*} error - Thrown value to flag.
 * @param {boolean} retriable - Whether a retry should be attempted.
 * @returns {object} The flagged object or the new wrapping Error.
 */
function withRetriableFlag(error, retriable) {
  const isObject = Boolean(error) && typeof error === "object";
  const flagged = isObject ? error : new Error(String(error));
  flagged.retriable = retriable;
  return flagged;
}
/**
 * Convert anything thrown during a crawler request into a value carrying a
 * boolean `retriable` flag.
 *
 * - Abort errors become a new retriable "timed out" Error.
 * - Non-Error values are flagged as not retriable.
 * - Errors that already have a boolean `retriable` are returned untouched.
 * - Other Errors are flagged according to `isTransientMessage`.
 *
 * @param {*} error - The thrown value.
 * @param {number} requestTimeoutMs - Timeout reported in the abort message.
 * @returns {object} The flagged error.
 */
function toRequestError(error, requestTimeoutMs) {
  if (isAbortError(error)) {
    const timeoutError = new Error(`Crawler request timed out after ${requestTimeoutMs}ms`);
    return withRetriableFlag(timeoutError, true);
  }
  const isRealError = error instanceof Error;
  if (isRealError && typeof error.retriable === "boolean") {
    return error;
  }
  const retriable = isRealError ? isTransientMessage(error.message || "") : false;
  return withRetriableFlag(error, retriable);
}
/**
 * Compute the capped exponential backoff delay for a retry.
 *
 * @param {number} attempt - 1-based number of the attempt that just failed.
 * @param {number} baseDelayMs - Delay after the first attempt.
 * @param {number} maxDelayMs - Upper bound for the delay.
 * @returns {number} `baseDelayMs` doubled once per attempt after the first, capped at `maxDelayMs`.
 */
function computeRetryDelayMs(attempt, baseDelayMs, maxDelayMs) {
  const doublings = Math.max(0, attempt - 1);
  const uncapped = baseDelayMs * Math.pow(2, doublings);
  return Math.min(maxDelayMs, uncapped);
}
/**
 * Deterministic 32-bit polynomial hash (multiplier 31) of a string.
 *
 * @param {string} text - Text to hash; processed one UTF-16 code unit at a time.
 * @returns {number} Unsigned 32-bit integer; 0 for the empty string.
 */
function stableHash(text) {
  // split("") yields UTF-16 code units, matching an index loop over charCodeAt.
  return text
    .split("")
    .reduce((hash, unit) => (hash * 31 + unit.charCodeAt(0)) >>> 0, 0);
}
/**
 * Serialize the parts of a search request that seed the mock price generator.
 *
 * @param {object} [searchParams] - Search parameters; may be null or undefined.
 * @returns {string} JSON with the "from-to" pairs of all segments joined by "|"
 *   (or "no-segments"), plus the departure window, stay duration and total
 *   passenger count, each null when missing or falsy.
 */
function serializeSearchSeed(searchParams) {
  const rawSegments = searchParams?.segments;
  const legs = Array.isArray(rawSegments) ? rawSegments : [];

  const describeLeg = (leg) => {
    const origin = leg.from || "?";
    const destination = leg.to || "?";
    return `${origin}-${destination}`;
  };

  let segmentText = "no-segments";
  if (legs.length > 0) {
    segmentText = legs.map(describeLeg).join("|");
  }

  const seed = {
    segmentText,
    departureDateWindow: searchParams?.departureDateWindow || null,
    stayDurationDays: searchParams?.stayDurationDays || null,
    passengers: searchParams?.passengers?.total || null,
  };
  return JSON.stringify(seed);
}
/**
 * Create an in-memory crawler that fabricates quotes without any network call.
 *
 * Prices derive from a hash of the search parameters plus a per-instance call
 * counter, so the same search yields a stable base price that drifts from
 * call to call.
 *
 * @returns {{getQuotes: function({searchParams: object}): Promise<Array<object>>}}
 *   Crawler returning three KRW offers from "mock-ota-a", "mock-ota-b" and "mock-ota-c".
 */
function createMockCrawler() {
  // [provider, markup over the best price]
  const providerMarkups = [
    ["mock-ota-a", 1.03],
    ["mock-ota-b", 1],
    ["mock-ota-c", 1.06],
  ];
  let callCount = 0;

  async function getQuotes({ searchParams }) {
    callCount += 1;
    const seed = stableHash(serializeSearchSeed(searchParams));
    const basePrice = 700000 + (seed % 450000);
    // Drift cycles through -40..39 steps of 2500 as the counter advances.
    const driftSteps = ((seed + callCount * 17) % 80) - 40;
    const best = Math.max(150000, basePrice + driftSteps * 2500);
    return providerMarkups.map(([provider, markup]) => ({
      provider,
      price: Math.round(best * markup),
      currency: "KRW",
    }));
  }

  return { getQuotes };
}
/**
 * Create a crawler that POSTs search requests to a single HTTP endpoint.
 *
 * Each setting is read from `options` first and from the environment when the
 * option is `undefined`:
 *   - endpoint          / CRAWLER_ENDPOINT             (required)
 *   - requestTimeoutMs  / CRAWLER_REQUEST_TIMEOUT_MS   (default 15000)
 *   - maxAttempts       / CRAWLER_MAX_ATTEMPTS         (default 2)
 *   - retryBaseDelayMs  / CRAWLER_RETRY_BASE_DELAY_MS  (default 300)
 *   - retryMaxDelayMs   / CRAWLER_RETRY_MAX_DELAY_MS   (default 3000)
 *
 * @param {object} [options]
 * @param {string} [options.endpoint] - URL to POST to.
 * @param {string} [options.provider] - Provider name applied to offers lacking one.
 * @param {Function} [options.fetch] - fetch implementation; defaults to `global.fetch`.
 * @returns {{getQuotes: function({watchId: *, searchParams: *}): Promise<Array<object>>}}
 * @throws {Error} When no endpoint is configured or no fetch implementation exists.
 */
function createEndpointCrawler(options = {}) {
  const endpoint = pickOptionOrEnv(options.endpoint, "CRAWLER_ENDPOINT");
  if (!endpoint) {
    throw new Error("Crawler endpoint is required");
  }
  const fallbackProvider = normalizeProviderName(options.provider);
  const fetchImpl = options.fetch || global.fetch;
  if (typeof fetchImpl !== "function") {
    throw new Error("global fetch is unavailable. Node.js 18+ is required.");
  }
  const requestTimeoutMs = parsePositiveInt(
    pickOptionOrEnv(options.requestTimeoutMs, "CRAWLER_REQUEST_TIMEOUT_MS"),
    15000
  );
  const maxAttempts = parsePositiveInt(
    pickOptionOrEnv(options.maxAttempts, "CRAWLER_MAX_ATTEMPTS"),
    2
  );
  const retryBaseDelayMs = parsePositiveInt(
    pickOptionOrEnv(options.retryBaseDelayMs, "CRAWLER_RETRY_BASE_DELAY_MS"),
    300
  );
  const retryMaxDelayMs = parsePositiveInt(
    pickOptionOrEnv(options.retryMaxDelayMs, "CRAWLER_RETRY_MAX_DELAY_MS"),
    3000
  );

  // Performs exactly one POST attempt. The abort timer is scoped to this
  // attempt and cleared as soon as it settles, so no stale timer stays armed
  // while the caller waits out a retry backoff.
  async function requestOnce(watchId, searchParams) {
    const abortController = new AbortController();
    const timeout = setTimeout(() => {
      abortController.abort();
    }, requestTimeoutMs);
    try {
      const response = await fetchImpl(endpoint, {
        method: "POST",
        headers: { "content-type": "application/json" },
        body: JSON.stringify({
          watchId,
          searchParams,
        }),
        signal: abortController.signal,
      });
      if (!response.ok) {
        const body = await response.text();
        throw withRetriableFlag(
          new Error(`Crawler request failed (${response.status}): ${body}`),
          shouldRetryStatus(response.status)
        );
      }
      const payload = await response.json();
      // The payload is either a bare array of offers or an object with `offers`.
      const offers = Array.isArray(payload) ? payload : payload?.offers;
      const defaultCurrency =
        typeof payload?.currency === "string" ? payload.currency.toUpperCase() : "KRW";
      const normalized = normalizeOffers(offers, defaultCurrency, fallbackProvider);
      if (normalized.length === 0) {
        // An empty result is a definitive answer, so it is not retried.
        throw withRetriableFlag(new Error("Crawler returned no valid offers"), false);
      }
      return normalized;
    } finally {
      clearTimeout(timeout);
    }
  }

  return {
    /**
     * Fetch and normalize quotes, retrying retriable failures with backoff.
     *
     * @param {{watchId: *, searchParams: *}} request - Sent as the JSON body.
     * @returns {Promise<Array<object>>} Non-empty list of normalized offers.
     * @throws The last request error once attempts are exhausted, or at once
     *   when the failure is not retriable.
     */
    async getQuotes({ watchId, searchParams }) {
      let lastError;
      for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
        try {
          return await requestOnce(watchId, searchParams);
        } catch (error) {
          const requestError = toRequestError(error, requestTimeoutMs);
          lastError = requestError;
          const shouldRetry = requestError.retriable === true && attempt < maxAttempts;
          if (!shouldRetry) {
            throw requestError;
          }
          await sleep(computeRetryDelayMs(attempt, retryBaseDelayMs, retryMaxDelayMs));
        }
      }
      throw lastError || new Error("Crawler request failed");
    },
  };
}
/**
 * Collect the per-provider endpoints given in `options.providerEndpoints`.
 *
 * @param {object} [options]
 * @param {Object<string, string>} [options.providerEndpoints] - Provider name to endpoint URL.
 * @returns {Map<string, string>} Normalized provider name to trimmed endpoint.
 *   Entries with an empty name, or an endpoint that is not a non-blank string,
 *   are skipped. Empty when `providerEndpoints` is missing or not an object.
 */
function buildProviderEndpointMap(options = {}) {
  const rawEndpoints = options.providerEndpoints;
  const endpointsByProvider = new Map();
  if (!rawEndpoints || typeof rawEndpoints !== "object") {
    return endpointsByProvider;
  }
  Object.entries(rawEndpoints).forEach(([name, url]) => {
    const key = normalizeProviderName(name);
    const hasUrl = typeof url === "string" && url.trim() !== "";
    if (key && hasUrl) {
      endpointsByProvider.set(key, url.trim());
    }
  });
  return endpointsByProvider;
}
/**
 * Create a crawler that routes quote requests across several providers.
 *
 * Providers come from `options.providers` or CRAWLER_PROVIDERS. Each provider
 * uses, in order of preference: `options.providerEndpoints[provider]`, the
 * CRAWLER_ENDPOINT_<PROVIDER> environment variable, then the shared endpoint
 * (`options.endpoint` or CRAWLER_ENDPOINT).
 *
 * Routing (from `options.routingStrategy` or CRAWLER_ROUTING_STRATEGY):
 *   - "primaryOnly":      only the first provider is queried.
 *   - "parallelRace":     all providers are queried at once; the first to succeed wins.
 *   - "priorityFallback": providers are tried in order until one succeeds.
 * A request whose `searchParams.provider` names a provider bypasses routing
 * and is sent to that provider only.
 *
 * Errors thrown by `getQuotes` keep the underlying failure in `error.cause`
 * (an AggregateError when several providers failed).
 *
 * @param {object} [options] - Also forwarded to `createEndpointCrawler`.
 * @returns {{getQuotes: function(object): Promise<Array<object>>}|null}
 *   The routing crawler, or null when no providers are configured.
 * @throws {Error} When a configured provider has no endpoint, or the routing
 *   strategy is invalid.
 */
function createMultiSourceCrawler(options = {}) {
  const providers = parseProviderList(pickOptionOrEnv(options.providers, "CRAWLER_PROVIDERS"));
  if (providers.length === 0) return null;

  const routingStrategy = normalizeRoutingStrategy(
    pickOptionOrEnv(options.routingStrategy, "CRAWLER_ROUTING_STRATEGY")
  );
  const sharedEndpointRaw = pickOptionOrEnv(options.endpoint, "CRAWLER_ENDPOINT");
  const sharedEndpoint =
    typeof sharedEndpointRaw === "string" && sharedEndpointRaw.trim()
      ? sharedEndpointRaw.trim()
      : null;
  const providerEndpointMap = buildProviderEndpointMap(options);

  const missingProviders = [];
  const sources = [];
  for (const provider of providers) {
    const providerSpecificFromEnv = process.env[toProviderEnvKey(provider)];
    const providerSpecific =
      providerEndpointMap.get(provider) ||
      (typeof providerSpecificFromEnv === "string" && providerSpecificFromEnv.trim()
        ? providerSpecificFromEnv.trim()
        : null);
    const endpoint = providerSpecific || sharedEndpoint;
    if (!endpoint) {
      // Keep scanning so the error below can list every misconfigured provider.
      missingProviders.push(provider);
      continue;
    }
    sources.push({
      provider,
      crawler: createEndpointCrawler({
        ...options,
        endpoint,
        provider,
      }),
    });
  }
  if (missingProviders.length > 0) {
    throw new Error(
      `Missing endpoint for provider(s): ${missingProviders.join(
        ", "
      )}. Set CRAWLER_ENDPOINT_<PROVIDER> or CRAWLER_ENDPOINT.`
    );
  }
  if (sources.length === 0) {
    throw new Error("No valid providers configured");
  }

  return {
    /**
     * Fetch quotes according to the configured routing strategy.
     *
     * @param {object} request - Passed unchanged to the provider crawlers.
     * @returns {Promise<Array<object>>} Offers from the provider that answered.
     * @throws {Error} When the selected provider, or every provider, fails.
     */
    async getQuotes(request) {
      const explicitProvider = normalizeProviderName(request.searchParams?.provider);
      if (explicitProvider) {
        const target = sources.find((source) => source.provider === explicitProvider);
        if (!target) {
          throw new Error(`Requested provider not available: ${explicitProvider}`);
        }
        try {
          return await target.crawler.getQuotes(request);
        } catch (error) {
          throw new Error(`Explicit provider failed (${target.provider}): ${error.message}`, {
            cause: error,
          });
        }
      }

      if (routingStrategy === "primaryOnly") {
        const primary = sources[0];
        try {
          return await primary.crawler.getQuotes(request);
        } catch (error) {
          throw new Error(`Primary provider failed (${primary.provider}): ${error.message}`, {
            cause: error,
          });
        }
      }

      if (routingStrategy === "parallelRace") {
        const attempts = sources.map(({ provider, crawler }) =>
          crawler.getQuotes(request).then((offers) => ({ provider, offers }))
        );
        try {
          const winner = await Promise.any(attempts);
          return winner.offers;
        } catch (error) {
          // Promise.any rejects with an AggregateError holding every failure.
          const reasons = Array.isArray(error?.errors)
            ? error.errors.map((item) => item?.message || String(item))
            : [error?.message || String(error)];
          throw new Error(`All provider requests failed: ${reasons.join(" | ")}`, {
            cause: error,
          });
        }
      }

      // priorityFallback: try providers in configured order.
      const errors = [];
      const failures = [];
      for (const { provider, crawler } of sources) {
        try {
          return await crawler.getQuotes(request);
        } catch (error) {
          errors.push(`${provider}: ${error.message}`);
          failures.push(error);
        }
      }
      throw new Error(`All provider requests failed: ${errors.join(" | ")}`, {
        cause: new AggregateError(failures, "All provider requests failed"),
      });
    },
  };
}
/**
 * Pick the crawler implementation matching the given options.
 *
 * Order of precedence:
 *   1. `options.client` when it exposes a `getQuotes` function (returned as-is);
 *   2. `options.getQuotes` when it is a function (wrapped in an object);
 *   3. a multi-source crawler when providers are configured;
 *   4. a single-endpoint crawler when an endpoint is configured;
 *   5. the mock crawler.
 *
 * @param {object} [options] - Forwarded to the underlying factories.
 * @returns {{getQuotes: Function}} A crawler client.
 */
function createCrawlerClient(options = {}) {
  const injected = options.client;
  if (injected && typeof injected.getQuotes === "function") {
    return injected;
  }
  if (typeof options.getQuotes === "function") {
    return { getQuotes: options.getQuotes };
  }

  const multiSource = createMultiSourceCrawler(options);
  if (multiSource) {
    return multiSource;
  }

  const hasEndpoint = Boolean(pickOptionOrEnv(options.endpoint, "CRAWLER_ENDPOINT"));
  return hasEndpoint ? createEndpointCrawler(options) : createMockCrawler();
}
// Public API (CommonJS). createCrawlerClient chooses among the other three
// factories based on its options; the individual factories are exported as well.
module.exports = {
createCrawlerClient,
createEndpointCrawler,
createMultiSourceCrawler,
createMockCrawler,
};