442 lines
13 KiB
JavaScript
442 lines
13 KiB
JavaScript
"use strict";
|
|
|
|
/**
 * Lookup table from a collapsed strategy spelling (lower-cased, with
 * whitespace, "_" and "-" removed) to its canonical camelCase name.
 *
 * The table has a null prototype so inherited keys such as "constructor"
 * never resolve to a value, and it is frozen because it is shared
 * module-level state.
 */
const ROUTING_STRATEGY_MAP = Object.freeze(
  Object.assign(Object.create(null), {
    primaryonly: "primaryOnly",
    priorityfallback: "priorityFallback",
    parallelrace: "parallelRace",
  })
);
|
|
|
|
/**
 * Resolve a setting from an explicit option, falling back to the environment.
 *
 * @param {*} optionValue - Caller-supplied value; used whenever it is not
 *   `undefined` (so `null`, `""` and `0` still win over the environment).
 * @param {string} envKey - Environment variable consulted otherwise.
 * @returns {*} The option value, or `process.env[envKey]` (may be `undefined`).
 */
function pickOptionOrEnv(optionValue, envKey) {
  if (optionValue === undefined) {
    return process.env[envKey];
  }
  return optionValue;
}
|
|
|
|
/**
 * Canonicalize a provider name: trimmed and lower-cased.
 *
 * @param {*} provider - Candidate provider name.
 * @returns {string} The normalized name, or `""` when `provider` is not a string.
 */
function normalizeProviderName(provider) {
  const isText = typeof provider === "string";
  return isText ? provider.trim().toLowerCase() : "";
}
|
|
|
|
/**
 * Build the name of the per-provider endpoint environment variable.
 *
 * The provider is trimmed and upper-cased, and every run of characters
 * outside `A-Z0-9` collapses to a single underscore.
 *
 * @param {string} provider - Provider name (must be a string).
 * @returns {string} A key of the form `CRAWLER_ENDPOINT_<PROVIDER>`.
 */
function toProviderEnvKey(provider) {
  const upperCased = provider.trim().toUpperCase();
  const suffix = upperCased.replace(/[^A-Z0-9]+/g, "_");
  return `CRAWLER_ENDPOINT_${suffix}`;
}
|
|
|
|
/**
 * Parse a provider list given as an array or a comma-separated string.
 *
 * Each entry is normalized with `normalizeProviderName`; empty results are
 * dropped and duplicates removed, keeping first-occurrence order.
 *
 * @param {Array<*>|string|*} rawProviders - Array entries are stringified
 *   first; any other non-string input yields an empty list.
 * @returns {string[]} Unique normalized provider names.
 */
function parseProviderList(rawProviders) {
  let candidates;
  if (Array.isArray(rawProviders)) {
    candidates = rawProviders.map((entry) => String(entry));
  } else if (typeof rawProviders === "string") {
    candidates = rawProviders.split(",");
  } else {
    return [];
  }

  const unique = new Set();
  for (const candidate of candidates) {
    const name = normalizeProviderName(candidate);
    if (name) {
      unique.add(name);
    }
  }
  return Array.from(unique);
}
|
|
|
|
/**
 * Convert a user-supplied routing strategy into its canonical name.
 *
 * Matching ignores case, surrounding whitespace and any whitespace, "_" or
 * "-" separators, so "parallel_race" and "Parallel Race" both resolve to
 * "parallelRace".
 *
 * @param {string|null|undefined} rawStrategy - Requested strategy.
 * @returns {string} "primaryOnly", "priorityFallback" or "parallelRace";
 *   "priorityFallback" when the input is `undefined`, `null` or `""`.
 * @throws {Error} When the input is not a string or names no known strategy.
 */
function normalizeRoutingStrategy(rawStrategy) {
  if (rawStrategy === undefined || rawStrategy === null || rawStrategy === "") {
    return "priorityFallback";
  }
  if (typeof rawStrategy !== "string") {
    throw new Error("routing strategy must be a string");
  }

  const collapsed = rawStrategy.trim().toLowerCase().replace(/[\s_-]+/g, "");
  // Own-property check: a bare lookup would also find inherited members
  // (e.g. "constructor") and hand back a non-string value.
  if (!Object.hasOwn(ROUTING_STRATEGY_MAP, collapsed)) {
    throw new Error(
      `Unsupported routing strategy: ${rawStrategy}. Use primaryOnly|priorityFallback|parallelRace`
    );
  }
  return ROUTING_STRATEGY_MAP[collapsed];
}
|
|
|
|
/**
 * Validate one raw offer and convert it to the internal offer shape.
 *
 * @param {*} offer - Raw offer; must be an object with a numeric-convertible
 *   `price`. Optional `provider`, `currency` and `metadata` are honoured.
 * @param {string} defaultCurrency - Currency used when the offer has no
 *   non-blank string `currency`.
 * @param {string} [fallbackProvider] - Provider used when the offer has no
 *   non-blank string `provider`; "unknown-provider" if this is empty too.
 * @returns {{provider: string, price: number, currency: string,
 *   fetchedAt: string, metadata: *}|null} The normalized offer, or `null`
 *   when the offer is not an object or its rounded price is not a finite
 *   positive number. `fetchedAt` is the ISO timestamp of normalization.
 */
function normalizeOffer(offer, defaultCurrency, fallbackProvider) {
  if (!offer || typeof offer !== "object") return null;

  // Validate the rounded amount: a tiny positive price (e.g. 0.4) would
  // otherwise pass a "> 0" check and then be stored as 0.
  const price = Math.round(Number(offer.price));
  if (!Number.isFinite(price) || price <= 0) return null;

  const provider =
    typeof offer.provider === "string" && offer.provider.trim()
      ? offer.provider.trim()
      : fallbackProvider || "unknown-provider";
  const currency =
    typeof offer.currency === "string" && offer.currency.trim()
      ? offer.currency.trim().toUpperCase()
      : defaultCurrency;

  return {
    provider,
    price,
    currency,
    fetchedAt: new Date().toISOString(),
    metadata: offer.metadata || null,
  };
}
|
|
|
|
/**
 * Normalize a list of raw offers, discarding the ones that fail validation.
 *
 * @param {*} offers - Raw offers; anything other than an array yields `[]`.
 * @param {string} [defaultCurrency="KRW"] - Currency passed to `normalizeOffer`.
 * @param {string} [fallbackProvider] - Provider passed to `normalizeOffer`.
 * @returns {Array<object>} The offers for which `normalizeOffer` returned a
 *   truthy value, in input order.
 */
function normalizeOffers(offers, defaultCurrency = "KRW", fallbackProvider) {
  if (!Array.isArray(offers)) {
    return [];
  }
  return offers.flatMap((rawOffer) => {
    const accepted = normalizeOffer(rawOffer, defaultCurrency, fallbackProvider);
    // Wrap in an array so flatMap keeps or drops exactly one element.
    return accepted ? [accepted] : [];
  });
}
|
|
|
|
/**
 * Coerce a value to a positive integer, or use a fallback.
 *
 * @param {*} value - Value converted with `Number(...)`.
 * @param {*} fallback - Returned when the conversion is not an integer > 0.
 * @returns {*} The parsed integer, or `fallback`.
 */
function parsePositiveInt(value, fallback) {
  const candidate = Number(value);
  const isPositiveInteger = Number.isInteger(candidate) && candidate > 0;
  return isPositiveInteger ? candidate : fallback;
}
|
|
|
|
/**
 * Wait for a delay.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => setTimeout(done, ms));
}
|
|
|
|
/**
 * Decide whether an HTTP status code is worth retrying.
 *
 * @param {number} statusCode - HTTP response status.
 * @returns {boolean} `true` for 408, 429 and anything >= 500.
 */
function shouldRetryStatus(statusCode) {
  if (statusCode === 408 || statusCode === 429) {
    return true;
  }
  return statusCode >= 500;
}
|
|
|
|
/**
 * Heuristically detect a transient (network/timeout) failure from its message.
 *
 * @param {string} message - Error message to inspect.
 * @returns {boolean} `true` when the message contains one of the known
 *   transient markers (case-insensitive).
 */
function isTransientMessage(message) {
  const transientPattern =
    /timeout|timed out|network|fetch failed|socket|econn|enotfound|eai_again/i;
  return transientPattern.test(message);
}
|
|
|
|
/**
 * Check whether a thrown value is an abort error.
 *
 * @param {*} error - Any thrown value, including `null`/`undefined`.
 * @returns {boolean} `true` when its `name` is exactly "AbortError".
 */
function isAbortError(error) {
  if (error === null || error === undefined) {
    return false;
  }
  return error.name === "AbortError";
}
|
|
|
|
/**
 * Attach a `retriable` flag to a thrown value.
 *
 * Objects (including Error instances) are mutated in place and returned;
 * any other value is wrapped in a new `Error` whose message is its string form.
 *
 * @param {*} error - Thrown value to tag.
 * @param {boolean} retriable - Whether the failure may be retried.
 * @returns {object} The tagged object or the wrapping Error.
 */
function withRetriableFlag(error, retriable) {
  const isObject = Boolean(error) && typeof error === "object";
  const tagged = isObject ? error : new Error(String(error));
  tagged.retriable = retriable;
  return tagged;
}
|
|
|
|
/**
 * Convert whatever a request attempt threw into a value carrying a boolean
 * `retriable` flag.
 *
 * Rules, in order:
 *  1. Abort errors become a retriable "timed out" Error; the original abort
 *     error is kept as `cause` so its stack is not lost.
 *  2. Non-Error values are tagged as non-retriable via `withRetriableFlag`.
 *  3. Errors that already carry a boolean `retriable` are returned unchanged.
 *  4. Any other Error is retriable only if its message looks transient.
 *
 * @param {*} error - Value thrown by the request attempt.
 * @param {number} requestTimeoutMs - Timeout reported in the timeout message.
 * @returns {object} The tagged error.
 */
function toRequestError(error, requestTimeoutMs) {
  if (isAbortError(error)) {
    return withRetriableFlag(
      new Error(`Crawler request timed out after ${requestTimeoutMs}ms`, { cause: error }),
      true
    );
  }

  if (!(error instanceof Error)) {
    return withRetriableFlag(error, false);
  }

  if (typeof error.retriable === "boolean") {
    return error;
  }

  return withRetriableFlag(error, isTransientMessage(error.message || ""));
}
|
|
|
|
/**
 * Compute the capped exponential backoff delay before the next attempt.
 *
 * The delay doubles with each attempt after the first (attempt 1 uses the
 * base delay) and never exceeds `maxDelayMs`.
 *
 * @param {number} attempt - 1-based number of the attempt that just failed.
 * @param {number} baseDelayMs - Delay after the first attempt.
 * @param {number} maxDelayMs - Upper bound for the delay.
 * @returns {number} Delay in milliseconds.
 */
function computeRetryDelayMs(attempt, baseDelayMs, maxDelayMs) {
  const doublings = Math.max(0, attempt - 1);
  const uncapped = baseDelayMs * Math.pow(2, doublings);
  return Math.min(maxDelayMs, uncapped);
}
|
|
|
|
/**
 * Deterministic 32-bit polynomial hash of a string.
 *
 * Folds each UTF-16 code unit into the running value with multiplier 31,
 * truncating to an unsigned 32-bit integer after every step.
 *
 * @param {string} text - Input text.
 * @returns {number} Unsigned 32-bit hash; 0 for the empty string.
 */
function stableHash(text) {
  let digest = 0;
  let position = 0;
  while (position < text.length) {
    digest = (digest * 31 + text.charCodeAt(position)) >>> 0;
    position += 1;
  }
  return digest;
}
|
|
|
|
/**
 * Serialize the parts of a search request that seed the mock price.
 *
 * @param {object} [searchParams] - Search parameters; may be nullish.
 *   `segments` is rendered as "FROM-TO" pairs joined by "|" (missing ends
 *   become "?"), or "no-segments" when absent/empty/not an array.
 * @returns {string} JSON text with `segmentText`, `departureDateWindow`,
 *   `stayDurationDays` and `passengers` (from `passengers.total`); falsy
 *   values are stored as `null`.
 */
function serializeSearchSeed(searchParams) {
  const rawSegments = searchParams?.segments;
  const legs = Array.isArray(rawSegments) ? rawSegments : [];

  let segmentText = "no-segments";
  if (legs.length > 0) {
    const renderedLegs = legs.map((leg) => `${leg.from || "?"}-${leg.to || "?"}`);
    segmentText = renderedLegs.join("|");
  }

  const seed = {
    segmentText,
    departureDateWindow: searchParams?.departureDateWindow || null,
    stayDurationDays: searchParams?.stayDurationDays || null,
    passengers: searchParams?.passengers?.total || null,
  };
  return JSON.stringify(seed);
}
|
|
|
|
/**
 * Create an in-memory crawler that fabricates deterministic-looking quotes.
 *
 * Prices derive from a hash of the search parameters plus a per-instance
 * call counter, so repeated calls drift while staying reproducible.
 *
 * @returns {{getQuotes: function({searchParams: object}): Promise<Array<{
 *   provider: string, price: number, currency: string}>>}} A client yielding
 *   three KRW offers ("mock-ota-a", "mock-ota-b", "mock-ota-c").
 */
function createMockCrawler() {
  let callCount = 0;

  // [provider, multiplier applied to the best price]
  const markups = [
    ["mock-ota-a", 1.03],
    ["mock-ota-b", 1],
    ["mock-ota-c", 1.06],
  ];

  async function getQuotes({ searchParams }) {
    callCount += 1;
    const seed = stableHash(serializeSearchSeed(searchParams));
    const basePrice = 700000 + (seed % 450000);
    // Drift in the range [-40, 39], shifted on every call.
    const driftFactor = ((seed + callCount * 17) % 80) - 40;
    const best = Math.max(150000, basePrice + driftFactor * 2500);

    return markups.map(([provider, multiplier]) => ({
      provider,
      price: Math.round(best * multiplier),
      currency: "KRW",
    }));
  }

  return { getQuotes };
}
|
|
|
|
/**
 * Create a crawler client that POSTs search requests to an HTTP endpoint,
 * with a per-request timeout and capped exponential-backoff retries.
 *
 * Each setting comes from `options` or, when the option is `undefined`, from
 * the named environment variable:
 *  - `endpoint` / CRAWLER_ENDPOINT (required)
 *  - `requestTimeoutMs` / CRAWLER_REQUEST_TIMEOUT_MS (default 15000)
 *  - `maxAttempts` / CRAWLER_MAX_ATTEMPTS (default 2)
 *  - `retryBaseDelayMs` / CRAWLER_RETRY_BASE_DELAY_MS (default 300)
 *  - `retryMaxDelayMs` / CRAWLER_RETRY_MAX_DELAY_MS (default 3000)
 *
 * @param {object} [options]
 * @param {string} [options.provider] - Provider name used for offers that do
 *   not name one themselves.
 * @param {Function} [options.fetch] - fetch implementation; defaults to the
 *   global `fetch`.
 * @returns {{getQuotes: function({watchId: *, searchParams: *}):
 *   Promise<Array<object>>}} Client whose `getQuotes` resolves to the
 *   normalized offers and rejects with an error carrying `retriable`.
 * @throws {Error} When no endpoint or no fetch implementation is available.
 */
function createEndpointCrawler(options = {}) {
  const endpoint = pickOptionOrEnv(options.endpoint, "CRAWLER_ENDPOINT");
  if (!endpoint) {
    throw new Error("Crawler endpoint is required");
  }
  const fallbackProvider = normalizeProviderName(options.provider);

  const fetchImpl = options.fetch || global.fetch;
  if (typeof fetchImpl !== "function") {
    throw new Error("global fetch is unavailable. Node.js 18+ is required.");
  }

  const requestTimeoutMs = parsePositiveInt(
    pickOptionOrEnv(options.requestTimeoutMs, "CRAWLER_REQUEST_TIMEOUT_MS"),
    15000
  );
  const maxAttempts = parsePositiveInt(
    pickOptionOrEnv(options.maxAttempts, "CRAWLER_MAX_ATTEMPTS"),
    2
  );
  const retryBaseDelayMs = parsePositiveInt(
    pickOptionOrEnv(options.retryBaseDelayMs, "CRAWLER_RETRY_BASE_DELAY_MS"),
    300
  );
  const retryMaxDelayMs = parsePositiveInt(
    pickOptionOrEnv(options.retryMaxDelayMs, "CRAWLER_RETRY_MAX_DELAY_MS"),
    3000
  );

  // Performs a single POST and returns the normalized offers. The abort timer
  // covers the body read as well and is cleared as soon as the attempt
  // settles, so it is never left armed during the retry backoff.
  async function requestOnce(watchId, searchParams) {
    const abortController = new AbortController();
    const timeout = setTimeout(() => {
      abortController.abort();
    }, requestTimeoutMs);

    try {
      const response = await fetchImpl(endpoint, {
        method: "POST",
        headers: { "content-type": "application/json" },
        body: JSON.stringify({
          watchId,
          searchParams,
        }),
        signal: abortController.signal,
      });

      if (!response.ok) {
        const body = await response.text();
        throw withRetriableFlag(
          new Error(`Crawler request failed (${response.status}): ${body}`),
          shouldRetryStatus(response.status)
        );
      }

      const payload = await response.json();
      // The payload is either a bare array of offers or `{ offers, currency }`.
      const offers = Array.isArray(payload) ? payload : payload?.offers;
      // A blank payload currency must not become the default; fall back to KRW.
      const payloadCurrency =
        typeof payload?.currency === "string" ? payload.currency.trim().toUpperCase() : "";
      const defaultCurrency = payloadCurrency || "KRW";
      const normalized = normalizeOffers(offers, defaultCurrency, fallbackProvider);

      if (normalized.length === 0) {
        throw withRetriableFlag(new Error("Crawler returned no valid offers"), false);
      }

      return normalized;
    } finally {
      clearTimeout(timeout);
    }
  }

  return {
    async getQuotes({ watchId, searchParams }) {
      let lastError;

      for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
        try {
          return await requestOnce(watchId, searchParams);
        } catch (error) {
          lastError = toRequestError(error, requestTimeoutMs);
          const shouldRetry = lastError.retriable === true && attempt < maxAttempts;
          if (!shouldRetry) {
            throw lastError;
          }
        }

        await sleep(computeRetryDelayMs(attempt, retryBaseDelayMs, retryMaxDelayMs));
      }

      // Safety net; the loop above always returns or throws.
      throw lastError || new Error("Crawler request failed");
    },
  };
}
|
|
|
|
/**
 * Build a provider -> endpoint map from `options.providerEndpoints`.
 *
 * Provider names are normalized with `normalizeProviderName` and endpoints
 * are trimmed. Entries with an empty normalized name, a non-string endpoint
 * or a blank endpoint are skipped.
 *
 * @param {object} [options]
 * @param {object} [options.providerEndpoints] - Provider name -> endpoint URL.
 * @returns {Map<string, string>} Possibly empty map keyed by normalized name.
 */
function buildProviderEndpointMap(options = {}) {
  const endpointsByProvider = new Map();
  const configured = options.providerEndpoints;
  if (!configured || typeof configured !== "object") {
    return endpointsByProvider;
  }

  Object.entries(configured).forEach(([rawName, rawEndpoint]) => {
    const name = normalizeProviderName(rawName);
    const isUsable =
      Boolean(name) && typeof rawEndpoint === "string" && rawEndpoint.trim() !== "";
    if (isUsable) {
      endpointsByProvider.set(name, rawEndpoint.trim());
    }
  });

  return endpointsByProvider;
}
|
|
|
|
/**
 * Create a crawler that routes each request across several provider endpoints.
 *
 * Providers come from `options.providers` or CRAWLER_PROVIDERS. Each provider
 * uses, in order of preference: its entry in `options.providerEndpoints`, its
 * CRAWLER_ENDPOINT_<PROVIDER> environment variable, or the shared endpoint
 * (`options.endpoint` / CRAWLER_ENDPOINT). The routing strategy comes from
 * `options.routingStrategy` or CRAWLER_ROUTING_STRATEGY.
 *
 * `getQuotes(request)` behaviour:
 *  - if `request.searchParams.provider` names a provider, only that provider
 *    is used (and the call fails if it is not configured);
 *  - "primaryOnly": only the first provider is tried;
 *  - "parallelRace": all providers are queried at once and the first success
 *    wins;
 *  - otherwise (priority fallback): providers are tried in order until one
 *    succeeds.
 * Every wrapping error keeps the underlying failure(s) as `cause`.
 *
 * @param {object} [options] - Also forwarded to `createEndpointCrawler`.
 * @returns {{getQuotes: function(object): Promise<Array<object>>}|null}
 *   The routing client, or `null` when no providers are configured.
 * @throws {Error} When any configured provider has no endpoint.
 */
function createMultiSourceCrawler(options = {}) {
  const providers = parseProviderList(pickOptionOrEnv(options.providers, "CRAWLER_PROVIDERS"));
  if (providers.length === 0) return null;

  const routingStrategy = normalizeRoutingStrategy(
    pickOptionOrEnv(options.routingStrategy, "CRAWLER_ROUTING_STRATEGY")
  );
  const sharedEndpointRaw = pickOptionOrEnv(options.endpoint, "CRAWLER_ENDPOINT");
  const sharedEndpoint =
    typeof sharedEndpointRaw === "string" && sharedEndpointRaw.trim()
      ? sharedEndpointRaw.trim()
      : null;
  const providerEndpointMap = buildProviderEndpointMap(options);

  const missingProviders = [];
  const sources = [];
  for (const provider of providers) {
    const providerSpecificFromEnv = process.env[toProviderEnvKey(provider)];
    const providerSpecific =
      providerEndpointMap.get(provider) ||
      (typeof providerSpecificFromEnv === "string" && providerSpecificFromEnv.trim()
        ? providerSpecificFromEnv.trim()
        : null);
    const endpoint = providerSpecific || sharedEndpoint;

    if (!endpoint) {
      missingProviders.push(provider);
      continue;
    }

    sources.push({
      provider,
      crawler: createEndpointCrawler({
        ...options,
        endpoint,
        provider,
      }),
    });
  }

  // Every provider is either missing (reported here) or present in `sources`,
  // so `sources` is non-empty past this point.
  if (missingProviders.length > 0) {
    throw new Error(
      `Missing endpoint for provider(s): ${missingProviders.join(
        ", "
      )}. Set CRAWLER_ENDPOINT_<PROVIDER> or CRAWLER_ENDPOINT.`
    );
  }

  // Readable reason for any thrown value, including non-Error rejections.
  const describeFailure = (failure) => failure?.message || String(failure);

  return {
    async getQuotes(request) {
      const explicitProvider = normalizeProviderName(request.searchParams?.provider);
      if (explicitProvider) {
        const target = sources.find((source) => source.provider === explicitProvider);
        if (!target) {
          throw new Error(`Requested provider not available: ${explicitProvider}`);
        }
        try {
          return await target.crawler.getQuotes(request);
        } catch (error) {
          throw new Error(
            `Explicit provider failed (${target.provider}): ${describeFailure(error)}`,
            { cause: error }
          );
        }
      }

      if (routingStrategy === "primaryOnly") {
        const primary = sources[0];
        try {
          return await primary.crawler.getQuotes(request);
        } catch (error) {
          throw new Error(
            `Primary provider failed (${primary.provider}): ${describeFailure(error)}`,
            { cause: error }
          );
        }
      }

      if (routingStrategy === "parallelRace") {
        const attempts = sources.map(({ provider, crawler }) =>
          crawler.getQuotes(request).then((offers) => ({ provider, offers }))
        );

        try {
          const winner = await Promise.any(attempts);
          return winner.offers;
        } catch (error) {
          // AggregateError.errors is ordered like `attempts`, hence like
          // `sources`, so each reason can be labelled with its provider.
          const reasons = Array.isArray(error?.errors)
            ? error.errors.map(
                (item, index) => `${sources[index].provider}: ${describeFailure(item)}`
              )
            : [describeFailure(error)];
          throw new Error(`All provider requests failed: ${reasons.join(" | ")}`, {
            cause: error,
          });
        }
      }

      // Priority fallback: try providers in configured order.
      const reasons = [];
      const failures = [];
      for (const { provider, crawler } of sources) {
        try {
          return await crawler.getQuotes(request);
        } catch (error) {
          failures.push(error);
          reasons.push(`${provider}: ${describeFailure(error)}`);
        }
      }
      throw new Error(`All provider requests failed: ${reasons.join(" | ")}`, {
        cause: new AggregateError(failures, "All provider requests failed"),
      });
    },
  };
}
|
|
|
|
/**
 * Pick the crawler client implementation for the given options.
 *
 * Precedence:
 *  1. `options.client`, when it exposes a `getQuotes` function (returned as is);
 *  2. `options.getQuotes`, wrapped as `{ getQuotes }`;
 *  3. a multi-source crawler, when `createMultiSourceCrawler` returns one;
 *  4. a single-endpoint crawler, when `options.endpoint` or CRAWLER_ENDPOINT
 *     is set;
 *  5. the mock crawler.
 *
 * @param {object} [options] - Client options, forwarded to the chosen factory.
 * @returns {{getQuotes: Function}} A crawler client.
 */
function createCrawlerClient(options = {}) {
  const injectedClient = options.client;
  if (injectedClient && typeof injectedClient.getQuotes === "function") {
    return injectedClient;
  }

  if (typeof options.getQuotes === "function") {
    return { getQuotes: options.getQuotes };
  }

  const multiSource = createMultiSourceCrawler(options);
  if (multiSource) {
    return multiSource;
  }

  const hasEndpoint = Boolean(pickOptionOrEnv(options.endpoint, "CRAWLER_ENDPOINT"));
  return hasEndpoint ? createEndpointCrawler(options) : createMockCrawler();
}
|
|
|
|
module.exports = {
|
|
createCrawlerClient,
|
|
createEndpointCrawler,
|
|
createMultiSourceCrawler,
|
|
createMockCrawler,
|
|
};
|