initial commit
This commit is contained in:
283
src/llmParameterExtractor.js
Normal file
283
src/llmParameterExtractor.js
Normal file
@@ -0,0 +1,283 @@
|
||||
"use strict";
|
||||
|
||||
const { parseFlightSearchRequest } = require("./naturalLanguageFlightParser");
|
||||
|
||||
/**
 * Parses an LLM text response into a plain JSON object.
 *
 * The trimmed text is parsed as-is first. If that fails, or yields something
 * other than a plain object (array, primitive, null), the substring between
 * the first "{" and the last "}" is parsed instead, so that responses wrapped
 * in prose or markdown fences can still be recovered.
 *
 * @param {string} text - Raw LLM response content.
 * @returns {object} The parsed plain object (never null, an array, or a primitive).
 * @throws {Error} If `text` is not a string or no JSON object can be recovered.
 */
function tryParseJsonObject(text) {
  if (typeof text !== "string") {
    throw new Error("LLM response is not a string");
  }

  const isPlainObject = (candidate) =>
    candidate !== null && typeof candidate === "object" && !Array.isArray(candidate);

  const trimmed = text.trim();
  try {
    const direct = JSON.parse(trimmed);
    if (isPlainObject(direct)) return direct;
    // Valid JSON but not an object: fall through to brace-based recovery.
  } catch (_error) {
    // Continue to bracket-based recovery.
  }

  const firstBrace = trimmed.indexOf("{");
  const lastBrace = trimmed.lastIndexOf("}");
  if (firstBrace < 0 || lastBrace < 0 || firstBrace >= lastBrace) {
    throw new Error("LLM response did not include a valid JSON object");
  }

  try {
    // A successfully parsed "{...}" slice is always a plain object.
    return JSON.parse(trimmed.slice(firstBrace, lastBrace + 1));
  } catch (error) {
    // Surface one consistent error instead of a raw SyntaxError.
    throw new Error("LLM response did not include a valid JSON object", { cause: error });
  }
}
|
||||
|
||||
/**
 * Converts a loosely-typed value to an integer, or null when it is not numeric.
 *
 * Only real numbers and non-blank numeric strings are accepted. Other inputs
 * (booleans, arrays, objects, empty or whitespace-only strings) yield null
 * rather than being coerced by `Number()` into 0 or 1.
 *
 * @param {*} value - Candidate value.
 * @returns {number|null} The value rounded to the nearest integer, or null.
 */
function toIntegerOrNull(value) {
  let numeric;
  if (typeof value === "number") {
    numeric = value;
  } else if (typeof value === "string" && value.trim() !== "") {
    numeric = Number(value);
  } else {
    return null;
  }

  // Rejects NaN (non-numeric strings) and +/-Infinity.
  if (!Number.isFinite(numeric)) return null;
  return Math.round(numeric);
}
|
||||
|
||||
/**
 * Validates a `{ from, to }` date window.
 *
 * Both bounds must be strings holding a real calendar date in `YYYY-MM-DD`
 * form (surrounding whitespace is ignored). Bounds supplied in reverse order
 * are swapped so that `from <= to`.
 *
 * @param {*} value - Candidate date window.
 * @returns {{from: string, to: string}|null} The normalized window, or null
 *   when the value is not an object or either bound is missing or invalid.
 */
function sanitizeDateWindow(value) {
  if (!value || typeof value !== "object") return null;

  const normalizeDate = (raw) => {
    if (typeof raw !== "string") return null;
    const text = raw.trim();
    if (!/^\d{4}-\d{2}-\d{2}$/.test(text)) return null;
    const parsed = new Date(`${text}T00:00:00Z`);
    if (Number.isNaN(parsed.getTime())) return null;
    // Round-trip check rejects impossible dates (e.g. 2024-02-31) that the
    // Date parser may silently roll over into the following month.
    return parsed.toISOString().slice(0, 10) === text ? text : null;
  };

  const from = normalizeDate(value.from);
  const to = normalizeDate(value.to);
  if (from === null || to === null) return null;

  // Fixed-width ISO dates order correctly under plain string comparison.
  return from <= to ? { from, to } : { from: to, to: from };
}
|
||||
|
||||
/**
 * Normalizes a `{ minDays, maxDays }` stay-duration object.
 *
 * Both bounds are converted with `toIntegerOrNull`; if either cannot be
 * converted the whole value is rejected. The bounds are returned in ascending
 * order regardless of the order they were supplied in.
 *
 * @param {*} value - Candidate stay duration.
 * @returns {{minDays: number, maxDays: number}|null} Ordered bounds, or null.
 */
function sanitizeStayDuration(value) {
  const isObjectLike = Boolean(value) && typeof value === "object";
  if (!isObjectLike) {
    return null;
  }

  const bounds = [toIntegerOrNull(value.minDays), toIntegerOrNull(value.maxDays)];
  if (bounds.some((bound) => bound === null)) {
    return null;
  }

  const shortest = Math.min(...bounds);
  const longest = Math.max(...bounds);
  return { minDays: shortest, maxDays: longest };
}
|
||||
|
||||
/**
 * Cleans a list of `{ from, to }` flight segments.
 *
 * Entries that are not objects, or whose `from`/`to` is not a non-blank
 * string, are dropped. Surviving codes are trimmed and upper-cased.
 *
 * @param {*} value - Candidate segment list.
 * @returns {Array<{from: string, to: string}>|null} Cleaned segments, or null
 *   when the input is not an array or no entry survives.
 */
function sanitizeSegments(value) {
  if (!Array.isArray(value)) {
    return null;
  }

  const cleaned = [];
  for (const entry of value) {
    if (!entry || typeof entry !== "object") continue;

    const origin = typeof entry.from === "string" ? entry.from.trim() : "";
    const destination = typeof entry.to === "string" ? entry.to.trim() : "";
    if (origin === "" || destination === "") continue;

    cleaned.push({ from: origin.toUpperCase(), to: destination.toUpperCase() });
  }

  return cleaned.length === 0 ? null : cleaned;
}
|
||||
|
||||
/**
 * Normalizes a passengers object into per-cabin counts plus a total.
 *
 * Each cabin count is converted with `toIntegerOrNull`, defaulting to 0 and
 * clamped to be non-negative. The total is the larger of the supplied `total`
 * and the sum of the cabin counts.
 *
 * @param {*} value - Candidate passengers object (`{ total, byCabin }`).
 * @returns {{byCabin: object, total: number}|null} Normalized passengers, or
 *   null when the value is not an object or the resulting total is not positive.
 */
function sanitizePassengers(value) {
  if (!value || typeof value !== "object") {
    return null;
  }

  const rawCounts = value.byCabin || {};
  const cabinNames = ["economy", "premium_economy", "business", "first"];
  const byCabin = Object.fromEntries(
    cabinNames.map((cabin) => [cabin, Math.max(0, toIntegerOrNull(rawCounts[cabin]) || 0)])
  );

  let seatSum = 0;
  for (const count of Object.values(byCabin)) {
    seatSum += count;
  }

  const declaredTotal = toIntegerOrNull(value.total);
  const total = declaredTotal === null ? seatSum : Math.max(declaredTotal, seatSum);

  if (total <= 0) {
    return null;
  }
  return { byCabin, total };
}
|
||||
|
||||
/**
 * Normalizes a `{ hours, operator }` maximum-journey-time constraint.
 *
 * `hours` is converted with `toIntegerOrNull`; `operator` must be exactly
 * "<" or "<=".
 *
 * @param {*} value - Candidate constraint.
 * @returns {{hours: number, operator: string}|null} The constraint, or null
 *   when the value is not an object or either field is invalid.
 */
function sanitizeMaxJourneyHours(value) {
  if (!value || typeof value !== "object") {
    return null;
  }

  const { hours: rawHours, operator: rawOperator } = value;
  const hours = toIntegerOrNull(rawHours);
  const operator = ["<=", "<"].includes(rawOperator) ? rawOperator : null;

  if (hours === null || operator === null) {
    return null;
  }
  return { hours, operator };
}
|
||||
|
||||
/**
 * Builds the constraints object from LLM output, filling gaps from a fallback.
 *
 * - `sameFlightForAllPassengers`: used only when it is a boolean.
 * - `itineraryCount` / `maxStops`: an explicit null is kept as null; otherwise
 *   the value is converted with `toIntegerOrNull`, falling back when that
 *   yields null.
 * - `maxJourneyHours`: validated with `sanitizeMaxJourneyHours`, falling back
 *   when that yields a falsy result.
 *
 * The fallback is only read for fields the LLM value does not supply.
 *
 * @param {*} value - Candidate constraints from the LLM.
 * @param {object} fallbackConstraints - Constraints used for missing fields.
 * @returns {object} The merged constraints object.
 */
function sanitizeConstraints(value, fallbackConstraints) {
  const llm = value && typeof value === "object" ? value : {};

  // Shared rule for the two nullable integer fields.
  const resolveCount = (key) => {
    const raw = llm[key];
    if (raw === null) return null;
    return toIntegerOrNull(raw) ?? fallbackConstraints[key];
  };

  let sameFlightForAllPassengers;
  if (typeof llm.sameFlightForAllPassengers === "boolean") {
    sameFlightForAllPassengers = llm.sameFlightForAllPassengers;
  } else {
    sameFlightForAllPassengers = fallbackConstraints.sameFlightForAllPassengers;
  }

  const itineraryCount = resolveCount("itineraryCount");
  const maxStops = resolveCount("maxStops");
  const maxJourneyHours =
    sanitizeMaxJourneyHours(llm.maxJourneyHours) || fallbackConstraints.maxJourneyHours;

  return { sameFlightForAllPassengers, itineraryCount, maxStops, maxJourneyHours };
}
|
||||
|
||||
/**
 * Returns the distinct string entries of an array, in first-seen order.
 *
 * @param {*} values - Candidate list; anything that is not an array yields [].
 * @returns {string[]} De-duplicated strings; non-string entries are dropped.
 */
function uniqueStrings(values) {
  if (!Array.isArray(values)) {
    return [];
  }
  const onlyStrings = values.filter((entry) => typeof entry === "string");
  // A Set keeps insertion order, so the first occurrence of each string wins.
  return [...new Set(onlyStrings)];
}
|
||||
|
||||
/**
 * Infers the trip type from an ordered list of `{ from, to }` segments.
 *
 * - fewer than two segments: "unknown"
 * - exactly two segments, the second reversing the first: "round_trip"
 * - exactly two segments, the second returning to the first origin from a
 *   different city: "open_jaw"
 * - anything else, including any itinerary of three or more segments:
 *   "multi_city"
 *
 * @param {Array<{from: string, to: string}>|null|undefined} segments
 * @returns {"round_trip"|"open_jaw"|"multi_city"|"unknown"} The inferred type.
 */
function inferTripType(segments) {
  if (!segments || segments.length < 2) return "unknown";

  // Round-trip and open-jaw are two-leg shapes; classifying a longer
  // itinerary from its first two legs alone would mislabel it.
  if (segments.length > 2) return "multi_city";

  const [outbound, inbound] = segments;
  if (outbound.from !== inbound.to) return "multi_city";
  return outbound.to === inbound.from ? "round_trip" : "open_jaw";
}
|
||||
|
||||
/**
 * Lists which required parameters are absent (falsy) in a parsed request.
 *
 * Checked in order: departureDateWindow, stayDurationDays, passengers,
 * segments, and constraints.maxJourneyHours (reported as "maxJourneyHours").
 *
 * @param {object} params - Parsed parameters; `params.constraints` must be an object.
 * @returns {string[]} Names of the missing fields, in the order above.
 */
function recomputeMissingFields(params) {
  const requiredFields = [
    ["departureDateWindow", params.departureDateWindow],
    ["stayDurationDays", params.stayDurationDays],
    ["passengers", params.passengers],
    ["segments", params.segments],
    ["maxJourneyHours", params.constraints.maxJourneyHours],
  ];

  return requiredFields
    .filter(([, fieldValue]) => !fieldValue)
    .map(([fieldName]) => fieldName);
}
|
||||
|
||||
/**
 * Combines sanitized LLM output with rule-parser results into one request.
 *
 * Each field is taken from the LLM object when its sanitizer accepts it and
 * from `fallbackParams` otherwise. Warnings from both sources are merged and
 * de-duplicated. `missingFields` is recomputed from the merged result.
 *
 * `tripType` is taken from the LLM only when, after trimming and
 * lower-casing, it is one of the schema values ("round_trip", "open_jaw",
 * "multi_city", "unknown"); otherwise it is inferred from the merged segments.
 *
 * @param {*} llmObject - Parsed LLM response; non-objects are treated as empty.
 * @param {object} fallbackParams - Rule-parser output used to fill gaps.
 * @param {string} input - Original user text, stored as `rawInput`.
 * @param {Date|string|number} now - Timestamp stored (ISO 8601) as `parsedAt`.
 * @returns {object} The merged parameters, including `missingFields`.
 * @throws {RangeError} If `now` is not a valid date.
 */
function mergeWithFallback(llmObject, fallbackParams, input, now) {
  const source = llmObject && typeof llmObject === "object" ? llmObject : {};

  const departureDateWindow =
    sanitizeDateWindow(source.departureDateWindow) || fallbackParams.departureDateWindow;
  const stayDurationDays =
    sanitizeStayDuration(source.stayDurationDays) || fallbackParams.stayDurationDays;
  const segments = sanitizeSegments(source.segments) || fallbackParams.segments;
  const passengers = sanitizePassengers(source.passengers) || fallbackParams.passengers;
  const constraints = sanitizeConstraints(source.constraints, fallbackParams.constraints);

  // uniqueStrings tolerates non-array input, so a fallback without a
  // `warnings` array no longer throws when spread.
  const warnings = uniqueStrings([
    ...uniqueStrings(fallbackParams.warnings),
    ...uniqueStrings(source.warnings),
  ]);

  // Validate tripType like every other LLM field rather than passing an
  // arbitrary (possibly padded or mis-cased) string through.
  const knownTripTypes = ["round_trip", "open_jaw", "multi_city", "unknown"];
  const llmTripType =
    typeof source.tripType === "string" ? source.tripType.trim().toLowerCase() : "";
  const tripType = knownTripTypes.includes(llmTripType) ? llmTripType : inferTripType(segments);

  const parsed = {
    rawInput: input,
    parsedAt: new Date(now).toISOString(),
    tripType,
    departureDateWindow,
    stayDurationDays,
    segments,
    passengers,
    constraints,
    warnings,
  };

  parsed.missingFields = recomputeMissingFields(parsed);
  return parsed;
}
|
||||
|
||||
/**
 * Builds the user-message prompt sent to the LLM.
 *
 * The prompt consists of the extraction instruction, the expected JSON schema,
 * today's date (the `YYYY-MM-DD` prefix of `nowDate` in ISO/UTC form), a blank
 * line, and the user's text.
 *
 * @param {string} input - The user's natural-language request.
 * @param {Date|string|number} nowDate - Reference date for "today".
 * @returns {string} The newline-joined prompt.
 * @throws {RangeError} If `nowDate` is not a valid date.
 */
function buildPrompt(input, nowDate) {
  const today = new Date(nowDate).toISOString().slice(0, 10);

  const schemaLines = [
    "{",
    ' "departureDateWindow": {"from":"YYYY-MM-DD","to":"YYYY-MM-DD"} | null,',
    ' "stayDurationDays": {"minDays": number, "maxDays": number} | null,',
    ' "segments": [{"from":"IATA or city code","to":"IATA or city code"}] | null,',
    ' "passengers": {"total":number,"byCabin":{"economy":number,"premium_economy":number,"business":number,"first":number}} | null,',
    ' "constraints": {"sameFlightForAllPassengers":boolean,"itineraryCount":number|null,"maxStops":number|null,"maxJourneyHours":{"hours":number,"operator":"<|<="}|null},',
    ' "tripType": "round_trip|open_jaw|multi_city|unknown",',
    ' "warnings": [string],',
    ' "missingFields": [string]',
    "}",
  ];

  const promptLines = [
    "Extract flight-search parameters from user text and return JSON only.",
    "Use this JSON schema keys exactly:",
    ...schemaLines,
    "When information is missing, set null and add key names in missingFields.",
    `Today date is ${today}.`,
    "",
    `User input: ${input}`,
  ];

  return promptLines.join("\n");
}
|
||||
|
||||
/**
 * Creates an async client that asks an OpenAI-compatible chat-completions
 * endpoint to extract flight-search parameters.
 *
 * Settings are resolved from `options` first, then from environment variables:
 * - `apiKey`  / `OPENAI_API_KEY` (required; without it no client is created)
 * - `baseUrl` / `OPENAI_BASE_URL` (default "https://api.openai.com/v1")
 * - `model`   / `OPENAI_MODEL`   (default "gpt-4.1-mini")
 * - `fetch`: fetch implementation (default: the global `fetch`)
 * - `timeoutMs`: optional positive number; when given, each request is aborted
 *   after that many milliseconds. Omitted by default (no time limit).
 *
 * @param {object} [options] - Client settings, see above.
 * @returns {(function({input: string, now: *}): Promise<object>)|null} A
 *   function resolving to the parsed JSON object from the model's reply, or
 *   null when no API key is available.
 * @throws {Error} If no fetch implementation is available.
 */
function createOpenAIClient(options = {}) {
  const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
  if (!apiKey) return null;

  const baseUrl = options.baseUrl || process.env.OPENAI_BASE_URL || "https://api.openai.com/v1";
  const model = options.model || process.env.OPENAI_MODEL || "gpt-4.1-mini";
  const fetchImpl = options.fetch || global.fetch;
  if (typeof fetchImpl !== "function") {
    throw new Error("global fetch is unavailable. Node.js 18+ is required.");
  }

  // Strip every trailing slash so "https://host/v1//" does not produce a
  // malformed "//chat/completions" path.
  const endpoint = `${baseUrl.replace(/\/+$/, "")}/chat/completions`;

  const timeoutMs =
    Number.isFinite(options.timeoutMs) && options.timeoutMs > 0 ? options.timeoutMs : null;

  return async ({ input, now }) => {
    const request = {
      method: "POST",
      headers: {
        "content-type": "application/json",
        authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model,
        temperature: 0,
        response_format: { type: "json_object" },
        messages: [
          {
            role: "system",
            content:
              "You are a parser. Output valid JSON only. Do not wrap in markdown or prose.",
          },
          {
            role: "user",
            content: buildPrompt(input, now),
          },
        ],
      }),
    };
    // Only attach a signal when a timeout was requested, so custom fetch
    // implementations see the same request shape as before by default.
    if (timeoutMs !== null) {
      request.signal = AbortSignal.timeout(timeoutMs);
    }

    const response = await fetchImpl(endpoint, request);

    if (!response.ok) {
      const message = await response.text();
      throw new Error(`OpenAI API request failed (${response.status}): ${message}`);
    }

    const payload = await response.json();
    const content = payload?.choices?.[0]?.message?.content;
    // tryParseJsonObject rejects a missing (non-string) content.
    return tryParseJsonObject(content);
  };
}
|
||||
|
||||
/**
 * Extracts flight-search parameters from natural-language text.
 *
 * The rule-based parser always runs first and supplies the fallback. Unless
 * `options.preferRuleParser` is set, an LLM client (`options.llmClient`, or one
 * built by `createOpenAIClient(options)`) is then asked for a richer result,
 * which is merged with the fallback. If no client is available the fallback is
 * returned as-is. If anything in the LLM path fails, the fallback is returned
 * with an extra warning describing the failure.
 *
 * @param {string} input - The user's request; must be a non-blank string.
 * @param {object} [options]
 * @param {Date} [options.now] - Reference time (defaults to the current time).
 * @param {boolean} [options.preferRuleParser] - Skip the LLM entirely.
 * @param {Function} [options.llmClient] - Async `({ input, now }) => object`.
 * @returns {Promise<{source: "llm"|"rule_parser", params: object}>}
 * @throws {Error} (as a rejection) If `input` is not a non-empty string.
 */
async function extractFlightSearchRequest(input, options = {}) {
  if (typeof input !== "string" || input.trim() === "") {
    throw new Error("input must be a non-empty string");
  }

  const now = options.now || new Date();
  const fallbackParams = parseFlightSearchRequest(input, { now });
  if (options.preferRuleParser) {
    return { source: "rule_parser", params: fallbackParams };
  }

  try {
    // Client construction stays inside the try: a missing fetch
    // implementation should degrade to the rule parser, not reject.
    const llmClient = options.llmClient || createOpenAIClient(options);
    if (!llmClient) {
      return { source: "rule_parser", params: fallbackParams };
    }

    const llmRaw = await llmClient({ input, now });
    const merged = mergeWithFallback(llmRaw, fallbackParams, input, now);
    return { source: "llm", params: merged };
  } catch (error) {
    // Anything can be thrown or rejected (null, strings, ...); never let the
    // best-effort fallback itself fail while describing the failure.
    const reason = error?.message ?? String(error);
    const priorWarnings = Array.isArray(fallbackParams.warnings) ? fallbackParams.warnings : [];
    return {
      source: "rule_parser",
      params: {
        ...fallbackParams,
        warnings: [...priorWarnings, `LLM extraction fallback triggered: ${reason}`],
      },
    };
  }
}
|
||||
|
||||
// Public API of this module: only these two functions are exported.
module.exports = { createOpenAIClient, extractFlightSearchRequest };
|
||||
Reference in New Issue
Block a user