"use strict";

// Unit tests for the crawler URL builders. Run with `node --test`.
const test = require("node:test");
const assert = require("node:assert/strict");
// The Naver provider is temporarily excluded, so its builder is not loaded.
// const { buildNaverUrl } = require("../src/crawlers/naver");
const { buildSkyscannerUrl } = require("../src/crawlers/skyscanner");
const { buildGoogleUrl } = require("../src/crawlers/google");

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Build a search-parameter object for the URL builders under test.
 *
 * The baseline is a one-way ICN -> NRT trip for a single passenger departing
 * on 2026-03-15. A fresh object is created on every call, so tests cannot
 * leak mutations into each other.
 *
 * @param {object} [overrides] - Top-level keys to replace in the baseline.
 *   The merge is shallow: an overriding key replaces the whole baseline value
 *   (e.g. passing `passengers` replaces both `total` and `byCabin`).
 * @returns {object} The baseline parameters with `overrides` applied.
 */
function makeParams(overrides = {}) {
  return {
    tripType: "one_way",
    segments: [{ from: "ICN", to: "NRT" }],
    passengers: { total: 1, byCabin: {} },
    departureDateWindow: { from: "2026-03-15" },
    ...overrides,
  };
}

// ---------------------------------------------------------------------------
// Naver URL tests (provider temporarily excluded)
// ---------------------------------------------------------------------------

// test("naver: one-way URL format", () => {
//   const url = buildNaverUrl(makeParams());
//   assert.match(url, /flight\.naver\.com\/flights\/international\/ICN-NRT-20260315/);
//   assert.match(url, /adult=1/);
//   assert.match(url, /fareType=Y/);
//   assert.ok(!url.includes("/NRT-ICN-"));
// });

// test("naver: round-trip URL has separate outbound and return segments", () => {
//   const url = buildNaverUrl(makeParams({ tripType: "round_trip", stayDurationDays: { minDays: 7 } }));
//   assert.match(url, /ICN-NRT-20260315/);
//   assert.match(url, /\/NRT-ICN-20260322/);
//   assert.match(url, /fareType=Y/);
// });

// test("naver: round-trip defaults to 7-day return when stayDurationDays is missing", () => {
//   const url = buildNaverUrl(makeParams({ tripType: "round_trip" }));
//   assert.match(url, /\/NRT-ICN-20260322/);
// });

// test("naver: multi-city uses /flights/multi and colon separators", () => {
//   const url = buildNaverUrl(makeParams({
//     tripType: "multi_city",
//     segments: [{ from: "ICN", to: "NRT" }, { from: "NRT", to: "ICN" }],
//   }));
//   assert.match(url, /\/flights\/multi\?/);
//   assert.match(url, /ICN:NRT:20260315/);
//   assert.match(url, /NRT:ICN:20260318/);
// });

// test("naver: business class maps to fareType=C", () => {
//   const url = buildNaverUrl(makeParams({ passengers: { total: 2, byCabin: { business: 2 } } }));
//   assert.match(url, /fareType=C/);
//   assert.match(url, /adult=2/);
// });

// test("naver: first class maps to fareType=F", () => {
//   const url = buildNaverUrl(makeParams({ passengers: { total: 1, byCabin: { first: 1 } } }));
//   assert.match(url, /fareType=F/);
// });

// test("naver: empty segments returns base URL", () => {
//   const url = buildNaverUrl({ segments: [] });
//   assert.equal(url, "https://flight.naver.com");
// });

// ---------------------------------------------------------------------------
// Skyscanner URL tests (/transport/d/ format with YYYY-MM-DD)
// ---------------------------------------------------------------------------

// Path layout asserted below: one /<from>/<date>/<to>/ group per leg, with
// lower-cased airport codes. Filters travel as query parameters.

test("skyscanner: one-way uses /transport/d/ with YYYY-MM-DD", () => {
  const url = buildSkyscannerUrl(makeParams());
  assert.match(url, /\/transport\/d\/icn\/2026-03-15\/nrt\//);
  assert.match(url, /adultsv2=1/);
  assert.match(url, /cabinclass=economy/);
});

test("skyscanner: round-trip path has outbound and return legs", () => {
  const url = buildSkyscannerUrl(
    makeParams({
      tripType: "round_trip",
      stayDurationDays: { minDays: 5 },
    })
  );
  // /icn/2026-03-15/nrt/nrt/2026-03-20/icn/
  assert.match(url, /\/icn\/2026-03-15\/nrt\/nrt\/2026-03-20\/icn\//);
});

test("skyscanner: round-trip defaults to 7-day return", () => {
  const url = buildSkyscannerUrl(
    makeParams({ tripType: "round_trip" })
  );
  assert.match(url, /\/nrt\/2026-03-22\/icn\//);
});

test("skyscanner: multi-city path-based with multiple legs", () => {
  const url = buildSkyscannerUrl(
    makeParams({
      tripType: "multi_city",
      segments: [
        { from: "ICN", to: "MAD" },
        { from: "BCN", to: "ICN" },
      ],
    })
  );
  // /transport/d/icn/2026-03-15/mad/bcn/2026-03-22/icn/ (default 7-day stay)
  assert.match(url, /\/transport\/d\/icn\/2026-03-15\/mad\/bcn\/2026-03-22\/icn\//);
});

test("skyscanner: business cabin class", () => {
  const url = buildSkyscannerUrl(
    makeParams({ passengers: { total: 1, byCabin: { business: 1 } } })
  );
  assert.match(url, /cabinclass=business/);
});

test("skyscanner: duration param (total journey minutes)", () => {
  const url = buildSkyscannerUrl(
    makeParams({ constraints: { maxJourneyHours: { hours: 33.5 } } })
  );
  assert.match(url, /duration=2010/); // 33.5 * 60
});

// The `stops` parameter is read through URL#searchParams so the assertions
// compare the decoded value, independent of how "!" and "," are escaped.

test("skyscanner: maxStops=0 → stops=!oneStop,!twoPlusStops (direct only)", () => {
  const url = buildSkyscannerUrl(
    makeParams({ constraints: { maxStops: 0 } })
  );
  const stops = new URL(url).searchParams.get("stops");
  assert.equal(stops, "!oneStop,!twoPlusStops");
});

test("skyscanner: maxStops=1 → stops=!twoPlusStops (direct + 1 stop)", () => {
  const url = buildSkyscannerUrl(
    makeParams({ constraints: { maxStops: 1 } })
  );
  const stops = new URL(url).searchParams.get("stops");
  assert.equal(stops, "!twoPlusStops");
});

test("skyscanner: no maxStops → no stops param", () => {
  const url = buildSkyscannerUrl(makeParams());
  const stops = new URL(url).searchParams.get("stops");
  assert.equal(stops, null);
});

test("skyscanner: matches real URL structure (multi-city + business + stops + duration)", () => {
  const url = buildSkyscannerUrl({
    tripType: "multi_city",
    segments: [
      { from: "ICN", to: "MAD" },
      { from: "BCN", to: "ICN" },
    ],
    passengers: { total: 2, byCabin: { business: 2 } },
    departureDateWindow: { from: "2026-11-26" },
    stayDurationDays: { minDays: 19 },
    constraints: { maxStops: 1, maxJourneyHours: { hours: 33.5 } },
  });
  // Path: return leg uses stayDurationDays.minDays=19 → 2026-11-26 + 19 = 2026-12-15
  assert.match(url, /\/transport\/d\/icn\/2026-11-26\/mad\/bcn\/2026-12-15\/icn\//);
  // Params
  assert.match(url, /adultsv2=2/);
  assert.match(url, /cabinclass=business/);
  assert.match(url, /duration=2010/);
  const stops = new URL(url).searchParams.get("stops");
  assert.equal(stops, "!twoPlusStops");
});

test("skyscanner: empty segments returns base URL", () => {
  const url = buildSkyscannerUrl({ segments: [] });
  assert.equal(url, "https://www.skyscanner.co.kr");
});

// ---------------------------------------------------------------------------
// Google URL tests (protobuf tfs format)
// ---------------------------------------------------------------------------

/**
 * Helper: decode the URL-safe base64 `tfs` query parameter of `url` and parse
 * the resulting bytes as protobuf fields.
 *
 * @param {string} url - Absolute URL expected to carry a `tfs` parameter.
 * @returns {Array<object>|null} The field list produced by `parseProtobuf`,
 *   or `null` when the parameter is missing or empty.
 * @throws {TypeError} If `url` is not a valid absolute URL.
 */
function decodeTfs(url) {
  const tfs = new URL(url).searchParams.get("tfs");
  if (!tfs) return null;
  // "base64url" understands the "-" / "_" alphabet directly, so no manual
  // character substitution is needed before decoding.
  return parseProtobuf(Buffer.from(tfs, "base64url"));
}

/**
 * Decode one protobuf base-128 varint from `buf`, starting at offset `pos`.
 *
 * Accumulation is done in BigInt so shifts past 32 bits do not wrap; the
 * result is then converted to a Number, which is exact only up to
 * Number.MAX_SAFE_INTEGER (larger values are rounded).
 *
 * @param {Uint8Array} buf - Bytes to read from.
 * @param {number} pos - Offset of the first varint byte.
 * @returns {{value: number, next: number}} The decoded value and the offset
 *   of the first byte after the varint.
 * @throws {RangeError} If the buffer ends before the terminating byte (the
 *   one with the high bit clear), including when `pos` is already at or past
 *   the end of the buffer.
 */
function readVarint(buf, pos) {
  let value = 0n;
  let shift = 0n;
  let cursor = pos;
  while (cursor < buf.length) {
    const byte = buf[cursor];
    cursor += 1;
    // The low 7 bits carry payload, least-significant group first.
    value |= BigInt(byte & 0x7f) << shift;
    // A clear high bit marks the last byte of the varint.
    if ((byte & 0x80) === 0) {
      return { value: Number(value), next: cursor };
    }
    shift += 7n;
  }
  // Running off the end means the input is cut short; fail loudly rather
  // than hand back a partial value that looks valid.
  throw new RangeError(`Truncated varint at offset ${pos}`);
}

/**
 * Schema-less protobuf decoder for test assertions.
 *
 * Only wire types 0 (varint) and 2 (length-delimited) are understood.
 * Decoding stops at the first field with any other wire type and returns
 * what was collected so far.
 *
 * Length-delimited payloads are classified heuristically, since no schema is
 * available:
 *   - "str"   when every byte is printable ASCII (0x20-0x7E),
 *   - "msg"   when the payload decodes as a nested message,
 *   - "bytes" when the nested decode fails with a RangeError.
 *
 * @param {Buffer} buf - Encoded message bytes.
 * @returns {Array<{f: number, t: string, v: *}>} Fields in wire order, where
 *   `f` is the field number, `t` the classification and `v` the value.
 * @throws {RangeError} If a length prefix claims more bytes than remain.
 */
function parseProtobuf(buf) {
  const results = [];
  let pos = 0;
  while (pos < buf.length) {
    const tag = readVarint(buf, pos);
    pos = tag.next;
    // Tag layout: (fieldNumber << 3) | wireType.
    const fieldNum = tag.value >> 3;
    const wireType = tag.value & 0x7;

    if (wireType === 0) {
      const val = readVarint(buf, pos);
      pos = val.next;
      results.push({ f: fieldNum, t: "varint", v: val.value });
    } else if (wireType === 2) {
      const len = readVarint(buf, pos);
      const end = len.next + len.value;
      if (end > buf.length) {
        // Without this check the payload would be silently cut short and a
        // malformed buffer would decode as if it were valid.
        throw new RangeError(
          `Length-delimited field ${fieldNum} overruns buffer (needs ${end} bytes, have ${buf.length})`
        );
      }
      // subarray() is the non-deprecated equivalent of Buffer#slice().
      const data = buf.subarray(len.next, end);
      pos = end;

      const str = data.toString("utf8");
      if (/^[\x20-\x7E]+$/.test(str)) {
        results.push({ f: fieldNum, t: "str", v: str });
      } else {
        try {
          const nested = parseProtobuf(data);
          results.push({ f: fieldNum, t: "msg", v: nested });
        } catch (err) {
          // Only malformed-input failures mean "not a nested message";
          // anything else is a genuine bug and must surface.
          if (!(err instanceof RangeError)) throw err;
          results.push({ f: fieldNum, t: "bytes", v: data });
        }
      }
    } else {
      // Unsupported wire type: stop and return what has been decoded.
      break;
    }
  }
  return results;
}

/**
 * Extract all field_3 (segment) messages from a decoded tfs field list.
 *
 * @param {Array<{f: number, t: string, v: *}>} decoded - Top-level fields as
 *   returned by `parseProtobuf`.
 * @returns {Array<Array<object>>} The nested field list of every entry whose
 *   field number is 3 and whose type is "msg", in original order.
 */
function getSegments(decoded) {
  return decoded
    .filter((field) => field.f === 3 && field.t === "msg")
    .map((field) => field.v);
}

/**
 * Find a field value in a decoded protobuf field list.
 *
 * @param {Array<{f: number, t: string, v: *}>} decoded - Field list as
 *   returned by `parseProtobuf`.
 * @param {number} fieldNum - Field number to look up.
 * @returns {*} The value of the first entry with that field number, or
 *   `undefined` when no entry matches.
 */
function findField(decoded, fieldNum) {
  return decoded.find((field) => field.f === fieldNum)?.v;
}

// Each Google test decodes the `tfs` parameter and asserts on individual
// protobuf fields; the two "byte-exact" tests compare the raw parameter
// against reference strings.

test("google: uses protobuf tfs format (not ?q= query)", () => {
  const url = buildGoogleUrl(makeParams());
  assert.match(url, /\/flights\/search\?tfs=/);
  assert.match(url, /&tfu=/);
  assert.ok(!url.includes("?q="));
});

test("google: one-way has 1 segment with correct airports and date", () => {
  const url = buildGoogleUrl(makeParams());
  const decoded = decodeTfs(url);
  const segs = getSegments(decoded);
  assert.equal(segs.length, 1);
  // Date
  assert.equal(findField(segs[0], 2), "2026-03-15");
  // Origin: field 13 → nested field 2 = "ICN"
  const origin = findField(segs[0], 13);
  assert.equal(findField(origin, 2), "ICN");
  // Dest: field 14 → nested field 2 = "NRT"
  const dest = findField(segs[0], 14);
  assert.equal(findField(dest, 2), "NRT");
});

test("google: round-trip has 2 segments with reversed airports", () => {
  const url = buildGoogleUrl(
    makeParams({ tripType: "round_trip", stayDurationDays: { minDays: 7 } })
  );
  const decoded = decodeTfs(url);
  const segs = getSegments(decoded);
  assert.equal(segs.length, 2);
  // Outbound: ICN → NRT on 2026-03-15
  assert.equal(findField(segs[0], 2), "2026-03-15");
  assert.equal(findField(findField(segs[0], 13), 2), "ICN");
  assert.equal(findField(findField(segs[0], 14), 2), "NRT");
  // Return: NRT → ICN on 2026-03-22
  assert.equal(findField(segs[1], 2), "2026-03-22");
  assert.equal(findField(findField(segs[1], 13), 2), "NRT");
  assert.equal(findField(findField(segs[1], 14), 2), "ICN");
});

test("google: round-trip defaults to 7-day return when stayDurationDays is missing", () => {
  const url = buildGoogleUrl(makeParams({ tripType: "round_trip" }));
  const segs = getSegments(decodeTfs(url));
  assert.equal(segs.length, 2);
  assert.equal(findField(segs[1], 2), "2026-03-22");
});

test("google: maxJourneyHours encodes as field_12 in minutes", () => {
  const url = buildGoogleUrl(
    makeParams({ constraints: { maxJourneyHours: { hours: 9 } } })
  );
  const segs = getSegments(decodeTfs(url));
  assert.equal(findField(segs[0], 12), 540); // 9h * 60
});

test("google: maxStops encodes as field_5", () => {
  const url = buildGoogleUrl(
    makeParams({ constraints: { maxStops: 1 } })
  );
  const segs = getSegments(decodeTfs(url));
  assert.equal(findField(segs[0], 5), 1);
});

test("google: maxStops=0 means direct flights only", () => {
  const url = buildGoogleUrl(
    makeParams({ constraints: { maxStops: 0 } })
  );
  const segs = getSegments(decodeTfs(url));
  assert.equal(findField(segs[0], 5), 0);
});

test("google: no maxStops omits field_5", () => {
  const url = buildGoogleUrl(makeParams());
  const segs = getSegments(decodeTfs(url));
  assert.equal(findField(segs[0], 5), undefined);
});

test("google: byte-exact match with known duration-filter URL", () => {
  const url = buildGoogleUrl({
    tripType: "round_trip",
    segments: [{ from: "ICN", to: "NRT" }],
    passengers: { total: 1, byCabin: {} },
    departureDateWindow: { from: "2026-03-15" },
    stayDurationDays: { minDays: 7 },
    constraints: { maxJourneyHours: { hours: 9 } },
  });
  const got = new URL(url).searchParams.get("tfs");
  const expected =
    "CBwQAhohEgoyMDI2LTAzLTE1YJwEagcIARIDSUNOcgcIARIDTlJUGiESCjIwMjYtMDMtMjJgnARqBwgBEgNOUlRyBwgBEgNJQ05AAUgBcAGCAQsI____________AZgBAQ";
  assert.equal(got, expected);
});

test("google: byte-exact match with known 1-stop URL", () => {
  const url = buildGoogleUrl({
    tripType: "round_trip",
    segments: [{ from: "ICN", to: "NRT" }],
    passengers: { total: 1, byCabin: {} },
    departureDateWindow: { from: "2026-03-15" },
    stayDurationDays: { minDays: 7 },
    constraints: { maxStops: 1, maxJourneyHours: { hours: 9 } },
  });
  const got = new URL(url).searchParams.get("tfs");
  const expected =
    "CBwQAhojEgoyMDI2LTAzLTE1KAFgnARqBwgBEgNJQ05yBwgBEgNOUlQaIxIKMjAyNi0wMy0yMigBYJwEagcIARIDTlJUcgcIARIDSUNOQAFIAXABggELCP___________wGYAQE";
  assert.equal(got, expected);
});

test("google: field 19 encodes trip type (1=RT, 2=OW, 3=MC)", () => {
  const ow = buildGoogleUrl(makeParams({ tripType: "one_way" }));
  assert.equal(findField(decodeTfs(ow), 19), 2);

  const rt = buildGoogleUrl(makeParams({ tripType: "round_trip" }));
  assert.equal(findField(decodeTfs(rt), 19), 1);

  const mc = buildGoogleUrl(makeParams({
    tripType: "multi_city",
    segments: [{ from: "ICN", to: "NRT" }, { from: "NRT", to: "LAX" }],
  }));
  assert.equal(findField(decodeTfs(mc), 19), 3);
});

test("google: multi-city has correct number of segments", () => {
  const url = buildGoogleUrl(
    makeParams({
      tripType: "multi_city",
      segments: [
        { from: "ICN", to: "NRT" },
        { from: "NRT", to: "LAX" },
      ],
    })
  );
  const decoded = decodeTfs(url);
  assert.equal(findField(decoded, 2), 2);
  const segs = getSegments(decoded);
  assert.equal(segs.length, 2);
  assert.equal(findField(findField(segs[1], 13), 2), "NRT");
  assert.equal(findField(findField(segs[1], 14), 2), "LAX");
  assert.equal(findField(segs[1], 2), "2026-03-22"); // default 7-day stay
});

test("google: business class encodes as cabinClass=3 (field_9)", () => {
  const url = buildGoogleUrl(
    makeParams({ passengers: { total: 2, byCabin: { business: 2 } } })
  );
  const decoded = decodeTfs(url);
  assert.equal(findField(decoded, 8), 2); // adults
  assert.equal(findField(decoded, 9), 3); // business = 3
});

test("google: empty segments returns base URL", () => {
  const url = buildGoogleUrl({ segments: [] });
  assert.equal(url, "https://www.google.com/travel/flights");
});