chore: 현재 작업 중간 커밋
This commit is contained in:
404
test/crawlerUrls.test.js
Normal file
404
test/crawlerUrls.test.js
Normal file
@@ -0,0 +1,404 @@
|
||||
"use strict";
|
||||
|
||||
const test = require("node:test");
|
||||
const assert = require("node:assert/strict");
|
||||
// const { buildNaverUrl } = require("../src/crawlers/naver");
|
||||
const { buildSkyscannerUrl } = require("../src/crawlers/skyscanner");
|
||||
const { buildGoogleUrl } = require("../src/crawlers/google");
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build a baseline search-parameter object for the URL-builder tests.
 *
 * The defaults describe a one-way ICN -> NRT trip for a single passenger with
 * no per-cabin breakdown, departing on 2026-03-15. Keys in `overrides` are
 * spread last, so they replace the defaults wholesale (shallow merge):
 * overriding `passengers` or `segments` replaces the entire value.
 *
 * @param {object} [overrides={}] - Top-level keys to add or replace.
 * @returns {object} A freshly created params object.
 */
function makeParams(overrides = {}) {
  return {
    tripType: "one_way",
    segments: [{ from: "ICN", to: "NRT" }],
    passengers: { total: 1, byCabin: {} },
    departureDateWindow: { from: "2026-03-15" },
    ...overrides,
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Naver URL tests (provider temporarily excluded)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// test("naver: one-way URL format", () => {
|
||||
// const url = buildNaverUrl(makeParams());
|
||||
// assert.match(url, /flight\.naver\.com\/flights\/international\/ICN-NRT-20260315/);
|
||||
// assert.match(url, /adult=1/);
|
||||
// assert.match(url, /fareType=Y/);
|
||||
// assert.ok(!url.includes("/NRT-ICN-"));
|
||||
// });
|
||||
|
||||
// test("naver: round-trip URL has separate outbound and return segments", () => {
|
||||
// const url = buildNaverUrl(makeParams({ tripType: "round_trip", stayDurationDays: { minDays: 7 } }));
|
||||
// assert.match(url, /ICN-NRT-20260315/);
|
||||
// assert.match(url, /\/NRT-ICN-20260322/);
|
||||
// assert.match(url, /fareType=Y/);
|
||||
// });
|
||||
|
||||
// test("naver: round-trip defaults to 7-day return when stayDurationDays is missing", () => {
|
||||
// const url = buildNaverUrl(makeParams({ tripType: "round_trip" }));
|
||||
// assert.match(url, /\/NRT-ICN-20260322/);
|
||||
// });
|
||||
|
||||
// test("naver: multi-city uses /flights/multi and colon separators", () => {
|
||||
// const url = buildNaverUrl(makeParams({
|
||||
// tripType: "multi_city",
|
||||
// segments: [{ from: "ICN", to: "NRT" }, { from: "NRT", to: "ICN" }],
|
||||
// }));
|
||||
// assert.match(url, /\/flights\/multi\?/);
|
||||
// assert.match(url, /ICN:NRT:20260315/);
|
||||
// assert.match(url, /NRT:ICN:20260318/);
|
||||
// });
|
||||
|
||||
// test("naver: business class maps to fareType=C", () => {
|
||||
// const url = buildNaverUrl(makeParams({ passengers: { total: 2, byCabin: { business: 2 } } }));
|
||||
// assert.match(url, /fareType=C/);
|
||||
// assert.match(url, /adult=2/);
|
||||
// });
|
||||
|
||||
// test("naver: first class maps to fareType=F", () => {
|
||||
// const url = buildNaverUrl(makeParams({ passengers: { total: 1, byCabin: { first: 1 } } }));
|
||||
// assert.match(url, /fareType=F/);
|
||||
// });
|
||||
|
||||
// test("naver: empty segments returns base URL", () => {
|
||||
// const url = buildNaverUrl({ segments: [] });
|
||||
// assert.equal(url, "https://flight.naver.com");
|
||||
// });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Skyscanner URL tests (/transport/d/ format with YYYY-MM-DD)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// One-way search: single leg in the path, passenger count and cabin in the query.
test("skyscanner: one-way uses /transport/d/ with YYYY-MM-DD", () => {
  const url = buildSkyscannerUrl(makeParams());
  assert.match(url, /\/transport\/d\/icn\/2026-03-15\/nrt\//);
  assert.match(url, /adultsv2=1/);
  assert.match(url, /cabinclass=economy/);
});

// Round trip: the return leg is appended to the path, dated departure + minDays.
test("skyscanner: round-trip path has outbound and return legs", () => {
  const url = buildSkyscannerUrl(
    makeParams({
      tripType: "round_trip",
      stayDurationDays: { minDays: 5 },
    })
  );
  // /icn/2026-03-15/nrt/nrt/2026-03-20/icn/
  assert.match(url, /\/icn\/2026-03-15\/nrt\/nrt\/2026-03-20\/icn\//);
});

// Without stayDurationDays the return date is expected 7 days after departure.
test("skyscanner: round-trip defaults to 7-day return", () => {
  const url = buildSkyscannerUrl(
    makeParams({ tripType: "round_trip" })
  );
  assert.match(url, /\/nrt\/2026-03-22\/icn\//);
});

test("skyscanner: multi-city path-based with multiple legs", () => {
  const url = buildSkyscannerUrl(
    makeParams({
      tripType: "multi_city",
      segments: [
        { from: "ICN", to: "MAD" },
        { from: "BCN", to: "ICN" },
      ],
    })
  );
  // /transport/d/icn/2026-03-15/mad/bcn/2026-03-22/icn/ (default 7-day stay)
  assert.match(url, /\/transport\/d\/icn\/2026-03-15\/mad\/bcn\/2026-03-22\/icn\//);
});

test("skyscanner: business cabin class", () => {
  const url = buildSkyscannerUrl(
    makeParams({ passengers: { total: 1, byCabin: { business: 1 } } })
  );
  assert.match(url, /cabinclass=business/);
});

test("skyscanner: duration param (total journey minutes)", () => {
  const url = buildSkyscannerUrl(
    makeParams({ constraints: { maxJourneyHours: { hours: 33.5 } } })
  );
  assert.match(url, /duration=2010/); // 33.5 * 60
});

// The stops filter is asserted via URL parsing so percent-encoding of "!" and
// "," in the raw URL does not affect the comparison.
test("skyscanner: maxStops=0 → stops=!oneStop,!twoPlusStops (direct only)", () => {
  const url = buildSkyscannerUrl(
    makeParams({ constraints: { maxStops: 0 } })
  );
  const stops = new URL(url).searchParams.get("stops");
  assert.equal(stops, "!oneStop,!twoPlusStops");
});

test("skyscanner: maxStops=1 → stops=!twoPlusStops (direct + 1 stop)", () => {
  const url = buildSkyscannerUrl(
    makeParams({ constraints: { maxStops: 1 } })
  );
  const stops = new URL(url).searchParams.get("stops");
  assert.equal(stops, "!twoPlusStops");
});

test("skyscanner: no maxStops → no stops param", () => {
  const url = buildSkyscannerUrl(makeParams());
  const stops = new URL(url).searchParams.get("stops");
  assert.equal(stops, null);
});

// Combined scenario: every option exercised above, applied in a single call.
test("skyscanner: matches real URL structure (multi-city + business + stops + duration)", () => {
  const url = buildSkyscannerUrl({
    tripType: "multi_city",
    segments: [
      { from: "ICN", to: "MAD" },
      { from: "BCN", to: "ICN" },
    ],
    passengers: { total: 2, byCabin: { business: 2 } },
    departureDateWindow: { from: "2026-11-26" },
    stayDurationDays: { minDays: 19 },
    constraints: { maxStops: 1, maxJourneyHours: { hours: 33.5 } },
  });
  // Path: return leg uses stayDurationDays.minDays=19 → 2026-11-26 + 19 = 2026-12-15
  assert.match(url, /\/transport\/d\/icn\/2026-11-26\/mad\/bcn\/2026-12-15\/icn\//);
  // Params
  assert.match(url, /adultsv2=2/);
  assert.match(url, /cabinclass=business/);
  assert.match(url, /duration=2010/);
  const stops = new URL(url).searchParams.get("stops");
  assert.equal(stops, "!twoPlusStops");
});

test("skyscanner: empty segments returns base URL", () => {
  const url = buildSkyscannerUrl({ segments: [] });
  assert.equal(url, "https://www.skyscanner.co.kr");
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Google URL tests (protobuf tfs format)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Helper: decode the URL-safe base64 `tfs` query parameter of `url` and parse
 * the resulting bytes with `parseProtobuf`.
 *
 * @param {string} url - Absolute URL; it is parsed with `new URL(url)`.
 * @returns {Array<object>|null} Whatever `parseProtobuf` returns for the
 *   decoded bytes, or `null` when the `tfs` parameter is missing or empty.
 */
function decodeTfs(url) {
  const tfs = new URL(url).searchParams.get("tfs");
  if (!tfs) return null;
  // "base64url" understands the "-" / "_" alphabet and optional padding
  // natively, so no manual translation to standard base64 is required.
  const buf = Buffer.from(tfs, "base64url");
  return parseProtobuf(buf);
}
|
||||
|
||||
/**
 * Read one base-128 varint from `buf` starting at offset `pos`.
 *
 * Bits are accumulated in a BigInt so encodings longer than 32 bits do not
 * wrap; the result is then converted to a Number, so values above
 * Number.MAX_SAFE_INTEGER lose precision.
 *
 * @param {Buffer} buf - Bytes to read from.
 * @param {number} pos - Offset of the first varint byte.
 * @returns {{ value: number, next: number, complete: boolean }} The decoded
 *   value, the offset just past the last byte consumed, and whether a
 *   terminating byte (high bit clear) was seen before the buffer ended.
 */
function readVarint(buf, pos) {
  let value = 0n;
  let shift = 0n;
  let cursor = pos;
  let complete = false;
  while (cursor < buf.length) {
    const byte = buf[cursor];
    cursor += 1;
    value |= BigInt(byte & 0x7f) << shift;
    shift += 7n;
    if ((byte & 0x80) === 0) {
      complete = true;
      break;
    }
  }
  return { value: Number(value), next: cursor, complete };
}

/**
 * Schema-less protobuf decoder used by the assertions in this file.
 *
 * Only wire types 0 (varint) and 2 (length-delimited) are decoded; parsing
 * stops silently at the first field with any other wire type. Each decoded
 * field becomes `{ f, t, v }` where `f` is the field number and `t` is one of:
 *   - "varint": `v` is a number
 *   - "str":    `v` is a string (payload is non-empty printable ASCII)
 *   - "msg":    `v` is the recursively decoded field list
 *   - "bytes":  `v` is the raw payload (payload is not well-formed wire data)
 *
 * @param {Buffer} buf - Encoded message bytes.
 * @returns {Array<{ f: number, t: string, v: * }>} Decoded fields in wire order.
 */
function parseProtobuf(buf) {
  // The top-level call stays lenient so truncated input yields partial results.
  return parseProtobufFields(buf, false);
}

/**
 * Decode the fields of `buf`.
 *
 * In strict mode a RangeError is thrown for data that cannot be valid wire
 * format: a varint cut off by the end of the buffer, field number 0, wire
 * type 6 or 7, or a declared length that runs past the buffer. Strict mode is
 * used for nested payloads so the caller can fall back to "bytes".
 *
 * @param {Buffer} buf - Encoded bytes.
 * @param {boolean} strict - Throw on malformed data instead of tolerating it.
 * @returns {Array<{ f: number, t: string, v: * }>} Decoded fields.
 */
function parseProtobufFields(buf, strict) {
  const fields = [];
  let pos = 0;
  while (pos < buf.length) {
    const tag = readVarint(buf, pos);
    pos = tag.next;
    const fieldNum = tag.value >> 3;
    const wireType = tag.value & 0x7;
    if (strict && (!tag.complete || fieldNum === 0 || wireType > 5)) {
      throw new RangeError("invalid protobuf tag");
    }
    if (wireType === 0) {
      const val = readVarint(buf, pos);
      if (strict && !val.complete) {
        throw new RangeError("truncated protobuf varint");
      }
      pos = val.next;
      fields.push({ f: fieldNum, t: "varint", v: val.value });
    } else if (wireType === 2) {
      const len = readVarint(buf, pos);
      if (strict && (!len.complete || len.next + len.value > buf.length)) {
        throw new RangeError("protobuf length exceeds buffer");
      }
      pos = len.next;
      const data = buf.subarray(pos, pos + len.value);
      pos += len.value;
      const str = data.toString("utf8");
      if (/^[\x20-\x7E]+$/.test(str)) {
        fields.push({ f: fieldNum, t: "str", v: str });
      } else {
        let entry;
        try {
          entry = { f: fieldNum, t: "msg", v: parseProtobufFields(data, true) };
        } catch (err) {
          if (!(err instanceof RangeError)) throw err;
          // Not decodable as a nested message: expose the raw payload instead.
          entry = { f: fieldNum, t: "bytes", v: data };
        }
        fields.push(entry);
      }
    } else {
      // Wire types other than 0 and 2 are not decoded; stop here.
      break;
    }
  }
  return fields;
}
|
||||
|
||||
/**
 * Extract all field_3 (segment) messages from decoded tfs.
 *
 * @param {Array<{ f: number, t: string, v: * }>} decoded - Field list as
 *   produced by `parseProtobuf`.
 * @returns {Array<Array<object>>} The nested field lists of every entry whose
 *   field number is 3 and whose type is "msg", in their original order.
 */
function getSegments(decoded) {
  return decoded.filter((f) => f.f === 3 && f.t === "msg").map((f) => f.v);
}
|
||||
|
||||
/**
 * Find a field value in a decoded protobuf array.
 *
 * Deliberately not null-safe: passing a missing message (`undefined`) throws,
 * so a test that expects a field to be absent cannot pass merely because the
 * enclosing message was never produced.
 *
 * @param {Array<{ f: number, v: * }>} decoded - Field list to search.
 * @param {number} fieldNum - Field number to look up.
 * @returns {*} The `v` of the first entry whose `f` equals `fieldNum`, or
 *   `undefined` when there is no such entry.
 */
function findField(decoded, fieldNum) {
  const f = decoded.find((x) => x.f === fieldNum);
  return f ? f.v : undefined;
}
|
||||
|
||||
test("google: uses protobuf tfs format (not ?q= query)", () => {
  const url = buildGoogleUrl(makeParams());
  assert.match(url, /\/flights\/search\?tfs=/);
  assert.match(url, /&tfu=/);
  assert.ok(!url.includes("?q="));
});

test("google: one-way has 1 segment with correct airports and date", () => {
  const url = buildGoogleUrl(makeParams());
  const decoded = decodeTfs(url);
  const segs = getSegments(decoded);
  assert.equal(segs.length, 1);
  // Date
  assert.equal(findField(segs[0], 2), "2026-03-15");
  // Origin: field 13 → nested field 2 = "ICN"
  const origin = findField(segs[0], 13);
  assert.equal(findField(origin, 2), "ICN");
  // Dest: field 14 → nested field 2 = "NRT"
  const dest = findField(segs[0], 14);
  assert.equal(findField(dest, 2), "NRT");
});

test("google: round-trip has 2 segments with reversed airports", () => {
  const url = buildGoogleUrl(
    makeParams({ tripType: "round_trip", stayDurationDays: { minDays: 7 } })
  );
  const decoded = decodeTfs(url);
  const segs = getSegments(decoded);
  assert.equal(segs.length, 2);
  // Outbound: ICN → NRT on 2026-03-15
  assert.equal(findField(segs[0], 2), "2026-03-15");
  assert.equal(findField(findField(segs[0], 13), 2), "ICN");
  assert.equal(findField(findField(segs[0], 14), 2), "NRT");
  // Return: NRT → ICN on 2026-03-22
  assert.equal(findField(segs[1], 2), "2026-03-22");
  assert.equal(findField(findField(segs[1], 13), 2), "NRT");
  assert.equal(findField(findField(segs[1], 14), 2), "ICN");
});

test("google: round-trip defaults to 7-day return when stayDurationDays is missing", () => {
  const url = buildGoogleUrl(makeParams({ tripType: "round_trip" }));
  const segs = getSegments(decodeTfs(url));
  assert.equal(segs.length, 2);
  assert.equal(findField(segs[1], 2), "2026-03-22");
});

test("google: maxJourneyHours encodes as field_12 in minutes", () => {
  const url = buildGoogleUrl(
    makeParams({ constraints: { maxJourneyHours: { hours: 9 } } })
  );
  const segs = getSegments(decodeTfs(url));
  assert.equal(findField(segs[0], 12), 540); // 9h * 60
});

test("google: maxStops encodes as field_5", () => {
  const url = buildGoogleUrl(
    makeParams({ constraints: { maxStops: 1 } })
  );
  const segs = getSegments(decodeTfs(url));
  assert.equal(findField(segs[0], 5), 1);
});

test("google: maxStops=0 means direct flights only", () => {
  const url = buildGoogleUrl(
    makeParams({ constraints: { maxStops: 0 } })
  );
  const segs = getSegments(decodeTfs(url));
  assert.equal(findField(segs[0], 5), 0);
});

test("google: no maxStops omits field_5", () => {
  const url = buildGoogleUrl(makeParams());
  const segs = getSegments(decodeTfs(url));
  assert.equal(findField(segs[0], 5), undefined);
});

// The two byte-exact tests compare the whole `tfs` value against a fixed
// string, so any change in the encoder's output makes them fail.
test("google: byte-exact match with known duration-filter URL", () => {
  const url = buildGoogleUrl({
    tripType: "round_trip",
    segments: [{ from: "ICN", to: "NRT" }],
    passengers: { total: 1, byCabin: {} },
    departureDateWindow: { from: "2026-03-15" },
    stayDurationDays: { minDays: 7 },
    constraints: { maxJourneyHours: { hours: 9 } },
  });
  const got = new URL(url).searchParams.get("tfs");
  const expected =
    "CBwQAhohEgoyMDI2LTAzLTE1YJwEagcIARIDSUNOcgcIARIDTlJUGiESCjIwMjYtMDMtMjJgnARqBwgBEgNOUlRyBwgBEgNJQ05AAUgBcAGCAQsI____________AZgBAQ";
  assert.equal(got, expected);
});

test("google: byte-exact match with known 1-stop URL", () => {
  const url = buildGoogleUrl({
    tripType: "round_trip",
    segments: [{ from: "ICN", to: "NRT" }],
    passengers: { total: 1, byCabin: {} },
    departureDateWindow: { from: "2026-03-15" },
    stayDurationDays: { minDays: 7 },
    constraints: { maxStops: 1, maxJourneyHours: { hours: 9 } },
  });
  const got = new URL(url).searchParams.get("tfs");
  const expected =
    "CBwQAhojEgoyMDI2LTAzLTE1KAFgnARqBwgBEgNJQ05yBwgBEgNOUlQaIxIKMjAyNi0wMy0yMigBYJwEagcIARIDTlJUcgcIARIDSUNOQAFIAXABggELCP___________wGYAQE";
  assert.equal(got, expected);
});

test("google: field 19 encodes trip type (1=RT, 2=OW, 3=MC)", () => {
  const ow = buildGoogleUrl(makeParams({ tripType: "one_way" }));
  assert.equal(findField(decodeTfs(ow), 19), 2);

  const rt = buildGoogleUrl(makeParams({ tripType: "round_trip" }));
  assert.equal(findField(decodeTfs(rt), 19), 1);

  const mc = buildGoogleUrl(makeParams({
    tripType: "multi_city",
    segments: [{ from: "ICN", to: "NRT" }, { from: "NRT", to: "LAX" }],
  }));
  assert.equal(findField(decodeTfs(mc), 19), 3);
});

test("google: multi-city has correct number of segments", () => {
  const url = buildGoogleUrl(
    makeParams({
      tripType: "multi_city",
      segments: [
        { from: "ICN", to: "NRT" },
        { from: "NRT", to: "LAX" },
      ],
    })
  );
  const decoded = decodeTfs(url);
  assert.equal(findField(decoded, 2), 2);
  const segs = getSegments(decoded);
  assert.equal(segs.length, 2);
  assert.equal(findField(findField(segs[1], 13), 2), "NRT");
  assert.equal(findField(findField(segs[1], 14), 2), "LAX");
  assert.equal(findField(segs[1], 2), "2026-03-22"); // default 7-day stay
});

test("google: business class encodes as cabinClass=3 (field_9)", () => {
  const url = buildGoogleUrl(
    makeParams({ passengers: { total: 2, byCabin: { business: 2 } } })
  );
  const decoded = decodeTfs(url);
  assert.equal(findField(decoded, 8), 2); // adults
  assert.equal(findField(decoded, 9), 3); // business = 3
});

test("google: empty segments returns base URL", () => {
  const url = buildGoogleUrl({ segments: [] });
  assert.equal(url, "https://www.google.com/travel/flights");
});
|
||||
Reference in New Issue
Block a user