chore: 현재 작업 중간 커밋

This commit is contained in:
chungyeong
2026-03-05 11:00:45 +09:00
parent 02970df6af
commit be88b4fcec
43 changed files with 6837 additions and 466 deletions

404
test/crawlerUrls.test.js Normal file
View File

@@ -0,0 +1,404 @@
"use strict";
const test = require("node:test");
const assert = require("node:assert/strict");
// const { buildNaverUrl } = require("../src/crawlers/naver");
const { buildSkyscannerUrl } = require("../src/crawlers/skyscanner");
const { buildGoogleUrl } = require("../src/crawlers/google");
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Build the baseline search parameters shared by the URL-builder tests.
 *
 * The baseline is a one-way ICN → NRT trip for one passenger departing on
 * 2026-03-15. Keys in `overrides` replace baseline keys wholesale (the merge
 * is shallow, not deep).
 *
 * @param {object} [overrides={}] - Top-level keys to add or replace.
 * @returns {object} A fresh parameter object for each call.
 */
function makeParams(overrides = {}) {
  const defaults = {
    tripType: "one_way",
    segments: [{ from: "ICN", to: "NRT" }],
    passengers: { total: 1, byCabin: {} },
    departureDateWindow: { from: "2026-03-15" },
  };
  // Later spread wins, so overrides take precedence over the baseline.
  return { ...defaults, ...overrides };
}
// ---------------------------------------------------------------------------
// Naver URL tests (provider temporarily excluded)
// ---------------------------------------------------------------------------
// test("naver: one-way URL format", () => {
// const url = buildNaverUrl(makeParams());
// assert.match(url, /flight\.naver\.com\/flights\/international\/ICN-NRT-20260315/);
// assert.match(url, /adult=1/);
// assert.match(url, /fareType=Y/);
// assert.ok(!url.includes("/NRT-ICN-"));
// });
// test("naver: round-trip URL has separate outbound and return segments", () => {
// const url = buildNaverUrl(makeParams({ tripType: "round_trip", stayDurationDays: { minDays: 7 } }));
// assert.match(url, /ICN-NRT-20260315/);
// assert.match(url, /\/NRT-ICN-20260322/);
// assert.match(url, /fareType=Y/);
// });
// test("naver: round-trip defaults to 7-day return when stayDurationDays is missing", () => {
// const url = buildNaverUrl(makeParams({ tripType: "round_trip" }));
// assert.match(url, /\/NRT-ICN-20260322/);
// });
// test("naver: multi-city uses /flights/multi and colon separators", () => {
// const url = buildNaverUrl(makeParams({
// tripType: "multi_city",
// segments: [{ from: "ICN", to: "NRT" }, { from: "NRT", to: "ICN" }],
// }));
// assert.match(url, /\/flights\/multi\?/);
// assert.match(url, /ICN:NRT:20260315/);
// assert.match(url, /NRT:ICN:20260318/);
// });
// test("naver: business class maps to fareType=C", () => {
// const url = buildNaverUrl(makeParams({ passengers: { total: 2, byCabin: { business: 2 } } }));
// assert.match(url, /fareType=C/);
// assert.match(url, /adult=2/);
// });
// test("naver: first class maps to fareType=F", () => {
// const url = buildNaverUrl(makeParams({ passengers: { total: 1, byCabin: { first: 1 } } }));
// assert.match(url, /fareType=F/);
// });
// test("naver: empty segments returns base URL", () => {
// const url = buildNaverUrl({ segments: [] });
// assert.equal(url, "https://flight.naver.com");
// });
// ---------------------------------------------------------------------------
// Skyscanner URL tests (/transport/d/ format with YYYY-MM-DD)
// ---------------------------------------------------------------------------
test("skyscanner: one-way uses /transport/d/ with YYYY-MM-DD", () => {
  const oneWayUrl = buildSkyscannerUrl(makeParams());
  // Path: lowercase airport codes around an ISO date; query: passengers and cabin.
  assert.match(oneWayUrl, /\/transport\/d\/icn\/2026-03-15\/nrt\//);
  assert.match(oneWayUrl, /adultsv2=1/);
  assert.match(oneWayUrl, /cabinclass=economy/);
});

test("skyscanner: round-trip path has outbound and return legs", () => {
  const roundTripParams = makeParams({
    tripType: "round_trip",
    stayDurationDays: { minDays: 5 },
  });
  const roundTripUrl = buildSkyscannerUrl(roundTripParams);
  // Expected path: /icn/2026-03-15/nrt/nrt/2026-03-20/icn/ (departure + 5 days).
  assert.match(roundTripUrl, /\/icn\/2026-03-15\/nrt\/nrt\/2026-03-20\/icn\//);
});

test("skyscanner: round-trip defaults to 7-day return", () => {
  const roundTripParams = makeParams({ tripType: "round_trip" });
  // No stayDurationDays given: the return leg is expected 7 days after departure.
  assert.match(buildSkyscannerUrl(roundTripParams), /\/nrt\/2026-03-22\/icn\//);
});

test("skyscanner: multi-city path-based with multiple legs", () => {
  const legs = [
    { from: "ICN", to: "MAD" },
    { from: "BCN", to: "ICN" },
  ];
  const multiCityUrl = buildSkyscannerUrl(
    makeParams({ tripType: "multi_city", segments: legs })
  );
  // Expected path: /transport/d/icn/2026-03-15/mad/bcn/2026-03-22/icn/
  // (second leg falls back to the default 7-day stay).
  assert.match(
    multiCityUrl,
    /\/transport\/d\/icn\/2026-03-15\/mad\/bcn\/2026-03-22\/icn\//
  );
});

test("skyscanner: business cabin class", () => {
  const passengers = { total: 1, byCabin: { business: 1 } };
  assert.match(
    buildSkyscannerUrl(makeParams({ passengers })),
    /cabinclass=business/
  );
});

test("skyscanner: duration param (total journey minutes)", () => {
  const constraints = { maxJourneyHours: { hours: 33.5 } };
  const cappedUrl = buildSkyscannerUrl(makeParams({ constraints }));
  // 33.5 hours * 60 = 2010 minutes.
  assert.match(cappedUrl, /duration=2010/);
});

test("skyscanner: maxStops=0 → stops=!oneStop,!twoPlusStops (direct only)", () => {
  const directOnlyUrl = buildSkyscannerUrl(
    makeParams({ constraints: { maxStops: 0 } })
  );
  assert.equal(
    new URL(directOnlyUrl).searchParams.get("stops"),
    "!oneStop,!twoPlusStops"
  );
});

test("skyscanner: maxStops=1 → stops=!twoPlusStops (direct + 1 stop)", () => {
  const oneStopUrl = buildSkyscannerUrl(
    makeParams({ constraints: { maxStops: 1 } })
  );
  assert.equal(new URL(oneStopUrl).searchParams.get("stops"), "!twoPlusStops");
});

test("skyscanner: no maxStops → no stops param", () => {
  const unconstrainedUrl = buildSkyscannerUrl(makeParams());
  // searchParams.get() yields null when the parameter is absent.
  assert.equal(new URL(unconstrainedUrl).searchParams.get("stops"), null);
});

test("skyscanner: matches real URL structure (multi-city + business + stops + duration)", () => {
  const fullParams = {
    tripType: "multi_city",
    segments: [
      { from: "ICN", to: "MAD" },
      { from: "BCN", to: "ICN" },
    ],
    passengers: { total: 2, byCabin: { business: 2 } },
    departureDateWindow: { from: "2026-11-26" },
    stayDurationDays: { minDays: 19 },
    constraints: { maxStops: 1, maxJourneyHours: { hours: 33.5 } },
  };
  const fullUrl = buildSkyscannerUrl(fullParams);

  // Path: the second leg is dated departure + minDays (2026-11-26 + 19 = 2026-12-15).
  assert.match(
    fullUrl,
    /\/transport\/d\/icn\/2026-11-26\/mad\/bcn\/2026-12-15\/icn\//
  );

  // Query parameters.
  assert.match(fullUrl, /adultsv2=2/);
  assert.match(fullUrl, /cabinclass=business/);
  assert.match(fullUrl, /duration=2010/);
  assert.equal(new URL(fullUrl).searchParams.get("stops"), "!twoPlusStops");
});

test("skyscanner: empty segments returns base URL", () => {
  const fallbackUrl = buildSkyscannerUrl({ segments: [] });
  assert.equal(fallbackUrl, "https://www.skyscanner.co.kr");
});
// ---------------------------------------------------------------------------
// Google URL tests (protobuf tfs format)
// ---------------------------------------------------------------------------
/**
 * Decode the `tfs` query parameter of a URL into parsed protobuf fields.
 *
 * The parameter is expected to be URL-safe base64; it is mapped back to the
 * standard base64 alphabet, decoded to bytes, and handed to `parseProtobuf`.
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {Array|null} Whatever `parseProtobuf` returns for the decoded
 *   bytes, or `null` when the URL has no (or an empty) `tfs` parameter.
 */
function decodeTfs(url) {
  const encoded = new URL(url).searchParams.get("tfs");
  if (!encoded) {
    return null;
  }
  // Translate the URL-safe alphabet ("-" and "_") to standard base64 ("+" and "/").
  const standardBase64 = encoded.replace(/[-_]/g, (ch) => (ch === "-" ? "+" : "/"));
  return parseProtobuf(Buffer.from(standardBase64, "base64"));
}
/**
 * Read one protobuf base-128 varint from `buf` starting at `pos`.
 *
 * Each byte contributes its low 7 bits, least-significant group first; a
 * clear high bit marks the final byte. Bits are accumulated as a BigInt and
 * converted to a Number at the end, so values above
 * Number.MAX_SAFE_INTEGER lose precision.
 *
 * @param {Buffer|Uint8Array} buf - Bytes to read from.
 * @param {number} pos - Offset of the varint's first byte.
 * @returns {{ value: number, next: number }} The decoded value and the
 *   offset of the first byte after the varint.
 * @throws {RangeError} If the buffer ends before the varint terminates
 *   (including when `pos` is already at or past the end). Failing loudly
 *   here keeps callers from recording a phantom value for malformed input.
 */
function readVarint(buf, pos) {
  let value = 0n;
  let shift = 0n;
  let cursor = pos;
  while (cursor < buf.length) {
    const byte = buf[cursor++];
    value |= BigInt(byte & 0x7f) << shift;
    if ((byte & 0x80) === 0) {
      // High bit clear: this was the last byte of the varint.
      return { value: Number(value), next: cursor };
    }
    shift += 7n;
  }
  throw new RangeError(`Truncated varint at offset ${pos}`);
}
/**
 * Schema-less decoder for a protobuf wire-format buffer.
 *
 * Produces one record per field, in wire order:
 *   - wire type 0 → `{ f, t: "varint", v: number }`
 *   - wire type 2 → `{ f, t: "str", v: string }` when the payload is entirely
 *     printable ASCII; otherwise `{ f, t: "msg", v: [...] }` when the payload
 *     parses as a nested message, else `{ f, t: "bytes", v: Buffer }`.
 * Parsing stops at the first field with any other wire type; records
 * collected up to that point are returned.
 *
 * The str/msg/bytes split is a heuristic: without a schema the payload kind
 * cannot be known for certain.
 *
 * @param {Buffer} buf - Encoded message bytes.
 * @returns {Array<{ f: number, t: string, v: * }>} Decoded field records.
 * @throws {RangeError} If a length-delimited field declares more bytes than
 *   remain in the buffer. For nested payloads this is what routes
 *   non-message data to the "bytes" fallback.
 */
function parseProtobuf(buf) {
  const fields = [];
  let pos = 0;
  while (pos < buf.length) {
    const tag = readVarint(buf, pos);
    pos = tag.next;
    // Tag layout: (field number << 3) | wire type.
    const fieldNum = tag.value >> 3;
    const wireType = tag.value & 0x7;

    if (wireType === 0) {
      const val = readVarint(buf, pos);
      pos = val.next;
      fields.push({ f: fieldNum, t: "varint", v: val.value });
      continue;
    }

    if (wireType !== 2) {
      // Unsupported wire type: stop and return what was decoded so far.
      break;
    }

    const len = readVarint(buf, pos);
    const end = len.next + len.value;
    if (end > buf.length) {
      // Do not silently clamp: a short payload means this is not a valid message.
      throw new RangeError(
        `Field ${fieldNum} declares ${len.value} bytes but only ${buf.length - len.next} remain`
      );
    }
    // subarray() is the non-deprecated equivalent of Buffer#slice (same view semantics).
    const data = buf.subarray(len.next, end);
    pos = end;

    const str = data.toString("utf8");
    if (/^[\x20-\x7E]+$/.test(str)) {
      fields.push({ f: fieldNum, t: "str", v: str });
      continue;
    }
    try {
      fields.push({ f: fieldNum, t: "msg", v: parseProtobuf(data) });
    } catch {
      // Payload is neither printable text nor a well-formed message.
      fields.push({ f: fieldNum, t: "bytes", v: data });
    }
  }
  return fields;
}
/**
 * Collect the payloads of every field-3 record that decoded as a nested
 * message; the tests treat each one as a flight segment.
 *
 * @param {Array<{ f: number, t: string, v: * }>} decoded - Decoded top-level fields.
 * @returns {Array} The `v` of each matching record, in original order.
 */
function getSegments(decoded) {
  return decoded.reduce((segments, field) => {
    if (field.f === 3 && field.t === "msg") {
      segments.push(field.v);
    }
    return segments;
  }, []);
}
/**
 * Look up the value of the first record with the given field number.
 *
 * @param {Array<{ f: number, v: * }>} decoded - Decoded field records.
 * @param {number} fieldNum - Field number to search for.
 * @returns {*} The record's `v`, or `undefined` when no record matches.
 */
function findField(decoded, fieldNum) {
  const match = decoded.find((entry) => entry.f === fieldNum);
  return match?.v;
}
test("google: uses protobuf tfs format (not ?q= query)", () => {
  const searchUrl = buildGoogleUrl(makeParams());
  assert.match(searchUrl, /\/flights\/search\?tfs=/);
  assert.match(searchUrl, /&tfu=/);
  assert.ok(!searchUrl.includes("?q="));
});

test("google: one-way has 1 segment with correct airports and date", () => {
  const legs = getSegments(decodeTfs(buildGoogleUrl(makeParams())));
  assert.equal(legs.length, 1);
  const [leg] = legs;
  // Field 2 holds the travel date.
  assert.equal(findField(leg, 2), "2026-03-15");
  // Field 13 is the origin message; its nested field 2 is the airport code.
  assert.equal(findField(findField(leg, 13), 2), "ICN");
  // Field 14 is the destination message; its nested field 2 is the airport code.
  assert.equal(findField(findField(leg, 14), 2), "NRT");
});

test("google: round-trip has 2 segments with reversed airports", () => {
  const roundTripParams = makeParams({
    tripType: "round_trip",
    stayDurationDays: { minDays: 7 },
  });
  const legs = getSegments(decodeTfs(buildGoogleUrl(roundTripParams)));
  assert.equal(legs.length, 2);
  const [outbound, inbound] = legs;

  // Outbound leg: ICN → NRT on 2026-03-15.
  assert.equal(findField(outbound, 2), "2026-03-15");
  assert.equal(findField(findField(outbound, 13), 2), "ICN");
  assert.equal(findField(findField(outbound, 14), 2), "NRT");

  // Return leg: NRT → ICN on 2026-03-22.
  assert.equal(findField(inbound, 2), "2026-03-22");
  assert.equal(findField(findField(inbound, 13), 2), "NRT");
  assert.equal(findField(findField(inbound, 14), 2), "ICN");
});

test("google: round-trip defaults to 7-day return when stayDurationDays is missing", () => {
  const roundTripUrl = buildGoogleUrl(makeParams({ tripType: "round_trip" }));
  const legs = getSegments(decodeTfs(roundTripUrl));
  assert.equal(legs.length, 2);
  assert.equal(findField(legs[1], 2), "2026-03-22");
});

test("google: maxJourneyHours encodes as field_12 in minutes", () => {
  const constraints = { maxJourneyHours: { hours: 9 } };
  const legs = getSegments(decodeTfs(buildGoogleUrl(makeParams({ constraints }))));
  // 9 hours * 60 = 540 minutes.
  assert.equal(findField(legs[0], 12), 540);
});

test("google: maxStops encodes as field_5", () => {
  const constraints = { maxStops: 1 };
  const legs = getSegments(decodeTfs(buildGoogleUrl(makeParams({ constraints }))));
  assert.equal(findField(legs[0], 5), 1);
});

test("google: maxStops=0 means direct flights only", () => {
  const constraints = { maxStops: 0 };
  const legs = getSegments(decodeTfs(buildGoogleUrl(makeParams({ constraints }))));
  assert.equal(findField(legs[0], 5), 0);
});

test("google: no maxStops omits field_5", () => {
  const legs = getSegments(decodeTfs(buildGoogleUrl(makeParams())));
  assert.equal(findField(legs[0], 5), undefined);
});

test("google: byte-exact match with known duration-filter URL", () => {
  const knownGood =
    "CBwQAhohEgoyMDI2LTAzLTE1YJwEagcIARIDSUNOcgcIARIDTlJUGiESCjIwMjYtMDMtMjJgnARqBwgBEgNOUlRyBwgBEgNJQ05AAUgBcAGCAQsI____________AZgBAQ";
  const builtUrl = buildGoogleUrl({
    tripType: "round_trip",
    segments: [{ from: "ICN", to: "NRT" }],
    passengers: { total: 1, byCabin: {} },
    departureDateWindow: { from: "2026-03-15" },
    stayDurationDays: { minDays: 7 },
    constraints: { maxJourneyHours: { hours: 9 } },
  });
  // Compare the raw tfs payload character for character.
  assert.equal(new URL(builtUrl).searchParams.get("tfs"), knownGood);
});

test("google: byte-exact match with known 1-stop URL", () => {
  const knownGood =
    "CBwQAhojEgoyMDI2LTAzLTE1KAFgnARqBwgBEgNJQ05yBwgBEgNOUlQaIxIKMjAyNi0wMy0yMigBYJwEagcIARIDTlJUcgcIARIDSUNOQAFIAXABggELCP___________wGYAQE";
  const builtUrl = buildGoogleUrl({
    tripType: "round_trip",
    segments: [{ from: "ICN", to: "NRT" }],
    passengers: { total: 1, byCabin: {} },
    departureDateWindow: { from: "2026-03-15" },
    stayDurationDays: { minDays: 7 },
    constraints: { maxStops: 1, maxJourneyHours: { hours: 9 } },
  });
  // Compare the raw tfs payload character for character.
  assert.equal(new URL(builtUrl).searchParams.get("tfs"), knownGood);
});

test("google: field 19 encodes trip type (1=RT, 2=OW, 3=MC)", () => {
  // [overrides, expected value of top-level field 19], checked in this order.
  const cases = [
    [{ tripType: "one_way" }, 2],
    [{ tripType: "round_trip" }, 1],
    [
      {
        tripType: "multi_city",
        segments: [{ from: "ICN", to: "NRT" }, { from: "NRT", to: "LAX" }],
      },
      3,
    ],
  ];
  for (const [overrides, expectedCode] of cases) {
    const tripUrl = buildGoogleUrl(makeParams(overrides));
    assert.equal(findField(decodeTfs(tripUrl), 19), expectedCode);
  }
});

test("google: multi-city has correct number of segments", () => {
  const multiCityParams = makeParams({
    tripType: "multi_city",
    segments: [
      { from: "ICN", to: "NRT" },
      { from: "NRT", to: "LAX" },
    ],
  });
  const topLevel = decodeTfs(buildGoogleUrl(multiCityParams));
  assert.equal(findField(topLevel, 2), 2);

  const legs = getSegments(topLevel);
  assert.equal(legs.length, 2);
  const secondLeg = legs[1];
  assert.equal(findField(findField(secondLeg, 13), 2), "NRT");
  assert.equal(findField(findField(secondLeg, 14), 2), "LAX");
  // Second leg is dated with the default 7-day stay.
  assert.equal(findField(secondLeg, 2), "2026-03-22");
});

test("google: business class encodes as cabinClass=3 (field_9)", () => {
  const passengers = { total: 2, byCabin: { business: 2 } };
  const topLevel = decodeTfs(buildGoogleUrl(makeParams({ passengers })));
  // Field 8 carries the adult count; field 9 carries the cabin class (3 = business).
  assert.equal(findField(topLevel, 8), 2);
  assert.equal(findField(topLevel, 9), 3);
});

test("google: empty segments returns base URL", () => {
  const fallbackUrl = buildGoogleUrl({ segments: [] });
  assert.equal(fallbackUrl, "https://www.google.com/travel/flights");
});