/**
 * Coaster name matching — shared between the Queue-Times scraper and tests.
 *
 * Queue-Times and RCDB use different name conventions:
 *   - Trademark symbols (™ ® ©)
 *   - Leading "THE " prefixes
 *   - Possessives ("Catwoman's" vs "Catwoman")
 *   - Subtitles added or dropped ("Apocalypse" vs "Apocalypse the Ride")
 *   - Space-split brand words ("BAT GIRL" vs "Batgirl")
 *   - Conjunction-joined compound rides ("Joker y Harley Quinn" ≠ "Joker")
 */

// Words that join two ride names rather than extend one subtitle.
// When a prefix match is found and the next word is one of these,
// the longer name is a *different* ride, not a subtitle.
// ("&" is listed for completeness; normalizeForMatch turns it into a space,
// so it never survives as a word in normalized names.)
const CONJUNCTIONS = new Set(["y", "and", "&", "with", "de", "del", "e", "et"]);

// Minimum normalized length before the fuzzy (compact / prefix) strategies
// are allowed to fire; shorter names must match exactly.
const MIN_FUZZY_LENGTH = 5;

/** Removes every whitespace character so "bat girl" compares equal to "batgirl". */
function stripSpaces(value: string): string {
  return value.replace(/\s/g, "");
}

/**
 * Normalize a ride name for matching.
 *
 * Both sides (Queue-Times and RCDB) must be normalized with this function
 * before any comparison so the transforms are symmetric.
 *
 * @param name Raw ride name as published by either source.
 * @returns Lower-cased name with diacritics folded, trademark symbols, a
 *   leading "the", possessive suffixes and punctuation removed, and runs of
 *   whitespace collapsed to a single space.
 */
export function normalizeForMatch(name: string): string {
  return name
    .normalize("NFD")
    .replace(/[\u0300-\u036f]/g, "") // fold diacritics: "é" → "e" (otherwise \w below treats them as punctuation)
    .replace(/[\u2122\u00ae\u00a9]/g, "") // strip ™ ® ©
    .trim() // so the "THE " strip below still anchors when the input has leading whitespace
    .replace(/^the\s+/i, "") // strip leading "THE "
    .replace(/['\u2019]s\b/gi, "") // strip possessives ('s / ’s)
    .replace(/[^\w\s]/g, " ") // all remaining punctuation → space
    .replace(/\s+/g, " ")
    .toLowerCase()
    .trim();
}

/**
 * Returns true when the Queue-Times ride name matches an entry in the RCDB
 * coaster set (which must be built with normalizeForMatch).
 *
 * Matching strategy (in order):
 *   1. Exact normalized match.
 *   2. Compact (space-stripped) match — catches "BAT GIRL" vs "Batgirl".
 *   3. Prefix match — the shorter normalized name is a whole-word prefix of
 *      the longer, minimum 5 chars, unless the next word after the prefix is
 *      a conjunction (which signals a compound ride name, not a subtitle).
 *
 * @param qtName Raw ride name from Queue-Times; normalized internally.
 * @param coasterSet RCDB coaster names, already passed through normalizeForMatch.
 * @returns Whether any strategy above pairs `qtName` with a set entry. A name
 *   that normalizes to the empty string never matches.
 */
export function isCoasterMatch(qtName: string, coasterSet: Set<string>): boolean {
  const norm = normalizeForMatch(qtName);
  // A name made only of symbols/punctuation carries no information to match on.
  if (norm === "") return false;
  if (coasterSet.has(norm)) return true;

  const compact = stripSpaces(norm);
  for (const candidate of coasterSet) {
    // Compact comparison
    if (compact.length >= MIN_FUZZY_LENGTH && stripSpaces(candidate) === compact) {
      return true;
    }

    // Prefix comparison
    const normIsShorter = norm.length <= candidate.length;
    const shorter = normIsShorter ? norm : candidate;
    const longer = normIsShorter ? candidate : norm;
    if (shorter.length < MIN_FUZZY_LENGTH || !longer.startsWith(shorter)) continue;

    const remainder = longer.slice(shorter.length);
    // Require the prefix to end on a word boundary: "twist" must not match
    // "twister", and the conjunction check needs a real word to inspect.
    if (remainder.charAt(0) !== " ") continue;

    const nextWord = remainder.trim().split(" ")[0] ?? "";
    if (!CONJUNCTIONS.has(nextWord)) return true;
  }
  return false;
}