From 9cac86d241fc7293c08bbf13a4feb6037c0e2dc2 Mon Sep 17 00:00:00 2001
From: josh
Date: Sat, 4 Apr 2026 15:43:20 -0400
Subject: [PATCH] test: add coaster name matching test suite
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extract matching logic into lib/coaster-match.ts (isCoasterMatch +
normalizeForMatch) so it can be imported by both the scraper and tests
without duplication.

Add tests/coaster-matching.test.ts covering all known match/false-positive
cases:
- Trademark symbols, leading THE, possessives, punctuation
- Subtitle variants in both directions (Apocalypse, New Revolution - Classic)
- Space-split brand words (BAT GIRL vs Batgirl)
- 4D subtitle extension (THE JOKER™ 4D Free Fly Coaster vs Joker)
- False positives: Joker y Harley Quinn, conjunction connectors

Run with: npm test

Co-Authored-By: Claude Sonnet 4.6
---
 lib/coaster-match.ts           | 64 ++++++++++++++++++++++++
 lib/park-meta.ts               | 13 ++---
 lib/scrapers/queuetimes.ts     | 42 +---------------
 package.json                   |  3 +-
 tests/coaster-matching.test.ts | 90 ++++++++++++++++++++++++++++++++++
 5 files changed, 161 insertions(+), 51 deletions(-)
 create mode 100644 lib/coaster-match.ts
 create mode 100644 tests/coaster-matching.test.ts

diff --git a/lib/coaster-match.ts b/lib/coaster-match.ts
new file mode 100644
index 0000000..7d9282d
--- /dev/null
+++ b/lib/coaster-match.ts
@@ -0,0 +1,64 @@
+/**
+ * Coaster name matching — shared between the Queue-Times scraper and tests.
+ *
+ * Queue-Times and RCDB use different name conventions:
+ *   - Trademark symbols (™ ® ©)
+ *   - Leading "THE " prefixes
+ *   - Possessives ("Catwoman's" vs "Catwoman")
+ *   - Subtitles added or dropped ("Apocalypse" vs "Apocalypse the Ride")
+ *   - Space-split brand words ("BAT GIRL" vs "Batgirl")
+ *   - Conjunction-joined compound rides ("Joker y Harley Quinn" ≠ "Joker")
+ */
+
+// Words that join two ride names rather than extend one subtitle.
+// When a prefix match is found and the next word is one of these,
+// the longer name is a *different* ride, not a subtitle.
+const CONJUNCTIONS = new Set(["y", "and", "&", "with", "de", "del", "e", "et"]);
+
+/**
+ * Normalize a ride name for matching.
+ * Both sides (Queue-Times and RCDB) must be normalized with this function
+ * before any comparison so the transforms are symmetric.
+ */
+export function normalizeForMatch(name: string): string {
+  return name
+    .replace(/[\u2122\u00ae\u00a9™®©]/g, "") // strip ™ ® ©
+    .replace(/^the\s+/i, "") // strip leading "THE "
+    .replace(/['\u2019]s\b/gi, "") // strip possessives ('s / ’s)
+    .replace(/[^\w\s]/g, " ") // all remaining punctuation → space
+    .replace(/\s+/g, " ")
+    .toLowerCase()
+    .trim();
+}
+
+/**
+ * Returns true when the Queue-Times ride name matches an entry in the RCDB
+ * coaster set (which must be built with normalizeForMatch).
+ *
+ * Matching strategy (in order):
+ *   1. Exact normalized match.
+ *   2. Compact (space-stripped) match — catches "BAT GIRL" vs "Batgirl".
+ *   3. Prefix match — the shorter normalized name is a prefix of the longer,
+ *      minimum 5 chars, unless the next word after the prefix is a conjunction
+ *      (which signals a compound ride name, not a subtitle).
+ */
+export function isCoasterMatch(qtName: string, coasterSet: Set<string>): boolean {
+  const norm = normalizeForMatch(qtName);
+  if (coasterSet.has(norm)) return true;
+
+  const compact = norm.replace(/\s/g, "");
+  for (const c of coasterSet) {
+    // Compact comparison
+    if (compact.length >= 5 && c.replace(/\s/g, "") === compact) return true;
+
+    // Prefix comparison
+    const shorter = norm.length <= c.length ? norm : c;
+    const longer = norm.length <= c.length ? c : norm;
+    if (shorter.length >= 5 && longer.startsWith(shorter)) {
+      const nextWord = longer.slice(shorter.length).trim().split(" ")[0];
+      if (!CONJUNCTIONS.has(nextWord)) return true;
+    }
+  }
+
+  return false;
+}
diff --git a/lib/park-meta.ts b/lib/park-meta.ts
index caf95fa..f96516d 100644
--- a/lib/park-meta.ts
+++ b/lib/park-meta.ts
@@ -65,15 +65,8 @@ export function areCoastersStale(entry: ParkMeta): boolean {
  * 4. Collapse runs of whitespace
  * 5. Lowercase and trim
  */
-export function normalizeRideName(name: string): string {
-  return name
-    .replace(/[\u2122\u00ae\u00a9™®©]/g, "")
-    .replace(/^the\s+/i, "")
-    .replace(/[^\w\s]/g, " ")
-    .replace(/\s+/g, " ")
-    .toLowerCase()
-    .trim();
-}
+export { normalizeForMatch as normalizeRideName } from "./coaster-match.ts";
+import { normalizeForMatch } from "./coaster-match.ts";
 
 /**
  * Returns a Set of normalized coaster names for fast membership checks.
@@ -82,5 +75,5 @@ export function normalizeRideName(name: string): string {
 export function getCoasterSet(parkId: string, meta: ParkMetaMap): Set<string> | null {
   const entry = meta[parkId];
   if (!entry || entry.coasters.length === 0) return null;
-  return new Set(entry.coasters.map(normalizeRideName));
+  return new Set(entry.coasters.map(normalizeForMatch));
 }
diff --git a/lib/scrapers/queuetimes.ts b/lib/scrapers/queuetimes.ts
index b54be43..4bdd660 100644
--- a/lib/scrapers/queuetimes.ts
+++ b/lib/scrapers/queuetimes.ts
@@ -9,45 +9,7 @@
 
 const BASE = "https://queue-times.com/parks";
 
-/**
- * Normalize a ride name for fuzzy matching between Queue-Times and RCDB.
- *
- * - Strips trademark/copyright symbols (™ ® © and Unicode variants)
- * - Strips leading "THE " prefix
- * - Replaces ALL non-word, non-space characters with a space
- *   (handles !, -, :, ', ’ U+2019, ", and any other punctuation)
- * - Collapses whitespace, lowercases, trims
- */
-function normalize(name: string): string {
-  return name
-    .replace(/[\u2122\u00ae\u00a9™®©]/g, "")
-    .replace(/^the\s+/i, "")
-    .replace(/[^\w\s]/g, " ")
-    .replace(/\s+/g, " ")
-    .toLowerCase()
-    .trim();
-}
-
-/**
- * Check if a Queue-Times ride name matches any coaster in the RCDB set.
- *
- * Exact normalized match covers most cases. Prefix matching handles cases
- * where one source drops or adds a subtitle:
- *   "Apocalypse" (QT) vs "Apocalypse the Ride" (RCDB)
- *   "The New Revolution - Classic" (QT) vs "New Revolution" (RCDB)
- *
- * Minimum 5 chars on the shorter side prevents accidental short matches.
- */
-function isCoaster(name: string, coasterSet: Set<string>): boolean {
-  const norm = normalize(name);
-  if (coasterSet.has(norm)) return true;
-  for (const c of coasterSet) {
-    const shorter = norm.length <= c.length ? norm : c;
-    const longer = norm.length <= c.length ? c : norm;
-    if (shorter.length >= 5 && longer.startsWith(shorter)) return true;
-  }
-  return false;
-}
+import { isCoasterMatch, normalizeForMatch } from "../coaster-match.ts";
 
 const HEADERS = {
   "User-Agent":
@@ -131,7 +93,7 @@ export async function fetchLiveRides(
           isOpen: r.is_open,
           waitMinutes: r.wait_time ?? 0,
           lastUpdated: r.last_updated,
-          isCoaster: coasterNames ? isCoaster(r.name, coasterNames) : false,
+          isCoaster: coasterNames ? isCoasterMatch(r.name, coasterNames) : false,
         });
       }
     }
diff --git a/package.json b/package.json
index c822ffe..2c47375 100644
--- a/package.json
+++ b/package.json
@@ -10,7 +10,8 @@
     "scrape": "tsx scripts/scrape.ts",
     "scrape:force": "tsx scripts/scrape.ts --rescrape",
     "discover": "tsx scripts/discover.ts",
-    "debug": "tsx scripts/debug.ts"
+    "debug": "tsx scripts/debug.ts",
+    "test": "tsx --test tests/*.test.ts"
   },
   "dependencies": {
     "better-sqlite3": "^12.8.0",
diff --git a/tests/coaster-matching.test.ts b/tests/coaster-matching.test.ts
new file mode 100644
index 0000000..5e0c303
--- /dev/null
+++ b/tests/coaster-matching.test.ts
@@ -0,0 +1,90 @@
+/**
+ * Coaster name matching tests.
+ *
+ * Each case documents a real mismatch found between Queue-Times ride names
+ * and RCDB coaster names, along with the park where it was observed.
+ *
+ * Run with: npm test
+ */
+
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import { isCoasterMatch, normalizeForMatch } from "../lib/coaster-match.ts";
+
+// ── Helper ──────────────────────────────────────────────────────────────────
+
+function makeSet(...rcdbNames: string[]): Set<string> {
+  return new Set(rcdbNames.map(normalizeForMatch));
+}
+
+// ── Should MATCH (Queue-Times name → RCDB name) ──────────────────────────────
+
+test("exact match after lowercasing", () => {
+  assert.ok(isCoasterMatch("Goliath", makeSet("Goliath")));
+});
+
+test("trademark symbol stripped — BATMAN™ The Ride (Over Georgia, Magic Mountain)", () => {
+  assert.ok(isCoasterMatch("BATMAN™ The Ride", makeSet("Batman The Ride")));
+});
+
+test("leading THE stripped — THE RIDDLER Mindbender (Over Georgia)", () => {
+  assert.ok(isCoasterMatch("THE RIDDLER Mindbender", makeSet("Riddler Mindbender")));
+});
+
+test("trademark + leading THE — THE RIDDLER™'s Revenge (Magic Mountain)", () => {
+  assert.ok(isCoasterMatch("THE RIDDLER™'s Revenge", makeSet("Riddler's Revenge")));
+});
+
+test("curly apostrophe possessive stripped — CATWOMAN™ Whip (New England)", () => {
+  assert.ok(isCoasterMatch("CATWOMAN™ Whip", makeSet("Catwoman's Whip")));
+});
+
+test("straight apostrophe possessive stripped", () => {
+  assert.ok(isCoasterMatch("Riddler's Revenge", makeSet("Riddler's Revenge")));
+});
+
+test("trademark + colon punctuation — SUPERMAN™: Ultimate Flight (Over Georgia)", () => {
+  assert.ok(isCoasterMatch("SUPERMAN™: Ultimate Flight", makeSet("Superman - Ultimate Flight")));
+});
+
+test("QT drops subtitle — Apocalypse (Magic Mountain)", () => {
+  assert.ok(isCoasterMatch("Apocalypse", makeSet("Apocalypse the Ride")));
+});
+
+test("QT adds subtitle — The New Revolution - Classic (Magic Mountain)", () => {
+  assert.ok(isCoasterMatch("The New Revolution - Classic", makeSet("New Revolution")));
+});
+
+test("QT exclamation stripped — SCREAM (Magic Mountain)", () => {
+  assert.ok(isCoasterMatch("SCREAM", makeSet("Scream!")));
+});
+
+test("space-split word — BAT GIRL™: Coaster Chase (Fiesta Texas)", () => {
+  assert.ok(isCoasterMatch("BAT GIRL™: Coaster Chase", makeSet("Batgirl Coaster Chase")));
+});
+
+test("trademark + 4D subtitle — THE JOKER™ 4D Free Fly Coaster (New England)", () => {
+  assert.ok(isCoasterMatch("THE JOKER™ 4D Free Fly Coaster", makeSet("Joker")));
+});
+
+test("Great American Scream Machine — top-level QT rides array (Over Georgia)", () => {
+  assert.ok(isCoasterMatch("The Great American Scream Machine", makeSet("Great American Scream Machine")));
+});
+
+test("THE JOKER™ Funhouse Coaster — top-level QT rides array (Over Georgia)", () => {
+  assert.ok(isCoasterMatch("THE JOKER™ Funhouse Coaster", makeSet("Joker Funhouse Coaster")));
+});
+
+// ── Should NOT MATCH (false positives) ──────────────────────────────────────
+
+test("false positive: Joker y Harley Quinn ≠ Joker (Six Flags Mexico)", () => {
+  assert.ok(!isCoasterMatch("Joker y Harley Quinn", makeSet("Joker")));
+});
+
+test("false positive: unrelated ride does not match", () => {
+  assert.ok(!isCoasterMatch("SkyScreamer", makeSet("Goliath")));
+});
+
+test("false positive: short prefix with conjunction — de connector", () => {
+  assert.ok(!isCoasterMatch("Batman de Gotham", makeSet("Batman")));
+});