test: add coaster name matching test suite

Extract matching logic into lib/coaster-match.ts (isCoasterMatch + normalizeForMatch)
so it can be imported by both the scraper and tests without duplication.

Add tests/coaster-matching.test.ts covering all known match/false-positive cases:
- Trademark symbols, leading THE, possessives, punctuation
- Subtitle variants in both directions (Apocalypse, New Revolution - Classic)
- Space-split brand words (BAT GIRL vs Batgirl)
- 4D subtitle extension (THE JOKER™ 4D Free Fly Coaster vs Joker)
- False positives: Joker y Harley Quinn, conjunction connectors

Run with: npm test

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-04 15:43:20 -04:00
parent dc4fbeb7ec
commit 9cac86d241
5 changed files with 161 additions and 51 deletions

64
lib/coaster-match.ts Normal file
View File

@@ -0,0 +1,64 @@
/**
* Coaster name matching — shared between the Queue-Times scraper and tests.
*
* Queue-Times and RCDB use different name conventions:
* - Trademark symbols (™ ® ©)
* - Leading "THE " prefixes
* - Possessives ("Catwoman's" vs "Catwoman")
* - Subtitles added or dropped ("Apocalypse" vs "Apocalypse the Ride")
* - Space-split brand words ("BAT GIRL" vs "Batgirl")
* - Conjunction-joined compound rides ("Joker y Harley Quinn" ≠ "Joker")
*/
// Connector words that link two separate ride names instead of introducing a
// subtitle. If a prefix match is followed by one of these words, the longer
// name is treated as a *different* (compound) ride.
// NOTE(review): normalizeForMatch turns punctuation into spaces, so the "&"
// entry can never equal a word taken from a normalized name.
const CONJUNCTIONS = new Set<string>([
  "y",
  "and",
  "&",
  "with",
  "de",
  "del",
  "e",
  "et",
]);
/**
 * Normalize a ride name for matching.
 *
 * Both sides (Queue-Times and RCDB) must be normalized with this function
 * before any comparison so the transforms are symmetric.
 *
 * Steps, in order:
 *  1. Fold diacritics (NFD decomposition, then drop combining marks) so that
 *     "Médusa" and "Medusa" compare equal instead of the accented letter
 *     being turned into a space by the ASCII-only `\w` class below.
 *  2. Strip ™ ® ©.
 *  3. Trim, so leading whitespace cannot hide a leading "THE ".
 *  4. Strip a leading "THE ".
 *  5. Spell out "&" as "and" so compound names such as "Joker & Harley Quinn"
 *     keep their connector word instead of losing it with the punctuation.
 *  6. Strip possessives ('s with a straight or a curly U+2019 apostrophe).
 *  7. Replace all remaining punctuation with spaces, collapse whitespace,
 *     lowercase and trim.
 *
 * @param name Raw ride name as published by either source.
 * @returns The canonical lowercase form used for comparisons.
 */
export function normalizeForMatch(name: string): string {
  return name
    .normalize("NFD")
    .replace(/[\u0300-\u036f]/g, "") // drop combining accents left by NFD
    .replace(/[\u2122\u00ae\u00a9]/g, "") // strip ™ ® ©
    .trim() // must precede the "THE " check, which is anchored at ^
    .replace(/^the\s+/i, "") // strip leading "THE "
    .replace(/&/g, " and ") // keep the connector word for compound names
    .replace(/['\u2019]s\b/gi, "") // strip possessives ('s / \u2019s)
    .replace(/[^\w\s]/g, " ") // all remaining punctuation → space
    .replace(/\s+/g, " ")
    .toLowerCase()
    .trim();
}
/**
 * Returns true when the Queue-Times ride name matches an entry in the RCDB
 * coaster set (which must be built with normalizeForMatch).
 *
 * Matching strategy, applied per candidate after an exact lookup:
 *  1. Exact normalized match.
 *  2. Compact (space-stripped) match — catches "BAT GIRL" vs "Batgirl".
 *     Requires at least 5 compact characters.
 *  3. Prefix match — the shorter normalized name (at least 5 chars) is a
 *     prefix of the longer one AND ends on a word boundary, so "ninja" does
 *     not match "ninjago the ride". The match is rejected when the first
 *     word after the prefix is a conjunction, which signals a compound ride
 *     name ("Joker y Harley Quinn") rather than a subtitle.
 *
 * @param qtName Raw (un-normalized) ride name from Queue-Times.
 * @param coasterSet Normalized RCDB coaster names for one park.
 * @returns Whether the ride should be treated as a coaster.
 */
export function isCoasterMatch(qtName: string, coasterSet: Set<string>): boolean {
  // Shortest name length for which a fuzzy (compact / prefix) match is trusted.
  const minFuzzyLength = 5;

  const norm = normalizeForMatch(qtName);
  if (coasterSet.has(norm)) return true;

  const compact = norm.replace(/\s/g, "");
  for (const candidate of coasterSet) {
    // Compact comparison
    if (compact.length >= minFuzzyLength && candidate.replace(/\s/g, "") === compact) {
      return true;
    }

    // Prefix comparison
    const [shorter, longer] =
      norm.length <= candidate.length ? [norm, candidate] : [candidate, norm];
    if (shorter.length < minFuzzyLength || !longer.startsWith(shorter)) continue;

    // The prefix has to stop at a word boundary; otherwise the remainder is
    // the tail of a longer word, not a subtitle, and its first letters could
    // even be mistaken for a conjunction ("storm" + "y night").
    if (longer.charAt(shorter.length) !== " ") continue;

    const [nextWord = ""] = longer.slice(shorter.length).trim().split(" ");
    if (!CONJUNCTIONS.has(nextWord)) return true;
  }
  return false;
}

View File

@@ -65,15 +65,8 @@ export function areCoastersStale(entry: ParkMeta): boolean {
* 4. Collapse runs of whitespace * 4. Collapse runs of whitespace
* 5. Lowercase and trim * 5. Lowercase and trim
*/ */
export function normalizeRideName(name: string): string { export { normalizeForMatch as normalizeRideName } from "./coaster-match.ts";
return name import { normalizeForMatch } from "./coaster-match.ts";
.replace(/[\u2122\u00ae\u00a9™®©]/g, "")
.replace(/^the\s+/i, "")
.replace(/[^\w\s]/g, " ")
.replace(/\s+/g, " ")
.toLowerCase()
.trim();
}
/** /**
* Returns a Set of normalized coaster names for fast membership checks. * Returns a Set of normalized coaster names for fast membership checks.
@@ -82,5 +75,5 @@ export function normalizeRideName(name: string): string {
export function getCoasterSet(parkId: string, meta: ParkMetaMap): Set<string> | null { export function getCoasterSet(parkId: string, meta: ParkMetaMap): Set<string> | null {
const entry = meta[parkId]; const entry = meta[parkId];
if (!entry || entry.coasters.length === 0) return null; if (!entry || entry.coasters.length === 0) return null;
return new Set(entry.coasters.map(normalizeRideName)); return new Set(entry.coasters.map(normalizeForMatch));
} }

View File

@@ -9,45 +9,7 @@
const BASE = "https://queue-times.com/parks"; const BASE = "https://queue-times.com/parks";
/** import { isCoasterMatch, normalizeForMatch } from "../coaster-match.ts";
* Normalize a ride name for fuzzy matching between Queue-Times and RCDB.
*
* - Strips trademark/copyright symbols (™ ® © and Unicode variants)
* - Strips leading "THE " prefix
* - Replaces ALL non-word, non-space characters with a space
* (handles !, -, :, ', ’ (U+2019), ", and any other punctuation)
* - Collapses whitespace, lowercases, trims
*/
// Legacy normalizer: strips trademark symbols and a leading "THE ", turns
// every remaining non-word, non-space character into a space, then collapses
// whitespace, lowercases and trims.
function normalize(name: string): string {
  const withoutSymbols = name.replace(/[\u2122\u00ae\u00a9™®©]/g, "");
  const withoutArticle = withoutSymbols.replace(/^the\s+/i, "");
  const punctuationAsSpaces = withoutArticle.replace(/[^\w\s]/g, " ");
  const singleSpaced = punctuationAsSpaces.replace(/\s+/g, " ");
  return singleSpaced.toLowerCase().trim();
}
/**
 * Decide whether a Queue-Times ride name refers to a coaster in the RCDB set.
 *
 * An exact normalized hit covers most rides. Otherwise a prefix comparison
 * handles names where one source adds or drops a subtitle:
 *   "Apocalypse" (QT) vs "Apocalypse the Ride" (RCDB)
 *   "The New Revolution - Classic" (QT) vs "New Revolution" (RCDB)
 *
 * The shorter name must be at least 5 characters long to avoid accidental
 * matches on very short names.
 */
function isCoaster(name: string, coasterSet: Set<string>): boolean {
  const target = normalize(name);
  if (coasterSet.has(target)) return true;

  for (const candidate of coasterSet) {
    const targetIsShorter = target.length <= candidate.length;
    const shorter = targetIsShorter ? target : candidate;
    const longer = targetIsShorter ? candidate : target;

    if (shorter.length < 5) continue;
    if (longer.startsWith(shorter)) return true;
  }

  return false;
}
const HEADERS = { const HEADERS = {
"User-Agent": "User-Agent":
@@ -131,7 +93,7 @@ export async function fetchLiveRides(
isOpen: r.is_open, isOpen: r.is_open,
waitMinutes: r.wait_time ?? 0, waitMinutes: r.wait_time ?? 0,
lastUpdated: r.last_updated, lastUpdated: r.last_updated,
isCoaster: coasterNames ? isCoaster(r.name, coasterNames) : false, isCoaster: coasterNames ? isCoasterMatch(r.name, coasterNames) : false,
}); });
} }
} }

View File

@@ -10,7 +10,8 @@
"scrape": "tsx scripts/scrape.ts", "scrape": "tsx scripts/scrape.ts",
"scrape:force": "tsx scripts/scrape.ts --rescrape", "scrape:force": "tsx scripts/scrape.ts --rescrape",
"discover": "tsx scripts/discover.ts", "discover": "tsx scripts/discover.ts",
"debug": "tsx scripts/debug.ts" "debug": "tsx scripts/debug.ts",
"test": "tsx --test tests/*.test.ts"
}, },
"dependencies": { "dependencies": {
"better-sqlite3": "^12.8.0", "better-sqlite3": "^12.8.0",

View File

@@ -0,0 +1,90 @@
/**
* Coaster name matching tests.
*
* Each case documents a real mismatch found between Queue-Times ride names
* and RCDB coaster names, along with the park where it was observed.
*
* Run with: npm test
*/
import { test } from "node:test";
import assert from "node:assert/strict";
import { isCoasterMatch, normalizeForMatch } from "../lib/coaster-match.ts";
// ── Helper ──────────────────────────────────────────────────────────────────
// Builds the normalized RCDB-side set that isCoasterMatch expects, from raw
// RCDB coaster names.
function makeSet(...rcdbNames: string[]): Set<string> {
  const normalized = new Set<string>();
  for (const rcdbName of rcdbNames) {
    normalized.add(normalizeForMatch(rcdbName));
  }
  return normalized;
}
// ── Should MATCH (Queue-Times name → RCDB name) ──────────────────────────────
// Each test passes the raw Queue-Times name and a set built from the raw RCDB
// name(s); both sides go through normalizeForMatch before being compared.
test("exact match after lowercasing", () => {
  assert.ok(isCoasterMatch("Goliath", makeSet("Goliath")));
});
test("trademark symbol stripped — BATMAN™ The Ride (Over Georgia, Magic Mountain)", () => {
  assert.ok(isCoasterMatch("BATMAN™ The Ride", makeSet("Batman The Ride")));
});
test("leading THE stripped — THE RIDDLER Mindbender (Over Georgia)", () => {
  assert.ok(isCoasterMatch("THE RIDDLER Mindbender", makeSet("Riddler Mindbender")));
});
test("trademark + leading THE — THE RIDDLER™'s Revenge (Magic Mountain)", () => {
  assert.ok(isCoasterMatch("THE RIDDLER™'s Revenge", makeSet("Riddler's Revenge")));
});
test("curly apostrophe possessive stripped — CATWOMAN™ Whip (New England)", () => {
  // \u2019 is written as an escape so the curly apostrophe cannot be silently
  // flattened to a straight one by an editor or copy/paste.
  assert.ok(isCoasterMatch("CATWOMAN™ Whip", makeSet("Catwoman\u2019s Whip")));
});
test("straight apostrophe possessive stripped", () => {
  assert.ok(isCoasterMatch("Riddler's Revenge", makeSet("Riddler's Revenge")));
});
test("trademark + colon punctuation — SUPERMAN™: Ultimate Flight (Over Georgia)", () => {
  assert.ok(isCoasterMatch("SUPERMAN™: Ultimate Flight", makeSet("Superman - Ultimate Flight")));
});
test("QT drops subtitle — Apocalypse (Magic Mountain)", () => {
  assert.ok(isCoasterMatch("Apocalypse", makeSet("Apocalypse the Ride")));
});
test("QT adds subtitle — The New Revolution - Classic (Magic Mountain)", () => {
  assert.ok(isCoasterMatch("The New Revolution - Classic", makeSet("New Revolution")));
});
test("QT exclamation stripped — SCREAM (Magic Mountain)", () => {
  assert.ok(isCoasterMatch("SCREAM", makeSet("Scream!")));
});
test("space-split word — BAT GIRL™: Coaster Chase (Fiesta Texas)", () => {
  assert.ok(isCoasterMatch("BAT GIRL™: Coaster Chase", makeSet("Batgirl Coaster Chase")));
});
test("trademark + 4D subtitle — THE JOKER™ 4D Free Fly Coaster (New England)", () => {
  assert.ok(isCoasterMatch("THE JOKER™ 4D Free Fly Coaster", makeSet("Joker")));
});
test("Great American Scream Machine — top-level QT rides array (Over Georgia)", () => {
  assert.ok(isCoasterMatch("The Great American Scream Machine", makeSet("Great American Scream Machine")));
});
test("THE JOKER™ Funhouse Coaster — top-level QT rides array (Over Georgia)", () => {
  assert.ok(isCoasterMatch("THE JOKER™ Funhouse Coaster", makeSet("Joker Funhouse Coaster")));
});
// ── Should NOT MATCH (false positives) ──────────────────────────────────────
// Each case asserts that the Queue-Times name is rejected against the given
// RCDB set.
test("false positive: Joker y Harley Quinn ≠ Joker (Six Flags Mexico)", () => {
  const rcdb = makeSet("Joker");
  assert.strictEqual(isCoasterMatch("Joker y Harley Quinn", rcdb), false);
});
test("false positive: unrelated ride does not match", () => {
  const rcdb = makeSet("Goliath");
  assert.strictEqual(isCoasterMatch("SkyScreamer", rcdb), false);
});
test("false positive: short prefix with conjunction — de connector", () => {
  const rcdb = makeSet("Batman");
  assert.strictEqual(isCoasterMatch("Batman de Gotham", rcdb), false);
});