fix: isCoaster typo in top-level rides loop; simplify test structure

- isCoaster → isCoasterMatch on line 109: a missed rename caused a runtime crash that made fetchLiveRides return null, breaking the entire ride panel.
- Rewrite the test as two flat arrays, SHOULD_MATCH and SHOULD_NOT_MATCH; each entry holds the QT name, the RCDB name, and the park for context.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
fix: correct import paths for coaster-match module
2026-04-04 15:49:47 -04:00 · 2026-04-04 15:46:07 -04:00 · 2026-04-04 15:43:20 -04:00
5 changed files with 124 additions and 66 deletions
@@ -0,0 +1,64 @@
+/**
+ * Coaster name matching — shared between the Queue-Times scraper and tests.
+ *
+ * Queue-Times and RCDB use different name conventions:
+ *   - Trademark symbols (™ ® ©)
+ *   - Leading "THE " prefixes
+ *   - Possessives ("Catwoman's" vs "Catwoman")
+ *   - Subtitles added or dropped ("Apocalypse" vs "Apocalypse the Ride")
+ *   - Space-split brand words ("BAT GIRL" vs "Batgirl")
+ *   - Conjunction-joined compound rides ("Joker y Harley Quinn" ≠ "Joker")
+ */
+
+// Lowercase connector words that join two ride names rather than extend one subtitle, e.g. the "y" in "Joker y Harley Quinn".
+// When a prefix match is found and the next word is one of these, the longer name is a *different* ride, not a subtitle.
+// NOTE(review): normalizeForMatch replaces "&" with a space, so "&" should never show up as a next word — confirm the entry is needed.
+const CONJUNCTIONS = new Set(["y", "and", "&", "with", "de", "del", "e", "et"]);
+
+/**
+ * Normalize a ride name for matching.
+ * Both sides (Queue-Times and RCDB) must be normalized with this function
+ * before any comparison so the transforms are symmetric.
+ *
+ * Steps, applied in this order:
+ *   1. Remove ™ ® © (listed both as \u escapes and as literal characters).
+ *   2. Remove one leading "the" plus the whitespace after it, any letter case.
+ *   3. Remove every 's / ’s (U+2019) that sits at the end of a word.
+ *   4. Replace each remaining character that is neither \w nor whitespace
+ *      with a space.
+ *   5. Collapse whitespace runs to a single space, lowercase, trim.
+ *
+ * NOTE(review): step 2 runs before the final trim, so a name that begins
+ * with whitespace keeps its "the" prefix.
+ * NOTE(review): \w in JavaScript covers only [A-Za-z0-9_], so accented
+ * letters and "&" are turned into spaces by step 4.
+ *
+ * @param name - Raw ride name from either source.
+ * @returns The lowercase, single-spaced, trimmed form described above.
+ */
+export function normalizeForMatch(name: string): string {
+  return name
+    .replace(/[\u2122\u00ae\u00a9™®©]/g, "")   // strip ™ ® ©
+    .replace(/^the\s+/i, "")                    // strip leading "THE " (only when it is the very first text)
+    .replace(/['\u2019]s\b/gi, "")              // strip possessives ('s / ’s)
+    .replace(/[^\w\s]/g, " ")                   // every other non-word, non-space character → space
+    .replace(/\s+/g, " ")                       // collapse whitespace runs
+    .toLowerCase()
+    .trim();
+}
+
+/**
+ * Returns true when the Queue-Times ride name matches an entry in the RCDB
+ * coaster set (which must be built with normalizeForMatch).
+ *
+ * Matching strategy (in order):
+ * 1. Exact normalized match.
+ * 2. Compact (space-stripped) match, minimum 5 chars — catches "BAT GIRL"
+ *    vs "Batgirl".
+ * 3. Whole-word prefix match — the shorter normalized name (minimum 5 chars)
+ *    must be followed by a space in the longer one, so "thunder" does not
+ *    match "thunderbolt". The match is rejected when the next word after the
+ *    prefix is a conjunction (which signals a compound ride name, not a
+ *    subtitle).
+ *
+ * @param qtName - Raw ride name as reported by Queue-Times; normalized here.
+ * @param coasterSet - RCDB coaster names, already normalized by the caller.
+ * @returns true as soon as any strategy matches one entry, otherwise false.
+ */
+export function isCoasterMatch(qtName: string, coasterSet: Set<string>): boolean {
+  const norm = normalizeForMatch(qtName);
+  if (coasterSet.has(norm)) return true;
+
+  const compact = norm.replace(/\s/g, "");
+  // Short names are too ambiguous to compare with their spaces removed.
+  const compactEligible = compact.length >= 5;
+
+  for (const c of coasterSet) {
+    // Compact comparison
+    if (compactEligible && c.replace(/\s/g, "") === compact) return true;
+
+    // Prefix comparison. Identical strings were settled by the exact check
+    // above, so only a strictly longer name can pass the test below.
+    const normIsShorter = norm.length <= c.length;
+    const shorter = normIsShorter ? norm : c;
+    const longer = normIsShorter ? c : norm;
+    if (shorter.length < 5) continue;
+    // Requiring the separating space anchors the prefix at a word boundary.
+    if (!longer.startsWith(shorter + " ")) continue;
+
+    const nextWord = longer.slice(shorter.length + 1).split(" ")[0] ?? "";
+    if (!CONJUNCTIONS.has(nextWord)) return true;
+  }
+
+  return false;
+}
@@ -52,28 +52,8 @@ export function areCoastersStale(entry: ParkMeta): boolean {
  return Date.now() - new Date(entry.coasters_scraped_at).getTime() > COASTER_STALE_MS;
 }

-/**
- * Normalize a ride name for fuzzy matching between data sources.
- *
- * Queue-Times uses branded names (BATMAN™ The Ride, THE JOKER™ Funhouse Coaster)
- * while RCDB uses clean names (Batman The Ride, Joker Funhouse Coaster).
- *
- * Normalization steps:
- *   1. Strip trademark/copyright symbols (™ ® ©)
- *   2. Strip leading "THE " / "THE" prefix
- *   3. Replace punctuation (- : ' ") with spaces
- *   4. Collapse runs of whitespace
- *   5. Lowercase and trim
- */
-export function normalizeRideName(name: string): string {
-  return name
-    .replace(/[\u2122\u00ae\u00a9™®©]/g, "")
-    .replace(/^the\s+/i, "")
-    .replace(/[^\w\s]/g, " ")
-    .replace(/\s+/g, " ")
-    .toLowerCase()
-    .trim();
-}
+import { normalizeForMatch } from "./coaster-match";
+export { normalizeForMatch as normalizeRideName } from "./coaster-match";

 /**
 * Returns a Set of normalized coaster names for fast membership checks.
@@ -82,5 +62,5 @@ export function normalizeRideName(name: string): string {
 export function getCoasterSet(parkId: string, meta: ParkMetaMap): Set<string> | null {
  const entry = meta[parkId];
  if (!entry || entry.coasters.length === 0) return null;
-  return new Set(entry.coasters.map(normalizeRideName));
+  return new Set(entry.coasters.map(normalizeForMatch));
 }
@@ -7,48 +7,10 @@
 * See: https://queue-times.com/en-US/pages/api
 */

+import { isCoasterMatch } from "../coaster-match";
+
 const BASE = "https://queue-times.com/parks";

-/**
- * Normalize a ride name for fuzzy matching between Queue-Times and RCDB.
- *
- * - Strips trademark/copyright symbols (™ ® © and Unicode variants)
- * - Strips leading "THE " prefix
- * - Replaces ALL non-word, non-space characters with a space
- *   (handles !, -, :, ', ' U+2019, ", and any other punctuation)
- * - Collapses whitespace, lowercases, trims
- */
-function normalize(name: string): string {
-  return name
-    .replace(/[\u2122\u00ae\u00a9™®©]/g, "")
-    .replace(/^the\s+/i, "")
-    .replace(/[^\w\s]/g, " ")
-    .replace(/\s+/g, " ")
-    .toLowerCase()
-    .trim();
-}
-
-/**
- * Check if a Queue-Times ride name matches any coaster in the RCDB set.
- *
- * Exact normalized match covers most cases. Prefix matching handles cases
- * where one source drops or adds a subtitle:
- *   "Apocalypse" (QT) vs "Apocalypse the Ride" (RCDB)
- *   "The New Revolution - Classic" (QT) vs "New Revolution" (RCDB)
- *
- * Minimum 5 chars on the shorter side prevents accidental short matches.
- */
-function isCoaster(name: string, coasterSet: Set<string>): boolean {
-  const norm = normalize(name);
-  if (coasterSet.has(norm)) return true;
-  for (const c of coasterSet) {
-    const shorter = norm.length <= c.length ? norm : c;
-    const longer  = norm.length <= c.length ? c : norm;
-    if (shorter.length >= 5 && longer.startsWith(shorter)) return true;
-  }
-  return false;
-}
-
 const HEADERS = {
  "User-Agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
@@ -131,7 +93,7 @@ export async function fetchLiveRides(
          isOpen: r.is_open,
          waitMinutes: r.wait_time ?? 0,
          lastUpdated: r.last_updated,
-          isCoaster: coasterNames ? isCoaster(r.name, coasterNames) : false,
+          isCoaster: coasterNames ? isCoasterMatch(r.name, coasterNames) : false,
        });
      }
    }
@@ -144,7 +106,7 @@ export async function fetchLiveRides(
        isOpen: r.is_open,
        waitMinutes: r.wait_time ?? 0,
        lastUpdated: r.last_updated,
-        isCoaster: coasterNames ? isCoaster(r.name, coasterNames) : false,
+        isCoaster: coasterNames ? isCoasterMatch(r.name, coasterNames) : false,
      });
    }

@@ -10,7 +10,8 @@
    "scrape": "tsx scripts/scrape.ts",
    "scrape:force": "tsx scripts/scrape.ts --rescrape",
    "discover": "tsx scripts/discover.ts",
-    "debug": "tsx scripts/debug.ts"
+    "debug": "tsx scripts/debug.ts",
+    "test": "tsx --test tests/*.test.ts"
  },
  "dependencies": {
    "better-sqlite3": "^12.8.0",
@@ -0,0 +1,51 @@
+/**
+ * Coaster name matching tests.
+ *
+ * Each entry is a real case found between Queue-Times and RCDB names.
+ * Add new cases here when fixing a mismatch or false positive.
+ *
+ * Run with: npm test
+ */
+
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import { isCoasterMatch, normalizeForMatch } from "../lib/coaster-match";
+
+function set(...rcdbNames: string[]): Set<string> {  // test fixture standing in for the RCDB coaster set
+  return new Set(rcdbNames.map(normalizeForMatch));   // isCoasterMatch expects entries that are already normalized
+}
+
+// ── Should match ─────────────────────────────────────────────────────────────
+
+const SHOULD_MATCH: [qtName: string, rcdbName: string, park: string][] = [
+  ["BATMAN™ The Ride",                  "Batman The Ride",                "Over Georgia / Magic Mountain"], // exact once ™ is stripped
+  ["THE RIDDLER Mindbender",            "Riddler Mindbender",             "Over Georgia"], // exact once leading "THE" is stripped
+  ["THE RIDDLER™'s Revenge",            "Riddler's Revenge",              "Magic Mountain"], // exact once ™, leading "THE" and the possessive are stripped
+  ["CATWOMAN™ Whip",                    "Catwoman's Whip",                "New England"], // exact once ™ (QT) and the possessive (RCDB) are stripped
+  ["SUPERMAN™: Ultimate Flight",        "Superman - Ultimate Flight",     "Over Georgia"], // exact once ™ is stripped and ":" / "-" become spaces
+  ["THE JOKER™ Funhouse Coaster",       "Joker Funhouse Coaster",         "Over Georgia"], // exact once leading "THE" and ™ are stripped
+  ["The Great American Scream Machine", "Great American Scream Machine",  "Over Georgia"], // exact once leading "The" is stripped
+  ["Apocalypse",                        "Apocalypse the Ride",            "Magic Mountain"], // prefix: RCDB adds the subtitle "the Ride"
+  ["The New Revolution - Classic",      "New Revolution",                 "Magic Mountain"], // prefix: QT adds the subtitle "Classic"
+  ["SCREAM",                            "Scream!",                        "Magic Mountain"], // exact once "!" becomes a space and is trimmed
+  ["BAT GIRL™: Coaster Chase",          "Batgirl Coaster Chase",          "Fiesta Texas"], // compact: equal once all spaces are removed
+  ["THE JOKER™ 4D Free Fly Coaster",    "Joker",                          "New England"], // prefix: QT adds the subtitle "4D Free Fly Coaster"
+];
+
+for (const [qt, rcdb, park] of SHOULD_MATCH) {  // one named test per pair, so a failure reports the exact names and park
+  test(`match: "${qt}" = "${rcdb}" (${park})`, () => {
+    assert.ok(isCoasterMatch(qt, set(rcdb)), `Expected match`);  // the set holds only this pair's RCDB name
+  });
+}
+
+// ── Should NOT match (false positives) ───────────────────────────────────────
+
+const SHOULD_NOT_MATCH: [qtName: string, rcdbName: string, park: string][] = [
+  ["Joker y Harley Quinn", "Joker", "Six Flags Mexico"],  // "joker" is a prefix, but the next word "y" is a conjunction → different ride
+];
+
+for (const [qt, rcdb, park] of SHOULD_NOT_MATCH) {  // one named test per pair, so a failure reports the exact names and park
+  test(`no match: "${qt}" ≠ "${rcdb}" (${park})`, () => {
+    assert.ok(!isCoasterMatch(qt, set(rcdb)), `Expected no match`);  // the set holds only this pair's RCDB name
+  });
+}