// SixFlagsSuperCalendar/lib/coaster-match.ts

/**
 * Coaster name matching — shared between the Queue-Times scraper and tests.
 *
 * Queue-Times and RCDB use different name conventions:
 *   - Trademark symbols (™ ® ©)
 *   - Leading "THE " prefixes
 *   - Possessives ("Catwoman's" vs "Catwoman")
 *   - Subtitles added or dropped ("Apocalypse" vs "Apocalypse the Ride")
 *   - Space-split brand words ("BAT GIRL" vs "Batgirl")
 *   - Conjunction-joined compound rides ("Joker y Harley Quinn" ≠ "Joker")
 */

// Joining words (English, Spanish, French, ...) that link two separate ride
// names instead of introducing a subtitle. If the word right after a matched
// prefix is in this set, the longer name refers to a *different* ride.
const CONJUNCTIONS = new Set<string>([
  "y",
  "and",
  "&",
  "with",
  "de",
  "del",
  "e",
  "et",
]);

/**
 * Normalize a ride name for matching.
 * Both sides (Queue-Times and RCDB) must be normalized with this function
 * before any comparison so the transforms are symmetric.
 *
 * Steps, in order:
 *   1. Fold accented letters to their base letter ("Último" → "Ultimo") so an
 *      accented and an unaccented spelling of the same ride compare equal.
 *   2. Strip trademark symbols (™ ® ©).
 *   3. Trim, then strip a leading "The ".
 *   4. Strip possessive 's (straight or curly apostrophe).
 *   5. Keep "&" as its own space-delimited token, so a conjunction check on
 *      the normalized name can still see it; replace all other punctuation
 *      with a space.
 *   6. Collapse whitespace runs, lowercase, trim.
 *
 * @param name - Raw ride name from either data source.
 * @returns The normalized, lowercase, single-spaced form of `name`.
 */
export function normalizeForMatch(name: string): string {
  return name
    .normalize("NFD")                           // split accented letters into base + combining mark
    .replace(/[\u0300-\u036f]/g, "")            // drop the combining marks (\w below is ASCII-only)
    .replace(/[\u2122\u00ae\u00a9]/g, "")       // strip ™ ® ©
    .trim()                                     // so the "THE " strip below sees the real first word
    .replace(/^the\s+/i, "")                    // strip leading "THE "
    .replace(/['\u2019]s\b/gi, "")              // strip possessives ('s with straight or curly apostrophe)
    .replace(/&/g, " & ")                       // keep "&" as a standalone token
    .replace(/[^\w\s&]/g, " ")                  // all remaining punctuation → space
    .replace(/\s+/g, " ")
    .toLowerCase()
    .trim();
}

/**
 * Returns true when the Queue-Times ride name matches an entry in the RCDB
 * coaster set (which must be built with normalizeForMatch).
 *
 * Matching strategy (in order):
 * 1. Exact normalized match.
 * 2. Compact (space-stripped) match — catches "BAT GIRL" vs "Batgirl".
 *    Only attempted when the compact name has at least 5 chars.
 * 3. Whole-word prefix match — the shorter normalized name (minimum 5 chars)
 *    is a prefix of the longer one AND ends on a word boundary, unless the
 *    next word after the prefix is a conjunction (which signals a compound
 *    ride name, not a subtitle). The word-boundary requirement stops
 *    "flash" from matching "flashback".
 *
 * @param qtName - Raw ride name; it is normalized internally.
 * @param coasterSet - Coaster names already passed through normalizeForMatch.
 * @returns Whether `qtName` matches any entry of `coasterSet`.
 */
export function isCoasterMatch(qtName: string, coasterSet: Set<string>): boolean {
  const minLength = 5;

  const norm = normalizeForMatch(qtName);
  if (coasterSet.has(norm)) return true;

  const compact = norm.replace(/\s/g, "");
  // Loop-invariant: very short names are too ambiguous for compact matching.
  const compactEligible = compact.length >= minLength;

  for (const candidate of coasterSet) {
    // Compact comparison
    if (compactEligible && candidate.replace(/\s/g, "") === compact) return true;

    // Prefix comparison
    const normIsShorter = norm.length <= candidate.length;
    const shorter = normIsShorter ? norm : candidate;
    const longer = normIsShorter ? candidate : norm;
    if (shorter.length < minLength) continue;

    // Normalized names are single-spaced, so "prefix + space" is exactly a
    // whole-word prefix. Equal strings were already handled by the exact match.
    if (!longer.startsWith(shorter + " ")) continue;

    // First word following the prefix.
    const rest = longer.slice(shorter.length + 1);
    const spaceAt = rest.indexOf(" ");
    const nextWord = spaceAt === -1 ? rest : rest.slice(0, spaceAt);
    if (!CONJUNCTIONS.has(nextWord)) return true;
  }

  return false;
}