Files
SixFlagsSuperCalendar/lib/park-meta.ts
josh 9cac86d241 test: add coaster name matching test suite
Extract matching logic into lib/coaster-match.ts (isCoasterMatch + normalizeForMatch)
so it can be imported by both the scraper and tests without duplication.

Add tests/coaster-matching.test.ts covering all known match/false-positive cases:
- Trademark symbols, leading THE, possessives, punctuation
- Subtitle variants in both directions (Apocalypse, New Revolution - Classic)
- Space-split brand words (BAT GIRL vs Batgirl)
- 4D subtitle extension (THE JOKER™ 4D Free Fly Coaster vs Joker)
- False positives: Joker y Harley Quinn, conjunction connectors

Run with: npm test

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-04 15:43:20 -04:00

80 lines
2.8 KiB
TypeScript

/**
* park-meta.json — persisted alongside the SQLite DB in data/
*
* This file stores per-park metadata that doesn't belong in the schedule DB:
* - rcdb_id: user-supplied RCDB park ID (fills into https://rcdb.com/{id}.htm)
* - coasters: list of operating roller coaster names scraped from RCDB
* - coasters_scraped_at: ISO timestamp of last RCDB scrape
*
* discover.ts: ensures every park has a skeleton entry (rcdb_id null by default)
* scrape.ts: populates coasters[] for parks with a known rcdb_id (30-day staleness)
*/
import fs from "fs";
import path from "path";
const META_PATH = path.join(process.cwd(), "data", "park-meta.json");
export interface ParkMeta {
/** RCDB park page ID — user fills this in manually after discover creates the skeleton */
rcdb_id: number | null;
/** Operating roller coaster names scraped from RCDB */
coasters: string[];
/** ISO timestamp of when coasters was last scraped from RCDB */
coasters_scraped_at: string | null;
}
export type ParkMetaMap = Record<string, ParkMeta>;
export function readParkMeta(): ParkMetaMap {
try {
return JSON.parse(fs.readFileSync(META_PATH, "utf8")) as ParkMetaMap;
} catch {
return {};
}
}
export function writeParkMeta(meta: ParkMetaMap): void {
fs.mkdirSync(path.dirname(META_PATH), { recursive: true });
fs.writeFileSync(META_PATH, JSON.stringify(meta, null, 2) + "\n");
}
/** Default skeleton entry for a park that has never been configured. */
export function defaultParkMeta(): ParkMeta {
return { rcdb_id: null, coasters: [], coasters_scraped_at: null };
}
const COASTER_STALE_MS = 30 * 24 * 60 * 60 * 1000; // 30 days
/** Returns true when the coaster list needs to be re-scraped from RCDB. */
export function areCoastersStale(entry: ParkMeta): boolean {
if (!entry.coasters_scraped_at) return true;
return Date.now() - new Date(entry.coasters_scraped_at).getTime() > COASTER_STALE_MS;
}
/**
* Normalize a ride name for fuzzy matching between data sources.
*
* Queue-Times uses branded names (BATMAN™ The Ride, THE JOKER™ Funhouse Coaster)
* while RCDB uses clean names (Batman The Ride, Joker Funhouse Coaster).
*
* Normalization steps:
* 1. Strip trademark/copyright symbols (™ ® ©)
* 2. Strip leading "THE " / "THE" prefix
* 3. Replace punctuation (- : ' ") with spaces
* 4. Collapse runs of whitespace
* 5. Lowercase and trim
*/
export { normalizeForMatch as normalizeRideName } from "./coaster-match.ts";
import { normalizeForMatch } from "./coaster-match.ts";
/**
* Returns a Set of normalized coaster names for fast membership checks.
* Returns null when no coaster data exists for the park.
*/
export function getCoasterSet(parkId: string, meta: ParkMetaMap): Set<string> | null {
const entry = meta[parkId];
if (!entry || entry.coasters.length === 0) return null;
return new Set(entry.coasters.map(normalizeForMatch));
}