feat: RCDB-backed roller coaster filter with fuzzy name matching
All checks were successful
Build and Deploy / Build & Push (push) Successful in 2m54s
All checks were successful
Build and Deploy / Build & Push (push) Successful in 2m54s
- Add lib/park-meta.ts to manage data/park-meta.json (rcdb_id + coaster lists)
- Add lib/scrapers/rcdb.ts to scrape operating coaster names from RCDB park pages
- discover.ts now seeds park-meta.json with skeleton entries for all parks
- scrape.ts now refreshes RCDB coaster lists (30-day staleness) for parks with rcdb_id set
- fetchLiveRides() accepts a coasterNames Set; isCoaster uses normalize() on both sides to handle trademark symbols, 'THE ' prefixes, and punctuation differences between Queue-Times and RCDB names — applies correctly to both land rides and top-level rides
- Commit park-meta.json so it ships in the Docker image (fresh volumes get it automatically)
- Update .gitignore / .dockerignore to exclude only *.db files, not all of data/
- Dockerfile copies park-meta.json into image before VOLUME declaration
- README: document coaster filter setup and correct staleness window (72h not 7d)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
86
lib/park-meta.ts
Normal file
86
lib/park-meta.ts
Normal file
@@ -0,0 +1,86 @@
|
||||
/**
|
||||
* park-meta.json — persisted alongside the SQLite DB in data/
|
||||
*
|
||||
* This file stores per-park metadata that doesn't belong in the schedule DB:
|
||||
* - rcdb_id: user-supplied RCDB park ID (fills into https://rcdb.com/{id}.htm)
|
||||
* - coasters: list of operating roller coaster names scraped from RCDB
|
||||
* - coasters_scraped_at: ISO timestamp of last RCDB scrape
|
||||
*
|
||||
* discover.ts: ensures every park has a skeleton entry (rcdb_id null by default)
|
||||
* scrape.ts: populates coasters[] for parks with a known rcdb_id (30-day staleness)
|
||||
*/
|
||||
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
|
||||
/** Absolute path of park-meta.json inside the data/ directory of the current working directory. */
const META_PATH = path.resolve(process.cwd(), "data", "park-meta.json");
|
||||
|
||||
/** Metadata record kept for a single park in park-meta.json. */
export interface ParkMeta {
  /**
   * RCDB park page ID. The skeleton entry created by discover leaves this
   * null; the user fills it in manually afterwards.
   */
  rcdb_id: number | null;

  /** Names of the operating roller coasters, as scraped from RCDB. */
  coasters: string[];

  /** ISO timestamp of the last time `coasters` was scraped from RCDB. */
  coasters_scraped_at: string | null;
}

/** Shape of the whole park-meta.json document: one ParkMeta per string key. */
export type ParkMetaMap = Record<string, ParkMeta>;
|
||||
|
||||
/**
 * Load the park metadata map from park-meta.json.
 *
 * This function never throws. It returns an empty map when the file is
 * missing, unreadable, not valid JSON, or valid JSON that is not a plain
 * object (for example `null` or an array). Every case except a missing file
 * is reported with `console.warn`, so a damaged file does not go unnoticed.
 *
 * @returns The parsed map, or `{}` when no usable document could be loaded.
 */
export function readParkMeta(): ParkMetaMap {
  let raw: string;
  try {
    raw = fs.readFileSync(META_PATH, "utf8");
  } catch (err) {
    // A missing file is the expected first-run state; anything else
    // (permissions, I/O errors) is worth surfacing.
    const code = (err as { code?: unknown } | null)?.code;
    if (code !== "ENOENT") {
      console.warn(`park-meta: could not read ${META_PATH}:`, err);
    }
    return {};
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    console.warn(`park-meta: ${META_PATH} is not valid JSON; ignoring it:`, err);
    return {};
  }

  // JSON.parse also accepts null, arrays and primitives; none of those can be
  // indexed as a park map, so fall back to an empty map instead.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    console.warn(`park-meta: ${META_PATH} does not contain a JSON object; ignoring it`);
    return {};
  }

  return parsed as ParkMetaMap;
}
|
||||
|
||||
/**
 * Persist the given park metadata map to park-meta.json.
 *
 * The parent directory is created first if it does not exist. The map is
 * written as 2-space-indented JSON followed by a trailing newline.
 *
 * @param meta The complete map to write; it replaces the file's contents.
 */
export function writeParkMeta(meta: ParkMetaMap): void {
  const targetDir = path.dirname(META_PATH);
  fs.mkdirSync(targetDir, { recursive: true });

  const serialized = JSON.stringify(meta, null, 2) + "\n";
  fs.writeFileSync(META_PATH, serialized);
}
|
||||
|
||||
/**
 * Build the blank entry used for a park that has never been configured:
 * no RCDB ID, no coaster names and no scrape timestamp.
 *
 * @returns A new object (with a new, empty `coasters` array) on every call.
 */
export function defaultParkMeta(): ParkMeta {
  const skeleton: ParkMeta = {
    rcdb_id: null,
    coasters: [],
    coasters_scraped_at: null,
  };
  return skeleton;
}
|
||||
|
||||
/** Maximum age of a scraped coaster list before it must be refreshed: 30 days, in milliseconds. */
const COASTER_STALE_MS = 30 * 24 * 60 * 60 * 1000;

/**
 * Decide whether a park's coaster list needs to be re-scraped from RCDB.
 *
 * @param entry The park's metadata entry.
 * @returns `true` when the list has never been scraped, when the stored
 *   timestamp cannot be parsed as a date, or when the last scrape is more
 *   than 30 days old; `false` otherwise.
 */
export function areCoastersStale(entry: ParkMeta): boolean {
  if (!entry.coasters_scraped_at) return true;

  const scrapedAt = new Date(entry.coasters_scraped_at).getTime();
  // An unparseable timestamp yields NaN, and every comparison against NaN is
  // false, so without this guard such an entry would never be refreshed.
  if (Number.isNaN(scrapedAt)) return true;

  return Date.now() - scrapedAt > COASTER_STALE_MS;
}
|
||||
|
||||
/**
 * Normalize a ride name for fuzzy matching between data sources.
 *
 * Queue-Times uses branded names (BATMAN™ The Ride, THE JOKER™ Funhouse Coaster)
 * while RCDB uses clean names (Batman The Ride, Joker Funhouse Coaster).
 * Both sides must be passed through this function before being compared.
 *
 * Normalization steps:
 *   1. Strip trademark/copyright symbols (™ ® © ℠)
 *   2. Replace punctuation (- : ' " . plus the typographic quotes ‘ ’ “ ” and
 *      the en/em dashes) with spaces, so "Joker’s" and "Joker's" agree
 *   3. Collapse runs of whitespace and trim
 *   4. Strip a leading "the " (any case) — done after trimming so that
 *      leading whitespace cannot hide the article
 *   5. Lowercase
 *
 * @param name Raw ride name from either data source.
 * @returns The normalized comparison key.
 */
export function normalizeRideName(name: string): string {
  return name
    .replace(/[™®©\u2120]/g, "")
    .replace(/[-:'".\u2018\u2019\u201C\u201D\u2013\u2014]/g, " ")
    .replace(/\s+/g, " ")
    .trim()
    .replace(/^the\s+/i, "")
    .toLowerCase();
}
|
||||
|
||||
/**
 * Build a Set of normalized coaster names for fast membership checks.
 *
 * @param parkId Key of the park in `meta`.
 * @param meta The park metadata map.
 * @returns A Set holding `normalizeRideName` of every coaster name, or `null`
 *   when the park has no entry, the entry has no `coasters` array (possible
 *   in a hand-edited file), or the array is empty.
 */
export function getCoasterSet(parkId: string, meta: ParkMetaMap): Set<string> | null {
  // Read defensively: the file is user-editable, and keys such as
  // "constructor" resolve to inherited properties that have no coaster list.
  const names: unknown = meta[parkId]?.coasters;
  if (!Array.isArray(names) || names.length === 0) return null;

  return new Set(names.map((name: string) => normalizeRideName(name)));
}
|
||||
Reference in New Issue
Block a user