feat: RCDB-backed roller coaster filter with fuzzy name matching
All checks were successful
Build and Deploy / Build & Push (push) Successful in 2m54s

- Add lib/park-meta.ts to manage data/park-meta.json (rcdb_id + coaster lists)
- Add lib/scrapers/rcdb.ts to scrape operating coaster names from RCDB park pages
- discover.ts now seeds park-meta.json with skeleton entries for all parks
- scrape.ts now refreshes RCDB coaster lists (30-day staleness) for parks with rcdb_id set
- fetchLiveRides() accepts a coasterNames Set; isCoaster applies normalize() to both the
  Queue-Times name and the RCDB name, so trademark symbols, 'THE ' prefixes, and
  punctuation differences do not prevent a match. The check applies correctly to both
  land rides and top-level rides
- Commit park-meta.json so it ships in the Docker image (fresh volumes get it automatically)
- Update .gitignore / .dockerignore to exclude only *.db files, not all of data/
- Dockerfile copies park-meta.json into image before VOLUME declaration
- README: document coaster filter setup and fix the stated staleness window (72h, not 7d)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-04 13:49:49 -04:00
parent 819e716197
commit 9700d0bd9a
11 changed files with 710 additions and 15 deletions

View File

@@ -17,6 +17,7 @@ import { chromium } from "playwright";
import { openDb, getApiId, setApiId, type DbInstance } from "../lib/db";
import { PARKS } from "../lib/parks";
import { fetchParkInfo, isMainThemePark } from "../lib/scrapers/sixflags";
import { readParkMeta, writeParkMeta, defaultParkMeta } from "../lib/park-meta";
const CLOUDFRONT_PATTERN = /operating-hours\/park\/(\d+)/;
@@ -124,11 +125,39 @@ async function main() {
await new Promise((r) => setTimeout(r, 2000));
}
// ── Ensure park-meta.json has a skeleton entry for every park ────────────
// Users fill in rcdb_id manually; scrape.ts populates coasters[] from RCDB.
const meta = readParkMeta();
let metaChanged = false;
for (const park of PARKS) {
if (!meta[park.id]) {
meta[park.id] = defaultParkMeta();
metaChanged = true;
}
}
// Remove entries for parks no longer in the registry
for (const id of Object.keys(meta)) {
if (!PARKS.find((p) => p.id === id)) {
delete meta[id];
metaChanged = true;
}
}
if (metaChanged) {
writeParkMeta(meta);
console.log("\nUpdated data/park-meta.json");
console.log(" → Set rcdb_id for each park to enable the coaster filter.");
console.log(" Find a park's RCDB ID from: https://rcdb.com (the number in the URL).");
}
// Print summary
console.log("\n── Discovered IDs ──");
for (const park of PARKS) {
const id = getApiId(db, park.id);
console.log(` ${park.id.padEnd(30)} ${id ?? "NOT FOUND"}`);
const rcdbId = meta[park.id]?.rcdb_id;
const rcdbStr = rcdbId ? `rcdb:${rcdbId}` : "rcdb:?";
console.log(` ${park.id.padEnd(30)} api:${String(id ?? "?").padEnd(8)} ${rcdbStr}`);
}
db.close();