feat: RCDB-backed roller coaster filter with fuzzy name matching
All checks were successful
Build and Deploy / Build & Push (push) Successful in 2m54s

- Add lib/park-meta.ts to manage data/park-meta.json (rcdb_id + coaster lists)
- Add lib/scrapers/rcdb.ts to scrape operating coaster names from RCDB park pages
- discover.ts now seeds park-meta.json with skeleton entries for all parks
- scrape.ts now refreshes RCDB coaster lists (30-day staleness) for parks with rcdb_id set
- fetchLiveRides() accepts a coasterNames Set; isCoaster uses normalize() on both sides
  to handle trademark symbols, 'THE ' prefixes, and punctuation differences between
  Queue-Times and RCDB names — applies correctly to both land rides and top-level rides
- Commit park-meta.json so it ships in the Docker image (fresh volumes get it automatically)
- Update .gitignore / .dockerignore to exclude only *.db files, not all of data/
- Dockerfile copies park-meta.json into image before VOLUME declaration
- README: document coaster filter setup and correct staleness window (72h not 7d)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-04 13:49:49 -04:00
parent 819e716197
commit 9700d0bd9a
11 changed files with 710 additions and 15 deletions

View File

@@ -10,6 +10,8 @@
import { openDb, upsertDay, getApiId, isMonthScraped } from "../lib/db";
import { PARKS } from "../lib/parks";
import { scrapeMonth, RateLimitError } from "../lib/scrapers/sixflags";
import { readParkMeta, writeParkMeta, areCoastersStale } from "../lib/park-meta";
import { scrapeRcdbCoasters } from "../lib/scrapers/rcdb";
// Fixed year value used by this script (presumably the season being scraped — confirm against main()).
const YEAR = 2026;
// Month numbers 1 through 12, in ascending order.
const MONTHS = Array.from({ length: 12 }, (_, index) => index + 1);
@@ -99,6 +101,42 @@ async function main() {
if (totalErrors > 0) console.log(" Re-run to retry failed months.");
db.close();
// ── RCDB coaster scrape (30-day staleness) ────────────────────────────────
const meta = readParkMeta();
const rcdbParks = PARKS.filter((p) => {
const entry = meta[p.id];
return entry?.rcdb_id && (FORCE || areCoastersStale(entry));
});
if (rcdbParks.length === 0) {
console.log("\nCoaster data up to date.");
return;
}
console.log(`\n── RCDB coaster scrape — ${rcdbParks.length} park(s) ──`);
for (const park of rcdbParks) {
const entry = meta[park.id];
const rcdbId = entry.rcdb_id!;
process.stdout.write(` ${park.shortName.padEnd(30)} `);
const coasters = await scrapeRcdbCoasters(rcdbId);
if (coasters === null) {
console.log("FAILED");
continue;
}
entry.coasters = coasters;
entry.coasters_scraped_at = new Date().toISOString();
console.log(`${coasters.length} coasters`);
// Polite delay between RCDB requests
await new Promise((r) => setTimeout(r, 2000));
}
writeParkMeta(meta);
console.log(" Saved to data/park-meta.json");
}
main().catch((err) => {