feat: RCDB-backed roller coaster filter with fuzzy name matching
All checks were successful
Build and Deploy / Build & Push (push) Successful in 2m54s
All checks were successful
Build and Deploy / Build & Push (push) Successful in 2m54s
- Add lib/park-meta.ts to manage data/park-meta.json (rcdb_id + coaster lists)
- Add lib/scrapers/rcdb.ts to scrape operating coaster names from RCDB park pages
- discover.ts now seeds park-meta.json with skeleton entries for all parks
- scrape.ts now refreshes RCDB coaster lists (30-day staleness) for parks with rcdb_id set
- fetchLiveRides() accepts a coasterNames Set; isCoaster uses normalize() on both sides to handle trademark symbols, 'THE ' prefixes, and punctuation differences between Queue-Times and RCDB names — applies correctly to both land rides and top-level rides
- Commit park-meta.json so it ships in the Docker image (fresh volumes get it automatically)
- Update .gitignore / .dockerignore to exclude only *.db files, not all of data/
- Dockerfile copies park-meta.json into image before VOLUME declaration
- README: document coaster filter setup and correct staleness window (72h not 7d)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -17,6 +17,7 @@ import { chromium } from "playwright";
|
||||
import { openDb, getApiId, setApiId, type DbInstance } from "../lib/db";
|
||||
import { PARKS } from "../lib/parks";
|
||||
import { fetchParkInfo, isMainThemePark } from "../lib/scrapers/sixflags";
|
||||
import { readParkMeta, writeParkMeta, defaultParkMeta } from "../lib/park-meta";
|
||||
|
||||
// Matches the path fragment "operating-hours/park/<digits>" and captures the
// digit run in group 1.
// NOTE(review): presumably applied to intercepted request URLs to pull out a
// park's numeric API id — the call site is not visible here; confirm.
const CLOUDFRONT_PATTERN = /operating-hours\/park\/(\d+)/;
|
||||
|
||||
@@ -124,11 +125,39 @@ async function main() {
|
||||
await new Promise((r) => setTimeout(r, 2000));
|
||||
}
|
||||
|
||||
// ── Ensure park-meta.json has a skeleton entry for every park ────────────
|
||||
// Users fill in rcdb_id manually; scrape.ts populates coasters[] from RCDB.
|
||||
const meta = readParkMeta();
|
||||
let metaChanged = false;
|
||||
|
||||
for (const park of PARKS) {
|
||||
if (!meta[park.id]) {
|
||||
meta[park.id] = defaultParkMeta();
|
||||
metaChanged = true;
|
||||
}
|
||||
}
|
||||
// Remove entries for parks no longer in the registry
|
||||
for (const id of Object.keys(meta)) {
|
||||
if (!PARKS.find((p) => p.id === id)) {
|
||||
delete meta[id];
|
||||
metaChanged = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (metaChanged) {
|
||||
writeParkMeta(meta);
|
||||
console.log("\nUpdated data/park-meta.json");
|
||||
console.log(" → Set rcdb_id for each park to enable the coaster filter.");
|
||||
console.log(" Find a park's RCDB ID from: https://rcdb.com (the number in the URL).");
|
||||
}
|
||||
|
||||
// Print summary
|
||||
console.log("\n── Discovered IDs ──");
|
||||
for (const park of PARKS) {
|
||||
const id = getApiId(db, park.id);
|
||||
console.log(` ${park.id.padEnd(30)} ${id ?? "NOT FOUND"}`);
|
||||
const rcdbId = meta[park.id]?.rcdb_id;
|
||||
const rcdbStr = rcdbId ? `rcdb:${rcdbId}` : "rcdb:?";
|
||||
console.log(` ${park.id.padEnd(30)} api:${String(id ?? "?").padEnd(8)} ${rcdbStr}`);
|
||||
}
|
||||
|
||||
db.close();
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
import { openDb, upsertDay, getApiId, isMonthScraped } from "../lib/db";
|
||||
import { PARKS } from "../lib/parks";
|
||||
import { scrapeMonth, RateLimitError } from "../lib/scrapers/sixflags";
|
||||
import { readParkMeta, writeParkMeta, areCoastersStale } from "../lib/park-meta";
|
||||
import { scrapeRcdbCoasters } from "../lib/scrapers/rcdb";
|
||||
|
||||
// Hard-coded calendar year.
// NOTE(review): presumably the year whose months get scraped — usage is
// outside this view; confirm, and update when the season rolls over.
const YEAR = 2026;
|
||||
// Month numbers 1 through 12 (1-based, not the 0-based months used by Date).
// NOTE(review): presumably iterated together with YEAR — confirm at the call site.
const MONTHS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
|
||||
@@ -99,6 +101,42 @@ async function main() {
|
||||
if (totalErrors > 0) console.log(" Re-run to retry failed months.");
|
||||
|
||||
db.close();
|
||||
|
||||
// ── RCDB coaster scrape (30-day staleness) ────────────────────────────────
|
||||
const meta = readParkMeta();
|
||||
const rcdbParks = PARKS.filter((p) => {
|
||||
const entry = meta[p.id];
|
||||
return entry?.rcdb_id && (FORCE || areCoastersStale(entry));
|
||||
});
|
||||
|
||||
if (rcdbParks.length === 0) {
|
||||
console.log("\nCoaster data up to date.");
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`\n── RCDB coaster scrape — ${rcdbParks.length} park(s) ──`);
|
||||
|
||||
for (const park of rcdbParks) {
|
||||
const entry = meta[park.id];
|
||||
const rcdbId = entry.rcdb_id!;
|
||||
process.stdout.write(` ${park.shortName.padEnd(30)} `);
|
||||
|
||||
const coasters = await scrapeRcdbCoasters(rcdbId);
|
||||
if (coasters === null) {
|
||||
console.log("FAILED");
|
||||
continue;
|
||||
}
|
||||
|
||||
entry.coasters = coasters;
|
||||
entry.coasters_scraped_at = new Date().toISOString();
|
||||
console.log(`${coasters.length} coasters`);
|
||||
|
||||
// Polite delay between RCDB requests
|
||||
await new Promise((r) => setTimeout(r, 2000));
|
||||
}
|
||||
|
||||
writeParkMeta(meta);
|
||||
console.log(" Saved to data/park-meta.json");
|
||||
}
|
||||
|
||||
main().catch((err) => {
|
||||
|
||||
Reference in New Issue
Block a user