refactor: hardcode API IDs and coaster lists, remove Playwright discovery

Embed Six Flags API IDs directly in the park registry and snapshot
coaster lists from park-meta.json into a TypeScript module. This
eliminates the Playwright-based discovery script, RCDB scraper, and
runtime dependency on park-meta.json — preparing for the backend
API transition.

- Add apiId field to Park type and all 24 park entries
- Create lib/coaster-data.ts with hardcoded coaster lists
- Update page components to use park.apiId and new getCoasterSet()
- Remove scripts/discover.ts, lib/scrapers/rcdb.ts, lib/park-meta.ts
- Remove data/park-meta.json from shared volume
- Remove playwright devDependency and discover npm script
- Simplify scripts/scrape.ts (no RCDB, no discovery checks)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-23 21:25:53 -04:00
parent 757c2a8d4f
commit 4652a92c29
13 changed files with 381 additions and 866 deletions
+2 -13
View File
@@ -9,7 +9,6 @@
import fs from "fs";
import path from "path";
import { openDb, getApiId } from "../lib/db";
import { PARKS } from "../lib/parks";
import { scrapeMonthRaw } from "../lib/scrapers/sixflags";
@@ -52,16 +51,6 @@ async function main() {
const month = parseInt(monthStr);
const day = parseInt(dayStr);
const db = openDb();
const apiId = getApiId(db, park.id);
db.close();
if (apiId === null) {
console.error(`No API ID found for ${park.name} — run: npm run discover`);
process.exit(1);
}
// Collect all output so we can write it to a file as well
const lines: string[] = [];
const out = (...args: string[]) => {
const line = args.join(" ");
@@ -70,13 +59,13 @@ async function main() {
};
out(`Park : ${park.name} (${park.id})`);
out(`API ID : ${apiId}`);
out(`API ID : ${park.apiId}`);
out(`Date : ${dateStr}`);
out(`Fetched : ${new Date().toISOString()}`);
out("");
out(`Fetching ${year}-${String(month).padStart(2, "0")} from API...`);
const raw = await scrapeMonthRaw(apiId, year, month);
const raw = await scrapeMonthRaw(park.apiId, year, month);
const targetDate = `${String(month).padStart(2, "0")}/${String(day).padStart(2, "0")}/${year}`;
const dayData = raw.dates.find((d) => d.date === targetDate);
-168
View File
@@ -1,168 +0,0 @@
/**
* One-time discovery script — finds the CloudFront API ID for each park.
*
* Run this once before using scrape.ts:
* npx tsx scripts/discover.ts
*
* For each park in the registry it:
* 1. Opens the park's hours page in a headless browser
* 2. Intercepts all calls to the operating-hours CloudFront API
* 3. Identifies the main theme park ID (filters out water parks, safari, etc.)
* 4. Stores the ID in the database
*
* Re-running is safe — already-discovered parks are skipped.
*/
import { chromium } from "playwright";
import { openDb, getApiId, setApiId, type DbInstance } from "../lib/db";
import { PARKS } from "../lib/parks";
import { fetchParkInfo, isMainThemePark } from "../lib/scrapers/sixflags";
import { readParkMeta, writeParkMeta, defaultParkMeta } from "../lib/park-meta";
/** Matches operating-hours API request URLs and captures the numeric park ID from the path. */
const CLOUDFRONT_PATTERN = /operating-hours\/park\/(\d+)/;

/**
 * Loads a park's hours page in headless Chromium, records every park ID seen
 * in requests matching CLOUDFRONT_PATTERN, and picks one of them.
 *
 * Selection order:
 *   1. the first captured ID whose fetched park name passes isMainThemePark
 *   2. otherwise the numerically lowest captured ID
 *
 * @param slug - Path segment inserted after `sixflags.com/` in the page URL.
 * @returns The selected ID, or `null` when no matching request was observed.
 */
async function discoverParkId(slug: string): Promise<number | null> {
  const browser = await chromium.launch({ headless: true });
  try {
    const userAgent =
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
      "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36";
    const context = await browser.newContext({ userAgent, locale: "en-US" });
    const page = await context.newPage();

    // Collect IDs from outgoing requests while the page loads.
    const seenIds = new Set<number>();
    page.on("request", (request) => {
      const hit = CLOUDFRONT_PATTERN.exec(request.url());
      if (hit) seenIds.add(parseInt(hit[1]));
    });

    const hoursUrl = `https://www.sixflags.com/${slug}/park-hours?date=2026-05-01`;
    // Navigation errors (including the 30s timeout) are swallowed on purpose:
    // any IDs captured before the failure are still evaluated below.
    await page
      .goto(hoursUrl, { waitUntil: "networkidle", timeout: 30_000 })
      .catch(() => null);
    await context.close();

    if (seenIds.size === 0) return null;

    // Prefer the main theme park over water park / safari IDs.
    for (const candidate of seenIds) {
      const info = await fetchParkInfo(candidate);
      if (!info || !isMainThemePark(info.parkName)) continue;
      console.log(
        ` → ID ${candidate} | ${info.parkAbbreviation} | ${info.parkName}`
      );
      return candidate;
    }

    // Nothing was classified as a main park — fall back to the lowest ID
    // (usually the main park).
    const lowest = Math.min(...seenIds);
    console.log(` → fallback to lowest ID: ${lowest}`);
    return lowest;
  } finally {
    // Always shut the browser down, whether we returned or threw.
    await browser.close();
  }
}
/**
 * Deletes database rows for parks that have an entry in `park_api_ids` but
 * are no longer listed in PARKS.
 *
 * For each such park, its `park_days` rows and its `park_api_ids` rows are
 * deleted, and the number of `park_days` rows it had is logged. Does nothing
 * (and logs nothing) when every stored park is still in the registry.
 *
 * @param db - Open database handle used for the queries and deletes.
 */
function purgeRemovedParks(db: DbInstance) {
  const registryIds = new Set(PARKS.map((park) => park.id));

  const storedRows = db
    .prepare("SELECT DISTINCT park_id FROM park_api_ids")
    .all() as { park_id: string }[];

  const orphaned: string[] = [];
  for (const row of storedRows) {
    if (!registryIds.has(row.park_id)) orphaned.push(row.park_id);
  }

  if (orphaned.length === 0) return;

  console.log(`\nRemoving ${orphaned.length} park(s) no longer in registry:`);
  orphaned.forEach((staleId) => {
    // Count first so the log line can report how many day rows went away.
    const countRow = db
      .prepare("SELECT COUNT(*) AS n FROM park_days WHERE park_id = ?")
      .get(staleId) as { n: number };
    const days = countRow.n;

    db.prepare("DELETE FROM park_days WHERE park_id = ?").run(staleId);
    db.prepare("DELETE FROM park_api_ids WHERE park_id = ?").run(staleId);
    console.log(` removed ${staleId} (${days} day rows deleted)`);
  });
  console.log();
}
/**
 * Script entry point: discovers and stores an API ID for every park in PARKS
 * that does not have one yet, then reconciles park-meta with the registry and
 * prints a summary.
 *
 * Steps:
 *   1. Purge database rows for parks no longer in the registry.
 *   2. For each park without a stored API ID, run browser discovery and store
 *      the result. Per-park failures are logged and do not stop the run.
 *   3. Add a default park-meta entry for every park that lacks one and drop
 *      entries for parks no longer in the registry; write the file only if
 *      something changed.
 *   4. Print the API ID and RCDB ID known for each park.
 *
 * The database handle is closed in a `finally` block so it is released even
 * when a step outside the per-park try/catch throws.
 */
async function main() {
  const db = openDb();
  try {
    purgeRemovedParks(db);

    for (const park of PARKS) {
      const existing = getApiId(db, park.id);
      if (existing !== null) {
        console.log(`${park.name}: already known (API ID ${existing}) — skip`);
        continue;
      }

      process.stdout.write(`${park.name} (${park.slug})... `);
      try {
        const apiId = await discoverParkId(park.slug);
        if (apiId === null) {
          console.log("FAILED — no API IDs captured");
          continue;
        }
        // Fetch full info to store name/abbreviation
        const info = await fetchParkInfo(apiId);
        setApiId(db, park.id, apiId, info?.parkAbbreviation, info?.parkName);
      } catch (err) {
        // Best-effort per park: report and move on to the next one.
        console.log(`ERROR: ${err}`);
      }

      // Small delay between parks to be polite
      await new Promise((r) => setTimeout(r, 2000));
    }

    // ── Ensure park-meta.json has a skeleton entry for every park ────────────
    // Users fill in rcdb_id manually; scrape.ts populates coasters[] from RCDB.
    const meta = readParkMeta();
    // Set lookup instead of scanning PARKS once per meta key.
    const knownIds = new Set(PARKS.map((p) => p.id));
    let metaChanged = false;

    for (const park of PARKS) {
      if (!meta[park.id]) {
        meta[park.id] = defaultParkMeta();
        metaChanged = true;
      }
    }

    // Remove entries for parks no longer in the registry
    for (const id of Object.keys(meta)) {
      if (!knownIds.has(id)) {
        delete meta[id];
        metaChanged = true;
      }
    }

    if (metaChanged) {
      writeParkMeta(meta);
      console.log("\nUpdated data/park-meta.json");
      console.log(" → Set rcdb_id for each park to enable the coaster filter.");
      console.log(" Find a park's RCDB ID from: https://rcdb.com (the number in the URL).");
    }

    // Print summary
    console.log("\n── Discovered IDs ──");
    for (const park of PARKS) {
      const id = getApiId(db, park.id);
      const rcdbId = meta[park.id]?.rcdb_id;
      const rcdbStr = rcdbId ? `rcdb:${rcdbId}` : "rcdb:?";
      console.log(` ${park.id.padEnd(30)} api:${String(id ?? "?").padEnd(8)} ${rcdbStr}`);
    }
  } finally {
    // Release the handle on both the success and the failure path.
    db.close();
  }
}

// Any error that escapes main() is fatal: report it and exit non-zero.
main().catch((err) => {
  console.error("Fatal:", err);
  process.exit(1);
});
+7 -64
View File
@@ -1,17 +1,13 @@
/**
* Scrape job — fetches 2026 operating hours for all parks from the Six Flags API.
*
* Prerequisite: run `npm run discover` first to populate API IDs.
*
* npm run scrape — skips months scraped within the last 7 days
* npm run scrape — skips months scraped within the last 72h
* npm run scrape:force — re-scrapes everything
*/
import { openDb, upsertDay, getApiId, isMonthScraped } from "../lib/db";
import { openDb, upsertDay, isMonthScraped } from "../lib/db";
import { PARKS } from "../lib/parks";
import { scrapeMonth, fetchToday, RateLimitError } from "../lib/scrapers/sixflags";
import { readParkMeta, writeParkMeta, areCoastersStale } from "../lib/park-meta";
import { scrapeRcdbCoasters } from "../lib/scrapers/rcdb";
const YEAR = 2026;
const MONTHS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
@@ -25,29 +21,13 @@ async function sleep(ms: number) {
async function main() {
const db = openDb();
const ready = PARKS.filter((p) => getApiId(db, p.id) !== null);
const needsDiscovery = PARKS.filter((p) => getApiId(db, p.id) === null);
if (needsDiscovery.length > 0) {
console.log(
`${needsDiscovery.length} park(s) need discovery first: ${needsDiscovery.map((p) => p.id).join(", ")}\n`
);
}
if (ready.length === 0) {
console.log("No parks ready — run: npm run discover");
db.close();
return;
}
console.log(`Scraping ${YEAR}${ready.length} parks\n`);
console.log(`Scraping ${YEAR}${PARKS.length} parks\n`);
let totalFetched = 0;
let totalSkipped = 0;
let totalErrors = 0;
for (const park of ready) {
const apiId = getApiId(db, park.id)!;
for (const park of PARKS) {
const label = park.shortName.padEnd(22);
let openDays = 0;
@@ -65,7 +45,7 @@ async function main() {
}
try {
const days = await scrapeMonth(apiId, YEAR, month);
const days = await scrapeMonth(park.apiId, YEAR, month);
db.transaction(() => {
for (const d of days) upsertDay(db, park.id, d.date, d.isOpen, d.hoursLabel, d.specialType);
})();
@@ -102,11 +82,10 @@ async function main() {
// ── Today scrape (always fresh — dateless endpoint returns current day) ────
console.log("\n── Today's data ──");
for (const park of ready) {
const apiId = getApiId(db, park.id)!;
for (const park of PARKS) {
process.stdout.write(` ${park.shortName.padEnd(22)} `);
try {
const today = await fetchToday(apiId);
const today = await fetchToday(park.apiId);
if (today) {
upsertDay(db, park.id, today.date, today.isOpen, today.hoursLabel, today.specialType);
console.log(today.isOpen ? `open ${today.hoursLabel ?? ""}` : "closed");
@@ -120,42 +99,6 @@ async function main() {
}
db.close();
// ── RCDB coaster scrape (30-day staleness) ────────────────────────────────
const meta = readParkMeta();
const rcdbParks = PARKS.filter((p) => {
const entry = meta[p.id];
return entry?.rcdb_id && (FORCE || areCoastersStale(entry));
});
if (rcdbParks.length === 0) {
console.log("\nCoaster data up to date.");
return;
}
console.log(`\n── RCDB coaster scrape — ${rcdbParks.length} park(s) ──`);
for (const park of rcdbParks) {
const entry = meta[park.id];
const rcdbId = entry.rcdb_id!;
process.stdout.write(` ${park.shortName.padEnd(30)} `);
const coasters = await scrapeRcdbCoasters(rcdbId);
if (coasters === null) {
console.log("FAILED");
continue;
}
entry.coasters = coasters;
entry.coasters_scraped_at = new Date().toISOString();
console.log(`${coasters.length} coasters`);
// Polite delay between RCDB requests
await new Promise((r) => setTimeout(r, 2000));
}
writeParkMeta(meta);
console.log(" Saved to data/park-meta.json");
}
main().catch((err) => {