/**
 * Scrape job — fetches 2026 operating hours for all parks from the Six Flags API.
 *
 * Prerequisite: run `npm run discover` first to populate API IDs.
 *
 *   npm run scrape         — skips months scraped within the last 7 days
 *   npm run scrape:force   — re-scrapes everything
 */
import { openDb, upsertDay, getApiId, isMonthScraped } from "../lib/db";
import { PARKS } from "../lib/parks";
import { scrapeMonth, RateLimitError } from "../lib/scrapers/sixflags";
import { readParkMeta, writeParkMeta, areCoastersStale } from "../lib/park-meta";
import { scrapeRcdbCoasters } from "../lib/scrapers/rcdb";

/** Calendar year whose operating hours are scraped. */
const YEAR = 2026;
/** Month numbers requested for every park. */
const MONTHS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
/** Pause between consecutive Six Flags API requests, in milliseconds. */
const DELAY_MS = 1000;
/** Pause between consecutive RCDB requests, in milliseconds. */
const RCDB_DELAY_MS = 2000;
/**
 * True when `--rescrape` is on the command line; bypasses the "already scraped"
 * and staleness checks.
 * NOTE(review): presumably the `scrape:force` npm script passes this flag —
 * confirm in package.json.
 */
const FORCE = process.argv.includes("--rescrape");

type Db = ReturnType<typeof openDb>;
type Park = (typeof PARKS)[number];
type ApiId = Exclude<ReturnType<typeof getApiId>, null>;

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Scrapes operating hours for every park that has an API ID and upserts the
 * resulting days into `db`, printing one progress glyph per month
 * (`·` skipped, `█` fetched, `✗` failed) followed by a per-park summary.
 *
 * @param db Open database handle; the caller is responsible for closing it.
 * @returns `false` when no park has an API ID yet (nothing was scraped),
 *          `true` otherwise.
 */
async function scrapeOperatingHours(db: Db): Promise<boolean> {
  // Look every API ID up exactly once and split the parks accordingly.
  const ready: { park: Park; apiId: ApiId }[] = [];
  const needsDiscovery: Park[] = [];
  for (const park of PARKS) {
    const apiId = getApiId(db, park.id);
    if (apiId === null) {
      needsDiscovery.push(park);
    } else {
      ready.push({ park, apiId });
    }
  }

  if (needsDiscovery.length > 0) {
    console.log(
      `⚠ ${needsDiscovery.length} park(s) need discovery first: ${needsDiscovery.map((p) => p.id).join(", ")}\n`
    );
  }

  if (ready.length === 0) {
    console.log("No parks ready — run: npm run discover");
    return false;
  }

  console.log(`Scraping ${YEAR} — ${ready.length} parks\n`);

  let totalFetched = 0;
  let totalSkipped = 0;
  let totalErrors = 0;
  // Set once the first request has gone out; every later request is preceded
  // by a delay, so failures (including rate limits) and park boundaries are
  // throttled too, and no time is wasted sleeping after the final request.
  let requestSent = false;

  for (const { park, apiId } of ready) {
    const label = park.shortName.padEnd(22);
    let openDays = 0;
    let fetched = 0;
    let skipped = 0;
    let errors = 0;

    process.stdout.write(` ${label} `);

    for (const month of MONTHS) {
      if (!FORCE && isMonthScraped(db, park.id, YEAR, month)) {
        process.stdout.write("·");
        skipped++;
        continue;
      }

      if (requestSent) await sleep(DELAY_MS);
      requestSent = true;

      try {
        const days = await scrapeMonth(apiId, YEAR, month);
        // Write the whole month in a single transaction.
        db.transaction(() => {
          for (const d of days) {
            upsertDay(db, park.id, d.date, d.isOpen, d.hoursLabel, d.specialType);
          }
        })();
        openDays += days.filter((d) => d.isOpen).length;
        fetched++;
        process.stdout.write("█");
      } catch (err) {
        process.stdout.write("✗");
        // Rate-limit failures are only marked; anything else is also reported.
        if (!(err instanceof RateLimitError)) {
          console.error(`\n error: ${err instanceof Error ? err.message : err}`);
        }
        errors++;
      }
    }

    totalFetched += fetched;
    totalSkipped += skipped;
    totalErrors += errors;

    if (errors > 0) {
      console.log(` ${errors} error(s)`);
    } else if (skipped === MONTHS.length) {
      console.log(" up to date");
    } else {
      console.log(` ${openDays} open days`);
    }
  }

  console.log(`\n ${totalFetched} fetched ${totalSkipped} skipped ${totalErrors} errors`);
  if (totalErrors > 0) console.log(" Re-run to retry failed months.");
  return true;
}

/**
 * Refreshes the coaster list of every park that has an RCDB id and whose
 * coaster data is stale (or of every such park when FORCE is set), then writes
 * the park metadata back to disk.
 *
 * Metadata is persisted even if a scrape throws part-way through, so coaster
 * lists fetched earlier in the run are not lost.
 */
async function scrapeCoasters(): Promise<void> {
  // ── RCDB coaster scrape (30-day staleness) ────────────────────────────────
  const meta = readParkMeta();
  const rcdbParks = PARKS.flatMap((park) => {
    const entry = meta[park.id];
    const rcdbId = entry?.rcdb_id;
    return entry && rcdbId && (FORCE || areCoastersStale(entry))
      ? [{ park, entry, rcdbId }]
      : [];
  });

  if (rcdbParks.length === 0) {
    console.log("\nCoaster data up to date.");
    return;
  }

  console.log(`\n── RCDB coaster scrape — ${rcdbParks.length} park(s) ──`);

  let requestSent = false;
  try {
    for (const { park, entry, rcdbId } of rcdbParks) {
      // Polite delay between RCDB requests — applied after failures as well.
      if (requestSent) await sleep(RCDB_DELAY_MS);
      requestSent = true;

      process.stdout.write(` ${park.shortName.padEnd(30)} `);
      const coasters = await scrapeRcdbCoasters(rcdbId);
      if (coasters === null) {
        console.log("FAILED");
        continue;
      }

      entry.coasters = coasters;
      entry.coasters_scraped_at = new Date().toISOString();
      console.log(`${coasters.length} coasters`);
    }
  } finally {
    // Runs on both normal completion and an unexpected throw.
    writeParkMeta(meta);
    console.log(" Saved to data/park-meta.json");
  }
}

/**
 * Entry point: scrapes operating hours into the database, then refreshes RCDB
 * coaster data. The coaster step is skipped when no park is ready for the
 * hours scrape.
 */
async function main(): Promise<void> {
  const db = openDb();
  let hoursScraped: boolean;
  try {
    hoursScraped = await scrapeOperatingHours(db);
  } finally {
    // Always release the database handle, even when scraping throws.
    db.close();
  }
  if (!hoursScraped) return;

  await scrapeCoasters();
}

main().catch((err) => {
  console.error("Fatal:", err);
  process.exit(1);
});