/**
 * Scrape job — fetches 2026 operating hours for all parks from the Six Flags API.
 *
 * Prerequisite: run `npm run discover` first to populate API IDs.
 *
 * Run once and leave it:
 *   npm run scrape
 *
 * Skips park+month combos scraped within the last week. Re-run to resume after interruption.
 * To force a full re-scrape:
 *   npm run scrape:force
 *
 * Rate limiting: backs off automatically (30s → 60s → 120s per retry).
 * After exhausting retries, skips that park+month and continues.
 */

import { openDb, upsertDay, getApiId, isMonthScraped } from "../lib/db";
import { PARKS } from "../lib/parks";
import { scrapeMonth, RateLimitError } from "../lib/scrapers/sixflags";

/** Calendar year whose operating hours are scraped. */
const YEAR = 2026;

/** Months (1-based) fetched for every park. */
const MONTHS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];

const DELAY_MS = 1000; // between successful API calls

// When set, park+month combos are fetched even if `isMonthScraped` reports them done.
// NOTE(review): the header documents `npm run scrape:force`; presumably that
// script passes `--rescrape` — confirm in package.json.
const FORCE = process.argv.includes("--rescrape");

/** Element type of the `PARKS` list. */
type Park = (typeof PARKS)[number];

/** A park paired with the API ID that was looked up for it. */
interface ReadyPark {
  park: Park;
  apiId: number;
}

/** One unit of work: a single month of a single park. */
interface WorkItem extends ReadyPark {
  month: number;
}

/** Formats a month number as `YYYY-MM` using the configured `YEAR`. */
function monthLabel(m: number): string {
  return `${YEAR}-${String(m).padStart(2, "0")}`;
}

/** Right-aligns `n` in a field of `width` characters, padding with spaces. */
function pad(n: number, width: number): string {
  return String(n).padStart(width, " ");
}

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => setTimeout(resolve, ms));
}

/**
 * Runs the scrape: splits parks by whether an API ID is stored, builds the
 * month × park work queue (skipping already-scraped combos unless `FORCE`),
 * fetches each item, stores its days in one transaction, and prints a summary.
 *
 * Per-item failures are logged and counted; they do not abort the run.
 */
async function main(): Promise<void> {
  const db = openDb();

  // A single try/finally closes the database on every exit path, including
  // early returns and unexpected throws.
  try {
    // Look each API ID up once and carry it along, so the queue never needs
    // a second lookup or a non-null assertion.
    const ready: ReadyPark[] = [];
    const needsDiscovery: Park[] = [];
    for (const park of PARKS) {
      const apiId = getApiId(db, park.id);
      if (apiId === null) {
        needsDiscovery.push(park);
      } else {
        ready.push({ park, apiId });
      }
    }

    if (needsDiscovery.length > 0) {
      console.log(
        `⚠ ${needsDiscovery.length} parks have no API ID — run \`npm run discover\` first:\n` +
          needsDiscovery.map((p) => ` ${p.id}`).join("\n") +
          "\n"
      );
    }

    if (ready.length === 0) {
      console.log("No parks ready to scrape. Run: npm run discover");
      return;
    }

    // Build the full work queue: month × park
    const queue: WorkItem[] = [];
    for (const month of MONTHS) {
      for (const { park, apiId } of ready) {
        if (!FORCE && isMonthScraped(db, park.id, YEAR, month)) continue;
        queue.push({ month, park, apiId });
      }
    }

    const total = MONTHS.length * ready.length;
    const skip = total - queue.length;

    console.log(
      `Scraping ${YEAR} — ${ready.length} parks × ${MONTHS.length} months = ${total} total\n` +
        `Skipping ${skip} already-scraped. ${queue.length} to fetch.\n`
    );

    if (queue.length === 0) {
      console.log("Nothing to do. To force a full re-scrape: npm run scrape:force");
      return;
    }

    const counterWidth = String(queue.length).length;
    let done = 0;
    let errors = 0;

    for (const [index, { month, park, apiId }] of queue.entries()) {
      // Number items by queue position, not by success count, so the counter
      // keeps advancing after a failed item.
      const counter = `[${pad(index + 1, counterWidth)}/${queue.length}]`;
      process.stdout.write(`${counter} ${park.shortName.padEnd(22)} ${monthLabel(month)} ... `);

      try {
        const days = await scrapeMonth(apiId, YEAR, month);

        // Write the whole month inside one transaction.
        const insertAll = db.transaction(() => {
          for (const d of days) upsertDay(db, park.id, d.date, d.isOpen, d.hoursLabel);
        });
        insertAll();

        const openCount = days.filter((d) => d.isOpen).length;
        console.log(`${openCount}/${days.length} open`);
        done++;
      } catch (err) {
        if (err instanceof RateLimitError) {
          console.log(`RATE LIMITED — skipping (re-run to retry)`);
        } else {
          console.log(`ERROR: ${err instanceof Error ? err.message : String(err)}`);
        }
        errors++;
        // No pause after a failure; go straight to the next item.
        continue;
      }

      // Pause only after a success, and only when more items remain, so the
      // run never sleeps right before the summary.
      if (index < queue.length - 1) await sleep(DELAY_MS);
    }

    const summary = [
      `\n── Summary ─────────────────────────────`,
      ` Fetched : ${done}`,
      ` Skipped : ${skip}`,
      ` Errors : ${errors}`,
      ` Total : ${total}`,
    ];
    if (errors > 0) summary.push(`\nRe-run to retry failed months.`);
    console.log(summary.join("\n"));
  } finally {
    db.close();
  }
}

main().catch((err) => {
  console.error("Fatal:", err);
  process.exit(1);
});