Files
SixFlagsSuperCalendar/scripts/scrape.ts
josh bc5777c9e2
All checks were successful
Build and Deploy / Build & Push (push) Successful in 2m54s
refactor: scrape all 12 months per park before moving to next park
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-04 10:25:20 -04:00

127 lines
3.8 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Scrape job — fetches 2026 operating hours for all parks from the Six Flags API.
*
* Prerequisite: run `npm run discover` first to populate API IDs.
*
* Run once and leave it:
* npm run scrape
*
* Skips park+month combos scraped within the last week. Re-run to resume after interruption.
* To force a full re-scrape:
* npm run scrape:force
*
* Rate limiting: backs off automatically (30s → 60s → 120s per retry).
* After exhausting retries, skips that park+month and continues.
*/
import { openDb, upsertDay, getApiId, isMonthScraped } from "../lib/db";
import { PARKS } from "../lib/parks";
import { scrapeMonth, RateLimitError } from "../lib/scrapers/sixflags";
// Target year and the full set of calendar months to fetch.
const YEAR = 2026;
const MONTHS = Array.from({ length: 12 }, (_, i) => i + 1);
// Pause between successful API calls to stay polite to the upstream server.
const DELAY_MS = 1000;
// `npm run scrape:force` passes --rescrape to ignore the already-scraped cache.
const FORCE = process.argv.includes("--rescrape");
/** Format a month number as a "YYYY-MM" label using the module-level YEAR. */
function monthLabel(m: number) {
  const mm = String(m).padStart(2, "0");
  return [YEAR, mm].join("-");
}
/** Right-align a number in a space-padded field of the given width. */
function pad(n: number, width: number) {
  const digits = String(n);
  return digits.padStart(width, " ");
}
/** Resolve after roughly `ms` milliseconds (setTimeout-based delay). */
async function sleep(ms: number) {
  return new Promise<void>((resolve) => {
    setTimeout(() => resolve(), ms);
  });
}
/**
 * Main scrape loop: for every park with a known API ID, fetch operating
 * hours for each month of YEAR, upserting days into the local database.
 * Skips park+month combos already scraped (unless --rescrape), logs a
 * per-item progress line, and prints a summary before closing the db.
 */
async function main() {
  const db = openDb();

  // Separate parks with known API IDs from those that still need discovery.
  const ready = PARKS.filter((p) => getApiId(db, p.id) !== null);
  const needsDiscovery = PARKS.filter((p) => getApiId(db, p.id) === null);

  if (needsDiscovery.length > 0) {
    console.log(
      `${needsDiscovery.length} parks have no API ID — run \`npm run discover\` first:\n` +
        needsDiscovery.map((p) => `  ${p.id}`).join("\n") +
        "\n"
    );
  }
  if (ready.length === 0) {
    console.log("No parks ready to scrape. Run: npm run discover");
    db.close();
    return;
  }

  // Build the full work queue: park × month (all 12 months per park before moving on).
  const queue: { month: number; park: (typeof PARKS)[0]; apiId: number }[] = [];
  for (const park of ready) {
    // Look up once per park instead of once per month; `ready` guarantees non-null.
    const apiId = getApiId(db, park.id);
    if (apiId === null) continue; // defensive — cannot happen after the filter above
    for (const month of MONTHS) {
      if (!FORCE && isMonthScraped(db, park.id, YEAR, month)) continue;
      queue.push({ month, park, apiId });
    }
  }

  const total = MONTHS.length * ready.length;
  const skip = total - queue.length;
  // BUG FIX: the year and park count were concatenated with no separator
  // ("Scraping 20267 parks ..."); restore the separator.
  console.log(
    `Scraping ${YEAR} — ${ready.length} parks × 12 months = ${total} total\n` +
      `Skipping ${skip} already-scraped. ${queue.length} to fetch.\n`
  );
  if (queue.length === 0) {
    console.log("Nothing to do. To force a full re-scrape: npm run scrape:force");
    db.close();
    return;
  }

  let done = 0;
  let errors = 0;
  for (const { month, park, apiId } of queue) {
    // Fixed-width "[ 3/48]" counter so progress lines align.
    const counter = `[${pad(done + 1, queue.length.toString().length)}/${queue.length}]`;
    process.stdout.write(`${counter} ${park.shortName.padEnd(22)} ${monthLabel(month)} ... `);
    try {
      const days = await scrapeMonth(apiId, YEAR, month);
      // Insert the whole month atomically so a crash can't leave a partial month.
      const insertAll = db.transaction(() => {
        for (const d of days) upsertDay(db, park.id, d.date, d.isOpen, d.hoursLabel);
      });
      insertAll();
      const openCount = days.filter((d) => d.isOpen).length;
      console.log(`${openCount}/${days.length} open`);
      done++;
      // Throttle between calls, but don't sleep after the final item.
      if (done < queue.length) await sleep(DELAY_MS);
    } catch (err) {
      if (err instanceof RateLimitError) {
        // scrapeMonth already exhausted its backoff retries; skip and move on.
        console.log(`RATE LIMITED — skipping (re-run to retry)`);
      } else {
        console.log(`ERROR: ${err instanceof Error ? err.message : err}`);
      }
      errors++;
    }
  }

  const summary = [
    `\n── Summary ─────────────────────────────`,
    `  Fetched : ${done}`,
    `  Skipped : ${skip}`,
    `  Errors  : ${errors}`,
    `  Total   : ${total}`,
  ];
  if (errors > 0) summary.push(`\nRe-run to retry failed months.`);
  console.log(summary.join("\n"));
  db.close();
}
// Entry point: any error that escapes main() is fatal — log it and exit non-zero
// so CI / cron callers can detect the failure.
main().catch((err) => {
  console.error("Fatal:", err);
  process.exit(1);
});