Files
SixFlagsSuperCalendar/scripts/scrape.ts
josh 548c7ae09e feat: initial project scaffold with CI/CD and Docker deployment
Next.js 15 + Tailwind CSS v4 week calendar showing Six Flags park hours.
Scrapes the internal CloudFront API, stores results in SQLite.
Includes Dockerfile (Debian/Playwright-compatible), docker-compose, and
Gitea Actions pipeline that builds and pushes to the container registry.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-04 00:48:09 -04:00

127 lines
3.8 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Scrape job — fetches 2026 operating hours for all parks from the Six Flags API.
*
* Prerequisite: run `npm run discover` first to populate API IDs.
*
* Run once and leave it:
* npm run scrape
*
* Skips park+month combos scraped within the last week. Re-run to resume after interruption.
* To force a full re-scrape:
* npm run scrape:force
*
* Rate limiting: backs off automatically (30s → 60s → 120s per retry).
* After exhausting retries, skips that park+month and continues.
*/
import { openDb, upsertDay, getApiId, isMonthScraped } from "../lib/db";
import { PARKS } from "../lib/parks";
import { scrapeMonth, RateLimitError } from "../lib/scrapers/sixflags";
/** Calendar year whose operating hours this job fetches. */
const YEAR = 2026;

/** Month numbers to scrape, 1 through 12. */
const MONTHS = Array.from({ length: 12 }, (_, i) => i + 1);

/** Pause, in milliseconds, inserted after each successful API call. */
const DELAY_MS = 1000;

/** True when the process was started with the `--rescrape` flag. */
const FORCE = process.argv.some((arg) => arg === "--rescrape");
/**
 * Formats a month number as a `YEAR-MM` label, zero-padding the month to two
 * digits (e.g. month 3 → "<YEAR>-03").
 */
function monthLabel(m: number) {
  const twoDigitMonth = `${m}`.padStart(2, "0");
  return [YEAR, twoDigitMonth].join("-");
}
/**
 * Right-aligns a number in a field of `width` characters by prefixing spaces.
 * Numbers already at least `width` characters wide are returned unchanged.
 */
function pad(n: number, width: number) {
  const digits = `${n}`;
  return digits.padStart(width, " ");
}
/**
 * Returns a promise that resolves (with no value) once `ms` milliseconds have
 * elapsed on a `setTimeout` timer.
 */
async function sleep(ms: number) {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Runs the scrape job end to end.
 *
 * 1. Opens the database and splits PARKS into those with a stored API ID and
 *    those without (the latter are listed with a hint to run discovery).
 * 2. Builds a month × park work queue, leaving out combos that
 *    `isMonthScraped` reports as done unless FORCE is set.
 * 3. Fetches each queued month via `scrapeMonth`, upserts every returned day
 *    inside one `db.transaction` wrapper, and pauses DELAY_MS after each
 *    successful fetch while work remains.
 * 4. Logs and counts any per-item failure (rate-limit or otherwise) and keeps
 *    going, then prints a summary.
 *
 * The database handle is closed on every exit path, including when an
 * unexpected error propagates to the caller.
 */
async function main() {
  type Park = (typeof PARKS)[number];

  const db = openDb();
  try {
    // Look up each park's API ID exactly once and partition on the result,
    // so no later step needs a second query or a non-null assertion.
    const ready: { park: Park; apiId: number }[] = [];
    const needsDiscovery: Park[] = [];
    for (const park of PARKS) {
      const apiId = getApiId(db, park.id);
      if (apiId === null) {
        needsDiscovery.push(park);
      } else {
        ready.push({ park, apiId });
      }
    }

    if (needsDiscovery.length > 0) {
      console.log(
        `${needsDiscovery.length} parks have no API ID — run \`npm run discover\` first:\n` +
          needsDiscovery.map((p) => ` ${p.id}`).join("\n") +
          "\n"
      );
    }

    if (ready.length === 0) {
      console.log("No parks ready to scrape. Run: npm run discover");
      return;
    }

    // Build the full work queue: month × park (month-major order).
    const queue: { month: number; park: Park; apiId: number }[] = [];
    for (const month of MONTHS) {
      for (const { park, apiId } of ready) {
        if (!FORCE && isMonthScraped(db, park.id, YEAR, month)) continue;
        queue.push({ month, park, apiId });
      }
    }

    const total = MONTHS.length * ready.length;
    const skip = total - queue.length;
    console.log(
      `Scraping ${YEAR} — ${ready.length} parks × ${MONTHS.length} months = ${total} total\n` +
        `Skipping ${skip} already-scraped. ${queue.length} to fetch.\n`
    );

    if (queue.length === 0) {
      console.log("Nothing to do. To force a full re-scrape: npm run scrape:force");
      return;
    }

    const counterWidth = String(queue.length).length;
    let done = 0;
    let errors = 0;

    for (const [index, { month, park, apiId }] of queue.entries()) {
      // The counter tracks queue position, not the success count, so it keeps
      // advancing even when earlier items failed.
      const counter = `[${pad(index + 1, counterWidth)}/${queue.length}]`;
      process.stdout.write(`${counter} ${park.shortName.padEnd(22)} ${monthLabel(month)} ... `);

      try {
        const days = await scrapeMonth(apiId, YEAR, month);

        // Write the whole month through a single transaction wrapper.
        const insertAll = db.transaction(() => {
          for (const d of days) upsertDay(db, park.id, d.date, d.isOpen, d.hoursLabel);
        });
        insertAll();

        const openCount = days.filter((d) => d.isOpen).length;
        console.log(`${openCount}/${days.length} open`);
        done++;

        // Pause only after a success, and only if another item follows.
        if (index < queue.length - 1) await sleep(DELAY_MS);
      } catch (err) {
        if (err instanceof RateLimitError) {
          console.log(`RATE LIMITED — skipping (re-run to retry)`);
        } else {
          console.log(`ERROR: ${err instanceof Error ? err.message : String(err)}`);
        }
        errors++;
      }
    }

    const summary = [
      `\n── Summary ─────────────────────────────`,
      ` Fetched : ${done}`,
      ` Skipped : ${skip}`,
      ` Errors : ${errors}`,
      ` Total : ${total}`,
    ];
    if (errors > 0) summary.push(`\nRe-run to retry failed months.`);
    console.log(summary.join("\n"));
  } finally {
    // Release the database handle on every path, including thrown errors.
    db.close();
  }
}
// Start the job. Any rejection that escapes main() is printed with a "Fatal:"
// prefix and the process exits with status 1.
main().then(undefined, (reason) => {
  console.error("Fatal:", reason);
  process.exit(1);
});