Files
Josh Wright f0faff412c
All checks were successful
Build and Deploy / Build & Push (push) Successful in 17s
feat: use dateless Six Flags API endpoint for live today data
The API without a date param returns today's operating data directly,
invalidating the previous assumption that today's date was always missing.

- Add fetchToday(apiId, revalidate?) to sixflags.ts — calls the dateless
  endpoint with optional ISR cache
- Extract parseApiDay() helper shared by scrapeMonth and fetchToday
- Update upsertDay WHERE clause: >= date('now') so today can be updated
  (was > date('now'), which froze today after first write)
- scrape.ts: add a today-scrape pass after the monthly loop so each run
  always writes fresh today data to the DB
- app/page.tsx: fetch live today data for all parks (5-min ISR) and merge
  into the data map before computing open/closing/weatherDelay status
- app/park/[id]/page.tsx: prefer live today data from API for todayData
  so weather delays and hour changes surface within 5 minutes
- scrapeRidesForDay: update comment only — role unchanged (QT fallback)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 16:54:06 -04:00

165 lines
4.8 KiB
TypeScript

/**
* Scrape job — fetches 2026 operating hours for all parks from the Six Flags API.
*
* Prerequisite: run `npm run discover` first to populate API IDs.
*
* npm run scrape — skips months scraped within the last 7 days
* npm run scrape:force — re-scrapes everything
*/
import { openDb, upsertDay, getApiId, isMonthScraped } from "../lib/db";
import { PARKS } from "../lib/parks";
import { scrapeMonth, fetchToday, RateLimitError } from "../lib/scrapers/sixflags";
import { readParkMeta, writeParkMeta, areCoastersStale } from "../lib/park-meta";
import { scrapeRcdbCoasters } from "../lib/scrapers/rcdb";
// Season year whose calendar is scraped.
const YEAR = 2026;
// All calendar months fetched per park (full-year sweep).
const MONTHS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
// Polite delay between month requests to the Six Flags API.
const DELAY_MS = 1000;
// `--rescrape` bypasses the recently-scraped skip (exposed as `npm run scrape:force`).
const FORCE = process.argv.includes("--rescrape");
/** Pause for `ms` milliseconds (polite-delay helper between API requests). */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((resolve) => setTimeout(resolve, ms));
}
/**
 * Orchestrates one scrape run:
 *   1. Monthly operating-hours scrape for every park whose API ID was discovered
 *      (months scraped recently are skipped unless --rescrape is passed).
 *   2. A "today" pass via the dateless endpoint so the current day is always fresh.
 *   3. An RCDB coaster scrape for parks whose coaster data is stale.
 */
async function main() {
  const db = openDb();

  // Parks split by whether `npm run discover` has populated their API ID yet.
  const ready = PARKS.filter((p) => getApiId(db, p.id) !== null);
  const needsDiscovery = PARKS.filter((p) => getApiId(db, p.id) === null);
  if (needsDiscovery.length > 0) {
    console.log(
      `${needsDiscovery.length} park(s) need discovery first: ${needsDiscovery.map((p) => p.id).join(", ")}\n`
    );
  }
  if (ready.length === 0) {
    console.log("No parks ready — run: npm run discover");
    db.close();
    return;
  }
  // FIX: was `${YEAR}${ready.length}` — the fused template printed e.g. "Scraping 202612 parks".
  console.log(`Scraping ${YEAR} — ${ready.length} parks\n`);

  let totalFetched = 0;
  let totalSkipped = 0;
  let totalErrors = 0;

  // ── Monthly operating-hours scrape ────────────────────────────────────────
  for (const park of ready) {
    const apiId = getApiId(db, park.id)!; // non-null: `ready` was filtered above
    const label = park.shortName.padEnd(22);
    let openDays = 0;
    let fetched = 0;
    let skipped = 0;
    let errors = 0;
    process.stdout.write(` ${label} `);
    for (const month of MONTHS) {
      // Skip months scraped within the freshness window unless forced.
      if (!FORCE && isMonthScraped(db, park.id, YEAR, month)) {
        process.stdout.write("·");
        skipped++;
        continue;
      }
      try {
        const days = await scrapeMonth(apiId, YEAR, month);
        // One transaction per month keeps that month's rows atomic.
        db.transaction(() => {
          for (const d of days) upsertDay(db, park.id, d.date, d.isOpen, d.hoursLabel, d.specialType);
        })();
        openDays += days.filter((d) => d.isOpen).length;
        fetched++;
        process.stdout.write("█");
        // Polite delay between requests — skipped after the final month.
        if (fetched + skipped + errors < MONTHS.length) await sleep(DELAY_MS);
      } catch (err) {
        // Both branches printed "✗"; merged. Rate limits are expected — only
        // unexpected errors get a detailed log line.
        process.stdout.write("✗");
        if (!(err instanceof RateLimitError)) {
          console.error(`\n error: ${err instanceof Error ? err.message : err}`);
        }
        errors++;
      }
    }
    totalFetched += fetched;
    totalSkipped += skipped;
    totalErrors += errors;
    if (errors > 0) {
      console.log(` ${errors} error(s)`);
    } else if (skipped === MONTHS.length) {
      console.log(" up to date");
    } else {
      console.log(` ${openDays} open days`);
    }
  }
  console.log(`\n ${totalFetched} fetched ${totalSkipped} skipped ${totalErrors} errors`);
  if (totalErrors > 0) console.log(" Re-run to retry failed months.");

  // ── Today scrape (always fresh — dateless endpoint returns current day) ────
  console.log("\n── Today's data ──");
  for (const park of ready) {
    const apiId = getApiId(db, park.id)!;
    process.stdout.write(` ${park.shortName.padEnd(22)} `);
    try {
      const today = await fetchToday(apiId);
      if (today) {
        upsertDay(db, park.id, today.date, today.isOpen, today.hoursLabel, today.specialType);
        console.log(today.isOpen ? `open ${today.hoursLabel ?? ""}` : "closed");
      } else {
        console.log("no data");
      }
    } catch (err) {
      // FIX: previously a bare "error" swallowed the reason entirely.
      console.log(`error (${err instanceof Error ? err.message : String(err)})`);
    }
    await sleep(500);
  }
  db.close();

  // ── RCDB coaster scrape (30-day staleness) ────────────────────────────────
  const meta = readParkMeta();
  const rcdbParks = PARKS.filter((p) => {
    const entry = meta[p.id];
    return entry?.rcdb_id && (FORCE || areCoastersStale(entry));
  });
  if (rcdbParks.length === 0) {
    console.log("\nCoaster data up to date.");
    return;
  }
  console.log(`\n── RCDB coaster scrape — ${rcdbParks.length} park(s) ──`);
  for (const park of rcdbParks) {
    const entry = meta[park.id];
    const rcdbId = entry.rcdb_id!; // non-null: rcdbParks filter required it
    process.stdout.write(` ${park.shortName.padEnd(30)} `);
    const coasters = await scrapeRcdbCoasters(rcdbId);
    if (coasters === null) {
      console.log("FAILED");
      continue;
    }
    entry.coasters = coasters;
    entry.coasters_scraped_at = new Date().toISOString();
    console.log(`${coasters.length} coasters`);
    // Polite delay between RCDB requests — now via the shared sleep() helper
    // instead of a hand-rolled Promise (consistency fix).
    await sleep(2000);
  }
  writeParkMeta(meta);
  console.log(" Saved to data/park-meta.json");
}
// Entry point — surface any unhandled failure and exit non-zero so cron/CI notices.
main().catch((err) => {
console.error("Fatal:", err);
process.exit(1);
});