Files
SixFlagsSuperCalendar/lib/scrapers/sixflags.ts
T
josh e1657f07d7
Build and Deploy / Lint, typecheck, test (push) Successful in 33s
Build and Deploy / Build & Push (push) Successful in 1m4s
fix: surface silent scraper failures and stop falsely claiming weather delay
The homepage was flagging every park as weather delay because calendar.ts
collapsed "fetchLiveRides returned null" into the same openRides=0 bucket as
"all rides actually closed." Meanwhile every scraper (queuetimes, sixflags
operating-hours, sixflags wait-times) was swallowing non-OK responses and
exceptions silently, so logs gave no signal which upstream was failing or how.

Add a small scraperWarn helper that emits in the same shape as backend/log.ts
(without importing it — lib/scrapers is shared with the Next frontend). Use it
in all three scrapers to record HTTP status and error name+message before each
return null. Add parksSkipped to the tier-5 summary log so we can tell when the
openParks filter is rejecting everyone vs the fetcher silently failing.

Convert calendar.ts ridesCache to a discriminated union { kind: "ok" | "unknown" }.
Weather delay only fires on { kind: "ok", openRides: 0 }; unknown entries get
a 30s TTL so we recover quickly when upstream comes back and don't thunder-herd
in the meantime.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-31 20:28:25 -04:00

388 lines
12 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Six Flags API client — calls the internal CloudFront operating-hours API.
*
* API: https://d18car1k0ff81h.cloudfront.net/operating-hours/park/{apiId}?date=YYYYMM
* Returns full month data in one request.
*
* Rate limiting: on 429/503, waits for Retry-After when the header is present (capped at 5 minutes), otherwise backs off exponentially (30s → 60s → 120s); retries up to MAX_RETRIES times.
*/
import { scraperWarn } from "./log";
/** Root of the CloudFront operating-hours endpoint; the park's API id is appended as a path segment. */
const API_BASE = "https://d18car1k0ff81h.cloudfront.net/operating-hours/park";
/** Retry budget for rate-limited requests; also quoted in RateLimitError's message. */
const MAX_RETRIES = 3;
/** First back-off delay in milliseconds; it is doubled on each subsequent attempt. */
const BASE_BACKOFF_MS = 30_000;

/**
 * Thrown when a request is still being rate limited after the retry budget
 * has been spent.
 */
export class RateLimitError extends Error {
  /** Total time, in milliseconds, spent waiting before giving up. */
  public readonly waitedMs: number;

  constructor(waitedMs: number) {
    super(`Rate limited — exhausted ${MAX_RETRIES} retries after ${waitedMs / 1000}s total wait`);
    this.waitedMs = waitedMs;
    this.name = "RateLimitError";
  }
}
/**
 * Request headers passed to fetch() by fetchApi.
 * They present a desktop Chrome User-Agent and a sixflags.com Referer, and
 * ask for a JSON response.
 */
const HEADERS = {
"User-Agent":
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
"(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
Accept: "application/json",
"Accept-Language": "en-US,en;q=0.9",
Referer: "https://www.sixflags.com/",
};
/**
 * Operating status of a park for one calendar day, as produced by parseApiDay.
 */
export interface DayResult {
date: string; // YYYY-MM-DD
// False when the API marks the park closed, when no hours are available, or
// when the day is a buyout that is not a passholder preview.
isOpen: boolean;
// Formatted opening/closing times; only populated when isOpen is true.
hoursLabel?: string;
// Set when one of the day's event names contains "passholder preview".
specialType?: "passholder_preview";
}
/** Returns a promise that settles (with no value) once a `ms`-millisecond timer fires. */
function sleep(ms: number) {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Reorders a slash-separated "MM/DD/YYYY" API date into "YYYY-MM-DD",
 * e.g. "04/05/2026" becomes "2026-04-05". The parts are not validated.
 */
function parseApiDate(d: string): string {
  const parts = d.split("/");
  const month = parts[0];
  const dayOfMonth = parts[1];
  const year = parts[2];
  return `${year}-${month}-${dayOfMonth}`;
}
/** One opening/closing time window inside an ApiOperating entry. */
interface ApiOperatingItem {
timeFrom: string; // "10:30" 24h
timeTo: string; // "20:00" 24h
// When true, parseApiDay reports the day as closed unless it is a passholder preview.
isBuyout?: boolean;
}
/**
 * A named group of operating windows for one day. The parsers in this file
 * prefer the entry named "Park", fall back to the first entry, and read only
 * its first item.
 */
interface ApiOperating {
operatingTypeName: string; // "Park", "Special Event", etc.
items: ApiOperatingItem[];
}
/** An event attached to a day; parseApiDay only inspects its name. */
interface ApiEvent {
// Matched case-insensitively against "passholder preview".
extEventName: string;
}
/**
 * Per-item hours inside a venue. scrapeRidesForDay treats an entry as open
 * only when both operating times are non-empty, and skips entries without a name.
 */
interface ApiRideDetail {
itemID: number;
itemName: string;
extLocationID: string;
operatingTimeFrom: string; // "" or "HH:MM" 24h — empty means not scheduled
operatingTimeTo: string;
}
/** A group of items for one day; scrapeRidesForDay only reads venues named "Rides". */
interface ApiVenue {
venueId: number;
venueName: string;
detailHours: ApiRideDetail[];
}
/** One day's entry in the API response. */
interface ApiDay {
// Slash-separated "MM/DD/YYYY"; converted to "YYYY-MM-DD" by parseApiDate / apiDateToIso.
date: string;
isParkClosed: boolean;
events?: ApiEvent[];
operatings?: ApiOperating[];
venues?: ApiVenue[];
}
/**
 * Formats a 24-hour "HH:MM" time as a compact 12-hour label.
 *
 * "10:30" → "10:30am", "20:00" → "8pm", "12:00" → "12pm", "00:00" → "12am".
 *
 * Hardening over a plain split/modulo conversion:
 * - "24:00" (end-of-day midnight) is rendered as "12am" rather than "12pm".
 * - A missing or empty minutes part ("10", "10:") is treated as zero minutes.
 * - Input whose hour or minute is not a non-negative integer is returned
 *   unchanged instead of producing "NaN"/"undefined" fragments.
 *
 * @param time 24-hour time string, hours and minutes separated by ":".
 * @returns The 12-hour label, or `time` itself when it cannot be parsed.
 */
function fmt24(time: string): string {
  const [hourText = "", minuteText = ""] = time.split(":");
  // Number("") is 0, so blank hours must be rejected explicitly.
  const hour = hourText.trim() === "" ? Number.NaN : Number(hourText);
  const minute = minuteText.trim() === "" ? 0 : Number(minuteText);
  if (!Number.isInteger(hour) || hour < 0 || !Number.isInteger(minute) || minute < 0) {
    return time;
  }

  // Wrap 24 → 0 so that end-of-day midnight is classified as "am".
  const hour24 = hour % 24;
  const period = hour24 >= 12 ? "pm" : "am";
  const hour12 = hour24 % 12 || 12;
  return minute === 0
    ? `${hour12}${period}`
    : `${hour12}:${String(minute).padStart(2, "0")}${period}`;
}
/**
 * Shape that fetchApi asserts for the JSON body of an operating-hours
 * response. The cast is not backed by runtime validation.
 */
interface ApiResponse {
parkId: number;
parkAbbreviation: string;
parkName: string;
dates: ApiDay[];
}
/**
 * GETs `url` and returns the parsed JSON body, retrying on HTTP 429/503.
 *
 * Retry policy: up to MAX_RETRIES retries after the initial request. Each wait
 * uses the `Retry-After` header when it is a non-negative number of seconds
 * (capped at 5 minutes); otherwise it uses exponential backoff starting at
 * BASE_BACKOFF_MS (30s → 60s → 120s). Once the budget is spent the function
 * throws immediately, without sleeping again first.
 *
 * @param url Fully built request URL.
 * @param attempt Zero-based count of retries already made (internal to the recursion).
 * @param totalWaitedMs Milliseconds already spent waiting (internal to the recursion).
 * @param revalidate Optional Next.js revalidation period in seconds, forwarded as `next.revalidate`.
 * @returns The response body, asserted (not validated) to be an ApiResponse.
 * @throws RateLimitError when the response is still 429/503 after MAX_RETRIES retries.
 * @throws Error for any other non-OK status; fetch/timeout errors propagate unchanged.
 */
async function fetchApi(
  url: string,
  attempt = 0,
  totalWaitedMs = 0,
  revalidate?: number,
): Promise<ApiResponse> {
  const requestTimeoutMs = 15_000;
  const maxRetryAfterMs = 5 * 60 * 1000;

  const fetchOpts: RequestInit & { next?: { revalidate: number } } = {
    headers: HEADERS,
    signal: AbortSignal.timeout(requestTimeoutMs),
  };
  if (revalidate !== undefined) fetchOpts.next = { revalidate };

  const res = await fetch(url, fetchOpts);

  if (res.status === 429 || res.status === 503) {
    // Give up before sleeping: waiting again when no retry will follow only
    // delays the error and inflates the reported wait time.
    if (attempt >= MAX_RETRIES) throw new RateLimitError(totalWaitedMs);

    // Retry-After may also be an HTTP-date or garbage; parseInt then yields NaN,
    // in which case we fall back to exponential backoff.
    const retryAfterSec = Number.parseInt(res.headers.get("Retry-After") ?? "", 10);
    const waitMs =
      Number.isFinite(retryAfterSec) && retryAfterSec >= 0
        ? Math.min(retryAfterSec * 1000, maxRetryAfterMs)
        : BASE_BACKOFF_MS * 2 ** attempt;

    console.log(
      ` [rate-limited] HTTP ${res.status} — waiting ${waitMs / 1000}s (attempt ${attempt + 1}/${MAX_RETRIES})`
    );
    await sleep(waitMs);
    return fetchApi(url, attempt + 1, totalWaitedMs + waitMs, revalidate);
  }

  if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
  return (await res.json()) as ApiResponse;
}
/**
 * Returns the unparsed API payload for one park and month — used by
 * scripts/debug.ts and the park detail page.
 *
 * @param apiId Park identifier used in the API path.
 * @param year Four-digit year.
 * @param month Month number; zero-padded to two digits in the query string.
 * @param revalidate Optional Next.js ISR revalidation period (seconds) for
 *   calls made from a Server Component.
 */
export async function scrapeMonthRaw(
  apiId: number,
  year: number,
  month: number,
  revalidate?: number,
): Promise<ApiResponse> {
  const paddedMonth = String(month).padStart(2, "0");
  const endpoint = `${API_BASE}/${apiId}?date=${year}${paddedMonth}`;
  return fetchApi(endpoint, 0, 0, revalidate);
}
/** Scheduled status of a single ride, as built by scrapeRidesForDay. */
export interface RideStatus {
name: string;
// True when the API lists both a start and an end operating time for the ride.
isOpen: boolean;
hoursLabel?: string; // e.g. "10am 10pm"
}
/** Result of scrapeRidesForDay: the rides plus which day the data describes. */
export interface RidesFetchResult {
// Open rides first, then alphabetical by name within each group.
rides: RideStatus[];
/** The date the ride data actually came from (YYYY-MM-DD). May differ from
* the requested date when the API has already dropped the current day and
* we fell back to the nearest upcoming open date. */
dataDate: string;
/** True when dataDate === requested date. False when we fell back. */
isExact: boolean;
/** Park-level operating hours for the data date (e.g. "10am 6pm").
* Used by the UI to suppress per-ride hours that match the park hours. */
parkHoursLabel?: string;
}
/**
 * Rewrites an API date in "MM/DD/YYYY" form as "YYYY-MM-DD".
 * The segments are rearranged as-is, without padding or validation.
 */
function apiDateToIso(apiDate: string): string {
  const segments = apiDate.split("/");
  return `${segments[2]}-${segments[0]}-${segments[1]}`;
}
/**
 * Turns one raw API day into a DayResult. Shared by scrapeMonth and fetchToday.
 *
 * Hours come from the first item of the "Park" operating entry (or of the
 * first entry when none is named "Park"). A day counts as open only when the
 * API does not mark the park closed, hours are present, and the day is not a
 * buyout — except that a buyout with a "passholder preview" event stays open.
 */
function parseApiDay(d: ApiDay): DayResult {
  const date = parseApiDate(d.date);

  const parkSchedule =
    d.operatings?.find((entry) => entry.operatingTypeName === "Park") ?? d.operatings?.[0];
  const firstSlot = parkSchedule?.items?.[0];

  let label: string | undefined;
  if (firstSlot?.timeFrom && firstSlot?.timeTo) {
    label = `${fmt24(firstSlot.timeFrom)} ${fmt24(firstSlot.timeTo)}`;
  }

  const previewDay =
    d.events?.some((evt) => evt.extEventName.toLowerCase().includes("passholder preview")) ??
    false;
  const buyout = firstSlot?.isBuyout ?? false;
  // A buyout closes the park to the public unless it is a passholder preview.
  const closedByBuyout = buyout && !previewDay;
  const isOpen = !d.isParkClosed && label !== undefined && !closedByBuyout;

  return {
    date,
    isOpen,
    hoursLabel: isOpen ? label : undefined,
    specialType: previewDay ? "passholder_preview" : undefined,
  };
}
/**
 * Requests the park endpoint without a date parameter (the API then answers
 * with today) and parses the first returned day.
 *
 * @param apiId Park identifier used in the API path.
 * @param revalidate Optional Next.js ISR revalidation period in seconds; omit
 *   for a fully fresh fetch.
 * @returns The parsed day, or null (after a scraperWarn) when the response has
 *   no dates or anything in the fetch/parse path throws.
 */
export async function fetchToday(apiId: number, revalidate?: number): Promise<DayResult | null> {
  try {
    const { dates } = await fetchApi(`${API_BASE}/${apiId}`, 0, 0, revalidate);
    if (dates.length) {
      return parseApiDay(dates[0]);
    }
    scraperWarn("sixflags", "fetchToday empty dates array", { apiId });
    return null;
  } catch (err) {
    const failure = err as Error;
    scraperWarn("sixflags", "fetchToday threw", {
      apiId,
      name: failure.name,
      err: failure.message,
    });
    return null;
  }
}
/**
 * Fetch ride operating status for a given date. Used as a fallback when
 * Queue-Times live data is unavailable.
 *
 * The monthly API endpoint (`?date=YYYYMM`) may not include today; use
 * `fetchToday(apiId)` to get today's park hours directly. When the exact date
 * is missing, the fallback chain picks the nearest later date in the same
 * month (preferring a day the park is open) and, if there is none, the
 * earliest date of the following calendar month (again preferring an open day).
 *
 * The following month is derived arithmetically from the requested year and
 * month. Rolling a Date forward with setMonth() would overflow on the
 * 29th–31st (e.g. Jan 31 → "Feb 31" → Mar 3) and skip a month.
 *
 * @param apiId Park identifier used in the API path.
 * @param dateIso Requested date as YYYY-MM-DD.
 * @param revalidate Next.js ISR revalidation period in seconds when called
 *   from a Server Component. Defaults to 1 hour.
 * @returns The rides for the selected day (open rides first, then by name),
 *   or null if no ride data could be found at all (API error, empty month,
 *   no "Rides" venue entries in the response).
 */
export async function scrapeRidesForDay(
  apiId: number,
  dateIso: string, // YYYY-MM-DD
  revalidate = 3600,
): Promise<RidesFetchResult | null> {
  const [yearStr, monthStr, dayStr] = dateIso.split("-");
  const year = parseInt(yearStr, 10);
  const month = parseInt(monthStr, 10);

  let raw: ApiResponse;
  try {
    raw = await scrapeMonthRaw(apiId, year, month, revalidate);
  } catch (err) {
    const e = err as Error;
    scraperWarn("sixflags", "scrapeRidesForDay scrapeMonthRaw threw", {
      apiId,
      year,
      month,
      name: e.name,
      err: e.message,
    });
    return null;
  }
  if (!raw.dates.length) {
    scraperWarn("sixflags", "scrapeRidesForDay empty dates array", { apiId, year, month });
    return null;
  }

  // The API uses "MM/DD/YYYY" internally.
  const apiDate = `${monthStr}/${dayStr}/${yearStr}`;

  // Order by ISO date: "MM/DD/YYYY" strings only sort chronologically while
  // every entry shares the same year.
  const chronological = (a: ApiDay, b: ApiDay) =>
    apiDateToIso(a.date).localeCompare(apiDateToIso(b.date));

  // Try exact match first; if the API has already dropped today, fall back to
  // the chronologically nearest available date (always a future date here).
  let dayData = raw.dates.find((d) => d.date === apiDate);
  const isExact = dayData !== undefined;

  if (!dayData) {
    // The API drops dates that have already started, so we need a future date.
    // Prefer the nearest open day; fall back to the nearest date regardless.
    const futureDates = raw.dates
      .filter((d) => apiDateToIso(d.date) > dateIso)
      .sort(chronological);
    dayData = futureDates.find((d) => !d.isParkClosed) ?? futureDates[0];

    if (!dayData) {
      // Nothing left in the current month (e.g. today is the 30th) — fetch the
      // next calendar month, wrapping December into January of the next year.
      const nextYear = month === 12 ? year + 1 : year;
      const nextMonth = month === 12 ? 1 : month + 1;
      try {
        const nextRaw = await scrapeMonthRaw(apiId, nextYear, nextMonth, revalidate);
        const nextSorted = [...nextRaw.dates].sort(chronological);
        dayData = nextSorted.find((d) => !d.isParkClosed) ?? nextSorted[0];
      } catch (err) {
        const e = err as Error;
        scraperWarn("sixflags", "scrapeRidesForDay next-month fallback threw", {
          apiId,
          year: nextYear,
          month: nextMonth,
          name: e.name,
          err: e.message,
        });
      }
    }
  }
  if (!dayData) return null;

  // Extract park-level hours from the selected day so the UI can suppress
  // per-ride hours that simply repeat what the park is already showing.
  const parkOperating =
    dayData.operatings?.find((o) => o.operatingTypeName === "Park") ??
    dayData.operatings?.[0];
  const parkItem = parkOperating?.items?.[0];
  const parkHoursLabel =
    parkItem?.timeFrom && parkItem?.timeTo
      ? `${fmt24(parkItem.timeFrom)} ${fmt24(parkItem.timeTo)}`
      : undefined;

  const rides: RideStatus[] = [];
  for (const venue of (dayData.venues ?? []).filter((v) => v.venueName === "Rides")) {
    for (const ride of venue.detailHours ?? []) {
      if (!ride.itemName) continue;
      // Empty operating times mean the ride is not scheduled that day.
      const isOpen = Boolean(ride.operatingTimeFrom && ride.operatingTimeTo);
      const hoursLabel = isOpen
        ? `${fmt24(ride.operatingTimeFrom)} ${fmt24(ride.operatingTimeTo)}`
        : undefined;
      rides.push({ name: ride.itemName, isOpen, hoursLabel });
    }
  }
  if (rides.length === 0) return null;

  // Sort: open rides first, then alphabetical within each group
  rides.sort((a, b) => {
    if (a.isOpen !== b.isOpen) return a.isOpen ? -1 : 1;
    return a.name.localeCompare(b.name);
  });

  return { rides, dataDate: apiDateToIso(dayData.date), isExact, parkHoursLabel };
}
/**
 * Loads a whole month of operating hours with one API request and parses
 * every returned day.
 *
 * @param apiId Park identifier used in the API path.
 * @param year Four-digit year.
 * @param month Month number.
 * @returns One DayResult per day present in the response. Errors from the
 *   request are not caught here.
 */
export async function scrapeMonth(
  apiId: number,
  year: number,
  month: number
): Promise<DayResult[]> {
  // scrapeMonthRaw builds the same `?date=YYYYMM` URL and, with no revalidate
  // argument, issues the same uncached request.
  const { dates } = await scrapeMonthRaw(apiId, year, month);
  return dates.map((day) => parseApiDay(day));
}
/**
 * Fetch park info for a given API ID. Uses the current month (local time) so
 * there's always some data.
 *
 * @param apiId Park identifier used in the API path.
 * @returns The park's id, abbreviation and name, or null when the request
 *   fails. Failures are reported through scraperWarn rather than dropped
 *   silently, matching the other entry points in this module.
 */
export async function fetchParkInfo(
  apiId: number
): Promise<Pick<ApiResponse, "parkId" | "parkAbbreviation" | "parkName"> | null> {
  const now = new Date();
  const dateParam = `${now.getFullYear()}${String(now.getMonth() + 1).padStart(2, "0")}`;
  const url = `${API_BASE}/${apiId}?date=${dateParam}`;
  try {
    const { parkId, parkAbbreviation, parkName } = await fetchApi(url);
    return { parkId, parkAbbreviation, parkName };
  } catch (err) {
    // Normalise non-Error throwables so the log fields are always populated.
    const e = err instanceof Error ? err : new Error(String(err));
    scraperWarn("sixflags", "fetchParkInfo threw", {
      apiId,
      name: e.name,
      err: e.message,
    });
    return null;
  }
}
/**
 * Heuristic on the API park name: a park counts as a main theme park unless
 * its lower-cased name contains one of the water-park / safari terms below.
 */
export function isMainThemePark(parkName: string): boolean {
  const normalized = parkName.toLowerCase();
  const excludedTerms = [
    "hurricane harbor",
    "safari",
    "water park",
    "waterpark",
    "schlitterbahn",
    "wave pool",
    "splash",
    "aquatic",
  ];
  for (const term of excludedTerms) {
    if (normalized.includes(term)) return false;
  }
  return true;
}