Files
SixFlagsSuperCalendar/lib/scrapers/sixflags.ts
josh 8a68251beb
All checks were successful
Build and Deploy / Build & Push (push) Successful in 4m23s
feat: add debug script for inspecting raw API data per park+date
npm run debug -- --park greatadventure --date 2026-07-04

Prints the raw API response for that day alongside the parsed result
so mismatched or missing hours can be traced to their source.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-04 10:39:37 -04:00

181 lines
5.4 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Six Flags scraper — calls the internal CloudFront operating-hours API directly.
*
* API: https://d18car1k0ff81h.cloudfront.net/operating-hours/park/{apiId}?date=YYYYMM
* Returns full month data in one request — no browser needed.
*
* Each park has a numeric API ID that must be discovered first (see scripts/discover.ts).
* Once stored in the DB, this scraper never touches a browser again.
*
* Rate limiting: on 429/503, exponential backoff (30s → 60s → 120s), MAX_RETRIES attempts.
*/
const API_BASE = "https://d18car1k0ff81h.cloudfront.net/operating-hours/park";
const MAX_RETRIES = 3;
const BASE_BACKOFF_MS = 30_000;
export class RateLimitError extends Error {
constructor(public readonly waitedMs: number) {
super(`Rate limited — exhausted ${MAX_RETRIES} retries after ${waitedMs / 1000}s total wait`);
this.name = "RateLimitError";
}
}
const HEADERS = {
"User-Agent":
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
"(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
Accept: "application/json",
"Accept-Language": "en-US,en;q=0.9",
Referer: "https://www.sixflags.com/",
};
export interface DayResult {
date: string; // YYYY-MM-DD
isOpen: boolean;
hoursLabel?: string;
}
function sleep(ms: number) {
return new Promise<void>((r) => setTimeout(r, ms));
}
/** "04/05/2026" → "2026-04-05" */
function parseApiDate(d: string): string {
const [m, day, y] = d.split("/");
return `${y}-${m}-${day}`;
}
interface ApiOperatingItem {
timeFrom: string; // "10:30" 24h
timeTo: string; // "20:00" 24h
}
interface ApiOperating {
operatingTypeName: string; // "Park", "Special Event", etc.
items: ApiOperatingItem[];
}
interface ApiDay {
date: string;
isParkClosed: boolean;
operatings?: ApiOperating[];
}
/** "10:30" → "10:30am", "20:00" → "8pm", "12:00" → "12pm" */
function fmt24(time: string): string {
const [h, m] = time.split(":").map(Number);
const period = h >= 12 ? "pm" : "am";
const h12 = h % 12 || 12;
return m === 0 ? `${h12}${period}` : `${h12}:${String(m).padStart(2, "0")}${period}`;
}
interface ApiResponse {
parkId: number;
parkAbbreviation: string;
parkName: string;
dates: ApiDay[];
}
async function fetchApi(url: string, attempt = 0, totalWaitedMs = 0): Promise<ApiResponse> {
const res = await fetch(url, { headers: HEADERS });
if (res.status === 429 || res.status === 503) {
const retryAfter = res.headers.get("Retry-After");
const waitMs = retryAfter
? parseInt(retryAfter) * 1000
: BASE_BACKOFF_MS * Math.pow(2, attempt);
console.log(
` [rate-limited] HTTP ${res.status} — waiting ${waitMs / 1000}s (attempt ${attempt + 1}/${MAX_RETRIES})`
);
await sleep(waitMs);
if (attempt < MAX_RETRIES) return fetchApi(url, attempt + 1, totalWaitedMs + waitMs);
throw new RateLimitError(totalWaitedMs + waitMs);
}
if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
return res.json() as Promise<ApiResponse>;
}
/**
* Fetch the raw API response for a month — used by scripts/debug.ts.
*/
export async function scrapeMonthRaw(
apiId: number,
year: number,
month: number
): Promise<ApiResponse> {
const dateParam = `${year}${String(month).padStart(2, "0")}`;
const url = `${API_BASE}/${apiId}?date=${dateParam}`;
return fetchApi(url);
}
/**
* Fetch operating hours for an entire month in a single API call.
* apiId must be pre-discovered via scripts/discover.ts.
*/
export async function scrapeMonth(
apiId: number,
year: number,
month: number
): Promise<DayResult[]> {
const dateParam = `${year}${String(month).padStart(2, "0")}`;
const url = `${API_BASE}/${apiId}?date=${dateParam}`;
const data = await fetchApi(url);
return data.dates.map((d): DayResult => {
const date = parseApiDate(d.date);
// Prefer the "Park" operating entry; fall back to first entry
const operating =
d.operatings?.find((o) => o.operatingTypeName === "Park") ??
d.operatings?.[0];
const item = operating?.items?.[0];
const hoursLabel =
item?.timeFrom && item?.timeTo
? `${fmt24(item.timeFrom)} ${fmt24(item.timeTo)}`
: undefined;
// If the API says open but no hours are available, treat as closed
const isOpen = !d.isParkClosed && hoursLabel !== undefined;
return { date, isOpen, hoursLabel };
});
}
/**
* Fetch park info for a given API ID (used during discovery to identify park type).
* Uses the current month so there's always some data.
*/
export async function fetchParkInfo(
apiId: number
): Promise<Pick<ApiResponse, "parkId" | "parkAbbreviation" | "parkName"> | null> {
const now = new Date();
const dateParam = `${now.getFullYear()}${String(now.getMonth() + 1).padStart(2, "0")}`;
const url = `${API_BASE}/${apiId}?date=${dateParam}`;
try {
const data = await fetchApi(url);
return {
parkId: data.parkId,
parkAbbreviation: data.parkAbbreviation,
parkName: data.parkName,
};
} catch {
return null;
}
}
/** Returns true if the API park name looks like a main theme park (not a water park or safari). */
export function isMainThemePark(parkName: string): boolean {
const lower = parkName.toLowerCase();
const waterParkKeywords = [
"hurricane harbor",
"safari",
"water park",
"waterpark",
"schlitterbahn",
"wave pool",
"splash",
"aquatic",
];
return !waterParkKeywords.some((kw) => lower.includes(kw));
}