/**
 * One-time discovery script — finds the CloudFront API ID for each park.
 *
 * Run this once before using scrape.ts:
 *   npx tsx scripts/discover.ts
 *
 * For each park in the registry it:
 *   1. Opens the park's hours page in a headless browser
 *   2. Intercepts all calls to the operating-hours CloudFront API
 *   3. Identifies the main theme park ID (filters out water parks, safari, etc.)
 *   4. Stores the ID in the database
 *
 * Re-running is safe — already-discovered parks are skipped.
 */

import { chromium } from "playwright";
import { openDb, getApiId, setApiId, type DbInstance } from "../lib/db";
import { PARKS } from "../lib/parks";
import { fetchParkInfo, isMainThemePark } from "../lib/scrapers/sixflags";

/** Matches operating-hours API request URLs; group 1 is the numeric park ID. */
const CLOUDFRONT_PATTERN = /operating-hours\/park\/(\d+)/;

/**
 * Loads a park's hours page in headless Chromium and works out which
 * operating-hours API ID belongs to the main theme park.
 *
 * Every request the page issues is matched against CLOUDFRONT_PATTERN and the
 * numeric IDs are collected. Each collected ID is then looked up with
 * fetchParkInfo, and the first one whose park name passes isMainThemePark wins.
 * If none passes, the numerically lowest captured ID is returned instead.
 *
 * @param slug - Park slug used to build the sixflags.com park-hours URL.
 * @returns The chosen API ID, or `null` when the page issued no matching
 *   requests.
 */
async function discoverParkId(slug: string): Promise<number | null> {
  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext({
      userAgent:
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
        "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
      locale: "en-US",
    });
    const page = await context.newPage();

    // Typed as numbers so Math.min and fetchParkInfo below type-check.
    const capturedIds = new Set<number>();
    page.on("request", (req) => {
      const rawId = req.url().match(CLOUDFRONT_PATTERN)?.[1];
      if (rawId !== undefined) capturedIds.add(Number.parseInt(rawId, 10));
    });

    try {
      await page.goto(
        `https://www.sixflags.com/${slug}/park-hours?date=2026-05-01`,
        { waitUntil: "networkidle", timeout: 30_000 }
      );
    } catch {
      // Deliberately best-effort: a navigation error or timeout is ignored
      // here, and whatever IDs were captured before it are still used below.
    }
    await context.close();

    if (capturedIds.size === 0) return null;

    // Check each captured ID — pick the main theme park (not water park / safari)
    for (const id of capturedIds) {
      const info = await fetchParkInfo(id);
      if (info && isMainThemePark(info.parkName)) {
        console.log(
          ` → ID ${id} | ${info.parkAbbreviation} | ${info.parkName}`
        );
        return id;
      }
    }

    // Fallback: return the lowest ID (usually the main park)
    const fallback = Math.min(...capturedIds);
    console.log(` → fallback to lowest ID: ${fallback}`);
    return fallback;
  } finally {
    // Always release the browser process, even when a lookup above throws.
    await browser.close();
  }
}
/**
 * Deletes database rows for parks that are no longer listed in PARKS.
 *
 * A park counts as stale when its `park_id` appears in `park_api_ids` but not
 * in the registry. For each stale park the matching `park_days` rows are
 * deleted first, then its `park_api_ids` rows, and the number of day rows
 * removed is logged.
 *
 * @param db - Open database handle.
 */
function purgeRemovedParks(db: DbInstance) {
  const knownIds = new Set(PARKS.map((p) => p.id));
  const staleParkIds = (
    db.prepare("SELECT DISTINCT park_id FROM park_api_ids").all() as { park_id: string }[]
  )
    .map((r) => r.park_id)
    .filter((id) => !knownIds.has(id));

  if (staleParkIds.length === 0) return;

  // Prepare each statement once and reuse it for every stale park.
  const countDays = db.prepare("SELECT COUNT(*) AS n FROM park_days WHERE park_id = ?");
  const deleteDays = db.prepare("DELETE FROM park_days WHERE park_id = ?");
  const deleteApiIds = db.prepare("DELETE FROM park_api_ids WHERE park_id = ?");

  console.log(`\nRemoving ${staleParkIds.length} park(s) no longer in registry:`);
  for (const parkId of staleParkIds) {
    const days = (countDays.get(parkId) as { n: number }).n;
    // park_api_ids is removed last: it is the table the stale scan above reads,
    // so an interrupted purge is picked up again on the next run.
    deleteDays.run(parkId);
    deleteApiIds.run(parkId);
    console.log(` removed ${parkId} (${days} day rows deleted)`);
  }
  console.log();
}

/**
 * Script entry point: purges parks removed from the registry, discovers and
 * stores an API ID for every park that does not have one yet, then prints a
 * summary of all known IDs.
 *
 * Failures for a single park are logged and do not stop the run. The database
 * is closed on every exit path.
 */
async function main() {
  const db = openDb();
  try {
    purgeRemovedParks(db);

    for (const park of PARKS) {
      const existing = getApiId(db, park.id);
      if (existing !== null) {
        console.log(`${park.name}: already known (API ID ${existing}) — skip`);
        continue;
      }

      process.stdout.write(`${park.name} (${park.slug})... `);
      try {
        const apiId = await discoverParkId(park.slug);
        if (apiId === null) {
          console.log("FAILED — no API IDs captured");
        } else {
          // Fetch full info to store name/abbreviation
          const info = await fetchParkInfo(apiId);
          setApiId(db, park.id, apiId, info?.parkAbbreviation, info?.parkName);
          console.log(`done (ID ${apiId})`);
        }
      } catch (err) {
        console.log(`ERROR: ${err}`);
      }

      // Small delay between parks to be polite. It runs after every attempt,
      // including ones that captured no IDs, since those also loaded the page.
      await new Promise((r) => setTimeout(r, 2000));
    }

    // Print summary
    console.log("\n── Discovered IDs ──");
    for (const park of PARKS) {
      const id = getApiId(db, park.id);
      console.log(` ${park.id.padEnd(30)} ${id ?? "NOT FOUND"}`);
    }
  } finally {
    db.close();
  }
}

main().catch((err) => {
  console.error("Fatal:", err);
  process.exit(1);
});