All checks were successful
Build and Deploy / Build & Push (push) Successful in 3m27s
The → ID line already confirms success. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
169 lines
5.2 KiB
TypeScript
169 lines
5.2 KiB
TypeScript
/**
 * One-time discovery script — finds the CloudFront API ID for each park.
 *
 * Run this once before using scrape.ts:
 *   npx tsx scripts/discover.ts
 *
 * For each park in the registry it:
 *   1. Opens the park's hours page in a headless browser
 *   2. Intercepts all calls to the operating-hours CloudFront API
 *   3. Identifies the main theme park ID (filters out water parks, safari, etc.)
 *   4. Stores the ID in the database
 *
 * Re-running is safe — already-discovered parks are skipped.
 */
|
|
|
|
import { chromium } from "playwright";
|
|
import { openDb, getApiId, setApiId, type DbInstance } from "../lib/db";
|
|
import { PARKS } from "../lib/parks";
|
|
import { fetchParkInfo, isMainThemePark } from "../lib/scrapers/sixflags";
|
|
import { readParkMeta, writeParkMeta, defaultParkMeta } from "../lib/park-meta";
|
|
|
|
// Matches an operating-hours request URL and captures the numeric park ID
// that follows "operating-hours/park/" (capture group 1).
const CLOUDFRONT_PATTERN = /operating-hours\/park\/(\d+)/;
|
|
|
|
/**
 * Finds the operating-hours API ID of the main theme park for a park slug.
 *
 * Loads the park's hours page on www.sixflags.com in headless Chromium and
 * records every park ID seen in request URLs that match CLOUDFRONT_PATTERN.
 * The first recorded ID whose park name passes `isMainThemePark` wins; when
 * none passes, the numerically lowest recorded ID is returned instead.
 *
 * @param slug - Path segment identifying the park on www.sixflags.com.
 * @returns The chosen API ID, or `null` when no matching request was seen.
 */
async function discoverParkId(slug: string): Promise<number | null> {
  const browser = await chromium.launch({ headless: true });
  try {
    const userAgent =
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
      "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36";
    const context = await browser.newContext({ userAgent, locale: "en-US" });
    const page = await context.newPage();

    // Collect park IDs from every outgoing request that hits the hours API.
    const seenIds = new Set<number>();
    page.on("request", (request) => {
      const hit = CLOUDFRONT_PATTERN.exec(request.url());
      if (hit !== null) seenIds.add(Number.parseInt(hit[1], 10));
    });

    const hoursUrl = `https://www.sixflags.com/${slug}/park-hours?date=2026-05-01`;
    try {
      await page.goto(hoursUrl, { waitUntil: "networkidle", timeout: 30_000 });
    } catch {
      // Navigation errors (including the timeout) are ignored on purpose:
      // any IDs captured before the failure are still evaluated below.
    }

    await context.close();

    if (seenIds.size === 0) return null;

    // Check each captured ID — pick the main theme park (not water park / safari)
    for (const candidate of seenIds) {
      const info = await fetchParkInfo(candidate);
      if (!info || !isMainThemePark(info.parkName)) continue;

      console.log(` → ID ${candidate} | ${info.parkAbbreviation} | ${info.parkName}`);
      return candidate;
    }

    // Fallback: return the lowest ID (usually the main park)
    const lowest = Math.min(...seenIds);
    console.log(` → fallback to lowest ID: ${lowest}`);
    return lowest;
  } finally {
    // Always release the browser, whether discovery succeeded or threw.
    await browser.close();
  }
}
|
|
|
|
/**
 * Deletes database rows for parks that are no longer in the PARKS registry.
 *
 * Every `park_id` found in `park_api_ids` that is not the `id` of a PARKS
 * entry is treated as stale. For each stale park its rows are deleted from
 * `park_days` and then from `park_api_ids`, and one line is logged with the
 * number of day rows that were removed. Does nothing (and logs nothing) when
 * no stale park is found.
 *
 * @param db - Open database handle to purge.
 */
function purgeRemovedParks(db: DbInstance): void {
  const knownIds = new Set(PARKS.map((p) => p.id));

  const storedRows = db
    .prepare("SELECT DISTINCT park_id FROM park_api_ids")
    .all() as { park_id: string }[];
  const staleParkIds = storedRows
    .map((row) => row.park_id)
    .filter((id) => !knownIds.has(id));

  if (staleParkIds.length === 0) return;

  // Prepare each statement once; the SQL is identical for every stale park,
  // so re-preparing inside the loop was wasted work.
  const countDays = db.prepare("SELECT COUNT(*) AS n FROM park_days WHERE park_id = ?");
  const deleteDays = db.prepare("DELETE FROM park_days WHERE park_id = ?");
  const deleteApiId = db.prepare("DELETE FROM park_api_ids WHERE park_id = ?");

  console.log(`\nRemoving ${staleParkIds.length} park(s) no longer in registry:`);
  for (const parkId of staleParkIds) {
    const days = (countDays.get(parkId) as { n: number }).n;

    // Delete park_days first: stale parks are detected via park_api_ids, so
    // if the second delete never runs the park is still found on the next run.
    deleteDays.run(parkId);
    deleteApiId.run(parkId);

    console.log(` removed ${parkId} (${days} day rows deleted)`);
  }
  console.log();
}
|
|
|
|
/**
 * Script entry point.
 *
 * 1. Purges database rows for parks that were removed from the registry.
 * 2. For every park without a stored API ID, runs browser discovery and
 *    stores the resulting ID together with the park's abbreviation and name.
 * 3. Makes sure the park-meta file has exactly one entry per registry park
 *    (adds skeleton entries, drops entries for removed parks).
 * 4. Prints a summary of the API ID and RCDB ID known for each park.
 *
 * The database handle is closed even when one of the steps throws.
 */
async function main(): Promise<void> {
  const db = openDb();

  try {
    purgeRemovedParks(db);

    for (const park of PARKS) {
      const existing = getApiId(db, park.id);
      if (existing !== null) {
        console.log(`${park.name}: already known (API ID ${existing}) — skip`);
        continue;
      }

      process.stdout.write(`${park.name} (${park.slug})... `);

      try {
        const apiId = await discoverParkId(park.slug);
        if (apiId === null) {
          console.log("FAILED — no API IDs captured");
        } else {
          // Fetch full info to store name/abbreviation
          const info = await fetchParkInfo(apiId);
          setApiId(db, park.id, apiId, info?.parkAbbreviation, info?.parkName);
        }
      } catch (err) {
        // One park failing must not abort discovery for the remaining parks.
        console.log(`ERROR: ${err}`);
      }

      // Small delay between parks to be polite. This runs after every
      // discovery attempt — including ones that captured no ID — because the
      // site was contacted in all of those cases.
      await new Promise((r) => setTimeout(r, 2000));
    }

    // ── Ensure park-meta.json has a skeleton entry for every park ────────────
    // Users fill in rcdb_id manually; scrape.ts populates coasters[] from RCDB.
    const meta = readParkMeta();
    let metaChanged = false;

    for (const park of PARKS) {
      if (!meta[park.id]) {
        meta[park.id] = defaultParkMeta();
        metaChanged = true;
      }
    }

    // Remove entries for parks no longer in the registry
    const registryIds = new Set(PARKS.map((p) => p.id));
    for (const id of Object.keys(meta)) {
      if (!registryIds.has(id)) {
        delete meta[id];
        metaChanged = true;
      }
    }

    // Only rewrite the file when something was actually added or removed.
    if (metaChanged) {
      writeParkMeta(meta);
      console.log("\nUpdated data/park-meta.json");
      console.log(" → Set rcdb_id for each park to enable the coaster filter.");
      console.log(" Find a park's RCDB ID from: https://rcdb.com (the number in the URL).");
    }

    // Print summary
    console.log("\n── Discovered IDs ──");
    for (const park of PARKS) {
      const id = getApiId(db, park.id);
      const rcdbId = meta[park.id]?.rcdb_id;
      const rcdbStr = rcdbId ? `rcdb:${rcdbId}` : "rcdb:?";
      console.log(` ${park.id.padEnd(30)} api:${String(id ?? "?").padEnd(8)} ${rcdbStr}`);
    }
  } finally {
    // Release the database handle on both the success and the failure path.
    db.close();
  }
}
|
|
|
|
// Run the script; any rejection from main() is reported on stderr and the
// process exits with status 1.
main().catch((failure) => {
  console.error("Fatal:", failure);
  process.exit(1);
});
|