refactor: hardcode API IDs and coaster lists, remove Playwright discovery

Embed Six Flags API IDs directly in the park registry and snapshot coaster lists from park-meta.json into a TypeScript module. This eliminates the Playwright-based discovery script, RCDB scraper, and runtime dependency on park-meta.json — preparing for the backend API transition.

- Add apiId field to Park type and all 24 park entries
- Create lib/coaster-data.ts with hardcoded coaster lists
- Update page components to use park.apiId and new getCoasterSet()
- Remove scripts/discover.ts, lib/scrapers/rcdb.ts, lib/park-meta.ts
- Remove data/park-meta.json from shared volume
- Remove playwright devDependency and discover npm script
- Simplify scripts/scrape.ts (no RCDB, no discovery checks)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-23 21:25:53 -04:00
parent 757c2a8d4f
commit 4652a92c29
13 changed files with 381 additions and 866 deletions
@@ -1,91 +0,0 @@
-/**
- * RCDB (Roller Coaster DataBase) scraper.
- *
- * Fetches a park's RCDB page (https://rcdb.com/{id}.htm) and extracts the
- * names of operating roller coasters from the "Operating Roller Coasters"
- * section.
- *
- * RCDB has no public API. This scraper reads the static HTML page.
- * Please scrape infrequently (30-day staleness window) to be respectful.
- */
-
-const BASE = "https://rcdb.com";
-
-const HEADERS = {
-  "User-Agent":
-    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
-    "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
-  Accept: "text/html,application/xhtml+xml",
-  "Accept-Language": "en-US,en;q=0.9",
-};
-
-/**
- * Scrape operating roller coaster names for a park.
- *
- * Returns an array of coaster names on success, or null when the page
- * cannot be fetched or contains no operating coasters.
- */
-export async function scrapeRcdbCoasters(rcdbId: number): Promise<string[] | null> {
-  const url = `${BASE}/${rcdbId}.htm`;
-  try {
-    const res = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(15_000) });
-    if (!res.ok) {
-      console.error(`  RCDB ${rcdbId}: HTTP ${res.status}`);
-      return null;
-    }
-    const html = await res.text();
-    return parseOperatingCoasters(html);
-  } catch (err) {
-    console.error(`  RCDB ${rcdbId}: ${err}`);
-    return null;
-  }
-}
-
-/**
- * Parse operating roller coaster names from RCDB park page HTML.
- *
- * RCDB park pages list coasters in sections bounded by <section> tags.
- * The operating section heading looks like:
- *   <h4>Operating Roller Coasters: <a href="...">16</a></h4>
- *
- * Each coaster is an <a> link to its detail page with an unquoted href:
- *   <td data-sort="Batman The Ride"><a href=/5.htm>Batman The Ride</a>
- *
- * We extract only those links (href=/DIGITS.htm) from within the
- * operating section, stopping at the next <section> tag.
- */
-function parseOperatingCoasters(html: string): string[] {
-  // Find the "Operating Roller Coasters" section heading.
-  const opIdx = html.search(/Operating\s+Roller\s+Coasters/i);
-  if (opIdx === -1) return [];
-
-  // The section ends at the next <section> tag (e.g. "Defunct Roller Coasters").
-  const after = html.slice(opIdx);
-  const nextSection = after.search(/<section\b/i);
-  const sectionHtml = nextSection > 0 ? after.slice(0, nextSection) : after;
-
-  // Extract coaster names from links to RCDB detail pages.
-  // RCDB uses unquoted href attributes: href=/1234.htm
-  // General links (/g.htm, /r.htm, /location.htm, etc.) won't match \d+\.htm.
-  const names: string[] = [];
-  const linkPattern = /<a\s[^>]*href=["']?\/(\d+)\.htm["']?[^>]*>([^<]+)<\/a>/gi;
-  let match: RegExpExecArray | null;
-
-  while ((match = linkPattern.exec(sectionHtml)) !== null) {
-    const name = decodeHtmlEntities(match[2].trim());
-    if (name) names.push(name);
-  }
-
-  // Deduplicate while preserving order
-  return [...new Set(names)];
-}
-
-function decodeHtmlEntities(text: string): string {
-  return text
-    .replace(/&amp;/g, "&")
-    .replace(/&lt;/g, "<")
-    .replace(/&gt;/g, ">")
-    .replace(/&quot;/g, '"')
-    .replace(/&#(\d+);/g, (_, code) => String.fromCharCode(parseInt(code, 10)))
-    .replace(/&[a-z]+;/gi, "");
-}
@@ -1,11 +1,8 @@
 /**
- * Six Flags scraper — calls the internal CloudFront operating-hours API directly.
+ * Six Flags API client — calls the internal CloudFront operating-hours API.
 *
 * API: https://d18car1k0ff81h.cloudfront.net/operating-hours/park/{apiId}?date=YYYYMM
- * Returns full month data in one request — no browser needed.
- *
- * Each park has a numeric API ID that must be discovered first (see scripts/discover.ts).
- * Once stored in the DB, this scraper never touches a browser again.
+ * Returns full month data in one request.
 *
 * Rate limiting: on 429/503, exponential backoff (30s → 60s → 120s), MAX_RETRIES attempts.
 */
@@ -309,7 +306,6 @@ export async function scrapeRidesForDay(

 /**
 * Fetch operating hours for an entire month in a single API call.
- * apiId must be pre-discovered via scripts/discover.ts.
 */
 export async function scrapeMonth(
  apiId: number,
@@ -325,8 +321,7 @@ export async function scrapeMonth(
 }

 /**
- * Fetch park info for a given API ID (used during discovery to identify park type).
- * Uses the current month so there's always some data.
+ * Fetch park info for a given API ID. Uses the current month so there's always some data.
 */
 export async function fetchParkInfo(
  apiId: number
@@ -1,5 +1,6 @@
 export interface Park {
  id: string;
+  apiId: number;
  name: string;
  shortName: string;
  chain: "sixflags" | string;