From e1657f07d761bb6b3ea317c87104d1c7f70fa24d Mon Sep 17 00:00:00 2001 From: josh Date: Sun, 31 May 2026 20:28:25 -0400 Subject: [PATCH] fix: surface silent scraper failures and stop falsely claiming weather delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The homepage was flagging every park as being in weather delay because calendar.ts collapsed "fetchLiveRides returned null" into the same openRides=0 bucket as "all rides actually closed." Meanwhile every scraper (queuetimes, sixflags operating-hours, sixflags wait-times) was swallowing non-OK responses and exceptions silently, so logs gave no signal as to which upstream was failing or how. Add a small scraperWarn helper that emits in the same shape as backend/src/log.ts (without importing it — lib/scrapers is shared with the Next frontend). Use it in all three scrapers to record HTTP status and error name+message before each return null. Add parksSkipped to the tier-5 summary log so we can tell when the openParks filter is rejecting every park vs. the fetcher silently failing. Convert calendar.ts ridesCache to a discriminated union { kind: "ok" | "unknown" }. Weather delay only fires on { kind: "ok", openRides: 0 }; unknown entries get a 30s TTL so we recover quickly when upstream comes back and don't thunder-herd in the meantime. 
Co-Authored-By: Claude Opus 4.7 --- backend/src/routes/calendar.ts | 54 +++++++++++++++++++++--------- backend/src/services/scheduler.ts | 1 + lib/scrapers/log.ts | 22 ++++++++++++ lib/scrapers/queuetimes.ts | 18 ++++++++-- lib/scrapers/sixflags-waittimes.ts | 18 ++++++++-- lib/scrapers/sixflags.ts | 41 +++++++++++++++++++---- 6 files changed, 129 insertions(+), 25 deletions(-) create mode 100644 lib/scrapers/log.ts diff --git a/backend/src/routes/calendar.ts b/backend/src/routes/calendar.ts index 1484e36..eefaf3b 100644 --- a/backend/src/routes/calendar.ts +++ b/backend/src/routes/calendar.ts @@ -15,7 +15,16 @@ const todayCache = new TtlCache(5 * 60 * 1000); // doesn't re-fetch on every request. Same TTL as todayCache so they expire // together. const todayChecked = new TtlCache(5 * 60 * 1000); -const ridesCache = new TtlCache<{ openRides: number; openCoasters: number } | null>(5 * 60 * 1000); +// "ok" — fresh fetch succeeded; counts reflect actual live data. +// "unknown" — fetch failed (network, timeout, rate-limit, upstream null). +// We do NOT know whether the park is in weather delay; treat as +// "no signal" so the homepage doesn't falsely flag it. Stored with +// a shorter TTL so we recover quickly when upstream comes back. 
+type RidesCacheEntry = + | { kind: "ok"; openRides: number; openCoasters: number } + | { kind: "unknown" }; +const ridesCache = new TtlCache(5 * 60 * 1000); +const UNKNOWN_RIDES_TTL_MS = 30_000; const app = new Hono(); @@ -89,29 +98,44 @@ app.get("/week", async (c) => { const trackedParks = openTodayParks.filter((p) => QUEUE_TIMES_IDS[p.id]); const results = await Promise.all( - trackedParks.map(async (p) => { - let cached = ridesCache.get(p.id); - if (cached === null) { + trackedParks.map(async (p): Promise<{ id: string; entry: RidesCacheEntry }> => { + let entry = ridesCache.get(p.id); + if (!entry) { const coasterSet = getCoasterSet(p.id); const result = await fetchLiveRides(QUEUE_TIMES_IDS[p.id], coasterSet).catch((err: Error) => { log.warn("calendar.week", "fetchLiveRides failed", { park: p.id, err: err.message }); return null; }); - cached = result - ? { - openRides: result.rides.filter((r) => r.isOpen).length, - openCoasters: result.rides.filter((r) => r.isOpen && r.isCoaster).length, - } - : null; - ridesCache.set(p.id, cached); + if (result) { + entry = { + kind: "ok", + openRides: result.rides.filter((r) => r.isOpen).length, + openCoasters: result.rides.filter((r) => r.isOpen && r.isCoaster).length, + }; + ridesCache.set(p.id, entry); + } else { + entry = { kind: "unknown" }; + ridesCache.set(p.id, entry, UNKNOWN_RIDES_TTL_MS); + } } - return { id: p.id, ...(cached ?? { openRides: 0, openCoasters: 0 }) }; + return { id: p.id, entry }; }), ); - weatherDelayParkIds = results.filter(({ openRides }) => openRides === 0).map(({ id }) => id); - rideCounts = Object.fromEntries(results.filter(({ openRides }) => openRides > 0).map(({ id, openRides }) => [id, openRides])); - coasterCounts = Object.fromEntries(results.filter(({ openCoasters }) => openCoasters > 0).map(({ id, openCoasters }) => [id, openCoasters])); + // Only flag weather delay when we know rides are actually closed. 
An + // "unknown" entry means our upstream fetch failed — claim no badge rather + // than falsely showing the storm icon for an outage. + for (const { id, entry } of results) { + if (entry.kind !== "ok") continue; + if (entry.openRides === 0) { + weatherDelayParkIds.push(id); + } else { + rideCounts[id] = entry.openRides; + } + if (entry.openCoasters > 0) { + coasterCounts[id] = entry.openCoasters; + } + } } const scrapedCount = Object.values(data).reduce((sum, parkData) => sum + Object.keys(parkData).length, 0); diff --git a/backend/src/services/scheduler.ts b/backend/src/services/scheduler.ts index 4cbd889..6669767 100644 --- a/backend/src/services/scheduler.ts +++ b/backend/src/services/scheduler.ts @@ -72,6 +72,7 @@ export function startScheduler(): void { const r = await sampleAllOpenParks(); log.info("scheduler.tier5", "sample run complete", { parksSampled: r.parksSampled, + parksSkipped: r.parksSkipped, samplesWritten: r.samplesWritten, weatherDelayed: r.weatherDelayed, errors: r.errors, diff --git a/lib/scrapers/log.ts b/lib/scrapers/log.ts new file mode 100644 index 0000000..6b6cd18 --- /dev/null +++ b/lib/scrapers/log.ts @@ -0,0 +1,22 @@ +/** + * Minimal structured warn-logger for scrapers. Matches the backend's + * `${ISO} [WARN] [tag] msg key=value...` shape so warns from these files + * grep alongside backend/src/log.ts output. Lives here (not in backend/) + * because lib/scrapers/ is imported by both backend and Next.js code — + * importing backend's log would cross a layering boundary. + */ + +type Meta = Record; + +export function scraperWarn(tag: string, msg: string, meta?: Meta): void { + const parts: string[] = []; + if (meta) { + for (const [k, v] of Object.entries(meta)) { + if (v === undefined) continue; + const s = typeof v === "string" ? v : JSON.stringify(v); + parts.push(`${k}=${s}`); + } + } + const tail = parts.length ? 
" " + parts.join(" ") : ""; + console.warn(`${new Date().toISOString()} [WARN] [${tag}] ${msg}${tail}`); +} diff --git a/lib/scrapers/queuetimes.ts b/lib/scrapers/queuetimes.ts index 840899d..8da0d37 100644 --- a/lib/scrapers/queuetimes.ts +++ b/lib/scrapers/queuetimes.ts @@ -8,6 +8,7 @@ */ import { isCoasterMatch } from "../coaster-match"; +import { scraperWarn } from "./log"; const BASE = "https://queue-times.com/parks"; @@ -89,7 +90,14 @@ export async function fetchLiveRides( signal: AbortSignal.timeout(10_000), } as RequestInit & { next: { revalidate: number } }); - if (!res.ok) return null; + if (!res.ok) { + scraperWarn("queuetimes", "fetchLiveRides non-OK response", { + queueTimesId, + status: res.status, + statusText: res.statusText, + }); + return null; + } const json = (await res.json()) as QTResponse; @@ -131,7 +139,13 @@ export async function fetchLiveRides( }); return { rides, fetchedAt: new Date().toISOString() }; - } catch { + } catch (err) { + const e = err as Error; + scraperWarn("queuetimes", "fetchLiveRides threw", { + queueTimesId, + name: e.name, + err: e.message, + }); return null; } } diff --git a/lib/scrapers/sixflags-waittimes.ts b/lib/scrapers/sixflags-waittimes.ts index b37dcb3..f14ef78 100644 --- a/lib/scrapers/sixflags-waittimes.ts +++ b/lib/scrapers/sixflags-waittimes.ts @@ -12,6 +12,7 @@ */ import { normalizeForMatch } from "../coaster-match"; +import { scraperWarn } from "./log"; const WAIT_TIMES_BASE = "https://d18car1k0ff81h.cloudfront.net/wait-times/park"; @@ -119,10 +120,23 @@ export async function fetchFastLaneWaits( signal: AbortSignal.timeout(10_000), } as RequestInit & { next: { revalidate: number } }); - if (!res.ok) return null; + if (!res.ok) { + scraperWarn("sixflags-waittimes", "fetchFastLaneWaits non-OK response", { + apiId, + status: res.status, + statusText: res.statusText, + }); + return null; + } return parseWaitTimes((await res.json()) as WTResponse); - } catch { + } catch (err) { + const e = err as Error; + 
scraperWarn("sixflags-waittimes", "fetchFastLaneWaits threw", { + apiId, + name: e.name, + err: e.message, + }); return null; } } diff --git a/lib/scrapers/sixflags.ts b/lib/scrapers/sixflags.ts index 1710186..03ee9e5 100644 --- a/lib/scrapers/sixflags.ts +++ b/lib/scrapers/sixflags.ts @@ -7,6 +7,8 @@ * Rate limiting: on 429/503, exponential backoff (30s → 60s → 120s), MAX_RETRIES attempts. */ +import { scraperWarn } from "./log"; + const API_BASE = "https://d18car1k0ff81h.cloudfront.net/operating-hours/park"; const MAX_RETRIES = 3; const BASE_BACKOFF_MS = 30_000; @@ -191,9 +193,18 @@ export async function fetchToday(apiId: number, revalidate?: number): Promise a.date.localeCompare(b.date)); dayData = nextSorted.find((d) => !d.isParkClosed) ?? nextSorted[0]; - } catch { - // If the next month fetch fails, we simply have no fallback data. + } catch (err) { + const e = err as Error; + scraperWarn("sixflags", "scrapeRidesForDay next-month fallback threw", { + apiId, + year: nextYear, + month: nextMonth, + name: e.name, + err: e.message, + }); } }