Compare commits

...

3 Commits

Author SHA1 Message Date
da083c125c feat: automated nightly scraper + housekeeping
All checks were successful
Build and Deploy / Build & Push (push) Successful in 3m11s
Scraper automation (docker-compose):
- Add scraper service to docker-compose.yml using the same image and
  shared park_data volume; overrides CMD to run scrape-schedule.sh
- scripts/scrape-schedule.sh: runs an initial scrape on container start,
  then sleeps until 3:00 AM (respects TZ env var) and repeats nightly;
  logs timestamps and next-run countdown; non-fatal on scrape errors

Staleness window: 7 days → 72 hours in lib/db.ts so data refreshes
more frequently with the automated schedule in place

Remove favicon: delete app/icon.tsx and public/logo.svg

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-04 12:47:14 -04:00
20f1058e9e fix: protect today's record from scrape overwrites
Change upsertDay WHERE guard from >= to > date('now') so today is
treated identically to past dates. Once a park's operating day starts
the API drops that date, making it appear closed. The record written
when the date was still future is the correct one and must be preserved.

Only strictly future dates (> today) are now eligible for upserts.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-04 12:42:03 -04:00
5ea2dafc0e fix: preserve historical day records, skip scraping past months
upsertDay: add WHERE park_days.date >= date('now') to the ON CONFLICT
DO UPDATE clause. Past dates now behave as INSERT OR IGNORE — new rows
are written freely but existing historical records are never overwritten.
The API stops returning elapsed dates, so the DB row is the permanent
source of truth for any date that has already occurred.

isMonthScraped: months whose last day is before today are permanently
skipped regardless of staleness age. The API has no data for past months
so re-scraping them wastes API calls and cannot improve the records.
Current and future months continue to use the 7-day staleness window.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-04 12:37:27 -04:00
5 changed files with 81 additions and 81 deletions

View File

@@ -1,59 +0,0 @@
import { ImageResponse } from "next/og";
export const size = { width: 32, height: 32 };
export const contentType = "image/png";
// 32×32 roller-coaster icon generated with next/og's ImageResponse:
// three absolutely-positioned divs on a dark rounded square — a ground
// line, a semicircular lift hill, and a circular vertical loop.
export default function Icon() {
  const amber = "#f59e0b"; // single accent color shared by all three shapes

  // Ground line
  const ground = {
    position: "absolute" as const,
    left: 3,
    right: 3,
    bottom: 7,
    height: 2,
    backgroundColor: amber,
    borderRadius: 1,
  };

  // Lift hill — semicircle bump
  const liftHill = {
    position: "absolute" as const,
    left: 3,
    bottom: 9,
    width: 10,
    height: 13,
    backgroundColor: amber,
    borderRadius: "50% 50% 0 0",
  };

  // Vertical loop — circle outline
  const loop = {
    position: "absolute" as const,
    right: 5,
    bottom: 7,
    width: 12,
    height: 12,
    border: `2.5px solid ${amber}`,
    borderRadius: "50%",
  };

  return new ImageResponse(
    (
      <div
        style={{
          width: 32,
          height: 32,
          borderRadius: 8,
          backgroundColor: "#0c1220",
          display: "flex",
          position: "relative",
        }}
      >
        <div style={ground} />
        <div style={liftHill} />
        <div style={loop} />
      </div>
    ),
    { width: 32, height: 32 },
  );
}

View File

@@ -9,5 +9,15 @@ services:
- NODE_ENV=production
restart: unless-stopped
scraper:
image: gitea.thewrightserver.net/josh/sixflagssupercalendar:latest
volumes:
- park_data:/app/data
environment:
- NODE_ENV=production
- TZ=America/New_York # set your local timezone so "3am" is 3am your time
command: sh /app/scripts/scrape-schedule.sh
restart: unless-stopped
volumes:
park_data:

View File

@@ -46,6 +46,12 @@ export function upsertDay(
hoursLabel?: string,
specialType?: string
) {
// Today and past dates: INSERT new rows freely, but NEVER overwrite existing records.
// Once an operating day begins the API drops that date from its response, so a
// re-scrape would incorrectly record the day as closed. The DB row written when
// the date was still in the future is the permanent truth for that day.
//
// Future dates only: full upsert — hours can change and closures can be added.
db.prepare(`
INSERT INTO park_days (park_id, date, is_open, hours_label, special_type, scraped_at)
VALUES (?, ?, ?, ?, ?, ?)
@@ -54,6 +60,7 @@ export function upsertDay(
hours_label = excluded.hours_label,
special_type = excluded.special_type,
scraped_at = excluded.scraped_at
WHERE park_days.date > date('now')
`).run(parkId, date, isOpen ? 1 : 0, hoursLabel ?? null, specialType ?? null, new Date().toISOString());
}
@@ -160,16 +167,32 @@ export function getMonthCalendar(
return result;
}
/** True if the DB already has at least one row for this park+month. */
const STALE_AFTER_MS = 7 * 24 * 60 * 60 * 1000; // 1 week
const STALE_AFTER_MS = 72 * 60 * 60 * 1000; // 72 hours
/** True if the DB has data for this park+month scraped within the last week. */
/**
* Returns true when the scraper should skip this park+month.
*
* Two reasons to skip:
 * 1. The month is entirely in the past — the API will never return data for
 *    those dates again, so re-scraping costs an API call and gains nothing.
 *    Historical records are preserved forever by upsertDay.
 * 2. The month was scraped within the staleness window (currently 72 hours,
 *    see STALE_AFTER_MS) — the data is still fresh.
*/
export function isMonthScraped(
db: Database.Database,
parkId: string,
year: number,
month: number
): boolean {
// Compute the last calendar day of this month (avoids timezone issues).
const daysInMonth = new Date(year, month, 0).getDate();
const lastDay = `${year}-${String(month).padStart(2, "0")}-${String(daysInMonth).padStart(2, "0")}`;
const today = new Date().toISOString().slice(0, 10);
// Past month — history is locked in, no API data available, always skip.
if (lastDay < today) return true;
// Current/future month — skip only if recently scraped.
const prefix = `${year}-${String(month).padStart(2, "0")}`;
const row = db
.prepare(

View File

@@ -1,19 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 200 56" fill="none">
<!-- Lift hill + ground + camelback -->
<path
d="M 2 52 L 20 6 L 38 52 L 116 52 Q 134 22 152 52 L 196 52"
stroke="#f59e0b"
stroke-width="3.5"
stroke-linecap="round"
stroke-linejoin="round"
/>
<!-- Vertical loop -->
<circle
cx="75"
cy="33"
r="19"
stroke="#f59e0b"
stroke-width="3.5"
fill="none"
/>
</svg>

Before

Width:  |  Height:  |  Size: 434 B

View File

@@ -0,0 +1,45 @@
#!/bin/sh
# Nightly scraper scheduler — runs inside the Docker scraper service.
#
# Behaviour:
# 1. Runs an initial scrape immediately on container start.
# 2. Sleeps until 3:00 AM (container timezone, set via TZ env var).
# 3. Runs the scraper, then sleeps until the next 3:00 AM, forever.
#
# Timezone: set TZ in the scraper service environment to control when
# "3am" is (e.g. TZ=America/New_York). Defaults to UTC if unset.
# Print one timestamped scheduler log line to stdout,
# e.g. "[scheduler] 2026-04-04 03:00 EDT Starting scrape".
# Fix: the original interpolated "$*" directly after the $(date …) output with
# no separating space, producing fused lines like "…EDTStarting scrape".
log() {
    echo "[scheduler] $(date '+%Y-%m-%d %H:%M %Z') $*"
}
# Execute a single scrape run, logging start and outcome. A failing scrape is
# logged but never propagates — the nightly loop simply tries again next time.
run_scrape() {
    log "Starting scrape"
    if ! npm run scrape; then
        log "Scrape failed — will retry at next scheduled time"
        return 0
    fi
    log "Scrape completed"
}
# Emit (on stdout) the number of seconds from now until the next 3:00 AM in
# the container's local time (controlled by the TZ env var).
# NOTE(review): relies on `date -d` relative-date parsing, a GNU coreutils /
# busybox extension rather than POSIX — confirm the container image has it.
seconds_until_3am() {
    current=$(date +%s)
    next_run=$(date -d "today 03:00" +%s)
    # Today's 3am has already passed (or is this exact second): aim for tomorrow's.
    if [ "$current" -ge "$next_run" ]; then
        next_run=$(date -d "tomorrow 03:00" +%s)
    fi
    echo $((next_run - current))
}
# ── Run immediately on startup ────────────────────────────────────────────────
# One unconditional scrape on container start so a fresh deploy never waits
# until 3am for its first data.
run_scrape
# ── Nightly loop ──────────────────────────────────────────────────────────────
# Recompute the wait each iteration (rather than a fixed 24h sleep) so the
# schedule self-corrects after restarts or long-running scrapes.
while true; do
    wait=$(seconds_until_3am)
    # Human-readable absolute time of the next run, for the log line only.
    next=$(date -d "now + ${wait} seconds" '+%Y-%m-%d %H:%M %Z')
    log "Next scrape in $((wait / 3600))h $((( wait % 3600) / 60))m (${next})"
    sleep "$wait"
    run_scrape
done