Compare commits

...

3 Commits

Author SHA1 Message Date
da083c125c feat: automated nightly scraper + housekeeping
All checks were successful
Build and Deploy / Build & Push (push) Successful in 3m11s
Scraper automation (docker-compose):
- Add scraper service to docker-compose.yml using the same image and
  shared park_data volume; overrides CMD to run scrape-schedule.sh
- scripts/scrape-schedule.sh: runs an initial scrape on container start,
  then sleeps until 3:00 AM (respects TZ env var) and repeats nightly;
  logs timestamps and next-run countdown; non-fatal on scrape errors

Staleness window: 7 days → 72 hours in lib/db.ts so data refreshes
more frequently with the automated schedule in place

Remove favicon: delete app/icon.tsx and public/logo.svg

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-04 12:47:14 -04:00
20f1058e9e fix: protect today's record from scrape overwrites
Change upsertDay WHERE guard from >= to > date('now') so today is
treated identically to past dates. Once a park's operating day starts
the API drops that date, making it appear closed. The record written
when the date was still future is the correct one and must be preserved.

Only strictly future dates (> today) are now eligible for upserts.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-04 12:42:03 -04:00
5ea2dafc0e fix: preserve historical day records, skip scraping past months
upsertDay: add WHERE park_days.date >= date('now') to the ON CONFLICT
DO UPDATE clause. Past dates now behave as INSERT OR IGNORE — new rows
are written freely but existing historical records are never overwritten.
The API stops returning elapsed dates, so the DB row is the permanent
source of truth for any date that has already occurred.

isMonthScraped: months whose last day is before today are permanently
skipped regardless of staleness age. The API has no data for past months
so re-scraping them wastes API calls and cannot improve the records.
Current and future months continue to use the 7-day staleness window.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-04 12:37:27 -04:00
5 changed files with 81 additions and 81 deletions

View File

@@ -1,59 +0,0 @@
import { ImageResponse } from "next/og";
export const size = { width: 32, height: 32 };
export const contentType = "image/png";
// 32×32 roller-coaster icon generated with next/og's ImageResponse:
// three absolutely-positioned divs on a dark rounded square — a ground
// line, a semicircular lift hill, and a circular vertical loop.
export default function Icon() {
  const amber = "#f59e0b"; // single accent color shared by all three shapes

  // Ground line
  const ground = {
    position: "absolute" as const,
    left: 3,
    right: 3,
    bottom: 7,
    height: 2,
    backgroundColor: amber,
    borderRadius: 1,
  };

  // Lift hill — semicircle bump
  const liftHill = {
    position: "absolute" as const,
    left: 3,
    bottom: 9,
    width: 10,
    height: 13,
    backgroundColor: amber,
    borderRadius: "50% 50% 0 0",
  };

  // Vertical loop — circle outline
  const loop = {
    position: "absolute" as const,
    right: 5,
    bottom: 7,
    width: 12,
    height: 12,
    border: `2.5px solid ${amber}`,
    borderRadius: "50%",
  };

  return new ImageResponse(
    (
      <div
        style={{
          width: 32,
          height: 32,
          borderRadius: 8,
          backgroundColor: "#0c1220",
          display: "flex",
          position: "relative",
        }}
      >
        <div style={ground} />
        <div style={liftHill} />
        <div style={loop} />
      </div>
    ),
    { width: 32, height: 32 },
  );
}

View File

@@ -9,5 +9,15 @@ services:
- NODE_ENV=production
restart: unless-stopped
scraper:
image: gitea.thewrightserver.net/josh/sixflagssupercalendar:latest
volumes:
- park_data:/app/data
environment:
- NODE_ENV=production
- TZ=America/New_York # set your local timezone so "3am" is 3am your time
command: sh /app/scripts/scrape-schedule.sh
restart: unless-stopped
volumes:
park_data:

View File

@@ -46,6 +46,12 @@ export function upsertDay(
hoursLabel?: string,
specialType?: string
) {
// Today and past dates: INSERT new rows freely, but NEVER overwrite existing records.
// Once an operating day begins the API drops that date from its response, so a
// re-scrape would incorrectly record the day as closed. The DB row written when
// the date was still in the future is the permanent truth for that day.
//
// Future dates only: full upsert — hours can change and closures can be added.
db.prepare(`
INSERT INTO park_days (park_id, date, is_open, hours_label, special_type, scraped_at)
VALUES (?, ?, ?, ?, ?, ?)
@@ -54,6 +60,7 @@ export function upsertDay(
hours_label = excluded.hours_label,
special_type = excluded.special_type,
scraped_at = excluded.scraped_at
WHERE park_days.date > date('now')
`).run(parkId, date, isOpen ? 1 : 0, hoursLabel ?? null, specialType ?? null, new Date().toISOString());
}
@@ -160,16 +167,32 @@ export function getMonthCalendar(
return result;
}
/** True if the DB already has at least one row for this park+month. */
const STALE_AFTER_MS = 7 * 24 * 60 * 60 * 1000; // 1 week
const STALE_AFTER_MS = 72 * 60 * 60 * 1000; // 72 hours
/** True if the DB has data for this park+month scraped within the last week. */
/**
* Returns true when the scraper should skip this park+month.
*
* Two reasons to skip:
 * 1. The month is entirely in the past — the API will never return data for
 *    those dates again, so re-scraping costs an API call and gains nothing.
 *    Historical records are preserved forever by upsertDay.
 * 2. The month was scraped within the staleness window (currently 72 hours,
 *    see STALE_AFTER_MS) — the data is still fresh.
*/
export function isMonthScraped(
db: Database.Database,
parkId: string,
year: number,
month: number
): boolean {
// Compute the last calendar day of this month (avoids timezone issues).
const daysInMonth = new Date(year, month, 0).getDate();
const lastDay = `${year}-${String(month).padStart(2, "0")}-${String(daysInMonth).padStart(2, "0")}`;
const today = new Date().toISOString().slice(0, 10);
// Past month — history is locked in, no API data available, always skip.
if (lastDay < today) return true;
// Current/future month — skip only if recently scraped.
const prefix = `${year}-${String(month).padStart(2, "0")}`;
const row = db
.prepare(

View File

@@ -1,19 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 200 56" fill="none">
<!-- Lift hill + ground + camelback -->
<path
d="M 2 52 L 20 6 L 38 52 L 116 52 Q 134 22 152 52 L 196 52"
stroke="#f59e0b"
stroke-width="3.5"
stroke-linecap="round"
stroke-linejoin="round"
/>
<!-- Vertical loop -->
<circle
cx="75"
cy="33"
r="19"
stroke="#f59e0b"
stroke-width="3.5"
fill="none"
/>
</svg>

Before

Width:  |  Height:  |  Size: 434 B

View File

@@ -0,0 +1,45 @@
#!/bin/sh
# Nightly scraper scheduler — runs inside the Docker scraper service.
#
# Behaviour:
# 1. Runs an initial scrape immediately on container start.
# 2. Sleeps until 3:00 AM (container timezone, set via TZ env var).
# 3. Runs the scraper, then sleeps until the next 3:00 AM, forever.
#
# Timezone: set TZ in the scraper service environment to control when
# "3am" is (e.g. TZ=America/New_York). Defaults to UTC if unset.
# Print one timestamped scheduler log line to stdout,
# e.g. "[scheduler] 2026-04-04 03:00 EDT Starting scrape".
# Fix: the original interpolated "$*" directly after the $(date …) output with
# no separating space, producing fused lines like "…EDTStarting scrape".
log() {
    echo "[scheduler] $(date '+%Y-%m-%d %H:%M %Z') $*"
}
# Execute a single scrape run, logging start and outcome. A failing scrape is
# logged but never propagates — the nightly loop simply tries again next time.
run_scrape() {
    log "Starting scrape"
    if ! npm run scrape; then
        log "Scrape failed — will retry at next scheduled time"
        return 0
    fi
    log "Scrape completed"
}
# Emit (on stdout) the number of seconds from now until the next 3:00 AM in
# the container's local time (controlled by the TZ env var).
# NOTE(review): relies on `date -d` relative-date parsing, a GNU coreutils /
# busybox extension rather than POSIX — confirm the container image has it.
seconds_until_3am() {
    current=$(date +%s)
    next_run=$(date -d "today 03:00" +%s)
    # Today's 3am has already passed (or is this exact second): aim for tomorrow's.
    if [ "$current" -ge "$next_run" ]; then
        next_run=$(date -d "tomorrow 03:00" +%s)
    fi
    echo $((next_run - current))
}
# ── Run immediately on startup ────────────────────────────────────────────────
# One unconditional scrape on container start so a fresh deploy never waits
# until 3am for its first data.
run_scrape
# ── Nightly loop ──────────────────────────────────────────────────────────────
# Recompute the wait each iteration (rather than a fixed 24h sleep) so the
# schedule self-corrects after restarts or long-running scrapes.
while true; do
    wait=$(seconds_until_3am)
    # Human-readable absolute time of the next run, for the log line only.
    next=$(date -d "now + ${wait} seconds" '+%Y-%m-%d %H:%M %Z')
    log "Next scrape in $((wait / 3600))h $((( wait % 3600) / 60))m (${next})"
    sleep "$wait"
    run_scrape
done