From ccd35c4648e90a6d31c20988b516652c3c2f5ebb Mon Sep 17 00:00:00 2001 From: josh Date: Thu, 23 Apr 2026 21:33:34 -0400 Subject: [PATCH] chore: remove old scraper scripts, replaced by backend scheduler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Delete scripts/scrape.ts and scripts/scrape-schedule.sh — their functionality now lives in the backend's node-cron tiered scheduler (backend/src/services/scheduler.ts + scraper.ts). Remove scrape and scrape:force npm scripts from package.json. Co-Authored-By: Claude Opus 4.6 --- package.json | 2 - scripts/scrape-schedule.sh | 45 ---------------- scripts/scrape.ts | 107 ------------------------------------- 3 files changed, 154 deletions(-) delete mode 100644 scripts/scrape-schedule.sh delete mode 100644 scripts/scrape.ts diff --git a/package.json b/package.json index bcc2352..d50a1b1 100644 --- a/package.json +++ b/package.json @@ -7,8 +7,6 @@ "build": "next build", "start": "next start", "lint": "next lint", - "scrape": "tsx scripts/scrape.ts", - "scrape:force": "tsx scripts/scrape.ts --rescrape", "debug": "tsx scripts/debug.ts", "test": "tsx --test tests/*.test.ts" }, diff --git a/scripts/scrape-schedule.sh b/scripts/scrape-schedule.sh deleted file mode 100644 index 9c91ea0..0000000 --- a/scripts/scrape-schedule.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/sh -# Nightly scraper scheduler — runs inside the Docker scraper service. -# -# Behaviour: -# 1. Runs an initial scrape immediately on container start. -# 2. Sleeps until 3:00 AM (container timezone, set via TZ env var). -# 3. Runs the scraper, then sleeps until the next 3:00 AM, forever. -# -# Timezone: set TZ in the scraper service environment to control when -# "3am" is (e.g. TZ=America/New_York). Defaults to UTC if unset. - -log() { - echo "[scheduler] $(date '+%Y-%m-%d %H:%M %Z') — $*" -} - -run_scrape() { - log "Starting scrape" - if npm run scrape; then - log "Scrape completed" - else - log "Scrape failed — will retry at next scheduled time" - fi -} - -seconds_until_3am() { - now=$(date +%s) - # Try today's 3am first; if already past, use tomorrow's. - target=$(date -d "today 03:00" +%s) - if [ "$now" -ge "$target" ]; then - target=$(date -d "tomorrow 03:00" +%s) - fi - echo $((target - now)) -} - -# ── Run immediately on startup ──────────────────────────────────────────────── -run_scrape - -# ── Nightly loop ────────────────────────────────────────────────────────────── -while true; do - wait=$(seconds_until_3am) - next=$(date -d "now + ${wait} seconds" '+%Y-%m-%d %H:%M %Z') - log "Next scrape in $((wait / 3600))h $((( wait % 3600) / 60))m (${next})" - sleep "$wait" - run_scrape -done diff --git a/scripts/scrape.ts b/scripts/scrape.ts deleted file mode 100644 index fa8c472..0000000 --- a/scripts/scrape.ts +++ /dev/null @@ -1,107 +0,0 @@ -/** - * Scrape job — fetches 2026 operating hours for all parks from the Six Flags API. - * - * npm run scrape — skips months scraped within the last 72h - * npm run scrape:force — re-scrapes everything - */ - -import { openDb, upsertDay, isMonthScraped } from "../lib/db"; -import { PARKS } from "../lib/parks"; -import { scrapeMonth, fetchToday, RateLimitError } from "../lib/scrapers/sixflags"; - -const YEAR = 2026; -const MONTHS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; -const DELAY_MS = 1000; -const FORCE = process.argv.includes("--rescrape"); - -async function sleep(ms: number) { - return new Promise((r) => setTimeout(r, ms)); -} - -async function main() { - const db = openDb(); - - console.log(`Scraping ${YEAR} — ${PARKS.length} parks\n`); - - let totalFetched = 0; - let totalSkipped = 0; - let totalErrors = 0; - - for (const park of PARKS) { - const label = park.shortName.padEnd(22); - - let openDays = 0; - let fetched = 0; - let skipped = 0; - let errors = 0; - - process.stdout.write(` ${label} `); - - for (const month of MONTHS) { - if (!FORCE && isMonthScraped(db, park.id, YEAR, month)) { - process.stdout.write("·"); - skipped++; - continue; - } - - try { - const days = await scrapeMonth(park.apiId, YEAR, month); - db.transaction(() => { - for (const d of days) upsertDay(db, park.id, d.date, d.isOpen, d.hoursLabel, d.specialType); - })(); - openDays += days.filter((d) => d.isOpen).length; - fetched++; - process.stdout.write("█"); - if (fetched + skipped + errors < MONTHS.length) await sleep(DELAY_MS); - } catch (err) { - if (err instanceof RateLimitError) { - process.stdout.write("✗"); - } else { - process.stdout.write("✗"); - console.error(`\n error: ${err instanceof Error ? err.message : err}`); - } - errors++; - } - } - - totalFetched += fetched; - totalSkipped += skipped; - totalErrors += errors; - - if (errors > 0) { - console.log(` ${errors} error(s)`); - } else if (skipped === MONTHS.length) { - console.log(" up to date"); - } else { - console.log(` ${openDays} open days`); - } - } - - console.log(`\n ${totalFetched} fetched ${totalSkipped} skipped ${totalErrors} errors`); - if (totalErrors > 0) console.log(" Re-run to retry failed months."); - - // ── Today scrape (always fresh — dateless endpoint returns current day) ──── - console.log("\n── Today's data ──"); - for (const park of PARKS) { - process.stdout.write(` ${park.shortName.padEnd(22)} `); - try { - const today = await fetchToday(park.apiId); - if (today) { - upsertDay(db, park.id, today.date, today.isOpen, today.hoursLabel, today.specialType); - console.log(today.isOpen ? `open ${today.hoursLabel ?? ""}` : "closed"); - } else { - console.log("no data"); - } - } catch { - console.log("error"); - } - await sleep(500); - } - - db.close(); -} - -main().catch((err) => { - console.error("Fatal:", err); - process.exit(1); -});