chore: remove old scraper scripts, replaced by backend scheduler
Delete scripts/scrape.ts and scripts/scrape-schedule.sh — their functionality now lives in the backend's node-cron tiered scheduler (backend/src/services/scheduler.ts + scraper.ts).

Remove the scrape and scrape:force npm scripts from package.json.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -7,8 +7,6 @@
|
|||||||
"build": "next build",
|
"build": "next build",
|
||||||
"start": "next start",
|
"start": "next start",
|
||||||
"lint": "next lint",
|
"lint": "next lint",
|
||||||
"scrape": "tsx scripts/scrape.ts",
|
|
||||||
"scrape:force": "tsx scripts/scrape.ts --rescrape",
|
|
||||||
"debug": "tsx scripts/debug.ts",
|
"debug": "tsx scripts/debug.ts",
|
||||||
"test": "tsx --test tests/*.test.ts"
|
"test": "tsx --test tests/*.test.ts"
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -1,45 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
# Nightly scraper scheduler — runs inside the Docker scraper service.
|
|
||||||
#
|
|
||||||
# Behaviour:
|
|
||||||
# 1. Runs an initial scrape immediately on container start.
|
|
||||||
# 2. Sleeps until 3:00 AM (container timezone, set via TZ env var).
|
|
||||||
# 3. Runs the scraper, then sleeps until the next 3:00 AM, forever.
|
|
||||||
#
|
|
||||||
# Timezone: set TZ in the scraper service environment to control when
|
|
||||||
# "3am" is (e.g. TZ=America/New_York). Defaults to UTC if unset.
|
|
||||||
|
|
||||||
# log MESSAGE...
# Writes one line to stdout: the "[scheduler]" tag, the current date/time
# (with timezone abbreviation), and all arguments joined by spaces.
#
# printf is used instead of echo because echo's treatment of backslashes in
# its operands differs between shells; printf '%s\n' always prints the text
# verbatim.
log() {
  printf '%s\n' "[scheduler] $(date '+%Y-%m-%d %H:%M %Z') — $*"
}
|
|
||||||
|
|
||||||
# run_scrape
# Runs one scrape via `npm run scrape` and logs how it went. A failing scrape
# is only logged, so the caller's loop carries on to the next scheduled run.
run_scrape() {
  log "Starting scrape"
  if ! npm run scrape; then
    log "Scrape failed — will retry at next scheduled time"
  else
    log "Scrape completed"
  fi
}
|
|
||||||
|
|
||||||
# seconds_until_3am
# Prints the number of seconds from now until the next 03:00.
# Today's 03:00 is used while it is still ahead of us; once it has been
# reached or passed, tomorrow's 03:00 is the target instead.
seconds_until_3am() {
  now=$(date +%s)
  target=$(date -d "today 03:00" +%s)
  # Keep today's target only when it is strictly in the future.
  [ "$now" -lt "$target" ] || target=$(date -d "tomorrow 03:00" +%s)
  echo $((target - now))
}
|
|
||||||
|
|
||||||
# ── Run immediately on startup ────────────────────────────────────────────────
|
|
||||||
run_scrape

# ── Nightly loop ──────────────────────────────────────────────────────────────
# Sleep until the next 03:00, scrape, repeat forever.
while true; do
  wait=$(seconds_until_3am)

  # Guard against an unusable wait value (empty, negative, or non-numeric —
  # e.g. when the date computation fails). Without it `sleep` would fail
  # immediately and this loop would re-run the scraper back to back.
  case "$wait" in
    ''|*[!0-9]*)
      log "Could not compute time until 03:00 (got '${wait}') — sleeping 24h instead"
      wait=86400
      ;;
  esac

  next=$(date -d "now + ${wait} seconds" '+%Y-%m-%d %H:%M %Z')
  log "Next scrape in $((wait / 3600))h $(( (wait % 3600) / 60 ))m (${next})"
  sleep "$wait"
  run_scrape
done
|
|
||||||
@@ -1,107 +0,0 @@
|
|||||||
/**
|
|
||||||
* Scrape job — fetches 2026 operating hours for all parks from the Six Flags API.
|
|
||||||
*
|
|
||||||
* npm run scrape — skips months scraped within the last 72h
|
|
||||||
* npm run scrape:force — re-scrapes everything
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { openDb, upsertDay, isMonthScraped } from "../lib/db";
|
|
||||||
import { PARKS } from "../lib/parks";
|
|
||||||
import { scrapeMonth, fetchToday, RateLimitError } from "../lib/scrapers/sixflags";
|
|
||||||
|
|
||||||
/** Calendar year whose operating hours are scraped. */
const YEAR = 2026;

/** Month numbers 1 through 12, in the order they are scraped. */
const MONTHS = Array.from({ length: 12 }, (_, index) => index + 1);

/** Pause, in milliseconds, between consecutive month requests for one park. */
const DELAY_MS = 1000;

/** True when the script was started with the `--rescrape` flag. */
const FORCE = process.argv.some((arg) => arg === "--rescrape");
|
|
||||||
|
|
||||||
/**
 * Returns a promise that resolves (with no value) once `ms` milliseconds
 * have elapsed on a timer.
 *
 * @param ms - Delay in milliseconds, passed straight to `setTimeout`.
 * @returns A promise that settles when the timer fires.
 */
function sleep(ms: number): Promise<void> {
  // The explicit Promise is already the return value, so no `async` wrapper
  // is needed around it.
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
||||||
|
|
||||||
/**
 * Runs the full scrape job.
 *
 * Phase 1: for every park in `PARKS` and every month in `MONTHS`, fetch the
 * month with `scrapeMonth` and upsert each returned day inside a single
 * transaction. Months that `isMonthScraped` reports as done are skipped
 * unless `FORCE` is set. One progress glyph is printed per month:
 * `·` skipped, `█` fetched, `✗` failed.
 *
 * Phase 2: for every park, fetch the current day with `fetchToday` and
 * upsert it, regardless of what phase 1 did.
 *
 * Per-month and per-park failures are counted and reported but do not abort
 * the run. The database handle is closed even if something throws.
 */
async function main(): Promise<void> {
  const db = openDb();

  try {
    console.log(`Scraping ${YEAR} — ${PARKS.length} parks\n`);

    let totalFetched = 0;
    let totalSkipped = 0;
    let totalErrors = 0;

    for (const park of PARKS) {
      const label = park.shortName.padEnd(22);

      let openDays = 0;
      let fetched = 0;
      let skipped = 0;
      let errors = 0;

      process.stdout.write(` ${label} `);

      for (const month of MONTHS) {
        if (!FORCE && isMonthScraped(db, park.id, YEAR, month)) {
          process.stdout.write("·");
          skipped++;
          continue;
        }

        try {
          const days = await scrapeMonth(park.apiId, YEAR, month);
          // Write the whole month atomically.
          db.transaction(() => {
            for (const d of days) upsertDay(db, park.id, d.date, d.isOpen, d.hoursLabel, d.specialType);
          })();
          openDays += days.filter((d) => d.isOpen).length;
          fetched++;
          process.stdout.write("█");
        } catch (err) {
          process.stdout.write("✗");
          // Rate-limit failures are expected noise; anything else gets its
          // message printed so it can be diagnosed.
          if (!(err instanceof RateLimitError)) {
            console.error(`\n error: ${err instanceof Error ? err.message : err}`);
          }
          errors++;
        }

        // Pace every request that hit the network — failed ones included, so
        // an error (or rate limit) is not followed by an immediate retry
        // against the next month. No pause after the park's last month.
        if (fetched + skipped + errors < MONTHS.length) await sleep(DELAY_MS);
      }

      totalFetched += fetched;
      totalSkipped += skipped;
      totalErrors += errors;

      if (errors > 0) {
        console.log(` ${errors} error(s)`);
      } else if (skipped === MONTHS.length) {
        console.log(" up to date");
      } else {
        console.log(` ${openDays} open days`);
      }
    }

    console.log(`\n ${totalFetched} fetched ${totalSkipped} skipped ${totalErrors} errors`);
    if (totalErrors > 0) console.log(" Re-run to retry failed months.");

    // ── Today scrape (always fresh — dateless endpoint returns current day) ────
    console.log("\n── Today's data ──");
    for (const park of PARKS) {
      process.stdout.write(` ${park.shortName.padEnd(22)} `);
      try {
        const today = await fetchToday(park.apiId);
        if (today) {
          upsertDay(db, park.id, today.date, today.isOpen, today.hoursLabel, today.specialType);
          console.log(today.isOpen ? `open ${today.hoursLabel ?? ""}` : "closed");
        } else {
          console.log("no data");
        }
      } catch (err) {
        console.log("error");
        // Surface the cause instead of discarding it.
        console.error(` error: ${err instanceof Error ? err.message : err}`);
      }
      await sleep(500);
    }
  } finally {
    // Release the database handle on both the success and the failure path.
    db.close();
  }
}
|
|
||||||
|
|
||||||
// Script entry point: run the job; if it rejects, report the reason and exit
// with status 1.
main().then(undefined, (reason: unknown) => {
  console.error("Fatal:", reason);
  process.exit(1);
});
|
|
||||||
Reference in New Issue
Block a user