From aa46cc1b3de44c899020699f77f4b3f072af03f9 Mon Sep 17 00:00:00 2001 From: josh Date: Fri, 29 May 2026 14:45:57 -0400 Subject: [PATCH] fix: run startup scrape only when database is empty Restores the startup scrape removed in deb8e41, gated on getParkDayCount() < 50 so warm restarts don't hammer the API. Cold containers (e.g. after the volume mount fix) populate immediately instead of waiting up to 24h for tier-4 cron. Co-Authored-By: Claude Opus 4.7 (1M context) --- backend/src/services/scheduler.ts | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/backend/src/services/scheduler.ts b/backend/src/services/scheduler.ts index 74bc580..381e564 100644 --- a/backend/src/services/scheduler.ts +++ b/backend/src/services/scheduler.ts @@ -1,5 +1,6 @@ import cron from "node-cron"; import { scrapeToday, scrapeCurrentMonth, scrapeUpcomingMonths, scrapeFullYear } from "./scraper"; +import { getParkDayCount } from "../db/queries"; let initialized = false; @@ -36,4 +37,18 @@ export function startScheduler(): void { console.log(" tier-2: current month — every 6h"); console.log(" tier-3: upcoming — 3 AM + 3 PM"); console.log(" tier-4: full year — 3 AM daily"); + + const existingRows = getParkDayCount(); + if (existingRows < 50) { + console.log(`[scheduler] DB has ${existingRows} rows — running startup scrape`); + scrapeToday() + .then((r) => { + console.log(`[scheduler] startup today: ${r.fetched} fetched, ${r.updated} updated, ${r.errors} errors`); + return scrapeFullYear(); + }) + .then((r) => console.log(`[scheduler] startup full-year: ${r.fetched} fetched, ${r.skipped} skipped, ${r.errors} errors`)) + .catch((err) => console.error("[scheduler] startup scrape error:", err)); + } else { + console.log(`[scheduler] DB has ${existingRows} rows — skipping startup scrape, relying on cron`); + } }