diff --git a/lib/db.ts b/lib/db.ts index 09ef79a..333a8e9 100644 --- a/lib/db.ts +++ b/lib/db.ts @@ -46,6 +46,11 @@ export function upsertDay( hoursLabel?: string, specialType?: string ) { + // For past dates: INSERT new rows freely, but never overwrite existing records. + // The API stops returning past dates once they've elapsed, so the DB row is the + // permanent historical truth — we must not let a future scrape clobber it. + // + // For today and future dates: full upsert — the schedule can still change. db.prepare(` INSERT INTO park_days (park_id, date, is_open, hours_label, special_type, scraped_at) VALUES (?, ?, ?, ?, ?, ?) @@ -54,6 +59,7 @@ export function upsertDay( hours_label = excluded.hours_label, special_type = excluded.special_type, scraped_at = excluded.scraped_at + WHERE park_days.date >= date('now') `).run(parkId, date, isOpen ? 1 : 0, hoursLabel ?? null, specialType ?? null, new Date().toISOString()); } @@ -160,16 +166,32 @@ export function getMonthCalendar( return result; } -/** True if the DB already has at least one row for this park+month. */ const STALE_AFTER_MS = 7 * 24 * 60 * 60 * 1000; // 1 week -/** True if the DB has data for this park+month scraped within the last week. */ +/** + * Returns true when the scraper should skip this park+month. + * + * Two reasons to skip: + * 1. The month is entirely in the past — the API will never return data for + * those dates again, so re-scraping would only waste an API call. + * Historical records are preserved forever by upsertDay. + * 2. The month was scraped within the last 7 days — data is still fresh. + */ export function isMonthScraped( db: Database.Database, parkId: string, year: number, month: number ): boolean { + // Compute the last calendar day of this month (avoids timezone issues). 
+ const daysInMonth = new Date(year, month, 0).getDate(); + const lastDay = `${year}-${String(month).padStart(2, "0")}-${String(daysInMonth).padStart(2, "0")}`; + const today = new Date().toISOString().slice(0, 10); + + // Past month — history is locked in, no API data available, always skip. + if (lastDay < today) return true; + + // Current/future month — skip only if recently scraped. const prefix = `${year}-${String(month).padStart(2, "0")}`; const row = db .prepare(