import Database from "better-sqlite3";
import path from "path";
import fs from "fs";

const DATA_DIR = path.join(process.cwd(), "data");
const DB_PATH = path.join(DATA_DIR, "parks.db");

export type DbInstance = Database.Database;

/** Row shape returned by the "latest scrape timestamp" aggregate query. */
interface LastScrapedRow {
  last_scraped: string | null;
}

/** Columns of `park_days` that are mapped into a {@link DayData}. */
interface DayDataRow {
  is_open: number;
  hours_label: string | null;
  special_type: string | null;
}

/**
 * Latest `scraped_at` for one park within one month.
 * Bind parameters: park id, then a `YYYY-MM` prefix.
 */
const LAST_SCRAPED_SQL = `SELECT MAX(scraped_at) AS last_scraped
     FROM park_days
     WHERE park_id = ? AND date LIKE ? || '-%'`;

/** Builds the `YYYY-MM` prefix used to match every date of a month (month is 1-based). */
function monthPrefix(year: number, month: number): string {
  return `${year}-${String(month).padStart(2, "0")}`;
}

/** Converts the stored columns of a `park_days` row into the public DayData shape. */
function toDayData(row: DayDataRow): DayData {
  return {
    isOpen: row.is_open === 1,
    hoursLabel: row.hours_label,
    specialType: row.special_type,
  };
}

/**
 * Opens (creating if necessary) the SQLite database at `<cwd>/data/parks.db`,
 * switches it to WAL journaling and makes sure the schema exists.
 *
 * @returns An open database handle; the caller is responsible for closing it.
 * @throws Any error raised while opening the file or setting up the schema.
 *         The handle is closed before the error is rethrown.
 */
export function openDb(): Database.Database {
  fs.mkdirSync(DATA_DIR, { recursive: true });
  const db = new Database(DB_PATH);
  try {
    db.pragma("journal_mode = WAL");
    db.exec(`
    CREATE TABLE IF NOT EXISTS park_days (
      park_id      TEXT NOT NULL,
      date         TEXT NOT NULL,       -- YYYY-MM-DD
      is_open      INTEGER NOT NULL DEFAULT 0,
      hours_label  TEXT,
      special_type TEXT,                -- 'passholder_preview' | null
      scraped_at   TEXT NOT NULL,
      PRIMARY KEY (park_id, date)
    );
    CREATE TABLE IF NOT EXISTS park_api_ids (
      park_id          TEXT PRIMARY KEY,
      api_id           INTEGER NOT NULL,
      api_abbreviation TEXT,
      api_name         TEXT,
      discovered_at    TEXT NOT NULL
    )
  `);

    // Migrate existing databases that predate the special_type column.
    try {
      db.exec(`ALTER TABLE park_days ADD COLUMN special_type TEXT`);
    } catch (err) {
      // Only "column already exists" is expected here; anything else (locked
      // database, I/O failure, corruption) must not be silently swallowed.
      const message = err instanceof Error ? err.message : String(err);
      if (!/duplicate column name/i.test(message)) throw err;
    }
  } catch (err) {
    // Do not leak the open handle when schema setup fails.
    db.close();
    throw err;
  }
  return db;
}

/**
 * Inserts or updates the record for one park on one date.
 *
 * @param db Open database handle.
 * @param parkId Park identifier (first half of the primary key).
 * @param date Calendar date as `YYYY-MM-DD` (second half of the primary key).
 * @param isOpen Whether the park is open on that date; stored as 1/0.
 * @param hoursLabel Optional hours text; stored as NULL when omitted.
 * @param specialType Optional special-day marker; stored as NULL when omitted.
 */
export function upsertDay(
  db: Database.Database,
  parkId: string,
  date: string,
  isOpen: boolean,
  hoursLabel?: string,
  specialType?: string
) {
  // For past dates: INSERT new rows freely, but never overwrite existing records.
  // The API stops returning past dates once they've elapsed, so the DB row is the
  // permanent historical truth — we must not let a future scrape clobber it.
  //
  // For today and future dates: full upsert — the schedule can still change.
  //
  // Note: SQLite's date('now') is evaluated in UTC, so "today" is the UTC date.
  db.prepare(`
    INSERT INTO park_days (park_id, date, is_open, hours_label, special_type, scraped_at)
    VALUES (?, ?, ?, ?, ?, ?)
    ON CONFLICT (park_id, date) DO UPDATE SET
      is_open      = excluded.is_open,
      hours_label  = excluded.hours_label,
      special_type = excluded.special_type,
      scraped_at   = excluded.scraped_at
    WHERE park_days.date >= date('now')
  `).run(
    parkId,
    date,
    isOpen ? 1 : 0,
    hoursLabel ?? null,
    specialType ?? null,
    new Date().toISOString()
  );
}

/** Scraped schedule information for a single park on a single date. */
export interface DayData {
  isOpen: boolean;
  hoursLabel: string | null;
  specialType: string | null;
}

/**
 * Returns scraped data for all parks across a date range (both ends inclusive).
 * Shape: { parkId: { 'YYYY-MM-DD': DayData } }
 * Missing dates mean that date hasn't been scraped yet (not necessarily closed).
 *
 * @param db Open database handle.
 * @param startDate First date to include, `YYYY-MM-DD`.
 * @param endDate Last date to include, `YYYY-MM-DD`.
 */
export function getDateRange(
  db: Database.Database,
  startDate: string,
  endDate: string
): Record<string, Record<string, DayData>> {
  const rows = db
    .prepare(
      `SELECT park_id, date, is_open, hours_label, special_type
       FROM park_days
       WHERE date >= ? AND date <= ?`
    )
    .all(startDate, endDate) as (DayDataRow & { park_id: string; date: string })[];

  const result: Record<string, Record<string, DayData>> = {};
  for (const row of rows) {
    let byDate = result[row.park_id];
    if (!byDate) {
      byDate = {};
      result[row.park_id] = byDate;
    }
    byDate[row.date] = toDayData(row);
  }
  return result;
}

/**
 * Returns scraped DayData for a single park for an entire month.
 * Shape: { 'YYYY-MM-DD': DayData }
 *
 * @param db Open database handle.
 * @param parkId Park identifier.
 * @param year Four-digit year.
 * @param month Month number, 1-12.
 */
export function getParkMonthData(
  db: Database.Database,
  parkId: string,
  year: number,
  month: number,
): Record<string, DayData> {
  const rows = db
    .prepare(
      `SELECT date, is_open, hours_label, special_type
       FROM park_days
       WHERE park_id = ? AND date LIKE ? || '-%'
       ORDER BY date`
    )
    .all(parkId, monthPrefix(year, month)) as (DayDataRow & { date: string })[];

  const result: Record<string, DayData> = {};
  for (const row of rows) {
    result[row.date] = toDayData(row);
  }
  return result;
}

/**
 * Returns a map of parkId → boolean[] (index 0 = day 1) for a given month.
 *
 * Only days that have a row are assigned, so the arrays can be sparse: an
 * unscraped day is a hole (reads as `undefined`), and an array ends at the
 * last scraped day rather than at the end of the month.
 *
 * @param db Open database handle.
 * @param year Four-digit year.
 * @param month Month number, 1-12.
 */
export function getMonthCalendar(
  db: Database.Database,
  year: number,
  month: number
): Record<string, boolean[]> {
  const rows = db
    .prepare(
      `SELECT park_id, date, is_open
       FROM park_days
       WHERE date LIKE ? || '-%'
       ORDER BY date`
    )
    .all(monthPrefix(year, month)) as { park_id: string; date: string; is_open: number }[];

  const result: Record<string, boolean[]> = {};
  for (const row of rows) {
    let days = result[row.park_id];
    if (!days) {
      days = [];
      result[row.park_id] = days;
    }
    // Characters from index 8 onward of 'YYYY-MM-DD' are the day of month.
    const day = parseInt(row.date.slice(8), 10);
    days[day - 1] = row.is_open === 1;
  }
  return result;
}

const STALE_AFTER_MS = 7 * 24 * 60 * 60 * 1000; // 1 week

/**
 * Returns true when the scraper should skip this park+month.
 *
 * Two reasons to skip:
 *  1. The month is entirely in the past — the API will never return data for
 *     those dates again, so re-scraping only wastes a call.
 *     Historical records are preserved forever by upsertDay.
 *  2. The month was scraped within the last 7 days — data is still fresh.
 *
 * @param db Open database handle.
 * @param parkId Park identifier.
 * @param year Four-digit year.
 * @param month Month number, 1-12.
 */
export function isMonthScraped(
  db: Database.Database,
  parkId: string,
  year: number,
  month: number
): boolean {
  const prefix = monthPrefix(year, month);

  // Day 0 of the following month is the last day of this month; `month` is
  // 1-based, so it already names the following month in Date's 0-based scheme.
  const daysInMonth = new Date(year, month, 0).getDate();
  const lastDay = `${prefix}-${String(daysInMonth).padStart(2, "0")}`;
  // "Today" as a UTC calendar date, compared lexicographically as YYYY-MM-DD.
  const today = new Date().toISOString().slice(0, 10);

  // Past month — history is locked in, no API data available, always skip.
  if (lastDay < today) return true;

  // Current/future month — skip only if recently scraped.
  const row = db.prepare(LAST_SCRAPED_SQL).get(parkId, prefix) as LastScrapedRow;
  if (!row.last_scraped) return false;

  const ageMs = Date.now() - new Date(row.last_scraped).getTime();
  return ageMs < STALE_AFTER_MS;
}

/**
 * Looks up the API id previously stored for a park.
 *
 * @returns The stored id, or null when the park has no row in `park_api_ids`.
 */
export function getApiId(db: Database.Database, parkId: string): number | null {
  const row = db
    .prepare("SELECT api_id FROM park_api_ids WHERE park_id = ?")
    .get(parkId) as { api_id: number } | undefined;
  return row?.api_id ?? null;
}

/**
 * Stores (or replaces) the API identity of a park and stamps `discovered_at`
 * with the current time.
 *
 * @param db Open database handle.
 * @param parkId Park identifier (primary key).
 * @param apiId Numeric id to store.
 * @param apiAbbreviation Optional abbreviation; stored as NULL when omitted.
 * @param apiName Optional name; stored as NULL when omitted.
 */
export function setApiId(
  db: Database.Database,
  parkId: string,
  apiId: number,
  apiAbbreviation?: string,
  apiName?: string
) {
  db.prepare(`
    INSERT INTO park_api_ids (park_id, api_id, api_abbreviation, api_name, discovered_at)
    VALUES (?, ?, ?, ?, ?)
    ON CONFLICT (park_id) DO UPDATE SET
      api_id           = excluded.api_id,
      api_abbreviation = excluded.api_abbreviation,
      api_name         = excluded.api_name,
      discovered_at    = excluded.discovered_at
  `).run(
    parkId,
    apiId,
    apiAbbreviation ?? null,
    apiName ?? null,
    new Date().toISOString()
  );
}

/**
 * Find the next park+month to scrape.
 * Priority: never-scraped first, then oldest scraped_at. Ties go to the
 * earliest candidate in iteration order (parks in the given order, months
 * ascending within each park).
 * Considers `monthsAhead` months in total, starting with the current month.
 *
 * @param db Open database handle.
 * @param parkIds Parks to consider.
 * @param monthsAhead Number of months to consider, including the current one.
 * @returns The chosen target, or null when there are no candidates.
 */
export function getNextScrapeTarget(
  db: Database.Database,
  parkIds: string[],
  monthsAhead = 12
): { parkId: string; year: number; month: number } | null {
  // Prepared once and reused for every park/month combination.
  const lastScrapedStmt = db.prepare(LAST_SCRAPED_SQL);
  const now = new Date();

  let oldest: { parkId: string; year: number; month: number; lastScraped: string } | null = null;

  for (const parkId of parkIds) {
    for (let i = 0; i < monthsAhead; i++) {
      // Date normalises month overflow, so this rolls into following years.
      const d = new Date(now.getFullYear(), now.getMonth() + i, 1);
      const year = d.getFullYear();
      const month = d.getMonth() + 1;

      const row = lastScrapedStmt.get(parkId, monthPrefix(year, month)) as LastScrapedRow;

      // The first never-scraped month outranks everything else, so stop here.
      if (!row.last_scraped) return { parkId, year, month };

      // scraped_at values are ISO-8601 UTC strings, which order chronologically
      // under plain string comparison. Strict `<` keeps the earliest on ties.
      if (oldest === null || row.last_scraped < oldest.lastScraped) {
        oldest = { parkId, year, month, lastScraped: row.last_scraped };
      }
    }
  }

  return oldest ? { parkId: oldest.parkId, year: oldest.year, month: oldest.month } : null;
}