Change upsertDay WHERE guard from >= to > date('now') so today is
treated identically to past dates. Once a park's operating day starts
the API drops that date, making it appear closed. The record written
when the date was still future is the correct one and must be preserved.
Only strictly future dates (> today) are now eligible for upserts.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
290 lines
8.6 KiB
TypeScript
import Database from "better-sqlite3";
|
|
import path from "path";
|
|
import fs from "fs";
|
|
|
|
const DATA_DIR = path.join(process.cwd(), "data");
|
|
const DB_PATH = path.join(DATA_DIR, "parks.db");
|
|
|
|
export type DbInstance = Database.Database;
|
|
|
|
export function openDb(): Database.Database {
|
|
fs.mkdirSync(DATA_DIR, { recursive: true });
|
|
const db = new Database(DB_PATH);
|
|
db.pragma("journal_mode = WAL");
|
|
db.exec(`
|
|
CREATE TABLE IF NOT EXISTS park_days (
|
|
park_id TEXT NOT NULL,
|
|
date TEXT NOT NULL, -- YYYY-MM-DD
|
|
is_open INTEGER NOT NULL DEFAULT 0,
|
|
hours_label TEXT,
|
|
special_type TEXT, -- 'passholder_preview' | null
|
|
scraped_at TEXT NOT NULL,
|
|
PRIMARY KEY (park_id, date)
|
|
);
|
|
CREATE TABLE IF NOT EXISTS park_api_ids (
|
|
park_id TEXT PRIMARY KEY,
|
|
api_id INTEGER NOT NULL,
|
|
api_abbreviation TEXT,
|
|
api_name TEXT,
|
|
discovered_at TEXT NOT NULL
|
|
)
|
|
`);
|
|
// Migrate existing databases that predate the special_type column
|
|
try {
|
|
db.exec(`ALTER TABLE park_days ADD COLUMN special_type TEXT`);
|
|
} catch {
|
|
// Column already exists — safe to ignore
|
|
}
|
|
return db;
|
|
}
|
|
|
|
export function upsertDay(
|
|
db: Database.Database,
|
|
parkId: string,
|
|
date: string,
|
|
isOpen: boolean,
|
|
hoursLabel?: string,
|
|
specialType?: string
|
|
) {
|
|
// Today and past dates: INSERT new rows freely, but NEVER overwrite existing records.
|
|
// Once an operating day begins the API drops that date from its response, so a
|
|
// re-scrape would incorrectly record the day as closed. The DB row written when
|
|
// the date was still in the future is the permanent truth for that day.
|
|
//
|
|
// Future dates only: full upsert — hours can change and closures can be added.
|
|
db.prepare(`
|
|
INSERT INTO park_days (park_id, date, is_open, hours_label, special_type, scraped_at)
|
|
VALUES (?, ?, ?, ?, ?, ?)
|
|
ON CONFLICT (park_id, date) DO UPDATE SET
|
|
is_open = excluded.is_open,
|
|
hours_label = excluded.hours_label,
|
|
special_type = excluded.special_type,
|
|
scraped_at = excluded.scraped_at
|
|
WHERE park_days.date > date('now')
|
|
`).run(parkId, date, isOpen ? 1 : 0, hoursLabel ?? null, specialType ?? null, new Date().toISOString());
|
|
}
|
|
|
|
export interface DayData {
|
|
isOpen: boolean;
|
|
hoursLabel: string | null;
|
|
specialType: string | null;
|
|
}
|
|
|
|
/**
|
|
* Returns scraped data for all parks across a date range.
|
|
* Shape: { parkId: { 'YYYY-MM-DD': DayData } }
|
|
* Missing dates mean that date hasn't been scraped yet (not necessarily closed).
|
|
*/
|
|
export function getDateRange(
|
|
db: Database.Database,
|
|
startDate: string,
|
|
endDate: string
|
|
): Record<string, Record<string, DayData>> {
|
|
const rows = db
|
|
.prepare(
|
|
`SELECT park_id, date, is_open, hours_label, special_type
|
|
FROM park_days
|
|
WHERE date >= ? AND date <= ?`
|
|
)
|
|
.all(startDate, endDate) as {
|
|
park_id: string;
|
|
date: string;
|
|
is_open: number;
|
|
hours_label: string | null;
|
|
special_type: string | null;
|
|
}[];
|
|
|
|
const result: Record<string, Record<string, DayData>> = {};
|
|
for (const row of rows) {
|
|
if (!result[row.park_id]) result[row.park_id] = {};
|
|
result[row.park_id][row.date] = {
|
|
isOpen: row.is_open === 1,
|
|
hoursLabel: row.hours_label,
|
|
specialType: row.special_type,
|
|
};
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Returns scraped DayData for a single park for an entire month.
|
|
* Shape: { 'YYYY-MM-DD': DayData }
|
|
*/
|
|
export function getParkMonthData(
|
|
db: Database.Database,
|
|
parkId: string,
|
|
year: number,
|
|
month: number,
|
|
): Record<string, DayData> {
|
|
const prefix = `${year}-${String(month).padStart(2, "0")}`;
|
|
const rows = db
|
|
.prepare(
|
|
`SELECT date, is_open, hours_label, special_type
|
|
FROM park_days
|
|
WHERE park_id = ? AND date LIKE ? || '-%'
|
|
ORDER BY date`
|
|
)
|
|
.all(parkId, prefix) as {
|
|
date: string;
|
|
is_open: number;
|
|
hours_label: string | null;
|
|
special_type: string | null;
|
|
}[];
|
|
|
|
const result: Record<string, DayData> = {};
|
|
for (const row of rows) {
|
|
result[row.date] = {
|
|
isOpen: row.is_open === 1,
|
|
hoursLabel: row.hours_label,
|
|
specialType: row.special_type,
|
|
};
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/** Returns a map of parkId → boolean[] (index 0 = day 1) for a given month. */
|
|
export function getMonthCalendar(
|
|
db: Database.Database,
|
|
year: number,
|
|
month: number
|
|
): Record<string, boolean[]> {
|
|
const prefix = `${year}-${String(month).padStart(2, "0")}`;
|
|
const rows = db
|
|
.prepare(
|
|
`SELECT park_id, date, is_open
|
|
FROM park_days
|
|
WHERE date LIKE ? || '-%'
|
|
ORDER BY date`
|
|
)
|
|
.all(prefix) as { park_id: string; date: string; is_open: number }[];
|
|
|
|
const result: Record<string, boolean[]> = {};
|
|
for (const row of rows) {
|
|
if (!result[row.park_id]) result[row.park_id] = [];
|
|
const day = parseInt(row.date.slice(8), 10);
|
|
result[row.park_id][day - 1] = row.is_open === 1;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
const STALE_AFTER_MS = 7 * 24 * 60 * 60 * 1000; // 1 week
|
|
|
|
/**
|
|
* Returns true when the scraper should skip this park+month.
|
|
*
|
|
* Two reasons to skip:
|
|
* 1. The month is entirely in the past — the API will never return data for
|
|
* those dates again, so re-scraping wastes a call and risks nothing but
|
|
* wasted time. Historical records are preserved forever by upsertDay.
|
|
* 2. The month was scraped within the last 7 days — data is still fresh.
|
|
*/
|
|
export function isMonthScraped(
|
|
db: Database.Database,
|
|
parkId: string,
|
|
year: number,
|
|
month: number
|
|
): boolean {
|
|
// Compute the last calendar day of this month (avoids timezone issues).
|
|
const daysInMonth = new Date(year, month, 0).getDate();
|
|
const lastDay = `${year}-${String(month).padStart(2, "0")}-${String(daysInMonth).padStart(2, "0")}`;
|
|
const today = new Date().toISOString().slice(0, 10);
|
|
|
|
// Past month — history is locked in, no API data available, always skip.
|
|
if (lastDay < today) return true;
|
|
|
|
// Current/future month — skip only if recently scraped.
|
|
const prefix = `${year}-${String(month).padStart(2, "0")}`;
|
|
const row = db
|
|
.prepare(
|
|
`SELECT MAX(scraped_at) AS last_scraped
|
|
FROM park_days
|
|
WHERE park_id = ? AND date LIKE ? || '-%'`
|
|
)
|
|
.get(parkId, prefix) as { last_scraped: string | null };
|
|
|
|
if (!row.last_scraped) return false;
|
|
const ageMs = Date.now() - new Date(row.last_scraped).getTime();
|
|
return ageMs < STALE_AFTER_MS;
|
|
}
|
|
|
|
export function getApiId(db: Database.Database, parkId: string): number | null {
|
|
const row = db
|
|
.prepare("SELECT api_id FROM park_api_ids WHERE park_id = ?")
|
|
.get(parkId) as { api_id: number } | undefined;
|
|
return row?.api_id ?? null;
|
|
}
|
|
|
|
export function setApiId(
|
|
db: Database.Database,
|
|
parkId: string,
|
|
apiId: number,
|
|
apiAbbreviation?: string,
|
|
apiName?: string
|
|
) {
|
|
db.prepare(`
|
|
INSERT INTO park_api_ids (park_id, api_id, api_abbreviation, api_name, discovered_at)
|
|
VALUES (?, ?, ?, ?, ?)
|
|
ON CONFLICT (park_id) DO UPDATE SET
|
|
api_id = excluded.api_id,
|
|
api_abbreviation = excluded.api_abbreviation,
|
|
api_name = excluded.api_name,
|
|
discovered_at = excluded.discovered_at
|
|
`).run(
|
|
parkId,
|
|
apiId,
|
|
apiAbbreviation ?? null,
|
|
apiName ?? null,
|
|
new Date().toISOString()
|
|
);
|
|
}
|
|
|
|
/**
|
|
* Find the next park+month to scrape.
|
|
* Priority: never-scraped first, then oldest scraped_at.
|
|
* Considers current month through monthsAhead months into the future.
|
|
*/
|
|
export function getNextScrapeTarget(
|
|
db: Database.Database,
|
|
parkIds: string[],
|
|
monthsAhead = 12
|
|
): { parkId: string; year: number; month: number } | null {
|
|
const now = new Date();
|
|
|
|
const candidates: {
|
|
parkId: string;
|
|
year: number;
|
|
month: number;
|
|
lastScraped: string | null;
|
|
}[] = [];
|
|
|
|
for (const parkId of parkIds) {
|
|
for (let i = 0; i < monthsAhead; i++) {
|
|
const d = new Date(now.getFullYear(), now.getMonth() + i, 1);
|
|
const year = d.getFullYear();
|
|
const month = d.getMonth() + 1;
|
|
const prefix = `${year}-${String(month).padStart(2, "0")}`;
|
|
|
|
const row = db
|
|
.prepare(
|
|
`SELECT MAX(scraped_at) AS last_scraped
|
|
FROM park_days
|
|
WHERE park_id = ? AND date LIKE ? || '-%'`
|
|
)
|
|
.get(parkId, prefix) as { last_scraped: string | null };
|
|
|
|
candidates.push({ parkId, year, month, lastScraped: row.last_scraped });
|
|
}
|
|
}
|
|
|
|
// Never-scraped (null) first, then oldest scraped_at
|
|
candidates.sort((a, b) => {
|
|
if (!a.lastScraped && !b.lastScraped) return 0;
|
|
if (!a.lastScraped) return -1;
|
|
if (!b.lastScraped) return 1;
|
|
return a.lastScraped.localeCompare(b.lastScraped);
|
|
});
|
|
|
|
const top = candidates[0];
|
|
return top ? { parkId: top.parkId, year: top.year, month: top.month } : null;
|
|
}
|