All checks were successful
Build and Deploy / Build & Push (push) Successful in 3m50s
- next.config.ts: CSP, X-Frame-Options, X-Content-Type-Options, Referrer-Policy, Permissions-Policy - sixflags.ts: cap Retry-After at 5 min; add 15s AbortSignal.timeout() - queuetimes.ts: add 10s AbortSignal.timeout() - rcdb.ts: add 15s AbortSignal.timeout() - lib/env.ts: parseStalenessHours() guards against NaN from invalid env vars - db.ts + park-meta.ts: use parseStalenessHours() for staleness window config Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
291 lines
8.7 KiB
TypeScript
291 lines
8.7 KiB
TypeScript
import Database from "better-sqlite3";
|
|
import path from "path";
|
|
import fs from "fs";
|
|
|
|
// All persistent state lives under ./data relative to the process working directory.
const DATA_DIR = path.join(process.cwd(), "data");
const DB_PATH = path.join(DATA_DIR, "parks.db");

// Alias for the better-sqlite3 database handle so callers can type handles
// without importing better-sqlite3 themselves.
export type DbInstance = Database.Database;
export function openDb(): Database.Database {
|
|
fs.mkdirSync(DATA_DIR, { recursive: true });
|
|
const db = new Database(DB_PATH);
|
|
db.pragma("journal_mode = WAL");
|
|
db.exec(`
|
|
CREATE TABLE IF NOT EXISTS park_days (
|
|
park_id TEXT NOT NULL,
|
|
date TEXT NOT NULL, -- YYYY-MM-DD
|
|
is_open INTEGER NOT NULL DEFAULT 0,
|
|
hours_label TEXT,
|
|
special_type TEXT, -- 'passholder_preview' | null
|
|
scraped_at TEXT NOT NULL,
|
|
PRIMARY KEY (park_id, date)
|
|
);
|
|
CREATE TABLE IF NOT EXISTS park_api_ids (
|
|
park_id TEXT PRIMARY KEY,
|
|
api_id INTEGER NOT NULL,
|
|
api_abbreviation TEXT,
|
|
api_name TEXT,
|
|
discovered_at TEXT NOT NULL
|
|
)
|
|
`);
|
|
// Migrate existing databases that predate the special_type column
|
|
try {
|
|
db.exec(`ALTER TABLE park_days ADD COLUMN special_type TEXT`);
|
|
} catch {
|
|
// Column already exists — safe to ignore
|
|
}
|
|
return db;
|
|
}
|
|
|
|
export function upsertDay(
|
|
db: Database.Database,
|
|
parkId: string,
|
|
date: string,
|
|
isOpen: boolean,
|
|
hoursLabel?: string,
|
|
specialType?: string
|
|
) {
|
|
// Today and past dates: INSERT new rows freely, but NEVER overwrite existing records.
|
|
// Once an operating day begins the API drops that date from its response, so a
|
|
// re-scrape would incorrectly record the day as closed. The DB row written when
|
|
// the date was still in the future is the permanent truth for that day.
|
|
//
|
|
// Future dates only: full upsert — hours can change and closures can be added.
|
|
db.prepare(`
|
|
INSERT INTO park_days (park_id, date, is_open, hours_label, special_type, scraped_at)
|
|
VALUES (?, ?, ?, ?, ?, ?)
|
|
ON CONFLICT (park_id, date) DO UPDATE SET
|
|
is_open = excluded.is_open,
|
|
hours_label = excluded.hours_label,
|
|
special_type = excluded.special_type,
|
|
scraped_at = excluded.scraped_at
|
|
WHERE park_days.date > date('now')
|
|
`).run(parkId, date, isOpen ? 1 : 0, hoursLabel ?? null, specialType ?? null, new Date().toISOString());
|
|
}
|
|
|
|
/** Normalized per-day schedule record — the camelCase view of a park_days row. */
export interface DayData {
  /** True when the park operates on this date (is_open = 1 in the DB). */
  isOpen: boolean;
  /** Human-readable operating-hours label, or null when none was recorded. */
  hoursLabel: string | null;
  /** Special-day marker, e.g. 'passholder_preview'; null for a normal day. */
  specialType: string | null;
}
/**
|
|
* Returns scraped data for all parks across a date range.
|
|
* Shape: { parkId: { 'YYYY-MM-DD': DayData } }
|
|
* Missing dates mean that date hasn't been scraped yet (not necessarily closed).
|
|
*/
|
|
export function getDateRange(
|
|
db: Database.Database,
|
|
startDate: string,
|
|
endDate: string
|
|
): Record<string, Record<string, DayData>> {
|
|
const rows = db
|
|
.prepare(
|
|
`SELECT park_id, date, is_open, hours_label, special_type
|
|
FROM park_days
|
|
WHERE date >= ? AND date <= ?`
|
|
)
|
|
.all(startDate, endDate) as {
|
|
park_id: string;
|
|
date: string;
|
|
is_open: number;
|
|
hours_label: string | null;
|
|
special_type: string | null;
|
|
}[];
|
|
|
|
const result: Record<string, Record<string, DayData>> = {};
|
|
for (const row of rows) {
|
|
if (!result[row.park_id]) result[row.park_id] = {};
|
|
result[row.park_id][row.date] = {
|
|
isOpen: row.is_open === 1,
|
|
hoursLabel: row.hours_label,
|
|
specialType: row.special_type,
|
|
};
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Returns scraped DayData for a single park for an entire month.
|
|
* Shape: { 'YYYY-MM-DD': DayData }
|
|
*/
|
|
export function getParkMonthData(
|
|
db: Database.Database,
|
|
parkId: string,
|
|
year: number,
|
|
month: number,
|
|
): Record<string, DayData> {
|
|
const prefix = `${year}-${String(month).padStart(2, "0")}`;
|
|
const rows = db
|
|
.prepare(
|
|
`SELECT date, is_open, hours_label, special_type
|
|
FROM park_days
|
|
WHERE park_id = ? AND date LIKE ? || '-%'
|
|
ORDER BY date`
|
|
)
|
|
.all(parkId, prefix) as {
|
|
date: string;
|
|
is_open: number;
|
|
hours_label: string | null;
|
|
special_type: string | null;
|
|
}[];
|
|
|
|
const result: Record<string, DayData> = {};
|
|
for (const row of rows) {
|
|
result[row.date] = {
|
|
isOpen: row.is_open === 1,
|
|
hoursLabel: row.hours_label,
|
|
specialType: row.special_type,
|
|
};
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/** Returns a map of parkId → boolean[] (index 0 = day 1) for a given month. */
|
|
export function getMonthCalendar(
|
|
db: Database.Database,
|
|
year: number,
|
|
month: number
|
|
): Record<string, boolean[]> {
|
|
const prefix = `${year}-${String(month).padStart(2, "0")}`;
|
|
const rows = db
|
|
.prepare(
|
|
`SELECT park_id, date, is_open
|
|
FROM park_days
|
|
WHERE date LIKE ? || '-%'
|
|
ORDER BY date`
|
|
)
|
|
.all(prefix) as { park_id: string; date: string; is_open: number }[];
|
|
|
|
const result: Record<string, boolean[]> = {};
|
|
for (const row of rows) {
|
|
if (!result[row.park_id]) result[row.park_id] = [];
|
|
const day = parseInt(row.date.slice(8), 10);
|
|
result[row.park_id][day - 1] = row.is_open === 1;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
// NOTE(review): this import sits mid-file; ES module imports are hoisted so it
// works, but conventionally it belongs with the other imports at the top.
import { parseStalenessHours } from "./env";

// Freshness window for re-scraping a month, in milliseconds. Configured in
// hours via PARK_HOURS_STALENESS_HOURS; defaults to 72 h when unset/invalid.
const STALE_AFTER_MS = parseStalenessHours(process.env.PARK_HOURS_STALENESS_HOURS, 72) * 60 * 60 * 1000;
/**
 * Returns true when the scraper should skip this park+month.
 *
 * Two reasons to skip:
 * 1. The month is entirely in the past — the API will never return data for
 *    those dates again, so re-scraping wastes a call and risks nothing but
 *    wasted time. Historical records are preserved forever by upsertDay.
 * 2. The month was scraped within the staleness window (STALE_AFTER_MS —
 *    default 72 hours, configurable via PARK_HOURS_STALENESS_HOURS), so the
 *    data is still fresh.
 */
export function isMonthScraped(
  db: Database.Database,
  parkId: string,
  year: number,
  month: number
): boolean {
  // Compute the last calendar day of this month (avoids timezone issues).
  // `month` is 1-based here; Date's month argument is 0-based, so day 0 of
  // `month` resolves to the last day of the intended 1-based month.
  const daysInMonth = new Date(year, month, 0).getDate();
  const lastDay = `${year}-${String(month).padStart(2, "0")}-${String(daysInMonth).padStart(2, "0")}`;
  // UTC calendar date, comparable lexicographically against lastDay.
  const today = new Date().toISOString().slice(0, 10);

  // Past month — history is locked in, no API data available, always skip.
  if (lastDay < today) return true;

  // Current/future month — skip only if recently scraped.
  const prefix = `${year}-${String(month).padStart(2, "0")}`;
  const row = db
    .prepare(
      `SELECT MAX(scraped_at) AS last_scraped
       FROM park_days
       WHERE park_id = ? AND date LIKE ? || '-%'`
    )
    .get(parkId, prefix) as { last_scraped: string | null };

  // MAX() over zero rows yields NULL: month never scraped → not fresh.
  if (!row.last_scraped) return false;
  const ageMs = Date.now() - new Date(row.last_scraped).getTime();
  return ageMs < STALE_AFTER_MS;
}
export function getApiId(db: Database.Database, parkId: string): number | null {
|
|
const row = db
|
|
.prepare("SELECT api_id FROM park_api_ids WHERE park_id = ?")
|
|
.get(parkId) as { api_id: number } | undefined;
|
|
return row?.api_id ?? null;
|
|
}
|
|
|
|
export function setApiId(
|
|
db: Database.Database,
|
|
parkId: string,
|
|
apiId: number,
|
|
apiAbbreviation?: string,
|
|
apiName?: string
|
|
) {
|
|
db.prepare(`
|
|
INSERT INTO park_api_ids (park_id, api_id, api_abbreviation, api_name, discovered_at)
|
|
VALUES (?, ?, ?, ?, ?)
|
|
ON CONFLICT (park_id) DO UPDATE SET
|
|
api_id = excluded.api_id,
|
|
api_abbreviation = excluded.api_abbreviation,
|
|
api_name = excluded.api_name,
|
|
discovered_at = excluded.discovered_at
|
|
`).run(
|
|
parkId,
|
|
apiId,
|
|
apiAbbreviation ?? null,
|
|
apiName ?? null,
|
|
new Date().toISOString()
|
|
);
|
|
}
|
|
|
|
/**
|
|
* Find the next park+month to scrape.
|
|
* Priority: never-scraped first, then oldest scraped_at.
|
|
* Considers current month through monthsAhead months into the future.
|
|
*/
|
|
export function getNextScrapeTarget(
|
|
db: Database.Database,
|
|
parkIds: string[],
|
|
monthsAhead = 12
|
|
): { parkId: string; year: number; month: number } | null {
|
|
const now = new Date();
|
|
|
|
const candidates: {
|
|
parkId: string;
|
|
year: number;
|
|
month: number;
|
|
lastScraped: string | null;
|
|
}[] = [];
|
|
|
|
for (const parkId of parkIds) {
|
|
for (let i = 0; i < monthsAhead; i++) {
|
|
const d = new Date(now.getFullYear(), now.getMonth() + i, 1);
|
|
const year = d.getFullYear();
|
|
const month = d.getMonth() + 1;
|
|
const prefix = `${year}-${String(month).padStart(2, "0")}`;
|
|
|
|
const row = db
|
|
.prepare(
|
|
`SELECT MAX(scraped_at) AS last_scraped
|
|
FROM park_days
|
|
WHERE park_id = ? AND date LIKE ? || '-%'`
|
|
)
|
|
.get(parkId, prefix) as { last_scraped: string | null };
|
|
|
|
candidates.push({ parkId, year, month, lastScraped: row.last_scraped });
|
|
}
|
|
}
|
|
|
|
// Never-scraped (null) first, then oldest scraped_at
|
|
candidates.sort((a, b) => {
|
|
if (!a.lastScraped && !b.lastScraped) return 0;
|
|
if (!a.lastScraped) return -1;
|
|
if (!b.lastScraped) return 1;
|
|
return a.lastScraped.localeCompare(b.lastScraped);
|
|
});
|
|
|
|
const top = candidates[0];
|
|
return top ? { parkId: top.parkId, year: top.year, month: top.month } : null;
|
|
}
|