Change upsertDay WHERE guard from >= to > date('now') so today is
treated identically to past dates. Once a park's operating day starts
the API drops that date, making it appear closed. The record written
when the date was still future is the correct one and must be preserved.
Only strictly future dates (> today) are now eligible for upserts.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
290 lines
8.6 KiB
TypeScript
import Database from "better-sqlite3";
|
|
import path from "path";
|
|
import fs from "fs";
|
|
|
|
const DATA_DIR = path.join(process.cwd(), "data");
|
|
const DB_PATH = path.join(DATA_DIR, "parks.db");
|
|
|
|
export type DbInstance = Database.Database;
|
|
|
|
export function openDb(): Database.Database {
|
|
fs.mkdirSync(DATA_DIR, { recursive: true });
|
|
const db = new Database(DB_PATH);
|
|
db.pragma("journal_mode = WAL");
|
|
db.exec(`
|
|
CREATE TABLE IF NOT EXISTS park_days (
|
|
park_id TEXT NOT NULL,
|
|
date TEXT NOT NULL, -- YYYY-MM-DD
|
|
is_open INTEGER NOT NULL DEFAULT 0,
|
|
hours_label TEXT,
|
|
special_type TEXT, -- 'passholder_preview' | null
|
|
scraped_at TEXT NOT NULL,
|
|
PRIMARY KEY (park_id, date)
|
|
);
|
|
CREATE TABLE IF NOT EXISTS park_api_ids (
|
|
park_id TEXT PRIMARY KEY,
|
|
api_id INTEGER NOT NULL,
|
|
api_abbreviation TEXT,
|
|
api_name TEXT,
|
|
discovered_at TEXT NOT NULL
|
|
)
|
|
`);
|
|
// Migrate existing databases that predate the special_type column
|
|
try {
|
|
db.exec(`ALTER TABLE park_days ADD COLUMN special_type TEXT`);
|
|
} catch {
|
|
// Column already exists — safe to ignore
|
|
}
|
|
return db;
|
|
}
|
|
|
|
export function upsertDay(
|
|
db: Database.Database,
|
|
parkId: string,
|
|
date: string,
|
|
isOpen: boolean,
|
|
hoursLabel?: string,
|
|
specialType?: string
|
|
) {
|
|
// Today and past dates: INSERT new rows freely, but NEVER overwrite existing records.
|
|
// Once an operating day begins the API drops that date from its response, so a
|
|
// re-scrape would incorrectly record the day as closed. The DB row written when
|
|
// the date was still in the future is the permanent truth for that day.
|
|
//
|
|
// Future dates only: full upsert — hours can change and closures can be added.
|
|
db.prepare(`
|
|
INSERT INTO park_days (park_id, date, is_open, hours_label, special_type, scraped_at)
|
|
VALUES (?, ?, ?, ?, ?, ?)
|
|
ON CONFLICT (park_id, date) DO UPDATE SET
|
|
is_open = excluded.is_open,
|
|
hours_label = excluded.hours_label,
|
|
special_type = excluded.special_type,
|
|
scraped_at = excluded.scraped_at
|
|
WHERE park_days.date > date('now')
|
|
`).run(parkId, date, isOpen ? 1 : 0, hoursLabel ?? null, specialType ?? null, new Date().toISOString());
|
|
}
|
|
|
|
export interface DayData {
|
|
isOpen: boolean;
|
|
hoursLabel: string | null;
|
|
specialType: string | null;
|
|
}
|
|
|
|
/**
|
|
* Returns scraped data for all parks across a date range.
|
|
* Shape: { parkId: { 'YYYY-MM-DD': DayData } }
|
|
* Missing dates mean that date hasn't been scraped yet (not necessarily closed).
|
|
*/
|
|
export function getDateRange(
|
|
db: Database.Database,
|
|
startDate: string,
|
|
endDate: string
|
|
): Record<string, Record<string, DayData>> {
|
|
const rows = db
|
|
.prepare(
|
|
`SELECT park_id, date, is_open, hours_label, special_type
|
|
FROM park_days
|
|
WHERE date >= ? AND date <= ?`
|
|
)
|
|
.all(startDate, endDate) as {
|
|
park_id: string;
|
|
date: string;
|
|
is_open: number;
|
|
hours_label: string | null;
|
|
special_type: string | null;
|
|
}[];
|
|
|
|
const result: Record<string, Record<string, DayData>> = {};
|
|
for (const row of rows) {
|
|
if (!result[row.park_id]) result[row.park_id] = {};
|
|
result[row.park_id][row.date] = {
|
|
isOpen: row.is_open === 1,
|
|
hoursLabel: row.hours_label,
|
|
specialType: row.special_type,
|
|
};
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Returns scraped DayData for a single park for an entire month.
|
|
* Shape: { 'YYYY-MM-DD': DayData }
|
|
*/
|
|
export function getParkMonthData(
|
|
db: Database.Database,
|
|
parkId: string,
|
|
year: number,
|
|
month: number,
|
|
): Record<string, DayData> {
|
|
const prefix = `${year}-${String(month).padStart(2, "0")}`;
|
|
const rows = db
|
|
.prepare(
|
|
`SELECT date, is_open, hours_label, special_type
|
|
FROM park_days
|
|
WHERE park_id = ? AND date LIKE ? || '-%'
|
|
ORDER BY date`
|
|
)
|
|
.all(parkId, prefix) as {
|
|
date: string;
|
|
is_open: number;
|
|
hours_label: string | null;
|
|
special_type: string | null;
|
|
}[];
|
|
|
|
const result: Record<string, DayData> = {};
|
|
for (const row of rows) {
|
|
result[row.date] = {
|
|
isOpen: row.is_open === 1,
|
|
hoursLabel: row.hours_label,
|
|
specialType: row.special_type,
|
|
};
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/** Returns a map of parkId → boolean[] (index 0 = day 1) for a given month. */
|
|
export function getMonthCalendar(
|
|
db: Database.Database,
|
|
year: number,
|
|
month: number
|
|
): Record<string, boolean[]> {
|
|
const prefix = `${year}-${String(month).padStart(2, "0")}`;
|
|
const rows = db
|
|
.prepare(
|
|
`SELECT park_id, date, is_open
|
|
FROM park_days
|
|
WHERE date LIKE ? || '-%'
|
|
ORDER BY date`
|
|
)
|
|
.all(prefix) as { park_id: string; date: string; is_open: number }[];
|
|
|
|
const result: Record<string, boolean[]> = {};
|
|
for (const row of rows) {
|
|
if (!result[row.park_id]) result[row.park_id] = [];
|
|
const day = parseInt(row.date.slice(8), 10);
|
|
result[row.park_id][day - 1] = row.is_open === 1;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
const STALE_AFTER_MS = 7 * 24 * 60 * 60 * 1000; // 1 week
|
|
|
|
/**
|
|
* Returns true when the scraper should skip this park+month.
|
|
*
|
|
* Two reasons to skip:
|
|
* 1. The month is entirely in the past — the API will never return data for
|
|
* those dates again, so re-scraping wastes a call and risks nothing but
|
|
* wasted time. Historical records are preserved forever by upsertDay.
|
|
* 2. The month was scraped within the last 7 days — data is still fresh.
|
|
*/
|
|
export function isMonthScraped(
|
|
db: Database.Database,
|
|
parkId: string,
|
|
year: number,
|
|
month: number
|
|
): boolean {
|
|
// Compute the last calendar day of this month (avoids timezone issues).
|
|
const daysInMonth = new Date(year, month, 0).getDate();
|
|
const lastDay = `${year}-${String(month).padStart(2, "0")}-${String(daysInMonth).padStart(2, "0")}`;
|
|
const today = new Date().toISOString().slice(0, 10);
|
|
|
|
// Past month — history is locked in, no API data available, always skip.
|
|
if (lastDay < today) return true;
|
|
|
|
// Current/future month — skip only if recently scraped.
|
|
const prefix = `${year}-${String(month).padStart(2, "0")}`;
|
|
const row = db
|
|
.prepare(
|
|
`SELECT MAX(scraped_at) AS last_scraped
|
|
FROM park_days
|
|
WHERE park_id = ? AND date LIKE ? || '-%'`
|
|
)
|
|
.get(parkId, prefix) as { last_scraped: string | null };
|
|
|
|
if (!row.last_scraped) return false;
|
|
const ageMs = Date.now() - new Date(row.last_scraped).getTime();
|
|
return ageMs < STALE_AFTER_MS;
|
|
}
|
|
|
|
export function getApiId(db: Database.Database, parkId: string): number | null {
|
|
const row = db
|
|
.prepare("SELECT api_id FROM park_api_ids WHERE park_id = ?")
|
|
.get(parkId) as { api_id: number } | undefined;
|
|
return row?.api_id ?? null;
|
|
}
|
|
|
|
export function setApiId(
|
|
db: Database.Database,
|
|
parkId: string,
|
|
apiId: number,
|
|
apiAbbreviation?: string,
|
|
apiName?: string
|
|
) {
|
|
db.prepare(`
|
|
INSERT INTO park_api_ids (park_id, api_id, api_abbreviation, api_name, discovered_at)
|
|
VALUES (?, ?, ?, ?, ?)
|
|
ON CONFLICT (park_id) DO UPDATE SET
|
|
api_id = excluded.api_id,
|
|
api_abbreviation = excluded.api_abbreviation,
|
|
api_name = excluded.api_name,
|
|
discovered_at = excluded.discovered_at
|
|
`).run(
|
|
parkId,
|
|
apiId,
|
|
apiAbbreviation ?? null,
|
|
apiName ?? null,
|
|
new Date().toISOString()
|
|
);
|
|
}
|
|
|
|
/**
|
|
* Find the next park+month to scrape.
|
|
* Priority: never-scraped first, then oldest scraped_at.
|
|
* Considers current month through monthsAhead months into the future.
|
|
*/
|
|
export function getNextScrapeTarget(
|
|
db: Database.Database,
|
|
parkIds: string[],
|
|
monthsAhead = 12
|
|
): { parkId: string; year: number; month: number } | null {
|
|
const now = new Date();
|
|
|
|
const candidates: {
|
|
parkId: string;
|
|
year: number;
|
|
month: number;
|
|
lastScraped: string | null;
|
|
}[] = [];
|
|
|
|
for (const parkId of parkIds) {
|
|
for (let i = 0; i < monthsAhead; i++) {
|
|
const d = new Date(now.getFullYear(), now.getMonth() + i, 1);
|
|
const year = d.getFullYear();
|
|
const month = d.getMonth() + 1;
|
|
const prefix = `${year}-${String(month).padStart(2, "0")}`;
|
|
|
|
const row = db
|
|
.prepare(
|
|
`SELECT MAX(scraped_at) AS last_scraped
|
|
FROM park_days
|
|
WHERE park_id = ? AND date LIKE ? || '-%'`
|
|
)
|
|
.get(parkId, prefix) as { last_scraped: string | null };
|
|
|
|
candidates.push({ parkId, year, month, lastScraped: row.last_scraped });
|
|
}
|
|
}
|
|
|
|
// Never-scraped (null) first, then oldest scraped_at
|
|
candidates.sort((a, b) => {
|
|
if (!a.lastScraped && !b.lastScraped) return 0;
|
|
if (!a.lastScraped) return -1;
|
|
if (!b.lastScraped) return 1;
|
|
return a.lastScraped.localeCompare(b.lastScraped);
|
|
});
|
|
|
|
const top = candidates[0];
|
|
return top ? { parkId: top.parkId, year: top.year, month: top.month } : null;
|
|
}
|