From 5bef17aa415c1b945aa57a8afccd367783670529 Mon Sep 17 00:00:00 2001
From: josh
Date: Sat, 4 Apr 2026 10:31:11 -0400
Subject: [PATCH] refactor: one-line-per-park output with inline month progress
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Each park prints a row of █ (fetched) and · (skipped) as months
complete, then ends with open day count, "up to date", or error count.

Co-Authored-By: Claude Sonnet 4.6
---
 scripts/scrape.ts | 135 ++++++++++++++++++++--------------------------
 1 file changed, 58 insertions(+), 77 deletions(-)

diff --git a/scripts/scrape.ts b/scripts/scrape.ts
index abc8e0a..571b849 100644
--- a/scripts/scrape.ts
+++ b/scripts/scrape.ts
@@ -3,15 +3,8 @@
  *
  * Prerequisite: run `npm run discover` first to populate API IDs.
  *
- * Run once and leave it:
- * npm run scrape
- *
- * Skips park+month combos scraped within the last week. Re-run to resume after interruption.
- * To force a full re-scrape:
- * npm run scrape:force
- *
- * Rate limiting: backs off automatically (30s → 60s → 120s per retry).
- * After exhausting retries, skips that park+month and continues.
+ * npm run scrape — skips months scraped within the last 7 days
+ * npm run scrape:force — re-scrapes everything
  */
 
 import { openDb, upsertDay, getApiId, isMonthScraped } from "../lib/db";
@@ -20,17 +13,9 @@ import { scrapeMonth, RateLimitError } from "../lib/scrapers/sixflags";
 
 const YEAR = 2026;
 const MONTHS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
-const DELAY_MS = 1000; // between successful API calls
+const DELAY_MS = 1000;
 const FORCE = process.argv.includes("--rescrape");
 
-function monthLabel(m: number) {
-  return `${YEAR}-${String(m).padStart(2, "0")}`;
-}
-
-function pad(n: number, width: number) {
-  return String(n).padStart(width, " ");
-}
-
 async function sleep(ms: number) {
   return new Promise((r) => setTimeout(r, ms));
 }
@@ -38,84 +23,80 @@ async function sleep(ms: number) {
 async function main() {
   const db = openDb();
 
-  // Separate parks with known API IDs from those needing discovery
   const ready = PARKS.filter((p) => getApiId(db, p.id) !== null);
   const needsDiscovery = PARKS.filter((p) => getApiId(db, p.id) === null);
 
   if (needsDiscovery.length > 0) {
     console.log(
-      `⚠ ${needsDiscovery.length} parks have no API ID — run \`npm run discover\` first:\n` +
-        needsDiscovery.map((p) => ` ${p.id}`).join("\n") +
-        "\n"
+      `⚠ ${needsDiscovery.length} park(s) need discovery first: ${needsDiscovery.map((p) => p.id).join(", ")}\n`
     );
   }
 
   if (ready.length === 0) {
-    console.log("No parks ready to scrape. Run: npm run discover");
+    console.log("No parks ready — run: npm run discover");
     db.close();
     return;
   }
 
-  // Build the full work queue: park × month (all 12 months per park before moving on)
-  const queue: { month: number; park: (typeof PARKS)[0]; apiId: number }[] = [];
+  console.log(`Scraping ${YEAR} — ${ready.length} parks\n`);
+
+  let totalFetched = 0;
+  let totalSkipped = 0;
+  let totalErrors = 0;
+
   for (const park of ready) {
+    const apiId = getApiId(db, park.id)!;
+    const label = park.shortName.padEnd(22);
+
+    let openDays = 0;
+    let fetched = 0;
+    let skipped = 0;
+    let errors = 0;
+
+    process.stdout.write(` ${label} `);
+
     for (const month of MONTHS) {
-      if (!FORCE && isMonthScraped(db, park.id, YEAR, month)) continue;
-      queue.push({ month, park, apiId: getApiId(db, park.id)! });
-    }
-  }
-
-  const total = MONTHS.length * ready.length;
-  const skip = total - queue.length;
-  console.log(
-    `Scraping ${YEAR} — ${ready.length} parks × 12 months = ${total} total\n` +
-      `Skipping ${skip} already-scraped. ${queue.length} to fetch.\n`
-  );
-
-  if (queue.length === 0) {
-    console.log("Nothing to do. To force a full re-scrape: npm run scrape:force");
-    db.close();
-    return;
-  }
-
-  let done = 0;
-  let errors = 0;
-
-  for (const { month, park, apiId } of queue) {
-    const counter = `[${pad(done + 1, queue.length.toString().length)}/${queue.length}]`;
-    process.stdout.write(`${counter} ${park.shortName.padEnd(22)} ${monthLabel(month)} ... `);
-
-    try {
-      const days = await scrapeMonth(apiId, YEAR, month);
-      const insertAll = db.transaction(() => {
-        for (const d of days) upsertDay(db, park.id, d.date, d.isOpen, d.hoursLabel);
-      });
-      insertAll();
-
-      const openCount = days.filter((d) => d.isOpen).length;
-      console.log(`${openCount}/${days.length} open`);
-      done++;
-
-      if (done < queue.length) await sleep(DELAY_MS);
-    } catch (err) {
-      if (err instanceof RateLimitError) {
-        console.log(`RATE LIMITED — skipping (re-run to retry)`);
-      } else {
-        console.log(`ERROR: ${err instanceof Error ? err.message : err}`);
+      if (!FORCE && isMonthScraped(db, park.id, YEAR, month)) {
+        process.stdout.write("·");
+        skipped++;
+        continue;
       }
-      errors++;
+
+      try {
+        const days = await scrapeMonth(apiId, YEAR, month);
+        db.transaction(() => {
+          for (const d of days) upsertDay(db, park.id, d.date, d.isOpen, d.hoursLabel);
+        })();
+        openDays += days.filter((d) => d.isOpen).length;
+        fetched++;
+        process.stdout.write("█");
+        if (fetched + skipped + errors < MONTHS.length) await sleep(DELAY_MS);
+      } catch (err) {
+        if (err instanceof RateLimitError) {
+          process.stdout.write("✗");
+        } else {
+          process.stdout.write("✗");
+          console.error(`\n error: ${err instanceof Error ? err.message : err}`);
+        }
+        errors++;
+      }
+    }
+
+    totalFetched += fetched;
+    totalSkipped += skipped;
+    totalErrors += errors;
+
+    if (errors > 0) {
+      console.log(` ${errors} error(s)`);
+    } else if (skipped === MONTHS.length) {
+      console.log(" up to date");
+    } else {
+      console.log(` ${openDays} open days`);
     }
   }
 
-  const summary = [
-    `\n── Summary ─────────────────────────────`,
-    ` Fetched : ${done}`,
-    ` Skipped : ${skip}`,
-    ` Errors : ${errors}`,
-    ` Total : ${total}`,
-  ];
-  if (errors > 0) summary.push(`\nRe-run to retry failed months.`);
-  console.log(summary.join("\n"));
+  console.log(`\n ${totalFetched} fetched ${totalSkipped} skipped ${totalErrors} errors`);
+  if (totalErrors > 0) console.log(" Re-run to retry failed months.");
 
   db.close();
 }