refactor: one-line-per-park output with inline month progress
All checks were successful
Build and Deploy / Build & Push (push) Successful in 3m11s
All checks were successful
Build and Deploy / Build & Push (push) Successful in 3m11s
Each park prints a row of █ (fetched) and · (skipped) as months complete, then ends with an open-day count, "up to date", or an error count.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3,15 +3,8 @@
|
|||||||
*
|
*
|
||||||
* Prerequisite: run `npm run discover` first to populate API IDs.
|
* Prerequisite: run `npm run discover` first to populate API IDs.
|
||||||
*
|
*
|
||||||
* Run once and leave it:
|
* npm run scrape — skips months scraped within the last 7 days
|
||||||
* npm run scrape
|
* npm run scrape:force — re-scrapes everything
|
||||||
*
|
|
||||||
* Skips park+month combos scraped within the last week. Re-run to resume after interruption.
|
|
||||||
* To force a full re-scrape:
|
|
||||||
* npm run scrape:force
|
|
||||||
*
|
|
||||||
* Rate limiting: backs off automatically (30s → 60s → 120s per retry).
|
|
||||||
* After exhausting retries, skips that park+month and continues.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { openDb, upsertDay, getApiId, isMonthScraped } from "../lib/db";
|
import { openDb, upsertDay, getApiId, isMonthScraped } from "../lib/db";
|
||||||
@@ -20,17 +13,9 @@ import { scrapeMonth, RateLimitError } from "../lib/scrapers/sixflags";
|
|||||||
|
|
||||||
const YEAR = 2026;
|
const YEAR = 2026;
|
||||||
const MONTHS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
|
const MONTHS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
|
||||||
const DELAY_MS = 1000; // between successful API calls
|
const DELAY_MS = 1000;
|
||||||
const FORCE = process.argv.includes("--rescrape");
|
const FORCE = process.argv.includes("--rescrape");
|
||||||
|
|
||||||
function monthLabel(m: number) {
|
|
||||||
return `${YEAR}-${String(m).padStart(2, "0")}`;
|
|
||||||
}
|
|
||||||
|
|
||||||
function pad(n: number, width: number) {
|
|
||||||
return String(n).padStart(width, " ");
|
|
||||||
}
|
|
||||||
|
|
||||||
async function sleep(ms: number) {
|
async function sleep(ms: number) {
|
||||||
return new Promise<void>((r) => setTimeout(r, ms));
|
return new Promise<void>((r) => setTimeout(r, ms));
|
||||||
}
|
}
|
||||||
@@ -38,84 +23,80 @@ async function sleep(ms: number) {
|
|||||||
async function main() {
|
async function main() {
|
||||||
const db = openDb();
|
const db = openDb();
|
||||||
|
|
||||||
// Separate parks with known API IDs from those needing discovery
|
|
||||||
const ready = PARKS.filter((p) => getApiId(db, p.id) !== null);
|
const ready = PARKS.filter((p) => getApiId(db, p.id) !== null);
|
||||||
const needsDiscovery = PARKS.filter((p) => getApiId(db, p.id) === null);
|
const needsDiscovery = PARKS.filter((p) => getApiId(db, p.id) === null);
|
||||||
|
|
||||||
if (needsDiscovery.length > 0) {
|
if (needsDiscovery.length > 0) {
|
||||||
console.log(
|
console.log(
|
||||||
`⚠ ${needsDiscovery.length} parks have no API ID — run \`npm run discover\` first:\n` +
|
`⚠ ${needsDiscovery.length} park(s) need discovery first: ${needsDiscovery.map((p) => p.id).join(", ")}\n`
|
||||||
needsDiscovery.map((p) => ` ${p.id}`).join("\n") +
|
|
||||||
"\n"
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ready.length === 0) {
|
if (ready.length === 0) {
|
||||||
console.log("No parks ready to scrape. Run: npm run discover");
|
console.log("No parks ready — run: npm run discover");
|
||||||
db.close();
|
db.close();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build the full work queue: park × month (all 12 months per park before moving on)
|
console.log(`Scraping ${YEAR} — ${ready.length} parks\n`);
|
||||||
const queue: { month: number; park: (typeof PARKS)[0]; apiId: number }[] = [];
|
|
||||||
|
let totalFetched = 0;
|
||||||
|
let totalSkipped = 0;
|
||||||
|
let totalErrors = 0;
|
||||||
|
|
||||||
for (const park of ready) {
|
for (const park of ready) {
|
||||||
for (const month of MONTHS) {
|
const apiId = getApiId(db, park.id)!;
|
||||||
if (!FORCE && isMonthScraped(db, park.id, YEAR, month)) continue;
|
const label = park.shortName.padEnd(22);
|
||||||
queue.push({ month, park, apiId: getApiId(db, park.id)! });
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const total = MONTHS.length * ready.length;
|
let openDays = 0;
|
||||||
const skip = total - queue.length;
|
let fetched = 0;
|
||||||
console.log(
|
let skipped = 0;
|
||||||
`Scraping ${YEAR} — ${ready.length} parks × 12 months = ${total} total\n` +
|
|
||||||
`Skipping ${skip} already-scraped. ${queue.length} to fetch.\n`
|
|
||||||
);
|
|
||||||
|
|
||||||
if (queue.length === 0) {
|
|
||||||
console.log("Nothing to do. To force a full re-scrape: npm run scrape:force");
|
|
||||||
db.close();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
let done = 0;
|
|
||||||
let errors = 0;
|
let errors = 0;
|
||||||
|
|
||||||
for (const { month, park, apiId } of queue) {
|
process.stdout.write(` ${label} `);
|
||||||
const counter = `[${pad(done + 1, queue.length.toString().length)}/${queue.length}]`;
|
|
||||||
process.stdout.write(`${counter} ${park.shortName.padEnd(22)} ${monthLabel(month)} ... `);
|
for (const month of MONTHS) {
|
||||||
|
if (!FORCE && isMonthScraped(db, park.id, YEAR, month)) {
|
||||||
|
process.stdout.write("·");
|
||||||
|
skipped++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const days = await scrapeMonth(apiId, YEAR, month);
|
const days = await scrapeMonth(apiId, YEAR, month);
|
||||||
const insertAll = db.transaction(() => {
|
db.transaction(() => {
|
||||||
for (const d of days) upsertDay(db, park.id, d.date, d.isOpen, d.hoursLabel);
|
for (const d of days) upsertDay(db, park.id, d.date, d.isOpen, d.hoursLabel);
|
||||||
});
|
})();
|
||||||
insertAll();
|
openDays += days.filter((d) => d.isOpen).length;
|
||||||
|
fetched++;
|
||||||
const openCount = days.filter((d) => d.isOpen).length;
|
process.stdout.write("█");
|
||||||
console.log(`${openCount}/${days.length} open`);
|
if (fetched + skipped + errors < MONTHS.length) await sleep(DELAY_MS);
|
||||||
done++;
|
|
||||||
|
|
||||||
if (done < queue.length) await sleep(DELAY_MS);
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
if (err instanceof RateLimitError) {
|
if (err instanceof RateLimitError) {
|
||||||
console.log(`RATE LIMITED — skipping (re-run to retry)`);
|
process.stdout.write("✗");
|
||||||
} else {
|
} else {
|
||||||
console.log(`ERROR: ${err instanceof Error ? err.message : err}`);
|
process.stdout.write("✗");
|
||||||
|
console.error(`\n error: ${err instanceof Error ? err.message : err}`);
|
||||||
}
|
}
|
||||||
errors++;
|
errors++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const summary = [
|
totalFetched += fetched;
|
||||||
`\n── Summary ─────────────────────────────`,
|
totalSkipped += skipped;
|
||||||
` Fetched : ${done}`,
|
totalErrors += errors;
|
||||||
` Skipped : ${skip}`,
|
|
||||||
` Errors : ${errors}`,
|
if (errors > 0) {
|
||||||
` Total : ${total}`,
|
console.log(` ${errors} error(s)`);
|
||||||
];
|
} else if (skipped === MONTHS.length) {
|
||||||
if (errors > 0) summary.push(`\nRe-run to retry failed months.`);
|
console.log(" up to date");
|
||||||
console.log(summary.join("\n"));
|
} else {
|
||||||
|
console.log(` ${openDays} open days`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`\n ${totalFetched} fetched ${totalSkipped} skipped ${totalErrors} errors`);
|
||||||
|
if (totalErrors > 0) console.log(" Re-run to retry failed months.");
|
||||||
|
|
||||||
db.close();
|
db.close();
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user