feat: split web and scraper into separate Docker images

- Dockerfile: replace single runner stage with web + scraper named targets
  - web: Next.js standalone only — no playwright, tsx, or scripts
  - scraper: scripts/lib/node_modules/playwright only — no Next.js output
- docker-compose.yml: each service pulls its dedicated image tag
- .gitea/workflows/deploy.yml: build both targets on push to main
- lib/db.ts: STALE_AFTER_MS reads PARK_HOURS_STALENESS_HOURS env var (default 72h)
- lib/park-meta.ts: COASTER_STALE_MS reads COASTER_STALENESS_HOURS env var (default 720h)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-04 16:40:31 -04:00
parent 766fc296a1
commit eeb4a649c1
5 changed files with 58 additions and 46 deletions

.gitea/workflows/deploy.yml

@@ -4,26 +4,14 @@ on:
   push:
     branches:
       - main
-    tags:
-      - 'v*'
 jobs:
   build-push:
     name: Build & Push
     runs-on: ubuntu-latest
     if: github.event_name == 'push'
     steps:
       - uses: actions/checkout@v4
-      - name: Docker metadata
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ vars.REGISTRY }}/${{ gitea.repository_owner }}/sixflagssupercalendar
-          tags: |
-            type=semver,pattern={{version}}
-            type=raw,value=latest,enable={{is_default_branch}}
       - name: Log in to Gitea registry
         uses: docker/login-action@v3
         with:
@@ -31,10 +19,18 @@ jobs:
           username: ${{ gitea.actor }}
           password: ${{ secrets.REGISTRY_TOKEN }}
-      - name: Build and push
+      - name: Build and push web image
        uses: docker/build-push-action@v6
        with:
          context: .
+          target: web
          push: true
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
+          tags: ${{ vars.REGISTRY }}/${{ gitea.repository_owner }}/sixflagssupercalendar:web
+      - name: Build and push scraper image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          target: scraper
+          push: true
+          tags: ${{ vars.REGISTRY }}/${{ gitea.repository_owner }}/sixflagssupercalendar:scraper

Dockerfile

@@ -1,4 +1,4 @@
-# Stage 1: Install all dependencies (dev included — scripts need tsx + playwright)
+# Stage 1: Install all dependencies (dev included — scraper needs tsx + playwright)
 FROM node:22-bookworm-slim AS deps
 RUN apt-get update && apt-get install -y --no-install-recommends python3 make g++ && \
     rm -rf /var/lib/apt/lists/*
@@ -11,47 +11,60 @@ FROM deps AS builder
 COPY . .
 RUN npm run build
-# Stage 3: Production runner
-FROM node:22-bookworm-slim AS runner
+# ── web ──────────────────────────────────────────────────────────────────────
+# Minimal Next.js runner. No playwright, no tsx, no scripts.
+# next build --output standalone bundles its own node_modules (incl. better-sqlite3).
+FROM node:22-bookworm-slim AS web
 WORKDIR /app
 ENV NODE_ENV=production
 ENV NEXT_TELEMETRY_DISABLED=1
-# Store Playwright browser in a predictable path inside the image
-ENV PLAYWRIGHT_BROWSERS_PATH=/app/.playwright
 # Create non-root user before copying files so --chown works
 RUN addgroup --system --gid 1001 nodejs && \
     adduser --system --uid 1001 nextjs
 # Copy Next.js standalone output
 COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./
 COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static
 COPY --from=builder --chown=nextjs:nodejs /app/public ./public
-# Copy scripts + library source (needed for npm run discover/scrape via tsx)
-COPY --from=builder --chown=nextjs:nodejs /app/scripts ./scripts
-COPY --from=builder --chown=nextjs:nodejs /app/lib ./lib
-COPY --from=builder --chown=nextjs:nodejs /app/package.json ./package.json
-COPY --from=builder --chown=nextjs:nodejs /app/tsconfig.json ./tsconfig.json
-# Replace standalone's minimal node_modules with full deps
-# (includes tsx, playwright, and all devDependencies)
-COPY --from=deps --chown=nextjs:nodejs /app/node_modules ./node_modules
-# Install Playwright Chromium browser + all required system libraries.
-# Runs as root so apt-get works; browser lands in PLAYWRIGHT_BROWSERS_PATH.
-RUN npx playwright install --with-deps chromium && \
-    chown -R nextjs:nodejs /app/.playwright
 # SQLite data directory — mount a named volume here for persistence
 RUN mkdir -p /app/data && chown nextjs:nodejs /app/data
 VOLUME ["/app/data"]
 USER nextjs
 EXPOSE 3000
 ENV PORT=3000
 ENV HOSTNAME="0.0.0.0"
 CMD ["node", "server.js"]
+# ── scraper ───────────────────────────────────────────────────────────────────
+# Scraper-only image. No Next.js output. Runs on a nightly schedule via
+# scripts/scrape-schedule.sh. Staleness windows are configurable via env vars:
+#   PARK_HOURS_STALENESS_HOURS (default: 72)
+#   COASTER_STALENESS_HOURS (default: 720 = 30 days)
+FROM node:22-bookworm-slim AS scraper
+WORKDIR /app
+ENV NODE_ENV=production
+ENV PLAYWRIGHT_BROWSERS_PATH=/app/.playwright
+RUN addgroup --system --gid 1001 nodejs && \
+    adduser --system --uid 1001 nextjs
+COPY --from=builder --chown=nextjs:nodejs /app/scripts ./scripts
+COPY --from=builder --chown=nextjs:nodejs /app/lib ./lib
+COPY --from=builder --chown=nextjs:nodejs /app/tests ./tests
+COPY --from=builder --chown=nextjs:nodejs /app/package.json ./package.json
+COPY --from=builder --chown=nextjs:nodejs /app/tsconfig.json ./tsconfig.json
+# Full node_modules — includes tsx, playwright, better-sqlite3, all devDeps
+COPY --from=deps --chown=nextjs:nodejs /app/node_modules ./node_modules
+# Install Playwright Chromium + system libraries (runs as root, then fixes ownership)
+RUN npx playwright install --with-deps chromium && \
+    chown -R nextjs:nodejs /app/.playwright
+RUN mkdir -p /app/data && chown nextjs:nodejs /app/data
+VOLUME ["/app/data"]
+USER nextjs
+CMD ["sh", "/app/scripts/scrape-schedule.sh"]

docker-compose.yml

@@ -1,6 +1,6 @@
 services:
   web:
-    image: gitea.thewrightserver.net/josh/sixflagssupercalendar:latest
+    image: gitea.thewrightserver.net/josh/sixflagssupercalendar:web
     ports:
       - "3000:3000"
     volumes:
@@ -10,13 +10,14 @@ services:
     restart: unless-stopped
   scraper:
-    image: gitea.thewrightserver.net/josh/sixflagssupercalendar:latest
+    image: gitea.thewrightserver.net/josh/sixflagssupercalendar:scraper
     volumes:
       - park_data:/app/data
     environment:
       - NODE_ENV=production
-      - TZ=America/New_York # set your local timezone so "3am" is 3am your time
-    command: sh /app/scripts/scrape-schedule.sh
+      - TZ=America/New_York
+      - PARK_HOURS_STALENESS_HOURS=72
+      - COASTER_STALENESS_HOURS=720
     restart: unless-stopped

 volumes:

lib/db.ts

@@ -167,7 +167,8 @@ export function getMonthCalendar(
   return result;
 }

-const STALE_AFTER_MS = 72 * 60 * 60 * 1000; // 72 hours
+const STALE_AFTER_MS =
+  parseInt(process.env.PARK_HOURS_STALENESS_HOURS ?? "72", 10) * 60 * 60 * 1000;

 /**
  * Returns true when the scraper should skip this park+month.
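
For orientation, here is a minimal sketch of how a guard matching that docstring might use STALE_AFTER_MS. The actual body falls outside this hunk, so the helper name isParkMonthFresh, its scrapedAt argument, and the comparison direction are assumptions, not code from this commit:

function isParkMonthFresh(scrapedAt: string | null): boolean {
  // Never scraped: treat as stale so the scraper fetches it.
  if (scrapedAt === null) return false;
  const ageMs = Date.now() - new Date(scrapedAt).getTime();
  // Skip only while the row is younger than the configured window.
  return ageMs < STALE_AFTER_MS;
}

One property of the parseInt pattern above: a malformed env value yields NaN, and every comparison against NaN is false, so in a sketch like this a bad PARK_HOURS_STALENESS_HOURS fails toward re-scraping rather than silently skipping parks.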

lib/park-meta.ts

@@ -44,7 +44,8 @@ export function defaultParkMeta(): ParkMeta {
   return { rcdb_id: null, coasters: [], coasters_scraped_at: null };
 }

-const COASTER_STALE_MS = 30 * 24 * 60 * 60 * 1000; // 30 days
+const COASTER_STALE_MS =
+  parseInt(process.env.COASTER_STALENESS_HOURS ?? "720", 10) * 60 * 60 * 1000;

 /** Returns true when the coaster list needs to be re-scraped from RCDB. */
 export function areCoastersStale(entry: ParkMeta): boolean {
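
The hunk ends before the function body. A plausible completion, assuming coasters_scraped_at (initialised to null in defaultParkMeta above) stores an ISO-8601 timestamp once set; the real implementation may differ:

export function areCoastersStale(entry: ParkMeta): boolean {
  // No timestamp yet: the coaster list has never been scraped.
  if (entry.coasters_scraped_at === null) return true;
  const ageMs = Date.now() - new Date(entry.coasters_scraped_at).getTime();
  // Stale once the list is older than the configured window (default 30 days).
  return ageMs >= COASTER_STALE_MS;
}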