feat: split web and scraper into separate Docker images

- Dockerfile: replace single runner stage with web + scraper named targets
  - web: Next.js standalone only — no playwright, tsx, or scripts
  - scraper: scripts/lib/node_modules/playwright only — no Next.js output
- docker-compose.yml: each service pulls its dedicated image tag
- .gitea/workflows/deploy.yml: build both targets on push to main
- lib/db.ts: STALE_AFTER_MS reads PARK_HOURS_STALENESS_HOURS env var (default 72h)
- lib/park-meta.ts: COASTER_STALE_MS reads COASTER_STALENESS_HOURS env var (default 720h)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-04 16:40:31 -04:00
parent 766fc296a1
commit eeb4a649c1
5 changed files with 58 additions and 46 deletions

.gitea/workflows/deploy.yml

@@ -4,26 +4,14 @@ on:
   push:
     branches:
       - main
-    tags:
-      - 'v*'
 jobs:
   build-push:
     name: Build & Push
     runs-on: ubuntu-latest
     if: github.event_name == 'push'
     steps:
       - uses: actions/checkout@v4
-      - name: Docker metadata
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ vars.REGISTRY }}/${{ gitea.repository_owner }}/sixflagssupercalendar
-          tags: |
-            type=semver,pattern={{version}}
-            type=raw,value=latest,enable={{is_default_branch}}
       - name: Log in to Gitea registry
         uses: docker/login-action@v3
         with:
@@ -31,10 +19,18 @@ jobs:
           username: ${{ gitea.actor }}
           password: ${{ secrets.REGISTRY_TOKEN }}
-      - name: Build and push
+      - name: Build and push web image
        uses: docker/build-push-action@v6
        with:
          context: .
+          target: web
          push: true
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
+          tags: ${{ vars.REGISTRY }}/${{ gitea.repository_owner }}/sixflagssupercalendar:web
+      - name: Build and push scraper image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          target: scraper
+          push: true
+          tags: ${{ vars.REGISTRY }}/${{ gitea.repository_owner }}/sixflagssupercalendar:scraper

Dockerfile

@@ -1,4 +1,4 @@
-# Stage 1: Install all dependencies (dev included — scripts need tsx + playwright)
+# Stage 1: Install all dependencies (dev included — scraper needs tsx + playwright)
 FROM node:22-bookworm-slim AS deps
 RUN apt-get update && apt-get install -y --no-install-recommends python3 make g++ && \
     rm -rf /var/lib/apt/lists/*
@@ -11,47 +11,60 @@ FROM deps AS builder
 COPY . .
 RUN npm run build
-# Stage 3: Production runner
-FROM node:22-bookworm-slim AS runner
+# ── web ──────────────────────────────────────────────────────────────────────
+# Minimal Next.js runner. No playwright, no tsx, no scripts.
+# next build --output standalone bundles its own node_modules (incl. better-sqlite3).
+FROM node:22-bookworm-slim AS web
 WORKDIR /app
 ENV NODE_ENV=production
 ENV NEXT_TELEMETRY_DISABLED=1
-# Store Playwright browser in a predictable path inside the image
-ENV PLAYWRIGHT_BROWSERS_PATH=/app/.playwright
 # Create non-root user before copying files so --chown works
 RUN addgroup --system --gid 1001 nodejs && \
     adduser --system --uid 1001 nextjs
 # Copy Next.js standalone output
 COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./
 COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static
 COPY --from=builder --chown=nextjs:nodejs /app/public ./public
-# Copy scripts + library source (needed for npm run discover/scrape via tsx)
-COPY --from=builder --chown=nextjs:nodejs /app/scripts ./scripts
-COPY --from=builder --chown=nextjs:nodejs /app/lib ./lib
-COPY --from=builder --chown=nextjs:nodejs /app/package.json ./package.json
-COPY --from=builder --chown=nextjs:nodejs /app/tsconfig.json ./tsconfig.json
-# Replace standalone's minimal node_modules with full deps
-# (includes tsx, playwright, and all devDependencies)
-COPY --from=deps --chown=nextjs:nodejs /app/node_modules ./node_modules
-# Install Playwright Chromium browser + all required system libraries.
-# Runs as root so apt-get works; browser lands in PLAYWRIGHT_BROWSERS_PATH.
-RUN npx playwright install --with-deps chromium && \
-    chown -R nextjs:nodejs /app/.playwright
 # SQLite data directory — mount a named volume here for persistence
 RUN mkdir -p /app/data && chown nextjs:nodejs /app/data
 VOLUME ["/app/data"]
 USER nextjs
 EXPOSE 3000
 ENV PORT=3000
 ENV HOSTNAME="0.0.0.0"
 CMD ["node", "server.js"]
+# ── scraper ───────────────────────────────────────────────────────────────────
+# Scraper-only image. No Next.js output. Runs on a nightly schedule via
+# scripts/scrape-schedule.sh. Staleness windows are configurable via env vars:
+#   PARK_HOURS_STALENESS_HOURS (default: 72)
+#   COASTER_STALENESS_HOURS (default: 720 = 30 days)
+FROM node:22-bookworm-slim AS scraper
+WORKDIR /app
+ENV NODE_ENV=production
+ENV PLAYWRIGHT_BROWSERS_PATH=/app/.playwright
+RUN addgroup --system --gid 1001 nodejs && \
+    adduser --system --uid 1001 nextjs
+COPY --from=builder --chown=nextjs:nodejs /app/scripts ./scripts
+COPY --from=builder --chown=nextjs:nodejs /app/lib ./lib
+COPY --from=builder --chown=nextjs:nodejs /app/tests ./tests
+COPY --from=builder --chown=nextjs:nodejs /app/package.json ./package.json
+COPY --from=builder --chown=nextjs:nodejs /app/tsconfig.json ./tsconfig.json
+# Full node_modules — includes tsx, playwright, better-sqlite3, all devDeps
+COPY --from=deps --chown=nextjs:nodejs /app/node_modules ./node_modules
+# Install Playwright Chromium + system libraries (runs as root, then fixes ownership)
+RUN npx playwright install --with-deps chromium && \
+    chown -R nextjs:nodejs /app/.playwright
+RUN mkdir -p /app/data && chown nextjs:nodejs /app/data
+VOLUME ["/app/data"]
+USER nextjs
+CMD ["sh", "/app/scripts/scrape-schedule.sh"]

docker-compose.yml

@@ -1,6 +1,6 @@
 services:
   web:
-    image: gitea.thewrightserver.net/josh/sixflagssupercalendar:latest
+    image: gitea.thewrightserver.net/josh/sixflagssupercalendar:web
     ports:
       - "3000:3000"
     volumes:
@@ -10,13 +10,14 @@ services:
     restart: unless-stopped
   scraper:
-    image: gitea.thewrightserver.net/josh/sixflagssupercalendar:latest
+    image: gitea.thewrightserver.net/josh/sixflagssupercalendar:scraper
     volumes:
       - park_data:/app/data
     environment:
       - NODE_ENV=production
-      - TZ=America/New_York # set your local timezone so "3am" is 3am your time
-    command: sh /app/scripts/scrape-schedule.sh
+      - TZ=America/New_York
+      - PARK_HOURS_STALENESS_HOURS=72
+      - COASTER_STALENESS_HOURS=720
     restart: unless-stopped

 volumes:

lib/db.ts

@@ -167,7 +167,8 @@ export function getMonthCalendar(
   return result;
 }

-const STALE_AFTER_MS = 72 * 60 * 60 * 1000; // 72 hours
+const STALE_AFTER_MS =
+  parseInt(process.env.PARK_HOURS_STALENESS_HOURS ?? "72", 10) * 60 * 60 * 1000;

 /**
  * Returns true when the scraper should skip this park+month.
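
For orientation, here is a minimal sketch of how a guard matching that docstring might use STALE_AFTER_MS. The actual body falls outside this hunk, so the helper name isParkMonthFresh, its scrapedAt argument, and the comparison direction are assumptions, not code from this commit:

function isParkMonthFresh(scrapedAt: string | null): boolean {
  // Never scraped: treat as stale so the scraper fetches it.
  if (scrapedAt === null) return false;
  const ageMs = Date.now() - new Date(scrapedAt).getTime();
  // Skip only while the row is younger than the configured window.
  return ageMs < STALE_AFTER_MS;
}

One property of the parseInt pattern above: a malformed env value yields NaN, and every comparison against NaN is false, so in a sketch like this a bad PARK_HOURS_STALENESS_HOURS fails toward re-scraping rather than silently skipping parks.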

lib/park-meta.ts

@@ -44,7 +44,8 @@ export function defaultParkMeta(): ParkMeta {
   return { rcdb_id: null, coasters: [], coasters_scraped_at: null };
 }

-const COASTER_STALE_MS = 30 * 24 * 60 * 60 * 1000; // 30 days
+const COASTER_STALE_MS =
+  parseInt(process.env.COASTER_STALENESS_HOURS ?? "720", 10) * 60 * 60 * 1000;

 /** Returns true when the coaster list needs to be re-scraped from RCDB. */
 export function areCoastersStale(entry: ParkMeta): boolean {
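
The hunk ends before the function body. A plausible completion, assuming coasters_scraped_at (initialised to null in defaultParkMeta above) stores an ISO-8601 timestamp once set; the real implementation may differ:

export function areCoastersStale(entry: ParkMeta): boolean {
  // No timestamp yet: the coaster list has never been scraped.
  if (entry.coasters_scraped_at === null) return true;
  const ageMs = Date.now() - new Date(entry.coasters_scraped_at).getTime();
  // Stale once the list is older than the configured window (default 30 days).
  return ageMs >= COASTER_STALE_MS;
}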