#!/bin/sh
# Nightly scraper scheduler — runs inside the Docker scraper service.
#
# Behaviour:
#   1. Runs an initial scrape immediately on container start.
#   2. Sleeps until 3:00 AM (container timezone, set via TZ env var).
#   3. Runs the scraper, then sleeps until the next 3:00 AM, forever.
#
# Timezone: set TZ in the scraper service environment to control when
# "3am" is (e.g. TZ=America/New_York). Defaults to UTC if unset.

# Write one timestamped log line to stdout.
# Arguments: $* - message words; joined with single spaces.
# Output:    "[scheduler] YYYY-MM-DD HH:MM TZ — <message>"
log() {
  # printf with a fixed format emits the message verbatim. echo's treatment of
  # backslashes (and of a leading -n) differs between /bin/sh implementations.
  printf '[scheduler] %s — %s\n' "$(date '+%Y-%m-%d %H:%M %Z')" "$*"
}

# Run a single scrape via `npm run scrape` and log the outcome.
# A failed scrape is only logged; the function does not abort the scheduler,
# so the next scheduled run acts as the retry.
run_scrape() {
  log "Starting scrape"

  # Handle the failure path first and bail out early.
  if ! npm run scrape; then
    log "Scrape failed — will retry at next scheduled time"
    return
  fi

  log "Scrape completed"
}

# Print the number of seconds from now until the next 03:00 local time.
# Outputs: a positive integer on stdout.
#
# Preferred path uses GNU `date -d`, which resolves "03:00" in the container's
# timezone. If that is unavailable or fails, the value is derived from the
# current wall-clock time instead, so the caller never receives an empty or
# negative value (which would make `sleep` fail and the loop spin).
seconds_until_3am() {
  now=$(date +%s)

  # Try today's 3am first; if already past, use tomorrow's.
  target=$(date -d "today 03:00" +%s 2>/dev/null) || target=""
  if [ -n "$target" ] && [ "$now" -ge "$target" ]; then
    target=$(date -d "tomorrow 03:00" +%s 2>/dev/null) || target=""
  fi
  if [ -n "$target" ] && [ "$target" -gt "$now" ]; then
    echo $((target - now))
    return 0
  fi

  # Fallback: plain arithmetic on the local wall clock. This assumes a 24h
  # day, so it can be off by the DST shift on a transition day.
  printf '%s\n' "[scheduler] date -d unavailable; using wall-clock fallback" >&2
  clock=$(date +%H:%M:%S)
  hh=${clock%%:*}
  rest=${clock#*:}
  mm=${rest%%:*}
  ss=${rest#*:}
  # Strip one leading zero so values like "08" are not parsed as octal.
  elapsed=$((${hh#0} * 3600 + ${mm#0} * 60 + ${ss#0}))
  remaining=$((3 * 3600 - elapsed))
  if [ "$remaining" -le 0 ]; then
    remaining=$((remaining + 86400))
  fi
  echo "$remaining"
}

# ── Run immediately on startup ────────────────────────────────────────────────
run_scrape

# ── Nightly loop ──────────────────────────────────────────────────────────────
# Each iteration: compute the delay to the next 3:00 AM, announce it, sleep,
# then scrape.
while true; do
  delay=$(seconds_until_3am)

  # Never hand sleep an empty, non-numeric, negative or zero value: sleep
  # would return immediately and the loop would scrape back-to-back.
  case "$delay" in
    '' | *[!0-9]* | 0)
      log "Could not determine time until 3:00 AM (got '${delay}') — falling back to 24h"
      delay=86400
      ;;
  esac

  # The formatted wake-up time is informational only; tolerate its failure.
  next=$(date -d "now + ${delay} seconds" '+%Y-%m-%d %H:%M %Z' 2>/dev/null) ||
    next="time unavailable"
  log "Next scrape in $((delay / 3600))h $(( (delay % 3600) / 60 ))m  (${next})"
  sleep "$delay"
  run_scrape
done