Fix LXC deploy: absolute paths + systemd section for StartLimit
CI / Lint + build + test (push) Failing after 5m17s

The service was crashing on every boot because vetting.example.yaml uses
./var/... relative paths, which resolve against / and are not writable under ProtectSystem=strict.
Ship a separate vetting.production.yaml with absolute /var/lib/vetting
+ /var/log/vetting paths that match the unit's ReadWritePaths, and
have install.sh copy that one. Also move StartLimit* keys into [Unit]
to silence the 'Unknown key' warning on modern systemd.
This commit is contained in:
2026-04-17 22:02:03 -04:00
parent 47b4fa35a6
commit 273e7593bc
3 changed files with 89 additions and 5 deletions
+4 -1
View File
@@ -110,9 +110,12 @@ if [[ -n "${GEN_PW}" ]]; then
fi fi
echo "==> installing config and systemd unit" echo "==> installing config and systemd unit"
# vetting.production.yaml uses absolute /var/lib/vetting + /var/log/vetting
# paths that match the systemd unit's ReadWritePaths. vetting.example.yaml
# uses ./var/... relatives and is only correct for `make run` in a dev tree.
if [[ ! -f "${CONFIG_DIR}/vetting.yaml" ]]; then if [[ ! -f "${CONFIG_DIR}/vetting.yaml" ]]; then
install -m 0640 -o root -g "${SERVICE_USER}" \ install -m 0640 -o root -g "${SERVICE_USER}" \
"${SCRIPT_DIR}/vetting.example.yaml" \ "${SCRIPT_DIR}/vetting.production.yaml" \
"${CONFIG_DIR}/vetting.yaml" "${CONFIG_DIR}/vetting.yaml"
echo " -> installed default config at ${CONFIG_DIR}/vetting.yaml" echo " -> installed default config at ${CONFIG_DIR}/vetting.yaml"
else else
+81
View File
@@ -0,0 +1,81 @@
# Network listener settings for the orchestrator.
server:
  # Loopback-only by default; change to "0.0.0.0:8080" (or similar) once
  # you've wired up TLS or fronted the service with a reverse proxy.
  bind: "127.0.0.1:8080"
  # Base URL the orchestrator is reachable at from the operator's
  # browser. Used as the click-through link in notifications.
  public_url: "http://127.0.0.1:8080"
  # NOTE(review): tls is assumed to nest under server — confirm against
  # the config loader. Both file paths are empty while TLS is disabled.
  tls:
    enabled: false
    cert_file: ""
    key_file: ""
# Database file location. Absolute path under /var/lib/vetting so it
# does not depend on the service's working directory.
database:
  path: "/var/lib/vetting/vetting.db"
# Per-run artifact storage (absolute path under /var/lib/vetting).
artifacts:
  dir: "/var/lib/vetting/artifacts"
  # Days to keep per-run artifact files (report.html, report.json, fio,
  # iperf, inventory.json, hold keys). DB rows are preserved. 0 = forever.
  retention_days: 30
# Per-run log file storage (absolute path under /var/log/vetting).
logs:
  dir: "/var/log/vetting"
  # Days to keep per-run log files. 0 = forever.
  retention_days: 30
# Periodic cleanup sweep settings.
janitor:
  # Interval between cleanup sweeps. 0 defaults to 60.
  interval_minutes: 60
# Admin login and session settings. The hash and secret below are
# placeholders and must be replaced with generated values.
auth:
  # bcrypt hash of your admin password.
  # Generate via: gen-admin-password 'your-password'
  admin_password_bcrypt: "$2a$10$REPLACE_ME_WITH_A_REAL_BCRYPT_HASH_0123456789abcdefABCDEFxx"
  # Random 32-byte hex string used to sign session cookies.
  # Generate via: openssl rand -hex 32
  # The all-zero value is a placeholder; do not run with it.
  session_secret_hex: "0000000000000000000000000000000000000000000000000000000000000000"
  session_ttl_hours: 24
# Run dispatcher settings.
dispatcher:
  # Upper bound on concurrent runs, going by the key name.
  max_concurrent_runs: 3
# PXE boot settings. Disabled by default; interface, dhcp_range and
# orchestrator_url are left empty until filled in.
pxe:
  enabled: false
  interface: ""  # e.g. "eth0"
  dhcp_range: ""  # e.g. "10.77.0.100,10.77.0.200,12h"
  orchestrator_url: ""  # e.g. "http://10.77.0.1:8080"
  tftp_root: "/var/lib/vetting/tftp"  # holds ipxe.efi + undionly.kpxe
  live_dir: "/var/lib/vetting/live"  # holds vmlinuz + initrd.img; served at /live/*
# Notifications fire on StageFailed, SpecMismatch, HoldingOpened,
# RunCompleted. Declare one or more notifiers and route each event
# kind (and optionally severity) to a notifier by name. Delivery is
# fire-and-forget (one attempt per event, logged on failure).
#
# Example (uncomment and fill in):
#
# notifiers:
#   - name: ops-ntfy
#     type: ntfy
#     server: https://ntfy.sh
#     topic: vetting-YOUR-TOPIC
#   - name: ops-discord
#     type: discord
#     webhook_url: https://discord.com/api/webhooks/XXX/YYY
#   - name: ops-email
#     type: smtp
#     smtp:
#       host: mail.lan
#       port: 25
#       from: vetting@lan.local
#       to: [ops@lan.local]
#
# routes:
#   - match_severity: [critical]
#     notifier: ops-ntfy
#   - match_kind: [RunCompleted]
#     notifier: ops-ntfy
#
# Both lists are empty by default: no notifiers or routes are declared.
notifiers: []
routes: []
+4 -4
View File
@@ -1,8 +1,10 @@
[Unit] [Unit]
Description=Vetting orchestrator (post-repair hardware validation) Description=Vetting orchestrator (post-repair hardware validation)
Documentation=https://github.com/your-org/vetting Documentation=https://gitea.thewrightserver.net/josh/Vetting
After=network-online.target After=network-online.target
Wants=network-online.target Wants=network-online.target
StartLimitBurst=5
StartLimitIntervalSec=60
[Service] [Service]
Type=simple Type=simple
@@ -38,11 +40,9 @@ RestrictNamespaces=true
LockPersonality=true LockPersonality=true
# Restart policy — crash out loudly on startup errors, but recover from # Restart policy — crash out loudly on startup errors, but recover from
# transient failures. # transient failures. (StartLimit* lives under [Unit] in modern systemd.)
Restart=on-failure Restart=on-failure
RestartSec=5 RestartSec=5
StartLimitBurst=5
StartLimitIntervalSec=60
# Logs go to journald; the orchestrator's own per-run log files live # Logs go to journald; the orchestrator's own per-run log files live
# under /var/log/vetting regardless. # under /var/log/vetting regardless.