#!/usr/bin/env bash
# install.sh — one-shot installer for the vetting orchestrator on a
# Proxmox LXC (or any Debian/Ubuntu host).
#
# What it does:
#   1. apt-installs runtime dependencies (dnsmasq, iperf3, ca-certs).
#   2. Creates the `vetting` system user with /var/lib/vetting homedir.
#   3. Copies the pre-built `vetting` binary into /usr/local/bin.
#   4. Installs the systemd unit into /etc/systemd/system and, when no
#      config exists yet, the production config as /etc/vetting/vetting.yaml.
#   5. Reminds the operator to edit the config before enabling
#      the service — we don't auto-start because the default bind
#      is loopback-only and needs at least a tweak to be useful.
#
# What it deliberately does NOT do:
#   - Build the orchestrator (this script assumes you ran
#     `make orchestrator-linux` beforehand and that bin/vetting-linux-amd64
#     exists in the repo's bin/ directory next to this script's directory,
#     or that you pass --binary to locate it).
#   - Fetch TFTP iPXE payloads (that's pxe-setup.sh's job — it also
#     writes the pxe: block of vetting.yaml with first-time args).
#
# Live-image staging has two modes:
#   - Release bundle (new format): the bundle carries only a
#     live-image/VERSION pointer. We compare it to ${LIVE_DIR}/VERSION
#     and, on mismatch, fetch vmlinuz+initrd.img from the Gitea
#     generic registry at live-image/<version>/<file>. Matched versions
#     skip the fetch (set FORCE_LIVE_IMAGE=1 to override).
#   - Repo checkout / legacy bundle: if vmlinuz+initrd.img are present
#     next to this script (${SCRIPT_DIR}/live-image/) or under
#     ${REPO_ROOT}/live-image/build/, they're copied straight in.
#
# Usage:
#   sudo ./install.sh [--binary PATH] [--agent-binary PATH]
#                     [--config-dir /etc/vetting] [--live-image-src DIR]
#
set -euo pipefail

BINARY=""
AGENT_BINARY=""
CONFIG_DIR="/etc/vetting"
STATE_DIR="/var/lib/vetting"
LOG_DIR="/var/log/vetting"
ASSET_DIR="/var/lib/vetting/assets"
LIVE_DIR="/var/lib/vetting/live"
LIVE_IMAGE_SRC=""
SERVICE_USER="vetting"

# NOTE(review): the usage text and the option parser below were unreadable
# in the reviewed copy of this file (the heredoc and most of the case
# statement were lost). They are reconstructed from the variables above and
# from the flags this script names in its own messages (--binary,
# --agent-binary, --config-dir, --live-image-src) — confirm against the
# original wording before merging.

# usage: print the command-line synopsis to stdout.
usage() {
  cat <<EOF
Usage: sudo $0 [--binary PATH] [--agent-binary PATH]
               [--config-dir DIR] [--live-image-src DIR]

  --binary PATH          vetting orchestrator binary to install
  --agent-binary PATH    vetting-agent binary to stage in the asset dir
  --config-dir DIR       configuration directory (default: ${CONFIG_DIR})
  --live-image-src DIR   directory containing vmlinuz and initrd.img
  -h, --help             show this help and exit
EOF
}

# ${2:?…} aborts with a readable message when a flag is given without its
# value; a bare "$2" would die with "unbound variable" under `set -u`.
while (( $# > 0 )); do
  case "$1" in
    --binary)
      BINARY="${2:?--binary requires a PATH}"
      shift 2
      ;;
    --agent-binary)
      AGENT_BINARY="${2:?--agent-binary requires a PATH}"
      shift 2
      ;;
    --config-dir)
      CONFIG_DIR="${2:?--config-dir requires a DIR}"
      shift 2
      ;;
    --live-image-src)
      LIVE_IMAGE_SRC="${2:?--live-image-src requires a DIR}"
      shift 2
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      echo "unknown argument: $1" >&2
      usage
      exit 2
      ;;
  esac
done

if [[ $EUID -ne 0 ]]; then
  echo "install.sh must be run as root (try: sudo $0)" >&2
  exit 1
fi

# _pxe_val KEY FILE: print the scalar value of KEY inside FILE's top-level
# `pxe:` block (nothing if the key is absent). Minimal one-key reader that
# mirrors pxe-setup.sh's extract_yaml_value so the two scripts stay
# independent.
#
# A value that *starts* with a double or single quote is returned without
# its quotes; otherwise a trailing `# comment` and surrounding whitespace
# are stripped. The quote match is anchored so that a quoted word inside a
# trailing comment is never mistaken for the value.
_pxe_val() {
  local key="$1" file="$2"
  awk -v key="${key}" -v sq="'" '
    /^pxe:/ { in_pxe = 1; next }
    # Any other top-level key ends the pxe block.
    in_pxe && /^[A-Za-z_][A-Za-z0-9_]*:/ { in_pxe = 0 }
    in_pxe {
      re = "^[[:space:]]+" key ":[[:space:]]*"
      if ($0 ~ re) {
        line = $0
        sub(re, "", line)
        if (match(line, /^"[^"]*"/) || match(line, "^" sq "[^" sq "]*" sq)) {
          print substr(line, RSTART + 1, RLENGTH - 2)
          exit
        }
        sub(/[[:space:]]*#.*$/, "", line)
        gsub(/^[[:space:]]+|[[:space:]]+$/, "", line)
        print line
        exit
      }
    }
  ' "${file}"
}

# _is_ipv4_cidr VALUE: succeed only when VALUE looks like a.b.c.d/len with
# every octet <= 255 and len <= 32.
_is_ipv4_cidr() {
  local cidr="$1" octet
  [[ "${cidr}" =~ ^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})/([0-9]{1,2})$ ]] || return 1
  for octet in "${BASH_REMATCH[@]:1:4}"; do
    # 10# forces base 10 so "08" is not parsed as an invalid octal number.
    (( 10#${octet} <= 255 )) || return 1
  done
  (( 10#${BASH_REMATCH[5]} <= 32 ))
}

# heal_pxe_config: make sure /etc/vetting/vetting.yaml's pxe.interface
# and pxe.subnet reference things that actually exist on this host. Stale
# values (common after an LXC rebuild renames the NIC, or after pxe-setup
# was pointed at a NIC that later got removed) block vetting.service
# startup with "pxe.interface X not found on host".
#
# Only runs when pxe.enabled is true — a disabled pxe block doesn't gate
# startup. Only rewrites fields that are currently invalid; a good
# interface/subnet line is left byte-for-byte as the operator had it.
#
# Arguments: $1 - path to vetting.yaml (a missing file is a no-op)
# Returns:   0 when nothing needed fixing, when the file was patched, or
#            when auto-detection was impossible (a warning is printed);
#            1 when the patched file could not be written.
heal_pxe_config() {
  local config="$1"
  [[ -f "${config}" ]] || return 0

  local enabled cur_iface cur_subnet
  enabled="$(_pxe_val enabled "${config}")"
  cur_iface="$(_pxe_val interface "${config}")"
  cur_subnet="$(_pxe_val subnet "${config}")"
  [[ "${enabled}" == "true" ]] || return 0

  local iface_ok=0 subnet_ok=0
  if [[ -n "${cur_iface}" ]] && ip link show "${cur_iface}" >/dev/null 2>&1; then
    iface_ok=1
  fi
  if _is_ipv4_cidr "${cur_subnet}"; then
    subnet_ok=1
  fi
  if (( iface_ok && subnet_ok )); then
    return 0
  fi

  # Auto-detect from the default route. `|| true` keeps a failing `ip`
  # (pipefail + set -e) from aborting the installer before the warning
  # below can be printed. The NIC is the word after "dev", which is not
  # always the fifth field (e.g. "default dev ppp0 scope link").
  local detected_iface="" detected_subnet=""
  detected_iface="$(ip -4 -o route show default 2>/dev/null \
    | awk '{ for (i = 1; i < NF; i++) if ($i == "dev") { print $(i + 1); exit } }' || true)"
  if [[ -n "${detected_iface}" ]]; then
    detected_subnet="$(ip -4 -o route show dev "${detected_iface}" proto kernel scope link 2>/dev/null \
      | awk '{ print $1; exit }' || true)"
  fi
  if [[ -z "${detected_iface}" || -z "${detected_subnet}" ]]; then
    echo "WARN: pxe is enabled in ${config} but pxe.interface=${cur_iface:-<unset>} / pxe.subnet=${cur_subnet:-<unset>} is stale," >&2
    echo "      and no default-route NIC was found to auto-detect from. Edit the file manually before starting." >&2
    return 0
  fi

  # An empty fix_* value means "leave that line alone".
  local fix_iface="" fix_subnet=""
  if (( ! iface_ok )); then
    echo "==> pxe.interface \"${cur_iface:-<unset>}\" is not present on this host; auto-patching to \"${detected_iface}\""
    fix_iface="${detected_iface}"
  fi
  if (( ! subnet_ok )); then
    echo "==> pxe.subnet \"${cur_subnet:-<unset>}\" is missing/invalid; auto-patching to \"${detected_subnet}\""
    fix_subnet="${detected_subnet}"
  fi

  # Stage the rewrite next to the config so the final mv is a same-
  # filesystem rename, and remove the staging file if any step fails.
  local tmp
  tmp="$(mktemp "${config}.XXXXXX")"
  if ! {
    FIX_IFACE="${fix_iface}" FIX_SUBNET="${fix_subnet}" awk '
      # Re-emit the current line as `key: "val"`, keeping its indentation.
      function patched(key, val,    indent) {
        indent = $0
        sub(/[^[:space:]].*$/, "", indent)
        return indent key ": \"" val "\""
      }
      /^pxe:/ { in_pxe = 1; print; next }
      in_pxe && /^[A-Za-z_][A-Za-z0-9_]*:/ { in_pxe = 0 }
      in_pxe && ENVIRON["FIX_IFACE"] != "" && /^[[:space:]]+interface:/ {
        print patched("interface", ENVIRON["FIX_IFACE"]); next
      }
      in_pxe && ENVIRON["FIX_SUBNET"] != "" && /^[[:space:]]+subnet:/ {
        print patched("subnet", ENVIRON["FIX_SUBNET"]); next
      }
      { print }
    ' "${config}" > "${tmp}" \
      && chown --reference="${config}" "${tmp}" \
      && chmod --reference="${config}" "${tmp}" \
      && mv -f -- "${tmp}" "${config}"
  }; then
    rm -f -- "${tmp}"
    echo "ERROR: could not write patched pxe settings to ${config}" >&2
    return 1
  fi

  # The rewrite only replaces existing lines. If a key was absent from the
  # pxe block nothing was written for it, so say so instead of implying
  # the patch landed.
  if (( ! iface_ok )) && [[ "$(_pxe_val interface "${config}")" != "${detected_iface}" ]]; then
    echo "WARN: ${config} has no pxe.interface line to patch; add 'interface: \"${detected_iface}\"' under pxe: manually." >&2
  fi
  if (( ! subnet_ok )) && [[ "$(_pxe_val subnet "${config}")" != "${detected_subnet}" ]]; then
    echo "WARN: ${config} has no pxe.subnet line to patch; add 'subnet: \"${detected_subnet}\"' under pxe: manually." >&2
  fi
  return 0
}

# refresh_live_image: pull vmlinuz+initrd.img from the Gitea generic
# package registry when the bundle's live-image/VERSION pointer differs
# from ${LIVE_DIR}/VERSION. Skips the fetch when versions match unless
# FORCE_LIVE_IMAGE=1 (useful when on-disk files got corrupted). Set by
# proxmox-install.sh; on a direct `install.sh` invocation the caller
# must export REGISTRY_URL (and optionally PACKAGE_OWNER).
refresh_live_image() {
  # Reads:   SCRIPT_DIR, LIVE_DIR, SERVICE_USER, and the environment
  #          variables REGISTRY_URL, PACKAGE_OWNER, FORCE_LIVE_IMAGE.
  # Returns: 0 when the image is current, was fetched, or the fetch had to
  #          be skipped (a warning is printed); non-zero when a download
  #          or install step failed.
  local pointer="${SCRIPT_DIR}/live-image/VERSION"
  local bundle_ver=""
  # Test readability first: a `2>/dev/null` placed after `< file` does not
  # silence the shell's own "No such file" error for the redirect.
  if [[ -r "${pointer}" ]]; then
    bundle_ver="$(tr -d '[:space:]' < "${pointer}")"
  fi
  if [[ -z "${bundle_ver}" ]]; then
    echo "WARN: bundle's ${pointer} is missing or empty; skipping live-image fetch" >&2
    return 0
  fi

  local installed_ver=""
  if [[ -f "${LIVE_DIR}/VERSION" ]]; then
    installed_ver="$(tr -d '[:space:]' < "${LIVE_DIR}/VERSION")"
  fi

  if [[ "${bundle_ver}" == "${installed_ver}" && "${FORCE_LIVE_IMAGE:-0}" != "1" ]]; then
    echo "==> live-image already at ${bundle_ver}; skipping fetch (FORCE_LIVE_IMAGE=1 to redownload)"
    return 0
  fi

  if [[ -z "${REGISTRY_URL:-}" ]]; then
    echo "WARN: REGISTRY_URL is not set; cannot fetch live-image ${bundle_ver}. Re-run via proxmox-install.sh or export REGISTRY_URL." >&2
    return 0
  fi

  # This script does not install curl itself, so treat its absence like
  # the other "cannot fetch" cases instead of dying half-way through.
  if ! command -v curl >/dev/null 2>&1; then
    echo "WARN: curl is not installed; cannot fetch live-image ${bundle_ver}. Install curl and re-run." >&2
    return 0
  fi

  local owner="${PACKAGE_OWNER:-josh}"
  local base="${REGISTRY_URL%/}/api/packages/${owner}/generic/live-image/${bundle_ver}"
  echo "==> fetching live-image ${bundle_ver} (was '${installed_ver:-none}') from ${base}"

  # Clean up explicitly rather than with `trap … RETURN`: a RETURN trap
  # does not run when `set -e` aborts the script on a failed download
  # (leaking the staged files), and it stays installed after this
  # function has returned.
  local tmp rc=0
  tmp="$(mktemp -d)"
  _install_live_image_from "${base}" "${tmp}" "${bundle_ver}" || rc=$?
  rm -rf -- "${tmp}"
  return "${rc}"
}

# _install_live_image_from BASE_URL TMPDIR VERSION: download vmlinuz and
# initrd.img from BASE_URL into TMPDIR, install them into ${LIVE_DIR}
# owned by ${SERVICE_USER}, then record VERSION in ${LIVE_DIR}/VERSION.
# Steps are chained with && because the caller runs this on the left of
# `||`, where `set -e` is suspended; the first failure stops the chain
# and becomes the return status.
_install_live_image_from() {
  local base="$1" tmp="$2" version="$3"
  # Default curl meter shows rate + ETA, which matters for the ~300 MB
  # initrd on slow links.
  curl -fL -o "${tmp}/vmlinuz" "${base}/vmlinuz" \
    && curl -fL -o "${tmp}/initrd.img" "${base}/initrd.img" \
    && install -d -m 0755 -o "${SERVICE_USER}" -g "${SERVICE_USER}" "${LIVE_DIR}" \
    && install -m 0644 -o "${SERVICE_USER}" -g "${SERVICE_USER}" \
      "${tmp}/vmlinuz" "${LIVE_DIR}/vmlinuz" \
    && install -m 0644 -o "${SERVICE_USER}" -g "${SERVICE_USER}" \
      "${tmp}/initrd.img" "${LIVE_DIR}/initrd.img" \
    && printf '%s\n' "${version}" > "${LIVE_DIR}/VERSION" \
    && chown "${SERVICE_USER}:${SERVICE_USER}" "${LIVE_DIR}/VERSION" \
    && chmod 0644 "${LIVE_DIR}/VERSION"
}

# SCRIPT_DIR is the directory holding this script; REPO_ROOT is its parent.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"

# Locate the orchestrator binary unless --binary supplied one.
if [[ -z "${BINARY}" ]]; then
  for cand in \
    "${REPO_ROOT}/bin/vetting-linux-amd64" \
    "${REPO_ROOT}/bin/vetting" \
    "${SCRIPT_DIR}/vetting"; do
    if [[ -x "${cand}" ]]; then
      BINARY="${cand}"
      break
    fi
  done
fi
if [[ -z "${BINARY}" || ! -x "${BINARY}" ]]; then
  echo "could not find a vetting binary to install; pass --binary PATH or run 'make orchestrator-linux' first" >&2
  exit 1
fi

# Same search for the agent binary unless --agent-binary supplied one.
if [[ -z "${AGENT_BINARY}" ]]; then
  for cand in \
    "${REPO_ROOT}/bin/vetting-agent.linux-amd64" \
    "${REPO_ROOT}/bin/vetting-agent-linux-amd64" \
    "${SCRIPT_DIR}/vetting-agent-linux-amd64"; do
    if [[ -x "${cand}" ]]; then
      AGENT_BINARY="${cand}"
      break
    fi
  done
fi
if [[ -z "${AGENT_BINARY}" || ! -x "${AGENT_BINARY}" ]]; then
  echo "could not find a vetting-agent binary; pass --agent-binary PATH or run 'make agent-linux' first" >&2
  exit 1
fi

echo "==> installing runtime dependencies"
export DEBIAN_FRONTEND=noninteractive
apt-get update -qq
apt-get install -y --no-install-recommends \
  ca-certificates dnsmasq iperf3

echo "==> creating ${SERVICE_USER} user"
if ! id -u "${SERVICE_USER}" >/dev/null 2>&1; then
  useradd --system \
    --home-dir "${STATE_DIR}" \
    --shell /usr/sbin/nologin \
    "${SERVICE_USER}"
fi

echo "==> preparing directories"
install -d -m 0755 -o "${SERVICE_USER}" -g "${SERVICE_USER}" "${STATE_DIR}"
install -d -m 0755 -o "${SERVICE_USER}" -g "${SERVICE_USER}" "${LOG_DIR}"
install -d -m 0755 -o "${SERVICE_USER}" -g "${SERVICE_USER}" "${ASSET_DIR}"
install -d -m 0755 "${CONFIG_DIR}"

echo "==> installing binary"
install -m 0755 "${BINARY}" /usr/local/bin/vetting
install -m 0755 "${AGENT_BINARY}" "${ASSET_DIR}/vetting-agent-linux-amd64"

echo "==> installing config and systemd unit"
# vetting.production.yaml uses absolute /var/lib/vetting + /var/log/vetting
# paths that match the systemd unit's ReadWritePaths. vetting.example.yaml
# uses ./var/... relatives and is only correct for `make run` in a dev tree.
if [[ ! -f "${CONFIG_DIR}/vetting.yaml" ]]; then
  install -m 0640 -o root -g "${SERVICE_USER}" \
    "${SCRIPT_DIR}/vetting.production.yaml" \
    "${CONFIG_DIR}/vetting.yaml"
  echo "    -> installed default config at ${CONFIG_DIR}/vetting.yaml"
else
  echo "    -> preserving existing ${CONFIG_DIR}/vetting.yaml"
fi
install -m 0644 "${SCRIPT_DIR}/vetting.service" /etc/systemd/system/vetting.service

# Install pxe-setup.sh + its pinned iPXE SHAs into a stable path so the
# operator can run `vetting-pxe-setup ...` after the one-liner install.
# The bundle's tempdir gets wiped by proxmox-install.sh on exit, so
# without this the script would be inaccessible.
if [[ -f "${SCRIPT_DIR}/pxe-setup.sh" && -f "${SCRIPT_DIR}/ipxe-shas.txt" ]]; then
  echo "==> installing pxe-setup.sh and ipxe-shas.txt"
  install -d -m 0755 /usr/local/share/vetting
  install -m 0755 "${SCRIPT_DIR}/pxe-setup.sh" /usr/local/share/vetting/pxe-setup.sh
  install -m 0644 "${SCRIPT_DIR}/ipxe-shas.txt" /usr/local/share/vetting/ipxe-shas.txt
  ln -sfn /usr/local/share/vetting/pxe-setup.sh /usr/local/sbin/vetting-pxe-setup
fi

# Stage the live image into LIVE_DIR. Preference order:
#   1. --live-image-src explicitly given, or local files found in the
#      bundle/repo — copy straight in (dev and legacy bundle layouts).
#   2. Bundle carries only live-image/VERSION — fetch from the Gitea
#      generic registry when the pointer differs from ${LIVE_DIR}/VERSION.
#   3. Neither — skip quietly (no-PXE installs don't need a live image,
#      and dev checkouts that haven't run `make live-image` shouldn't
#      fail the install).
if [[ -z "${LIVE_IMAGE_SRC}" ]]; then
  for cand in \
    "${SCRIPT_DIR}/live-image" \
    "${REPO_ROOT}/live-image/build"; do
    if [[ -f "${cand}/vmlinuz" && -f "${cand}/initrd.img" ]]; then
      LIVE_IMAGE_SRC="${cand}"
      break
    fi
  done
fi

if [[ -n "${LIVE_IMAGE_SRC}" ]]; then
  # Auto-detected sources always pass this check; it exists so an explicit
  # --live-image-src pointing at the wrong directory fails with a clear
  # message rather than a bare `install: cannot stat`.
  if [[ ! -f "${LIVE_IMAGE_SRC}/vmlinuz" || ! -f "${LIVE_IMAGE_SRC}/initrd.img" ]]; then
    echo "live image source ${LIVE_IMAGE_SRC} must contain vmlinuz and initrd.img" >&2
    exit 1
  fi

  echo "==> staging live image from ${LIVE_IMAGE_SRC} into ${LIVE_DIR}"
  install -d -m 0755 -o "${SERVICE_USER}" -g "${SERVICE_USER}" "${LIVE_DIR}"
  install -m 0644 -o "${SERVICE_USER}" -g "${SERVICE_USER}" \
    "${LIVE_IMAGE_SRC}/vmlinuz" "${LIVE_DIR}/vmlinuz"
  install -m 0644 -o "${SERVICE_USER}" -g "${SERVICE_USER}" \
    "${LIVE_IMAGE_SRC}/initrd.img" "${LIVE_DIR}/initrd.img"

  # Record the version that produced these files if the source has
  # one (bundle with legacy layout carrying VERSION alongside the
  # kernel; dev tree has live-image/VERSION at repo root). Lets a
  # future bundle-based install decide whether to refetch.
  version_src=""
  for vcand in \
    "${LIVE_IMAGE_SRC}/VERSION" \
    "${SCRIPT_DIR}/live-image/VERSION" \
    "${REPO_ROOT}/live-image/VERSION"; do
    if [[ -f "${vcand}" ]]; then
      version_src="${vcand}"
      break
    fi
  done
  if [[ -n "${version_src}" ]]; then
    install -m 0644 -o "${SERVICE_USER}" -g "${SERVICE_USER}" \
      "${version_src}" "${LIVE_DIR}/VERSION"
  else
    # No version is known for the files just copied. Drop any VERSION left
    # by an earlier install: it would describe the overwritten files and
    # make refresh_live_image skip a fetch that is actually needed.
    rm -f -- "${LIVE_DIR}/VERSION"
  fi
elif [[ -f "${SCRIPT_DIR}/live-image/VERSION" ]]; then
  refresh_live_image
else
  echo "==> no live image found (bundle/live-image or ../live-image/build); skipping live-dir staging"
fi

# Disable the distro's dnsmasq so only the orchestrator-supervised
# instance owns DHCP/TFTP. Operators who want to keep dnsmasq for
# something else can re-enable it after configuring a disjoint listen
# address.
if systemctl is-enabled --quiet dnsmasq 2>/dev/null; then
  echo "==> disabling distro dnsmasq (orchestrator supervises its own)"
  systemctl disable --now dnsmasq
fi

echo "==> validating pxe config against this host's interfaces"
heal_pxe_config "${CONFIG_DIR}/vetting.yaml"

systemctl daemon-reload

# Upgrade path: if vetting.service is already enabled, restart it so the
# new binary + live image take effect without an explicit second
# command.
First-install path (service not enabled yet) leaves the # service alone so the operator can edit the config before starting. if systemctl is-enabled --quiet vetting.service 2>/dev/null; then echo "==> restarting vetting.service (upgrade path)" systemctl reset-failed vetting.service 2>/dev/null || true systemctl restart vetting.service cat <