Automate PXE setup: release bundle + pxe-setup.sh + startup validation
CI / Lint + build + test (push) Has been cancelled

Collapses the LXC side of PXE enablement from a six-step manual dance
(build, fetch iPXE, scp, bridge, hand-edit yaml) into:

  make release                   # dev box (Linux/WSL)
  scp bundle.tar.gz lxc:/tmp/
  sudo ./install.sh              # base install, unchanged
  sudo ./pxe-setup.sh --interface ... --dhcp-range ... --orchestrator-url ...

pxe-setup.sh fetches iPXE from boot.ipxe.org, verifies against pinned
SHA256s in deploy/ipxe-shas.txt (fail-closed), places vmlinuz/initrd.img
from the bundle, and rewrites only the pxe: block of vetting.yaml.
Idempotent; --force gates overwriting a hand-edited block.

Adds Supervisor.Validate() — called before dnsmasq spawn — so typo'd
configs fail at orchestrator startup with clear errors naming the
missing file or yaml key, instead of silently serving broken TFTP
until a real host tries to PXE-boot. Nine tests cover missing files,
bogus interface, malformed dhcp_range, bad orchestrator_url, and
aggregate reporting.

Hypervisor bridge creation stays documented (LXC can't do it) but
everything downstream of the bridge is now scripted.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-18 01:38:43 -04:00
parent d245fa6235
commit a5055b3c7a
8 changed files with 660 additions and 54 deletions
+18
View File
@@ -0,0 +1,18 @@
# Pinned iPXE binary checksums. pxe-setup.sh fetches ipxe.efi +
# undionly.kpxe from https://boot.ipxe.org and verifies the SHA256
# against these pins. Mismatch = hard fail; the script will not place
# mismatched binaries into tftp_root.
#
# Sources (both from the iPXE project's latest-build tree):
# ipxe.efi → https://boot.ipxe.org/x86_64-efi/ipxe.efi
# undionly.kpxe → https://boot.ipxe.org/undionly.kpxe
#
# To bump: fetch fresh binaries, verify via a second trusted source
# (e.g. a checksum published by a distro package, or a second mirror),
# regenerate with `sha256sum ipxe.efi undionly.kpxe > ipxe-shas.txt`,
# and commit. Treat this as a security-sensitive change.
#
# Format: compatible with `sha256sum -c ipxe-shas.txt` when run from
# the directory containing both files.
270afb529c4a8c1a89e2b852eca150789d948edaca9ca7099a12f170cc9c82e5 ipxe.efi
a84c7945d5ac941b8284a279bb2c93062bc19370681c9cf9a28b52daa1782a95 undionly.kpxe
+268
View File
@@ -0,0 +1,268 @@
#!/usr/bin/env bash
# pxe-setup.sh — finish the PXE half of a vetting install.
#
# Run AFTER deploy/install.sh on the LXC (or wherever the orchestrator
# lives). Fetches pinned iPXE binaries, places the live image, and
# writes the pxe: block of /etc/vetting/vetting.yaml. Does NOT create
# the PXE bridge — that's a hypervisor-level step, see
# docs/operations.md.
#
# Idempotent: safe to re-run with the same args. A run whose --interface
# or --dhcp-range differs from the existing pxe: block is refused unless
# --force is given; the other pxe: keys are always rewritten.
#
# Usage:
# sudo ./pxe-setup.sh \
# --interface eth1 \
# --dhcp-range 10.77.0.100,10.77.0.200,12h \
# --orchestrator-url http://10.77.0.2:8080
#
# Optional:
# --tftp-root DIR default /var/lib/vetting/tftp
# --live-dir DIR default /var/lib/vetting/live
# --config PATH default /etc/vetting/vetting.yaml
# --bundle-dir DIR default: this script's dir (release tarball root)
# --force overwrite a customised pxe: block
# Strict mode: stop on a failing command, on use of an unset variable,
# and on a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Absolute path of the directory this script lives in.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Account that owns the directories and files installed below.
SERVICE_USER=vetting

# Required settings; they stay empty until given on the command line.
INTERFACE=
DHCP_RANGE=
ORCH_URL=

# Optional settings and their defaults (see the header for the flags).
TFTP_ROOT=/var/lib/vetting/tftp
LIVE_DIR=/var/lib/vetting/live
CONFIG=/etc/vetting/vetting.yaml
BUNDLE_DIR=
FORCE=0
# usage — print this script's leading comment header as help text.
#
# Emits every line after the shebang up to (but not including) the first
# line that does not start with '#'. Following the header itself, rather
# than a fixed line range, keeps the last option lines (e.g. --force)
# in the output and stays correct when the header grows.
# Arguments: none.
# Outputs:   the header lines, unmodified, on stdout.
usage() {
  awk 'NR == 1 { next } /^#/ { print; next } { exit }' "${BASH_SOURCE[0]}"
}
# parse_args — consume the command line into the option globals.
#
# Every value-taking option must be followed by a value. Without that
# check, an option given as the last word (e.g. a trailing `--interface`)
# reads an unset $2 and dies under `set -u` with an "unbound variable"
# message that does not say which option is incomplete.
#
# Arguments: the script's positional parameters.
# Globals written: INTERFACE, DHCP_RANGE, ORCH_URL, TFTP_ROOT, LIVE_DIR,
#   CONFIG, BUNDLE_DIR, FORCE.
# Exits: 0 after -h/--help; 2 on an unknown option or a missing value.
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --interface | --dhcp-range | --orchestrator-url | --tftp-root | \
        --live-dir | --config | --bundle-dir)
        if (( $# < 2 )); then
          echo "ERROR: $1 requires a value" >&2
          exit 2
        fi
        case "$1" in
          --interface) INTERFACE="$2" ;;
          --dhcp-range) DHCP_RANGE="$2" ;;
          --orchestrator-url) ORCH_URL="$2" ;;
          --tftp-root) TFTP_ROOT="$2" ;;
          --live-dir) LIVE_DIR="$2" ;;
          --config) CONFIG="$2" ;;
          --bundle-dir) BUNDLE_DIR="$2" ;;
        esac
        shift 2
        ;;
      --force)
        FORCE=1
        shift
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        echo "unknown arg: $1" >&2
        usage
        exit 2
        ;;
    esac
  done
}
parse_args "$@"
# --- preflight ---------------------------------------------------------
# Later steps hand files to ${SERVICE_USER} and rewrite ${CONFIG}, so
# refuse to continue unless running as root.
if (( EUID != 0 )); then
  echo "pxe-setup.sh must be run as root (try: sudo $0 ...)" >&2
  exit 1
fi

# The three settings that have no default must all be supplied.
if [[ -z "${INTERFACE}" ]]; then
  echo "ERROR: --interface is required" >&2
  exit 2
fi
if [[ -z "${DHCP_RANGE}" ]]; then
  echo "ERROR: --dhcp-range is required" >&2
  exit 2
fi
if [[ -z "${ORCH_URL}" ]]; then
  echo "ERROR: --orchestrator-url is required" >&2
  exit 2
fi

# --- sanity checks -----------------------------------------------------
# The named interface has to exist on this host already.
ip link show "${INTERFACE}" >/dev/null 2>&1 || {
  echo "ERROR: interface ${INTERFACE} not found on host. Check \`ip link\` — the" >&2
  echo " interface must exist *before* the orchestrator starts dnsmasq." >&2
  exit 1
}

# Shape check only: two dotted quads and a non-blank lease, separated by
# commas. Octet ranges are not checked here; this just catches obvious
# mistakes before anything is written to disk.
dhcp_range_re='^([0-9]{1,3}\.){3}[0-9]{1,3},([0-9]{1,3}\.){3}[0-9]{1,3},[^[:space:]]+$'
if ! [[ "${DHCP_RANGE}" =~ ${dhcp_range_re} ]]; then
  echo "ERROR: --dhcp-range must be start_ip,end_ip,lease (e.g. 10.77.0.100,10.77.0.200,12h)" >&2
  exit 2
fi

# Both the config file and the service account are expected to exist
# already; the messages point at deploy/install.sh when they do not.
[[ -f "${CONFIG}" ]] || {
  echo "ERROR: ${CONFIG} not found — run deploy/install.sh first." >&2
  exit 1
}
id -u "${SERVICE_USER}" >/dev/null 2>&1 || {
  echo "ERROR: ${SERVICE_USER} user not found — run deploy/install.sh first." >&2
  exit 1
}
# --- locate the bundle -------------------------------------------------
# ipxe-shas.txt sits next to this script both in a release tarball and
# in the repo tree (deploy/), so the script's own directory is the
# default for either layout. Only the live image lives in a different
# place per layout, and the live-image step searches both.
if [[ -z "${BUNDLE_DIR}" ]]; then
  BUNDLE_DIR="${SCRIPT_DIR}"
fi
if [[ ! -d "${BUNDLE_DIR}" ]]; then
  echo "ERROR: bundle dir ${BUNDLE_DIR} is not a directory." >&2
  exit 1
fi
# Make the path absolute. The checksum step runs
# `sha256sum -c "${SHAS_FILE}"` after cd-ing into another directory, so a
# relative --bundle-dir would otherwise resolve against the wrong place.
# CDPATH is cleared so a relative name cannot be looked up elsewhere.
BUNDLE_DIR="$(CDPATH='' cd -- "${BUNDLE_DIR}" && pwd)"
SHAS_FILE="${BUNDLE_DIR}/ipxe-shas.txt"
if [[ ! -f "${SHAS_FILE}" ]]; then
  echo "ERROR: ${SHAS_FILE} not found — bundle is incomplete." >&2
  exit 1
fi
# --- iPXE binaries: stage, verify, install ----------------------------
#
# Downloads land in a throwaway staging directory and are copied into
# tftp_root only after `sha256sum -c` has passed, so a corrupt or
# unexpected download never replaces a file that is already in place.
echo "==> ensuring ${TFTP_ROOT} exists"
install -d -m 0755 -o "${SERVICE_USER}" -g "${SERVICE_USER}" "${TFTP_ROOT}"

# Staging directory, removed again on any exit path.
STAGE="$(mktemp -d)"
trap 'rm -rf "${STAGE}"' EXIT

# Fetch when either binary is missing, or when both are present but fail
# the pinned-checksum check. If both are present and match, tftp_root is
# left untouched.
need_fetch=0
if [[ ! -f "${TFTP_ROOT}/ipxe.efi" || ! -f "${TFTP_ROOT}/undionly.kpxe" ]]; then
  need_fetch=1
elif ( cd "${TFTP_ROOT}" && sha256sum -c --status "${SHAS_FILE}" ); then
  echo "==> iPXE binaries already match pins — skipping fetch"
else
  echo "==> ${TFTP_ROOT} iPXE binaries don't match pinned SHAs — re-fetching"
  need_fetch=1
fi

if [[ "${need_fetch}" -eq 1 ]]; then
  echo "==> fetching iPXE binaries from boot.ipxe.org"
  curl -fsSLo "${STAGE}/ipxe.efi" "https://boot.ipxe.org/x86_64-efi/ipxe.efi"
  curl -fsSLo "${STAGE}/undionly.kpxe" "https://boot.ipxe.org/undionly.kpxe"

  echo "==> verifying SHA256 against ${SHAS_FILE}"
  # Fail closed: nothing is installed unless every pinned checksum matches.
  ( cd "${STAGE}" && sha256sum -c "${SHAS_FILE}" ) || {
    echo "ERROR: iPXE SHA256 mismatch. Upstream binaries changed, or a MITM." >&2
    echo " To accept the new binaries, regenerate ${SHAS_FILE} after" >&2
    echo " independently verifying the new checksums, then re-run." >&2
    exit 1
  }

  for name in ipxe.efi undionly.kpxe; do
    install -m 0644 -o "${SERVICE_USER}" -g "${SERVICE_USER}" \
      "${STAGE}/${name}" "${TFTP_ROOT}/${name}"
  done
fi
# --- live image: copy from bundle into live_dir -----------------------
# Two layouts are searched, in this order: the release tarball
# (${BUNDLE_DIR}/live-image/) and the repo tree
# (${BUNDLE_DIR}/../live-image/build/). A candidate counts only when it
# holds both vmlinuz and initrd.img.
live_candidates=(
  "${BUNDLE_DIR}/live-image"
  "${BUNDLE_DIR}/../live-image/build"
)
LIVE_SRC=""
for cand in "${live_candidates[@]}"; do
  [[ -f "${cand}/vmlinuz" && -f "${cand}/initrd.img" ]] || continue
  LIVE_SRC="${cand}"
  break
done

if [[ -n "${LIVE_SRC}" ]]; then
  echo "==> staging live image from ${LIVE_SRC} into ${LIVE_DIR}"
  install -d -m 0755 -o "${SERVICE_USER}" -g "${SERVICE_USER}" "${LIVE_DIR}"
  for image in vmlinuz initrd.img; do
    install -m 0644 -o "${SERVICE_USER}" -g "${SERVICE_USER}" \
      "${LIVE_SRC}/${image}" "${LIVE_DIR}/${image}"
  done
else
  # A missing image is a warning, not an error: the script carries on.
  echo "WARN: no live image found under ${BUNDLE_DIR}/live-image or" >&2
  echo " ${BUNDLE_DIR}/../live-image/build — skipping live_dir staging." >&2
  echo " Build with 'wsl make live-image' or use a release tarball," >&2
  echo " then copy vmlinuz + initrd.img into ${LIVE_DIR} manually." >&2
fi
# --- patch the pxe: block in vetting.yaml -----------------------------
#
# The pxe: block is replaced in place further down: awk walks the file
# line by line, drops everything from `pxe:` up to the next top-level
# key (a line starting with a non-whitespace letter + ":") or EOF, and
# emits a freshly rendered block instead. Everything outside the pxe:
# block is passed through unchanged, so hand-tuned
# server:/database:/notifiers: blocks survive intact.
#
# Before that, look at what the block currently holds so that a
# differing interface or dhcp_range is only overwritten with --force.

# pxe_existing_value FILE KEY — print the value KEY currently has inside
# the top-level pxe: block of FILE. Prints nothing when FILE, the block,
# or the key is absent.
#
# A value in double or single quotes is returned without the quotes. An
# unquoted value has any trailing "# comment" and trailing whitespace
# removed, so a hand-annotated line still compares equal to the plain
# command-line value.
pxe_existing_value() {
  local file="$1" key="$2"
  [[ -f "${file}" ]] || return 0
  awk -v key="${key}" -v sq="'" '
    /^pxe:/ { in_pxe = 1; next }
    in_pxe && /^[A-Za-z_][A-Za-z0-9_]*:/ { in_pxe = 0 }
    in_pxe && $0 ~ ("^[[:space:]]+" key ":") {
      value = $0
      sub("^[[:space:]]+" key ":[[:space:]]*", "", value)
      first = substr(value, 1, 1)
      if (first == "\"" || first == sq) {
        # Quoted scalar: keep the text up to the closing quote.
        value = substr(value, 2)
        close_at = index(value, first)
        if (close_at > 0) value = substr(value, 1, close_at - 1)
      } else {
        # Plain scalar: drop a trailing comment, then trailing blanks.
        sub(/[[:space:]]+#.*$/, "", value)
        sub(/[[:space:]]+$/, "", value)
      }
      print value
      exit
    }
  ' "${file}"
}

existing_iface="$(pxe_existing_value "${CONFIG}" interface)"
existing_range="$(pxe_existing_value "${CONFIG}" dhcp_range)"

# Only a value that is present AND different blocks the run; an absent
# key or an identical value is fine. --force skips both checks.
if [[ "${FORCE}" -eq 0 ]]; then
  if [[ -n "${existing_iface}" && "${existing_iface}" != "${INTERFACE}" ]]; then
    echo "ERROR: pxe.interface in ${CONFIG} is already set to ${existing_iface}, which" >&2
    echo " differs from --interface ${INTERFACE}. Pass --force to overwrite." >&2
    exit 1
  fi
  if [[ -n "${existing_range}" && "${existing_range}" != "${DHCP_RANGE}" ]]; then
    echo "ERROR: pxe.dhcp_range in ${CONFIG} is already ${existing_range}, which" >&2
    echo " differs from --dhcp-range ${DHCP_RANGE}. Pass --force to overwrite." >&2
    exit 1
  fi
fi
# yaml_dq VALUE — print VALUE escaped for use inside a double-quoted YAML
# scalar. Backslash and double quote are escaped; a raw `"` in a path or
# URL would otherwise end the scalar early and corrupt the file.
yaml_dq() {
  local value="$1" bs='\' dq='"'
  value=${value//"${bs}"/"${bs}${bs}"}
  value=${value//"${dq}"/"${bs}${dq}"}
  printf '%s' "${value}"
}

# render_pxe_block — print the pxe: block built from the option globals
# (INTERFACE, DHCP_RANGE, ORCH_URL, TFTP_ROOT, LIVE_DIR).
#
# The child keys are indented two spaces so they nest under `pxe:`. At
# column 0 they would be separate top-level keys, and this script's own
# lookup of existing values only recognises indented keys, so a re-run
# could not find what an earlier run wrote.
render_pxe_block() {
  cat <<EOF
pxe:
  enabled: true
  interface: "$(yaml_dq "${INTERFACE}")"
  dhcp_range: "$(yaml_dq "${DHCP_RANGE}")"
  orchestrator_url: "$(yaml_dq "${ORCH_URL}")"
  tftp_root: "$(yaml_dq "${TFTP_ROOT}")"
  live_dir: "$(yaml_dq "${LIVE_DIR}")"
EOF
}
new_block="$(render_pxe_block)"
# write_pxe_block CONFIG BLOCK — replace the top-level pxe: block of
# CONFIG with BLOCK, or append BLOCK when CONFIG has no pxe: block.
#
# awk drops everything from the `pxe:` line up to the next top-level key
# (or EOF) and prints BLOCK there instead; all other lines pass through
# unchanged. BLOCK is emitted once even if CONFIG contains more than one
# pxe: block.
#
# The result is staged in a temp file next to CONFIG, given CONFIG's
# owner, group and mode, and then renamed over it. Staging in the same
# directory makes the final mv a plain rename, so CONFIG is never left
# half-written. On any failure the temp file is removed and CONFIG is
# left as it was.
#
# Returns: 0 on success, 1 on failure (message on stderr).
write_pxe_block() {
  local config="$1" block="$2" tmp
  if [[ ! -f "${config}" ]]; then
    echo "ERROR: ${config} not found." >&2
    return 1
  fi
  tmp="$(mktemp "${config}.XXXXXX")" || return 1
  # Pass the rendered block through awk ENVIRON so we don't have to
  # quote-escape it into -v (which chokes on the embedded newlines).
  if NEW_BLOCK="${block}" awk '
      BEGIN { skipping = 0; emitted = 0 }
      /^pxe:/ {
        if (!emitted) print ENVIRON["NEW_BLOCK"]
        skipping = 1; emitted = 1
        next
      }
      skipping && /^[A-Za-z_][A-Za-z0-9_]*:/ { skipping = 0 }
      !skipping { print }
      END {
        # No existing pxe: block, so append one.
        if (!emitted) print ENVIRON["NEW_BLOCK"]
      }
    ' "${config}" > "${tmp}" \
    && chown --reference="${config}" "${tmp}" \
    && chmod --reference="${config}" "${tmp}" \
    && mv -f -- "${tmp}" "${config}"; then
    return 0
  fi
  rm -f -- "${tmp}"
  echo "ERROR: failed to update ${config}; original left untouched." >&2
  return 1
}
write_pxe_block "${CONFIG}" "${new_block}"

# Show the operator what was written and what to do next.
echo
echo "==> rendered pxe: block in ${CONFIG}:"
echo "${new_block}" | sed 's/^/ /'
echo
echo "Next: systemctl restart vetting && journalctl -fu vetting"
echo "The orchestrator will refuse to start with clear errors if anything"
echo "is still missing; you should see dnsmasq come up cleanly."