cf3a75591c
proxmox-install.sh tarball-extracts into a tempdir that gets wiped on EXIT, so after the one-liner there's no pxe-setup.sh on disk for the operator to run. Have install.sh drop the script + ipxe-shas.txt into /usr/local/share/vetting/ and symlink it as /usr/local/sbin/vetting-pxe-setup (in PATH). pxe-setup.sh now readlink -f's BASH_SOURCE so the symlink resolves to the share dir where ipxe-shas.txt lives, and gracefully handles the case where install.sh already staged vmlinuz + initrd.img into LIVE_DIR (no bundle live-image/ needed at that point). Update the trailing hint in proxmox-install.sh and the operations runbook to surface the new `sudo vetting-pxe-setup ...` command. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
297 lines
11 KiB
Bash
Executable File
297 lines
11 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# pxe-setup.sh — finish the PXE half of a vetting install.
|
|
#
|
|
# Run AFTER deploy/install.sh on the LXC (or wherever the orchestrator
|
|
# lives). Fetches pinned iPXE binaries, places the live image, and
|
|
# writes the pxe: block of /etc/vetting/vetting.yaml.
|
|
#
|
|
# dnsmasq runs in proxy-DHCP mode: it coexists with whatever DHCP
|
|
# server already serves your LAN (UniFi, pfSense, Asus, etc.) and
|
|
# only supplements the PXE options. No dedicated bridge, no VLAN,
|
|
# no cabling changes.
|
|
#
|
|
# Idempotent: safe to re-run with the same args. A second run with
|
|
# different args overwrites the pxe: block; pass --force to override
|
|
# a hand-edited block that differs from our args.
|
|
#
|
|
# Usage:
|
|
# sudo ./pxe-setup.sh \
|
|
# --interface eth0 \
|
|
# --subnet 192.168.1.0/24 \
|
|
# --orchestrator-url http://192.168.1.135:8080
|
|
#
|
|
# Optional:
|
|
# --tftp-root DIR default /var/lib/vetting/tftp
|
|
# --live-dir DIR default /var/lib/vetting/live
|
|
# --config PATH default /etc/vetting/vetting.yaml
|
|
# --bundle-dir DIR default: this script's dir (release tarball root)
|
|
# --force overwrite a customised pxe: block
|
|
# Strict mode: abort on any unhandled command failure (errexit), on expansion
# of an unset variable (nounset), and when any stage of a pipeline fails
# (pipefail).
set -o errexit -o nounset -o pipefail

# Required options. They stay empty until the matching command-line flag
# supplies a value; the script refuses to continue while any is empty.
INTERFACE=''
SUBNET=''
ORCH_URL=''

# Optional settings and their defaults (see the header comment).
TFTP_ROOT='/var/lib/vetting/tftp'
LIVE_DIR='/var/lib/vetting/live'
CONFIG='/etc/vetting/vetting.yaml'
BUNDLE_DIR=''
FORCE=0

# Account that owns the files this script stages.
SERVICE_USER='vetting'

# Directory containing the real script file. readlink -f resolves symlinks
# first, so an invocation through `vetting-pxe-setup` (a symlink into
# /usr/local/sbin installed by install.sh) still finds ipxe-shas.txt next to
# the real script in /usr/local/share/vetting/.
SCRIPT_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)"
|
|
|
|
# usage: print the header comment of this script (description, usage and
# options) to stdout.
#
# The help text is the contiguous run of comment lines that follows the
# shebang, read straight from the script file. Scanning stops at the first
# non-comment line instead of at a fixed line number, so the output stays
# complete, and never leaks code, when the header grows or shrinks.
usage() {
  awk 'NR == 1 { next } /^#/ { print; next } { exit }' "${BASH_SOURCE[0]}"
}
|
|
|
|
# parse_args ARGS...: consume the command-line flags and store their values in
# the option globals (INTERFACE, SUBNET, ORCH_URL, TFTP_ROOT, LIVE_DIR, CONFIG,
# BUNDLE_DIR, FORCE).
#
# Exits 0 after printing the usage text for -h/--help. Exits 2 on an unknown
# flag, or when a value-taking flag is the last argument: without that guard
# the "$2" expansion trips `set -u` and the operator only sees a bare
# "unbound variable" message.
parse_args() {
  while (( $# > 0 )); do
    # Flags that take a value must be followed by one.
    case "$1" in
      --interface | --subnet | --orchestrator-url | --tftp-root | \
        --live-dir | --config | --bundle-dir)
        if (( $# < 2 )); then
          echo "ERROR: $1 requires a value" >&2
          exit 2
        fi
        ;;
    esac

    case "$1" in
      --interface) INTERFACE="$2"; shift 2 ;;
      --subnet) SUBNET="$2"; shift 2 ;;
      --orchestrator-url) ORCH_URL="$2"; shift 2 ;;
      --tftp-root) TFTP_ROOT="$2"; shift 2 ;;
      --live-dir) LIVE_DIR="$2"; shift 2 ;;
      --config) CONFIG="$2"; shift 2 ;;
      --bundle-dir) BUNDLE_DIR="$2"; shift 2 ;;
      --force) FORCE=1; shift ;;
      -h | --help) usage; exit 0 ;;
      *) echo "unknown arg: $1" >&2; usage; exit 2 ;;
    esac
  done
}

parse_args "$@"
|
|
|
|
# The steps below pass -o/-g to install(1) and rewrite the config file, so
# stop early unless running as root.
if (( EUID != 0 )); then
  echo "pxe-setup.sh must be run as root (try: sudo $0 ...)" >&2
  exit 1
fi

# The three mandatory options have no defaults; each must have been supplied.
if [[ -z "${INTERFACE}" ]]; then
  echo "ERROR: --interface is required" >&2
  exit 2
fi
if [[ -z "${SUBNET}" ]]; then
  echo "ERROR: --subnet is required (e.g. 192.168.1.0/24)" >&2
  exit 2
fi
if [[ -z "${ORCH_URL}" ]]; then
  echo "ERROR: --orchestrator-url is required" >&2
  exit 2
fi
|
|
|
|
# --- sanity checks -----------------------------------------------------

# is_ipv4_cidr CIDR: succeed only when CIDR is a dotted-quad IPv4 address
# followed by "/" and a prefix length, with every octet in 0-255 and the
# prefix in 0-32. Returns non-zero otherwise.
is_ipv4_cidr() {
  local cidr="$1" octet
  local shape='^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})/([0-9]{1,2})$'

  [[ "${cidr}" =~ ${shape} ]] || return 1

  # 10# forces base 10, so a zero-padded field such as "08" is not rejected
  # by the arithmetic parser as malformed octal.
  for octet in "${BASH_REMATCH[@]:1:4}"; do
    (( 10#${octet} <= 255 )) || return 1
  done
  (( 10#${BASH_REMATCH[5]} <= 32 ))
}

# The interface has to exist already.
if ! ip link show "${INTERFACE}" >/dev/null 2>&1; then
  echo "ERROR: interface ${INTERFACE} not found on host. Check \`ip link\` — the" >&2
  echo " interface must exist *before* the orchestrator starts dnsmasq." >&2
  exit 1
fi

# CIDR check — dnsmasq will re-validate, but catch malformed and out-of-range
# values (octet > 255, prefix > 32) before we write anything to disk.
if ! is_ipv4_cidr "${SUBNET}"; then
  echo "ERROR: --subnet must be CIDR form (e.g. 192.168.1.0/24), got '${SUBNET}'" >&2
  exit 2
fi

# Both the config file and the service account are expected to have been
# created by deploy/install.sh.
if [[ ! -f "${CONFIG}" ]]; then
  echo "ERROR: ${CONFIG} not found — run deploy/install.sh first." >&2
  exit 1
fi

if ! id -u "${SERVICE_USER}" >/dev/null 2>&1; then
  echo "ERROR: ${SERVICE_USER} user not found — run deploy/install.sh first." >&2
  exit 1
fi
|
|
|
|
# Resolve the bundle dir: the directory that holds ipxe-shas.txt and, relative
# to it, the live image (live-image/ in a release tarball, ../live-image/build
# in the repo tree). Without --bundle-dir it is the script's own directory.
#
# An explicit --bundle-dir is converted to an absolute path. The checksum
# steps later run `sha256sum -c "${SHAS_FILE}"` after cd-ing into another
# directory; a relative path would not resolve from there, and a perfectly
# good bundle would be reported as a checksum mismatch.
if [[ -z "${BUNDLE_DIR}" ]]; then
  BUNDLE_DIR="${SCRIPT_DIR}"
elif [[ -d "${BUNDLE_DIR}" ]]; then
  # CDPATH is cleared so cd resolves the name against the current directory
  # only and prints nothing.
  BUNDLE_DIR="$(CDPATH='' cd -- "${BUNDLE_DIR}" && pwd)"
fi
# A --bundle-dir that is not a directory is left as given and is rejected by
# the missing-file check below.

SHAS_FILE="${BUNDLE_DIR}/ipxe-shas.txt"
if [[ ! -f "${SHAS_FILE}" ]]; then
  echo "ERROR: ${SHAS_FILE} not found — bundle is incomplete." >&2
  exit 1
fi
|
|
|
|
# --- iPXE binaries: stage, verify, install ----------------------------
#
# Downloads land in a scratch directory and are copied into tftp_root only
# after `sha256sum -c` accepts them, so a corrupt download never clobbers a
# known-good file. The copy uses install(1), which unlink-replaces the
# target; that avoids ETXTBSY and makes the operation atomic per file.

echo "==> ensuring ${TFTP_ROOT} exists"
install -d -m 0755 -o "${SERVICE_USER}" -g "${SERVICE_USER}" "${TFTP_ROOT}"

# Scratch directory for the downloads; removed on every exit path.
STAGE="$(mktemp -d)"
trap 'rm -rf "${STAGE}"' EXIT

# Decide whether a download is needed: always when either binary is missing,
# otherwise only when the installed files fail the pinned checksums.
if [[ -f "${TFTP_ROOT}/ipxe.efi" && -f "${TFTP_ROOT}/undionly.kpxe" ]]; then
  if ( cd "${TFTP_ROOT}" && sha256sum -c --status "${SHAS_FILE}" ); then
    echo "==> iPXE binaries already match pins — skipping fetch"
    need_fetch=0
  else
    echo "==> ${TFTP_ROOT} iPXE binaries don't match pinned SHAs — re-fetching"
    need_fetch=1
  fi
else
  need_fetch=1
fi

if [[ "${need_fetch}" -eq 1 ]]; then
  echo "==> fetching iPXE binaries from boot.ipxe.org"
  curl -fsSLo "${STAGE}/ipxe.efi" "https://boot.ipxe.org/x86_64-efi/ipxe.efi"
  curl -fsSLo "${STAGE}/undionly.kpxe" "https://boot.ipxe.org/undionly.kpxe"

  echo "==> verifying SHA256 against ${SHAS_FILE}"
  ( cd "${STAGE}" && sha256sum -c "${SHAS_FILE}" ) || {
    echo "ERROR: iPXE SHA256 mismatch. Upstream binaries changed, or a MITM." >&2
    echo " To accept the new binaries, regenerate ${SHAS_FILE} after" >&2
    echo " independently verifying the new checksums, then re-run." >&2
    exit 1
  }

  # Only verified files reach tftp_root.
  for binary in ipxe.efi undionly.kpxe; do
    install -m 0644 -o "${SERVICE_USER}" -g "${SERVICE_USER}" \
      "${STAGE}/${binary}" "${TFTP_ROOT}/${binary}"
  done
fi
|
|
|
|
# --- live image: copy from bundle into live_dir -----------------------

# Two source layouts are recognised, tried in this order: the release tarball
# (${BUNDLE_DIR}/live-image/) and the repo tree
# (${BUNDLE_DIR}/../live-image/build/). A layout counts only when it holds
# both vmlinuz and initrd.img.
LIVE_SRC=""
if [[ -f "${BUNDLE_DIR}/live-image/vmlinuz" \
  && -f "${BUNDLE_DIR}/live-image/initrd.img" ]]; then
  LIVE_SRC="${BUNDLE_DIR}/live-image"
elif [[ -f "${BUNDLE_DIR}/../live-image/build/vmlinuz" \
  && -f "${BUNDLE_DIR}/../live-image/build/initrd.img" ]]; then
  LIVE_SRC="${BUNDLE_DIR}/../live-image/build"
fi

if [[ -n "${LIVE_SRC}" ]]; then
  echo "==> staging live image from ${LIVE_SRC} into ${LIVE_DIR}"
  install -d -m 0755 -o "${SERVICE_USER}" -g "${SERVICE_USER}" "${LIVE_DIR}"
  for artifact in vmlinuz initrd.img; do
    install -m 0644 -o "${SERVICE_USER}" -g "${SERVICE_USER}" \
      "${LIVE_SRC}/${artifact}" "${LIVE_DIR}/${artifact}"
  done
elif [[ -f "${LIVE_DIR}/vmlinuz" && -f "${LIVE_DIR}/initrd.img" ]]; then
  # install.sh already stages vmlinuz + initrd.img into LIVE_DIR during the
  # one-liner install, so a bundle without live-image/ is expected when this
  # script is run from /usr/local/sbin.
  echo "==> live image already staged in ${LIVE_DIR} (from install.sh)"
else
  # Not fatal: warn and carry on so the remaining setup still completes.
  {
    echo "WARN: no live image found under ${BUNDLE_DIR}/live-image,"
    echo " ${BUNDLE_DIR}/../live-image/build, or ${LIVE_DIR}."
    echo " The orchestrator will fail PXE startup validation until"
    echo " vmlinuz + initrd.img land in ${LIVE_DIR}."
  } >&2
fi
|
|
|
|
# --- patch the pxe: block in vetting.yaml -----------------------------
|
|
#
|
|
# Replace the contents of the pxe: block in-place. Uses awk to walk
|
|
# line-by-line: when we hit `pxe:`, skip everything up to the next
|
|
# top-level key (a line starting with a non-whitespace letter + ":")
|
|
# or EOF, and emit our freshly-rendered block instead. Everything
|
|
# outside the pxe: block is passed through unchanged, so hand-tuned
|
|
# server:/database:/notifiers: blocks survive intact.
|
|
|
|
# extract_yaml_value <key> <config-path>: print the value of ` key: value`
# from inside the top-level pxe: block of <config-path>.
#
# Arguments: $1 - key name, matched only on indented lines
#            $2 - path of the YAML file to read
# Outputs:   the bare value on stdout; an empty line when the value is empty,
#            nothing when the key (or the pxe: block) is absent
#
# The production yaml ships defaults such as `interface: "" # e.g. "eth0"`,
# and a hand-edited file may hold `interface: eth0 # e.g. "eth1"`. A value is
# therefore treated as quoted only when the value itself opens with a quote
# character. Looking for a quoted string anywhere on the line would pick up
# the quotes inside a trailing comment. Double- and single-quoted values are
# both unwrapped; escape sequences inside the quotes are not interpreted.
extract_yaml_value() {
  local key="$1" path="$2"
  awk -v key="${key}" '
    # Enter the pxe: block; the header line itself carries no value.
    /^pxe:/ { in_pxe = 1; next }
    # Any other top-level key ends the block.
    in_pxe && /^[A-Za-z_][A-Za-z0-9_]*:/ { in_pxe = 0 }
    in_pxe {
      re = "^[[:space:]]+" key ":[[:space:]]*"
      if ($0 ~ re) {
        line = $0
        sub(re, "", line)
        # Quoted value: take the text up to the matching closing quote.
        # "\047" is the single-quote character.
        quote = substr(line, 1, 1)
        if (quote == "\"" || quote == "\047") {
          rest = substr(line, 2)
          closing = index(rest, quote)
          if (closing > 0) {
            print substr(rest, 1, closing - 1)
            exit
          }
        }
        # Unquoted value: drop any trailing comment + whitespace.
        sub(/[[:space:]]*#.*$/, "", line)
        gsub(/^[[:space:]]+|[[:space:]]+$/, "", line)
        print line
        exit
      }
    }
  ' "${path}"
}
|
|
|
|
# Values currently recorded in the pxe: block (empty when unset or absent).
current_iface="$(extract_yaml_value interface "${CONFIG}")"
current_subnet="$(extract_yaml_value subnet "${CONFIG}")"

# Without --force, refuse to replace a non-empty interface or subnet that
# differs from what was requested on the command line.
if (( FORCE == 0 )); then
  if [[ -n "${current_iface}" && "${current_iface}" != "${INTERFACE}" ]]; then
    echo "ERROR: pxe.interface in ${CONFIG} is already set to ${current_iface}, which" >&2
    echo " differs from --interface ${INTERFACE}. Pass --force to overwrite." >&2
    exit 1
  fi
  if [[ -n "${current_subnet}" && "${current_subnet}" != "${SUBNET}" ]]; then
    echo "ERROR: pxe.subnet in ${CONFIG} is already ${current_subnet}, which" >&2
    echo " differs from --subnet ${SUBNET}. Pass --force to overwrite." >&2
    exit 1
  fi
fi
|
|
|
|
# Render the replacement pxe: block from the validated options.
#
# The keys must be indented beneath `pxe:`. At column 0 they would be separate
# top-level YAML keys rather than members of the block, and a later run of
# this script could neither read them back (only indented keys inside pxe:
# are matched) nor skip them when replacing the block (a column-0 key ends
# the skipped region).
new_block=$(cat <<EOF
pxe:
  enabled: true
  interface: "${INTERFACE}"
  subnet: "${SUBNET}"
  orchestrator_url: "${ORCH_URL}"
  tftp_root: "${TFTP_ROOT}"
  live_dir: "${LIVE_DIR}"
EOF
)
|
|
|
|
# Build the patched config in a scratch file, then install it over the
# original.
patched_cfg="$(mktemp)"

# The rendered block is handed to awk through the environment (ENVIRON)
# instead of -v, which chokes on the embedded newlines.
NEW_BLOCK="${new_block}" awk '
  # Header of the old pxe: block: emit the replacement and drop the header.
  /^pxe:/ {
    print ENVIRON["NEW_BLOCK"]
    in_old_block = 1
    replaced = 1
    next
  }
  # The next top-level key closes the old block.
  in_old_block && /^[A-Za-z_][A-Za-z0-9_]*:/ { in_old_block = 0 }
  # Lines belonging to the old block are discarded.
  in_old_block { next }
  # Everything else passes through unchanged.
  { print }
  # No pxe: block was present at all, so append one.
  END { if (!replaced) print ENVIRON["NEW_BLOCK"] }
' "${CONFIG}" > "${patched_cfg}"

# Carry the mode, owner and group of the original file over to the new one.
cfg_mode="$(stat -c '%a' "${CONFIG}")"
cfg_user="$(stat -c '%U' "${CONFIG}")"
cfg_group="$(stat -c '%G' "${CONFIG}")"
install -m "${cfg_mode}" -o "${cfg_user}" -g "${cfg_group}" \
  "${patched_cfg}" "${CONFIG}"
rm -f "${patched_cfg}"
|
|
|
|
# Closing summary: show the block that was written and the next step.
printf '\n'
printf '%s\n' "==> rendered pxe: block in ${CONFIG}:"
sed 's/^/ /' <<<"${new_block}"
printf '\n'
printf '%s\n' \
  "Next: systemctl restart vetting && journalctl -fu vetting" \
  "The orchestrator will refuse to start with clear errors if anything" \
  "is still missing; you should see dnsmasq come up cleanly."
|