feat(install): auto-heal pxe.interface/pxe.subnet against the host
CI / Lint + build + test (push) Successful in 1m42s
Release / release (push) Successful in 19m30s

A stale /etc/vetting/vetting.yaml (e.g. pxe.interface=eth1 after an
LXC rebuild renamed the NIC to eth0) blocks vetting.service startup
with "pxe.interface 'eth1' not found on host", requiring the operator
to SSH in and hand-edit the YAML after every rebuild.

install.sh now validates the pxe block against the host's actual
network state on every install/upgrade run. If pxe.enabled is true and
pxe.interface doesn't exist (or pxe.subnet is missing/malformed), the
script auto-detects the primary NIC via the default route, reads its
subnet from that NIC's kernel-installed link-scope route (`proto kernel
scope link`), and patches whichever of the two values is invalid in place.
Valid configs are left exactly as the operator had them; fresh
installs with pxe.enabled=false skip the check entirely.

The one-liner install/update is now self-healing for the most common
stale-config failure mode.
This commit is contained in:
2026-04-20 19:56:39 -04:00
parent 599fd156d0
commit a01db63952
+91
View File
@@ -77,6 +77,94 @@ if [[ $EUID -ne 0 ]]; then
exit 1 exit 1
fi fi
# heal_pxe_config: make sure the pxe.interface and pxe.subnet values in a
# vetting.yaml reference things that actually exist on this host. Stale
# values (common after an LXC rebuild renames the NIC, or after pxe-setup
# was pointed at a NIC that later got removed) block vetting.service
# startup with "pxe.interface X not found on host".
#
# Only runs when pxe.enabled is true — a disabled pxe block doesn't gate
# startup. Only rewrites fields that are currently invalid; a valid field's
# line is left byte-for-byte as the operator wrote it (comments included).
#
# Arguments: $1 - path to the YAML config (a missing file is a no-op)
# Outputs:   progress lines on stdout, warnings/errors on stderr
# Returns:   0 when nothing needed fixing, the file was patched, or nothing
#            could be auto-detected (warning only); 1 when the patched file
#            could not be written.
heal_pxe_config() {
  local config="$1"
  [[ -f "${config}" ]] || return 0

  # Minimal one-key reader for the pxe: block. Mirrors pxe-setup.sh's
  # extract_yaml_value so the two scripts stay independent.
  _pxe_val() {
    awk -v key="$1" -v sq="'" '
      /^pxe:/ { in_pxe = 1; next }
      in_pxe && /^[A-Za-z_][A-Za-z0-9_]*:/ { in_pxe = 0 }
      in_pxe {
        re = "^[[:space:]]+" key ":[[:space:]]*"
        if ($0 ~ re) {
          val = $0
          sub(re, "", val)
          # Only a value that STARTS with a quote is a quoted scalar; a
          # quoted word inside a trailing comment must not be picked up.
          first = substr(val, 1, 1)
          if (first == "\"" || first == sq) {
            rest = substr(val, 2)
            close_at = index(rest, first)
            if (close_at > 0) { print substr(rest, 1, close_at - 1); exit }
          }
          sub(/[[:space:]]*#.*$/, "", val)
          gsub(/^[[:space:]]+|[[:space:]]+$/, "", val)
          print val; exit
        }
      }
    ' "${config}"
  }

  local enabled cur_iface cur_subnet
  enabled="$(_pxe_val enabled)"
  cur_iface="$(_pxe_val interface)"
  cur_subnet="$(_pxe_val subnet)"
  [[ "${enabled}" == "true" ]] || return 0

  local iface_ok=0 subnet_ok=0 octet
  if [[ -n "${cur_iface}" ]] && ip link show "${cur_iface}" >/dev/null 2>&1; then
    iface_ok=1
  fi
  if [[ "${cur_subnet}" =~ ^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})/([0-9]{1,2})$ ]]; then
    subnet_ok=1
    # Shape alone would accept 999.1.1.1/40; range-check each part.
    # 10# forces base 10 so "08" is not parsed as (invalid) octal.
    for octet in "${BASH_REMATCH[@]:1:4}"; do
      (( 10#${octet} <= 255 )) || subnet_ok=0
    done
    (( 10#${BASH_REMATCH[5]} <= 32 )) || subnet_ok=0
  fi
  if (( iface_ok && subnet_ok )); then
    return 0
  fi

  # The interface that will end up in the file: the operator's if it is
  # valid, otherwise the NIC carrying the IPv4 default route. The device is
  # the token after "dev"; its position varies (routes without a "via"
  # gateway have it in column 3, not 5). The awk programs below read all of
  # their input (no early exit) so ip never sees SIGPIPE under pipefail, and
  # "|| true" keeps detection best-effort when the script runs with set -e.
  local target_iface="${cur_iface}" detected_iface="" detected_subnet=""
  if (( ! iface_ok )); then
    detected_iface="$(ip -4 -o route show default 2>/dev/null \
      | awk '!found { for (i = 1; i < NF; i++) if ($i == "dev") { print $(i + 1); found = 1; break } }')" || true
    target_iface="${detected_iface}"
  fi
  if [[ -z "${target_iface}" ]]; then
    echo "WARN: pxe is enabled in ${config} but pxe.interface=${cur_iface:-<empty>} / pxe.subnet=${cur_subnet:-<empty>} is stale," >&2
    echo "      and no default-route NIC was found to auto-detect from. Edit the file manually before starting." >&2
    return 0
  fi

  # A replacement subnet must describe the interface that will be written,
  # so read it from that interface's kernel-installed link-scope route.
  if (( ! subnet_ok )); then
    detected_subnet="$(ip -4 -o route show dev "${target_iface}" proto kernel scope link 2>/dev/null \
      | awk 'NR == 1 { print $1 }')" || true
    if [[ -z "${detected_subnet}" ]]; then
      echo "WARN: pxe is enabled in ${config} but pxe.interface=${cur_iface:-<empty>} / pxe.subnet=${cur_subnet:-<empty>} is stale," >&2
      echo "      and no IPv4 subnet could be detected on ${target_iface}. Edit the file manually before starting." >&2
      return 0
    fi
  fi

  local fix_iface=0 fix_subnet=0
  if (( ! iface_ok )); then
    echo "==> pxe.interface \"${cur_iface:-<empty>}\" is not present on this host; auto-patching to \"${target_iface}\""
    fix_iface=1
  fi
  if (( ! subnet_ok )); then
    echo "==> pxe.subnet \"${cur_subnet:-<empty>}\" is missing/invalid; auto-patching to \"${detected_subnet}\""
    fix_subnet=1
  fi

  # Stage the new file next to the original so the final mv is a rename on
  # the same filesystem (atomic) rather than a cross-device copy.
  local tmp
  if ! tmp="$(mktemp "${config}.XXXXXX")"; then
    echo "ERROR: could not create a temporary file next to ${config}" >&2
    return 1
  fi

  if ! IFACE="${target_iface}" SUBNET="${detected_subnet}" \
       FIX_IFACE="${fix_iface}" FIX_SUBNET="${fix_subnet}" awk '
    # Blank lines inside the pxe block are held back so that a key which
    # has to be inserted lands directly after the last real line.
    function emit_pending(   i) {
      for (i = 1; i <= npend; i++) print pend[i]
      npend = 0
    }
    # Leaving the pxe block: add any field that needed fixing but had no
    # line to replace (the key was missing altogether).
    function close_pxe() {
      if (fix_iface && !seen_iface)   print indent "interface: \"" ENVIRON["IFACE"] "\""
      if (fix_subnet && !seen_subnet) print indent "subnet: \"" ENVIRON["SUBNET"] "\""
      emit_pending()
      in_pxe = 0
    }
    BEGIN {
      fix_iface  = (ENVIRON["FIX_IFACE"] == "1")
      fix_subnet = (ENVIRON["FIX_SUBNET"] == "1")
    }
    /^pxe:/ {
      if (in_pxe) close_pxe()
      in_pxe = 1; seen_iface = 0; seen_subnet = 0
      indent = "  "; have_indent = 0
      print; next
    }
    in_pxe && /^[A-Za-z_][A-Za-z0-9_]*:/ { close_pxe() }
    in_pxe && /^[[:space:]]*$/ { pend[++npend] = $0; next }
    in_pxe {
      emit_pending()
      lead = ""
      if (match($0, /^[[:space:]]+/)) lead = substr($0, 1, RLENGTH)
      # Remember the indentation of the first real child line so inserted
      # keys line up with the existing ones.
      if (lead != "" && !have_indent && $0 !~ /^[[:space:]]*#/) { indent = lead; have_indent = 1 }
      if (fix_iface && $0 ~ /^[[:space:]]+interface:/) {
        print lead "interface: \"" ENVIRON["IFACE"] "\""; seen_iface = 1; next
      }
      if (fix_subnet && $0 ~ /^[[:space:]]+subnet:/) {
        print lead "subnet: \"" ENVIRON["SUBNET"] "\""; seen_subnet = 1; next
      }
    }
    { print }
    END { if (in_pxe) close_pxe() }
  ' "${config}" > "${tmp}"; then
    rm -f -- "${tmp}"
    echo "ERROR: failed to rewrite ${config}; leaving it untouched" >&2
    return 1
  fi

  # Carry over ownership and mode, then swap the file in. Any failure leaves
  # the original config in place and removes the staged copy.
  if chown --reference="${config}" "${tmp}" \
     && chmod --reference="${config}" "${tmp}" \
     && mv -f -- "${tmp}" "${config}"; then
    return 0
  fi
  rm -f -- "${tmp}"
  echo "ERROR: failed to install patched ${config}; leaving it untouched" >&2
  return 1
}
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
@@ -193,6 +281,9 @@ if systemctl is-enabled --quiet dnsmasq 2>/dev/null; then
systemctl disable --now dnsmasq systemctl disable --now dnsmasq
fi fi
# Repair a stale pxe.interface/pxe.subnet (see heal_pxe_config, defined
# earlier in this script) before systemd is reloaded and the service is
# (re)started below. CONFIG_DIR is set outside this section — presumably
# /etc/vetting; verify against its definition.
echo "==> validating pxe config against this host's interfaces"
heal_pxe_config "${CONFIG_DIR}/vetting.yaml"
systemctl daemon-reload systemctl daemon-reload
# Upgrade path: if vetting.service is already enabled, restart it so the # Upgrade path: if vetting.service is already enabled, restart it so the