feat(install): auto-heal pxe.interface/pxe.subnet against the host
A stale /etc/vetting/vetting.yaml (e.g. pxe.interface=eth1 after an LXC rebuild renamed the NIC to eth0) blocks vetting.service startup with "pxe.interface 'eth1' not found on host", requiring the operator to ssh in and hand-edit the yaml after every rebuild. install.sh now validates the pxe block against the host's actual network state on every install/upgrade run. If pxe.enabled is true and pxe.interface doesn't exist (or pxe.subnet is missing/malformed), the script auto-detects the primary NIC via the default route, reads its subnet from the kernel-scope route, and patches whichever of the two values is invalid in place. Valid configs are left exactly as the operator had them; configs with pxe.enabled=false (the fresh-install default) skip the check entirely. The one-liner install/update is now self-healing for the most common stale-config failure mode.
This commit is contained in:
@@ -77,6 +77,94 @@ if [[ $EUID -ne 0 ]]; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# heal_pxe_config: make sure /etc/vetting/vetting.yaml's pxe.interface
# and pxe.subnet reference things that actually exist on this host. Stale
# values (common after an LXC rebuild renames the NIC, or after pxe-setup
# was pointed at a NIC that later got removed) block vetting.service
# startup with "pxe.interface X not found on host".
#
# Only runs when pxe.enabled is true — a disabled pxe block doesn't gate
# startup. Only rewrites fields that are currently invalid; a good
# interface/subnet value (including its quoting, indentation and trailing
# comment) is preserved exactly as the operator had it.
#
# Detection rules:
#   * invalid/missing interface -> the NIC that carries the IPv4 default route
#   * invalid/missing subnet    -> the connected (proto kernel, scope link)
#                                  IPv4 network of the interface that will
#                                  end up in the file, i.e. the operator's
#                                  interface when that one is still valid.
#
# Arguments: $1 - path to the yaml config file
# Outputs:   progress lines on stdout, warnings/errors on stderr
# Returns:   0 when nothing had to change, when the file was patched, or
#            when auto-detection was impossible (a warning is printed and
#            the file is left untouched); 1 when writing the patched file
#            failed (the original file is left untouched).
heal_pxe_config() {
  local config="$1"
  [[ -f "${config}" ]] || return 0

  # Minimal one-key reader for the pxe: block. Mirrors pxe-setup.sh's
  # extract_yaml_value so the two scripts stay independent.
  # A quoted value is only recognised when the value itself starts with the
  # quote, so quotes inside a trailing comment are never mistaken for it.
  _pxe_val() {
    awk -v key="$1" -v sq="'" '
      /^pxe:/ { in_pxe = 1; next }
      in_pxe && /^[A-Za-z_][A-Za-z0-9_]*:/ { in_pxe = 0 }
      in_pxe {
        re = "^[[:space:]]+" key ":[[:space:]]*"
        if ($0 ~ re) {
          line = $0
          sub(re, "", line)
          first = substr(line, 1, 1)
          if (first == "\"" && match(line, /^"[^"]*"/)) {
            print substr(line, 2, RLENGTH - 2); exit
          }
          if (first == sq && match(line, "^" sq "[^" sq "]*" sq)) {
            print substr(line, 2, RLENGTH - 2); exit
          }
          sub(/[[:space:]]*#.*$/, "", line)
          gsub(/^[[:space:]]+|[[:space:]]+$/, "", line)
          print line; exit
        }
      }
    ' "${config}"
  }

  local enabled cur_iface cur_subnet
  enabled="$(_pxe_val enabled)"
  cur_iface="$(_pxe_val interface)"
  cur_subnet="$(_pxe_val subnet)"

  [[ "${enabled}" == "true" ]] || return 0

  local iface_ok=0 subnet_ok=0 octet
  local cidr_re='^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})/([0-9]{1,2})$'
  if [[ -n "${cur_iface}" ]] && ip link show "${cur_iface}" >/dev/null 2>&1; then
    iface_ok=1
  fi
  if [[ "${cur_subnet}" =~ ${cidr_re} ]]; then
    subnet_ok=1
    # The shape alone would accept 999.1.1.1/99; range-check every part.
    # 10# forces base 10 so an octet such as "08" is not parsed as octal.
    for octet in "${BASH_REMATCH[@]:1:4}"; do
      (( 10#${octet} <= 255 )) || subnet_ok=0
    done
    (( 10#${BASH_REMATCH[5]} <= 32 )) || subnet_ok=0
  fi
  if (( iface_ok && subnet_ok )); then
    return 0
  fi

  local new_iface="${cur_iface}" new_subnet="${cur_subnet}"

  if (( ! iface_ok )); then
    # Take the token after "dev" rather than a fixed column: the device is
    # not always field 5 (e.g. "default dev ppp0 scope link").
    # "|| true": an empty result is handled right below; a failing ip must
    # not abort the installer when it runs under errexit/pipefail.
    new_iface="$(ip -4 -o route show default 2>/dev/null \
      | awk '{ for (i = 1; i < NF; i++) if ($i == "dev") { print $(i + 1); exit } }')" || true
    if [[ -z "${new_iface}" ]]; then
      echo "WARN: pxe is enabled in ${config} but pxe.interface=${cur_iface:-<empty>} / pxe.subnet=${cur_subnet:-<empty>} is stale," >&2
      echo "      and no default-route NIC was found to auto-detect from. Edit the file manually before starting." >&2
      return 0
    fi
  fi

  if (( ! subnet_ok )); then
    # Read the subnet from the interface that will be written (the
    # operator's own NIC when it is still valid), so the pair always matches.
    new_subnet="$(ip -4 -o route show dev "${new_iface}" proto kernel scope link 2>/dev/null \
      | awk '{ print $1; exit }')" || true
    if [[ -z "${new_subnet}" ]]; then
      echo "WARN: pxe is enabled in ${config} but pxe.interface=${cur_iface:-<empty>} / pxe.subnet=${cur_subnet:-<empty>} is stale," >&2
      echo "      and no connected IPv4 subnet was found on ${new_iface} to auto-detect from. Edit the file manually before starting." >&2
      return 0
    fi
  fi

  if (( ! iface_ok )); then
    echo "==> pxe.interface \"${cur_iface}\" is not present on this host; auto-patching to \"${new_iface}\""
  fi
  if (( ! subnet_ok )); then
    echo "==> pxe.subnet \"${cur_subnet:-<empty>}\" is missing/invalid; auto-patching to \"${new_subnet}\""
  fi

  # Build the patched copy next to the original so the final mv is an
  # atomic rename on the same filesystem.
  local tmp
  if ! tmp="$(mktemp "${config}.XXXXXX")"; then
    echo "ERROR: could not create a temp file next to ${config}; pxe config left unchanged" >&2
    return 1
  fi

  # The awk pass buffers the file, replaces only the fields flagged for
  # fixing (keeping each line's own indentation) and, when such a field has
  # no line at all inside the pxe block, adds it right below "pxe:" using
  # the indentation of the block's first key. New values travel through the
  # environment so awk does not apply escape processing to them.
  if IFACE="${new_iface}" SUBNET="${new_subnet}" awk \
      -v fix_iface="$(( ! iface_ok ))" \
      -v fix_subnet="$(( ! subnet_ok ))" '
      function leading_ws(s) {
        return match(s, /^[[:space:]]+/) ? substr(s, 1, RLENGTH) : ""
      }
      { lines[NR] = $0 }
      /^pxe:/ { in_pxe = 1; if (!pxe_line) pxe_line = NR; next }
      in_pxe && /^[A-Za-z_][A-Za-z0-9_]*:/ { in_pxe = 0 }
      in_pxe && child_indent == "" && /^[[:space:]]+[^#[:space:]]/ {
        child_indent = leading_ws($0)
      }
      in_pxe && /^[[:space:]]+interface:/ {
        seen_iface = 1
        if (fix_iface) lines[NR] = leading_ws($0) "interface: \"" ENVIRON["IFACE"] "\""
      }
      in_pxe && /^[[:space:]]+subnet:/ {
        seen_subnet = 1
        if (fix_subnet) lines[NR] = leading_ws($0) "subnet: \"" ENVIRON["SUBNET"] "\""
      }
      END {
        if (child_indent == "") child_indent = "  "
        for (i = 1; i <= NR; i++) {
          print lines[i]
          if (i == pxe_line) {
            if (fix_iface && !seen_iface) print child_indent "interface: \"" ENVIRON["IFACE"] "\""
            if (fix_subnet && !seen_subnet) print child_indent "subnet: \"" ENVIRON["SUBNET"] "\""
          }
        }
      }
    ' "${config}" > "${tmp}" \
    && [[ -s "${tmp}" ]] \
    && chown --reference="${config}" "${tmp}" \
    && chmod --reference="${config}" "${tmp}" \
    && mv -f -- "${tmp}" "${config}"; then
    return 0
  fi

  # Any failed step lands here: never leave a stray temp file behind and
  # never replace the config with a partial or empty rewrite.
  rm -f -- "${tmp}"
  echo "ERROR: failed to write the patched pxe block to ${config}; file left unchanged" >&2
  return 1
}
|
||||||
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
|
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
|
||||||
|
|
||||||
@@ -193,6 +281,9 @@ if systemctl is-enabled --quiet dnsmasq 2>/dev/null; then
|
|||||||
systemctl disable --now dnsmasq
|
systemctl disable --now dnsmasq
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
echo "==> validating pxe config against this host's interfaces"
|
||||||
|
heal_pxe_config "${CONFIG_DIR}/vetting.yaml"
|
||||||
|
|
||||||
systemctl daemon-reload
|
systemctl daemon-reload
|
||||||
|
|
||||||
# Upgrade path: if vetting.service is already enabled, restart it so the
|
# Upgrade path: if vetting.service is already enabled, restart it so the
|
||||||
|
|||||||
Reference in New Issue
Block a user