pxe: switch dnsmasq to proxy-DHCP mode on the LAN
CI / Lint + build + test (push) Successful in 1m48s
Release / release (push) Successful in 2m22s

Previously the orchestrator ran a full DHCP server on a dedicated
br-vetting bridge (10.77.0.0/24), which required a hypervisor-level
bridge + physical cabling onto that bridge for every repaired host.
Real-world bite: the LXC's br-vetting had no L2 path to the target
host's PXE NIC, so DHCPDISCOVERs never reached eth1 and PXE silently
timed out.

dnsmasq's proxy-DHCP mode is the idiomatic answer: it coexists with
the LAN's existing DHCP server (UniFi, etc.), never assigns an IP
itself, and only supplements the PXE options. No dedicated bridge,
no VLAN, no cabling changes — dnsmasq binds to the LAN interface
and layers option 66/67 + the PXE BINL on top of the real DHCP
exchange. The MAC allowlist still gates replies, so random LAN
clients booting from network get nothing.

Template switches dhcp-range=<start,end,lease> to
dhcp-range=<cidr>,proxy and replaces dhcp-boot= for first-boot ROM
clients with pxe-service= directives (the correct proxy-mode
chainload form). Validation drops the dhcp_range regex for a
net.ParseCIDR check on pxe.subnet. Config, production/example yaml,
and pxe-setup.sh swap --dhcp-range for --subnet.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-18 12:02:49 -04:00
parent b809bf5f3e
commit 506c856046
7 changed files with 63 additions and 68 deletions
+1 -1
View File
@@ -151,7 +151,7 @@ func main() {
supervisor = pxe.NewSupervisor(pxe.SupervisorConfig{ supervisor = pxe.NewSupervisor(pxe.SupervisorConfig{
Enabled: true, Enabled: true,
Interface: cfg.PXE.Interface, Interface: cfg.PXE.Interface,
DHCPRange: cfg.PXE.DHCPRange, Subnet: cfg.PXE.Subnet,
OrchestratorURL: cfg.PXE.OrchestratorURL, OrchestratorURL: cfg.PXE.OrchestratorURL,
RuntimeDir: filepath.Join(stateRoot, "pxe"), RuntimeDir: filepath.Join(stateRoot, "pxe"),
TFTPRoot: tftpRoot, TFTPRoot: tftpRoot,
+25 -22
View File
@@ -3,9 +3,12 @@
# #
# Run AFTER deploy/install.sh on the LXC (or wherever the orchestrator # Run AFTER deploy/install.sh on the LXC (or wherever the orchestrator
# lives). Fetches pinned iPXE binaries, places the live image, and # lives). Fetches pinned iPXE binaries, places the live image, and
# writes the pxe: block of /etc/vetting/vetting.yaml. Does NOT create # writes the pxe: block of /etc/vetting/vetting.yaml.
# the PXE bridge — that's a hypervisor-level step, see #
# docs/operations.md. # dnsmasq runs in proxy-DHCP mode: it coexists with whatever DHCP
# server already serves your LAN (UniFi, pfSense, Asus, etc.) and
# only supplements the PXE options. No dedicated bridge, no VLAN,
# no cabling changes.
# #
# Idempotent: safe to re-run with the same args. A second run with # Idempotent: safe to re-run with the same args. A second run with
# different args overwrites the pxe: block; pass --force to override # different args overwrites the pxe: block; pass --force to override
@@ -13,9 +16,9 @@
# #
# Usage: # Usage:
# sudo ./pxe-setup.sh \ # sudo ./pxe-setup.sh \
# --interface eth1 \ # --interface eth0 \
# --dhcp-range 10.77.0.100,10.77.0.200,12h \ # --subnet 192.168.1.0/24 \
# --orchestrator-url http://10.77.0.2:8080 # --orchestrator-url http://192.168.1.135:8080
# #
# Optional: # Optional:
# --tftp-root DIR default /var/lib/vetting/tftp # --tftp-root DIR default /var/lib/vetting/tftp
@@ -26,7 +29,7 @@
set -euo pipefail set -euo pipefail
INTERFACE="" INTERFACE=""
DHCP_RANGE="" SUBNET=""
ORCH_URL="" ORCH_URL=""
TFTP_ROOT="/var/lib/vetting/tftp" TFTP_ROOT="/var/lib/vetting/tftp"
LIVE_DIR="/var/lib/vetting/live" LIVE_DIR="/var/lib/vetting/live"
@@ -38,13 +41,13 @@ SERVICE_USER="vetting"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
usage() { usage() {
sed -n '2,24p' "${BASH_SOURCE[0]}" sed -n '2,28p' "${BASH_SOURCE[0]}"
} }
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
case "$1" in case "$1" in
--interface) INTERFACE="$2"; shift 2 ;; --interface) INTERFACE="$2"; shift 2 ;;
--dhcp-range) DHCP_RANGE="$2"; shift 2 ;; --subnet) SUBNET="$2"; shift 2 ;;
--orchestrator-url) ORCH_URL="$2"; shift 2 ;; --orchestrator-url) ORCH_URL="$2"; shift 2 ;;
--tftp-root) TFTP_ROOT="$2"; shift 2 ;; --tftp-root) TFTP_ROOT="$2"; shift 2 ;;
--live-dir) LIVE_DIR="$2"; shift 2 ;; --live-dir) LIVE_DIR="$2"; shift 2 ;;
@@ -61,9 +64,9 @@ if [[ $EUID -ne 0 ]]; then
exit 1 exit 1
fi fi
[[ -z "${INTERFACE}" ]] && { echo "ERROR: --interface is required" >&2; exit 2; } [[ -z "${INTERFACE}" ]] && { echo "ERROR: --interface is required" >&2; exit 2; }
[[ -z "${DHCP_RANGE}" ]] && { echo "ERROR: --dhcp-range is required" >&2; exit 2; } [[ -z "${SUBNET}" ]] && { echo "ERROR: --subnet is required (e.g. 192.168.1.0/24)" >&2; exit 2; }
[[ -z "${ORCH_URL}" ]] && { echo "ERROR: --orchestrator-url is required" >&2; exit 2; } [[ -z "${ORCH_URL}" ]] && { echo "ERROR: --orchestrator-url is required" >&2; exit 2; }
# --- sanity checks ----------------------------------------------------- # --- sanity checks -----------------------------------------------------
@@ -73,10 +76,10 @@ if ! ip link show "${INTERFACE}" >/dev/null 2>&1; then
exit 1 exit 1
fi fi
# "start_ip,end_ip,lease" — dnsmasq will still validate, but catch the # CIDR shape check — dnsmasq will re-validate, but catch the obvious
# obvious shape errors before we write anything to disk. # errors before we write anything to disk.
if [[ ! "${DHCP_RANGE}" =~ ^([0-9]{1,3}\.){3}[0-9]{1,3},([0-9]{1,3}\.){3}[0-9]{1,3},[^[:space:]]+$ ]]; then if [[ ! "${SUBNET}" =~ ^([0-9]{1,3}\.){3}[0-9]{1,3}/[0-9]{1,2}$ ]]; then
echo "ERROR: --dhcp-range must be start_ip,end_ip,lease (e.g. 10.77.0.100,10.77.0.200,12h)" >&2 echo "ERROR: --subnet must be CIDR form (e.g. 192.168.1.0/24), got '${SUBNET}'" >&2
exit 2 exit 2
fi fi
@@ -226,17 +229,17 @@ extract_yaml_value() {
' "${path}" ' "${path}"
} }
existing_iface="$(extract_yaml_value interface "${CONFIG}")" existing_iface="$(extract_yaml_value interface "${CONFIG}")"
existing_range="$(extract_yaml_value dhcp_range "${CONFIG}")" existing_subnet="$(extract_yaml_value subnet "${CONFIG}")"
if [[ -n "${existing_iface}" && "${existing_iface}" != "${INTERFACE}" && ${FORCE} -eq 0 ]]; then if [[ -n "${existing_iface}" && "${existing_iface}" != "${INTERFACE}" && ${FORCE} -eq 0 ]]; then
echo "ERROR: pxe.interface in ${CONFIG} is already set to ${existing_iface}, which" >&2 echo "ERROR: pxe.interface in ${CONFIG} is already set to ${existing_iface}, which" >&2
echo " differs from --interface ${INTERFACE}. Pass --force to overwrite." >&2 echo " differs from --interface ${INTERFACE}. Pass --force to overwrite." >&2
exit 1 exit 1
fi fi
if [[ -n "${existing_range}" && "${existing_range}" != "${DHCP_RANGE}" && ${FORCE} -eq 0 ]]; then if [[ -n "${existing_subnet}" && "${existing_subnet}" != "${SUBNET}" && ${FORCE} -eq 0 ]]; then
echo "ERROR: pxe.dhcp_range in ${CONFIG} is already ${existing_range}, which" >&2 echo "ERROR: pxe.subnet in ${CONFIG} is already ${existing_subnet}, which" >&2
echo " differs from --dhcp-range ${DHCP_RANGE}. Pass --force to overwrite." >&2 echo " differs from --subnet ${SUBNET}. Pass --force to overwrite." >&2
exit 1 exit 1
fi fi
@@ -244,7 +247,7 @@ new_block=$(cat <<EOF
pxe: pxe:
enabled: true enabled: true
interface: "${INTERFACE}" interface: "${INTERFACE}"
dhcp_range: "${DHCP_RANGE}" subnet: "${SUBNET}"
orchestrator_url: "${ORCH_URL}" orchestrator_url: "${ORCH_URL}"
tftp_root: "${TFTP_ROOT}" tftp_root: "${TFTP_ROOT}"
live_dir: "${LIVE_DIR}" live_dir: "${LIVE_DIR}"
+3 -3
View File
@@ -35,9 +35,9 @@ dispatcher:
pxe: pxe:
enabled: false enabled: false
interface: "" # e.g. "eth0" interface: "" # LAN NIC, e.g. "eth0"
dhcp_range: "" # e.g. "10.77.0.100,10.77.0.200,12h" subnet: "" # LAN CIDR, e.g. "192.168.1.0/24"; proxy-DHCP scope
orchestrator_url: "" # e.g. "http://10.77.0.1:8080" orchestrator_url: "" # e.g. "http://192.168.1.135:8080"
tftp_root: "" # holds ipxe.efi + undionly.kpxe tftp_root: "" # holds ipxe.efi + undionly.kpxe
live_dir: "" # holds vmlinuz + initrd.img; served at /live/* live_dir: "" # holds vmlinuz + initrd.img; served at /live/*
+3 -3
View File
@@ -33,9 +33,9 @@ dispatcher:
pxe: pxe:
enabled: false enabled: false
interface: "" # e.g. "eth0" interface: "" # LAN NIC, e.g. "eth0"
dhcp_range: "" # e.g. "10.77.0.100,10.77.0.200,12h" subnet: "" # LAN CIDR, e.g. "192.168.1.0/24"; dnsmasq runs in proxy-DHCP mode scoped to this subnet, coexisting with the LAN's existing DHCP server
orchestrator_url: "" # e.g. "http://10.77.0.1:8080" orchestrator_url: "" # e.g. "http://192.168.1.135:8080"
tftp_root: "/var/lib/vetting/tftp" # holds ipxe.efi + undionly.kpxe tftp_root: "/var/lib/vetting/tftp" # holds ipxe.efi + undionly.kpxe
live_dir: "/var/lib/vetting/live" # holds vmlinuz + initrd.img; served at /live/* live_dir: "/var/lib/vetting/live" # holds vmlinuz + initrd.img; served at /live/*
+1 -1
View File
@@ -66,7 +66,7 @@ type Network struct {
type PXE struct { type PXE struct {
Enabled bool `yaml:"enabled"` Enabled bool `yaml:"enabled"`
Interface string `yaml:"interface"` Interface string `yaml:"interface"`
DHCPRange string `yaml:"dhcp_range"` Subnet string `yaml:"subnet"` // LAN CIDR, e.g. "192.168.1.0/24"; dnsmasq runs in proxy-DHCP mode scoped to this subnet
OrchestratorURL string `yaml:"orchestrator_url"` OrchestratorURL string `yaml:"orchestrator_url"`
TFTPRoot string `yaml:"tftp_root"` // holds ipxe.efi + undionly.kpxe TFTPRoot string `yaml:"tftp_root"` // holds ipxe.efi + undionly.kpxe
LiveDir string `yaml:"live_dir"` // holds vmlinuz + initrd.img; served at /live LiveDir string `yaml:"live_dir"` // holds vmlinuz + initrd.img; served at /live
+17 -25
View File
@@ -11,7 +11,6 @@ import (
"os" "os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"regexp"
"runtime" "runtime"
"strings" "strings"
"sync" "sync"
@@ -25,7 +24,7 @@ import (
type SupervisorConfig struct { type SupervisorConfig struct {
Enabled bool Enabled bool
Interface string // e.g. "eth0" Interface string // e.g. "eth0"
DHCPRange string // e.g. "10.77.0.100,10.77.0.200,12h" Subnet string // LAN CIDR, e.g. "192.168.1.0/24"; scopes the proxy-DHCP response
OrchestratorURL string // baked into iPXE scripts OrchestratorURL string // baked into iPXE scripts
RuntimeDir string // writable dir for dnsmasq.conf and leases RuntimeDir string // writable dir for dnsmasq.conf and leases
TFTPRoot string // holds ipxe.efi, undionly.kpxe TFTPRoot string // holds ipxe.efi, undionly.kpxe
@@ -50,11 +49,6 @@ func NewSupervisor(cfg SupervisorConfig) *Supervisor {
return &Supervisor{cfg: cfg} return &Supervisor{cfg: cfg}
} }
// dhcpRangeRE matches "start_ip,end_ip,lease" — the three-field form
// dnsmasq expects. Lease can be "12h", "infinite", etc.; any non-empty
// token is accepted here and dnsmasq will reject nonsense at startup.
var dhcpRangeRE = regexp.MustCompile(`^(\d{1,3}\.){3}\d{1,3},(\d{1,3}\.){3}\d{1,3},\S+$`)
// Validate checks the preconditions required for dnsmasq to actually // Validate checks the preconditions required for dnsmasq to actually
// serve PXE boots: the interface must exist, the iPXE payloads must // serve PXE boots: the interface must exist, the iPXE payloads must
// be on disk, the DHCP range + orchestrator URL must parse. Returns // be on disk, the DHCP range + orchestrator URL must parse. Returns
@@ -94,10 +88,10 @@ func (s *Supervisor) Validate() error {
} }
} }
if s.cfg.DHCPRange == "" { if s.cfg.Subnet == "" {
errs = append(errs, fmt.Errorf("pxe.dhcp_range is required (e.g. \"10.77.0.100,10.77.0.200,12h\")")) errs = append(errs, fmt.Errorf("pxe.subnet is required (e.g. \"192.168.1.0/24\") — the LAN CIDR dnsmasq proxy-DHCP scopes to"))
} else if !dhcpRangeRE.MatchString(s.cfg.DHCPRange) { } else if _, _, err := net.ParseCIDR(s.cfg.Subnet); err != nil {
errs = append(errs, fmt.Errorf("pxe.dhcp_range %q must be \"start_ip,end_ip,lease\"", s.cfg.DHCPRange)) errs = append(errs, fmt.Errorf("pxe.subnet %q is not a valid CIDR: %v", s.cfg.Subnet, err))
} }
if s.cfg.OrchestratorURL == "" { if s.cfg.OrchestratorURL == "" {
@@ -267,38 +261,36 @@ domain-needed
bogus-priv bogus-priv
no-resolv no-resolv
# Proxy DHCP: coexist with the LAN's real DHCP server. We never hand
# out an IP — we only answer the PXE options (option 66/67 and the
# PXE BINL on port 4011) when a registered MAC boots from the network.
dhcp-range={{ .Cfg.Subnet }},proxy
# MAC allowlist: dnsmasq only answers DHCP for MACs with a dhcp-host= below. # MAC allowlist: dnsmasq only answers DHCP for MACs with a dhcp-host= below.
dhcp-ignore=tag:!known dhcp-ignore=tag:!known
{{- range .Hosts }} {{- range .Hosts }}
dhcp-host={{ .MAC }},set:known dhcp-host={{ .MAC }},set:known
{{- end }} {{- end }}
# DHCP range (broader subnet coverage is fine; allowlist above gates replies).
dhcp-range={{ .Cfg.DHCPRange }}
# Keep runtime state inside RuntimeDir so the systemd sandbox # Keep runtime state inside RuntimeDir so the systemd sandbox
# (ReadWritePaths=/var/lib/vetting ...) doesn't block writes to the # (ReadWritePaths=/var/lib/vetting ...) doesn't block writes to the
# distro defaults (/var/lib/misc/dnsmasq.leases, /run/dnsmasq.pid). # distro defaults (/var/lib/misc/dnsmasq.leases, /run/dnsmasq.pid).
dhcp-leasefile={{ .Cfg.RuntimeDir }}/dhcp.leases dhcp-leasefile={{ .Cfg.RuntimeDir }}/dhcp.leases
pid-file={{ .Cfg.RuntimeDir }}/dnsmasq.pid pid-file={{ .Cfg.RuntimeDir }}/dnsmasq.pid
# TFTP + HTTP boot (iPXE chainload). # TFTP for first-boot BIOS/UEFI clients; already-iPXE clients skip it.
enable-tftp enable-tftp
tftp-root={{ .Cfg.TFTPRoot }} tftp-root={{ .Cfg.TFTPRoot }}
# BIOS (undionly.kpxe) and UEFI (ipxe.efi) clients both get iPXE first, # Already-iPXE clients: chainload the per-MAC HTTP script directly.
# which then re-requests a per-MAC script from the orchestrator.
dhcp-match=set:bios,option:client-arch,0
dhcp-match=set:efi64,option:client-arch,7
dhcp-match=set:efi64,option:client-arch,9
# If the client is iPXE itself, send it the per-MAC HTTP script.
dhcp-match=set:ipxe,175 dhcp-match=set:ipxe,175
dhcp-boot=tag:ipxe,{{ .Cfg.OrchestratorURL }}/ipxe/${mac} dhcp-boot=tag:ipxe,{{ .Cfg.OrchestratorURL }}/ipxe/${mac}
# Otherwise (first boot from ROM) chainload iPXE from TFTP. # First-boot PXE ROM -> iPXE. In proxy-DHCP mode, chainloading uses
dhcp-boot=tag:!ipxe,tag:bios,undionly.kpxe # pxe-service= (not dhcp-boot=) because the real LAN DHCP has already
dhcp-boot=tag:!ipxe,tag:efi64,ipxe.efi # assigned the IP; we only supplement the boot menu.
pxe-service=tag:!ipxe,x86PC,"iPXE (BIOS)",undionly.kpxe
pxe-service=tag:!ipxe,X86-64_EFI,"iPXE (UEFI)",ipxe.efi
log-facility=- log-facility=-
` `
+13 -13
View File
@@ -39,8 +39,8 @@ func goodCfg(t *testing.T, tftpRoot string) SupervisorConfig {
return SupervisorConfig{ return SupervisorConfig{
Enabled: true, Enabled: true,
Interface: existingInterface(t), Interface: existingInterface(t),
DHCPRange: "10.77.0.100,10.77.0.200,12h", Subnet: "192.168.1.0/24",
OrchestratorURL: "http://10.77.0.1:8080", OrchestratorURL: "http://192.168.1.2:8080",
TFTPRoot: tftpRoot, TFTPRoot: tftpRoot,
} }
} }
@@ -128,26 +128,26 @@ func TestValidate_LiveDirEmptySkipsLiveChecks(t *testing.T) {
} }
} }
func TestValidate_MalformedDHCPRange(t *testing.T) { func TestValidate_MalformedSubnet(t *testing.T) {
tftp := t.TempDir() tftp := t.TempDir()
seedTFTP(t, tftp, "ipxe.efi", "undionly.kpxe") seedTFTP(t, tftp, "ipxe.efi", "undionly.kpxe")
cases := []struct { cases := []struct {
name string name string
dhcp string subnet string
}{ }{
{"single field", "10.77.0.100"}, {"no mask", "192.168.1.0"},
{"two fields", "10.77.0.100,10.77.0.200"}, {"bad ip", "hello/24"},
{"non-ip start", "hello,10.77.0.200,12h"}, {"bad mask", "192.168.1.0/99"},
{"empty lease", "10.77.0.100,10.77.0.200,"}, {"leftover dhcp_range form", "192.168.1.100,192.168.1.200,12h"},
} }
for _, tc := range cases { for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) { t.Run(tc.name, func(t *testing.T) {
cfg := goodCfg(t, tftp) cfg := goodCfg(t, tftp)
cfg.DHCPRange = tc.dhcp cfg.Subnet = tc.subnet
s := NewSupervisor(cfg) s := NewSupervisor(cfg)
err := s.Validate() err := s.Validate()
if err == nil || !strings.Contains(err.Error(), "dhcp_range") { if err == nil || !strings.Contains(err.Error(), "pxe.subnet") {
t.Fatalf("expected dhcp_range error for %q, got: %v", tc.dhcp, err) t.Fatalf("expected pxe.subnet error for %q, got: %v", tc.subnet, err)
} }
}) })
} }
@@ -183,7 +183,7 @@ func TestValidate_AggregatesErrors(t *testing.T) {
if err == nil { if err == nil {
t.Fatalf("expected aggregated error") t.Fatalf("expected aggregated error")
} }
for _, want := range []string{"pxe.interface", "pxe.tftp_root", "pxe.dhcp_range", "pxe.orchestrator_url"} { for _, want := range []string{"pxe.interface", "pxe.tftp_root", "pxe.subnet", "pxe.orchestrator_url"} {
if !strings.Contains(err.Error(), want) { if !strings.Contains(err.Error(), want) {
t.Fatalf("expected %q in aggregated error, got: %v", want, err) t.Fatalf("expected %q in aggregated error, got: %v", want, err)
} }