pxe: switch dnsmasq to proxy-DHCP mode on the LAN
Previously the orchestrator ran a full DHCP server on a dedicated br-vetting bridge (10.77.0.0/24), which required a hypervisor-level bridge + physical cabling onto that bridge for every repaired host. Real-world bite: the LXC's br-vetting had no L2 path to the target host's PXE NIC, so DHCPDISCOVERs never reached eth1 and PXE silently timed out. dnsmasq's proxy-DHCP mode is the idiomatic answer: it coexists with the LAN's existing DHCP server (UniFi, etc.), never assigns an IP itself, and only supplements the PXE options. No dedicated bridge, no VLAN, no cabling changes — dnsmasq binds to the LAN interface and layers option 66/67 + the PXE BINL on top of the real DHCP exchange. The MAC allowlist still gates replies, so random LAN clients booting from the network get nothing. Template switches dhcp-range=<start,end,lease> to dhcp-range=<cidr>,proxy and replaces dhcp-boot= for first-boot ROM clients with pxe-service= directives (the correct proxy-mode chainload form). Validation drops the dhcp_range regex for a net.ParseCIDR check on pxe.subnet. Config, production/example yaml, and pxe-setup.sh swap --dhcp-range for --subnet. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+17
-25
@@ -11,7 +11,6 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -25,7 +24,7 @@ import (
|
||||
type SupervisorConfig struct {
|
||||
Enabled bool
|
||||
Interface string // e.g. "eth0"
|
||||
DHCPRange string // e.g. "10.77.0.100,10.77.0.200,12h"
|
||||
Subnet string // LAN CIDR, e.g. "192.168.1.0/24"; scopes the proxy-DHCP response
|
||||
OrchestratorURL string // baked into iPXE scripts
|
||||
RuntimeDir string // writable dir for dnsmasq.conf and leases
|
||||
TFTPRoot string // holds ipxe.efi, undionly.kpxe
|
||||
@@ -50,11 +49,6 @@ func NewSupervisor(cfg SupervisorConfig) *Supervisor {
|
||||
return &Supervisor{cfg: cfg}
|
||||
}
|
||||
|
||||
// dhcpRangeRE matches "start_ip,end_ip,lease" — the three-field form
|
||||
// dnsmasq expects. Lease can be "12h", "infinite", etc.; any non-empty
|
||||
// token is accepted here and dnsmasq will reject nonsense at startup.
|
||||
var dhcpRangeRE = regexp.MustCompile(`^(\d{1,3}\.){3}\d{1,3},(\d{1,3}\.){3}\d{1,3},\S+$`)
|
||||
|
||||
// Validate checks the preconditions required for dnsmasq to actually
|
||||
// serve PXE boots: the interface must exist, the iPXE payloads must
|
||||
// be on disk, the subnet CIDR + orchestrator URL must parse. Returns
|
||||
@@ -94,10 +88,10 @@ func (s *Supervisor) Validate() error {
|
||||
}
|
||||
}
|
||||
|
||||
if s.cfg.DHCPRange == "" {
|
||||
errs = append(errs, fmt.Errorf("pxe.dhcp_range is required (e.g. \"10.77.0.100,10.77.0.200,12h\")"))
|
||||
} else if !dhcpRangeRE.MatchString(s.cfg.DHCPRange) {
|
||||
errs = append(errs, fmt.Errorf("pxe.dhcp_range %q must be \"start_ip,end_ip,lease\"", s.cfg.DHCPRange))
|
||||
if s.cfg.Subnet == "" {
|
||||
errs = append(errs, fmt.Errorf("pxe.subnet is required (e.g. \"192.168.1.0/24\") — the LAN CIDR dnsmasq proxy-DHCP scopes to"))
|
||||
} else if _, _, err := net.ParseCIDR(s.cfg.Subnet); err != nil {
|
||||
errs = append(errs, fmt.Errorf("pxe.subnet %q is not a valid CIDR: %v", s.cfg.Subnet, err))
|
||||
}
|
||||
|
||||
if s.cfg.OrchestratorURL == "" {
|
||||
@@ -267,38 +261,36 @@ domain-needed
|
||||
bogus-priv
|
||||
no-resolv
|
||||
|
||||
# Proxy DHCP: coexist with the LAN's real DHCP server. We never hand
|
||||
# out an IP — we only answer the PXE options (option 66/67 and the
|
||||
# PXE BINL on port 4011) when a registered MAC boots from the network.
|
||||
dhcp-range={{ .Cfg.Subnet }},proxy
|
||||
|
||||
# MAC allowlist: dnsmasq only answers DHCP for MACs with a dhcp-host= below.
|
||||
dhcp-ignore=tag:!known
|
||||
{{- range .Hosts }}
|
||||
dhcp-host={{ .MAC }},set:known
|
||||
{{- end }}
|
||||
|
||||
# DHCP range (broader subnet coverage is fine; allowlist above gates replies).
|
||||
dhcp-range={{ .Cfg.DHCPRange }}
|
||||
|
||||
# Keep runtime state inside RuntimeDir so the systemd sandbox
|
||||
# (ReadWritePaths=/var/lib/vetting ...) doesn't block writes to the
|
||||
# distro defaults (/var/lib/misc/dnsmasq.leases, /run/dnsmasq.pid).
|
||||
dhcp-leasefile={{ .Cfg.RuntimeDir }}/dhcp.leases
|
||||
pid-file={{ .Cfg.RuntimeDir }}/dnsmasq.pid
|
||||
|
||||
# TFTP + HTTP boot (iPXE chainload).
|
||||
# TFTP for first-boot BIOS/UEFI clients; already-iPXE clients skip it.
|
||||
enable-tftp
|
||||
tftp-root={{ .Cfg.TFTPRoot }}
|
||||
|
||||
# BIOS (undionly.kpxe) and UEFI (ipxe.efi) clients both get iPXE first,
|
||||
# which then re-requests a per-MAC script from the orchestrator.
|
||||
dhcp-match=set:bios,option:client-arch,0
|
||||
dhcp-match=set:efi64,option:client-arch,7
|
||||
dhcp-match=set:efi64,option:client-arch,9
|
||||
|
||||
# If the client is iPXE itself, send it the per-MAC HTTP script.
|
||||
# Already-iPXE clients: chainload the per-MAC HTTP script directly.
|
||||
dhcp-match=set:ipxe,175
|
||||
dhcp-boot=tag:ipxe,{{ .Cfg.OrchestratorURL }}/ipxe/${mac}
|
||||
|
||||
# Otherwise (first boot from ROM) chainload iPXE from TFTP.
|
||||
dhcp-boot=tag:!ipxe,tag:bios,undionly.kpxe
|
||||
dhcp-boot=tag:!ipxe,tag:efi64,ipxe.efi
|
||||
# First-boot PXE ROM -> iPXE. In proxy-DHCP mode, chainloading uses
|
||||
# pxe-service= (not dhcp-boot=) because the real LAN DHCP has already
|
||||
# assigned the IP; we only supplement the boot menu.
|
||||
pxe-service=tag:!ipxe,x86PC,"iPXE (BIOS)",undionly.kpxe
|
||||
pxe-service=tag:!ipxe,X86-64_EFI,"iPXE (UEFI)",ipxe.efi
|
||||
|
||||
log-facility=-
|
||||
`
|
||||
|
||||
Reference in New Issue
Block a user