Automate PXE setup: release bundle + pxe-setup.sh + startup validation
CI / Lint + build + test (push) Has been cancelled
CI / Lint + build + test (push) Has been cancelled
Collapses the LXC side of PXE enablement from a six-step manual dance (build, fetch iPXE, scp, bridge, hand-edit yaml) into:

    make release                     # dev box (Linux/WSL)
    scp bundle.tar.gz lxc:/tmp/
    sudo ./install.sh                # base install, unchanged
    sudo ./pxe-setup.sh --interface ... --dhcp-range ... --orchestrator-url ...

pxe-setup.sh fetches iPXE from boot.ipxe.org, verifies it against pinned SHA256s in deploy/ipxe-shas.txt (fail-closed), places vmlinuz/initrd.img from the bundle, and rewrites only the pxe: block of vetting.yaml. Idempotent; --force gates overwriting a hand-edited block.

Adds Supervisor.Validate() — called before dnsmasq spawn — so typo'd configs fail at orchestrator startup with clear errors naming the missing file or yaml key, instead of silently serving broken TFTP until a real host tries to PXE-boot. Nine tests cover missing files, bogus interface, malformed dhcp_range, bad orchestrator_url, and aggregate reporting.

Hypervisor bridge creation stays documented (LXC can't do it) but everything downstream of the bridge is now scripted.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -2,12 +2,16 @@ package pxe
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -25,6 +29,7 @@ type SupervisorConfig struct {
|
||||
OrchestratorURL string // baked into iPXE scripts
|
||||
RuntimeDir string // writable dir for dnsmasq.conf and leases
|
||||
TFTPRoot string // holds ipxe.efi, undionly.kpxe
|
||||
LiveDir string // holds vmlinuz, initrd.img (served via HTTP, not dnsmasq; "" disables validation)
|
||||
DNSMasqBin string // path to dnsmasq binary (default: "dnsmasq")
|
||||
}
|
||||
|
||||
@@ -45,6 +50,65 @@ func NewSupervisor(cfg SupervisorConfig) *Supervisor {
|
||||
return &Supervisor{cfg: cfg}
|
||||
}
|
||||
|
||||
// dhcpRangeRE recognises the comma-separated "start_ip,end_ip,lease"
// shape used for pxe.dhcp_range. Each address must look like a dotted
// quad of 1-3 digit groups; the digits are NOT range-checked here, so
// "999.999.999.999" still matches. The lease field is any non-empty
// run of non-whitespace ("12h", "infinite", ...); judging whether it
// is meaningful is left to dnsmasq.
var dhcpRangeRE = regexp.MustCompile(
	`^(\d{1,3}\.){3}\d{1,3},(\d{1,3}\.){3}\d{1,3},\S+$`,
)
|
||||
|
||||
// Validate checks the preconditions required for dnsmasq to actually
|
||||
// serve PXE boots: the interface must exist, the iPXE payloads must
|
||||
// be on disk, the DHCP range + orchestrator URL must parse. Returns
|
||||
// nil when Enabled=false — tests and dev mode skip all of this.
|
||||
//
|
||||
// Without Validate(), dnsmasq starts cleanly on typo'd configs and
|
||||
// the only symptom is a silent TFTP 404 when a real host PXE-boots.
|
||||
func (s *Supervisor) Validate() error {
|
||||
if !s.cfg.Enabled {
|
||||
return nil
|
||||
}
|
||||
var errs []error
|
||||
|
||||
if s.cfg.Interface == "" {
|
||||
errs = append(errs, fmt.Errorf("pxe.interface is required"))
|
||||
} else if _, err := net.InterfaceByName(s.cfg.Interface); err != nil {
|
||||
errs = append(errs, fmt.Errorf("pxe.interface %q not found on host — check `ip link` or fix pxe.interface in vetting.yaml", s.cfg.Interface))
|
||||
}
|
||||
|
||||
if s.cfg.TFTPRoot == "" {
|
||||
errs = append(errs, fmt.Errorf("pxe.tftp_root is required"))
|
||||
} else {
|
||||
for _, name := range []string{"ipxe.efi", "undionly.kpxe"} {
|
||||
p := filepath.Join(s.cfg.TFTPRoot, name)
|
||||
if _, err := os.Stat(p); err != nil {
|
||||
errs = append(errs, fmt.Errorf("missing %s — run deploy/pxe-setup.sh to fetch iPXE binaries", p))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if s.cfg.LiveDir != "" {
|
||||
for _, name := range []string{"vmlinuz", "initrd.img"} {
|
||||
p := filepath.Join(s.cfg.LiveDir, name)
|
||||
if _, err := os.Stat(p); err != nil {
|
||||
errs = append(errs, fmt.Errorf("missing %s — build the live image (`make live-image`) and copy into pxe.live_dir, or use the release tarball", p))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if s.cfg.DHCPRange == "" {
|
||||
errs = append(errs, fmt.Errorf("pxe.dhcp_range is required (e.g. \"10.77.0.100,10.77.0.200,12h\")"))
|
||||
} else if !dhcpRangeRE.MatchString(s.cfg.DHCPRange) {
|
||||
errs = append(errs, fmt.Errorf("pxe.dhcp_range %q must be \"start_ip,end_ip,lease\"", s.cfg.DHCPRange))
|
||||
}
|
||||
|
||||
if s.cfg.OrchestratorURL == "" {
|
||||
errs = append(errs, fmt.Errorf("pxe.orchestrator_url is required"))
|
||||
} else if u, err := url.Parse(s.cfg.OrchestratorURL); err != nil || (u.Scheme != "http" && u.Scheme != "https") || u.Host == "" {
|
||||
errs = append(errs, fmt.Errorf("pxe.orchestrator_url %q must be an http(s) URL with a host", s.cfg.OrchestratorURL))
|
||||
}
|
||||
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
|
||||
// Start launches dnsmasq in the background. If cfg.Enabled is false
|
||||
// Start is a no-op (useful for dev on Windows where dnsmasq isn't
|
||||
// available).
|
||||
@@ -56,6 +120,9 @@ func (s *Supervisor) Start(ctx context.Context, hosts []model.Host) error {
|
||||
if runtime.GOOS == "windows" {
|
||||
return fmt.Errorf("dnsmasq supervision is not supported on Windows — run orchestrator on Linux")
|
||||
}
|
||||
if err := s.Validate(); err != nil {
|
||||
return fmt.Errorf("pxe preconditions failed: %w", err)
|
||||
}
|
||||
if err := os.MkdirAll(s.cfg.RuntimeDir, 0o755); err != nil {
|
||||
return fmt.Errorf("mkdir runtime: %w", err)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,191 @@
|
||||
package pxe
|
||||
|
||||
import (
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// existingInterface reports the name of the first interface listed by
// net.Interfaces, so Validate tests have a name that resolves on the
// current host without hardcoding one such as "lo". The calling test
// is skipped when the lookup fails or the host lists no interfaces.
func existingInterface(t *testing.T) string {
	t.Helper()
	all, err := net.Interfaces()
	if err == nil && len(all) > 0 {
		return all[0].Name
	}
	t.Skipf("no network interfaces: %v", err)
	return "" // not reached: Skipf stops the calling test
}
|
||||
|
||||
// seedTFTP makes sure dir exists (creating parents as needed) and then
// writes one empty file per entry in names, which is enough for a
// stat-based presence check to succeed. Leaving a name out of the
// list simulates that file being missing. Any filesystem error fails
// the test immediately.
func seedTFTP(t *testing.T, dir string, names ...string) {
	t.Helper()
	err := os.MkdirAll(dir, 0o755)
	if err != nil {
		t.Fatalf("mkdir tftp: %v", err)
	}
	for i := range names {
		target := filepath.Join(dir, names[i])
		if werr := os.WriteFile(target, nil, 0o644); werr != nil {
			t.Fatalf("seed %s: %v", names[i], werr)
		}
	}
}
|
||||
|
||||
func goodCfg(t *testing.T, tftpRoot string) SupervisorConfig {
|
||||
t.Helper()
|
||||
return SupervisorConfig{
|
||||
Enabled: true,
|
||||
Interface: existingInterface(t),
|
||||
DHCPRange: "10.77.0.100,10.77.0.200,12h",
|
||||
OrchestratorURL: "http://10.77.0.1:8080",
|
||||
TFTPRoot: tftpRoot,
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidate_DisabledSkipsChecks(t *testing.T) {
|
||||
s := NewSupervisor(SupervisorConfig{Enabled: false})
|
||||
if err := s.Validate(); err != nil {
|
||||
t.Fatalf("disabled supervisor should skip validation, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidate_HappyPath(t *testing.T) {
|
||||
tftp := t.TempDir()
|
||||
seedTFTP(t, tftp, "ipxe.efi", "undionly.kpxe")
|
||||
s := NewSupervisor(goodCfg(t, tftp))
|
||||
if err := s.Validate(); err != nil {
|
||||
t.Fatalf("happy-path validate: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidate_MissingIPXEBinary(t *testing.T) {
|
||||
tftp := t.TempDir()
|
||||
// Only seed one of the two required files.
|
||||
seedTFTP(t, tftp, "undionly.kpxe")
|
||||
s := NewSupervisor(goodCfg(t, tftp))
|
||||
err := s.Validate()
|
||||
if err == nil {
|
||||
t.Fatalf("expected error for missing ipxe.efi")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "ipxe.efi") {
|
||||
t.Fatalf("error should name the missing file, got: %v", err)
|
||||
}
|
||||
if !strings.Contains(err.Error(), "pxe-setup.sh") {
|
||||
t.Fatalf("error should point operator at pxe-setup.sh, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidate_MissingUndionly(t *testing.T) {
|
||||
tftp := t.TempDir()
|
||||
seedTFTP(t, tftp, "ipxe.efi")
|
||||
s := NewSupervisor(goodCfg(t, tftp))
|
||||
err := s.Validate()
|
||||
if err == nil || !strings.Contains(err.Error(), "undionly.kpxe") {
|
||||
t.Fatalf("expected undionly.kpxe error, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidate_MissingInterface(t *testing.T) {
|
||||
tftp := t.TempDir()
|
||||
seedTFTP(t, tftp, "ipxe.efi", "undionly.kpxe")
|
||||
cfg := goodCfg(t, tftp)
|
||||
cfg.Interface = "definitely-not-a-real-iface-9999"
|
||||
s := NewSupervisor(cfg)
|
||||
err := s.Validate()
|
||||
if err == nil || !strings.Contains(err.Error(), "pxe.interface") {
|
||||
t.Fatalf("expected interface error, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidate_MissingLiveImage(t *testing.T) {
|
||||
tftp := t.TempDir()
|
||||
seedTFTP(t, tftp, "ipxe.efi", "undionly.kpxe")
|
||||
cfg := goodCfg(t, tftp)
|
||||
cfg.LiveDir = t.TempDir() // empty dir; vmlinuz + initrd.img missing
|
||||
s := NewSupervisor(cfg)
|
||||
err := s.Validate()
|
||||
if err == nil {
|
||||
t.Fatalf("expected live image error")
|
||||
}
|
||||
for _, want := range []string{"vmlinuz", "initrd.img"} {
|
||||
if !strings.Contains(err.Error(), want) {
|
||||
t.Fatalf("error should name %s, got: %v", want, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidate_LiveDirEmptySkipsLiveChecks(t *testing.T) {
|
||||
tftp := t.TempDir()
|
||||
seedTFTP(t, tftp, "ipxe.efi", "undionly.kpxe")
|
||||
cfg := goodCfg(t, tftp)
|
||||
cfg.LiveDir = "" // explicit opt-out; HTTP /live just 404s
|
||||
s := NewSupervisor(cfg)
|
||||
if err := s.Validate(); err != nil {
|
||||
t.Fatalf("empty LiveDir should not trigger live checks, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidate_MalformedDHCPRange(t *testing.T) {
|
||||
tftp := t.TempDir()
|
||||
seedTFTP(t, tftp, "ipxe.efi", "undionly.kpxe")
|
||||
cases := []struct {
|
||||
name string
|
||||
dhcp string
|
||||
}{
|
||||
{"single field", "10.77.0.100"},
|
||||
{"two fields", "10.77.0.100,10.77.0.200"},
|
||||
{"non-ip start", "hello,10.77.0.200,12h"},
|
||||
{"empty lease", "10.77.0.100,10.77.0.200,"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
cfg := goodCfg(t, tftp)
|
||||
cfg.DHCPRange = tc.dhcp
|
||||
s := NewSupervisor(cfg)
|
||||
err := s.Validate()
|
||||
if err == nil || !strings.Contains(err.Error(), "dhcp_range") {
|
||||
t.Fatalf("expected dhcp_range error for %q, got: %v", tc.dhcp, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidate_BadOrchestratorURL(t *testing.T) {
|
||||
tftp := t.TempDir()
|
||||
seedTFTP(t, tftp, "ipxe.efi", "undionly.kpxe")
|
||||
cases := []string{"", "not a url", "ftp://10.0.0.1", "http://"}
|
||||
for _, u := range cases {
|
||||
t.Run(u, func(t *testing.T) {
|
||||
cfg := goodCfg(t, tftp)
|
||||
cfg.OrchestratorURL = u
|
||||
s := NewSupervisor(cfg)
|
||||
err := s.Validate()
|
||||
if err == nil || !strings.Contains(err.Error(), "orchestrator_url") {
|
||||
t.Fatalf("expected orchestrator_url error for %q, got: %v", u, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidate_AggregatesErrors(t *testing.T) {
|
||||
// Multiple problems at once: Validate must report them all in
|
||||
// one pass so the operator sees the full picture instead of
|
||||
// whack-a-mole-ing one error per restart.
|
||||
cfg := SupervisorConfig{
|
||||
Enabled: true,
|
||||
// Everything else zero.
|
||||
}
|
||||
s := NewSupervisor(cfg)
|
||||
err := s.Validate()
|
||||
if err == nil {
|
||||
t.Fatalf("expected aggregated error")
|
||||
}
|
||||
for _, want := range []string{"pxe.interface", "pxe.tftp_root", "pxe.dhcp_range", "pxe.orchestrator_url"} {
|
||||
if !strings.Contains(err.Error(), want) {
|
||||
t.Fatalf("expected %q in aggregated error, got: %v", want, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user