package pxe import ( "context" "fmt" "io" "log" "os" "os/exec" "path/filepath" "runtime" "strings" "sync" "text/template" "time" "vetting/internal/model" ) // SupervisorConfig controls how dnsmasq is launched and configured. type SupervisorConfig struct { Enabled bool Interface string // e.g. "eth0" DHCPRange string // e.g. "10.77.0.100,10.77.0.200,12h" OrchestratorURL string // baked into iPXE scripts RuntimeDir string // writable dir for dnsmasq.conf and leases TFTPRoot string // holds ipxe.efi, undionly.kpxe DNSMasqBin string // path to dnsmasq binary (default: "dnsmasq") } // Supervisor owns a dnsmasq subprocess, rewrites its config when the // host registry changes, and sends SIGHUP to reload. The MAC allowlist // is the safety barrier: only registered MACs see a DHCP reply. type Supervisor struct { cfg SupervisorConfig mu sync.Mutex cmd *exec.Cmd cancel context.CancelFunc } func NewSupervisor(cfg SupervisorConfig) *Supervisor { if cfg.DNSMasqBin == "" { cfg.DNSMasqBin = "dnsmasq" } return &Supervisor{cfg: cfg} } // Start launches dnsmasq in the background. If cfg.Enabled is false // Start is a no-op (useful for dev on Windows where dnsmasq isn't // available). func (s *Supervisor) Start(ctx context.Context, hosts []model.Host) error { if !s.cfg.Enabled { log.Printf("pxe: disabled in config — skipping dnsmasq") return nil } if runtime.GOOS == "windows" { return fmt.Errorf("dnsmasq supervision is not supported on Windows — run orchestrator on Linux") } if err := os.MkdirAll(s.cfg.RuntimeDir, 0o755); err != nil { return fmt.Errorf("mkdir runtime: %w", err) } if err := s.writeConf(hosts); err != nil { return err } subCtx, cancel := context.WithCancel(ctx) s.mu.Lock() s.cancel = cancel s.mu.Unlock() confPath := filepath.Join(s.cfg.RuntimeDir, "dnsmasq.conf") cmd := exec.CommandContext(subCtx, s.cfg.DNSMasqBin, "--conf-file="+confPath, "--no-daemon", "--log-queries", "--log-dhcp", ) cmd.Stdout = logWriter{prefix: "dnsmasq"} cmd.Stderr = logWriter{prefix: "dnsmasq"} if err := cmd.Start(); err != nil { cancel() return fmt.Errorf("start dnsmasq: %w", err) } s.mu.Lock() s.cmd = cmd s.mu.Unlock() go func() { if err := cmd.Wait(); err != nil && subCtx.Err() == nil { log.Printf("dnsmasq exited: %v", err) } }() return nil } // Reload rewrites the conf with the latest host registry and sends // SIGHUP. It will restart the subprocess if SIGHUP is unsupported // (e.g. when running behind an OS that doesn't support it). func (s *Supervisor) Reload(hosts []model.Host) error { if !s.cfg.Enabled { return nil } if err := s.writeConf(hosts); err != nil { return err } s.mu.Lock() cmd := s.cmd s.mu.Unlock() if cmd == nil || cmd.Process == nil { return nil } if err := sighup(cmd.Process); err != nil { return fmt.Errorf("sighup dnsmasq: %w", err) } return nil } // Shutdown stops dnsmasq within the timeout. func (s *Supervisor) Shutdown(timeout time.Duration) error { if !s.cfg.Enabled { return nil } s.mu.Lock() cancel := s.cancel cmd := s.cmd s.mu.Unlock() if cancel != nil { cancel() } if cmd != nil && cmd.Process != nil { done := make(chan struct{}) go func() { _, _ = cmd.Process.Wait() close(done) }() select { case <-done: case <-time.After(timeout): _ = cmd.Process.Kill() } } return nil } func (s *Supervisor) writeConf(hosts []model.Host) error { tmpl, err := template.New("dnsmasq").Parse(dnsmasqTemplate) if err != nil { return err } conf := filepath.Join(s.cfg.RuntimeDir, "dnsmasq.conf") tmp := conf + ".new" f, err := os.Create(tmp) if err != nil { return fmt.Errorf("create conf: %w", err) } data := struct { Cfg SupervisorConfig Hosts []model.Host }{s.cfg, hosts} if err := tmpl.Execute(f, data); err != nil { _ = f.Close() return fmt.Errorf("render conf: %w", err) } if err := f.Sync(); err != nil { _ = f.Close() return err } if err := f.Close(); err != nil { return err } if err := os.Rename(tmp, conf); err != nil { return fmt.Errorf("rename conf: %w", err) } return nil } // Exposed for the UI handlers to show operators what config is live. func (s *Supervisor) ConfPath() string { return filepath.Join(s.cfg.RuntimeDir, "dnsmasq.conf") } type logWriter struct{ prefix string } func (w logWriter) Write(p []byte) (int, error) { for _, line := range strings.Split(strings.TrimRight(string(p), "\n"), "\n") { if line == "" { continue } log.Printf("[%s] %s", w.prefix, line) } return len(p), nil } // Allow package consumers to swap io.Writer for logs in tests. var _ io.Writer = logWriter{} const dnsmasqTemplate = `# Generated by Vetting — do not hand-edit. interface={{ .Cfg.Interface }} bind-interfaces port=0 domain-needed bogus-priv no-resolv # MAC allowlist: dnsmasq only answers DHCP for MACs with a dhcp-host= below. dhcp-ignore=tag:!known {{- range .Hosts }} dhcp-host={{ .MAC }},set:known {{- end }} # DHCP range (broader subnet coverage is fine; allowlist above gates replies). dhcp-range={{ .Cfg.DHCPRange }} # TFTP + HTTP boot (iPXE chainload). enable-tftp tftp-root={{ .Cfg.TFTPRoot }} # BIOS (undionly.kpxe) and UEFI (ipxe.efi) clients both get iPXE first, # which then re-requests a per-MAC script from the orchestrator. dhcp-match=set:bios,option:client-arch,0 dhcp-match=set:efi64,option:client-arch,7 dhcp-match=set:efi64,option:client-arch,9 # If the client is iPXE itself, send it the per-MAC HTTP script. dhcp-match=set:ipxe,175 dhcp-boot=tag:ipxe,{{ .Cfg.OrchestratorURL }}/ipxe/${mac} # Otherwise (first boot from ROM) chainload iPXE from TFTP. dhcp-boot=tag:!ipxe,tag:bios,undionly.kpxe dhcp-boot=tag:!ipxe,tag:efi64,ipxe.efi log-facility=- `