Files
Vetting/internal/pxe/dnsmasq.go
T
josh 9bb4b09a04
CI / Lint + build + test (push) Has been cancelled
Initial commit: full Phases 1-6 implementation
Post-repair hardware validation pipeline for Proxmox cluster hosts.
Go orchestrator + in-image agent + mkosi live image + bundled dnsmasq
PXE + SQLite + HTMX/SSE UI + notify registry + janitor + full docs.
2026-04-17 21:32:10 -04:00

232 lines
5.7 KiB
Go

package pxe
import (
"context"
"fmt"
"io"
"log"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"sync"
"text/template"
"time"
"vetting/internal/model"
)
// SupervisorConfig controls how dnsmasq is launched and configured.
// Most fields are rendered verbatim into the generated dnsmasq.conf, so
// they must already be valid dnsmasq option values.
type SupervisorConfig struct {
// Enabled gates the whole supervisor: when false, Start, Reload and
// Shutdown return nil without touching dnsmasq.
Enabled bool
// Interface is written to the interface= directive.
Interface string // e.g. "eth0"
// DHCPRange is written to the dhcp-range= directive.
DHCPRange string // e.g. "10.77.0.100,10.77.0.200,12h"
// OrchestratorURL prefixes the /ipxe/${mac} boot URL handed to clients
// that identify themselves as iPXE.
OrchestratorURL string // baked into iPXE scripts
// RuntimeDir is created by Start (mode 0o755) and holds the generated
// dnsmasq.conf.
RuntimeDir string // writable dir for dnsmasq.conf and leases
// TFTPRoot is written to the tftp-root= directive.
TFTPRoot string // holds ipxe.efi, undionly.kpxe
// DNSMasqBin is the executable Start runs; NewSupervisor substitutes
// "dnsmasq" when it is empty.
DNSMasqBin string // path to dnsmasq binary (default: "dnsmasq")
}
// Supervisor owns a dnsmasq subprocess, rewrites its config when the
// host registry changes, and sends SIGHUP to reload. The MAC allowlist
// is the safety barrier: only registered MACs see a DHCP reply.
//
// A Supervisor contains a sync.Mutex and must not be copied; use the
// pointer returned by NewSupervisor.
type Supervisor struct {
// cfg is the configuration captured by NewSupervisor.
cfg SupervisorConfig
// mu guards cmd and cancel, which are written by Start and read by
// Reload and Shutdown.
mu sync.Mutex
// cmd is the dnsmasq child process; nil until Start has launched it.
cmd *exec.Cmd
// cancel cancels the context the child process was started with.
cancel context.CancelFunc
}
// NewSupervisor returns a Supervisor for cfg. It does not start dnsmasq;
// call Start for that. An empty cfg.DNSMasqBin is replaced with "dnsmasq"
// so the binary is looked up on PATH.
func NewSupervisor(cfg SupervisorConfig) *Supervisor {
	sup := &Supervisor{cfg: cfg}
	if sup.cfg.DNSMasqBin == "" {
		sup.cfg.DNSMasqBin = "dnsmasq"
	}
	return sup
}
// Start writes the initial dnsmasq configuration for hosts and launches
// dnsmasq as a foreground child process whose output is forwarded to the
// standard logger.
//
// When cfg.Enabled is false Start logs a notice and returns nil (useful
// for dev on Windows where dnsmasq isn't available). When enabled on
// Windows it returns an error instead. The child is bound to a context
// derived from ctx, so it is stopped when ctx is cancelled or when
// Shutdown cancels the derived context.
//
// Start returns once the process has been spawned; it does not wait for
// dnsmasq to become ready. An unexpected exit is only logged.
func (s *Supervisor) Start(ctx context.Context, hosts []model.Host) error {
	switch {
	case !s.cfg.Enabled:
		log.Printf("pxe: disabled in config — skipping dnsmasq")
		return nil
	case runtime.GOOS == "windows":
		return fmt.Errorf("dnsmasq supervision is not supported on Windows — run orchestrator on Linux")
	}

	if mkErr := os.MkdirAll(s.cfg.RuntimeDir, 0o755); mkErr != nil {
		return fmt.Errorf("mkdir runtime: %w", mkErr)
	}
	if confErr := s.writeConf(hosts); confErr != nil {
		return confErr
	}

	// Publish the cancel func before spawning so Shutdown can always
	// reach it, even if it races with the launch below.
	procCtx, stop := context.WithCancel(ctx)
	s.mu.Lock()
	s.cancel = stop
	s.mu.Unlock()

	args := []string{
		"--conf-file=" + filepath.Join(s.cfg.RuntimeDir, "dnsmasq.conf"),
		"--no-daemon", // stay in the foreground so we keep ownership of the process
		"--log-queries",
		"--log-dhcp",
	}
	proc := exec.CommandContext(procCtx, s.cfg.DNSMasqBin, args...)
	sink := logWriter{prefix: "dnsmasq"}
	proc.Stdout = sink
	proc.Stderr = sink

	if startErr := proc.Start(); startErr != nil {
		stop()
		return fmt.Errorf("start dnsmasq: %w", startErr)
	}

	s.mu.Lock()
	s.cmd = proc
	s.mu.Unlock()

	// Reap the child in the background. An exit caused by our own
	// cancellation is expected and therefore not reported.
	go func() {
		waitErr := proc.Wait()
		if waitErr == nil || procCtx.Err() != nil {
			return
		}
		log.Printf("dnsmasq exited: %v", waitErr)
	}()
	return nil
}
// Reload rewrites dnsmasq.conf from the given host registry and then
// sends SIGHUP to the running dnsmasq process.
//
// It is a no-op when the supervisor is disabled. If no process has been
// started yet the configuration is still rewritten, but nothing is
// signalled. There is no restart fallback: if the signal cannot be
// delivered (for example because the process has already exited) the
// error is returned to the caller.
//
// NOTE(review): the dnsmasq man page states that SIGHUP does not re-read
// the configuration file; only files named by options such as
// --dhcp-hostsfile are reloaded. Confirm that dhcp-host= entries written
// into dnsmasq.conf actually take effect after this signal.
func (s *Supervisor) Reload(hosts []model.Host) error {
	if !s.cfg.Enabled {
		return nil
	}

	// writeConf stages its output in a fixed "<conf>.new" file, so two
	// concurrent reloads would interleave writes into the same temp file.
	// Holding the mutex serializes them and gives a consistent view of cmd.
	s.mu.Lock()
	err := s.writeConf(hosts)
	cmd := s.cmd
	s.mu.Unlock()
	if err != nil {
		return err
	}

	if cmd == nil || cmd.Process == nil {
		return nil
	}
	if err := sighup(cmd.Process); err != nil {
		return fmt.Errorf("sighup dnsmasq: %w", err)
	}
	return nil
}
// Shutdown stops dnsmasq and waits up to timeout for it to exit.
//
// It cancels the context the process was started with, then waits for
// the process to go away; if it is still running after timeout it is
// killed explicitly. Shutdown is a no-op when the supervisor is disabled
// or nothing was started, and it is safe to call more than once. It
// currently always returns nil.
func (s *Supervisor) Shutdown(timeout time.Duration) error {
	if !s.cfg.Enabled {
		return nil
	}

	// Detach the handles while holding the lock. Leaving them in place
	// would make a later Reload signal a process that no longer exists
	// (and fail), and would make a second Shutdown repeat the work.
	s.mu.Lock()
	cancel, cmd := s.cancel, s.cmd
	s.cancel, s.cmd = nil, nil
	s.mu.Unlock()

	if cancel != nil {
		cancel()
	}
	if cmd == nil || cmd.Process == nil {
		return nil
	}

	exited := make(chan struct{})
	go func() {
		defer close(exited)
		// The error is deliberately ignored: the goroutine launched by
		// Start also waits on this process, so this call may report that
		// the process was already reaped. Either way, returning means
		// the process is gone.
		_, _ = cmd.Process.Wait()
	}()

	// A stoppable timer releases its resources as soon as we return,
	// instead of lingering for the full timeout like time.After.
	timer := time.NewTimer(timeout)
	defer timer.Stop()

	select {
	case <-exited:
	case <-timer.C:
		// Best effort: the process may have exited in the meantime.
		_ = cmd.Process.Kill()
	}
	return nil
}
// writeConf renders dnsmasqTemplate with the supervisor configuration and
// the given hosts, and installs the result as RuntimeDir/dnsmasq.conf.
//
// The output is first written and synced to "dnsmasq.conf.new" and then
// renamed over the live file, so readers never observe a partially
// written configuration. If any step fails the temp file is removed and
// the previous dnsmasq.conf is left untouched.
//
// writeConf does no locking of its own; callers that may run
// concurrently must serialize access, because the temp file name is fixed.
func (s *Supervisor) writeConf(hosts []model.Host) (err error) {
	tmpl, err := template.New("dnsmasq").Parse(dnsmasqTemplate)
	if err != nil {
		return fmt.Errorf("parse conf template: %w", err)
	}

	conf := filepath.Join(s.cfg.RuntimeDir, "dnsmasq.conf")
	tmp := conf + ".new"
	f, err := os.Create(tmp)
	if err != nil {
		return fmt.Errorf("create conf: %w", err)
	}
	// Do not leave a half-written temp file behind on failure. After a
	// successful rename tmp no longer exists and err is nil, so this is
	// skipped.
	defer func() {
		if err != nil {
			_ = os.Remove(tmp)
		}
	}()

	data := struct {
		Cfg   SupervisorConfig
		Hosts []model.Host
	}{s.cfg, hosts}
	if err = tmpl.Execute(f, data); err != nil {
		_ = f.Close()
		return fmt.Errorf("render conf: %w", err)
	}
	// Flush to stable storage before the rename so the new name never
	// points at incomplete data after a crash.
	if err = f.Sync(); err != nil {
		_ = f.Close()
		return fmt.Errorf("sync conf: %w", err)
	}
	if err = f.Close(); err != nil {
		return fmt.Errorf("close conf: %w", err)
	}
	if err = os.Rename(tmp, conf); err != nil {
		return fmt.Errorf("rename conf: %w", err)
	}
	return nil
}
// ConfPath returns the location of the generated dnsmasq.conf inside the
// runtime directory. It is exposed so the UI handlers can show operators
// which config is live. The path is computed from the configuration; the
// file itself may not exist yet.
func (s *Supervisor) ConfPath() string {
	const confName = "dnsmasq.conf"
	return filepath.Join(s.cfg.RuntimeDir, confName)
}
// logWriter is an io.Writer that forwards everything written to it to the
// standard logger, one log entry per non-empty line, each tagged with
// "[prefix]".
type logWriter struct{ prefix string }

// Write splits p on newlines and logs every non-empty line. It always
// reports the whole of p as consumed and never returns an error. Each
// call is handled independently: a line that arrives split across two
// writes is logged as two entries.
func (w logWriter) Write(p []byte) (int, error) {
	isNewline := func(r rune) bool { return r == '\n' }
	// FieldsFunc yields exactly the non-empty runs between newlines, so
	// blank lines and trailing newlines drop out without special cases.
	for _, entry := range strings.FieldsFunc(string(p), isNewline) {
		log.Printf("[%s] %s", w.prefix, entry)
	}
	return len(p), nil
}
// Compile-time assertion that logWriter satisfies io.Writer. It declares
// no usable variable and has no runtime effect.
var _ io.Writer = logWriter{}
// dnsmasqTemplate is the text/template source that writeConf renders into
// dnsmasq.conf. It is executed with a struct exposing .Cfg (the
// SupervisorConfig) and .Hosts; each host contributes one dhcp-host= line
// built from its MAC field. Values are inserted verbatim, without
// escaping or validation.
//
// The "${mac}" in the iPXE dhcp-boot line is not a template action; it is
// emitted literally (presumably for iPXE to expand on the client —
// confirm against the iPXE documentation).
const dnsmasqTemplate = `# Generated by Vetting — do not hand-edit.
interface={{ .Cfg.Interface }}
bind-interfaces
port=0
domain-needed
bogus-priv
no-resolv
# MAC allowlist: dnsmasq only answers DHCP for MACs with a dhcp-host= below.
dhcp-ignore=tag:!known
{{- range .Hosts }}
dhcp-host={{ .MAC }},set:known
{{- end }}
# DHCP range (broader subnet coverage is fine; allowlist above gates replies).
dhcp-range={{ .Cfg.DHCPRange }}
# TFTP + HTTP boot (iPXE chainload).
enable-tftp
tftp-root={{ .Cfg.TFTPRoot }}
# BIOS (undionly.kpxe) and UEFI (ipxe.efi) clients both get iPXE first,
# which then re-requests a per-MAC script from the orchestrator.
dhcp-match=set:bios,option:client-arch,0
dhcp-match=set:efi64,option:client-arch,7
dhcp-match=set:efi64,option:client-arch,9
# If the client is iPXE itself, send it the per-MAC HTTP script.
dhcp-match=set:ipxe,175
dhcp-boot=tag:ipxe,{{ .Cfg.OrchestratorURL }}/ipxe/${mac}
# Otherwise (first boot from ROM) chainload iPXE from TFTP.
dhcp-boot=tag:!ipxe,tag:bios,undionly.kpxe
dhcp-boot=tag:!ipxe,tag:efi64,ipxe.efi
log-facility=-
`