a6603b463f
Hosts stuck in states like pxe_ready had zero visibility into why. This adds a persistent activity log that records every meaningful step (state transitions, PXE events, cluster join stages, failures) and surfaces it on the host detail page with live SSE updates. Includes a stuck-detection warning banner when a host sits in pxe_ready for >10 minutes with no iPXE request. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
189 lines
4.3 KiB
Go
189 lines
4.3 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"flag"
|
|
"log"
|
|
"net/http"
|
|
"os"
|
|
"os/signal"
|
|
"path/filepath"
|
|
"syscall"
|
|
"time"
|
|
|
|
"provisioning/internal/api"
|
|
"provisioning/internal/config"
|
|
"provisioning/internal/db"
|
|
"provisioning/internal/events"
|
|
"provisioning/internal/httpserver"
|
|
"provisioning/internal/image"
|
|
"provisioning/internal/infra"
|
|
"provisioning/internal/orchestrator"
|
|
"provisioning/internal/pxe"
|
|
"provisioning/internal/store"
|
|
)
|
|
|
|
func main() {
|
|
cfgPath := flag.String("config", "provisioning.yaml", "path to config file")
|
|
flag.Parse()
|
|
|
|
cfg, err := config.Load(*cfgPath)
|
|
if err != nil {
|
|
log.Fatalf("config: %v", err)
|
|
}
|
|
|
|
serverTypes, err := config.LoadServerTypes(cfg.ServerTypePath)
|
|
if err != nil {
|
|
log.Fatalf("server types: %v", err)
|
|
}
|
|
|
|
if err := os.MkdirAll(filepath.Dir(cfg.Database.Path), 0o755); err != nil {
|
|
log.Fatalf("create db dir: %v", err)
|
|
}
|
|
database, err := db.Open(cfg.Database.Path)
|
|
if err != nil {
|
|
log.Fatalf("database: %v", err)
|
|
}
|
|
defer database.Close()
|
|
|
|
if err := os.MkdirAll(cfg.Images.Dir, 0o755); err != nil {
|
|
log.Fatalf("create images dir: %v", err)
|
|
}
|
|
|
|
hosts := &store.Hosts{DB: database}
|
|
ops := &store.Operations{DB: database}
|
|
locks := &store.Locks{DB: database, TTLMinutes: cfg.Locks.TTLMinutes}
|
|
images := &store.Images{DB: database}
|
|
activity := &store.Activity{DB: database}
|
|
|
|
imageSvc := &image.Service{Store: images, ImageDir: cfg.Images.Dir}
|
|
|
|
hub := events.NewHub()
|
|
|
|
runner := &orchestrator.Runner{
|
|
Hosts: hosts,
|
|
Ops: ops,
|
|
Locks: locks,
|
|
Hub: hub,
|
|
Activity: activity,
|
|
}
|
|
|
|
pxeSupervisor := pxe.NewSupervisor(pxe.SupervisorConfig{
|
|
Enabled: cfg.PXE.Enabled,
|
|
Interface: cfg.PXE.Interface,
|
|
Subnet: cfg.PXE.Subnet,
|
|
RuntimeDir: cfg.PXE.RuntimeDir,
|
|
TFTPRoot: cfg.PXE.TFTPRoot,
|
|
DnsmasqBin: cfg.PXE.DnsmasqBin,
|
|
PublicURL: cfg.Server.PublicURL,
|
|
})
|
|
|
|
var infraClient *infra.Client
|
|
if cfg.Infrastructure.BaseURL != "" {
|
|
infraClient = infra.NewClient(cfg.Infrastructure.BaseURL, time.Duration(cfg.Infrastructure.TimeoutSec)*time.Second)
|
|
}
|
|
|
|
clusterJoiner := &orchestrator.ClusterJoiner{
|
|
ExistingNode: cfg.Proxmox.ExistingNode,
|
|
ClusterName: cfg.Proxmox.ClusterName,
|
|
JoinFingerprint: cfg.Proxmox.JoinFingerprint,
|
|
}
|
|
|
|
hostOrch := &orchestrator.HostOrchestrator{
|
|
Runner: runner,
|
|
Hosts: hosts,
|
|
Ops: ops,
|
|
Locks: locks,
|
|
Cluster: clusterJoiner,
|
|
InfraClient: infraClient,
|
|
Config: cfg,
|
|
ServerTypes: serverTypes,
|
|
}
|
|
|
|
imageAPI := &api.ImageAPI{Svc: imageSvc}
|
|
|
|
hostAPI := &api.HostAPI{
|
|
Hosts: hosts,
|
|
Ops: ops,
|
|
Locks: locks,
|
|
Images: images,
|
|
Runner: runner,
|
|
Orchestrator: hostOrch,
|
|
PXE: pxeSupervisor,
|
|
Config: cfg,
|
|
ServerTypes: serverTypes,
|
|
}
|
|
|
|
bootAPI := &api.BootAPI{
|
|
Hosts: hosts,
|
|
Images: images,
|
|
Runner: runner,
|
|
Orchestrator: hostOrch,
|
|
Config: cfg,
|
|
ServerTypes: serverTypes,
|
|
}
|
|
|
|
ui := &api.UI{
|
|
Hosts: hosts,
|
|
Ops: ops,
|
|
Locks: locks,
|
|
Images: images,
|
|
Activity: activity,
|
|
ImageSvc: imageSvc,
|
|
Runner: runner,
|
|
Orchestrator: hostOrch,
|
|
Hub: hub,
|
|
PXE: pxeSupervisor,
|
|
Config: cfg,
|
|
ServerTypes: serverTypes,
|
|
}
|
|
|
|
router := httpserver.NewRouter(httpserver.Deps{
|
|
HostAPI: hostAPI,
|
|
BootAPI: bootAPI,
|
|
ImageAPI: imageAPI,
|
|
UI: ui,
|
|
Hub: hub,
|
|
ImageDir: cfg.Images.Dir,
|
|
})
|
|
|
|
srv := &http.Server{
|
|
Addr: cfg.Server.Bind,
|
|
Handler: router,
|
|
}
|
|
|
|
// Start PXE
|
|
allHosts, _ := hosts.List(context.Background())
|
|
if err := pxeSupervisor.Start(context.Background(), allHosts); err != nil {
|
|
log.Printf("pxe: failed to start: %v", err)
|
|
}
|
|
|
|
// Watch server types for hot reload
|
|
stop := make(chan struct{})
|
|
serverTypes.Watch(stop)
|
|
|
|
// Start HTTP server
|
|
go func() {
|
|
log.Printf("listening on %s", cfg.Server.Bind)
|
|
if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
|
log.Fatalf("http: %v", err)
|
|
}
|
|
}()
|
|
|
|
// Graceful shutdown
|
|
shutdown := make(chan os.Signal, 1)
|
|
signal.Notify(shutdown, os.Interrupt, syscall.SIGTERM)
|
|
<-shutdown
|
|
|
|
log.Printf("shutting down")
|
|
close(stop)
|
|
_ = pxeSupervisor.Shutdown()
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
|
defer cancel()
|
|
if err := srv.Shutdown(ctx); err != nil {
|
|
log.Printf("http shutdown: %v", err)
|
|
}
|
|
_ = hub.Shutdown(ctx)
|
|
}
|