Post-repair hardware validation pipeline for Proxmox cluster hosts. Go orchestrator + in-image agent + mkosi live image + bundled dnsmasq PXE + SQLite + HTMX/SSE UI + notify registry + janitor + full docs.
This commit is contained in:
@@ -0,0 +1,249 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"flag"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"vetting/internal/api"
|
||||
"vetting/internal/auth"
|
||||
"vetting/internal/config"
|
||||
"vetting/internal/db"
|
||||
"vetting/internal/events"
|
||||
"vetting/internal/httpserver"
|
||||
"vetting/internal/janitor"
|
||||
"vetting/internal/logs"
|
||||
"vetting/internal/model"
|
||||
"vetting/internal/notify"
|
||||
"vetting/internal/orchestrator"
|
||||
"vetting/internal/pxe"
|
||||
"vetting/internal/store"
|
||||
"vetting/internal/web/templates"
|
||||
)
|
||||
|
||||
func main() {
|
||||
configPath := flag.String("config", "deploy/vetting.example.yaml", "path to vetting.yaml")
|
||||
flag.Parse()
|
||||
|
||||
cfg, err := config.Load(*configPath)
|
||||
if err != nil {
|
||||
log.Fatalf("load config: %v", err)
|
||||
}
|
||||
|
||||
for _, dir := range []string{
|
||||
filepath.Dir(cfg.Database.Path),
|
||||
cfg.Artifacts.Dir,
|
||||
cfg.Logs.Dir,
|
||||
} {
|
||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||
log.Fatalf("mkdir %s: %v", dir, err)
|
||||
}
|
||||
}
|
||||
|
||||
conn, err := db.Open(cfg.Database.Path)
|
||||
if err != nil {
|
||||
log.Fatalf("open db: %v", err)
|
||||
}
|
||||
defer func() { _ = conn.Close() }()
|
||||
|
||||
secret, err := cfg.Auth.SessionSecret()
|
||||
if err != nil {
|
||||
log.Fatalf("auth: %v", err)
|
||||
}
|
||||
authMgr := &auth.Manager{
|
||||
PasswordHash: cfg.Auth.AdminPasswordBcrypt,
|
||||
Secret: secret,
|
||||
TTL: time.Duration(cfg.Auth.SessionTTLHours) * time.Hour,
|
||||
}
|
||||
if err := validateAuth(cfg, authMgr); err != nil {
|
||||
log.Fatalf("auth: %v", err)
|
||||
}
|
||||
|
||||
hostStore := &store.Hosts{DB: conn}
|
||||
runStore := &store.Runs{DB: conn}
|
||||
stageStore := &store.Stages{DB: conn}
|
||||
artifactStore := &store.Artifacts{DB: conn}
|
||||
specDiffStore := &store.SpecDiffs{DB: conn}
|
||||
measurementStore := &store.Measurements{DB: conn}
|
||||
|
||||
hub := events.NewHub()
|
||||
|
||||
logHub, err := logs.NewHub(cfg.Logs.Dir, hub)
|
||||
if err != nil {
|
||||
log.Fatalf("logs hub: %v", err)
|
||||
}
|
||||
defer logHub.Close()
|
||||
|
||||
runner := &orchestrator.Runner{
|
||||
Runs: runStore,
|
||||
Hosts: hostStore,
|
||||
Stages: stageStore,
|
||||
EventHub: hub,
|
||||
}
|
||||
|
||||
tiles := &api.TileEnricher{
|
||||
Runs: runStore,
|
||||
Artifacts: artifactStore,
|
||||
SpecDiffs: specDiffStore,
|
||||
}
|
||||
|
||||
// Inject a templ renderer so the Runner can publish tile-refresh
|
||||
// fragments via SSE without pulling web/templates into the
|
||||
// orchestrator package. The closure enriches the tile with spec-
|
||||
// diff count and hold-key path so every tile render shows the
|
||||
// same data, whether it came from /events or an initial page load.
|
||||
orchestrator.TileRenderer = func(ctx context.Context, host model.Host, latest *model.Run) string {
|
||||
return templates.RenderTileString(tiles.Build(ctx, host, latest))
|
||||
}
|
||||
|
||||
notifyReg, err := notify.BuildRegistry(cfg.Notifiers, cfg.Routes)
|
||||
if err != nil {
|
||||
log.Fatalf("notify: %v", err)
|
||||
}
|
||||
|
||||
ui := &api.UI{
|
||||
Hosts: hostStore,
|
||||
Runs: runStore,
|
||||
Artifacts: artifactStore,
|
||||
Auth: authMgr,
|
||||
EventHub: hub,
|
||||
Runner: runner,
|
||||
Tiles: tiles,
|
||||
}
|
||||
|
||||
agentAPI := &api.Agent{
|
||||
Hosts: hostStore,
|
||||
Runs: runStore,
|
||||
Stages: stageStore,
|
||||
Artifacts: artifactStore,
|
||||
SpecDiffs: specDiffStore,
|
||||
Measurements: measurementStore,
|
||||
Runner: runner,
|
||||
EventHub: hub,
|
||||
Logs: logHub,
|
||||
Notify: notifyReg,
|
||||
ArtifactsDir: cfg.Artifacts.Dir,
|
||||
OrchestratorURL: cfg.PXE.OrchestratorURL,
|
||||
PublicURL: cfg.Server.PublicURL,
|
||||
IperfPort: cfg.Network.IperfPort,
|
||||
}
|
||||
agentAPI.LiveKernelURL, agentAPI.LiveInitrdURL = pxe.BuildLiveURLs(cfg.PXE.OrchestratorURL)
|
||||
|
||||
dispatcher := orchestrator.NewDispatcher(cfg.Dispatcher.MaxConcurrentRuns, runStore, hostStore, runner)
|
||||
iperfSup := orchestrator.NewIperfSupervisor(cfg.Network.IperfPort)
|
||||
|
||||
janitorSvc := janitor.New(janitor.Config{
|
||||
ArtifactRetention: time.Duration(cfg.Artifacts.RetentionDays) * 24 * time.Hour,
|
||||
LogRetention: time.Duration(cfg.Logs.RetentionDays) * 24 * time.Hour,
|
||||
Interval: time.Duration(cfg.Janitor.IntervalMinutes) * time.Minute,
|
||||
}, &janitor.StoreAdapter{Runs: runStore, Artifacts: artifactStore, Logs: logHub})
|
||||
|
||||
tftpRoot := cfg.PXE.TFTPRoot
|
||||
if tftpRoot == "" {
|
||||
tftpRoot = filepath.Join(cfg.Logs.Dir, "..", "tftp")
|
||||
}
|
||||
var supervisor *pxe.Supervisor
|
||||
if cfg.PXE.Enabled {
|
||||
supervisor = pxe.NewSupervisor(pxe.SupervisorConfig{
|
||||
Enabled: true,
|
||||
Interface: cfg.PXE.Interface,
|
||||
DHCPRange: cfg.PXE.DHCPRange,
|
||||
OrchestratorURL: cfg.PXE.OrchestratorURL,
|
||||
RuntimeDir: filepath.Join(cfg.Logs.Dir, "..", "pxe"),
|
||||
TFTPRoot: tftpRoot,
|
||||
})
|
||||
}
|
||||
|
||||
router := httpserver.NewRouter(httpserver.Deps{
|
||||
Auth: authMgr,
|
||||
UI: ui,
|
||||
Agent: agentAPI,
|
||||
LiveDir: cfg.PXE.LiveDir,
|
||||
})
|
||||
|
||||
srv := &http.Server{
|
||||
Addr: cfg.Server.Bind,
|
||||
Handler: router,
|
||||
ReadHeaderTimeout: 10 * time.Second,
|
||||
}
|
||||
if cfg.Server.TLS.Enabled {
|
||||
srv.TLSConfig = &tls.Config{MinVersion: tls.VersionTLS12}
|
||||
}
|
||||
|
||||
shutdown := make(chan os.Signal, 1)
|
||||
signal.Notify(shutdown, os.Interrupt, syscall.SIGTERM)
|
||||
|
||||
rootCtx, cancelRoot := context.WithCancel(context.Background())
|
||||
defer cancelRoot()
|
||||
|
||||
dispatcher.Start(rootCtx)
|
||||
janitorSvc.Start(rootCtx)
|
||||
|
||||
if err := iperfSup.Start(rootCtx); err != nil {
|
||||
log.Fatalf("start iperf3: %v", err)
|
||||
}
|
||||
|
||||
if supervisor != nil {
|
||||
hosts, err := hostStore.List(rootCtx)
|
||||
if err != nil {
|
||||
log.Fatalf("list hosts for dnsmasq: %v", err)
|
||||
}
|
||||
if err := supervisor.Start(rootCtx, hosts); err != nil {
|
||||
log.Fatalf("start dnsmasq: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
go func() {
|
||||
log.Printf("vetting listening on %s (tls=%v, db=%s)", cfg.Server.Bind, cfg.Server.TLS.Enabled, cfg.Database.Path)
|
||||
var err error
|
||||
if cfg.Server.TLS.Enabled {
|
||||
err = srv.ListenAndServeTLS(cfg.Server.TLS.CertFile, cfg.Server.TLS.KeyFile)
|
||||
} else {
|
||||
err = srv.ListenAndServe()
|
||||
}
|
||||
if err != nil && !errors.Is(err, http.ErrServerClosed) {
|
||||
log.Fatalf("server: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
<-shutdown
|
||||
log.Printf("shutting down")
|
||||
|
||||
dispatcher.Stop()
|
||||
janitorSvc.Stop()
|
||||
_ = iperfSup.Shutdown(3 * time.Second)
|
||||
if supervisor != nil {
|
||||
_ = supervisor.Shutdown(5 * time.Second)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
if err := srv.Shutdown(ctx); err != nil {
|
||||
log.Printf("server shutdown: %v", err)
|
||||
}
|
||||
_ = hub.Shutdown(ctx)
|
||||
}
|
||||
|
||||
func validateAuth(cfg *config.Config, _ *auth.Manager) error {
|
||||
if cfg.Auth.AdminPasswordBcrypt == "" || cfg.Auth.AdminPasswordBcrypt == "$2a$10$REPLACE_ME_WITH_A_REAL_BCRYPT_HASH_0123456789abcdefABCDEFxx" {
|
||||
return errPlaceholderPassword
|
||||
}
|
||||
if len(cfg.Auth.AdminPasswordBcrypt) < 4 || cfg.Auth.AdminPasswordBcrypt[0] != '$' {
|
||||
return errPlaceholderPassword
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// errPlaceholderPassword is the error validateAuth returns for every rejected
// admin password hash: empty, still the example placeholder, shorter than
// four bytes, or not starting with '$'.
var errPlaceholderPassword = plainErr("auth.admin_password_bcrypt is the placeholder; run bin/gen-admin-password and paste the hash into your config")
|
||||
|
||||
// plainErr is a string-backed error type; the string value is the message.
type plainErr string

// Error implements the error interface by returning the underlying string.
func (p plainErr) Error() string {
	return string(p)
}
|
||||
Reference in New Issue
Block a user