a6603b463f
Hosts stuck in states like pxe_ready had zero visibility into why. This adds a persistent activity log that records every meaningful step (state transitions, PXE events, cluster join stages, failures) and surfaces it on the host detail page with live SSE updates. Includes a stuck-detection warning banner when a host sits in pxe_ready for >10 minutes with no iPXE request. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
177 lines
4.7 KiB
Go
177 lines
4.7 KiB
Go
package api
|
|
|
|
import (
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"log"
|
|
"net/http"
|
|
"strings"
|
|
|
|
"provisioning/internal/config"
|
|
"provisioning/internal/model"
|
|
"provisioning/internal/orchestrator"
|
|
"provisioning/internal/pxe"
|
|
"provisioning/internal/statemachine"
|
|
"provisioning/internal/store"
|
|
|
|
"github.com/go-chi/chi/v5"
|
|
)
|
|
|
|
type BootAPI struct {
|
|
Hosts *store.Hosts
|
|
Images *store.Images
|
|
Runner *orchestrator.Runner
|
|
Orchestrator *orchestrator.HostOrchestrator
|
|
Config *config.Config
|
|
ServerTypes *config.ServerTypeRegistry
|
|
}
|
|
|
|
func (a *BootAPI) IPXEScript(w http.ResponseWriter, r *http.Request) {
|
|
mac := normalizeMAC(chi.URLParam(r, "mac"))
|
|
host, err := a.Hosts.GetByMAC(r.Context(), mac)
|
|
if err != nil {
|
|
if errors.Is(err, store.ErrNotFound) {
|
|
http.Error(w, "#!ipxe\nexit", http.StatusNotFound)
|
|
return
|
|
}
|
|
http.Error(w, "internal error", http.StatusInternalServerError)
|
|
return
|
|
}
|
|
|
|
img, err := a.Images.GetDefault(r.Context())
|
|
if err != nil {
|
|
http.Error(w, "#!ipxe\necho No default image configured\nshell", http.StatusServiceUnavailable)
|
|
return
|
|
}
|
|
|
|
if host.State == model.StatePXEReady {
|
|
a.Runner.Transition(r.Context(), host.ID, statemachine.TriggerPXEScriptServed)
|
|
a.Runner.LogActivity(r.Context(), host.ID, model.LogInfo, "pxe", "iPXE script served — kernel + initrd delivered")
|
|
}
|
|
|
|
w.Header().Set("Content-Type", "text/plain")
|
|
w.Write([]byte(pxe.BuildIPXEScript(a.Config.Server.PublicURL, img, mac)))
|
|
}
|
|
|
|
func (a *BootAPI) AnswerFile(w http.ResponseWriter, r *http.Request) {
|
|
var sysInfo struct {
|
|
MAC string `json:"mac"`
|
|
}
|
|
if err := json.NewDecoder(r.Body).Decode(&sysInfo); err != nil {
|
|
http.Error(w, "invalid json", http.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
mac := normalizeMAC(sysInfo.MAC)
|
|
host, err := a.Hosts.GetByMAC(r.Context(), mac)
|
|
if err != nil {
|
|
if errors.Is(err, store.ErrNotFound) {
|
|
http.Error(w, "unknown host", http.StatusForbidden)
|
|
return
|
|
}
|
|
http.Error(w, "internal error", http.StatusInternalServerError)
|
|
return
|
|
}
|
|
|
|
st, ok := a.ServerTypes.Get(host.ServerType)
|
|
if !ok {
|
|
http.Error(w, "unknown server type", http.StatusInternalServerError)
|
|
return
|
|
}
|
|
|
|
if host.State == model.StatePXEBooted {
|
|
a.Runner.Transition(r.Context(), host.ID, statemachine.TriggerAnswerServed)
|
|
a.Runner.LogActivity(r.Context(), host.ID, model.LogInfo, "pxe", "Answer file served — installation starting")
|
|
}
|
|
|
|
_, pubKey, _ := a.Hosts.GetEphemeralKey(r.Context(), host.ID)
|
|
if pubKey == "" {
|
|
http.Error(w, "no ephemeral key for host", http.StatusInternalServerError)
|
|
return
|
|
}
|
|
|
|
answer := pxe.GenerateAnswerFile(host, st, a.Config, pubKey)
|
|
w.Header().Set("Content-Type", "application/toml")
|
|
w.Write([]byte(answer))
|
|
}
|
|
|
|
func (a *BootAPI) InstallComplete(w http.ResponseWriter, r *http.Request) {
|
|
id, ok := idFromURL(w, r)
|
|
if !ok {
|
|
return
|
|
}
|
|
|
|
host, err := a.Hosts.Get(r.Context(), id)
|
|
if err != nil {
|
|
writeJSONErr(w, http.StatusNotFound, "host not found")
|
|
return
|
|
}
|
|
|
|
if host.State == model.StateInstalling {
|
|
a.Runner.LogActivity(r.Context(), host.ID, model.LogInfo, "pxe", "Install-complete webhook received")
|
|
if _, err := a.Runner.Transition(r.Context(), host.ID, statemachine.TriggerInstallWebhook); err != nil {
|
|
log.Printf("host %d: install-complete transition failed: %v", host.ID, err)
|
|
}
|
|
}
|
|
|
|
w.WriteHeader(http.StatusOK)
|
|
}
|
|
|
|
func (a *BootAPI) FirstBootScript(w http.ResponseWriter, r *http.Request) {
|
|
id, ok := idFromURL(w, r)
|
|
if !ok {
|
|
return
|
|
}
|
|
|
|
host, err := a.Hosts.Get(r.Context(), id)
|
|
if err != nil {
|
|
http.Error(w, "host not found", http.StatusNotFound)
|
|
return
|
|
}
|
|
|
|
st, ok := a.ServerTypes.Get(host.ServerType)
|
|
if !ok {
|
|
http.Error(w, "unknown server type", http.StatusInternalServerError)
|
|
return
|
|
}
|
|
|
|
script := pxe.GenerateFirstBootScript(host, st, a.Config)
|
|
w.Header().Set("Content-Type", "text/x-shellscript")
|
|
w.Write([]byte(script))
|
|
}
|
|
|
|
func (a *BootAPI) PhoneHome(w http.ResponseWriter, r *http.Request) {
|
|
id, ok := idFromURL(w, r)
|
|
if !ok {
|
|
return
|
|
}
|
|
|
|
var req struct {
|
|
IP string `json:"ip"`
|
|
HardwareID string `json:"hardware_id"`
|
|
}
|
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
writeJSONErr(w, http.StatusBadRequest, "invalid json")
|
|
return
|
|
}
|
|
|
|
host, err := a.Hosts.Get(r.Context(), id)
|
|
if err != nil {
|
|
writeJSONErr(w, http.StatusNotFound, "host not found")
|
|
return
|
|
}
|
|
|
|
log.Printf("host %d (%s): phone-home from %s, hwid=%s", host.ID, host.Hostname, req.IP, req.HardwareID)
|
|
a.Runner.LogActivity(r.Context(), host.ID, model.LogInfo, "pxe", fmt.Sprintf("Phone-home received from %s", req.IP))
|
|
a.Orchestrator.HandlePhoneHome(r.Context(), host.ID, req.IP, req.HardwareID)
|
|
|
|
writeJSON(w, http.StatusOK, map[string]any{"ok": true})
|
|
}
|
|
|
|
// normalizeMAC returns m with surrounding whitespace removed, letters
// lower-cased, and every "-" replaced by ":", so that hyphen- and
// colon-separated spellings of the same address compare equal.
// It does not validate that the result is a well-formed MAC address.
func normalizeMAC(m string) string {
	trimmed := strings.TrimSpace(m)
	lowered := strings.ToLower(trimmed)
	return strings.ReplaceAll(lowered, "-", ":")
}
|