Add activity log system for provisioning lifecycle visibility
build-and-push / test (push) Failing after 32s
build-and-push / build-and-push (push) Has been skipped

Hosts stuck in states like pxe_ready had zero visibility into why.
This adds a persistent activity log that records every meaningful
step (state transitions, PXE events, cluster join stages, failures)
and surfaces it on the host detail page with live SSE updates.
Includes a stuck-detection warning banner when a host sits in
pxe_ready for >10 minutes with no iPXE request.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-13 23:30:21 -04:00
parent c3a1cf99f9
commit a6603b463f
12 changed files with 209 additions and 12 deletions
+5
View File
@@ -3,6 +3,7 @@ package api
import (
"encoding/json"
"errors"
"fmt"
"log"
"net/http"
"strings"
@@ -46,6 +47,7 @@ func (a *BootAPI) IPXEScript(w http.ResponseWriter, r *http.Request) {
if host.State == model.StatePXEReady {
a.Runner.Transition(r.Context(), host.ID, statemachine.TriggerPXEScriptServed)
a.Runner.LogActivity(r.Context(), host.ID, model.LogInfo, "pxe", "iPXE script served — kernel + initrd delivered")
}
w.Header().Set("Content-Type", "text/plain")
@@ -80,6 +82,7 @@ func (a *BootAPI) AnswerFile(w http.ResponseWriter, r *http.Request) {
if host.State == model.StatePXEBooted {
a.Runner.Transition(r.Context(), host.ID, statemachine.TriggerAnswerServed)
a.Runner.LogActivity(r.Context(), host.ID, model.LogInfo, "pxe", "Answer file served — installation starting")
}
_, pubKey, _ := a.Hosts.GetEphemeralKey(r.Context(), host.ID)
@@ -106,6 +109,7 @@ func (a *BootAPI) InstallComplete(w http.ResponseWriter, r *http.Request) {
}
if host.State == model.StateInstalling {
a.Runner.LogActivity(r.Context(), host.ID, model.LogInfo, "pxe", "Install-complete webhook received")
if _, err := a.Runner.Transition(r.Context(), host.ID, statemachine.TriggerInstallWebhook); err != nil {
log.Printf("host %d: install-complete transition failed: %v", host.ID, err)
}
@@ -159,6 +163,7 @@ func (a *BootAPI) PhoneHome(w http.ResponseWriter, r *http.Request) {
}
log.Printf("host %d (%s): phone-home from %s, hwid=%s", host.ID, host.Hostname, req.IP, req.HardwareID)
a.Runner.LogActivity(r.Context(), host.ID, model.LogInfo, "pxe", fmt.Sprintf("Phone-home received from %s", req.IP))
a.Orchestrator.HandlePhoneHome(r.Context(), host.ID, req.IP, req.HardwareID)
writeJSON(w, http.StatusOK, map[string]any{"ok": true})