Add activity log system for provisioning lifecycle visibility
build-and-push / test (push) Failing after 32s
build-and-push / build-and-push (push) Has been skipped

When a host got stuck in a state like pxe_ready, operators had zero visibility into why.
This adds a persistent activity log that records every meaningful
step (state transitions, PXE events, cluster join stages, failures)
and surfaces it on the host detail page with live SSE updates.
Includes a stuck-detection warning banner when a host sits in
pxe_ready for >10 minutes with no iPXE request.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-13 23:30:21 -04:00
parent c3a1cf99f9
commit a6603b463f
12 changed files with 209 additions and 12 deletions
+4 -1
View File
@@ -24,6 +24,7 @@ type UI struct {
Ops *store.Operations
Locks *store.Locks
Images *store.Images
Activity *store.Activity
ImageSvc *image.Service
Runner *orchestrator.Runner
Orchestrator *orchestrator.HostOrchestrator
@@ -106,7 +107,8 @@ func (u *UI) HostDetail(w http.ResponseWriter, r *http.Request) {
return
}
ops, _ := u.Ops.ListByHost(r.Context(), host.ID)
-renderHTML(w, hostDetailPage(host, ops))
+activity, _ := u.Activity.ListByHost(r.Context(), host.ID, 50)
+renderHTML(w, hostDetailPage(host, ops, activity))
}
func (u *UI) TriggerRebuild(w http.ResponseWriter, r *http.Request) {
@@ -131,6 +133,7 @@ func (u *UI) TriggerRebuild(w http.ResponseWriter, r *http.Request) {
Kind: model.OpRebuildProxmox,
})
_ = u.Locks.Acquire(r.Context(), host.ID, opID)
u.Runner.LogActivity(r.Context(), host.ID, model.LogInfo, "ui", "Rebuild triggered by user")
if err := u.Orchestrator.PrepareRebuild(r.Context(), host.ID); err != nil {
_ = u.Locks.Release(r.Context(), host.ID)