diff --git a/cmd/provisioning/main.go b/cmd/provisioning/main.go
index 7c2cad4..449d6be 100644
--- a/cmd/provisioning/main.go
+++ b/cmd/provisioning/main.go
@@ -54,16 +54,18 @@ func main() {
ops := &store.Operations{DB: database}
locks := &store.Locks{DB: database, TTLMinutes: cfg.Locks.TTLMinutes}
images := &store.Images{DB: database}
+ activity := &store.Activity{DB: database}
imageSvc := &image.Service{Store: images, ImageDir: cfg.Images.Dir}
hub := events.NewHub()
runner := &orchestrator.Runner{
- Hosts: hosts,
- Ops: ops,
- Locks: locks,
- Hub: hub,
+ Hosts: hosts,
+ Ops: ops,
+ Locks: locks,
+ Hub: hub,
+ Activity: activity,
}
pxeSupervisor := pxe.NewSupervisor(pxe.SupervisorConfig{
@@ -126,6 +128,7 @@ func main() {
Ops: ops,
Locks: locks,
Images: images,
+ Activity: activity,
ImageSvc: imageSvc,
Runner: runner,
Orchestrator: hostOrch,
diff --git a/internal/api/boot.go b/internal/api/boot.go
index a721509..366d25c 100644
--- a/internal/api/boot.go
+++ b/internal/api/boot.go
@@ -3,6 +3,7 @@ package api
import (
"encoding/json"
"errors"
+ "fmt"
"log"
"net/http"
"strings"
@@ -46,6 +47,7 @@ func (a *BootAPI) IPXEScript(w http.ResponseWriter, r *http.Request) {
if host.State == model.StatePXEReady {
a.Runner.Transition(r.Context(), host.ID, statemachine.TriggerPXEScriptServed)
+ a.Runner.LogActivity(r.Context(), host.ID, model.LogInfo, "pxe", "iPXE script served — kernel + initrd delivered")
}
w.Header().Set("Content-Type", "text/plain")
@@ -80,6 +82,7 @@ func (a *BootAPI) AnswerFile(w http.ResponseWriter, r *http.Request) {
if host.State == model.StatePXEBooted {
a.Runner.Transition(r.Context(), host.ID, statemachine.TriggerAnswerServed)
+ a.Runner.LogActivity(r.Context(), host.ID, model.LogInfo, "pxe", "Answer file served — installation starting")
}
_, pubKey, _ := a.Hosts.GetEphemeralKey(r.Context(), host.ID)
@@ -106,6 +109,7 @@ func (a *BootAPI) InstallComplete(w http.ResponseWriter, r *http.Request) {
}
if host.State == model.StateInstalling {
+ a.Runner.LogActivity(r.Context(), host.ID, model.LogInfo, "pxe", "Install-complete webhook received")
if _, err := a.Runner.Transition(r.Context(), host.ID, statemachine.TriggerInstallWebhook); err != nil {
log.Printf("host %d: install-complete transition failed: %v", host.ID, err)
}
@@ -159,6 +163,7 @@ func (a *BootAPI) PhoneHome(w http.ResponseWriter, r *http.Request) {
}
log.Printf("host %d (%s): phone-home from %s, hwid=%s", host.ID, host.Hostname, req.IP, req.HardwareID)
+ a.Runner.LogActivity(r.Context(), host.ID, model.LogInfo, "pxe", fmt.Sprintf("Phone-home received from %s", req.IP))
a.Orchestrator.HandlePhoneHome(r.Context(), host.ID, req.IP, req.HardwareID)
writeJSON(w, http.StatusOK, map[string]any{"ok": true})
diff --git a/internal/api/hosts.go b/internal/api/hosts.go
index 73d2594..726dcb9 100644
--- a/internal/api/hosts.go
+++ b/internal/api/hosts.go
@@ -124,6 +124,7 @@ func (a *HostAPI) Rebuild(w http.ResponseWriter, r *http.Request) {
writeJSONErr(w, http.StatusInternalServerError, "failed to acquire lock")
return
}
+ a.Runner.LogActivity(r.Context(), host.ID, model.LogInfo, "api", "Rebuild triggered via API")
if err := a.Orchestrator.PrepareRebuild(r.Context(), host.ID); err != nil {
_ = a.Locks.Release(r.Context(), host.ID)
diff --git a/internal/api/render.go b/internal/api/render.go
index d07b7f5..514e07b 100644
--- a/internal/api/render.go
+++ b/internal/api/render.go
@@ -5,6 +5,7 @@ import (
"html"
"net/http"
"strings"
+ "time"
"provisioning/internal/model"
)
@@ -83,7 +84,7 @@ func hostFormPage(types []string, errMsg string, prefill *model.Host) string {
`, errHTML, hostname, mac, opts.String(), notes))
}
-func hostDetailPage(h *model.Host, ops []model.Operation) string {
+func hostDetailPage(h *model.Host, ops []model.Operation, activity []model.ActivityEntry) string {
stateClass := stateColor(h.State)
led := ledClass(h.State)
canRebuild := h.State == model.StateRegistered || h.State == model.StateReady || h.State == model.StateFailed
@@ -113,12 +114,29 @@ func hostDetailPage(h *model.Host, ops []model.Operation) string {
ip = "—"
}
+ var stuckWarning string
+ if h.State == model.StatePXEReady && time.Since(h.UpdatedAt) > 10*time.Minute {
+ mins := int(time.Since(h.UpdatedAt).Minutes())
+ stuckWarning = fmt.Sprintf(`
Host has been in PXE_READY for %d minutes with no iPXE request. This usually means the host failed to PXE boot — check secure boot settings, network connectivity, and BIOS boot order.
`, mins)
+ }
+
+ var activityHTML strings.Builder
+ for _, e := range activity {
+ activityHTML.WriteString(fmt.Sprintf(
+ `%s%s%s
`,
+ e.Level, e.CreatedAt.Format("15:04"), html.EscapeString(e.Source), html.EscapeString(e.Message)))
+ }
+ if len(activity) == 0 {
+ activityHTML.WriteString(`No activity recorded yet.
`)
+ }
+
return layout(h.Hostname, fmt.Sprintf(`
+ %s
| MAC | %s |
@@ -135,7 +153,15 @@ func hostDetailPage(h *model.Host, ops []model.Operation) string {
%s
- `, led, html.EscapeString(h.Hostname), stateClass, h.State, h.MAC, h.ServerType, ip, html.EscapeString(h.Notes), actions.String(), opsHTML.String()))
+ Activity Log
+
+ `, led, html.EscapeString(h.Hostname), stateClass, h.State,
+ stuckWarning,
+ h.MAC, h.ServerType, ip, html.EscapeString(h.Notes), actions.String(),
+ opsHTML.String(),
+ h.ID, activityHTML.String()))
}
func imagesPage(images []model.Image) string {
diff --git a/internal/api/ui.go b/internal/api/ui.go
index c2052b0..fd36e8b 100644
--- a/internal/api/ui.go
+++ b/internal/api/ui.go
@@ -24,6 +24,7 @@ type UI struct {
Ops *store.Operations
Locks *store.Locks
Images *store.Images
+ Activity *store.Activity
ImageSvc *image.Service
Runner *orchestrator.Runner
Orchestrator *orchestrator.HostOrchestrator
@@ -106,7 +107,8 @@ func (u *UI) HostDetail(w http.ResponseWriter, r *http.Request) {
return
}
ops, _ := u.Ops.ListByHost(r.Context(), host.ID)
- renderHTML(w, hostDetailPage(host, ops))
+ activity, _ := u.Activity.ListByHost(r.Context(), host.ID, 50)
+ renderHTML(w, hostDetailPage(host, ops, activity))
}
func (u *UI) TriggerRebuild(w http.ResponseWriter, r *http.Request) {
@@ -131,6 +133,7 @@ func (u *UI) TriggerRebuild(w http.ResponseWriter, r *http.Request) {
Kind: model.OpRebuildProxmox,
})
_ = u.Locks.Acquire(r.Context(), host.ID, opID)
+ u.Runner.LogActivity(r.Context(), host.ID, model.LogInfo, "ui", "Rebuild triggered by user")
if err := u.Orchestrator.PrepareRebuild(r.Context(), host.ID); err != nil {
_ = u.Locks.Release(r.Context(), host.ID)
diff --git a/internal/db/migrations/0003_activity_log.sql b/internal/db/migrations/0003_activity_log.sql
new file mode 100644
index 0000000..5e43cbb
--- /dev/null
+++ b/internal/db/migrations/0003_activity_log.sql
@@ -0,0 +1,10 @@
+CREATE TABLE activity_log (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ host_id INTEGER NOT NULL REFERENCES hosts(id) ON DELETE CASCADE,
+ operation_id INTEGER REFERENCES operations(id) ON DELETE SET NULL,
+ level TEXT NOT NULL DEFAULT 'info',
+ message TEXT NOT NULL,
+ source TEXT NOT NULL DEFAULT '',
+ created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ','now'))
+);
+CREATE INDEX idx_activity_host ON activity_log(host_id, created_at DESC);
diff --git a/internal/model/model.go b/internal/model/model.go
index f2b4fe9..2a85175 100644
--- a/internal/model/model.go
+++ b/internal/model/model.go
@@ -66,6 +66,24 @@ type Image struct {
CreatedAt time.Time
}
+type LogLevel string
+
+const (
+ LogInfo LogLevel = "info"
+ LogWarn LogLevel = "warn"
+ LogError LogLevel = "error"
+)
+
+type ActivityEntry struct {
+ ID int64
+ HostID int64
+ OperationID int64
+ Level LogLevel
+ Message string
+ Source string
+ CreatedAt time.Time
+}
+
type ServerType struct {
Key string
DisplayName string `yaml:"display_name"`
diff --git a/internal/orchestrator/host.go b/internal/orchestrator/host.go
index 571c4ce..66f77c0 100644
--- a/internal/orchestrator/host.go
+++ b/internal/orchestrator/host.go
@@ -33,6 +33,7 @@ func (o *HostOrchestrator) PrepareRebuild(ctx context.Context, hostID int64) err
}
func (o *HostOrchestrator) HandlePhoneHome(ctx context.Context, hostID int64, ip string, hardwareID string) {
+ o.Runner.LogActivity(ctx, hostID, model.LogInfo, "orchestrator", "Phone-home: updating IP to "+ip)
if err := o.Hosts.UpdateIP(ctx, hostID, ip, hardwareID); err != nil {
log.Printf("host %d: failed to update IP: %v", hostID, err)
o.Runner.FailHost(ctx, hostID, "failed to update IP: "+err.Error())
@@ -69,15 +70,17 @@ func (o *HostOrchestrator) postPhoneHome(hostID int64, ip string, hardwareID str
return
}
+ o.Runner.LogActivity(ctx, hostID, model.LogInfo, "orchestrator", "Starting cluster join via "+o.Cluster.ExistingNode)
if err := o.Cluster.Join(ctx, ip, privateKey, publicKey); err != nil {
log.Printf("host %d: cluster join failed: %v", hostID, err)
o.Runner.FailHost(ctx, hostID, "cluster join: "+err.Error())
return
}
+ o.Runner.LogActivity(ctx, hostID, model.LogInfo, "orchestrator", "Cluster join complete")
- // Key has been removed from the remote host; clear it from the DB
_ = o.Hosts.ClearEphemeralKey(ctx, hostID)
+ o.Runner.LogActivity(ctx, hostID, model.LogInfo, "orchestrator", "Registering with infrastructure API")
if err := o.registerInfra(ctx, host, ip, hardwareID); err != nil {
log.Printf("host %d: infra registration failed: %v", hostID, err)
o.Runner.FailHost(ctx, hostID, "infra registration: "+err.Error())
@@ -94,6 +97,7 @@ func (o *HostOrchestrator) postPhoneHome(hostID int64, ip string, hardwareID str
_ = o.Ops.Complete(ctx, op.ID)
}
_ = o.Locks.Release(ctx, hostID)
+ o.Runner.LogActivity(ctx, hostID, model.LogInfo, "orchestrator", "Provisioning complete")
log.Printf("host %d (%s): provisioning complete", hostID, host.Hostname)
}
diff --git a/internal/orchestrator/runner.go b/internal/orchestrator/runner.go
index d3e1783..c987e4e 100644
--- a/internal/orchestrator/runner.go
+++ b/internal/orchestrator/runner.go
@@ -4,6 +4,7 @@ import (
"context"
"fmt"
"log"
+ "time"
"provisioning/internal/events"
"provisioning/internal/model"
@@ -12,10 +13,11 @@ import (
)
type Runner struct {
- Hosts *store.Hosts
- Ops *store.Operations
- Locks *store.Locks
- Hub *events.Hub
+ Hosts *store.Hosts
+ Ops *store.Operations
+ Locks *store.Locks
+ Hub *events.Hub
+ Activity *store.Activity
}
func (r *Runner) Transition(ctx context.Context, hostID int64, trigger statemachine.Trigger) (model.HostState, error) {
@@ -35,10 +37,12 @@ func (r *Runner) Transition(ctx context.Context, hostID int64, trigger statemach
Name: "host.state_changed",
Payload: fmt.Sprintf(`{"host_id":%d,"old_state":"%s","new_state":"%s"}`, hostID, host.State, next),
})
+ r.LogActivity(ctx, hostID, model.LogInfo, "state", fmt.Sprintf("%s → %s", host.State, next))
return next, nil
}
func (r *Runner) FailHost(ctx context.Context, hostID int64, reason string) {
+ r.LogActivity(ctx, hostID, model.LogError, "orchestrator", "host failed: "+reason)
if _, err := r.Transition(ctx, hostID, statemachine.TriggerFailed); err != nil {
log.Printf("host %d: failed to transition to failed state: %v", hostID, err)
return
@@ -49,3 +53,16 @@ func (r *Runner) FailHost(ctx context.Context, hostID int64, reason string) {
}
_ = r.Locks.Release(ctx, hostID)
}
+
+func (r *Runner) LogActivity(ctx context.Context, hostID int64, level model.LogLevel, source, message string) {
+ var opID int64
+ if op, err := r.Ops.GetActive(ctx, hostID); err == nil {
+ opID = op.ID
+ }
+ id, _ := r.Activity.Log(ctx, hostID, opID, level, source, message)
+ r.Hub.Publish(events.Event{
+ Name: "activity.logged",
+ Payload: fmt.Sprintf(`{"id":%d,"host_id":%d,"level":"%s","source":"%s","message":"%s","created_at":"%s"}`,
+ id, hostID, level, source, message, time.Now().UTC().Format(time.RFC3339)),
+ })
+}
diff --git a/internal/store/activity.go b/internal/store/activity.go
new file mode 100644
index 0000000..b75a74d
--- /dev/null
+++ b/internal/store/activity.go
@@ -0,0 +1,50 @@
+package store
+
+import (
+ "context"
+ "database/sql"
+ "fmt"
+ "time"
+
+ "provisioning/internal/model"
+)
+
+type Activity struct {
+ DB *sql.DB
+}
+
+func (s *Activity) Log(ctx context.Context, hostID, opID int64, level model.LogLevel, source, message string) (int64, error) {
+ res, err := s.DB.ExecContext(ctx, `
+ INSERT INTO activity_log(host_id, operation_id, level, message, source)
+ VALUES(?,?,?,?,?)
+ `, hostID, nullInt64(opID), level, message, source)
+ if err != nil {
+ return 0, fmt.Errorf("insert activity: %w", err)
+ }
+ return res.LastInsertId()
+}
+
+func (s *Activity) ListByHost(ctx context.Context, hostID int64, limit int) ([]model.ActivityEntry, error) {
+ if limit <= 0 {
+ limit = 50
+ }
+ rows, err := s.DB.QueryContext(ctx, `
+ SELECT id, host_id, COALESCE(operation_id, 0), level, message, source, created_at
+ FROM activity_log WHERE host_id = ? ORDER BY created_at DESC LIMIT ?
+ `, hostID, limit)
+ if err != nil {
+ return nil, fmt.Errorf("list activity: %w", err)
+ }
+ defer rows.Close()
+ var out []model.ActivityEntry
+ for rows.Next() {
+ var e model.ActivityEntry
+ var createdAt string
+ if err := rows.Scan(&e.ID, &e.HostID, &e.OperationID, &e.Level, &e.Message, &e.Source, &createdAt); err != nil {
+ return nil, fmt.Errorf("scan activity: %w", err)
+ }
+ e.CreatedAt, _ = time.Parse(time.RFC3339, createdAt)
+ out = append(out, e)
+ }
+ return out, rows.Err()
+}
diff --git a/internal/web/static/app.css b/internal/web/static/app.css
index e26ed06..3cf7b77 100644
--- a/internal/web/static/app.css
+++ b/internal/web/static/app.css
@@ -391,6 +391,48 @@ main {
.progress-text { font-size: 0.825rem; color: var(--text); font-weight: 500; }
.progress-detail { font-size: 0.775rem; color: var(--text-secondary); }
+/* === STUCK WARNING === */
+.stuck-warning {
+ background: var(--amber-bg);
+ color: var(--amber);
+ border: 1px solid var(--amber-border);
+ padding: 0.75rem 1rem;
+ border-radius: var(--radius-sm);
+ margin-bottom: 1rem;
+ font-size: 0.825rem;
+ font-weight: 500;
+ line-height: 1.5;
+}
+
+/* === ACTIVITY LOG === */
+.activity-log { max-height: 400px; overflow-y: auto; }
+.log-entry {
+ display: flex;
+ align-items: baseline;
+ gap: 0.75rem;
+ padding: 0.4rem 0.75rem;
+ border-bottom: 1px solid var(--border);
+ font-size: 0.825rem;
+}
+.log-entry:last-child { border-bottom: none; }
+.log-time {
+ color: var(--text-tertiary);
+ font-family: var(--font-mono);
+ font-size: 0.75rem;
+ min-width: 3.5rem;
+ flex-shrink: 0;
+}
+.log-source {
+ color: var(--text-tertiary);
+ font-family: var(--font-mono);
+ font-size: 0.75rem;
+ min-width: 5rem;
+ flex-shrink: 0;
+}
+.log-msg { color: var(--text); }
+.log-warn .log-msg { color: var(--amber); }
+.log-error .log-msg { color: var(--red); font-weight: 500; }
+
/* === UTILITY === */
.inline { display: inline; }
diff --git a/internal/web/static/app.js b/internal/web/static/app.js
index c417ff5..739131c 100644
--- a/internal/web/static/app.js
+++ b/internal/web/static/app.js
@@ -10,6 +10,24 @@
es.addEventListener('host.state_changed', function() {
window.location.reload();
});
+ es.addEventListener('activity.logged', function(e) {
+ var data;
+ try { data = JSON.parse(e.data); } catch(_) { return; }
+ var logDiv = document.getElementById('activity-log');
+ if (!logDiv) return;
+ var hostId = logDiv.getAttribute('data-host-id');
+ if (String(data.host_id) !== hostId) return;
+ var empty = logDiv.querySelector('.empty');
+ if (empty) empty.remove();
+ var entry = document.createElement('div');
+ entry.className = 'log-entry log-' + data.level;
+ var t = new Date(data.created_at);
+ var ts = t.getHours().toString().padStart(2,'0') + ':' + t.getMinutes().toString().padStart(2,'0');
+ entry.innerHTML = '' + ts + '' +
+ '' + data.source + '' +
+ '' + data.message + '';
+ logDiv.insertBefore(entry, logDiv.firstChild);
+ });
es.onerror = function() {
dot.className = 'led led-red';
es.close();