Host detail v2: full pipeline + per-stage logs + WoL diagnostics
CI / Lint + build + test (push) Has been cancelled

Pipeline now always renders all 13 nodes (3 pre-stage + 9 stage +
Completed), synthesising ghosts from run state when stage rows
aren't seeded yet. Makes a WaitingWoL host show the full timeline
ahead of it instead of just 4 dots.

Agent tags each log line with its stage; logs.Hub fans out to both
log-{runID} and log-{runID}-{stage} SSE events so the detail page
can show per-stage tabs with a pure-CSS radio-sibling switch. Flat
run log prepends [stage] so grep still works.

Dispatcher writes picked/sent-WoL/heartbeat lines into the per-run
log — the operator opens the detail page, sees WaitingWoL stuck,
and reads exactly what the dispatcher did and why nothing's
progressing, instead of having to tail journalctl on the LXC.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-18 00:38:27 -04:00
parent a3d5e2d0a4
commit 1694c20b12
16 changed files with 1053 additions and 162 deletions
+73 -6
View File
@@ -21,6 +21,7 @@ import (
// Line is one log entry: the unit that Append writes to the per-run
// file and that renderLogSSE turns into an HTML fragment.
type Line struct {
	// TS is the entry's timestamp.
	TS time.Time
	// Level is the severity: info|warn|error|debug.
	Level string
	// Stage is optional — one of store.DefaultStageOrder. Empty means
	// the line is orchestrator/agent framing rather than stage output.
	Stage string
	// Text is the message body.
	Text string
}
@@ -85,6 +86,54 @@ func (h *Hub) PathFor(runID int64) string {
return filepath.Join(h.dir, fmt.Sprintf("run-%d.log", runID))
}
// Replay reads the on-disk log for a run and returns one
// <div class="log-line"> fragment per parseable line, suitable for
// inlining into the "All" log pane on initial page load. It does not
// subscribe to the SSE hub — callers are expected to pair this with a
// live sse-swap target on the same element.
//
// A missing file yields the empty string; the pane just stays empty
// until live events arrive. Any other read error (permissions, IO) also
// yields the empty string but is reported through log.Printf, the same
// way Append reports disk failures, so a degraded disk is not mistaken
// for "no log yet".
//
// Blank lines and lines that do not start with an RFC3339Nano timestamp
// followed by a level token are skipped.
func (h *Hub) Replay(runID int64) string {
	b, err := os.ReadFile(h.PathFor(runID))
	if err != nil {
		if !os.IsNotExist(err) {
			log.Printf("logs: replay run-%d: %v", runID, err)
		}
		return ""
	}

	var out strings.Builder
	for _, raw := range strings.Split(string(b), "\n") {
		if raw == "" {
			continue
		}

		// Format from Append: "<RFC3339Nano> <LEVEL> <text>". LEVEL is
		// printed with %5s, i.e. right-aligned in a 5-wide field, so
		// short levels carry leading spaces (" INFO") while 5-letter
		// ones do not ("ERROR"). Strip that pad before cutting the
		// level token off.
		tsEnd := strings.IndexByte(raw, ' ')
		if tsEnd < 0 {
			continue
		}
		ts, err := time.Parse(time.RFC3339Nano, raw[:tsEnd])
		if err != nil {
			continue
		}
		rest := strings.TrimLeft(raw[tsEnd+1:], " ")
		lvEnd := strings.IndexByte(rest, ' ')
		if lvEnd < 0 {
			continue
		}
		level := strings.ToLower(rest[:lvEnd])
		text := rest[lvEnd+1:]

		// Disk format prepends "[stage] " to text when stage was set.
		// NOTE(review): an untagged line whose own text begins with
		// "[x] " is indistinguishable on disk and is read back here as
		// stage "x".
		stage := ""
		if strings.HasPrefix(text, "[") {
			// end > 1 rejects an empty "[]" tag.
			if end := strings.Index(text, "] "); end > 1 {
				stage = text[1:end]
				text = text[end+2:]
			}
		}

		out.WriteString(renderLogSSE(Line{TS: ts, Level: level, Stage: stage, Text: text}))
	}
	return out.String()
}
// Append writes a line to disk and publishes an SSE event. Disk write
// failures are logged but don't block the SSE fan-out — the operator can
// still see the live tail even if disk IO is degraded.
@@ -97,15 +146,26 @@ func (w *Writer) Append(line Line) {
if line.Level == "" {
line.Level = "info"
}
stamped := fmt.Sprintf("%s %5s %s\n", line.TS.Format(time.RFC3339Nano), strings.ToUpper(line.Level), line.Text)
diskText := line.Text
if line.Stage != "" {
diskText = "[" + line.Stage + "] " + diskText
}
stamped := fmt.Sprintf("%s %5s %s\n", line.TS.Format(time.RFC3339Nano), strings.ToUpper(line.Level), diskText)
if _, err := w.f.WriteString(stamped); err != nil {
log.Printf("logs: write run-%d: %v", w.runID, err)
}
if w.hub != nil {
payload := renderLogSSE(line)
w.hub.Publish(events.Event{
Name: fmt.Sprintf("log-%d", w.runID),
Payload: renderLogSSE(line),
Payload: payload,
})
if line.Stage != "" {
w.hub.Publish(events.Event{
Name: fmt.Sprintf("log-%d-%s", w.runID, line.Stage),
Payload: payload,
})
}
}
}
@@ -120,15 +180,22 @@ func (w *Writer) Close() error {
return err
}
// renderLogSSE returns an HTMX-compatible fragment: a single
// <div class="log-line log-LEVEL"> holding the HH:MM:SS timestamp, an
// optional stage badge, and the message text. The detail-page panes
// contain <div id="log-N-..." hx-swap="beforeend">, so each event
// appends one such div to them.
//
// Stage, if set, is rendered as a dim <span class="log-stage">[stage]</span>
// prefix so the "All" pane stays disambiguable even with multiple
// stages interleaved. Level, stage and text are HTML-escaped before
// being placed in the markup.
func renderLogSSE(l Line) string {
	level := strings.ToLower(l.Level)

	stagePrefix := ""
	if l.Stage != "" {
		stagePrefix = fmt.Sprintf(`<span class="log-stage">[%s]</span> `, html.EscapeString(l.Stage))
	}

	// Exactly one format string with four verbs: level class,
	// timestamp, pre-rendered stage prefix (already escaped above, may
	// be empty), and escaped text.
	return fmt.Sprintf(
		`<div class="log-line log-%s">%s %s%s</div>`,
		html.EscapeString(level),
		html.EscapeString(l.TS.Format("15:04:05")),
		stagePrefix,
		html.EscapeString(l.Text),
	)
}
+82
View File
@@ -76,6 +76,88 @@ func TestAppendFansOutToSSE(t *testing.T) {
}
}
// TestAppendStagePublishesBothEvents appends one stage-tagged line and
// checks that it is published exactly once on the all-pane event
// (log-<runID>) and exactly once on the stage-pane event
// (log-<runID>-<stage>), which is what the detail page's per-stage tabs
// listen on. It also checks that the on-disk line carries the
// "[stage] " prefix so the flat log remains greppable.
func TestAppendStagePublishesBothEvents(t *testing.T) {
	tmp := t.TempDir()
	bus := events.NewHub()
	logHub, err := logs.NewHub(tmp, bus)
	if err != nil {
		t.Fatalf("NewHub: %v", err)
	}
	defer logHub.Close()

	// Subscribe before appending so neither published event is missed.
	_, stream, unsubscribe := bus.Subscribe()
	defer unsubscribe()

	writer, err := logHub.WriterFor(42)
	if err != nil {
		t.Fatalf("WriterFor: %v", err)
	}
	tagged := logs.Line{Level: "info", Stage: "SMART", Text: "reading attributes"}
	writer.Append(tagged)

	// Tally events per name, ignoring anything that isn't a log event.
	seen := make(map[string]int)
	for _, ev := range collect(stream, 4, 500*time.Millisecond) {
		if !strings.HasPrefix(ev.Name, "log-") {
			continue
		}
		seen[ev.Name]++
	}
	if n := seen["log-42"]; n != 1 {
		t.Fatalf("expected 1 event on log-42, got %d (names=%+v)", n, seen)
	}
	if n := seen["log-42-SMART"]; n != 1 {
		t.Fatalf("expected 1 event on log-42-SMART, got %d (names=%+v)", n, seen)
	}

	// Disk: stage prepended so flat log is still useful.
	onDisk, err := os.ReadFile(filepath.Join(tmp, "run-42.log"))
	if err != nil {
		t.Fatalf("read log file: %v", err)
	}
	if !strings.Contains(string(onDisk), "[SMART] reading attributes") {
		t.Fatalf("disk log missing stage prefix: %q", onDisk)
	}
}
// TestReplay writes one untagged and one stage-tagged line through
// Append, then checks that Replay's output contains both message texts
// and a stage badge for the tagged one — the detail page uses this to
// seed the All pane on reload of an in-flight run. It also checks that
// replaying a run with no log file returns the empty string.
func TestReplay(t *testing.T) {
	tmp := t.TempDir()
	bus := events.NewHub()
	logHub, err := logs.NewHub(tmp, bus)
	if err != nil {
		t.Fatalf("NewHub: %v", err)
	}
	defer logHub.Close()

	writer, err := logHub.WriterFor(99)
	if err != nil {
		t.Fatalf("WriterFor: %v", err)
	}
	for _, ln := range []logs.Line{
		{Level: "info", Text: "dispatcher: picked"},
		{Level: "info", Stage: "SMART", Text: "smartctl /dev/sda"},
	} {
		writer.Append(ln)
	}

	out := logHub.Replay(99)
	if !strings.Contains(out, "dispatcher: picked") {
		t.Fatalf("replay missing untagged line: %q", out)
	}
	if !strings.Contains(out, "smartctl /dev/sda") {
		t.Fatalf("replay missing tagged line: %q", out)
	}
	if !strings.Contains(out, `class="log-stage"`) {
		t.Fatalf("replay should render stage badge for tagged line: %q", out)
	}

	// Missing file → empty string, no panic.
	missing := logHub.Replay(12345)
	if missing != "" {
		t.Fatalf("replay of unknown run = %q, want empty", missing)
	}
}
// TestWriterForIsCached verifies a second call returns the same Writer
// — otherwise parallel /log POSTs would race on file opens and possibly
// stomp on in-flight writes.