Host detail v2: full pipeline + per-stage logs + WoL diagnostics
CI / Lint + build + test (push) Has been cancelled
CI / Lint + build + test (push) Has been cancelled
Pipeline now always renders all 13 nodes (3 pre-stage + 9 stage +
Completed), synthesising ghosts from run state when stage rows
aren't seeded yet. Makes a WaitingWoL host show the full timeline
ahead of it instead of just 4 dots.
Agent tags each log line with its stage; logs.Hub fans out to both
log-{runID} and log-{runID}-{stage} SSE events so the detail page
can show per-stage tabs with a pure-CSS radio-sibling switch. Flat
run log prepends [stage] so grep still works.
Dispatcher writes picked/sent-WoL/heartbeat lines into the per-run
log — the operator opens the detail page, sees WaitingWoL stuck,
and reads exactly what the dispatcher did and why nothing's
progressing, instead of having to tail journalctl on the LXC.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+73
-6
@@ -21,6 +21,7 @@ import (
|
||||
type Line struct {
	// TS is the timestamp of the line; written to disk as RFC3339Nano
	// and rendered in the HTML fragment as HH:MM:SS.
	TS time.Time
	// Level is the severity: info|warn|error|debug.
	Level string
	// Stage is optional — one of store.DefaultStageOrder; empty =
	// orchestrator/agent framing.
	Stage string
	// Text is the message body, without any "[stage] " prefix.
	Text string
}
|
||||
|
||||
@@ -85,6 +86,54 @@ func (h *Hub) PathFor(runID int64) string {
|
||||
return filepath.Join(h.dir, fmt.Sprintf("run-%d.log", runID))
|
||||
}
|
||||
|
||||
// Replay reads the on-disk log for a run and returns one
|
||||
// <div class="log-line"> fragment per line, suitable for inlining into
|
||||
// the "All" log pane on initial page load. Missing file → empty string;
|
||||
// the pane just stays empty until live events arrive. Does not subscribe
|
||||
// to the SSE hub — callers are expected to pair this with a live
|
||||
// sse-swap target on the same element.
|
||||
func (h *Hub) Replay(runID int64) string {
|
||||
path := h.PathFor(runID)
|
||||
b, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
var out strings.Builder
|
||||
for _, raw := range strings.Split(string(b), "\n") {
|
||||
if raw == "" {
|
||||
continue
|
||||
}
|
||||
// Format from Append: "<RFC3339Nano> <LEVEL> <text>"
|
||||
// where LEVEL is right-padded to width 5 (e.g. " INFO",
|
||||
// "ERROR"). TrimLeft the pad before splitting off the level.
|
||||
tsEnd := strings.IndexByte(raw, ' ')
|
||||
if tsEnd < 0 {
|
||||
continue
|
||||
}
|
||||
ts, err := time.Parse(time.RFC3339Nano, raw[:tsEnd])
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
rest := strings.TrimLeft(raw[tsEnd+1:], " ")
|
||||
lvEnd := strings.IndexByte(rest, ' ')
|
||||
if lvEnd < 0 {
|
||||
continue
|
||||
}
|
||||
level := strings.ToLower(rest[:lvEnd])
|
||||
text := rest[lvEnd+1:]
|
||||
// Disk format prepends "[stage] " to text when stage was set.
|
||||
stage := ""
|
||||
if strings.HasPrefix(text, "[") {
|
||||
if end := strings.Index(text, "] "); end > 1 {
|
||||
stage = text[1:end]
|
||||
text = text[end+2:]
|
||||
}
|
||||
}
|
||||
out.WriteString(renderLogSSE(Line{TS: ts, Level: level, Stage: stage, Text: text}))
|
||||
}
|
||||
return out.String()
|
||||
}
|
||||
|
||||
// Append writes a line to disk and publishes an SSE event. Disk write
// failures are logged but don't block the SSE fan-out — the operator can
// still see the live tail even if disk IO is degraded.
|
||||
@@ -97,15 +146,26 @@ func (w *Writer) Append(line Line) {
|
||||
if line.Level == "" {
|
||||
line.Level = "info"
|
||||
}
|
||||
stamped := fmt.Sprintf("%s %5s %s\n", line.TS.Format(time.RFC3339Nano), strings.ToUpper(line.Level), line.Text)
|
||||
diskText := line.Text
|
||||
if line.Stage != "" {
|
||||
diskText = "[" + line.Stage + "] " + diskText
|
||||
}
|
||||
stamped := fmt.Sprintf("%s %5s %s\n", line.TS.Format(time.RFC3339Nano), strings.ToUpper(line.Level), diskText)
|
||||
if _, err := w.f.WriteString(stamped); err != nil {
|
||||
log.Printf("logs: write run-%d: %v", w.runID, err)
|
||||
}
|
||||
if w.hub != nil {
|
||||
payload := renderLogSSE(line)
|
||||
w.hub.Publish(events.Event{
|
||||
Name: fmt.Sprintf("log-%d", w.runID),
|
||||
Payload: renderLogSSE(line),
|
||||
Payload: payload,
|
||||
})
|
||||
if line.Stage != "" {
|
||||
w.hub.Publish(events.Event{
|
||||
Name: fmt.Sprintf("log-%d-%s", w.runID, line.Stage),
|
||||
Payload: payload,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -120,15 +180,22 @@ func (w *Writer) Close() error {
|
||||
return err
|
||||
}
|
||||
|
||||
// renderLogSSE returns an HTMX-compatible fragment. The tile contains
|
||||
// a <div id="log-N" hx-swap-oob="beforeend">: each event appends one
|
||||
// <div class="log-line log-LEVEL"> to it.
|
||||
// renderLogSSE returns an HTMX-compatible fragment. The detail-page
|
||||
// panes contain <div id="log-N-..." hx-swap="beforeend">: each event
|
||||
// appends one <div class="log-line log-LEVEL"> to them. Stage, if set,
|
||||
// is rendered as a dim prefix so the "All" pane stays disambiguable
|
||||
// even with multiple stages interleaved.
|
||||
func renderLogSSE(l Line) string {
|
||||
level := strings.ToLower(l.Level)
|
||||
stagePrefix := ""
|
||||
if l.Stage != "" {
|
||||
stagePrefix = fmt.Sprintf(`<span class="log-stage">[%s]</span> `, html.EscapeString(l.Stage))
|
||||
}
|
||||
return fmt.Sprintf(
|
||||
`<div class="log-line log-%s">%s %s</div>`,
|
||||
`<div class="log-line log-%s">%s %s%s</div>`,
|
||||
html.EscapeString(level),
|
||||
html.EscapeString(l.TS.Format("15:04:05")),
|
||||
stagePrefix,
|
||||
html.EscapeString(l.Text),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -76,6 +76,88 @@ func TestAppendFansOutToSSE(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestAppendStagePublishesBothEvents: a line tagged with a stage must
|
||||
// fan out to BOTH the all-pane event (log-<runID>) AND the stage-pane
|
||||
// event (log-<runID>-<stage>) so the detail page's per-stage tabs see
|
||||
// their own slice. Disk format prepends "[stage] " so the flat log
|
||||
// remains greppable.
|
||||
func TestAppendStagePublishesBothEvents(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
hub := events.NewHub()
|
||||
lh, err := logs.NewHub(dir, hub)
|
||||
if err != nil {
|
||||
t.Fatalf("NewHub: %v", err)
|
||||
}
|
||||
defer lh.Close()
|
||||
|
||||
_, ch, cancel := hub.Subscribe()
|
||||
defer cancel()
|
||||
|
||||
w, err := lh.WriterFor(42)
|
||||
if err != nil {
|
||||
t.Fatalf("WriterFor: %v", err)
|
||||
}
|
||||
w.Append(logs.Line{Level: "info", Stage: "SMART", Text: "reading attributes"})
|
||||
|
||||
got := collect(ch, 4, 500*time.Millisecond)
|
||||
names := map[string]int{}
|
||||
for _, ev := range got {
|
||||
if strings.HasPrefix(ev.Name, "log-") {
|
||||
names[ev.Name]++
|
||||
}
|
||||
}
|
||||
if names["log-42"] != 1 {
|
||||
t.Fatalf("expected 1 event on log-42, got %d (names=%+v)", names["log-42"], names)
|
||||
}
|
||||
if names["log-42-SMART"] != 1 {
|
||||
t.Fatalf("expected 1 event on log-42-SMART, got %d (names=%+v)", names["log-42-SMART"], names)
|
||||
}
|
||||
|
||||
// Disk: stage prepended so flat log is still useful.
|
||||
body, err := os.ReadFile(filepath.Join(dir, "run-42.log"))
|
||||
if err != nil {
|
||||
t.Fatalf("read log file: %v", err)
|
||||
}
|
||||
if !strings.Contains(string(body), "[SMART] reading attributes") {
|
||||
t.Fatalf("disk log missing stage prefix: %q", body)
|
||||
}
|
||||
}
|
||||
|
||||
// TestReplay re-parses a file written by Append and emits the same SSE
|
||||
// fragments — detail-page uses this to seed the All pane on reload of
|
||||
// an in-flight run.
|
||||
func TestReplay(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
hub := events.NewHub()
|
||||
lh, err := logs.NewHub(dir, hub)
|
||||
if err != nil {
|
||||
t.Fatalf("NewHub: %v", err)
|
||||
}
|
||||
defer lh.Close()
|
||||
|
||||
w, err := lh.WriterFor(99)
|
||||
if err != nil {
|
||||
t.Fatalf("WriterFor: %v", err)
|
||||
}
|
||||
w.Append(logs.Line{Level: "info", Text: "dispatcher: picked"})
|
||||
w.Append(logs.Line{Level: "info", Stage: "SMART", Text: "smartctl /dev/sda"})
|
||||
|
||||
replay := lh.Replay(99)
|
||||
if !strings.Contains(replay, "dispatcher: picked") {
|
||||
t.Fatalf("replay missing untagged line: %q", replay)
|
||||
}
|
||||
if !strings.Contains(replay, "smartctl /dev/sda") {
|
||||
t.Fatalf("replay missing tagged line: %q", replay)
|
||||
}
|
||||
if !strings.Contains(replay, `class="log-stage"`) {
|
||||
t.Fatalf("replay should render stage badge for tagged line: %q", replay)
|
||||
}
|
||||
// Missing file → empty string, no panic.
|
||||
if got := lh.Replay(12345); got != "" {
|
||||
t.Fatalf("replay of unknown run = %q, want empty", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestWriterForIsCached verifies a second call returns the same Writer
|
||||
// — otherwise parallel /log POSTs would race on file opens and possibly
|
||||
// stomp on in-flight writes.
|
||||
|
||||
Reference in New Issue
Block a user