ui: GitHub-Actions-style detail page, sub-steps, mini-tile run-view
Reshapes the detail page into a run-view: hybrid horizontal pipeline
+ expanded active-step pane with sub-steps, a per-step log pane with
line-numbered permalinks and client-side search, and a runs-history
sidebar that navigates via ?run=N. Default step is server-picked
(running → failed → Reporting) so the operator lands on the thing
that's moving.
Adds a sub_steps table + SSE topic (substep-{run}-{stage}-{ordinal})
so per-disk and per-pass work (SMART, CPUStress CPU/RAM, Storage,
GPU) is visible in the UI instead of buried in stage summary JSON.
Agent emits sub-step reports from existing per-iteration loops.
Dashboard tiles become a mini run-view with a 9-dot step strip so
the operator reads run health across the whole grid at a glance.
Register page gets the same card shell + button styling.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+57
-23
@@ -3,6 +3,7 @@ package tests
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
@@ -52,6 +53,7 @@ func CPUStress(ctx context.Context, d Deps) Outcome {
|
||||
|
||||
cores := runtime.NumCPU()
|
||||
extras := map[string]any{"cores": cores}
|
||||
var subs []SubStepReport
|
||||
|
||||
// Pass 1: CPU
|
||||
cpu := runStressPass(ctx, d, "CPU", cpuPassDuration, []string{
|
||||
@@ -62,12 +64,14 @@ func CPUStress(ctx context.Context, d Deps) Outcome {
|
||||
"--verify",
|
||||
})
|
||||
extras["cpu_pass"] = cpu
|
||||
subs = append(subs, subStepFromPass("CPU pass", cpu))
|
||||
if !cpu.Passed {
|
||||
return Outcome{
|
||||
Passed: false,
|
||||
Message: "CPU pass failed: " + cpu.Err,
|
||||
Summary: fmt.Sprintf("CPU pass failed after %ds", cpu.ElapsedSecs),
|
||||
Extras: extras,
|
||||
Passed: false,
|
||||
Message: "CPU pass failed: " + cpu.Err,
|
||||
Summary: fmt.Sprintf("CPU pass failed after %ds", cpu.ElapsedSecs),
|
||||
Extras: extras,
|
||||
SubSteps: subs,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -77,10 +81,11 @@ func CPUStress(ctx context.Context, d Deps) Outcome {
|
||||
if err != nil {
|
||||
d.Error("CPUStress: read MemAvailable: " + err.Error())
|
||||
return Outcome{
|
||||
Passed: false,
|
||||
Message: "read MemAvailable: " + err.Error(),
|
||||
Summary: "failed (meminfo unreadable)",
|
||||
Extras: extras,
|
||||
Passed: false,
|
||||
Message: "read MemAvailable: " + err.Error(),
|
||||
Summary: "failed (meminfo unreadable)",
|
||||
Extras: extras,
|
||||
SubSteps: subs,
|
||||
}
|
||||
}
|
||||
cap := avail - memHeadroomBytes
|
||||
@@ -92,10 +97,11 @@ func CPUStress(ctx context.Context, d Deps) Outcome {
|
||||
avail, memFloorBytes, memHeadroomBytes)
|
||||
d.Error("CPUStress: " + msg)
|
||||
return Outcome{
|
||||
Passed: false,
|
||||
Message: msg,
|
||||
Summary: "failed (insufficient free RAM for memory pass)",
|
||||
Extras: extras,
|
||||
Passed: false,
|
||||
Message: msg,
|
||||
Summary: "failed (insufficient free RAM for memory pass)",
|
||||
Extras: extras,
|
||||
SubSteps: subs,
|
||||
}
|
||||
}
|
||||
mem := runStressPass(ctx, d, "memory", memPassDuration, []string{
|
||||
@@ -107,12 +113,14 @@ func CPUStress(ctx context.Context, d Deps) Outcome {
|
||||
"--verify",
|
||||
})
|
||||
extras["mem_pass"] = mem
|
||||
subs = append(subs, subStepFromPass(fmt.Sprintf("Memory pass (cap %s)", humanBytes(cap)), mem))
|
||||
if !mem.Passed {
|
||||
return Outcome{
|
||||
Passed: false,
|
||||
Message: "memory pass failed: " + mem.Err,
|
||||
Summary: fmt.Sprintf("memory pass failed after %ds", mem.ElapsedSecs),
|
||||
Extras: extras,
|
||||
Passed: false,
|
||||
Message: "memory pass failed: " + mem.Err,
|
||||
Summary: fmt.Sprintf("memory pass failed after %ds", mem.ElapsedSecs),
|
||||
Extras: extras,
|
||||
SubSteps: subs,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -120,7 +128,26 @@ func CPUStress(ctx context.Context, d Deps) Outcome {
|
||||
Passed: true,
|
||||
Summary: fmt.Sprintf("CPU+RAM PASSED (%d cores, %s cap)",
|
||||
cores, humanBytes(cap)),
|
||||
Extras: extras,
|
||||
Extras: extras,
|
||||
SubSteps: subs,
|
||||
}
|
||||
}
|
||||
|
||||
// subStepFromPass projects a stressPass into a SubStepReport — shared by
|
||||
// both passes and by the mid-stage early-return paths so the UI always
|
||||
// sees exactly one row per pass, even on failure.
|
||||
func subStepFromPass(name string, p stressPass) SubStepReport {
|
||||
summary, _ := json.Marshal(map[string]any{
|
||||
"elapsed_secs": p.ElapsedSecs,
|
||||
"target_secs": p.TargetSecs,
|
||||
"err": p.Err,
|
||||
})
|
||||
return SubStepReport{
|
||||
Name: name,
|
||||
Passed: p.Passed,
|
||||
StartedAt: p.StartedAt,
|
||||
CompletedAt: p.CompletedAt,
|
||||
SummaryJSON: summary,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -140,12 +167,16 @@ const (
|
||||
|
||||
// stressPass is the per-pass result embedded in CPUStress's Extras.
// Passed==true with ElapsedSecs close to TargetSecs is the only happy
// path. Err holds the failure text when the pass did not succeed, and
// OutputTail holds the tail of the command output kept for diagnosis.
//
// StartedAt/CompletedAt are excluded from the JSON form (the summary
// already has ElapsedSecs); the caller uses them to emit SubStepReport
// rows.
//
// Each field is declared exactly once: Go rejects a struct that repeats
// a field name.
type stressPass struct {
	Passed      bool      `json:"passed"`
	Err         string    `json:"err,omitempty"`
	ElapsedSecs int       `json:"elapsed_secs"`
	TargetSecs  int       `json:"target_secs"`
	OutputTail  string    `json:"output_tail,omitempty"`
	StartedAt   time.Time `json:"-"`
	CompletedAt time.Time `json:"-"`
}
|
||||
|
||||
// runStressPass invokes stress-ng and validates both exit code and
|
||||
@@ -159,12 +190,15 @@ func runStressPass(ctx context.Context, d Deps, label string, target time.Durati
|
||||
cmd := exec.CommandContext(runCtx, "stress-ng", args...)
|
||||
start := time.Now()
|
||||
out, err := cmd.CombinedOutput()
|
||||
elapsed := time.Since(start)
|
||||
end := time.Now()
|
||||
elapsed := end.Sub(start)
|
||||
|
||||
res := stressPass{
|
||||
ElapsedSecs: int(elapsed.Round(time.Second).Seconds()),
|
||||
TargetSecs: int(target.Round(time.Second).Seconds()),
|
||||
OutputTail: tailLines(string(out), 20),
|
||||
StartedAt: start,
|
||||
CompletedAt: end,
|
||||
}
|
||||
if err != nil {
|
||||
res.Err = err.Error()
|
||||
|
||||
+39
-3
@@ -2,8 +2,11 @@ package tests
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// GPU enumerates VGA / 3D PCI devices. No devices → skip cleanly (a
|
||||
@@ -11,7 +14,9 @@ import (
|
||||
// stress). Devices present → try nvidia-smi for NVIDIA cards, else
|
||||
// accept PCI presence.
|
||||
func GPU(ctx context.Context, d Deps) Outcome {
|
||||
pciStart := time.Now()
|
||||
devices := listGPUPCI(ctx)
|
||||
pciEnd := time.Now()
|
||||
if len(devices) == 0 {
|
||||
d.Info("GPU: no VGA/3D PCI devices found — skipping stage")
|
||||
return Outcome{
|
||||
@@ -22,7 +27,9 @@ func GPU(ctx context.Context, d Deps) Outcome {
|
||||
}
|
||||
d.Info("GPU: found " + joinDevices(devices))
|
||||
|
||||
nvStart := time.Now()
|
||||
nvidia := nvidiaSmiList(ctx)
|
||||
nvEnd := time.Now()
|
||||
extras := map[string]any{
|
||||
"pci_devices": devices,
|
||||
"skipped": false,
|
||||
@@ -31,10 +38,39 @@ func GPU(ctx context.Context, d Deps) Outcome {
|
||||
extras["nvidia"] = nvidia
|
||||
d.Info("GPU: nvidia-smi reports: " + strings.Join(nvidia, ", "))
|
||||
}
|
||||
|
||||
// Sub-step rows: one per enumerated PCI device, plus (optionally) one
|
||||
// per NVIDIA card when nvidia-smi sees anything. PCI enumeration runs
|
||||
// once for all devices — we bracket that single invocation by
|
||||
// pciStart/pciEnd and attribute the window to each device row so the
|
||||
// UI can still slice the log per row by time.
|
||||
var subs []SubStepReport
|
||||
for i, dev := range devices {
|
||||
summary, _ := json.Marshal(map[string]any{"pci": dev, "ordinal": i})
|
||||
subs = append(subs, SubStepReport{
|
||||
Name: fmt.Sprintf("pci #%d", i),
|
||||
Passed: true,
|
||||
StartedAt: pciStart,
|
||||
CompletedAt: pciEnd,
|
||||
SummaryJSON: summary,
|
||||
})
|
||||
}
|
||||
for i, line := range nvidia {
|
||||
summary, _ := json.Marshal(map[string]any{"nvidia_smi": line})
|
||||
subs = append(subs, SubStepReport{
|
||||
Name: fmt.Sprintf("nvidia #%d", i),
|
||||
Passed: true,
|
||||
StartedAt: nvStart,
|
||||
CompletedAt: nvEnd,
|
||||
SummaryJSON: summary,
|
||||
})
|
||||
}
|
||||
|
||||
return Outcome{
|
||||
Passed: true,
|
||||
Summary: formatCount(len(devices), "GPU present"),
|
||||
Extras: extras,
|
||||
Passed: true,
|
||||
Summary: formatCount(len(devices), "GPU present"),
|
||||
Extras: extras,
|
||||
SubSteps: subs,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
+43
-15
@@ -8,6 +8,7 @@ import (
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// SMART runs smartctl -a on each block device the kernel exposes. We
|
||||
@@ -46,25 +47,21 @@ func SMART(ctx context.Context, d Deps) Outcome {
|
||||
return Outcome{Passed: true, Summary: "skipped (no disks)", Extras: map[string]any{"skipped": true}}
|
||||
}
|
||||
|
||||
type diskReport struct {
|
||||
Device string `json:"device"`
|
||||
Passed bool `json:"passed"`
|
||||
Skipped bool `json:"skipped,omitempty"`
|
||||
Reason string `json:"reason,omitempty"`
|
||||
Raw map[string]any `json:"raw,omitempty"`
|
||||
}
|
||||
|
||||
var reports []diskReport
|
||||
var reports []smartDiskReport
|
||||
var subs []SubStepReport
|
||||
failed := 0
|
||||
usable := 0
|
||||
for _, dev := range disks {
|
||||
rep := diskReport{Device: dev}
|
||||
rep := smartDiskReport{Device: dev}
|
||||
started := time.Now()
|
||||
out, err := runSmartctl(ctx, dev)
|
||||
ended := time.Now()
|
||||
if err != nil {
|
||||
rep.Skipped = true
|
||||
rep.Reason = err.Error()
|
||||
reports = append(reports, rep)
|
||||
d.Info("SMART: " + dev + " skipped (" + err.Error() + ")")
|
||||
subs = append(subs, subStepFromSMART(dev, rep, started, ended))
|
||||
continue
|
||||
}
|
||||
usable++
|
||||
@@ -82,6 +79,7 @@ func SMART(ctx context.Context, d Deps) Outcome {
|
||||
rep.Reason = "no smart_status in output"
|
||||
}
|
||||
reports = append(reports, rep)
|
||||
subs = append(subs, subStepFromSMART(dev, rep, started, ended))
|
||||
}
|
||||
|
||||
extras := map[string]any{
|
||||
@@ -91,10 +89,11 @@ func SMART(ctx context.Context, d Deps) Outcome {
|
||||
}
|
||||
if failed > 0 {
|
||||
return Outcome{
|
||||
Passed: false,
|
||||
Message: fmt.Sprintf("%d disk(s) report SMART FAILED", failed),
|
||||
Summary: fmt.Sprintf("%d/%d failing", failed, usable),
|
||||
Extras: extras,
|
||||
Passed: false,
|
||||
Message: fmt.Sprintf("%d disk(s) report SMART FAILED", failed),
|
||||
Summary: fmt.Sprintf("%d/%d failing", failed, usable),
|
||||
Extras: extras,
|
||||
SubSteps: subs,
|
||||
}
|
||||
}
|
||||
summary := fmt.Sprintf("%d disks, %d SMART-reporting, all PASSED", len(disks), usable)
|
||||
@@ -102,7 +101,36 @@ func SMART(ctx context.Context, d Deps) Outcome {
|
||||
summary = "skipped (no smartctl data on any disk)"
|
||||
extras["skipped"] = true
|
||||
}
|
||||
return Outcome{Passed: true, Summary: summary, Extras: extras}
|
||||
return Outcome{Passed: true, Summary: summary, Extras: extras, SubSteps: subs}
|
||||
}
|
||||
|
||||
// smartDiskReport is the per-disk probe result. It lives at package
// scope so subStepFromSMART can accept it by value.
//
// Device names the probed disk. Skipped and Reason are filled when
// smartctl could not be run for the disk; Reason also explains other
// non-passing outcomes. Raw presumably carries the decoded smartctl
// output — NOTE(review): confirm against SMART, its assignment is not
// visible here.
type smartDiskReport struct {
	Device  string         `json:"device"`
	Passed  bool           `json:"passed"`
	Skipped bool           `json:"skipped,omitempty"`
	Reason  string         `json:"reason,omitempty"`
	Raw     map[string]any `json:"raw,omitempty"`
}
|
||||
|
||||
// subStepFromSMART builds a per-disk sub-step row from the in-flight
|
||||
// report. "skipped" takes precedence over passed so virtio-blk etc.
|
||||
// render as skipped rather than failed in the UI.
|
||||
func subStepFromSMART(dev string, rep smartDiskReport, started, ended time.Time) SubStepReport {
|
||||
summary, _ := json.Marshal(map[string]any{
|
||||
"device": rep.Device,
|
||||
"reason": rep.Reason,
|
||||
"skipped": rep.Skipped,
|
||||
})
|
||||
return SubStepReport{
|
||||
Name: fmt.Sprintf("smartctl %s", dev),
|
||||
Passed: rep.Passed || rep.Skipped,
|
||||
Skipped: rep.Skipped,
|
||||
StartedAt: started,
|
||||
CompletedAt: ended,
|
||||
SummaryJSON: summary,
|
||||
}
|
||||
}
|
||||
|
||||
func listBlockDisks() ([]string, error) {
|
||||
|
||||
+23
-4
@@ -16,11 +16,30 @@ import (
|
||||
// - Message is only used on failure; the UI displays it in the log.
|
||||
// - Extras is merged into the posted summary so stages can add
|
||||
// their own shape (e.g. Storage returns per-disk probe results).
|
||||
// - SubSteps carries agent-authored sub-step rows (CPU/Memory passes,
|
||||
// per-disk SMART, per-device GPU, …). Empty for stages with no
|
||||
// natural breakdown; persisted verbatim by the /result handler.
|
||||
type Outcome struct {
|
||||
Passed bool
|
||||
Message string
|
||||
Summary string // short human-readable one-liner
|
||||
Extras map[string]any // merged into posted summary JSON
|
||||
Passed bool
|
||||
Message string
|
||||
Summary string // short human-readable one-liner
|
||||
Extras map[string]any // merged into posted summary JSON
|
||||
SubSteps []SubStepReport // agent-authored granular rows
|
||||
}
|
||||
|
||||
// SubStepReport is one entry a stage contributes to its sub-step list.
// Ordinal is assigned in the order entries appear in the slice — the
// agent shouldn't set it manually. State is derived from Passed/Skipped:
// Skipped wins if set, otherwise Passed selects passed vs. failed.
// StartedAt/CompletedAt are required so the UI can order rows and slice
// the stage log by time window. SummaryJSON is a pre-encoded JSON
// document with the row's stage-specific details.
type SubStepReport struct {
	Name        string
	Passed      bool
	Skipped     bool
	StartedAt   time.Time
	CompletedAt time.Time
	SummaryJSON json.RawMessage
}
|
||||
|
||||
// MarshalSummary builds the summary JSON body POSTed to /result.
|
||||
|
||||
+32
-7
@@ -91,12 +91,35 @@ func Storage(ctx context.Context, d Deps) Outcome {
|
||||
|
||||
// Per target: short badblocks write sample + fio random-read/write.
|
||||
var samples []Sample
|
||||
var subs []SubStepReport
|
||||
perDisk := map[string]any{}
|
||||
for _, t := range targets {
|
||||
d.Info("Storage: running badblocks write sample on " + t.Device)
|
||||
bbStart := time.Now()
|
||||
bb := runBadblocks(ctx, t.Device)
|
||||
bbEnd := time.Now()
|
||||
bbSummary, _ := json.Marshal(bb)
|
||||
subs = append(subs, SubStepReport{
|
||||
Name: fmt.Sprintf("badblocks %s", t.Device),
|
||||
Passed: bb.OK,
|
||||
StartedAt: bbStart,
|
||||
CompletedAt: bbEnd,
|
||||
SummaryJSON: bbSummary,
|
||||
})
|
||||
|
||||
d.Info(fmt.Sprintf("Storage: running fio random rw on %s", t.Device))
|
||||
fioStart := time.Now()
|
||||
fr := runFio(ctx, t.Device)
|
||||
fioEnd := time.Now()
|
||||
fioSummary, _ := json.Marshal(fr)
|
||||
subs = append(subs, SubStepReport{
|
||||
Name: fmt.Sprintf("fio %s", t.Device),
|
||||
Passed: fr.Error == "",
|
||||
StartedAt: fioStart,
|
||||
CompletedAt: fioEnd,
|
||||
SummaryJSON: fioSummary,
|
||||
})
|
||||
|
||||
perDisk[t.Device] = map[string]any{
|
||||
"badblocks": bb,
|
||||
"fio": fr,
|
||||
@@ -107,10 +130,11 @@ func Storage(ctx context.Context, d Deps) Outcome {
|
||||
)
|
||||
if !bb.OK {
|
||||
return Outcome{
|
||||
Passed: false,
|
||||
Message: "badblocks found errors on " + t.Device,
|
||||
Summary: "badblocks failed on " + t.Device,
|
||||
Extras: map[string]any{"per_disk": perDisk, "wipe_probe": probes},
|
||||
Passed: false,
|
||||
Message: "badblocks found errors on " + t.Device,
|
||||
Summary: "badblocks failed on " + t.Device,
|
||||
Extras: map[string]any{"per_disk": perDisk, "wipe_probe": probes},
|
||||
SubSteps: subs,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -120,9 +144,10 @@ func Storage(ctx context.Context, d Deps) Outcome {
|
||||
|
||||
d.Info(fmt.Sprintf("Storage: %d disk(s) passed badblocks + fio", len(targets)))
|
||||
return Outcome{
|
||||
Passed: true,
|
||||
Summary: fmt.Sprintf("%d disks passed", len(targets)),
|
||||
Extras: map[string]any{"per_disk": perDisk, "wipe_probe": probes},
|
||||
Passed: true,
|
||||
Summary: fmt.Sprintf("%d disks passed", len(targets)),
|
||||
Extras: map[string]any{"per_disk": perDisk, "wipe_probe": probes},
|
||||
SubSteps: subs,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user