Files
Vetting/internal/web/templates/pipeline.templ
T
josh 23c689aa5b
CI / Lint + build + test (push) Failing after 1m57s
Release / release (push) Has been cancelled
deep profile + threshold gating + firmware stage + Burn super-stage
Ships all five phases of the deep-profile overhaul together. Runs now
carry a profile (quick/deep/soak); every profile walks the same
11-stage order — Inventory → Firmware → SpecValidate → SMART →
CPUStress → Storage → Network → Burn → GPU → PSU → Reporting —
with only per-stage durations and concurrency scaled.

Phase 1: profiles.ProfileRegistry loaded from vetting.yaml; runs.profile
column + CreateWithProfile; threshold table + evaluator seeded per-run
from the shared vetting.thresholds block; breach flips result at
/sensor + /result.

Phase 2: upgraded CPUStress (stress-ng --cpu-method=all --verify +
EDAC/MCE poll), Storage (fio --verify=md5 + SMART start/end delta),
Network (sustained iperf + /proc/net/dev deltas) with per-profile
knobs from Deps.

Phase 3: Burn super-stage with goroutine fan-out for CPU + memory +
fio + iperf, PSU rails sampled across the Burn window, SensorMux
(2 s flush, 500-sample cap) to absorb backpressure.

Phase 4: Firmware stage + firmware_snapshots table; probes dmidecode
(BIOS), ipmitool (BMC), ethtool -i (NIC), nvme (sysfs + id-ctrl),
lspci (HBA), /proc/cpuinfo (microcode). spec.DiffFirmware folds into
SpecValidate with pin-by-identifier and fan-out-across-component
matching; mismatches park the run in FailedHolding.

Phase 5: profile radio on the host start form, profile chip on the
run header, Firmware section in the HTML report, coverage artifact
uploaded from CI, agent/tests/fakes/ scaffold with Deps.LookPath
seam + stress_ng and dmidecode example fakes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-18 22:50:57 -04:00

319 lines
9.5 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package templates
import (
"bytes"
"context"
"fmt"
"time"
"vetting/internal/model"
"vetting/internal/store"
)
// PipelineNode is one dot on the detail-page timeline. The template
// doesn't know stages from pre-stages: it just renders whatever the
// BuildPipeline helper produces, in order.
type PipelineNode struct {
// Name is the label rendered under the dot.
Name string
// State selects the CSS class suffix and the glyph inside the dot.
State string // pending|running|passed|failed|skipped
// StartedAt is nil when no start time is known; stageDuration then
// renders an empty label.
StartedAt *time.Time
// CompletedAt is nil until the node finishes; stageDuration measures
// up to time.Now() in that case.
CompletedAt *time.Time
}
// preStageOrder lists, in display order, the run states rendered as
// nodes before the first real stage. Their node state is derived from
// run.State rather than from stage rows since we don't persist
// pre-stage timestamps.
//
// BuildPipeline positions each entry with runStateRank, so an entry
// that is missing from that function's ordering would rank -1.
var preStageOrder = []model.RunState{
model.StateQueued,
model.StateWaitingReboot,
model.StateBooting,
}
// runStateRank maps a run state to its position in one linear ordering
// that covers the pre-stage states, every stage state, and Completed.
// BuildPipeline and ghostStageState compare ranks to decide whether a
// node is behind the run ("passed"), at it ("running"), or ahead of it
// ("pending").
//
// States that are not part of the ordering yield -1. That includes
// StateFailed and StateFailedHolding, which are not listed here.
func runStateRank(s model.RunState) int {
	lifecycle := [...]model.RunState{
		model.StateRegistered,
		model.StateQueued,
		model.StateWaitingReboot,
		model.StateBooting,
		model.StateInventoryCheck,
		model.StateFirmware,
		model.StateSpecValidate,
		model.StateSMART,
		model.StateCPUStress,
		model.StateStorage,
		model.StateNetwork,
		model.StateBurn,
		model.StateGPU,
		model.StatePSU,
		model.StateReporting,
		model.StateCompleted,
	}
	for pos := range lifecycle {
		if lifecycle[pos] == s {
			return pos
		}
	}
	return -1
}
// BuildPipeline flattens a run and its stage rows into the ordered node
// list the timeline renders: the pre-stage nodes from preStageOrder,
// then one node per entry of store.DefaultStageOrder, then a closing
// "Completed" node.
//
// Every stage in store.DefaultStageOrder gets a node whether or not a
// row for it is present in stages. A stage without a row becomes a
// "ghost" whose state comes from ghostStageState, so a run whose stage
// rows have not been seeded yet still shows the full pipeline ahead of
// it.
//
// run may be nil. With a nil run and no stage rows every node is
// "pending", so a never-run host still shows what's coming.
func BuildPipeline(run *model.Run, stages []model.Stage) []PipelineNode {
	out := make([]PipelineNode, 0, len(preStageOrder)+len(store.DefaultStageOrder)+1)

	// Pre-stage nodes: derived purely from run.State.
	for _, pre := range preStageOrder {
		state := "pending"
		if run != nil {
			if run.State == model.StateFailed || run.State == model.StateFailedHolding {
				// A failed run no longer sits in the linear ordering, so
				// compare against the state it failed in instead. Pre-stage
				// nodes ranked before that point were entered and left.
				if runStateRank(pre) < runStateRank(firstStageState(run)) {
					state = "passed"
				}
			} else if run.State == pre {
				state = "running"
			} else if runStateRank(run.State) > runStateRank(pre) {
				state = "passed"
			}
		}
		out = append(out, PipelineNode{Name: string(pre), State: state})
	}

	// Stage nodes: walk DefaultStageOrder rather than the stages slice
	// so the list is always complete. A persisted row wins when one
	// exists; otherwise the node is a ghost.
	rows := make(map[string]model.Stage, len(stages))
	for i := range stages {
		rows[stages[i].Name] = stages[i]
	}
	sawFailure := false
	for _, stageName := range store.DefaultStageOrder {
		row, seeded := rows[stageName]
		if !seeded {
			out = append(out, PipelineNode{
				Name:  stageName,
				State: ghostStageState(run, stageName, sawFailure),
			})
			continue
		}

		node := PipelineNode{
			Name:        stageName,
			StartedAt:   row.StartedAt,
			CompletedAt: row.CompletedAt,
		}
		if sawFailure {
			// Once an earlier row has failed, every later node renders
			// as skipped regardless of its own persisted state.
			node.State = "skipped"
		} else {
			switch row.State {
			case model.StagePassed:
				node.State = "passed"
			case model.StageRunning:
				node.State = "running"
			case model.StageFailed:
				node.State = "failed"
				sawFailure = true
			case model.StageSkipped:
				node.State = "skipped"
			default:
				node.State = "pending"
			}
		}
		out = append(out, node)
	}

	// Terminal node: only a completed run marks it passed.
	terminal := PipelineNode{Name: "Completed", State: "pending"}
	if run != nil && run.State == model.StateCompleted {
		terminal.State = "passed"
		terminal.CompletedAt = run.CompletedAt
	}
	return append(out, terminal)
}
// ghostStageState picks the node state for a stage that has no row in
// the stages slice handed to BuildPipeline.
//
// Resolution order:
//   - failedBefore set: "skipped".
//   - nil run: "pending".
//   - run in StateFailed or StateFailedHolding: "skipped" when the stage
//     comes after run.FailedStage in store.DefaultStageOrder, otherwise
//     "pending" (this includes the failed stage itself and an empty or
//     unknown FailedStage).
//   - otherwise: "running" when the run is in this stage's state,
//     "passed" when the run ranks beyond it, "pending" for everything
//     else, including stage names stageStateByName does not know.
func ghostStageState(run *model.Run, name string, failedBefore bool) string {
	switch {
	case failedBefore:
		return "skipped"
	case run == nil:
		return "pending"
	}

	if run.State == model.StateFailed || run.State == model.StateFailedHolding {
		if run.FailedStage == "" {
			return "pending"
		}
		failedAt, failedKnown := stageRank(run.FailedStage)
		self, selfKnown := stageRank(name)
		if failedKnown && selfKnown && self > failedAt {
			return "skipped"
		}
		return "pending"
	}

	target, known := stageStateByName(name)
	if !known {
		return "pending"
	}
	if run.State == target {
		return "running"
	}
	if runStateRank(run.State) > runStateRank(target) {
		return "passed"
	}
	return "pending"
}
// stageRank reports the index of the named stage in
// store.DefaultStageOrder. The second result is false, and the index
// -1, when the name is not in that list. ghostStageState uses it to tell
// which stages come after a failed one.
func stageRank(name string) (int, bool) {
	order := store.DefaultStageOrder
	for idx := range order {
		if order[idx] == name {
			return idx, true
		}
	}
	return -1, false
}
// firstStageState resolves the run state that BuildPipeline's pre-stage
// "past" check should rank against. When run.FailedStage names a stage
// that stageStateByName knows, that stage's run state is returned;
// otherwise the run's current State is returned unchanged. This lets a
// pre-stage node such as Booting render "passed" even after the run
// failed further along.
//
// run must be non-nil.
func firstStageState(run *model.Run) model.RunState {
	if run.FailedStage == "" {
		return run.State
	}
	failedIn, known := stageStateByName(run.FailedStage)
	if !known {
		return run.State
	}
	return failedIn
}
// stageStateByName translates a stage name into the run state a run is
// in while that stage executes. The second result is false, and the
// state the zero value, for names outside the table below.
//
// It mirrors orchestrator.StateForStage without the import (templates
// can't see orchestrator), so the two must be kept in step by hand.
func stageStateByName(name string) (model.RunState, bool) {
	switch name {
	case "Inventory":
		return model.StateInventoryCheck, true
	case "Firmware":
		return model.StateFirmware, true
	case "SpecValidate":
		return model.StateSpecValidate, true
	case "SMART":
		return model.StateSMART, true
	case "CPUStress":
		return model.StateCPUStress, true
	case "Storage":
		return model.StateStorage, true
	case "Network":
		return model.StateNetwork, true
	case "Burn":
		return model.StateBurn, true
	case "GPU":
		return model.StateGPU, true
	case "PSU":
		return model.StatePSU, true
	case "Reporting":
		return model.StateReporting, true
	}
	var unknown model.RunState
	return unknown, false
}
// stageDuration formats how long a node ran, or has been running so
// far, as a compact label: "350ms", "1.2s", "12s", "4m", "3h".
//
// It returns "" when the node has no StartedAt. When StartedAt is set
// but CompletedAt is nil the span is measured up to time.Now(), so an
// unfinished node's label grows on every re-render. A negative span
// (CompletedAt earlier than StartedAt) is clamped to zero.
//
// Every unit truncates instead of rounding: 59.9s is "59s" and 9.96s is
// "9.9s". The sub-10s branch therefore never prints "10.0s", which a
// rounding "%.1f" would produce for spans just under ten seconds.
func stageDuration(n PipelineNode) string {
	if n.StartedAt == nil {
		return ""
	}
	end := time.Now()
	if n.CompletedAt != nil {
		end = *n.CompletedAt
	}
	d := end.Sub(*n.StartedAt)
	if d < 0 {
		d = 0
	}
	switch {
	case d < time.Second:
		return fmt.Sprintf("%dms", int(d/time.Millisecond))
	case d < 10*time.Second:
		// Whole tenths of a second, split into integer and fraction so
		// the value is truncated like every other branch.
		tenths := int(d / (100 * time.Millisecond))
		return fmt.Sprintf("%d.%ds", tenths/10, tenths%10)
	case d < time.Minute:
		return fmt.Sprintf("%ds", int(d/time.Second))
	case d < time.Hour:
		return fmt.Sprintf("%dm", int(d/time.Minute))
	default:
		return fmt.Sprintf("%dh", int(d/time.Hour))
	}
}
// stageMarker picks the single-character glyph drawn inside a node's
// dot. The dot is already colored through its CSS class, so the glyph
// is a redundant-but-helpful second cue. Only passed, failed and
// running nodes get one; every other state, "skipped" and "pending"
// included, yields the empty string.
func stageMarker(state string) string {
	if state == "passed" {
		return "✓"
	}
	if state == "failed" {
		return "!"
	}
	if state == "running" {
		return "●"
	}
	return ""
}
// Pipeline renders the ordered dot-and-line timeline. The caller wraps
// it in a <section id="pipeline-{runID}" sse-swap=...> so the runner can
// re-emit the fragment as stages progress.
//
// Nodes are emitted in slice order. A connector is placed before every
// node except the first, and its class carries the state of the node to
// its left (nodes[i-1].State). Each node shows the stageMarker glyph for
// its state, its Name, and the stageDuration label.
templ Pipeline(nodes []PipelineNode) {
<div class="pipeline">
for i, n := range nodes {
if i > 0 {
<div class={ "stage-connector", "stage-connector-" + nodes[i-1].State }></div>
}
<div class={ "stage-node", "stage-node-" + n.State }>
<div class={ "stage-dot", "stage-dot-" + n.State }>{ stageMarker(n.State) }</div>
<div class="stage-name">{ n.Name }</div>
<div class="stage-duration">{ stageDuration(n) }</div>
</div>
}
</div>
}
// PipelineSection wraps Pipeline in the same <section id=pipeline-N
// sse-swap=pipeline-N hx-swap=outerHTML> the runner targets. Used both
// from the initial detail-page shell and from RenderPipelineString so
// the wrapper is present on the wire and after every SSE swap. Without
// this, the first outerHTML swap would replace the section with a bare
// <div class=pipeline>, wiping out the sse-swap attribute and freezing
// the pipeline until full page reload.
//
// run must be non-nil: run.ID is read to build both the element id and
// the sse-swap name. RenderPipelineString checks for nil before calling.
// NOTE(review): confirm that the detail-page caller never passes a nil
// run, since BuildPipeline itself accepts one.
templ PipelineSection(run *model.Run, nodes []PipelineNode) {
<section
id={ fmt.Sprintf("pipeline-%d", run.ID) }
class="detail-section"
sse-swap={ fmt.Sprintf("pipeline-%d", run.ID) }
hx-swap="outerHTML"
>
<h2>Pipeline</h2>
@Pipeline(nodes)
</section>
}
// RenderPipelineString renders the pipeline for a run to an HTML string
// in one shot. The orchestrator registers it at startup so it can
// publish pipeline fragments over SSE without pulling in the template
// package directly.
//
// The result is the full PipelineSection wrapper, not the bare
// timeline, so repeated outerHTML swaps keep the sse-swap target in
// place. A nil run yields "".
func RenderPipelineString(run *model.Run, stages []model.Stage) string {
	if run == nil {
		return ""
	}
	nodes := BuildPipeline(run, stages)
	out := new(bytes.Buffer)
	// The render error is deliberately dropped: the signature has no
	// error result, and whatever was written before a failure is still
	// returned. The buffer itself never reports a write error.
	_ = PipelineSection(run, nodes).Render(context.Background(), out)
	return out.String()
}