deep profile + threshold gating + firmware stage + Burn super-stage
CI / Lint + build + test (push) Failing after 1m57s
Release / release (push) Has been cancelled

Ships all five phases of the deep-profile overhaul together. Runs now
carry a profile (quick/deep/soak); every profile walks the same
11-stage order — Inventory → Firmware → SpecValidate → SMART →
CPUStress → Storage → Network → Burn → GPU → PSU → Reporting —
with only per-stage durations and concurrency scaled.

Phase 1: profiles.ProfileRegistry loaded from vetting.yaml; runs.profile
column + CreateWithProfile; threshold table + evaluator seeded per-run
from the shared vetting.thresholds block; breach flips result at
/sensor + /result.

Phase 2: upgraded CPUStress (stress-ng --cpu-method=all --verify +
EDAC/MCE poll), Storage (fio --verify=md5 + SMART start/end delta),
Network (sustained iperf + /proc/net/dev deltas) with per-profile
knobs from Deps.

Phase 3: Burn super-stage with goroutine fan-out for CPU + memory +
fio + iperf, PSU rails sampled across the Burn window, SensorMux
(2 s flush, 500-sample cap) to absorb backpressure.

Phase 4: Firmware stage + firmware_snapshots table; probes dmidecode
(BIOS), ipmitool (BMC), ethtool -i (NIC), nvme (sysfs + id-ctrl),
lspci (HBA), /proc/cpuinfo (microcode). spec.DiffFirmware folds into
SpecValidate with pin-by-identifier and fan-out-across-component
matching; mismatches park the run in FailedHolding.

Phase 5: profile radio on the host start form, profile chip on the
run header, Firmware section in the HTML report, coverage artifact
uploaded from CI, agent/tests/fakes/ scaffold with Deps.LookPath
seam + stress_ng and dmidecode example fakes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-18 22:50:57 -04:00
parent fbb21cbafd
commit 23c689aa5b
60 changed files with 5911 additions and 527 deletions
+27 -17
View File
@@ -361,7 +361,7 @@ func HostActions(d HostPageData) templ.Component {
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 21, "\" class=\"inline host-start-form\"><label class=\"host-nd-toggle\"><input type=\"checkbox\" name=\"non_destructive\" value=\"1\"> Non-destructive (skip wipe-probe + disk writes)</label> <button type=\"submit\" class=\"btn-primary\">Start vetting</button></form>")
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 21, "\" class=\"inline host-start-form\"><fieldset class=\"host-profile-picker\"><legend>Profile</legend> <label title=\"~10 min — post-repair sanity: all probes + gates, short budgets\"><input type=\"radio\" name=\"profile\" value=\"quick\" checked> quick</label> <label title=\"~812 h — overnight soak: long CPU/RAM, full-disk fio verify, 30 min network\"><input type=\"radio\" name=\"profile\" value=\"deep\"> deep</label> <label title=\"≥24 h — week-long burn-in; opt-in when you suspect intermittent faults\"><input type=\"radio\" name=\"profile\" value=\"soak\"> soak</label></fieldset><label class=\"host-nd-toggle\"><input type=\"checkbox\" name=\"non_destructive\" value=\"1\"> Non-destructive (skip wipe-probe + disk writes)</label> <button type=\"submit\" class=\"btn-primary\">Start vetting</button></form>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
@@ -383,7 +383,7 @@ func HostActions(d HostPageData) templ.Component {
var templ_7745c5c3_Var19 templ.SafeURL
templ_7745c5c3_Var19, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/delete", d.Host.ID)))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 116, Col: 89}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 131, Col: 89}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var19))
if templ_7745c5c3_Err != nil {
@@ -428,7 +428,7 @@ func InFlightBanner(d HostPageData) templ.Component {
var templ_7745c5c3_Var21 string
templ_7745c5c3_Var21, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-inflight-%d", d.Host.ID))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 128, Col: 51}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 143, Col: 51}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var21))
if templ_7745c5c3_Err != nil {
@@ -441,7 +441,7 @@ func InFlightBanner(d HostPageData) templ.Component {
var templ_7745c5c3_Var22 string
templ_7745c5c3_Var22, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-inflight-%d", d.Host.ID))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 130, Col: 57}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 145, Col: 57}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var22))
if templ_7745c5c3_Err != nil {
@@ -459,7 +459,7 @@ func InFlightBanner(d HostPageData) templ.Component {
var templ_7745c5c3_Var23 templ.SafeURL
templ_7745c5c3_Var23, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/runs/%d", d.ActiveRun.ID)))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 134, Col: 92}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 149, Col: 92}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var23))
if templ_7745c5c3_Err != nil {
@@ -472,7 +472,7 @@ func InFlightBanner(d HostPageData) templ.Component {
var templ_7745c5c3_Var24 string
templ_7745c5c3_Var24, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("%d", d.ActiveRun.ID))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 135, Col: 74}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 150, Col: 74}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var24))
if templ_7745c5c3_Err != nil {
@@ -485,7 +485,7 @@ func InFlightBanner(d HostPageData) templ.Component {
var templ_7745c5c3_Var25 string
templ_7745c5c3_Var25, templ_7745c5c3_Err = templ.JoinStringErrs(tileStatus(d.ActiveRun))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 136, Col: 59}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 151, Col: 59}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var25))
if templ_7745c5c3_Err != nil {
@@ -541,7 +541,7 @@ func HostEmptyState(d HostPageData) templ.Component {
var templ_7745c5c3_Var27 templ.SafeURL
templ_7745c5c3_Var27, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/start", d.Host.ID)))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 152, Col: 88}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 167, Col: 88}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var27))
if templ_7745c5c3_Err != nil {
@@ -655,7 +655,7 @@ func RunRow(d RunRowData) templ.Component {
var templ_7745c5c3_Var31 string
templ_7745c5c3_Var31, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("runrow-%d", d.Run.ID))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 204, Col: 41}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 219, Col: 41}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var31))
if templ_7745c5c3_Err != nil {
@@ -681,7 +681,7 @@ func RunRow(d RunRowData) templ.Component {
var templ_7745c5c3_Var33 string
templ_7745c5c3_Var33, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("runrow-%d", d.Run.ID))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 206, Col: 47}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 221, Col: 47}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var33))
if templ_7745c5c3_Err != nil {
@@ -694,7 +694,7 @@ func RunRow(d RunRowData) templ.Component {
var templ_7745c5c3_Var34 templ.SafeURL
templ_7745c5c3_Var34, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/runs/%d", d.Run.ID)))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 210, Col: 61}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 225, Col: 61}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var34))
if templ_7745c5c3_Err != nil {
@@ -707,7 +707,7 @@ func RunRow(d RunRowData) templ.Component {
var templ_7745c5c3_Var35 string
templ_7745c5c3_Var35, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("#%d", d.Run.ID))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 210, Col: 94}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 225, Col: 94}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var35))
if templ_7745c5c3_Err != nil {
@@ -742,7 +742,7 @@ func RunRow(d RunRowData) templ.Component {
var templ_7745c5c3_Var38 string
templ_7745c5c3_Var38, templ_7745c5c3_Err = templ.JoinStringErrs(tileStatus(&d.Run))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 213, Col: 92}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 228, Col: 92}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var38))
if templ_7745c5c3_Err != nil {
@@ -755,7 +755,7 @@ func RunRow(d RunRowData) templ.Component {
var templ_7745c5c3_Var39 string
templ_7745c5c3_Var39, templ_7745c5c3_Err = templ.JoinStringErrs(relativeTime(d.Run.StartedAt))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 215, Col: 62}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 230, Col: 62}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var39))
if templ_7745c5c3_Err != nil {
@@ -768,7 +768,7 @@ func RunRow(d RunRowData) templ.Component {
var templ_7745c5c3_Var40 string
templ_7745c5c3_Var40, templ_7745c5c3_Err = templ.JoinStringErrs(runDuration(&d.Run))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 216, Col: 53}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 231, Col: 53}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var40))
if templ_7745c5c3_Err != nil {
@@ -805,7 +805,7 @@ func RunRow(d RunRowData) templ.Component {
var templ_7745c5c3_Var43 string
templ_7745c5c3_Var43, templ_7745c5c3_Err = templ.JoinStringErrs(name)
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 221, Col: 94}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 236, Col: 94}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var43))
if templ_7745c5c3_Err != nil {
@@ -823,7 +823,7 @@ func RunRow(d RunRowData) templ.Component {
var templ_7745c5c3_Var44 templ.SafeURL
templ_7745c5c3_Var44, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/runs/%d", d.Run.ID)))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 226, Col: 84}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 241, Col: 84}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var44))
if templ_7745c5c3_Err != nil {
@@ -867,6 +867,16 @@ func hostCanStartIfOnline(d HostPageData) bool {
return d.ActiveRun == nil
}
// profileChipValue normalizes a Run.Profile string for display on the
// run page chip. Older runs with an empty column predate Phase 1 — show
// them as "quick" (the prior implicit default).
func profileChipValue(p string) string {
if p == "" {
return "quick"
}
return p
}
// runDuration formats the elapsed time for a run using the same buckets
// as stageDuration. In-flight runs clock from StartedAt to now so the
// run-page header + runs-table row keep ticking on each SSE push.