deep profile + threshold gating + firmware stage + Burn super-stage
CI / Lint + build + test (push) Failing after 1m57s
Release / release (push) Has been cancelled

Ships all five phases of the deep-profile overhaul together. Runs now
carry a profile (quick/deep/soak); every profile walks the same
11-stage order — Inventory → Firmware → SpecValidate → SMART →
CPUStress → Storage → Network → Burn → GPU → PSU → Reporting —
with only per-stage durations and concurrency scaled.

Phase 1: profiles.ProfileRegistry loaded from vetting.yaml; runs.profile
column + CreateWithProfile; threshold table + evaluator seeded per-run
from the shared vetting.thresholds block; breach flips result at
/sensor + /result.

Phase 2: upgraded CPUStress (stress-ng --cpu-method=all --verify +
EDAC/MCE poll), Storage (fio --verify=md5 + SMART start/end delta),
Network (sustained iperf + /proc/net/dev deltas) with per-profile
knobs from Deps.

Phase 3: Burn super-stage with goroutine fan-out for CPU + memory +
fio + iperf, PSU rails sampled across the Burn window, SensorMux
(2 s flush, 500-sample cap) to absorb backpressure.

Phase 4: Firmware stage + firmware_snapshots table; probes dmidecode
(BIOS), ipmitool (BMC), ethtool -i (NIC), nvme (sysfs + id-ctrl),
lspci (HBA), /proc/cpuinfo (microcode). spec.DiffFirmware folds into
SpecValidate with pin-by-identifier and fan-out-across-component
matching; mismatches park the run in FailedHolding.

Phase 5: profile radio on the host start form, profile chip on the
run header, Firmware section in the HTML report, coverage artifact
uploaded from CI, agent/tests/fakes/ scaffold with Deps.LookPath
seam + stress_ng and dmidecode example fakes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-18 22:50:57 -04:00
parent fbb21cbafd
commit 23c689aa5b
60 changed files with 5911 additions and 527 deletions
+12 -8
View File
@@ -48,11 +48,13 @@ func runStateRank(s model.RunState) int {
model.StateWaitingReboot,
model.StateBooting,
model.StateInventoryCheck,
model.StateFirmware,
model.StateSpecValidate,
model.StateSMART,
model.StateCPUStress,
model.StateStorage,
model.StateNetwork,
model.StateBurn,
model.StateGPU,
model.StatePSU,
model.StateReporting,
@@ -213,11 +215,13 @@ func firstStageState(run *model.Run) model.RunState {
func stageStateByName(name string) (model.RunState, bool) {
m := map[string]model.RunState{
"Inventory": model.StateInventoryCheck,
"Firmware": model.StateFirmware,
"SpecValidate": model.StateSpecValidate,
"SMART": model.StateSMART,
"CPUStress": model.StateCPUStress,
"Storage": model.StateStorage,
"Network": model.StateNetwork,
"Burn": model.StateBurn,
"GPU": model.StateGPU,
"PSU": model.StatePSU,
"Reporting": model.StateReporting,
@@ -312,7 +316,7 @@ func Pipeline(nodes []PipelineNode) templ.Component {
var templ_7745c5c3_Var3 string
templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var2).String())
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 1, Col: 0}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 1, Col: 0}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
if templ_7745c5c3_Err != nil {
@@ -339,7 +343,7 @@ func Pipeline(nodes []PipelineNode) templ.Component {
var templ_7745c5c3_Var5 string
templ_7745c5c3_Var5, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var4).String())
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 1, Col: 0}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 1, Col: 0}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var5))
if templ_7745c5c3_Err != nil {
@@ -361,7 +365,7 @@ func Pipeline(nodes []PipelineNode) templ.Component {
var templ_7745c5c3_Var7 string
templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var6).String())
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 1, Col: 0}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 1, Col: 0}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7))
if templ_7745c5c3_Err != nil {
@@ -374,7 +378,7 @@ func Pipeline(nodes []PipelineNode) templ.Component {
var templ_7745c5c3_Var8 string
templ_7745c5c3_Var8, templ_7745c5c3_Err = templ.JoinStringErrs(stageMarker(n.State))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 275, Col: 77}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 279, Col: 77}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var8))
if templ_7745c5c3_Err != nil {
@@ -387,7 +391,7 @@ func Pipeline(nodes []PipelineNode) templ.Component {
var templ_7745c5c3_Var9 string
templ_7745c5c3_Var9, templ_7745c5c3_Err = templ.JoinStringErrs(n.Name)
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 276, Col: 36}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 280, Col: 36}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var9))
if templ_7745c5c3_Err != nil {
@@ -400,7 +404,7 @@ func Pipeline(nodes []PipelineNode) templ.Component {
var templ_7745c5c3_Var10 string
templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(stageDuration(n))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 277, Col: 50}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 281, Col: 50}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10))
if templ_7745c5c3_Err != nil {
@@ -454,7 +458,7 @@ func PipelineSection(run *model.Run, nodes []PipelineNode) templ.Component {
var templ_7745c5c3_Var12 string
templ_7745c5c3_Var12, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("pipeline-%d", run.ID))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 292, Col: 41}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 296, Col: 41}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var12))
if templ_7745c5c3_Err != nil {
@@ -467,7 +471,7 @@ func PipelineSection(run *model.Run, nodes []PipelineNode) templ.Component {
var templ_7745c5c3_Var13 string
templ_7745c5c3_Var13, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("pipeline-%d", run.ID))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 294, Col: 47}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 298, Col: 47}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var13))
if templ_7745c5c3_Err != nil {