deep profile + threshold gating + firmware stage + Burn super-stage
Ships all five phases of the deep-profile overhaul together. Runs now carry a profile (quick/deep/soak); every profile walks the same 11-stage order — Inventory → Firmware → SpecValidate → SMART → CPUStress → Storage → Network → Burn → GPU → PSU → Reporting — with only per-stage durations and concurrency scaled. Phase 1: profiles.ProfileRegistry loaded from vetting.yaml; runs.profile column + CreateWithProfile; threshold table + evaluator seeded per-run from the shared vetting.thresholds block; breach flips result at /sensor + /result. Phase 2: upgraded CPUStress (stress-ng --cpu-method=all --verify + EDAC/MCE poll), Storage (fio --verify=md5 + SMART start/end delta), Network (sustained iperf + /proc/net/dev deltas) with per-profile knobs from Deps. Phase 3: Burn super-stage with goroutine fan-out for CPU + memory + fio + iperf, PSU rails sampled across the Burn window, SensorMux (2 s flush, 500-sample cap) to absorb backpressure. Phase 4: Firmware stage + firmware_snapshots table; probes dmidecode (BIOS), ipmitool (BMC), ethtool -i (NIC), nvme (sysfs + id-ctrl), lspci (HBA), /proc/cpuinfo (microcode). spec.DiffFirmware folds into SpecValidate with pin-by-identifier and fan-out-across-component matching; mismatches park the run in FailedHolding. Phase 5: profile radio on the host start form, profile chip on the run header, Firmware section in the HTML report, coverage artifact uploaded from CI, agent/tests/fakes/ scaffold with Deps.LookPath seam + stress_ng and dmidecode example fakes. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -8,26 +8,28 @@ import (
|
||||
)
|
||||
|
||||
// node indexes for the default pipeline layout: pre-stages (3) + stage
|
||||
// rows (9) + terminal Completed (1) = 13 nodes.
|
||||
// rows (11) + terminal Completed (1) = 15 nodes.
|
||||
const (
|
||||
idxQueued = 0
|
||||
idxWaitingReboot = 1
|
||||
idxBooting = 2
|
||||
idxInventory = 3
|
||||
idxSpecValidate = 4
|
||||
idxSMART = 5
|
||||
idxCPUStress = 6
|
||||
idxStorage = 7
|
||||
idxNetwork = 8
|
||||
idxGPU = 9
|
||||
idxPSU = 10
|
||||
idxReporting = 11
|
||||
idxCompleted = 12
|
||||
idxFirmware = 4
|
||||
idxSpecValidate = 5
|
||||
idxSMART = 6
|
||||
idxCPUStress = 7
|
||||
idxStorage = 8
|
||||
idxNetwork = 9
|
||||
idxBurn = 10
|
||||
idxGPU = 11
|
||||
idxPSU = 12
|
||||
idxReporting = 13
|
||||
idxCompleted = 14
|
||||
)
|
||||
|
||||
// seedStages returns a fresh all-pending stage slice in the canonical order.
|
||||
func seedStages() []model.Stage {
|
||||
names := []string{"Inventory", "SpecValidate", "SMART", "CPUStress", "Storage", "Network", "GPU", "PSU", "Reporting"}
|
||||
names := []string{"Inventory", "Firmware", "SpecValidate", "SMART", "CPUStress", "Storage", "Network", "Burn", "GPU", "PSU", "Reporting"}
|
||||
out := make([]model.Stage, len(names))
|
||||
for i, n := range names {
|
||||
out[i] = model.Stage{Name: n, Ordinal: i, State: model.StagePending}
|
||||
@@ -37,10 +39,10 @@ func seedStages() []model.Stage {
|
||||
|
||||
func TestBuildPipeline_NoRun(t *testing.T) {
|
||||
nodes := BuildPipeline(nil, nil)
|
||||
// Ghost pipeline: 3 pre-stages + 9 stage ghosts + 1 terminal = 13
|
||||
// Ghost pipeline: 3 pre-stages + 11 stage ghosts + 1 terminal = 15
|
||||
// nodes, all pending.
|
||||
if len(nodes) != 13 {
|
||||
t.Fatalf("len = %d, want 13", len(nodes))
|
||||
if len(nodes) != 15 {
|
||||
t.Fatalf("len = %d, want 15", len(nodes))
|
||||
}
|
||||
for i, n := range nodes {
|
||||
if n.State != "pending" {
|
||||
@@ -56,8 +58,8 @@ func TestBuildPipeline_NoRun(t *testing.T) {
|
||||
func TestBuildPipeline_GhostStagesBeforeClaim(t *testing.T) {
|
||||
run := &model.Run{State: model.StateWaitingReboot}
|
||||
nodes := BuildPipeline(run, nil)
|
||||
if len(nodes) != 13 {
|
||||
t.Fatalf("len = %d, want 13", len(nodes))
|
||||
if len(nodes) != 15 {
|
||||
t.Fatalf("len = %d, want 15", len(nodes))
|
||||
}
|
||||
if nodes[idxQueued].State != "passed" {
|
||||
t.Errorf("Queued = %q, want passed", nodes[idxQueued].State)
|
||||
@@ -65,7 +67,7 @@ func TestBuildPipeline_GhostStagesBeforeClaim(t *testing.T) {
|
||||
if nodes[idxWaitingReboot].State != "running" {
|
||||
t.Errorf("WaitingReboot = %q, want running", nodes[idxWaitingReboot].State)
|
||||
}
|
||||
// All 9 stage ghosts must be pending — nothing has started yet.
|
||||
// All 11 stage ghosts must be pending — nothing has started yet.
|
||||
for i := idxInventory; i <= idxReporting; i++ {
|
||||
if nodes[i].State != "pending" {
|
||||
t.Errorf("%s (ghost) = %q, want pending", nodes[i].Name, nodes[i].State)
|
||||
@@ -81,19 +83,20 @@ func TestBuildPipeline_GhostStagesBeforeClaim(t *testing.T) {
|
||||
// pending ghosts rather than silently disappearing.
|
||||
func TestBuildPipeline_GhostStagesDuringStage(t *testing.T) {
|
||||
run := &model.Run{State: model.StateSMART}
|
||||
// Only Inventory + SpecValidate seeded; SMART onwards are ghosts.
|
||||
// Only Inventory + Firmware + SpecValidate seeded; SMART onwards are ghosts.
|
||||
stages := []model.Stage{
|
||||
{Name: "Inventory", Ordinal: 0, State: model.StagePassed},
|
||||
{Name: "SpecValidate", Ordinal: 1, State: model.StagePassed},
|
||||
{Name: "Firmware", Ordinal: 1, State: model.StagePassed},
|
||||
{Name: "SpecValidate", Ordinal: 2, State: model.StagePassed},
|
||||
}
|
||||
nodes := BuildPipeline(run, stages)
|
||||
if len(nodes) != 13 {
|
||||
t.Fatalf("len = %d, want 13", len(nodes))
|
||||
if len(nodes) != 15 {
|
||||
t.Fatalf("len = %d, want 15", len(nodes))
|
||||
}
|
||||
if nodes[idxSMART].State != "running" {
|
||||
t.Errorf("SMART (ghost) = %q, want running", nodes[idxSMART].State)
|
||||
}
|
||||
for _, i := range []int{idxCPUStress, idxStorage, idxNetwork, idxGPU, idxPSU, idxReporting} {
|
||||
for _, i := range []int{idxCPUStress, idxStorage, idxNetwork, idxBurn, idxGPU, idxPSU, idxReporting} {
|
||||
if nodes[i].State != "pending" {
|
||||
t.Errorf("%s (ghost) = %q, want pending", nodes[i].Name, nodes[i].State)
|
||||
}
|
||||
@@ -103,12 +106,13 @@ func TestBuildPipeline_GhostStagesDuringStage(t *testing.T) {
|
||||
func TestBuildPipeline_Running(t *testing.T) {
|
||||
run := &model.Run{State: model.StateSMART}
|
||||
stages := seedStages()
|
||||
stages[0].State = model.StagePassed
|
||||
stages[1].State = model.StagePassed
|
||||
stages[2].State = model.StageRunning
|
||||
stages[0].State = model.StagePassed // Inventory
|
||||
stages[1].State = model.StagePassed // Firmware
|
||||
stages[2].State = model.StagePassed // SpecValidate
|
||||
stages[3].State = model.StageRunning // SMART
|
||||
nodes := BuildPipeline(run, stages)
|
||||
if len(nodes) != 13 {
|
||||
t.Fatalf("len = %d, want 13", len(nodes))
|
||||
if len(nodes) != 15 {
|
||||
t.Fatalf("len = %d, want 15", len(nodes))
|
||||
}
|
||||
// Pre-stages are all past for a run that has reached SMART.
|
||||
for i := idxQueued; i <= idxBooting; i++ {
|
||||
@@ -136,10 +140,10 @@ func TestBuildPipeline_Running(t *testing.T) {
|
||||
func TestBuildPipeline_Failed(t *testing.T) {
|
||||
run := &model.Run{State: model.StateFailedHolding, FailedStage: "Storage"}
|
||||
stages := seedStages()
|
||||
for i := 0; i <= 3; i++ {
|
||||
for i := 0; i <= 4; i++ {
|
||||
stages[i].State = model.StagePassed
|
||||
}
|
||||
stages[4].State = model.StageFailed // Storage
|
||||
stages[5].State = model.StageFailed // Storage
|
||||
nodes := BuildPipeline(run, stages)
|
||||
// Pre-stages are all past for a run that reached Storage.
|
||||
for i := idxQueued; i <= idxBooting; i++ {
|
||||
@@ -150,7 +154,7 @@ func TestBuildPipeline_Failed(t *testing.T) {
|
||||
if nodes[idxStorage].State != "failed" {
|
||||
t.Errorf("Storage = %q, want failed", nodes[idxStorage].State)
|
||||
}
|
||||
for _, i := range []int{idxNetwork, idxGPU, idxPSU, idxReporting} {
|
||||
for _, i := range []int{idxNetwork, idxBurn, idxGPU, idxPSU, idxReporting} {
|
||||
if nodes[i].State != "skipped" {
|
||||
t.Errorf("%s = %q, want skipped", nodes[i].Name, nodes[i].State)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user