deep profile + threshold gating + firmware stage + Burn super-stage
Ships all five phases of the deep-profile overhaul together. Runs now carry a profile (quick/deep/soak); every profile walks the same 11-stage order — Inventory → Firmware → SpecValidate → SMART → CPUStress → Storage → Network → Burn → GPU → PSU → Reporting — with only per-stage durations and concurrency scaled. Phase 1: profiles.ProfileRegistry loaded from vetting.yaml; runs.profile column + CreateWithProfile; threshold table + evaluator seeded per-run from the shared vetting.thresholds block; breach flips result at /sensor + /result. Phase 2: upgraded CPUStress (stress-ng --cpu-method=all --verify + EDAC/MCE poll), Storage (fio --verify=md5 + SMART start/end delta), Network (sustained iperf + /proc/net/dev deltas) with per-profile knobs from Deps. Phase 3: Burn super-stage with goroutine fan-out for CPU + memory + fio + iperf, PSU rails sampled across the Burn window, SensorMux (2 s flush, 500-sample cap) to absorb backpressure. Phase 4: Firmware stage + firmware_snapshots table; probes dmidecode (BIOS), ipmitool (BMC), ethtool -i (NIC), nvme (sysfs + id-ctrl), lspci (HBA), /proc/cpuinfo (microcode). spec.DiffFirmware folds into SpecValidate with pin-by-identifier and fan-out-across-component matching; mismatches park the run in FailedHolding. Phase 5: profile radio on the host start form, profile chip on the run header, Firmware section in the HTML report, coverage artifact uploaded from CI, agent/tests/fakes/ scaffold with Deps.LookPath seam + stress_ng and dmidecode example fakes. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -75,6 +75,12 @@ func newCaptureRegistry(c *captureNotifier) *notify.Registry {
|
||||
// (agent, runID, plainTokenForBearer). Caller is responsible for
|
||||
// transitioning the run out of Queued.
|
||||
func fullAgent(t *testing.T) (*api.Agent, int64, string) {
|
||||
return fullAgentWithSpec(t, "")
|
||||
}
|
||||
|
||||
// fullAgentWithSpec is the same as fullAgent but seeds the host with
|
||||
// an ExpectedSpecYAML so SpecValidate can pick up diffs in the test.
|
||||
func fullAgentWithSpec(t *testing.T, expectedSpecYAML string) (*api.Agent, int64, string) {
|
||||
t.Helper()
|
||||
tmp := t.TempDir()
|
||||
conn, err := db.Open(filepath.Join(tmp, "vetting.db"))
|
||||
@@ -89,6 +95,7 @@ func fullAgent(t *testing.T) (*api.Agent, int64, string) {
|
||||
artifactStore := &store.Artifacts{DB: conn}
|
||||
specDiffStore := &store.SpecDiffs{DB: conn}
|
||||
measurementStore := &store.Measurements{DB: conn}
|
||||
firmwareStore := &store.Firmware{DB: conn}
|
||||
|
||||
hub := events.NewHub()
|
||||
logHub, err := logs.NewHub(filepath.Join(tmp, "logs"), hub)
|
||||
@@ -109,7 +116,7 @@ func fullAgent(t *testing.T) (*api.Agent, int64, string) {
|
||||
MAC: "aa:bb:cc:dd:ee:10",
|
||||
WoLBroadcastIP: "10.0.0.255",
|
||||
WoLPort: 9,
|
||||
ExpectedSpecYAML: "", // empty spec → no diffs
|
||||
ExpectedSpecYAML: expectedSpecYAML,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("create host: %v", err)
|
||||
@@ -132,6 +139,7 @@ func fullAgent(t *testing.T) (*api.Agent, int64, string) {
|
||||
Artifacts: artifactStore,
|
||||
SpecDiffs: specDiffStore,
|
||||
Measurements: measurementStore,
|
||||
Firmware: firmwareStore,
|
||||
Runner: runner,
|
||||
EventHub: hub,
|
||||
Logs: logHub,
|
||||
@@ -195,20 +203,24 @@ func TestFullPipelineToCompleted(t *testing.T) {
|
||||
Memory: spec.MemorySpec{TotalGiB: 16},
|
||||
}
|
||||
next := walkStage(t, a, runID, token, "Inventory", true, map[string]any{"inventory": inv})
|
||||
// After Inventory → SpecValidate resolves inline → SMART
|
||||
if next != "SMART" {
|
||||
t.Fatalf("after Inventory, next_state = %q, want SMART", next)
|
||||
// After Inventory → Firmware
|
||||
if next != "Firmware" {
|
||||
t.Fatalf("after Inventory, next_state = %q, want Firmware", next)
|
||||
}
|
||||
|
||||
// The remaining stages advance one-for-one in order.
|
||||
// The remaining stages advance one-for-one in order. After Firmware
|
||||
// the inline SpecValidate resolver advances through SpecValidate to
|
||||
// SMART without a dedicated /result POST for SpecValidate.
|
||||
walkPlan := []struct {
|
||||
stage string
|
||||
expected string
|
||||
}{
|
||||
{"Firmware", "SMART"},
|
||||
{"SMART", "CPUStress"},
|
||||
{"CPUStress", "Storage"},
|
||||
{"Storage", "Network"},
|
||||
{"Network", "GPU"},
|
||||
{"Network", "Burn"},
|
||||
{"Burn", "GPU"},
|
||||
{"GPU", "PSU"},
|
||||
{"PSU", "Completed"}, // PSU → Reporting resolves inline → Completed
|
||||
}
|
||||
@@ -287,8 +299,11 @@ func TestFaultInjectionSMART(t *testing.T) {
|
||||
}
|
||||
|
||||
inv := spec.Inventory{Memory: spec.MemorySpec{TotalGiB: 16}}
|
||||
if next := walkStage(t, a, runID, token, "Inventory", true, map[string]any{"inventory": inv}); next != "SMART" {
|
||||
t.Fatalf("after Inventory, next = %q want SMART", next)
|
||||
if next := walkStage(t, a, runID, token, "Inventory", true, map[string]any{"inventory": inv}); next != "Firmware" {
|
||||
t.Fatalf("after Inventory, next = %q want Firmware", next)
|
||||
}
|
||||
if next := walkStage(t, a, runID, token, "Firmware", true, nil); next != "SMART" {
|
||||
t.Fatalf("after Firmware, next = %q want SMART (inline SpecValidate)", next)
|
||||
}
|
||||
|
||||
// Fake SMART failure → expect FailedHolding.
|
||||
@@ -316,3 +331,76 @@ func TestFaultInjectionSMART(t *testing.T) {
|
||||
t.Errorf("StageFailed severity = %q, want critical", ev.Severity)
|
||||
}
|
||||
}
|
||||
|
||||
// TestFirmwarePersistAndSpecMismatch exercises the Phase 4 firmware
|
||||
// integration: the agent POSTs Firmware snapshots; server persists; the
|
||||
// following SpecValidate diff picks up a firmware mismatch and parks
|
||||
// the run in FailedHolding with FailedStage=SpecValidate.
|
||||
func TestFirmwarePersistAndSpecMismatch(t *testing.T) {
|
||||
// Host demands BIOS 3.3; agent will POST 3.2 → one critical firmware diff.
|
||||
yaml := "firmware:\n - component: bios\n version: \"3.3\"\n"
|
||||
a, runID, token := fullAgentWithSpec(t, yaml)
|
||||
a.Notify = newCaptureRegistry(&captureNotifier{name: "capture"})
|
||||
|
||||
if err := a.Runs.SetState(context.Background(), runID, model.StateInventoryCheck); err != nil {
|
||||
t.Fatalf("set state: %v", err)
|
||||
}
|
||||
|
||||
inv := spec.Inventory{Memory: spec.MemorySpec{TotalGiB: 16}}
|
||||
if next := walkStage(t, a, runID, token, "Inventory", true, map[string]any{"inventory": inv}); next != "Firmware" {
|
||||
t.Fatalf("after Inventory, next = %q want Firmware", next)
|
||||
}
|
||||
|
||||
// Firmware stage: agent reports actual BIOS 3.2 → one row persisted.
|
||||
fw := []map[string]any{
|
||||
{"component": "bios", "identifier": "system", "version": "3.2", "vendor": "AMI"},
|
||||
}
|
||||
next := walkStage(t, a, runID, token, "Firmware", true, map[string]any{"firmware": fw})
|
||||
// Inline SpecValidate should detect the firmware mismatch and send
|
||||
// the run to FailedHolding without the agent posting SpecValidate.
|
||||
if next != "FailedHolding" {
|
||||
t.Fatalf("after Firmware mismatch, next = %q want FailedHolding", next)
|
||||
}
|
||||
|
||||
run, err := a.Runs.Get(context.Background(), runID)
|
||||
if err != nil {
|
||||
t.Fatalf("get run: %v", err)
|
||||
}
|
||||
if run.State != model.StateFailedHolding {
|
||||
t.Fatalf("run.State = %q, want FailedHolding", run.State)
|
||||
}
|
||||
if run.FailedStage != "SpecValidate" {
|
||||
t.Fatalf("run.FailedStage = %q, want SpecValidate", run.FailedStage)
|
||||
}
|
||||
|
||||
// Persistence: row landed in firmware_snapshots.
|
||||
snaps, err := a.Firmware.ListForRun(context.Background(), runID)
|
||||
if err != nil {
|
||||
t.Fatalf("ListForRun firmware: %v", err)
|
||||
}
|
||||
if len(snaps) != 1 {
|
||||
t.Fatalf("firmware rows = %d, want 1: %+v", len(snaps), snaps)
|
||||
}
|
||||
if snaps[0].Component != "bios" || snaps[0].Version != "3.2" {
|
||||
t.Errorf("persisted snapshot = %+v", snaps[0])
|
||||
}
|
||||
|
||||
// Diff row: SpecDiffs has a firmware-specific entry (rather than
|
||||
// only CPU/memory/disk rows) and is critical.
|
||||
diffs, err := a.SpecDiffs.ListForRun(context.Background(), runID)
|
||||
if err != nil {
|
||||
t.Fatalf("ListForRun specdiffs: %v", err)
|
||||
}
|
||||
found := false
|
||||
for _, d := range diffs {
|
||||
if strings.HasPrefix(d.Field, "firmware[") {
|
||||
found = true
|
||||
if d.Severity != "critical" {
|
||||
t.Errorf("firmware diff severity = %q, want critical", d.Severity)
|
||||
}
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Fatalf("no firmware[...] entry in spec diffs: %+v", diffs)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user