deep profile + threshold gating + firmware stage + Burn super-stage
CI / Lint + build + test (push) Failing after 1m57s
Release / release (push) Has been cancelled

Ships all five phases of the deep-profile overhaul together. Runs now
carry a profile (quick/deep/soak); every profile walks the same
11-stage order — Inventory → Firmware → SpecValidate → SMART →
CPUStress → Storage → Network → Burn → GPU → PSU → Reporting —
with only per-stage durations and concurrency scaled.

Phase 1: profiles.ProfileRegistry loaded from vetting.yaml; runs.profile
column + CreateWithProfile; threshold table + evaluator seeded per-run
from the shared vetting.thresholds block; a threshold breach flips the
result at /sensor + /result.

Phase 2: upgraded CPUStress (stress-ng --cpu-method=all --verify +
EDAC/MCE poll), Storage (fio --verify=md5 + SMART start/end delta),
Network (sustained iperf + /proc/net/dev deltas) with per-profile
knobs from Deps.

Phase 3: Burn super-stage with goroutine fan-out for CPU + memory +
fio + iperf, PSU rails sampled across the Burn window, SensorMux
(2 s flush, 500-sample cap) to absorb backpressure.

Phase 4: Firmware stage + firmware_snapshots table; probes dmidecode
(BIOS), ipmitool (BMC), ethtool -i (NIC), nvme (sysfs + id-ctrl),
lspci (HBA), /proc/cpuinfo (microcode). spec.DiffFirmware folds into
SpecValidate with pin-by-identifier and fan-out-across-component
matching; mismatches park the run in FailedHolding.

Phase 5: profile radio on the host start form, profile chip on the
run header, Firmware section in the HTML report, coverage artifact
uploaded from CI, agent/tests/fakes/ scaffold with Deps.LookPath
seam + stress_ng and dmidecode example fakes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-18 22:50:57 -04:00
parent fbb21cbafd
commit 23c689aa5b
60 changed files with 5911 additions and 527 deletions
+93
View File
@@ -119,3 +119,96 @@ func TestDiffSeverityAlwaysCritical(t *testing.T) {
}
}
}
// TestDiffFirmwareIdentifierMatch verifies that an expected BIOS rule whose
// version equals the observed snapshot's version yields an empty diff list.
func TestDiffFirmwareIdentifierMatch(t *testing.T) {
	rules := []FirmwareSpec{{Component: "bios", Version: "3.2"}}
	seen := []FirmwareObserved{{Component: "bios", Identifier: "system", Version: "3.2"}}

	diffs := DiffFirmware(rules, seen)
	if len(diffs) == 0 {
		return
	}
	t.Fatalf("matching bios version should produce no diff, got %+v", diffs)
}
func TestDiffFirmwareVersionMismatch(t *testing.T) {
exp := []FirmwareSpec{{Component: "bios", Version: "3.3"}}
obs := []FirmwareObserved{{Component: "bios", Identifier: "system", Version: "3.2"}}
d := DiffFirmware(exp, obs)
if len(d) != 1 {
t.Fatalf("want 1 diff, got %d: %+v", len(d), d)
}
if d[0].Expected != "3.3" || d[0].Actual != "3.2" {
t.Fatalf("diff expected/actual = %q/%q, want 3.3/3.2", d[0].Expected, d[0].Actual)
}
if d[0].Severity != "critical" {
t.Errorf("severity = %q, want critical", d[0].Severity)
}
}
// TestDiffFirmwareMissingComponentPresent verifies that a rule without an
// identifier, evaluated against no observed snapshots at all, produces a
// single diff on the component's "present" field (expected "true", actual
// "false") rather than several diffs.
func TestDiffFirmwareMissingComponentPresent(t *testing.T) {
	rules := []FirmwareSpec{{Component: "bmc", Version: "1.74"}}

	diffs := DiffFirmware(rules, nil)
	if count := len(diffs); count != 1 {
		t.Fatalf("want 1 diff for missing BMC, got %d: %+v", count, diffs)
	}

	first := diffs[0]
	presenceDiff := first.Field == "firmware[bmc].present" &&
		first.Expected == "true" &&
		first.Actual == "false"
	if !presenceDiff {
		t.Fatalf("missing-BMC diff = %+v", first)
	}
}
// TestDiffFirmwareWildcardFanOut verifies that a rule with an empty
// identifier is checked against every observed snapshot of its component:
// with two NIC ports observed, one matching and one not, exactly one diff
// is expected and it must name the mismatched port (eth1).
func TestDiffFirmwareWildcardFanOut(t *testing.T) {
	rules := []FirmwareSpec{{Component: "nic", Version: "16.32.1010"}}
	ports := []FirmwareObserved{
		{Component: "nic", Identifier: "eth0", Version: "16.32.1010"}, // matches the rule
		{Component: "nic", Identifier: "eth1", Version: "14.28.0000"}, // should be flagged
	}

	diffs := DiffFirmware(rules, ports)
	if count := len(diffs); count != 1 {
		t.Fatalf("want 1 diff (mismatched eth1 only), got %d: %+v", count, diffs)
	}

	if field := diffs[0].Field; field != "firmware[nic/eth1].version" {
		t.Errorf("field = %q, want firmware[nic/eth1].version", field)
	}
}
// TestDiffFirmwareIdentifierPin verifies that a rule carrying an identifier
// applies only to that port: eth0 matches the pinned version, and eth1's
// different version must not produce a diff from this rule.
func TestDiffFirmwareIdentifierPin(t *testing.T) {
	pinned := []FirmwareSpec{{Component: "nic", Identifier: "eth0", Version: "1.0"}}
	ports := []FirmwareObserved{
		{Component: "nic", Identifier: "eth0", Version: "1.0"},
		{Component: "nic", Identifier: "eth1", Version: "9.9"},
	}

	diffs := DiffFirmware(pinned, ports)
	if len(diffs) == 0 {
		return
	}
	t.Fatalf("pinned rule should ignore other ports, got %+v", diffs)
}
// TestDiffFirmwareIdentifierPinMissing verifies that a pinned rule with no
// observed snapshot to match produces exactly one diff, on the
// "firmware[nic/eth0].present" field.
func TestDiffFirmwareIdentifierPinMissing(t *testing.T) {
	pinned := []FirmwareSpec{{Component: "nic", Identifier: "eth0", Version: "1.0"}}

	diffs := DiffFirmware(pinned, nil)
	// The length check comes first so indexing is safe on an empty result.
	if len(diffs) == 1 && diffs[0].Field == "firmware[nic/eth0].present" {
		return
	}
	t.Fatalf("want present=false for pinned rule, got %+v", diffs)
}
// TestDiffFirmwareEmptyRuleSkipped verifies that rules with an empty
// component or an empty version are skipped: neither one produces a diff
// (or a panic) when evaluated against an observed BIOS snapshot.
func TestDiffFirmwareEmptyRuleSkipped(t *testing.T) {
	incomplete := []FirmwareSpec{
		{Component: "", Version: "x"},    // no component
		{Component: "bios", Version: ""}, // no version
	}
	seen := []FirmwareObserved{{Component: "bios", Identifier: "system", Version: "3.2"}}

	diffs := DiffFirmware(incomplete, seen)
	if len(diffs) == 0 {
		return
	}
	t.Fatalf("empty rules should skip, got %+v", diffs)
}
// TestDiffFirmwareCaseInsensitive verifies that version comparison ignores
// letter case: an expected "fw1234" against an observed "FW1234" for the
// same pinned NVMe component must not produce a diff.
func TestDiffFirmwareCaseInsensitive(t *testing.T) {
	rules := []FirmwareSpec{{Component: "nvme_fw", Identifier: "nvme0", Version: "fw1234"}}
	seen := []FirmwareObserved{{Component: "nvme_fw", Identifier: "nvme0", Version: "FW1234"}}

	diffs := DiffFirmware(rules, seen)
	if len(diffs) == 0 {
		return
	}
	t.Fatalf("case-insensitive match expected, got %+v", diffs)
}