deep profile + threshold gating + firmware stage + Burn super-stage
CI / Lint + build + test (push) Failing after 1m57s
Release / release (push) Has been cancelled

Ships all five phases of the deep-profile overhaul together. Runs now
carry a profile (quick/deep/soak); every profile walks the same
11-stage order — Inventory → Firmware → SpecValidate → SMART →
CPUStress → Storage → Network → Burn → GPU → PSU → Reporting —
with only per-stage durations and concurrency scaled.

Phase 1: profiles.ProfileRegistry loaded from vetting.yaml; runs.profile
column + CreateWithProfile; threshold table + evaluator seeded per-run
from the shared vetting.thresholds block; breach flips result at
/sensor + /result.

Phase 2: upgraded CPUStress (stress-ng --cpu-method=all --verify +
EDAC/MCE poll), Storage (fio --verify=md5 + SMART start/end delta),
Network (sustained iperf + /proc/net/dev deltas) with per-profile
knobs from Deps.

Phase 3: Burn super-stage with goroutine fan-out for CPU + memory +
fio + iperf, PSU rails sampled across the Burn window, SensorMux
(2 s flush, 500-sample cap) to absorb backpressure.

Phase 4: Firmware stage + firmware_snapshots table; probes dmidecode
(BIOS), ipmitool (BMC), ethtool -i (NIC), nvme (sysfs + id-ctrl),
lspci (HBA), /proc/cpuinfo (microcode). spec.DiffFirmware folds into
SpecValidate with pin-by-identifier and fan-out-across-component
matching; mismatches park the run in FailedHolding.

Phase 5: profile radio on the host start form, profile chip on the
run header, Firmware section in the HTML report, coverage artifact
uploaded from CI, agent/tests/fakes/ scaffold with Deps.LookPath
seam + stress_ng and dmidecode example fakes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-18 22:50:57 -04:00
parent fbb21cbafd
commit 23c689aa5b
60 changed files with 5911 additions and 527 deletions
+192
View File
@@ -0,0 +1,192 @@
package tests
import (
"encoding/json"
"testing"
"vetting/agent/probes"
)
// TestParseIperfJSON_SumSent verifies that throughput, retransmits, and
// bytes sent are all read out of end.sum_sent, which is where real
// iperf3 -J output places them for TCP streams. The fixture's
// 950000000 bits/s is expected to surface as 950 (Mbps).
func TestParseIperfJSON_SumSent(t *testing.T) {
	const payload = `{
"end": {
"sum_sent": {
"bits_per_second": 950000000,
"retransmits": 42,
"bytes": 1187500000
}
}
}`

	gotMbps, gotRetrans, gotBytes, _, err := parseIperfJSON([]byte(payload))
	if err != nil {
		// Nothing below is meaningful without a successful parse.
		t.Fatalf("parseIperfJSON: %v", err)
	}

	if gotMbps != 950 {
		t.Errorf("mbps = %v, want 950", gotMbps)
	}
	if gotRetrans != 42 {
		t.Errorf("retransmits = %d, want 42", gotRetrans)
	}
	if gotBytes != 1187500000 {
		t.Errorf("bytesSent = %d, want 1187500000", gotBytes)
	}
}
// TestParseIperfJSON_MissingEnd verifies that output lacking an end block
// (as a partial or aborted iperf run would produce) is rejected with an
// error instead of being treated as a result.
func TestParseIperfJSON_MissingEnd(t *testing.T) {
	const payload = `{"start": {}}`

	_, _, _, _, err := parseIperfJSON([]byte(payload))
	if err == nil {
		t.Errorf("expected error on iperf output missing end block")
	}
}
// TestParseIperfJSON_ZeroBps verifies that a throughput of zero is
// reported as an error so the stage can fail fast. An iperf that exited
// successfully but pushed zero bits cannot be told apart from a broken
// run, so it must not pass.
func TestParseIperfJSON_ZeroBps(t *testing.T) {
	const payload = `{"end": {"sum_sent": {"bits_per_second": 0}}}`

	_, _, _, _, err := parseIperfJSON([]byte(payload))
	if err == nil {
		t.Errorf("expected error when bits_per_second is 0")
	}
}
// TestParseIperfJSON_FallsBackToSumReceived verifies that throughput is
// still found when sum_sent is absent, as happens with UDP tests and some
// edge cases. The parser is expected to try sum_sent, then sum_received,
// then sum, and use the first one carrying a throughput number; this
// fixture only supplies sum_received.
func TestParseIperfJSON_FallsBackToSumReceived(t *testing.T) {
	const payload = `{
"end": {
"sum_received": {"bits_per_second": 500000000}
}
}`

	gotMbps, _, _, _, err := parseIperfJSON([]byte(payload))
	if err != nil {
		t.Fatalf("parseIperfJSON: %v", err)
	}

	if gotMbps != 500 {
		t.Errorf("mbps = %v, want 500", gotMbps)
	}
}
// TestDiffNetDev_HappyPath verifies that diffNetDev yields end minus start
// for an interface present in both snapshots. In this fixture eth0
// received 1000 bytes, transmitted 10000 bytes, and accumulated 3 tx
// errors during the window.
func TestDiffNetDev_HappyPath(t *testing.T) {
	before := map[string]probes.NetDevSnapshot{
		"eth0": {
			Iface:   "eth0",
			RxBytes: 1000,
			RxErrs:  0,
			TxBytes: 5000,
			TxErrs:  1,
		},
	}
	after := map[string]probes.NetDevSnapshot{
		"eth0": {
			Iface:   "eth0",
			RxBytes: 2000,
			RxErrs:  0,
			TxBytes: 15000,
			TxErrs:  4,
		},
	}

	eth0, present := diffNetDev(before, after)["eth0"]
	if !present {
		// Without an eth0 entry there is nothing left to compare.
		t.Fatalf("eth0 missing from diff output")
	}

	if eth0.RxBytes != 1000 {
		t.Errorf("RxBytes delta=%d, want 1000", eth0.RxBytes)
	}
	if eth0.TxBytes != 10000 {
		t.Errorf("TxBytes delta=%d, want 10000", eth0.TxBytes)
	}
	if eth0.TxErrs != 3 {
		t.Errorf("TxErrs delta=%d, want 3", eth0.TxErrs)
	}
}
// TestDiffNetDev_InterfaceVanished verifies that an interface seen in the
// start snapshot but absent from the end snapshot is left out of the diff
// entirely, rather than being reported with a negative or stale number.
// The surviving interface must still get its normal delta.
func TestDiffNetDev_InterfaceVanished(t *testing.T) {
	before := map[string]probes.NetDevSnapshot{
		"eth0": {Iface: "eth0", TxBytes: 1000},
		"eth1": {Iface: "eth1", TxBytes: 500},
	}
	// eth1 is deliberately missing here.
	after := map[string]probes.NetDevSnapshot{
		"eth0": {Iface: "eth0", TxBytes: 2000},
	}

	diff := diffNetDev(before, after)

	_, eth1Present := diff["eth1"]
	if eth1Present {
		t.Errorf("eth1 should have been dropped (gone at end)")
	}

	eth0 := diff["eth0"]
	if eth0.TxBytes != 1000 {
		t.Errorf("eth0 TxBytes delta=%d, want 1000", eth0.TxBytes)
	}
}
// TestDiffNetDev_CounterReset verifies that a counter whose end value is
// lower than its start value (kernel restart, or wrap-around on a 32-bit
// counter) produces a delta clamped to 0 instead of underflowing a uint64.
func TestDiffNetDev_CounterReset(t *testing.T) {
	before := map[string]probes.NetDevSnapshot{
		"eth0": {Iface: "eth0", TxBytes: 9999, TxErrs: 5},
	}
	// Both counters are smaller than in the start snapshot.
	after := map[string]probes.NetDevSnapshot{
		"eth0": {Iface: "eth0", TxBytes: 100, TxErrs: 0},
	}

	eth0 := diffNetDev(before, after)["eth0"]

	if eth0.TxBytes != 0 {
		t.Errorf("reset TxBytes delta=%d, want 0 (clamped)", eth0.TxBytes)
	}
	if eth0.TxErrs != 0 {
		t.Errorf("reset TxErrs delta=%d, want 0 (clamped)", eth0.TxErrs)
	}
}
// TestDeriveHost verifies host extraction from an orchestrator URL, which
// is how the agent picks the iperf3 server target. Both the scheme://host
// and scheme://host:port shapes are covered, and the port must be
// stripped from the result.
func TestDeriveHost(t *testing.T) {
	table := []struct {
		input    string
		expected string
	}{
		{"https://orch.local", "orch.local"},
		{"https://orch.local:8443", "orch.local"},
		{"http://10.0.0.5:8080", "10.0.0.5"},
	}

	for _, tc := range table {
		host, err := deriveHost(tc.input)
		switch {
		case err != nil:
			// The returned host is not meaningful when an error is reported.
			t.Errorf("deriveHost(%q) error: %v", tc.input, err)
		case host != tc.expected:
			t.Errorf("deriveHost(%q) = %q, want %q", tc.input, host, tc.expected)
		}
	}
}
// TestDeriveHost_Empty verifies that an empty orchestrator URL is rejected
// with an error rather than yielding a host.
func TestDeriveHost_Empty(t *testing.T) {
	_, err := deriveHost("")
	if err == nil {
		t.Errorf("deriveHost(\"\") should error")
	}
}
// TestParseIperfJSON_ParsesEndMap verifies that the full end map is
// returned so extras can show every field iperf produced, not just the
// three values extracted by hand. The returned value is round-tripped
// through encoding/json and both top-level keys from the fixture must
// survive.
func TestParseIperfJSON_ParsesEndMap(t *testing.T) {
	raw := `{
"end": {
"sum_sent": {"bits_per_second": 1000000, "retransmits": 0, "bytes": 125000},
"cpu_utilization_percent": {"host_total": 12.3}
}
}`
	_, _, _, endMap, err := parseIperfJSON([]byte(raw))
	if err != nil {
		t.Fatalf("parseIperfJSON: %v", err)
	}
	if endMap == nil {
		t.Fatalf("endMap is nil")
	}

	// Round-trip via JSON so the check does not depend on endMap's concrete
	// Go type, only on it encoding as a JSON object.
	b, err := json.Marshal(endMap)
	if err != nil {
		t.Fatalf("marshal endMap: %v", err)
	}
	var keys map[string]json.RawMessage
	if err := json.Unmarshal(b, &keys); err != nil {
		t.Fatalf("unmarshal endMap JSON %s: %v", b, err)
	}

	// Both the hand-extracted block and the extra block must be present.
	for _, k := range []string{"sum_sent", "cpu_utilization_percent"} {
		if _, ok := keys[k]; !ok {
			t.Errorf("endMap missing key %q after JSON round-trip", k)
		}
	}
}