Files
josh 23c689aa5b
CI / Lint + build + test (push) Failing after 1m57s
Release / release (push) Has been cancelled
deep profile + threshold gating + firmware stage + Burn super-stage
Ships all five phases of the deep-profile overhaul together. Runs now
carry a profile (quick/deep/soak); every profile walks the same
11-stage order — Inventory → Firmware → SpecValidate → SMART →
CPUStress → Storage → Network → Burn → GPU → PSU → Reporting —
with only per-stage durations and concurrency scaled.

Phase 1: profiles.ProfileRegistry loaded from vetting.yaml; runs.profile
column + CreateWithProfile; threshold table + evaluator seeded per-run
from the shared vetting.thresholds block; breach flips result at
/sensor + /result.

Phase 2: upgraded CPUStress (stress-ng --cpu-method=all --verify +
EDAC/MCE poll), Storage (fio --verify=md5 + SMART start/end delta),
Network (sustained iperf + /proc/net/dev deltas) with per-profile
knobs from Deps.

Phase 3: Burn super-stage with goroutine fan-out for CPU + memory +
fio + iperf, PSU rails sampled across the Burn window, SensorMux
(2 s flush, 500-sample cap) to absorb backpressure.

Phase 4: Firmware stage + firmware_snapshots table; probes dmidecode
(BIOS), ipmitool (BMC), ethtool -i (NIC), nvme (sysfs + id-ctrl),
lspci (HBA), /proc/cpuinfo (microcode). spec.DiffFirmware folds into
SpecValidate with pin-by-identifier and fan-out-across-component
matching; mismatches park the run in FailedHolding.

Phase 5: profile radio on the host start form, profile chip on the
run header, Firmware section in the HTML report, coverage artifact
uploaded from CI, agent/tests/fakes/ scaffold with Deps.LookPath
seam + stress_ng and dmidecode example fakes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-18 22:50:57 -04:00

98 lines
2.8 KiB
Go

package store
import (
"context"
"database/sql"
"fmt"
)
// FirmwareSnapshot mirrors a single row of the firmware_snapshots table.
// A run records several of these — one per probed component such as
// BIOS, BMC, NIC, HBA, CPU microcode or NVMe — so that SpecValidate can
// compare them against the host's expected spec.
type FirmwareSnapshot struct {
	// ID is the row's primary key.
	ID int64
	// RunID references the run this snapshot belongs to.
	RunID int64
	// Component names the firmware class:
	// bios|bmc|nic|hba|microcode|nvme_fw.
	Component string
	// Identifier tells apart multiple instances of one component
	// (slot, serial or device path).
	Identifier string
	// Version is the firmware version string reported for the component.
	Version string
	// Vendor is the vendor string reported for the component.
	Vendor string
	// RawJSON holds the raw probe payload as text. The insert paths in
	// this file store "{}" when it is empty.
	RawJSON string
}
// Firmware is the persistence seam for firmware snapshots. Rows captured
// by the agent's Phase-4 probe are written through it by the orchestrator
// and later read back by SpecValidate.
type Firmware struct {
	// DB is the database handle every method on Firmware executes against.
	DB *sql.DB
}
// Create writes one firmware snapshot row and returns the ID reported by
// the driver for the new row. Callers are expected to invoke it once per
// (run, component, identifier); deduplication and formatting are the
// agent probe's responsibility, not this method's.
//
// An empty s.RawJSON is stored as "{}". s.ID is not used by the insert.
func (f *Firmware) Create(ctx context.Context, s FirmwareSnapshot) (int64, error) {
	// Default to an empty JSON object so the column never holds "".
	payload := "{}"
	if s.RawJSON != "" {
		payload = s.RawJSON
	}

	result, execErr := f.DB.ExecContext(ctx, `
INSERT INTO firmware_snapshots(run_id, component, identifier, version, vendor, raw_json)
VALUES(?,?,?,?,?,?)
`, s.RunID, s.Component, s.Identifier, s.Version, s.Vendor, payload)
	if execErr != nil {
		return 0, fmt.Errorf("insert firmware: %w", execErr)
	}
	return result.LastInsertId()
}
// CreateBatch persists a slice of snapshots inside a single transaction,
// so either every row is stored or none is. The agent probe enumerates
// all components in one pass, which makes one batched write the natural
// fit.
//
// An empty slice is a no-op and returns nil without touching the
// database. As in Create, an empty RawJSON is stored as "{}". Every
// failure is wrapped with the step that produced it (begin, prepare,
// insert, commit); the underlying error stays reachable through
// errors.Is / errors.As.
func (f *Firmware) CreateBatch(ctx context.Context, rows []FirmwareSnapshot) error {
	if len(rows) == 0 {
		return nil
	}

	tx, err := f.DB.BeginTx(ctx, nil)
	if err != nil {
		return fmt.Errorf("begin firmware batch: %w", err)
	}
	// Undo the batch on any early return. After a successful Commit this
	// call only reports that the transaction is already finished, so its
	// result is deliberately ignored.
	defer func() { _ = tx.Rollback() }()

	stmt, err := tx.PrepareContext(ctx, `
INSERT INTO firmware_snapshots(run_id, component, identifier, version, vendor, raw_json)
VALUES(?,?,?,?,?,?)
`)
	if err != nil {
		return fmt.Errorf("prepare firmware insert: %w", err)
	}
	// Close error is ignored: nothing useful can be done with it here.
	defer func() { _ = stmt.Close() }()

	for _, s := range rows {
		raw := s.RawJSON
		if raw == "" {
			raw = "{}"
		}
		if _, err := stmt.ExecContext(ctx, s.RunID, s.Component, s.Identifier, s.Version, s.Vendor, raw); err != nil {
			return fmt.Errorf("insert firmware %s/%s: %w", s.Component, s.Identifier, err)
		}
	}

	if err := tx.Commit(); err != nil {
		return fmt.Errorf("commit firmware batch: %w", err)
	}
	return nil
}
// ListForRun returns every firmware snapshot stored for runID, ordered by
// row ID so repeated calls yield the same sequence. The report page and
// SpecValidate both read through this method.
//
// A run with no snapshots yields a nil slice and a nil error. On any
// failure (query, row scan, or iteration) the returned slice is nil, so
// callers never see a partially populated result, and the error is
// wrapped with the failing step and the run ID.
func (f *Firmware) ListForRun(ctx context.Context, runID int64) ([]FirmwareSnapshot, error) {
	rows, err := f.DB.QueryContext(ctx, `
SELECT id, run_id, component, identifier, version, vendor, raw_json
FROM firmware_snapshots WHERE run_id = ? ORDER BY id
`, runID)
	if err != nil {
		return nil, fmt.Errorf("query firmware for run %d: %w", runID, err)
	}
	defer rows.Close()

	var out []FirmwareSnapshot
	for rows.Next() {
		var s FirmwareSnapshot
		if err := rows.Scan(&s.ID, &s.RunID, &s.Component, &s.Identifier,
			&s.Version, &s.Vendor, &s.RawJSON); err != nil {
			return nil, fmt.Errorf("scan firmware for run %d: %w", runID, err)
		}
		out = append(out, s)
	}
	// rows.Next returning false can mean "done" or "failed"; rows.Err
	// distinguishes the two.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterate firmware for run %d: %w", runID, err)
	}
	return out, nil
}