deep profile + threshold gating + firmware stage + Burn super-stage
Ships all five phases of the deep-profile overhaul together. Runs now carry a profile (quick/deep/soak); every profile walks the same 11-stage order — Inventory → Firmware → SpecValidate → SMART → CPUStress → Storage → Network → Burn → GPU → PSU → Reporting — with only per-stage durations and concurrency scaled. Phase 1: profiles.ProfileRegistry loaded from vetting.yaml; runs.profile column + CreateWithProfile; threshold table + evaluator seeded per-run from the shared vetting.thresholds block; breach flips result at /sensor + /result. Phase 2: upgraded CPUStress (stress-ng --cpu-method=all --verify + EDAC/MCE poll), Storage (fio --verify=md5 + SMART start/end delta), Network (sustained iperf + /proc/net/dev deltas) with per-profile knobs from Deps. Phase 3: Burn super-stage with goroutine fan-out for CPU + memory + fio + iperf, PSU rails sampled across the Burn window, SensorMux (2 s flush, 500-sample cap) to absorb backpressure. Phase 4: Firmware stage + firmware_snapshots table; probes dmidecode (BIOS), ipmitool (BMC), ethtool -i (NIC), nvme (sysfs + id-ctrl), lspci (HBA), /proc/cpuinfo (microcode). spec.DiffFirmware folds into SpecValidate with pin-by-identifier and fan-out-across-component matching; mismatches park the run in FailedHolding. Phase 5: profile radio on the host start form, profile chip on the run header, Firmware section in the HTML report, coverage artifact uploaded from CI, agent/tests/fakes/ scaffold with Deps.LookPath seam + stress_ng and dmidecode example fakes. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+30
-12
@@ -14,16 +14,30 @@ type Runs struct {
|
||||
DB *sql.DB
|
||||
}
|
||||
|
||||
// Create inserts a new run using the default "quick" profile. Older
|
||||
// call sites (and most tests) target this form — the profile column's
|
||||
// DEFAULT 'quick' on runs takes care of the backfill.
|
||||
func (r *Runs) Create(ctx context.Context, hostID int64, tokenHash string, nonDestructive bool) (int64, error) {
|
||||
return r.CreateWithProfile(ctx, hostID, tokenHash, nonDestructive, "quick")
|
||||
}
|
||||
|
||||
// CreateWithProfile inserts a new run with an explicit profile
|
||||
// ("quick"|"deep"|"soak"). The UI handler is the authoritative caller;
|
||||
// empty profile falls back to "quick" so a misconfigured form doesn't
|
||||
// leave a row with a blank profile column.
|
||||
func (r *Runs) CreateWithProfile(ctx context.Context, hostID int64, tokenHash string, nonDestructive bool, profile string) (int64, error) {
|
||||
if profile == "" {
|
||||
profile = "quick"
|
||||
}
|
||||
now := time.Now().UTC()
|
||||
nd := 0
|
||||
if nonDestructive {
|
||||
nd = 1
|
||||
}
|
||||
res, err := r.DB.ExecContext(ctx, `
|
||||
INSERT INTO runs(host_id, state, agent_token_hash, next_boot_target, started_at, non_destructive)
|
||||
VALUES(?,?,?,?,?,?)
|
||||
`, hostID, string(model.StateQueued), tokenHash, "linux", now, nd)
|
||||
INSERT INTO runs(host_id, state, agent_token_hash, next_boot_target, started_at, non_destructive, profile)
|
||||
VALUES(?,?,?,?,?,?,?)
|
||||
`, hostID, string(model.StateQueued), tokenHash, "linux", now, nd, profile)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("insert run: %w", err)
|
||||
}
|
||||
@@ -107,14 +121,15 @@ func (r *Runs) Get(ctx context.Context, id int64) (*model.Run, error) {
|
||||
SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
|
||||
COALESCE(next_boot_target,''), agent_token_hash, started_at,
|
||||
completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
|
||||
COALESCE(override_flags_json,''), COALESCE(non_destructive,0)
|
||||
COALESCE(override_flags_json,''), COALESCE(non_destructive,0),
|
||||
COALESCE(profile,'quick')
|
||||
FROM runs WHERE id = ?
|
||||
`, id)
|
||||
var run model.Run
|
||||
var completedAt sql.NullTime
|
||||
err := row.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
|
||||
&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
|
||||
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive)
|
||||
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive, &run.Profile)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return nil, ErrNotFound
|
||||
}
|
||||
@@ -133,7 +148,8 @@ func (r *Runs) LatestForHost(ctx context.Context, hostID int64) (*model.Run, err
|
||||
SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
|
||||
COALESCE(next_boot_target,''), agent_token_hash, started_at,
|
||||
completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
|
||||
COALESCE(override_flags_json,''), COALESCE(non_destructive,0)
|
||||
COALESCE(override_flags_json,''), COALESCE(non_destructive,0),
|
||||
COALESCE(profile,'quick')
|
||||
FROM runs WHERE host_id = ?
|
||||
ORDER BY id DESC LIMIT 1
|
||||
`, hostID)
|
||||
@@ -141,7 +157,7 @@ func (r *Runs) LatestForHost(ctx context.Context, hostID int64) (*model.Run, err
|
||||
var completedAt sql.NullTime
|
||||
err := row.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
|
||||
&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
|
||||
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive)
|
||||
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive, &run.Profile)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return nil, nil
|
||||
}
|
||||
@@ -165,7 +181,8 @@ func (r *Runs) ListForHost(ctx context.Context, hostID int64, limit int) ([]mode
|
||||
SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
|
||||
COALESCE(next_boot_target,''), agent_token_hash, started_at,
|
||||
completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
|
||||
COALESCE(override_flags_json,''), COALESCE(non_destructive,0)
|
||||
COALESCE(override_flags_json,''), COALESCE(non_destructive,0),
|
||||
COALESCE(profile,'quick')
|
||||
FROM runs
|
||||
WHERE host_id = ?
|
||||
ORDER BY id DESC
|
||||
@@ -181,7 +198,7 @@ func (r *Runs) ListForHost(ctx context.Context, hostID int64, limit int) ([]mode
|
||||
var completedAt sql.NullTime
|
||||
if err := rows.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
|
||||
&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
|
||||
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive); err != nil {
|
||||
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive, &run.Profile); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if completedAt.Valid {
|
||||
@@ -206,7 +223,8 @@ func (r *Runs) Active(ctx context.Context) ([]model.Run, error) {
|
||||
SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
|
||||
COALESCE(next_boot_target,''), agent_token_hash, started_at,
|
||||
completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
|
||||
COALESCE(override_flags_json,''), COALESCE(non_destructive,0)
|
||||
COALESCE(override_flags_json,''), COALESCE(non_destructive,0),
|
||||
COALESCE(profile,'quick')
|
||||
FROM runs
|
||||
WHERE state NOT IN ('Completed','Released','Cancelled')
|
||||
ORDER BY id
|
||||
@@ -221,7 +239,7 @@ func (r *Runs) Active(ctx context.Context) ([]model.Run, error) {
|
||||
var completedAt sql.NullTime
|
||||
if err := rows.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
|
||||
&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
|
||||
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive); err != nil {
|
||||
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive, &run.Profile); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if completedAt.Valid {
|
||||
@@ -275,7 +293,7 @@ func (r *Runs) FindActiveByMAC(ctx context.Context, mac string) (*model.Run, err
|
||||
var completedAt sql.NullTime
|
||||
err := row.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
|
||||
&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
|
||||
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive)
|
||||
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive, &run.Profile)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user