deep profile + threshold gating + firmware stage + Burn super-stage
Ships all five phases of the deep-profile overhaul together. Runs now carry a profile (quick/deep/soak); every profile walks the same 11-stage order — Inventory → Firmware → SpecValidate → SMART → CPUStress → Storage → Network → Burn → GPU → PSU → Reporting — with only per-stage durations and concurrency scaled.

Phase 1: profiles.ProfileRegistry loaded from vetting.yaml; runs.profile column + CreateWithProfile; threshold table + evaluator seeded per-run from the shared vetting.thresholds block; breach flips result at /sensor + /result.

Phase 2: upgraded CPUStress (stress-ng --cpu-method=all --verify + EDAC/MCE poll), Storage (fio --verify=md5 + SMART start/end delta), Network (sustained iperf + /proc/net/dev deltas) with per-profile knobs from Deps.

Phase 3: Burn super-stage with goroutine fan-out for CPU + memory + fio + iperf, PSU rails sampled across the Burn window, SensorMux (2 s flush, 500-sample cap) to absorb backpressure.

Phase 4: Firmware stage + firmware_snapshots table; probes dmidecode (BIOS), ipmitool (BMC), ethtool -i (NIC), nvme (sysfs + id-ctrl), lspci (HBA), /proc/cpuinfo (microcode). spec.DiffFirmware folds into SpecValidate with pin-by-identifier and fan-out-across-component matching; mismatches park the run in FailedHolding.

Phase 5: profile radio on the host start form, profile chip on the run header, Firmware section in the HTML report, coverage artifact uploaded from CI, agent/tests/fakes/ scaffold with Deps.LookPath seam + stress_ng and dmidecode example fakes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+64
-13
@@ -16,6 +16,7 @@ import (
|
||||
"github.com/go-chi/chi/v5"
|
||||
"gopkg.in/yaml.v3"
|
||||
|
||||
"vetting/internal/config"
|
||||
"vetting/internal/events"
|
||||
"vetting/internal/logs"
|
||||
"vetting/internal/model"
|
||||
@@ -26,17 +27,19 @@ import (
|
||||
)
|
||||
|
||||
type UI struct {
|
||||
Hosts *store.Hosts
|
||||
Runs *store.Runs
|
||||
Stages *store.Stages
|
||||
SubSteps *store.SubSteps
|
||||
SpecDiffs *store.SpecDiffs
|
||||
Artifacts *store.Artifacts
|
||||
EventHub *events.Hub
|
||||
Logs *logs.Hub
|
||||
Runner *orchestrator.Runner
|
||||
Tiles *TileEnricher
|
||||
PublicURL string // user-visible base URL baked into the quick-register one-liner
|
||||
Hosts *store.Hosts
|
||||
Runs *store.Runs
|
||||
Stages *store.Stages
|
||||
SubSteps *store.SubSteps
|
||||
SpecDiffs *store.SpecDiffs
|
||||
Artifacts *store.Artifacts
|
||||
Thresholds *store.Thresholds // Phase 1: seeded at StartRun from Profiles
|
||||
Profiles *config.ProfileRegistry
|
||||
EventHub *events.Hub
|
||||
Logs *logs.Hub
|
||||
Runner *orchestrator.Runner
|
||||
Tiles *TileEnricher
|
||||
PublicURL string // user-visible base URL baked into the quick-register one-liner
|
||||
// PXE, when non-nil, gets Reload()ed after host create/delete so
|
||||
// dnsmasq's dhcp-host= allowlist reflects the current registry.
|
||||
// Without this, a newly-registered host PXE-boots and gets
|
||||
@@ -316,23 +319,71 @@ func (u *UI) StartRun(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
nonDestructive := r.PostFormValue("non_destructive") == "1"
|
||||
profile := strings.TrimSpace(r.PostFormValue("profile"))
|
||||
if profile == "" {
|
||||
profile = config.ProfileQuick
|
||||
}
|
||||
if !config.IsValidProfile(profile) {
|
||||
http.Error(w, "unknown profile: "+profile, http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
_, hash, err := orchestrator.IssueRunToken()
|
||||
if err != nil {
|
||||
http.Error(w, "token: "+err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
runID, err := u.Runs.Create(r.Context(), hostID, hash, nonDestructive)
|
||||
runID, err := u.Runs.CreateWithProfile(r.Context(), hostID, hash, nonDestructive, profile)
|
||||
if err != nil {
|
||||
http.Error(w, "create run: "+err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
log.Printf("ui: created run %d for host %d (state=Queued)", runID, hostID)
|
||||
if err := u.seedThresholds(r.Context(), runID, host, profile); err != nil {
|
||||
// A threshold-seed failure shouldn't orphan a run row — log
|
||||
// and continue. Samples will just accumulate without a gate
|
||||
// until the operator retries, same as before Phase 1.
|
||||
log.Printf("ui: seed thresholds run %d: %v", runID, err)
|
||||
}
|
||||
log.Printf("ui: created run %d for host %d profile=%s (state=Queued)", runID, hostID, profile)
|
||||
// Send the operator straight to the new run — the button they clicked
|
||||
// was "Start vetting", the thing they want next is to watch it.
|
||||
http.Redirect(w, r, fmt.Sprintf("/runs/%d", runID), http.StatusSeeOther)
|
||||
}
|
||||
|
||||
// seedThresholds materializes the per-run threshold table from the
|
||||
// ProfileRegistry. The shared vetting.thresholds block applies to
|
||||
// every profile; future per-profile overrides will layer on top here,
|
||||
// and per-host overrides (Phase 1 extra) land via ExpectedSpecYAML in
|
||||
// a later iteration. Safe to skip silently when Thresholds or the
|
||||
// registry isn't wired — tests do not always build one.
|
||||
func (u *UI) seedThresholds(ctx context.Context, runID int64, host *model.Host, profile string) error {
|
||||
if u.Thresholds == nil || u.Profiles == nil {
|
||||
return nil
|
||||
}
|
||||
_ = host // reserved for per-host override layer
|
||||
_ = profile // reserved for per-profile override layer
|
||||
defaults := u.Profiles.Vetting.Thresholds
|
||||
if len(defaults) == 0 {
|
||||
return nil
|
||||
}
|
||||
specs := make([]store.ThresholdSpec, 0, len(defaults))
|
||||
for _, d := range defaults {
|
||||
specs = append(specs, store.ThresholdSpec{
|
||||
Stage: d.Stage,
|
||||
Kind: d.Kind,
|
||||
Key: d.Key,
|
||||
Op: d.Op,
|
||||
Value: d.Value,
|
||||
Nominal: d.Nominal,
|
||||
Unit: d.Unit,
|
||||
Severity: d.Severity,
|
||||
Source: "profile",
|
||||
})
|
||||
}
|
||||
_, err := u.Thresholds.SeedForRun(ctx, runID, specs)
|
||||
return err
|
||||
}
|
||||
|
||||
func (u *UI) NewHostForm(w http.ResponseWriter, r *http.Request) {
|
||||
_ = templates.Registration(templates.RegistrationForm{
|
||||
QuickRegisterURL: u.baseURL(r),
|
||||
|
||||
Reference in New Issue
Block a user