deep profile + threshold gating + firmware stage + Burn super-stage
Ships all five phases of the deep-profile overhaul together. Runs now carry a profile (quick/deep/soak); every profile walks the same 11-stage order — Inventory → Firmware → SpecValidate → SMART → CPUStress → Storage → Network → Burn → GPU → PSU → Reporting — with only per-stage durations and concurrency scaled. Phase 1: profiles.ProfileRegistry loaded from vetting.yaml; runs.profile column + CreateWithProfile; threshold table + evaluator seeded per-run from the shared vetting.thresholds block; breach flips result at /sensor + /result. Phase 2: upgraded CPUStress (stress-ng --cpu-method=all --verify + EDAC/MCE poll), Storage (fio --verify=md5 + SMART start/end delta), Network (sustained iperf + /proc/net/dev deltas) with per-profile knobs from Deps. Phase 3: Burn super-stage with goroutine fan-out for CPU + memory + fio + iperf, PSU rails sampled across the Burn window, SensorMux (2 s flush, 500-sample cap) to absorb backpressure. Phase 4: Firmware stage + firmware_snapshots table; probes dmidecode (BIOS), ipmitool (BMC), ethtool -i (NIC), nvme (sysfs + id-ctrl), lspci (HBA), /proc/cpuinfo (microcode). spec.DiffFirmware folds into SpecValidate with pin-by-identifier and fan-out-across-component matching; mismatches park the run in FailedHolding. Phase 5: profile radio on the host start form, profile chip on the run header, Firmware section in the HTML report, coverage artifact uploaded from CI, agent/tests/fakes/ scaffold with Deps.LookPath seam + stress_ng and dmidecode example fakes. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+17
-1
@@ -42,4 +42,20 @@ jobs:
|
|||||||
GOOS=linux GOARCH=amd64 go build ./...
|
GOOS=linux GOARCH=amd64 go build ./...
|
||||||
|
|
||||||
- name: Test
|
- name: Test
|
||||||
run: go test -race -count=1 ./...
|
run: go test -race -count=1 -coverprofile=coverage.out ./...
|
||||||
|
|
||||||
|
- name: Coverage summary
|
||||||
|
run: |
|
||||||
|
go tool cover -func=coverage.out | tee coverage.txt
|
||||||
|
go tool cover -html=coverage.out -o coverage.html
|
||||||
|
|
||||||
|
- name: Upload coverage artifact
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
if: always()
|
||||||
|
with:
|
||||||
|
name: coverage
|
||||||
|
path: |
|
||||||
|
coverage.out
|
||||||
|
coverage.txt
|
||||||
|
coverage.html
|
||||||
|
retention-days: 14
|
||||||
|
|||||||
@@ -124,6 +124,56 @@ type ClaimResponse struct {
|
|||||||
// at the right stage instead of silently replaying Inventory and
|
// at the right stage instead of silently replaying Inventory and
|
||||||
// letting the orchestrator advance past the crashed stage.
|
// letting the orchestrator advance past the crashed stage.
|
||||||
CurrentState string `json:"current_state"`
|
CurrentState string `json:"current_state"`
|
||||||
|
// StageConfig carries per-profile stage knobs (Phase 2): stage-level
|
||||||
|
// timeouts and probe-level durations/modes. Empty when the agent
|
||||||
|
// talks to a pre-Phase-2 orchestrator; the agent applies compile-
|
||||||
|
// time defaults in that case.
|
||||||
|
StageConfig ClaimStageConfig `json:"stage_config"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ClaimStageConfig mirrors config.StageConfig server-side — duplicated so
|
||||||
|
// the agent doesn't need to import internal/config. Durations arrive as
|
||||||
|
// strings ("2m", "2h") and are parsed by the tests package at the point
|
||||||
|
// of use. An empty field means "use the agent-side default" so a missing
|
||||||
|
// knob doesn't silently turn CPUStress / Storage into a no-op.
|
||||||
|
type ClaimStageConfig struct {
|
||||||
|
Profile string `json:"profile"`
|
||||||
|
StageTimeouts map[string]string `json:"stage_timeouts,omitempty"`
|
||||||
|
CPUStress ClaimCPUStressKnobs `json:"cpustress"`
|
||||||
|
Storage ClaimStorageKnobs `json:"storage"`
|
||||||
|
Network ClaimNetworkKnobs `json:"network"`
|
||||||
|
Burn ClaimBurnKnobs `json:"burn"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ClaimCPUStressKnobs holds the CPUStress stage knobs as strings. Every
// field is dropped from the JSON when empty.
// NOTE(review): the names suggest CPU-pass / memory-pass durations and an
// EDAC polling interval — confirm against the CPUStress stage.
type ClaimCPUStressKnobs struct {
	CPUPass  string `json:"cpu_pass,omitempty"`
	MemPass  string `json:"mem_pass,omitempty"`
	EDACPoll string `json:"edac_poll,omitempty"`
}
|
||||||
|
|
||||||
|
// ClaimStorageKnobs holds the Storage stage knobs as strings. Every field
// is dropped from the JSON when empty.
// NOTE(review): the Fio* fields and Verify are presumably forwarded to
// fio (size, runtime, block size, rw pattern, verify mode) — confirm
// against the Storage stage.
type ClaimStorageKnobs struct {
	Mode    string `json:"mode,omitempty"`
	FioSize string `json:"fio_size,omitempty"`
	FioTime string `json:"fio_time,omitempty"`
	FioBS   string `json:"fio_bs,omitempty"`
	FioRW   string `json:"fio_rw,omitempty"`
	Verify  string `json:"verify,omitempty"`
}
|
||||||
|
|
||||||
|
// ClaimNetworkKnobs holds the Network stage knobs. Duration is a string
// and is dropped from the JSON when empty.
type ClaimNetworkKnobs struct {
	Duration string `json:"duration,omitempty"`
}
|
||||||
|
|
||||||
|
// ClaimBurnKnobs is the agent-side copy of config.BurnKnobs.
//
// Duration and CPUWorkers are strings so that an empty value can mean
// "use the compile-time default". MemPct is a percentage in the range
// 0-100. IperfParallel is the parallel stream count handed to iperf3 -P.
// FioOnSpare decides whether fio runs inside the Burn stage. All fields
// are dropped from the JSON when they hold their zero value.
type ClaimBurnKnobs struct {
	Duration      string `json:"duration,omitempty"`
	CPUWorkers    string `json:"cpu_workers,omitempty"`
	MemPct        int    `json:"mem_pct,omitempty"`
	FioOnSpare    bool   `json:"fio_on_spare,omitempty"`
	IperfParallel int    `json:"iperf_parallel,omitempty"`
}
|
||||||
|
|
||||||
type ClaimExpectedDiskSpec struct {
|
type ClaimExpectedDiskSpec struct {
|
||||||
|
|||||||
@@ -0,0 +1,70 @@
|
|||||||
|
package probes
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// EDACSample is a single counter reading taken from
// /sys/devices/system/edac/mc/.
//
// Kind is either "edac_ce" (correctable ECC errors) or "edac_ue"
// (uncorrectable errors, always a critical signal). Key names the memory
// controller, for example "mc0". Value is the cumulative count since boot;
// the threshold evaluator flags it as soon as it rises above 0. Unit
// labels the value.
type EDACSample struct {
	Kind  string
	Key   string
	Value float64
	Unit  string
}
|
||||||
|
|
||||||
|
// EDAC returns one EDACSample per (memory-controller × {ce,ue}) pair
|
||||||
|
// that /sys exposes. Returns an empty slice when EDAC isn't available
|
||||||
|
// (virtualized host, missing kernel driver, mdadm-style boards without
|
||||||
|
// a controller node) — callers treat an empty return as "no data",
|
||||||
|
// not "passed". Errors are swallowed for the same reason: a hot-
|
||||||
|
// swapped DIMM that makes /sys blink briefly shouldn't fail the stage
|
||||||
|
// before the real counter can be read.
|
||||||
|
//
|
||||||
|
// This is intentionally small — the sidecar polls periodically, so one
|
||||||
|
// bad read is recovered on the next tick. The counters are monotonic,
|
||||||
|
// so emitting the current raw value is correct.
|
||||||
|
func EDAC() []EDACSample {
|
||||||
|
root := "/sys/devices/system/edac/mc"
|
||||||
|
entries, err := os.ReadDir(root)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
var out []EDACSample
|
||||||
|
for _, e := range entries {
|
||||||
|
name := e.Name()
|
||||||
|
if !strings.HasPrefix(name, "mc") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
base := filepath.Join(root, name)
|
||||||
|
if ce, ok := readCount(filepath.Join(base, "ce_count")); ok {
|
||||||
|
out = append(out, EDACSample{Kind: "edac_ce", Key: name, Value: ce, Unit: "count"})
|
||||||
|
}
|
||||||
|
if ue, ok := readCount(filepath.Join(base, "ue_count")); ok {
|
||||||
|
out = append(out, EDACSample{Kind: "edac_ue", Key: name, Value: ue, Unit: "count"})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// readCount loads the file at path, strips surrounding whitespace and
// parses the remainder as a base-10 signed 64-bit integer, returned as a
// float64. Any read or parse failure yields (0, false) so the caller can
// drop the sample without a diagnostic.
func readCount(path string) (float64, bool) {
	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		return 0, false
	}
	if n, parseErr := strconv.ParseInt(strings.TrimSpace(string(raw)), 10, 64); parseErr == nil {
		return float64(n), true
	}
	return 0, false
}
|
||||||
@@ -0,0 +1,496 @@
|
|||||||
|
package probes
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FirmwareSnapshot is the wire format the agent POSTs together with the
// Firmware stage result. It mirrors internal/store.FirmwareSnapshot
// without importing it; the /result handler converts to the store type
// and persists it.
//
// A single run yields many snapshots, one per BIOS / BMC / NIC port /
// HBA / microcode / NVMe. Identifier tells siblings apart (for example
// "eth0" vs "eth1") and Version is the canonical string that gets diffed.
type FirmwareSnapshot struct {
	// Component is one of bios|bmc|nic|hba|microcode|nvme_fw.
	Component  string `json:"component"`
	Identifier string `json:"identifier"`
	Version    string `json:"version"`
	// Vendor and Raw are left out of the JSON when empty.
	Vendor string            `json:"vendor,omitempty"`
	Raw    map[string]string `json:"raw,omitempty"`
}
|
||||||
|
|
||||||
|
// Firmware runs every sub-probe in sequence. Each one is bounded with
|
||||||
|
// a short timeout so a hung dmidecode / ipmitool / nvme tool can't
|
||||||
|
// freeze the stage — the probe is best-effort, missing tools produce
|
||||||
|
// empty output rather than an error. Returns the aggregated slice
|
||||||
|
// along with a list of probe-level warnings (surfaced in the stage
|
||||||
|
// summary so operators see which subsystem couldn't be read).
|
||||||
|
func Firmware(ctx context.Context) ([]FirmwareSnapshot, []string) {
|
||||||
|
var out []FirmwareSnapshot
|
||||||
|
var warnings []string
|
||||||
|
|
||||||
|
if snap, warn := probeBIOS(ctx); snap != nil {
|
||||||
|
out = append(out, *snap)
|
||||||
|
} else if warn != "" {
|
||||||
|
warnings = append(warnings, warn)
|
||||||
|
}
|
||||||
|
if snap, warn := probeBMC(ctx); snap != nil {
|
||||||
|
out = append(out, *snap)
|
||||||
|
} else if warn != "" {
|
||||||
|
warnings = append(warnings, warn)
|
||||||
|
}
|
||||||
|
out = append(out, probeNICFirmware(ctx)...)
|
||||||
|
out = append(out, probeNVMeFirmware(ctx)...)
|
||||||
|
out = append(out, probeHBAFirmware(ctx)...)
|
||||||
|
if snap := probeMicrocode(); snap != nil {
|
||||||
|
out = append(out, *snap)
|
||||||
|
}
|
||||||
|
|
||||||
|
return out, warnings
|
||||||
|
}
|
||||||
|
|
||||||
|
// firmwareCmdTimeout is the per-command budget for firmware probes. It is
// intentionally aggressive: the probes read device registers and can
// block on a wedged controller, and the stage should report "no HBA
// firmware readable" rather than hang the pipeline.
const firmwareCmdTimeout = 5 * time.Second

// runCmd executes a short-lived command bounded by firmwareCmdTimeout and
// returns its combined stdout+stderr.
//
// The output is returned even when the command fails so callers can put
// its first line into a diagnostic. When the failure was caused by the
// timeout or by cancellation of ctx, the returned error wraps the context
// error (context.DeadlineExceeded / context.Canceled) instead of the
// opaque "signal: killed" that os/exec reports for a killed child.
func runCmd(ctx context.Context, name string, args ...string) (string, error) {
	return runCmdTimeout(ctx, firmwareCmdTimeout, name, args...)
}

// runCmdTimeout is runCmd with an explicit timeout.
func runCmdTimeout(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
	cctx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	out, err := exec.CommandContext(cctx, name, args...).CombinedOutput()
	if err == nil {
		return string(out), nil
	}
	// cctx.Err() is only non-nil here if the deadline fired or the parent
	// was cancelled (our own cancel is deferred), so this distinguishes
	// "took too long" from "tool exited non-zero".
	if ctxErr := cctx.Err(); ctxErr != nil {
		return string(out), fmt.Errorf("%s: %w", name, ctxErr)
	}
	return string(out), err
}
|
||||||
|
|
||||||
|
// ----- BIOS --------------------------------------------------------------
|
||||||
|
|
||||||
|
// probeBIOS invokes dmidecode -t bios and parses the vendor + version
|
||||||
|
// lines. dmidecode must run as root; we let it fail gracefully when the
|
||||||
|
// agent is mis-deployed without privileges.
|
||||||
|
func probeBIOS(ctx context.Context) (*FirmwareSnapshot, string) {
|
||||||
|
if _, err := exec.LookPath("dmidecode"); err != nil {
|
||||||
|
return nil, "bios: dmidecode not installed"
|
||||||
|
}
|
||||||
|
out, err := runCmd(ctx, "dmidecode", "-t", "bios")
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Sprintf("bios: dmidecode failed: %v", trimErr(err, out))
|
||||||
|
}
|
||||||
|
snap := parseDmidecodeBIOS(strings.NewReader(out))
|
||||||
|
if snap == nil {
|
||||||
|
return nil, "bios: dmidecode produced no usable output"
|
||||||
|
}
|
||||||
|
return snap, ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseDmidecodeBIOS consumes `dmidecode -t bios` output and pulls
|
||||||
|
// Vendor / Version / Release Date. Kept as an io.Reader for unit tests.
|
||||||
|
func parseDmidecodeBIOS(r io.Reader) *FirmwareSnapshot {
|
||||||
|
kv := parseDmidecodeSection(r, "BIOS Information")
|
||||||
|
if kv == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
snap := &FirmwareSnapshot{
|
||||||
|
Component: "bios",
|
||||||
|
Identifier: "system",
|
||||||
|
Version: firstNonEmpty(kv["Version"], kv["Firmware Revision"]),
|
||||||
|
Vendor: kv["Vendor"],
|
||||||
|
Raw: kv,
|
||||||
|
}
|
||||||
|
if snap.Version == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return snap
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseDmidecodeSection extracts the "key: value" pairs of the first
// dmidecode block whose title line equals title. Blocks look like:
//
//	Handle 0x0000, ...
//	BIOS Information
//		Vendor: American Megatrends
//		Version: 3.0
//		...
//
// Collection starts at the title line and ends at the next line beginning
// with "Handle " or at end of input. Blank lines are skipped. A key with
// nothing after the colon (such as "Characteristics:" heading a bulleted
// sub-list) is dropped, and sub-list lines without a colon are ignored.
// Returns nil when the title never appears; the map may be empty when the
// block has no usable pairs.
func parseDmidecodeSection(r io.Reader, title string) map[string]string {
	scanner := bufio.NewScanner(r)
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)

	// section stays nil until the title line has been seen; afterwards it
	// doubles as the "currently inside the block" flag.
	var section map[string]string
	for scanner.Scan() {
		raw := scanner.Text()
		text := strings.TrimSpace(raw)
		switch {
		case strings.HasPrefix(raw, "Handle "):
			// The next handle closes the block being collected.
			if section != nil {
				return section
			}
		case section == nil:
			if text == title {
				section = map[string]string{}
			}
		case text == "":
			// Blank separator inside the block.
		default:
			key, val, found := strings.Cut(text, ":")
			if val = strings.TrimSpace(val); found && val != "" {
				section[strings.TrimSpace(key)] = val
			}
		}
	}
	return section
}
|
||||||
|
|
||||||
|
// ----- BMC / IPMI --------------------------------------------------------
|
||||||
|
|
||||||
|
// probeBMC walks `ipmitool mc info`. Home-lab hosts often lack a BMC —
|
||||||
|
// missing binary or a non-zero exit returns a warning without failing
|
||||||
|
// the stage. We capture Firmware Revision + Manufacturer as the version.
|
||||||
|
func probeBMC(ctx context.Context) (*FirmwareSnapshot, string) {
|
||||||
|
if _, err := exec.LookPath("ipmitool"); err != nil {
|
||||||
|
return nil, "bmc: ipmitool not installed"
|
||||||
|
}
|
||||||
|
out, err := runCmd(ctx, "ipmitool", "mc", "info")
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Sprintf("bmc: ipmitool mc info failed: %v", trimErr(err, out))
|
||||||
|
}
|
||||||
|
snap := parseIpmitoolMCInfo(strings.NewReader(out))
|
||||||
|
if snap == nil {
|
||||||
|
return nil, "bmc: ipmitool output not parseable"
|
||||||
|
}
|
||||||
|
return snap, ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseIpmitoolMCInfo pulls "Firmware Revision" + "Manufacturer Name"
|
||||||
|
// from the textual output. Format is indented key : value lines.
|
||||||
|
func parseIpmitoolMCInfo(r io.Reader) *FirmwareSnapshot {
|
||||||
|
sc := bufio.NewScanner(r)
|
||||||
|
kv := map[string]string{}
|
||||||
|
for sc.Scan() {
|
||||||
|
line := strings.TrimSpace(sc.Text())
|
||||||
|
if k, v, ok := strings.Cut(line, ":"); ok {
|
||||||
|
kv[strings.TrimSpace(k)] = strings.TrimSpace(v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
version := firstNonEmpty(kv["Firmware Revision"], kv["Aux Firmware Rev Info"])
|
||||||
|
if version == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return &FirmwareSnapshot{
|
||||||
|
Component: "bmc",
|
||||||
|
Identifier: "bmc0",
|
||||||
|
Version: version,
|
||||||
|
Vendor: kv["Manufacturer Name"],
|
||||||
|
Raw: kv,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----- NIC firmware ------------------------------------------------------
|
||||||
|
|
||||||
|
// probeNICFirmware enumerates /sys/class/net/*/device and calls
|
||||||
|
// `ethtool -i <iface>` on each real NIC (skip lo, bridges, virtuals).
|
||||||
|
// One snapshot per interface so a mismatched port lights up in the diff
|
||||||
|
// without silencing sibling ports.
|
||||||
|
func probeNICFirmware(ctx context.Context) []FirmwareSnapshot {
|
||||||
|
if _, err := exec.LookPath("ethtool"); err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
ifaces, err := os.ReadDir("/sys/class/net")
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
var out []FirmwareSnapshot
|
||||||
|
for _, entry := range ifaces {
|
||||||
|
name := entry.Name()
|
||||||
|
if !isRealNIC(name) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
raw, err := runCmd(ctx, "ethtool", "-i", name)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
snap := parseEthtoolI(strings.NewReader(raw), name)
|
||||||
|
if snap != nil {
|
||||||
|
out = append(out, *snap)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseEthtoolI extracts driver/firmware-version from `ethtool -i`
|
||||||
|
// output. Lines are "key: value" with a consistent prefix order.
|
||||||
|
func parseEthtoolI(r io.Reader, iface string) *FirmwareSnapshot {
|
||||||
|
sc := bufio.NewScanner(r)
|
||||||
|
kv := map[string]string{}
|
||||||
|
for sc.Scan() {
|
||||||
|
line := sc.Text()
|
||||||
|
if k, v, ok := strings.Cut(line, ":"); ok {
|
||||||
|
kv[strings.TrimSpace(k)] = strings.TrimSpace(v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if kv["firmware-version"] == "" && kv["driver"] == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return &FirmwareSnapshot{
|
||||||
|
Component: "nic",
|
||||||
|
Identifier: iface,
|
||||||
|
Version: kv["firmware-version"],
|
||||||
|
Vendor: kv["driver"],
|
||||||
|
Raw: kv,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// isRealNIC reports whether name looks like a physical network interface.
// It rejects the empty name, loopback, and names starting with one of the
// known virtual prefixes (docker, br-, veth, virbr, tun, tap, bond) that
// ethtool would refuse. What is left must also have a `device` entry
// under /sys/class/net/<name>: real PCI NICs do, pure virtuals such as
// dummy0 or wg* do not.
func isRealNIC(name string) bool {
	switch name {
	case "", "lo":
		return false
	}

	virtualPrefixes := [...]string{"docker", "br-", "veth", "virbr", "tun", "tap", "bond"}
	for i := range virtualPrefixes {
		if strings.HasPrefix(name, virtualPrefixes[i]) {
			return false
		}
	}

	_, statErr := os.Stat(filepath.Join("/sys/class/net", name, "device"))
	return statErr == nil
}
|
||||||
|
|
||||||
|
// ----- NVMe --------------------------------------------------------------
|
||||||
|
|
||||||
|
// probeNVMeFirmware reads /sys/class/nvme/nvmeN/firmware_rev for every
|
||||||
|
// controller. Falls back to `nvme id-ctrl` if the sysfs file is missing
|
||||||
|
// (older kernels). Identifier is the controller path so a run with two
|
||||||
|
// drives produces two snapshots.
|
||||||
|
func probeNVMeFirmware(ctx context.Context) []FirmwareSnapshot {
|
||||||
|
entries, err := os.ReadDir("/sys/class/nvme")
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
var out []FirmwareSnapshot
|
||||||
|
for _, e := range entries {
|
||||||
|
ctrl := e.Name()
|
||||||
|
rev := strings.TrimSpace(readFile(filepath.Join("/sys/class/nvme", ctrl, "firmware_rev")))
|
||||||
|
model := strings.TrimSpace(readFile(filepath.Join("/sys/class/nvme", ctrl, "model")))
|
||||||
|
if rev == "" {
|
||||||
|
// Fallback: nvme id-ctrl -H /dev/<ctrl>. Available on hosts
|
||||||
|
// where sysfs doesn't export firmware_rev.
|
||||||
|
if _, err := exec.LookPath("nvme"); err == nil {
|
||||||
|
raw, _ := runCmd(ctx, "nvme", "id-ctrl", "/dev/"+ctrl)
|
||||||
|
rev = parseNVMeIDCtrl(strings.NewReader(raw), "fr")
|
||||||
|
if model == "" {
|
||||||
|
model = parseNVMeIDCtrl(strings.NewReader(raw), "mn")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if rev == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out = append(out, FirmwareSnapshot{
|
||||||
|
Component: "nvme_fw",
|
||||||
|
Identifier: ctrl,
|
||||||
|
Version: rev,
|
||||||
|
Vendor: model,
|
||||||
|
Raw: map[string]string{"model": model, "firmware_rev": rev},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseNVMeIDCtrl returns the value of one field from `nvme id-ctrl`
// output, for example "fr : FW1234" or "mn : Samsung SSD 980 PRO".
// A line matches when, after trimming, it starts with key followed by a
// space; the value is everything after the first colon, trimmed, so it
// may itself contain spaces. Returns "" when no matching line has a colon.
func parseNVMeIDCtrl(r io.Reader, key string) string {
	want := key + " "
	for lines := bufio.NewScanner(r); lines.Scan(); {
		text := strings.TrimSpace(lines.Text())
		if !strings.HasPrefix(text, want) {
			continue
		}
		if _, val, found := strings.Cut(text, ":"); found {
			return strings.TrimSpace(val)
		}
	}
	return ""
}
|
||||||
|
|
||||||
|
// ----- HBA ---------------------------------------------------------------
|
||||||
|
|
||||||
|
var lspciClassHBA = regexp.MustCompile(`(?i)(serial attached scsi|sas controller|raid bus controller)`)
|
||||||
|
|
||||||
|
// probeHBAFirmware looks for SAS/RAID HBAs via `lspci -Dvvnn`. The
|
||||||
|
// firmware string is typically exposed as "Product Name" +
|
||||||
|
// "Capabilities" but in practice the LSI/Broadcom driver writes a
|
||||||
|
// "revision" on the device line. We capture what's printed and rely on
|
||||||
|
// SpecValidate to diff — this keeps us off tool-specific CLIs (storcli,
|
||||||
|
// mpt-status) that aren't always installed.
|
||||||
|
func probeHBAFirmware(ctx context.Context) []FirmwareSnapshot {
|
||||||
|
if _, err := exec.LookPath("lspci"); err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
out, err := runCmd(ctx, "lspci", "-Dvvnn")
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return parseLspciHBA(strings.NewReader(out))
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseLspciHBA walks `lspci -Dvvnn` stanzas and picks SAS/RAID
|
||||||
|
// controllers. One snapshot per device; identifier is the PCI address.
|
||||||
|
// Version is the device line's revision (rev NN) or the Kernel modules
|
||||||
|
// string when no rev is printed.
|
||||||
|
func parseLspciHBA(r io.Reader) []FirmwareSnapshot {
|
||||||
|
sc := bufio.NewScanner(r)
|
||||||
|
sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)
|
||||||
|
var out []FirmwareSnapshot
|
||||||
|
var cur *FirmwareSnapshot
|
||||||
|
revRe := regexp.MustCompile(`\(rev\s+([0-9a-fA-F]+)\)`)
|
||||||
|
flush := func() {
|
||||||
|
if cur != nil && cur.Version != "" {
|
||||||
|
out = append(out, *cur)
|
||||||
|
}
|
||||||
|
cur = nil
|
||||||
|
}
|
||||||
|
for sc.Scan() {
|
||||||
|
line := sc.Text()
|
||||||
|
if !strings.HasPrefix(line, "\t") && strings.Contains(line, " ") {
|
||||||
|
// New device line.
|
||||||
|
flush()
|
||||||
|
if lspciClassHBA.MatchString(line) {
|
||||||
|
addr, rest, _ := strings.Cut(line, " ")
|
||||||
|
cur = &FirmwareSnapshot{
|
||||||
|
Component: "hba",
|
||||||
|
Identifier: addr,
|
||||||
|
Vendor: strings.TrimSpace(rest),
|
||||||
|
Raw: map[string]string{"device_line": line},
|
||||||
|
}
|
||||||
|
if m := revRe.FindStringSubmatch(line); len(m) == 2 {
|
||||||
|
cur.Version = "rev " + m[1]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if cur == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
trim := strings.TrimSpace(line)
|
||||||
|
if strings.HasPrefix(trim, "Kernel modules:") {
|
||||||
|
cur.Raw["kernel_modules"] = strings.TrimPrefix(trim, "Kernel modules:")
|
||||||
|
}
|
||||||
|
if strings.HasPrefix(trim, "Kernel driver in use:") {
|
||||||
|
cur.Raw["kernel_driver"] = strings.TrimPrefix(trim, "Kernel driver in use:")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
flush()
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----- Microcode ---------------------------------------------------------
|
||||||
|
|
||||||
|
// probeMicrocode reads /proc/cpuinfo for the "microcode" line. All
|
||||||
|
// cores report the same value post-boot, so one snapshot is enough.
|
||||||
|
func probeMicrocode() *FirmwareSnapshot {
|
||||||
|
f, err := os.Open("/proc/cpuinfo")
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
defer func() { _ = f.Close() }()
|
||||||
|
snap := parseMicrocode(f)
|
||||||
|
return snap
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseMicrocode(r io.Reader) *FirmwareSnapshot {
|
||||||
|
sc := bufio.NewScanner(r)
|
||||||
|
version := ""
|
||||||
|
vendor := ""
|
||||||
|
for sc.Scan() {
|
||||||
|
line := sc.Text()
|
||||||
|
k, v, ok := strings.Cut(line, ":")
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
key := strings.TrimSpace(k)
|
||||||
|
val := strings.TrimSpace(v)
|
||||||
|
switch key {
|
||||||
|
case "microcode":
|
||||||
|
if version == "" {
|
||||||
|
version = val
|
||||||
|
}
|
||||||
|
case "vendor_id":
|
||||||
|
if vendor == "" {
|
||||||
|
vendor = val
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if version != "" && vendor != "" {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if version == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return &FirmwareSnapshot{
|
||||||
|
Component: "microcode",
|
||||||
|
Identifier: "cpu",
|
||||||
|
Version: version,
|
||||||
|
Vendor: vendor,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----- helpers -----------------------------------------------------------
|
||||||
|
|
||||||
|
// firstNonEmpty returns the first argument that is not blank, i.e. not
// empty once surrounding whitespace is ignored. The argument is returned
// as passed, without trimming. Returns "" when every argument is blank or
// none are given.
func firstNonEmpty(ss ...string) string {
	for i := range ss {
		if candidate := ss[i]; strings.TrimSpace(candidate) != "" {
			return candidate
		}
	}
	return ""
}
|
||||||
|
|
||||||
|
// readFile returns the entire content of the file at p as a string, or ""
// when the file cannot be read. The error is dropped on purpose: callers
// treat an unreadable attribute the same as an empty one.
func readFile(p string) string {
	if data, err := os.ReadFile(p); err == nil {
		return string(data)
	}
	return ""
}
|
||||||
|
|
||||||
|
// trimErr renders err together with the first line of the command's
// combined output as "<err> (<first line>)", so a warning carries enough
// diagnostic context without a screenful of dmidecode/ipmitool noise.
// When the output is blank, only err's own message is returned.
func trimErr(err error, out string) string {
	head, _, _ := strings.Cut(strings.TrimSpace(out), "\n")
	if head != "" {
		return fmt.Sprintf("%v (%s)", err, head)
	}
	return err.Error()
}
|
||||||
@@ -0,0 +1,232 @@
|
|||||||
|
package probes
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Golden dmidecode -t bios output (trimmed, representative). A real
|
||||||
|
// host will have more lines; parse must tolerate the unknown fields.
|
||||||
|
const dmidecodeBIOS = `# dmidecode 3.3
|
||||||
|
Getting SMBIOS data from sysfs.
|
||||||
|
SMBIOS 3.2.0 present.
|
||||||
|
|
||||||
|
Handle 0x0000, DMI type 0, 26 bytes
|
||||||
|
BIOS Information
|
||||||
|
Vendor: American Megatrends Inc.
|
||||||
|
Version: 3.2
|
||||||
|
Release Date: 07/15/2021
|
||||||
|
Address: 0xF0000
|
||||||
|
Runtime Size: 64 kB
|
||||||
|
ROM Size: 32 MB
|
||||||
|
Characteristics:
|
||||||
|
PCI is supported
|
||||||
|
BIOS is upgradeable
|
||||||
|
|
||||||
|
Handle 0x0001, DMI type 1, 27 bytes
|
||||||
|
System Information
|
||||||
|
Manufacturer: Supermicro
|
||||||
|
Product Name: X11SSL-F
|
||||||
|
`
|
||||||
|
|
||||||
|
func TestParseDmidecodeBIOS(t *testing.T) {
|
||||||
|
snap := parseDmidecodeBIOS(strings.NewReader(dmidecodeBIOS))
|
||||||
|
if snap == nil {
|
||||||
|
t.Fatal("parseDmidecodeBIOS returned nil")
|
||||||
|
}
|
||||||
|
if snap.Component != "bios" {
|
||||||
|
t.Errorf("component = %q, want bios", snap.Component)
|
||||||
|
}
|
||||||
|
if snap.Version != "3.2" {
|
||||||
|
t.Errorf("version = %q, want 3.2", snap.Version)
|
||||||
|
}
|
||||||
|
if snap.Vendor != "American Megatrends Inc." {
|
||||||
|
t.Errorf("vendor = %q, want American Megatrends Inc.", snap.Vendor)
|
||||||
|
}
|
||||||
|
if snap.Raw["Release Date"] != "07/15/2021" {
|
||||||
|
t.Errorf("release date = %q, want 07/15/2021", snap.Raw["Release Date"])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseDmidecodeBIOSMissingBlock(t *testing.T) {
|
||||||
|
// No BIOS Information block → nil result, not a crash.
|
||||||
|
input := "Handle 0x0001, DMI type 1, 27 bytes\nSystem Information\n\tManufacturer: Acme\n"
|
||||||
|
if snap := parseDmidecodeBIOS(strings.NewReader(input)); snap != nil {
|
||||||
|
t.Fatalf("expected nil when BIOS block absent, got %+v", snap)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const ipmitoolMCInfo = `Device ID : 32
|
||||||
|
Device Revision : 1
|
||||||
|
Firmware Revision : 1.74
|
||||||
|
IPMI Version : 2.0
|
||||||
|
Manufacturer ID : 10876
|
||||||
|
Manufacturer Name : Supermicro
|
||||||
|
Product ID : 2051 (0x0803)
|
||||||
|
Product Name : Unknown (0x803)
|
||||||
|
`
|
||||||
|
|
||||||
|
func TestParseIpmitoolMCInfo(t *testing.T) {
|
||||||
|
snap := parseIpmitoolMCInfo(strings.NewReader(ipmitoolMCInfo))
|
||||||
|
if snap == nil {
|
||||||
|
t.Fatal("parseIpmitoolMCInfo returned nil")
|
||||||
|
}
|
||||||
|
if snap.Component != "bmc" {
|
||||||
|
t.Errorf("component = %q, want bmc", snap.Component)
|
||||||
|
}
|
||||||
|
if snap.Version != "1.74" {
|
||||||
|
t.Errorf("version = %q, want 1.74", snap.Version)
|
||||||
|
}
|
||||||
|
if snap.Vendor != "Supermicro" {
|
||||||
|
t.Errorf("vendor = %q, want Supermicro", snap.Vendor)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseIpmitoolMCInfoEmpty(t *testing.T) {
|
||||||
|
if snap := parseIpmitoolMCInfo(strings.NewReader("")); snap != nil {
|
||||||
|
t.Fatalf("expected nil on empty input, got %+v", snap)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const ethtoolEth0 = `driver: mlx5_core
|
||||||
|
version: 5.15.0
|
||||||
|
firmware-version: 16.32.1010 (MT_0000000008)
|
||||||
|
expansion-rom-version:
|
||||||
|
bus-info: 0000:5e:00.0
|
||||||
|
supports-statistics: yes
|
||||||
|
`
|
||||||
|
|
||||||
|
func TestParseEthtoolI(t *testing.T) {
|
||||||
|
snap := parseEthtoolI(strings.NewReader(ethtoolEth0), "eth0")
|
||||||
|
if snap == nil {
|
||||||
|
t.Fatal("parseEthtoolI returned nil")
|
||||||
|
}
|
||||||
|
if snap.Component != "nic" || snap.Identifier != "eth0" {
|
||||||
|
t.Errorf("component/id = %q/%q, want nic/eth0", snap.Component, snap.Identifier)
|
||||||
|
}
|
||||||
|
if snap.Version != "16.32.1010 (MT_0000000008)" {
|
||||||
|
t.Errorf("version = %q, want 16.32.1010 (MT_0000000008)", snap.Version)
|
||||||
|
}
|
||||||
|
if snap.Vendor != "mlx5_core" {
|
||||||
|
t.Errorf("vendor = %q, want mlx5_core", snap.Vendor)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseEthtoolIEmpty(t *testing.T) {
|
||||||
|
if snap := parseEthtoolI(strings.NewReader("not a valid output"), "eth0"); snap != nil {
|
||||||
|
t.Fatalf("expected nil on garbage input, got %+v", snap)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const nvmeIDCtrl = `NVME Identify Controller:
|
||||||
|
vid : 0x144d
|
||||||
|
ssvid : 0x144d
|
||||||
|
sn : S5GYNX0R500123X
|
||||||
|
mn : Samsung SSD 980 PRO 1TB
|
||||||
|
fr : 5B2QGXA7
|
||||||
|
rab : 2
|
||||||
|
`
|
||||||
|
|
||||||
|
func TestParseNVMeIDCtrl(t *testing.T) {
|
||||||
|
if got := parseNVMeIDCtrl(strings.NewReader(nvmeIDCtrl), "fr"); got != "5B2QGXA7" {
|
||||||
|
t.Errorf("fr = %q, want 5B2QGXA7", got)
|
||||||
|
}
|
||||||
|
if got := parseNVMeIDCtrl(strings.NewReader(nvmeIDCtrl), "mn"); got != "Samsung SSD 980 PRO 1TB" {
|
||||||
|
t.Errorf("mn = %q, want Samsung SSD 980 PRO 1TB", got)
|
||||||
|
}
|
||||||
|
if got := parseNVMeIDCtrl(strings.NewReader(nvmeIDCtrl), "missing"); got != "" {
|
||||||
|
t.Errorf("missing key should be empty, got %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const lspciHBA = `0000:01:00.0 Ethernet controller [0200]: Intel Corporation I350 [8086:1521] (rev 01)
|
||||||
|
Subsystem: Intel Corporation I350 [8086:0001]
|
||||||
|
Kernel driver in use: igb
|
||||||
|
Kernel modules: igb
|
||||||
|
|
||||||
|
0000:03:00.0 Serial Attached SCSI controller [0107]: Broadcom / LSI SAS3008 PCI-Express Fusion-MPT SAS-3 [1000:0097] (rev 02)
|
||||||
|
Subsystem: Broadcom / LSI SAS9300-8i [1000:30e0]
|
||||||
|
Kernel driver in use: mpt3sas
|
||||||
|
Kernel modules: mpt3sas
|
||||||
|
|
||||||
|
0000:04:00.0 RAID bus controller [0104]: LSI MegaRAID SAS-3 3108 [1000:005d] (rev 02)
|
||||||
|
Subsystem: LSI MegaRAID SAS 9361-8i [1000:9361]
|
||||||
|
Kernel driver in use: megaraid_sas
|
||||||
|
Kernel modules: megaraid_sas
|
||||||
|
`
|
||||||
|
|
||||||
|
func TestParseLspciHBA(t *testing.T) {
|
||||||
|
got := parseLspciHBA(strings.NewReader(lspciHBA))
|
||||||
|
if len(got) != 2 {
|
||||||
|
t.Fatalf("got %d HBA snapshots, want 2 (SAS + RAID; Ethernet must be skipped)", len(got))
|
||||||
|
}
|
||||||
|
for _, s := range got {
|
||||||
|
if s.Component != "hba" {
|
||||||
|
t.Errorf("component = %q, want hba", s.Component)
|
||||||
|
}
|
||||||
|
if s.Version != "rev 02" {
|
||||||
|
t.Errorf("version = %q, want 'rev 02'", s.Version)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if got[0].Identifier != "0000:03:00.0" {
|
||||||
|
t.Errorf("first identifier = %q, want 0000:03:00.0", got[0].Identifier)
|
||||||
|
}
|
||||||
|
if got[1].Identifier != "0000:04:00.0" {
|
||||||
|
t.Errorf("second identifier = %q, want 0000:04:00.0", got[1].Identifier)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const cpuinfo = `processor : 0
|
||||||
|
vendor_id : GenuineIntel
|
||||||
|
cpu family : 6
|
||||||
|
model : 85
|
||||||
|
model name : Intel(R) Xeon(R) Gold 6230 CPU @ 2.10GHz
|
||||||
|
stepping : 7
|
||||||
|
microcode : 0x5003006
|
||||||
|
cpu MHz : 2100.000
|
||||||
|
`
|
||||||
|
|
||||||
|
func TestParseMicrocode(t *testing.T) {
|
||||||
|
snap := parseMicrocode(strings.NewReader(cpuinfo))
|
||||||
|
if snap == nil {
|
||||||
|
t.Fatal("parseMicrocode returned nil")
|
||||||
|
}
|
||||||
|
if snap.Version != "0x5003006" {
|
||||||
|
t.Errorf("version = %q, want 0x5003006", snap.Version)
|
||||||
|
}
|
||||||
|
if snap.Vendor != "GenuineIntel" {
|
||||||
|
t.Errorf("vendor = %q, want GenuineIntel", snap.Vendor)
|
||||||
|
}
|
||||||
|
if snap.Identifier != "cpu" {
|
||||||
|
t.Errorf("identifier = %q, want cpu", snap.Identifier)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseMicrocodeMissing(t *testing.T) {
|
||||||
|
// A /proc/cpuinfo without a microcode line returns nil.
|
||||||
|
input := "processor\t: 0\nvendor_id\t: GenuineIntel\n"
|
||||||
|
if snap := parseMicrocode(strings.NewReader(input)); snap != nil {
|
||||||
|
t.Fatalf("expected nil when microcode line absent, got %+v", snap)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIsRealNIC(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
want bool // want=true means a real-looking name (the /sys/class/net/<name>/device check is skipped here)
|
||||||
|
}{
|
||||||
|
{"lo", false},
|
||||||
|
{"", false},
|
||||||
|
{"docker0", false},
|
||||||
|
{"br-abc", false},
|
||||||
|
{"veth1234", false},
|
||||||
|
{"virbr0", false},
|
||||||
|
{"bond0", false},
|
||||||
|
{"tun0", false},
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
if got := isRealNIC(tc.name); got != tc.want {
|
||||||
|
t.Errorf("isRealNIC(%q) = %v, want %v", tc.name, got, tc.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,85 @@
|
|||||||
|
package probes
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NetDevSnapshot is the per-interface counter row from /proc/net/dev at
// a single instant. Used by the Network stage to compute deltas across
// an iperf window — a rising rx_errors or tx_dropped during a loaded
// link is a real NIC problem, not general noise.
type NetDevSnapshot struct {
	Iface   string // interface name as printed before the colon
	RxBytes uint64 // receive column 0
	RxErrs  uint64 // receive column 2
	RxDrop  uint64 // receive column 3
	TxBytes uint64 // transmit column 8
	TxErrs  uint64 // transmit column 10
	TxDrop  uint64 // transmit column 11
}

// NetDev reads /proc/net/dev and returns one snapshot per non-loopback
// interface. Returns nil on read/parse failure (best-effort: a missing
// /proc is survivable; the caller skips delta reporting that tick).
func NetDev() []NetDevSnapshot {
	f, err := os.Open("/proc/net/dev")
	if err != nil {
		return nil
	}
	// Read-only handle: a Close error carries nothing actionable.
	defer func() { _ = f.Close() }()
	return parseNetDev(f)
}

// parseNetDev is split from NetDev so tests can feed a fixture without
// touching the real /proc. The /proc/net/dev format is two header lines
// followed by rows of
// "iface: rx_bytes rx_packets rx_errs rx_drop ... tx_bytes tx_packets tx_errs tx_drop ..."
// — 16 whitespace-separated counters, of which we pull a curated six.
//
// Rows are skipped (never fatal) when they have no colon, name the
// loopback interface, carry fewer than 16 columns, or hold a curated
// counter that is not an unsigned decimal integer. A read error from r
// discards everything and returns nil, so callers never see a snapshot
// that was silently truncated mid-stream.
func parseNetDev(r io.Reader) []NetDevSnapshot {
	const wantColumns = 16

	sc := bufio.NewScanner(r)
	// Skip the two header lines (iface || bytes ... || bytes ...).
	for i := 0; i < 2 && sc.Scan(); i++ {
	}

	var out []NetDevSnapshot
	for sc.Scan() {
		name, counters, found := strings.Cut(sc.Text(), ":")
		if !found {
			continue
		}
		iface := strings.TrimSpace(name)
		if iface == "" || iface == "lo" {
			continue
		}
		fields := strings.Fields(counters)
		if len(fields) < wantColumns {
			continue
		}
		snap, ok := netDevRow(iface, fields)
		if !ok {
			continue
		}
		out = append(out, snap)
	}
	if sc.Err() != nil {
		return nil
	}
	return out
}

// netDevRow builds a snapshot from one row's counter columns. The caller
// guarantees len(fields) >= 16. It reports false when any of the six
// curated counters fails to parse; treating such a row as malformed is
// safer than recording a zero that is indistinguishable from a real
// reading.
//
// /proc/net/dev columns:
//
//	0 rx_bytes 1 rx_packets 2 rx_errs 3 rx_drop 4 fifo 5 frame 6 compressed 7 multicast
//	8 tx_bytes 9 tx_packets 10 tx_errs 11 tx_drop 12 fifo 13 colls 14 carrier 15 compressed
func netDevRow(iface string, fields []string) (NetDevSnapshot, bool) {
	snap := NetDevSnapshot{Iface: iface}
	targets := []struct {
		col int
		dst *uint64
	}{
		{0, &snap.RxBytes},
		{2, &snap.RxErrs},
		{3, &snap.RxDrop},
		{8, &snap.TxBytes},
		{10, &snap.TxErrs},
		{11, &snap.TxDrop},
	}
	for _, t := range targets {
		n, ok := parseU64Strict(fields[t.col])
		if !ok {
			return NetDevSnapshot{}, false
		}
		*t.dst = n
	}
	return snap, true
}

// parseU64Strict parses s as a base-10 unsigned 64-bit integer and
// reports whether the parse succeeded.
func parseU64Strict(s string) (uint64, bool) {
	n, err := strconv.ParseUint(s, 10, 64)
	if err != nil {
		return 0, false
	}
	return n, true
}

// parseU64 is the lenient form of parseU64Strict: any parse failure
// (syntax or range) yields 0.
func parseU64(s string) uint64 {
	n, _ := parseU64Strict(s)
	return n
}
|
||||||
@@ -0,0 +1,84 @@
|
|||||||
|
package probes
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestParseNetDev_RealSample exercises parseNetDev against a synthetic
|
||||||
|
// /proc/net/dev fixture with the full 16-column layout. Confirms the
|
||||||
|
// loopback interface is dropped, headers are skipped, and each of the
|
||||||
|
// six curated counters lands in the right field.
|
||||||
|
func TestParseNetDev_RealSample(t *testing.T) {
|
||||||
|
// Columns after "iface:":
|
||||||
|
// 0 rx_bytes 1 rx_packets 2 rx_errs 3 rx_drop
|
||||||
|
// 4 fifo 5 frame 6 compressed 7 multicast
|
||||||
|
// 8 tx_bytes 9 tx_packets 10 tx_errs 11 tx_drop
|
||||||
|
// 12 fifo 13 colls 14 carrier 15 compressed
|
||||||
|
fixture := `Inter-| Receive | Transmit
|
||||||
|
face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed
|
||||||
|
lo: 1000000 10000 0 0 0 0 0 0 1000000 10000 0 0 0 0 0 0
|
||||||
|
eth0: 50000000 100000 7 12 0 0 0 0 40000000 90000 3 5 0 0 0 0
|
||||||
|
eth1: 12345 200 0 0 0 0 0 0 54321 180 0 0 0 0 0 0
|
||||||
|
`
|
||||||
|
snaps := parseNetDev(strings.NewReader(fixture))
|
||||||
|
if len(snaps) != 2 {
|
||||||
|
t.Fatalf("got %d snapshots, want 2 (lo should be dropped)", len(snaps))
|
||||||
|
}
|
||||||
|
byIface := map[string]NetDevSnapshot{}
|
||||||
|
for _, s := range snaps {
|
||||||
|
byIface[s.Iface] = s
|
||||||
|
}
|
||||||
|
eth0, ok := byIface["eth0"]
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("eth0 missing from parsed snapshots")
|
||||||
|
}
|
||||||
|
if eth0.RxBytes != 50000000 {
|
||||||
|
t.Errorf("eth0 RxBytes=%d, want 50000000", eth0.RxBytes)
|
||||||
|
}
|
||||||
|
if eth0.RxErrs != 7 {
|
||||||
|
t.Errorf("eth0 RxErrs=%d, want 7", eth0.RxErrs)
|
||||||
|
}
|
||||||
|
if eth0.RxDrop != 12 {
|
||||||
|
t.Errorf("eth0 RxDrop=%d, want 12", eth0.RxDrop)
|
||||||
|
}
|
||||||
|
if eth0.TxBytes != 40000000 {
|
||||||
|
t.Errorf("eth0 TxBytes=%d, want 40000000", eth0.TxBytes)
|
||||||
|
}
|
||||||
|
if eth0.TxErrs != 3 {
|
||||||
|
t.Errorf("eth0 TxErrs=%d, want 3", eth0.TxErrs)
|
||||||
|
}
|
||||||
|
if eth0.TxDrop != 5 {
|
||||||
|
t.Errorf("eth0 TxDrop=%d, want 5", eth0.TxDrop)
|
||||||
|
}
|
||||||
|
if _, ok := byIface["lo"]; ok {
|
||||||
|
t.Errorf("lo should have been filtered out")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestParseNetDev_Empty: an empty reader returns no snapshots, not a
|
||||||
|
// crash. Callers treat nil as "no data" and skip the delta step.
|
||||||
|
func TestParseNetDev_Empty(t *testing.T) {
|
||||||
|
snaps := parseNetDev(strings.NewReader(""))
|
||||||
|
if len(snaps) != 0 {
|
||||||
|
t.Errorf("got %d snapshots from empty reader, want 0", len(snaps))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestParseNetDev_MalformedRow skips rows that don't have the expected
|
||||||
|
// 16 columns rather than panicking. A truncated line shouldn't hide the
|
||||||
|
// good rows that follow.
|
||||||
|
func TestParseNetDev_MalformedRow(t *testing.T) {
|
||||||
|
fixture := `header line 1
|
||||||
|
header line 2
|
||||||
|
bad0: 123 456
|
||||||
|
eth0: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
|
||||||
|
`
|
||||||
|
snaps := parseNetDev(strings.NewReader(fixture))
|
||||||
|
if len(snaps) != 1 {
|
||||||
|
t.Fatalf("got %d snapshots, want 1 (bad0 should be dropped)", len(snaps))
|
||||||
|
}
|
||||||
|
if snaps[0].Iface != "eth0" {
|
||||||
|
t.Errorf("got iface=%q, want eth0", snaps[0].Iface)
|
||||||
|
}
|
||||||
|
}
|
||||||
+131
-22
@@ -26,6 +26,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
@@ -71,7 +72,10 @@ func Run(ctx context.Context, p *bootstate.Params) error {
|
|||||||
}
|
}
|
||||||
fwd.info(fmt.Sprintf("claimed run; stages=%v current_state=%s", claim.Stages, claim.CurrentState))
|
fwd.info(fmt.Sprintf("claimed run; stages=%v current_state=%s", claim.Stages, claim.CurrentState))
|
||||||
|
|
||||||
go thermalSidecar(ctx, c, fwd)
|
mux := NewSensorMux(ctx, c)
|
||||||
|
defer mux.Close()
|
||||||
|
|
||||||
|
go thermalSidecar(ctx, mux, fwd)
|
||||||
|
|
||||||
hbCh := make(chan HeartbeatResponse, 4)
|
hbCh := make(chan HeartbeatResponse, 4)
|
||||||
go heartbeatLoop(ctx, c, fwd, hbCh)
|
go heartbeatLoop(ctx, c, fwd, hbCh)
|
||||||
@@ -101,7 +105,7 @@ func Run(ctx context.Context, p *bootstate.Params) error {
|
|||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
fwd.info("stage: starting " + nextStage)
|
fwd.info("stage: starting " + nextStage)
|
||||||
outcome := runStageCancellable(ctx, nextStage, claim, fwd, c, overrideFlags{})
|
outcome := runStageCancellable(ctx, nextStage, claim, fwd, c, mux, overrideFlags{})
|
||||||
if outcome.Cancelled {
|
if outcome.Cancelled {
|
||||||
fwd.warn("stage cancelled by operator; posting result and exiting")
|
fwd.warn("stage cancelled by operator; posting result and exiting")
|
||||||
_, _ = postResult(ctx, c, nextStage, outcome)
|
_, _ = postResult(ctx, c, nextStage, outcome)
|
||||||
@@ -119,7 +123,7 @@ func Run(ctx context.Context, p *bootstate.Params) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// Park and wait for an override directive.
|
// Park and wait for an override directive.
|
||||||
return waitForOverride(ctx, c, fwd, hbCh, claim)
|
return waitForOverride(ctx, c, fwd, mux, hbCh, claim)
|
||||||
}
|
}
|
||||||
if resp.NextState == "Completed" || resp.NextState == "" {
|
if resp.NextState == "Completed" || resp.NextState == "" {
|
||||||
fwd.info("pipeline complete")
|
fwd.info("pipeline complete")
|
||||||
@@ -144,10 +148,10 @@ func Run(ctx context.Context, p *bootstate.Params) error {
|
|||||||
// it runs the inventory probe and passes the result as the /result body
|
// it runs the inventory probe and passes the result as the /result body
|
||||||
// (the orchestrator persists it as an artifact). Every other stage
|
// (the orchestrator persists it as an artifact). Every other stage
|
||||||
// returns a tests.Outcome which postResult marshals generically.
|
// returns a tests.Outcome which postResult marshals generically.
|
||||||
func runStage(ctx context.Context, stage string, claim *ClaimResponse, fwd *logForwarder, c *Client, ovr overrideFlags) stageOutcome {
|
func runStage(ctx context.Context, stage string, claim *ClaimResponse, fwd *logForwarder, c *Client, mux *SensorMux, ovr overrideFlags) stageOutcome {
|
||||||
fwd.SetStage(stage)
|
fwd.SetStage(stage)
|
||||||
defer fwd.ClearStage()
|
defer fwd.ClearStage()
|
||||||
deps := newDeps(ctx, c, fwd, ovr, claim)
|
deps := newDeps(ctx, c, fwd, mux, ovr, claim, stage)
|
||||||
switch stage {
|
switch stage {
|
||||||
case "Inventory":
|
case "Inventory":
|
||||||
fwd.info("Inventory: probing host hardware")
|
fwd.info("Inventory: probing host hardware")
|
||||||
@@ -163,6 +167,25 @@ func runStage(ctx context.Context, stage string, claim *ClaimResponse, fwd *logF
|
|||||||
},
|
},
|
||||||
Inventory: inv,
|
Inventory: inv,
|
||||||
}
|
}
|
||||||
|
case "Firmware":
|
||||||
|
fwd.info("Firmware: probing firmware versions")
|
||||||
|
snaps, warns := probes.Firmware(ctx)
|
||||||
|
for _, w := range warns {
|
||||||
|
fwd.warn(w)
|
||||||
|
}
|
||||||
|
summary := firmwareSummary(snaps)
|
||||||
|
fwd.info("Firmware: " + summary)
|
||||||
|
return stageOutcome{
|
||||||
|
Outcome: tests.Outcome{
|
||||||
|
Passed: true,
|
||||||
|
Summary: summary,
|
||||||
|
Extras: map[string]any{
|
||||||
|
"warnings": warns,
|
||||||
|
"snapshots": len(snaps),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Firmware: snaps,
|
||||||
|
}
|
||||||
case "SMART":
|
case "SMART":
|
||||||
return stageOutcome{Outcome: tests.SMART(ctx, deps)}
|
return stageOutcome{Outcome: tests.SMART(ctx, deps)}
|
||||||
case "CPUStress":
|
case "CPUStress":
|
||||||
@@ -170,10 +193,19 @@ func runStage(ctx context.Context, stage string, claim *ClaimResponse, fwd *logF
|
|||||||
case "Storage":
|
case "Storage":
|
||||||
return stageOutcome{Outcome: tests.Storage(ctx, deps)}
|
return stageOutcome{Outcome: tests.Storage(ctx, deps)}
|
||||||
case "Network":
|
case "Network":
|
||||||
|
duration := deps.NetworkKnobs.Duration
|
||||||
|
if duration <= 0 {
|
||||||
|
duration = 10 * time.Second
|
||||||
|
}
|
||||||
return stageOutcome{Outcome: tests.Network(ctx, deps, tests.NetworkConfig{
|
return stageOutcome{Outcome: tests.Network(ctx, deps, tests.NetworkConfig{
|
||||||
OrchestratorURL: c.BaseURL,
|
OrchestratorURL: c.BaseURL,
|
||||||
IperfPort: claim.IperfPort,
|
IperfPort: claim.IperfPort,
|
||||||
Duration: 10 * time.Second,
|
Duration: duration,
|
||||||
|
})}
|
||||||
|
case "Burn":
|
||||||
|
return stageOutcome{Outcome: tests.Burn(ctx, deps, tests.BurnConfig{
|
||||||
|
OrchestratorURL: c.BaseURL,
|
||||||
|
IperfPort: claim.IperfPort,
|
||||||
})}
|
})}
|
||||||
case "GPU":
|
case "GPU":
|
||||||
return stageOutcome{Outcome: tests.GPU(ctx, deps)}
|
return stageOutcome{Outcome: tests.GPU(ctx, deps)}
|
||||||
@@ -189,6 +221,7 @@ func runStage(ctx context.Context, stage string, claim *ClaimResponse, fwd *logF
|
|||||||
type stageOutcome struct {
|
type stageOutcome struct {
|
||||||
Outcome tests.Outcome
|
Outcome tests.Outcome
|
||||||
Inventory *spec.Inventory // only for Inventory stage
|
Inventory *spec.Inventory // only for Inventory stage
|
||||||
|
Firmware []probes.FirmwareSnapshot // only for Firmware stage
|
||||||
Cancelled bool // set when the stage was cut short by operator cancel
|
Cancelled bool // set when the stage was cut short by operator cancel
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -197,14 +230,14 @@ type stageOutcome struct {
|
|||||||
// is currently running. If the derived context was cancelled while the
|
// is currently running. If the derived context was cancelled while the
|
||||||
// stage executed, the outcome is rewritten as a cancellation record so
|
// stage executed, the outcome is rewritten as a cancellation record so
|
||||||
// the orchestrator has something to persist.
|
// the orchestrator has something to persist.
|
||||||
func runStageCancellable(parent context.Context, stage string, claim *ClaimResponse, fwd *logForwarder, c *Client, ovr overrideFlags) stageOutcome {
|
func runStageCancellable(parent context.Context, stage string, claim *ClaimResponse, fwd *logForwarder, c *Client, mux *SensorMux, ovr overrideFlags) stageOutcome {
|
||||||
stageCtx, cancel := context.WithCancel(parent)
|
stageCtx, cancel := context.WithCancel(parent)
|
||||||
stageCancel.Store(cancel)
|
stageCancel.Store(cancel)
|
||||||
defer func() {
|
defer func() {
|
||||||
cancel()
|
cancel()
|
||||||
stageCancel.Store(context.CancelFunc(nil))
|
stageCancel.Store(context.CancelFunc(nil))
|
||||||
}()
|
}()
|
||||||
out := runStage(stageCtx, stage, claim, fwd, c, ovr)
|
out := runStage(stageCtx, stage, claim, fwd, c, mux, ovr)
|
||||||
// If the parent is still live but the stage ctx was cancelled, the
|
// If the parent is still live but the stage ctx was cancelled, the
|
||||||
// operator fired a cancel — mark the outcome so the caller can exit
|
// operator fired a cancel — mark the outcome so the caller can exit
|
||||||
// the pipeline cleanly. Plain ctx-cancel on ctx.Done (e.g. shutdown)
|
// the pipeline cleanly. Plain ctx-cancel on ctx.Done (e.g. shutdown)
|
||||||
@@ -235,7 +268,7 @@ type overrideFlags struct {
|
|||||||
Wipe bool `json:"wipe"`
|
Wipe bool `json:"wipe"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func newDeps(ctx context.Context, c *Client, fwd *logForwarder, ovr overrideFlags, claim *ClaimResponse) tests.Deps {
|
func newDeps(ctx context.Context, c *Client, fwd *logForwarder, mux *SensorMux, ovr overrideFlags, claim *ClaimResponse, stage string) tests.Deps {
|
||||||
var expected []tests.ExpectedDisk
|
var expected []tests.ExpectedDisk
|
||||||
for _, e := range claim.ExpectedDisks {
|
for _, e := range claim.ExpectedDisks {
|
||||||
expected = append(expected, tests.ExpectedDisk{Serial: e.Serial, SizeGB: e.SizeGB})
|
expected = append(expected, tests.ExpectedDisk{Serial: e.Serial, SizeGB: e.SizeGB})
|
||||||
@@ -247,17 +280,73 @@ func newDeps(ctx context.Context, c *Client, fwd *logForwarder, ovr overrideFlag
|
|||||||
OverrideWipe: ovr.Wipe,
|
OverrideWipe: ovr.Wipe,
|
||||||
NonDestructive: claim.NonDestructive,
|
NonDestructive: claim.NonDestructive,
|
||||||
ExpectedDisks: expected,
|
ExpectedDisks: expected,
|
||||||
StageTimeout: 2 * time.Minute,
|
StageTimeout: stageTimeout(claim, stage),
|
||||||
Sensor: func(ctx context.Context, samples []tests.Sample) error {
|
CPUStressKnobs: tests.CPUStressKnobs{
|
||||||
|
CPUPass: parseDur(claim.StageConfig.CPUStress.CPUPass),
|
||||||
|
MemPass: parseDur(claim.StageConfig.CPUStress.MemPass),
|
||||||
|
EDACPoll: parseDur(claim.StageConfig.CPUStress.EDACPoll),
|
||||||
|
},
|
||||||
|
StorageKnobs: tests.StorageKnobs{
|
||||||
|
Mode: claim.StageConfig.Storage.Mode,
|
||||||
|
FioSize: claim.StageConfig.Storage.FioSize,
|
||||||
|
FioTime: parseDur(claim.StageConfig.Storage.FioTime),
|
||||||
|
FioBS: claim.StageConfig.Storage.FioBS,
|
||||||
|
FioRW: claim.StageConfig.Storage.FioRW,
|
||||||
|
Verify: claim.StageConfig.Storage.Verify,
|
||||||
|
},
|
||||||
|
NetworkKnobs: tests.NetworkKnobs{
|
||||||
|
Duration: parseDur(claim.StageConfig.Network.Duration),
|
||||||
|
},
|
||||||
|
BurnKnobs: tests.BurnKnobs{
|
||||||
|
Duration: parseDur(claim.StageConfig.Burn.Duration),
|
||||||
|
CPUWorkers: claim.StageConfig.Burn.CPUWorkers,
|
||||||
|
MemPct: claim.StageConfig.Burn.MemPct,
|
||||||
|
FioOnSpare: claim.StageConfig.Burn.FioOnSpare,
|
||||||
|
IperfParallel: claim.StageConfig.Burn.IperfParallel,
|
||||||
|
},
|
||||||
|
Sensor: func(_ context.Context, samples []tests.Sample) error {
|
||||||
out := make([]SensorSample, 0, len(samples))
|
out := make([]SensorSample, 0, len(samples))
|
||||||
for _, s := range samples {
|
for _, s := range samples {
|
||||||
out = append(out, SensorSample{Kind: s.Kind, Key: s.Key, Value: s.Value, Unit: s.Unit})
|
out = append(out, SensorSample{Kind: s.Kind, Key: s.Key, Value: s.Value, Unit: s.Unit})
|
||||||
}
|
}
|
||||||
return c.Sensor(ctx, out)
|
mux.Send(out)
|
||||||
|
return nil
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// stageTimeout reads claim.StageConfig.StageTimeouts[stage] and falls
|
||||||
|
// back to 2 minutes (the pre-Phase-2 default). Malformed entries log and
|
||||||
|
// fall back — we'd rather run the stage than refuse on a typo.
|
||||||
|
func stageTimeout(claim *ClaimResponse, stage string) time.Duration {
|
||||||
|
if claim == nil || claim.StageConfig.StageTimeouts == nil {
|
||||||
|
return 2 * time.Minute
|
||||||
|
}
|
||||||
|
raw, ok := claim.StageConfig.StageTimeouts[stage]
|
||||||
|
if !ok || raw == "" {
|
||||||
|
return 2 * time.Minute
|
||||||
|
}
|
||||||
|
d, err := time.ParseDuration(raw)
|
||||||
|
if err != nil || d <= 0 {
|
||||||
|
return 2 * time.Minute
|
||||||
|
}
|
||||||
|
return d
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseDur leniently decodes a duration knob from its wire form. An
// empty string, an unparseable string or a negative duration all come
// back as 0, which callers read as "use the compile-time default"
// without needing a separate presence check.
func parseDur(s string) time.Duration {
	if s != "" {
		if d, err := time.ParseDuration(s); err == nil && d >= 0 {
			return d
		}
	}
	return 0
}
|
||||||
|
|
||||||
// postResult marshals stageOutcome for the /result endpoint. The
|
// postResult marshals stageOutcome for the /result endpoint. The
|
||||||
// Inventory shape is special-cased: it includes the inventory blob so
|
// Inventory shape is special-cased: it includes the inventory blob so
|
||||||
// the orchestrator can persist it and run server-side spec diff.
|
// the orchestrator can persist it and run server-side spec diff.
|
||||||
@@ -276,6 +365,9 @@ func postResult(ctx context.Context, c *Client, stage string, s stageOutcome) (*
|
|||||||
if s.Inventory != nil {
|
if s.Inventory != nil {
|
||||||
body["inventory"] = s.Inventory
|
body["inventory"] = s.Inventory
|
||||||
}
|
}
|
||||||
|
if len(s.Firmware) > 0 {
|
||||||
|
body["firmware"] = s.Firmware
|
||||||
|
}
|
||||||
if len(s.Outcome.SubSteps) > 0 {
|
if len(s.Outcome.SubSteps) > 0 {
|
||||||
wire := make([]SubStepReport, 0, len(s.Outcome.SubSteps))
|
wire := make([]SubStepReport, 0, len(s.Outcome.SubSteps))
|
||||||
for _, ss := range s.Outcome.SubSteps {
|
for _, ss := range s.Outcome.SubSteps {
|
||||||
@@ -304,7 +396,7 @@ func stageForState(state string) string {
|
|||||||
switch state {
|
switch state {
|
||||||
case "InventoryCheck":
|
case "InventoryCheck":
|
||||||
return "Inventory"
|
return "Inventory"
|
||||||
case "SMART", "CPUStress", "Storage", "Network", "GPU", "PSU":
|
case "Firmware", "SMART", "CPUStress", "Storage", "Network", "Burn", "GPU", "PSU":
|
||||||
return state
|
return state
|
||||||
}
|
}
|
||||||
// SpecValidate and Reporting are orchestrator-owned; we never see
|
// SpecValidate and Reporting are orchestrator-owned; we never see
|
||||||
@@ -315,7 +407,7 @@ func stageForState(state string) string {
|
|||||||
// waitForOverride parks the agent in FailedHolding. It listens for a
|
// waitForOverride parks the agent in FailedHolding. It listens for a
|
||||||
// heartbeat directive that tells it to retry a stage (e.g. Storage
|
// heartbeat directive that tells it to retry a stage (e.g. Storage
|
||||||
// with wipe-override armed) and re-enters runStage from that point.
|
// with wipe-override armed) and re-enters runStage from that point.
|
||||||
func waitForOverride(ctx context.Context, c *Client, fwd *logForwarder, hb <-chan HeartbeatResponse, claim *ClaimResponse) error {
|
func waitForOverride(ctx context.Context, c *Client, fwd *logForwarder, mux *SensorMux, hb <-chan HeartbeatResponse, claim *ClaimResponse) error {
|
||||||
fwd.info("holding: awaiting operator decision (heartbeat directive or ctx cancel)")
|
fwd.info("holding: awaiting operator decision (heartbeat directive or ctx cancel)")
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
@@ -333,7 +425,7 @@ func waitForOverride(ctx context.Context, c *Client, fwd *logForwarder, hb <-cha
|
|||||||
if len(cmd.OverrideFlags) > 0 {
|
if len(cmd.OverrideFlags) > 0 {
|
||||||
_ = json.Unmarshal(cmd.OverrideFlags, &ovr)
|
_ = json.Unmarshal(cmd.OverrideFlags, &ovr)
|
||||||
}
|
}
|
||||||
outcome := runStageCancellable(ctx, cmd.Stage, claim, fwd, c, ovr)
|
outcome := runStageCancellable(ctx, cmd.Stage, claim, fwd, c, mux, ovr)
|
||||||
if outcome.Cancelled {
|
if outcome.Cancelled {
|
||||||
fwd.warn("stage cancelled by operator; posting result and exiting")
|
fwd.warn("stage cancelled by operator; posting result and exiting")
|
||||||
_, _ = postResult(ctx, c, cmd.Stage, outcome)
|
_, _ = postResult(ctx, c, cmd.Stage, outcome)
|
||||||
@@ -362,7 +454,7 @@ func waitForOverride(ctx context.Context, c *Client, fwd *logForwarder, hb <-cha
|
|||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
fwd.info("stage: starting " + nextStage)
|
fwd.info("stage: starting " + nextStage)
|
||||||
out := runStageCancellable(ctx, nextStage, claim, fwd, c, overrideFlags{})
|
out := runStageCancellable(ctx, nextStage, claim, fwd, c, mux, overrideFlags{})
|
||||||
if out.Cancelled {
|
if out.Cancelled {
|
||||||
fwd.warn("stage cancelled by operator; posting result and exiting")
|
fwd.warn("stage cancelled by operator; posting result and exiting")
|
||||||
_, _ = postResult(ctx, c, nextStage, out)
|
_, _ = postResult(ctx, c, nextStage, out)
|
||||||
@@ -417,11 +509,32 @@ func inventorySummary(inv *spec.Inventory) string {
|
|||||||
len(inv.Disks), len(inv.NICs), len(inv.GPUs))
|
len(inv.Disks), len(inv.NICs), len(inv.GPUs))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// firmwareSummary renders the one-liner surfaced in the stage tile:
|
||||||
|
// per-component counts so an operator can see "bios=1 nic=2 nvme_fw=1"
|
||||||
|
// without opening the report.
|
||||||
|
func firmwareSummary(snaps []probes.FirmwareSnapshot) string {
|
||||||
|
counts := map[string]int{}
|
||||||
|
for _, s := range snaps {
|
||||||
|
counts[s.Component]++
|
||||||
|
}
|
||||||
|
if len(counts) == 0 {
|
||||||
|
return "no firmware readable"
|
||||||
|
}
|
||||||
|
keys := []string{"bios", "bmc", "nic", "hba", "nvme_fw", "microcode"}
|
||||||
|
parts := make([]string, 0, len(keys))
|
||||||
|
for _, k := range keys {
|
||||||
|
if n := counts[k]; n > 0 {
|
||||||
|
parts = append(parts, fmt.Sprintf("%s=%d", k, n))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return strings.Join(parts, " ")
|
||||||
|
}
|
||||||
|
|
||||||
// thermalSidecar posts a batch of /sys/class/hwmon samples every 5s.
|
// thermalSidecar posts a batch of /sys/class/hwmon samples every 5s.
|
||||||
// Idempotent: a dead sensor just drops out of the next batch. Errors
|
// Idempotent: a dead sensor just drops out of the next batch. Errors
|
||||||
// are logged but never fatal — we'd rather have a run with partial
|
// are logged but never fatal — we'd rather have a run with partial
|
||||||
// thermal data than kill the agent over an I/O hiccup.
|
// thermal data than kill the agent over an I/O hiccup.
|
||||||
func thermalSidecar(ctx context.Context, c *Client, fwd *logForwarder) {
|
func thermalSidecar(ctx context.Context, mux *SensorMux, fwd *logForwarder) {
|
||||||
t := time.NewTicker(5 * time.Second)
|
t := time.NewTicker(5 * time.Second)
|
||||||
defer t.Stop()
|
defer t.Stop()
|
||||||
for {
|
for {
|
||||||
@@ -437,11 +550,7 @@ func thermalSidecar(ctx context.Context, c *Client, fwd *logForwarder) {
|
|||||||
for _, s := range samples {
|
for _, s := range samples {
|
||||||
out = append(out, SensorSample{Kind: s.Kind, Key: s.Key, Value: s.Value, Unit: s.Unit})
|
out = append(out, SensorSample{Kind: s.Kind, Key: s.Key, Value: s.Value, Unit: s.Unit})
|
||||||
}
|
}
|
||||||
sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
mux.Send(out)
|
||||||
if err := c.Sensor(sendCtx, out); err != nil {
|
|
||||||
fwd.warn("thermal sidecar: " + err.Error())
|
|
||||||
}
|
|
||||||
cancel()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,139 @@
|
|||||||
|
package agent
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"log"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// SensorMux coalesces sensor samples from every stage + sidecar into a
|
||||||
|
// single batched HTTP POST stream. Without it, a Burn run that fans out
|
||||||
|
// four concurrent workloads + thermal + PSU + EDAC sidecars can push ~50
|
||||||
|
// samples/sec, each as a separate /sensor request — enough to either
|
||||||
|
// saturate the orchestrator's request budget or stall a stage on its
|
||||||
|
// own sensor-forwarding path.
|
||||||
|
//
|
||||||
|
// Contract:
|
||||||
|
// - Send is non-blocking; a full input channel drops a batch on the
|
||||||
|
// floor and logs a warning. That's preferred over back-pressuring
|
||||||
|
// a workload goroutine and skewing its timing.
|
||||||
|
// - Flush happens every flushInterval *or* whenever the pending buffer
|
||||||
|
// exceeds maxBatch samples. Chunk-at-flush keeps each HTTP request
|
||||||
|
// bounded regardless of the incoming rate.
|
||||||
|
// - Close flushes whatever is in the buffer. Callers that need the
|
||||||
|
// final flush to reach the server should defer Close before other
|
||||||
|
// deferred shutdown work.
|
||||||
|
type SensorMux struct {
|
||||||
|
c *Client
|
||||||
|
in chan []SensorSample
|
||||||
|
flushInterval time.Duration
|
||||||
|
maxBatch int
|
||||||
|
|
||||||
|
ctx context.Context
|
||||||
|
cancel context.CancelFunc
|
||||||
|
wg sync.WaitGroup
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewSensorMux starts the flush loop. Callers hand the returned mux to
|
||||||
|
// every code path that previously called Client.Sensor directly (stage
|
||||||
|
// Deps.Sensor, thermal sidecar, EDAC sidecar). The mux lives for the
|
||||||
|
// duration of the agent run.
|
||||||
|
func NewSensorMux(parent context.Context, c *Client) *SensorMux {
|
||||||
|
ctx, cancel := context.WithCancel(parent)
|
||||||
|
m := &SensorMux{
|
||||||
|
c: c,
|
||||||
|
in: make(chan []SensorSample, 32),
|
||||||
|
flushInterval: 2 * time.Second,
|
||||||
|
maxBatch: 500,
|
||||||
|
ctx: ctx,
|
||||||
|
cancel: cancel,
|
||||||
|
}
|
||||||
|
m.wg.Add(1)
|
||||||
|
go m.loop()
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
|
||||||
|
// Send enqueues a batch for the next flush tick. Empty batches are
|
||||||
|
// silently ignored so callers with conditional sample lists don't need
|
||||||
|
// to guard the call site.
|
||||||
|
func (m *SensorMux) Send(samples []SensorSample) {
|
||||||
|
if m == nil || len(samples) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Copy so caller mutations don't race with the flush loop.
|
||||||
|
out := make([]SensorSample, len(samples))
|
||||||
|
copy(out, samples)
|
||||||
|
select {
|
||||||
|
case m.in <- out:
|
||||||
|
default:
|
||||||
|
log.Printf("sensor mux: input channel full, dropping %d samples", len(out))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close stops the flush loop and flushes the residual buffer. Safe to
|
||||||
|
// call twice (the second is a no-op because the internal context is
|
||||||
|
// already cancelled).
|
||||||
|
func (m *SensorMux) Close() {
|
||||||
|
if m == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
m.cancel()
|
||||||
|
m.wg.Wait()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SensorMux) loop() {
|
||||||
|
defer m.wg.Done()
|
||||||
|
buf := make([]SensorSample, 0, m.maxBatch)
|
||||||
|
t := time.NewTicker(m.flushInterval)
|
||||||
|
defer t.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-m.ctx.Done():
|
||||||
|
m.flushChunks(buf)
|
||||||
|
buf = nil
|
||||||
|
// Drain whatever is still sitting in the channel so a
|
||||||
|
// workload that pushed right before Close doesn't lose
|
||||||
|
// those final samples.
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case batch := <-m.in:
|
||||||
|
m.flushChunks(batch)
|
||||||
|
default:
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case batch := <-m.in:
|
||||||
|
buf = append(buf, batch...)
|
||||||
|
if len(buf) >= m.maxBatch {
|
||||||
|
m.flushChunks(buf)
|
||||||
|
buf = buf[:0]
|
||||||
|
}
|
||||||
|
case <-t.C:
|
||||||
|
if len(buf) > 0 {
|
||||||
|
m.flushChunks(buf)
|
||||||
|
buf = buf[:0]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// flushChunks splits a potentially-large slice into maxBatch-sized
|
||||||
|
// HTTP requests so no single POST carries more than the configured cap.
|
||||||
|
// A 10-second per-chunk timeout keeps a stalled orchestrator from
|
||||||
|
// freezing the flush loop.
|
||||||
|
func (m *SensorMux) flushChunks(all []SensorSample) {
|
||||||
|
for len(all) > 0 {
|
||||||
|
n := len(all)
|
||||||
|
if n > m.maxBatch {
|
||||||
|
n = m.maxBatch
|
||||||
|
}
|
||||||
|
chunk := all[:n]
|
||||||
|
all = all[n:]
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
|
if err := m.c.Sensor(ctx, chunk); err != nil {
|
||||||
|
log.Printf("sensor mux: flush of %d samples failed: %v", len(chunk), err)
|
||||||
|
}
|
||||||
|
cancel()
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,144 @@
|
|||||||
|
package agent
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestSensorMux_CloseFlushesBuffer confirms Close() empties the
|
||||||
|
// pending buffer through the HTTP client before returning. Without
|
||||||
|
// this guarantee a Burn run would drop the last 2 s of samples when
|
||||||
|
// the stage tears down, which is exactly the window that contains the
|
||||||
|
// peak-load PSU / thermal readings we care about.
|
||||||
|
func TestSensorMux_CloseFlushesBuffer(t *testing.T) {
|
||||||
|
var batches int32
|
||||||
|
var totalSamples int32
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if !strings.HasSuffix(r.URL.Path, "/sensor") {
|
||||||
|
t.Errorf("unexpected path %s", r.URL.Path)
|
||||||
|
}
|
||||||
|
body, _ := io.ReadAll(r.Body)
|
||||||
|
var env struct {
|
||||||
|
Samples []SensorSample `json:"samples"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(body, &env); err != nil {
|
||||||
|
t.Errorf("decode: %v", err)
|
||||||
|
}
|
||||||
|
atomic.AddInt32(&batches, 1)
|
||||||
|
atomic.AddInt32(&totalSamples, int32(len(env.Samples)))
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
c := &Client{
|
||||||
|
BaseURL: srv.URL,
|
||||||
|
RunID: 1,
|
||||||
|
Token: "t",
|
||||||
|
HTTP: srv.Client(),
|
||||||
|
}
|
||||||
|
mux := NewSensorMux(context.Background(), c)
|
||||||
|
mux.Send([]SensorSample{
|
||||||
|
{Kind: "temp", Key: "cpu/0", Value: 72.5, Unit: "C"},
|
||||||
|
{Kind: "psu_volt", Key: "+12V", Value: 12.05, Unit: "V"},
|
||||||
|
})
|
||||||
|
mux.Send([]SensorSample{
|
||||||
|
{Kind: "mce", Key: "0", Value: 0, Unit: "count"},
|
||||||
|
})
|
||||||
|
mux.Close()
|
||||||
|
|
||||||
|
if got := atomic.LoadInt32(&totalSamples); got != 3 {
|
||||||
|
t.Errorf("expected 3 samples flushed, got %d across %d batch(es)", got, atomic.LoadInt32(&batches))
|
||||||
|
}
|
||||||
|
if atomic.LoadInt32(&batches) == 0 {
|
||||||
|
t.Errorf("expected at least one batch HTTP post")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestSensorMux_ChunksOversizedBatch verifies flushChunks splits a
|
||||||
|
// single oversized input into maxBatch-sized HTTP requests. The plan's
|
||||||
|
// Burn stage can legitimately push a single input larger than the cap
|
||||||
|
// (e.g. a workload goroutine dumping a backlog), and a single giant
|
||||||
|
// POST would defeat the point of the multiplexer.
|
||||||
|
func TestSensorMux_ChunksOversizedBatch(t *testing.T) {
|
||||||
|
var batchSizes []int
|
||||||
|
var mu sync.Mutex
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
body, _ := io.ReadAll(r.Body)
|
||||||
|
var env struct {
|
||||||
|
Samples []SensorSample `json:"samples"`
|
||||||
|
}
|
||||||
|
_ = json.Unmarshal(body, &env)
|
||||||
|
mu.Lock()
|
||||||
|
batchSizes = append(batchSizes, len(env.Samples))
|
||||||
|
mu.Unlock()
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
c := &Client{BaseURL: srv.URL, RunID: 1, Token: "t", HTTP: srv.Client()}
|
||||||
|
mux := NewSensorMux(context.Background(), c)
|
||||||
|
|
||||||
|
// One input with 1200 samples → expect chunks of 500 + 500 + 200
|
||||||
|
// given the default maxBatch of 500.
|
||||||
|
big := make([]SensorSample, 1200)
|
||||||
|
for i := range big {
|
||||||
|
big[i] = SensorSample{Kind: "burn/throughput_mbps", Key: "eth0", Value: float64(i), Unit: "Mbps"}
|
||||||
|
}
|
||||||
|
mux.Send(big)
|
||||||
|
mux.Close()
|
||||||
|
|
||||||
|
mu.Lock()
|
||||||
|
defer mu.Unlock()
|
||||||
|
total := 0
|
||||||
|
for _, n := range batchSizes {
|
||||||
|
total += n
|
||||||
|
if n > 500 {
|
||||||
|
t.Errorf("batch size %d exceeds maxBatch=500", n)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if total != 1200 {
|
||||||
|
t.Errorf("sum of batch sizes = %d, want 1200 (sizes=%v)", total, batchSizes)
|
||||||
|
}
|
||||||
|
if len(batchSizes) < 3 {
|
||||||
|
t.Errorf("expected at least 3 chunks for a 1200-sample input, got %d (%v)", len(batchSizes), batchSizes)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestSensorMux_EmptyAndNilSafe covers the defensive guards around
|
||||||
|
// Send(nil) / Send([]) / a nil *SensorMux. Callers with conditional
|
||||||
|
// sample lists (storage probe that skipped a disk, GPU stage with no
|
||||||
|
// devices) should be able to call Send unconditionally without adding
|
||||||
|
// their own nil check.
|
||||||
|
func TestSensorMux_EmptyAndNilSafe(t *testing.T) {
|
||||||
|
var batches int32
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
atomic.AddInt32(&batches, 1)
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
// Nil receiver must be a no-op.
|
||||||
|
var nilMux *SensorMux
|
||||||
|
nilMux.Send([]SensorSample{{Kind: "x", Key: "y"}})
|
||||||
|
nilMux.Close()
|
||||||
|
|
||||||
|
c := &Client{BaseURL: srv.URL, RunID: 1, Token: "t", HTTP: srv.Client()}
|
||||||
|
mux := NewSensorMux(context.Background(), c)
|
||||||
|
mux.Send(nil)
|
||||||
|
mux.Send([]SensorSample{})
|
||||||
|
mux.Close()
|
||||||
|
|
||||||
|
// Give any spurious goroutine a chance to surprise us.
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
if atomic.LoadInt32(&batches) != 0 {
|
||||||
|
t.Errorf("empty/nil Send must not produce HTTP batches, got %d", atomic.LoadInt32(&batches))
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,486 @@
|
|||||||
|
package tests
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os/exec"
|
||||||
|
"runtime"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"vetting/agent/probes"
|
||||||
|
)
|
||||||
|
|
||||||
|
// BurnConfig carries the per-run connection details the agent hands to
// Burn: where the orchestrator's iperf3 server lives. Durations and
// concurrency knobs are not part of it; they come from Deps.BurnKnobs
// so they scale with the profile.
type BurnConfig struct {
	// OrchestratorURL is the base URL the iperf3 target host is
	// derived from. Empty makes Burn skip the iperf workload.
	OrchestratorURL string

	// IperfPort is the iperf3 server port; 0 = 5201.
	IperfPort int
}
|
||||||
|
|
||||||
|
// Burn is the concurrent soak stage. Unlike CPUStress (serial
|
||||||
|
// CPU→memory) or Storage (serial per disk) it fans out every workload
|
||||||
|
// at once: stress-ng hammers CPU + memory, fio drives the allow-listed
|
||||||
|
// disks, iperf3 pushes sustained NIC traffic, and two sidecars poll
|
||||||
|
// EDAC + PSU rails for the duration of the window.
|
||||||
|
//
|
||||||
|
// This is where PSU rails actually matter: 12V sag under simultaneous
|
||||||
|
// CPU + disk + NIC load is exactly the failure a thermal/power
|
||||||
|
// regression produces, and it's invisible to any stage that loads one
|
||||||
|
// subsystem at a time. The PSU stage that follows Burn in the pipeline
|
||||||
|
// re-samples rails post-window to confirm they settle back to nominal.
|
||||||
|
//
|
||||||
|
// Burn stays inside the stage framework — it doesn't spawn a parallel
|
||||||
|
// stage runner. The goroutine fan-out is local; the stage converges
|
||||||
|
// before returning an Outcome so every invariant the orchestrator
|
||||||
|
// relies on (serial stage order, single in-flight stage per run) still
|
||||||
|
// holds.
|
||||||
|
func Burn(ctx context.Context, d Deps, cfg BurnConfig) Outcome {
|
||||||
|
duration := d.BurnKnobs.Duration
|
||||||
|
if duration <= 0 {
|
||||||
|
duration = 2 * time.Minute
|
||||||
|
}
|
||||||
|
cpuWorkers := resolveCPUWorkers(d.BurnKnobs.CPUWorkers)
|
||||||
|
memPct := clampMemPct(d.BurnKnobs.MemPct)
|
||||||
|
iperfParallel := d.BurnKnobs.IperfParallel
|
||||||
|
if iperfParallel <= 0 {
|
||||||
|
iperfParallel = 2
|
||||||
|
}
|
||||||
|
d.Info(fmt.Sprintf("Burn: window=%s cpu_workers=%d mem_pct=%d iperf_parallel=%d fio_on_spare=%v",
|
||||||
|
duration, cpuWorkers, memPct, iperfParallel, d.BurnKnobs.FioOnSpare))
|
||||||
|
|
||||||
|
// Sidecars run for the lifetime of the window and are cancelled on
|
||||||
|
// return so the main stage converges cleanly. EDAC catches DIMM
|
||||||
|
// bit-flips that appear only under concurrent load; PSU catches
|
||||||
|
// rail sag that only appears when CPU + disk + NIC pull current
|
||||||
|
// simultaneously.
|
||||||
|
sideCtx, sideCancel := context.WithCancel(ctx)
|
||||||
|
defer sideCancel()
|
||||||
|
var sideWG sync.WaitGroup
|
||||||
|
sideWG.Add(2)
|
||||||
|
go runEDACSidecar(sideCtx, &sideWG, d)
|
||||||
|
go runPSUSidecar(sideCtx, &sideWG, d)
|
||||||
|
|
||||||
|
runCtx, cancel := context.WithTimeout(ctx, duration+30*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
results := make(chan burnSubResult, 4)
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
results <- runBurnCPU(runCtx, d, duration, cpuWorkers)
|
||||||
|
}()
|
||||||
|
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
results <- runBurnMemory(runCtx, d, duration, memPct)
|
||||||
|
}()
|
||||||
|
|
||||||
|
// fio runs only when explicitly enabled *and* there are allow-listed
|
||||||
|
// disks *and* the run wasn't marked non-destructive. Any of those
|
||||||
|
// missing records a Skipped sub-step so the operator sees why.
|
||||||
|
if d.BurnKnobs.FioOnSpare && len(d.ExpectedDisks) > 0 && !d.NonDestructive {
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
results <- runBurnFio(runCtx, d, duration)
|
||||||
|
}()
|
||||||
|
} else {
|
||||||
|
reason := burnFioSkipReason(d)
|
||||||
|
results <- burnSubResult{Name: "Burn fio", Skipped: true, Reason: reason}
|
||||||
|
}
|
||||||
|
|
||||||
|
// iperf requires an orchestrator host. Lab hosts run with the
|
||||||
|
// bundled iperf3 server; without a base URL we can't derive a
|
||||||
|
// target so we skip rather than fail the stage.
|
||||||
|
if cfg.OrchestratorURL != "" {
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
results <- runBurnIperf(runCtx, d, duration, cfg.OrchestratorURL, cfg.IperfPort, iperfParallel)
|
||||||
|
}()
|
||||||
|
} else {
|
||||||
|
results <- burnSubResult{Name: "Burn iperf", Skipped: true, Reason: "no orchestrator host"}
|
||||||
|
}
|
||||||
|
|
||||||
|
wg.Wait()
|
||||||
|
sideCancel()
|
||||||
|
sideWG.Wait()
|
||||||
|
close(results)
|
||||||
|
|
||||||
|
subs, samples, failures := collectBurnResults(results)
|
||||||
|
if d.Sensor != nil && len(samples) > 0 {
|
||||||
|
_ = d.Sensor(ctx, samples)
|
||||||
|
}
|
||||||
|
|
||||||
|
extras := map[string]any{
|
||||||
|
"duration": duration.String(),
|
||||||
|
"cpu_workers": cpuWorkers,
|
||||||
|
"mem_pct": memPct,
|
||||||
|
"iperf_parallel": iperfParallel,
|
||||||
|
"fio_on_spare": d.BurnKnobs.FioOnSpare,
|
||||||
|
}
|
||||||
|
if len(failures) > 0 {
|
||||||
|
msg := "Burn workloads failed: " + strings.Join(failures, ", ")
|
||||||
|
d.Error(msg)
|
||||||
|
return Outcome{
|
||||||
|
Passed: false,
|
||||||
|
Message: msg,
|
||||||
|
Summary: fmt.Sprintf("Burn failed (%d of %d workloads)", len(failures), len(subs)),
|
||||||
|
Extras: extras,
|
||||||
|
SubSteps: subs,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
d.Info(fmt.Sprintf("Burn: %s window passed; %d workloads converged", duration, len(subs)))
|
||||||
|
return Outcome{
|
||||||
|
Passed: true,
|
||||||
|
Summary: fmt.Sprintf("Burn %s passed (%d workloads)", duration, len(subs)),
|
||||||
|
Extras: extras,
|
||||||
|
SubSteps: subs,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// burnSubResult is the per-workload return type used by the fan-out
|
||||||
|
// goroutines. Sample slice is merged into the stage's final /sensor
|
||||||
|
// batch; SubStep becomes a row on the /result sub-steps list.
|
||||||
|
type burnSubResult struct {
|
||||||
|
Name string
|
||||||
|
Passed bool
|
||||||
|
Skipped bool
|
||||||
|
Reason string // why a workload was skipped
|
||||||
|
Err string // why a workload failed
|
||||||
|
Samples []Sample
|
||||||
|
SubStep SubStepReport
|
||||||
|
}
|
||||||
|
|
||||||
|
func collectBurnResults(ch <-chan burnSubResult) ([]SubStepReport, []Sample, []string) {
|
||||||
|
var subs []SubStepReport
|
||||||
|
var samples []Sample
|
||||||
|
var failures []string
|
||||||
|
for r := range ch {
|
||||||
|
// Non-skipped goroutines populate SubStep directly. Skipped slots
|
||||||
|
// get a synthesized row here so the /result shape stays stable.
|
||||||
|
if r.Skipped {
|
||||||
|
stamp := time.Now().UTC()
|
||||||
|
subs = append(subs, SubStepReport{
|
||||||
|
Name: r.Name,
|
||||||
|
Skipped: true,
|
||||||
|
StartedAt: stamp,
|
||||||
|
CompletedAt: stamp,
|
||||||
|
SummaryJSON: mustJSON(map[string]any{"skipped": true, "reason": r.Reason}),
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
subs = append(subs, r.SubStep)
|
||||||
|
samples = append(samples, r.Samples...)
|
||||||
|
if !r.Passed {
|
||||||
|
reason := r.Err
|
||||||
|
if reason == "" {
|
||||||
|
reason = "unknown"
|
||||||
|
}
|
||||||
|
failures = append(failures, r.Name+": "+reason)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return subs, samples, failures
|
||||||
|
}
|
||||||
|
|
||||||
|
func burnFioSkipReason(d Deps) string {
|
||||||
|
if !d.BurnKnobs.FioOnSpare {
|
||||||
|
return "fio_on_spare knob disabled"
|
||||||
|
}
|
||||||
|
if d.NonDestructive {
|
||||||
|
return "non-destructive run"
|
||||||
|
}
|
||||||
|
if len(d.ExpectedDisks) == 0 {
|
||||||
|
return "no allowlisted disks"
|
||||||
|
}
|
||||||
|
return "disabled"
|
||||||
|
}
|
||||||
|
|
||||||
|
// runBurnCPU hammers all CPU cores with stress-ng for the window. Same
|
||||||
|
// shape as CPUStress pass 1 but with shorter label so the sub-step row
|
||||||
|
// doesn't collide with the earlier stage's "CPU pass".
|
||||||
|
func runBurnCPU(ctx context.Context, d Deps, duration time.Duration, workers int) burnSubResult {
|
||||||
|
if _, err := exec.LookPath("stress-ng"); err != nil {
|
||||||
|
return burnSubResult{Name: "Burn CPU", Err: "stress-ng missing"}
|
||||||
|
}
|
||||||
|
args := []string{
|
||||||
|
"--cpu", strconv.Itoa(workers),
|
||||||
|
"--cpu-method", "all",
|
||||||
|
"--timeout", durationSeconds(duration),
|
||||||
|
"--metrics-brief",
|
||||||
|
"--verify",
|
||||||
|
}
|
||||||
|
d.Info(fmt.Sprintf("Burn: stress-ng %s", strings.Join(args, " ")))
|
||||||
|
pass := runStressPass(ctx, d, "Burn CPU", duration, args)
|
||||||
|
return burnSubResult{
|
||||||
|
Name: "Burn CPU",
|
||||||
|
Passed: pass.Passed,
|
||||||
|
Err: pass.Err,
|
||||||
|
SubStep: subStepFromPass("Burn CPU", pass),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// runBurnMemory drives a single --vm worker sized at memPct of
|
||||||
|
// MemAvailable, capped so the kernel + agent + other workloads still
|
||||||
|
// have headroom. Clamping happens here rather than in resolveBurnKnobs
|
||||||
|
// so the cap is computed against real live memory each run.
|
||||||
|
func runBurnMemory(ctx context.Context, d Deps, duration time.Duration, memPct int) burnSubResult {
|
||||||
|
if _, err := exec.LookPath("stress-ng"); err != nil {
|
||||||
|
return burnSubResult{Name: "Burn memory", Err: "stress-ng missing"}
|
||||||
|
}
|
||||||
|
avail, err := memAvailableBytes()
|
||||||
|
if err != nil {
|
||||||
|
return burnSubResult{Name: "Burn memory", Err: "read MemAvailable: " + err.Error()}
|
||||||
|
}
|
||||||
|
// Budget = avail * memPct / 100, then subtract the standard headroom.
|
||||||
|
// If the result is below the memory-pass floor we record a skipped
|
||||||
|
// row instead — the window is too tight to be meaningful on this box.
|
||||||
|
budget := int64(float64(avail) * float64(memPct) / 100.0)
|
||||||
|
cap := budget - memHeadroomBytes
|
||||||
|
if cap < memFloorBytes {
|
||||||
|
return burnSubResult{
|
||||||
|
Name: "Burn memory",
|
||||||
|
Skipped: true,
|
||||||
|
Reason: fmt.Sprintf("budget %s below floor %s after headroom", humanBytes(budget), humanBytes(memFloorBytes)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
args := []string{
|
||||||
|
"--vm", "1",
|
||||||
|
"--vm-bytes", strconv.FormatInt(cap, 10),
|
||||||
|
"--vm-keep",
|
||||||
|
"--timeout", durationSeconds(duration),
|
||||||
|
"--metrics-brief",
|
||||||
|
"--verify",
|
||||||
|
}
|
||||||
|
d.Info(fmt.Sprintf("Burn: stress-ng memory cap=%s (%d%% of MemAvailable)", humanBytes(cap), memPct))
|
||||||
|
pass := runStressPass(ctx, d, "Burn memory", duration, args)
|
||||||
|
return burnSubResult{
|
||||||
|
Name: "Burn memory",
|
||||||
|
Passed: pass.Passed,
|
||||||
|
Err: pass.Err,
|
||||||
|
SubStep: subStepFromPass(fmt.Sprintf("Burn memory (cap %s)", humanBytes(cap)), pass),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// runBurnFio runs fio_sample against the first allow-listed disk for
|
||||||
|
// the window. Reuses runFioVerify + parseFioJSON so the samples line
|
||||||
|
// up with what Storage emits. Using fio_sample (bounded by --size)
|
||||||
|
// keeps Burn's write volume predictable regardless of profile.
|
||||||
|
func runBurnFio(ctx context.Context, d Deps, duration time.Duration) burnSubResult {
|
||||||
|
if _, err := exec.LookPath("fio"); err != nil {
|
||||||
|
return burnSubResult{Name: "Burn fio", Err: "fio missing"}
|
||||||
|
}
|
||||||
|
targets := resolveTargets(d.ExpectedDisks)
|
||||||
|
if len(targets) == 0 {
|
||||||
|
return burnSubResult{Name: "Burn fio", Skipped: true, Reason: "no allow-listed disks present"}
|
||||||
|
}
|
||||||
|
t := targets[0]
|
||||||
|
opts := fioOpts{
|
||||||
|
Mode: "fio_sample",
|
||||||
|
Size: "512MiB",
|
||||||
|
Runtime: duration,
|
||||||
|
BS: "4k",
|
||||||
|
RW: "randrw",
|
||||||
|
Verify: "md5",
|
||||||
|
}
|
||||||
|
start := time.Now()
|
||||||
|
d.Info(fmt.Sprintf("Burn: fio %s on %s (%s window)", opts.Mode, t.Device, duration))
|
||||||
|
fr := runFioVerify(ctx, t.Device, opts)
|
||||||
|
end := time.Now()
|
||||||
|
|
||||||
|
sub := SubStepReport{
|
||||||
|
Name: "Burn fio " + t.Device,
|
||||||
|
Passed: fr.Error == "",
|
||||||
|
StartedAt: start,
|
||||||
|
CompletedAt: end,
|
||||||
|
SummaryJSON: mustJSON(fr),
|
||||||
|
}
|
||||||
|
out := burnSubResult{Name: "Burn fio", SubStep: sub, Passed: fr.Error == "", Err: fr.Error}
|
||||||
|
if fr.Error == "" {
|
||||||
|
out.Samples = append(out.Samples,
|
||||||
|
Sample{Kind: "fio", Key: t.Device + "/read_iops", Value: fr.ReadIOPS, Unit: "iops"},
|
||||||
|
Sample{Kind: "fio", Key: t.Device + "/write_iops", Value: fr.WriteIOPS, Unit: "iops"},
|
||||||
|
)
|
||||||
|
if fr.ReadP99Us > 0 {
|
||||||
|
out.Samples = append(out.Samples, Sample{Kind: "fio_p99_us", Key: t.Device + "/read", Value: fr.ReadP99Us, Unit: "us"})
|
||||||
|
}
|
||||||
|
if fr.WriteP99Us > 0 {
|
||||||
|
out.Samples = append(out.Samples, Sample{Kind: "fio_p99_us", Key: t.Device + "/write", Value: fr.WriteP99Us, Unit: "us"})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// runBurnIperf drives iperf3 -P N for the window. Reuses parseIperfJSON
|
||||||
|
// so the same (mbps, retrans, bytesSent) extraction the Network stage
|
||||||
|
// uses applies here too. Samples emitted as Burn-scoped keys so the
|
||||||
|
// dashboard can tell at-a-glance which window they came from.
|
||||||
|
func runBurnIperf(ctx context.Context, d Deps, duration time.Duration, orchestratorURL string, port, parallel int) burnSubResult {
|
||||||
|
if _, err := exec.LookPath("iperf3"); err != nil {
|
||||||
|
return burnSubResult{Name: "Burn iperf", Err: "iperf3 missing"}
|
||||||
|
}
|
||||||
|
host, err := deriveHost(orchestratorURL)
|
||||||
|
if err != nil || host == "" {
|
||||||
|
return burnSubResult{Name: "Burn iperf", Skipped: true, Reason: "can't derive orchestrator host"}
|
||||||
|
}
|
||||||
|
if port == 0 {
|
||||||
|
port = 5201
|
||||||
|
}
|
||||||
|
if parallel < 1 {
|
||||||
|
parallel = 1
|
||||||
|
}
|
||||||
|
args := []string{
|
||||||
|
"-c", host,
|
||||||
|
"-p", strconv.Itoa(port),
|
||||||
|
"-t", strconv.Itoa(int(duration.Seconds())),
|
||||||
|
"-P", strconv.Itoa(parallel),
|
||||||
|
"-J",
|
||||||
|
}
|
||||||
|
runCtx, cancel := context.WithTimeout(ctx, duration+30*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
start := time.Now()
|
||||||
|
out, err := exec.CommandContext(runCtx, "iperf3", args...).Output()
|
||||||
|
end := time.Now()
|
||||||
|
if err != nil {
|
||||||
|
return burnSubResult{
|
||||||
|
Name: "Burn iperf",
|
||||||
|
Err: "iperf3 client error: " + err.Error(),
|
||||||
|
SubStep: SubStepReport{
|
||||||
|
Name: "Burn iperf",
|
||||||
|
StartedAt: start,
|
||||||
|
CompletedAt: end,
|
||||||
|
SummaryJSON: mustJSON(map[string]any{"error": err.Error(), "stderr_tail": tailLines(string(out), 20)}),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
mbps, retrans, bytesSent, _, perr := parseIperfJSON(out)
|
||||||
|
if perr != nil {
|
||||||
|
return burnSubResult{
|
||||||
|
Name: "Burn iperf",
|
||||||
|
Err: "parse iperf3 json: " + perr.Error(),
|
||||||
|
SubStep: SubStepReport{
|
||||||
|
Name: "Burn iperf",
|
||||||
|
StartedAt: start,
|
||||||
|
CompletedAt: end,
|
||||||
|
SummaryJSON: mustJSON(map[string]any{"error": perr.Error()}),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
samples := []Sample{{Kind: "iperf", Key: "burn/throughput_mbps", Value: mbps, Unit: "Mbps"}}
|
||||||
|
if bytesSent > 0 {
|
||||||
|
packets := float64(bytesSent) / 1460.0
|
||||||
|
if packets > 0 {
|
||||||
|
samples = append(samples, Sample{
|
||||||
|
Kind: "nic_retrans", Key: "burn/rate",
|
||||||
|
Value: float64(retrans) / packets, Unit: "rate",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
passed := mbps > 0
|
||||||
|
errMsg := ""
|
||||||
|
if !passed {
|
||||||
|
errMsg = "zero throughput from iperf3"
|
||||||
|
}
|
||||||
|
return burnSubResult{
|
||||||
|
Name: "Burn iperf",
|
||||||
|
Passed: passed,
|
||||||
|
Err: errMsg,
|
||||||
|
Samples: samples,
|
||||||
|
SubStep: SubStepReport{
|
||||||
|
Name: fmt.Sprintf("Burn iperf (P=%d)", parallel),
|
||||||
|
Passed: passed,
|
||||||
|
StartedAt: start,
|
||||||
|
CompletedAt: end,
|
||||||
|
SummaryJSON: mustJSON(map[string]any{
|
||||||
|
"throughput_mbps": mbps,
|
||||||
|
"retransmits": retrans,
|
||||||
|
"bytes_sent": bytesSent,
|
||||||
|
"parallel": parallel,
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// runPSUSidecar polls /sys/class/hwmon rails every 5s for the duration
|
||||||
|
// of the Burn window, piping each read into the stage's sensor channel
|
||||||
|
// as a psu_volt sample. The threshold evaluator then applies the same
|
||||||
|
// within_pct gates used by the PSU stage — a 12V rail sagging to 10.5V
|
||||||
|
// under load will fire the critical threshold mid-Burn and the run
|
||||||
|
// will flip into FailedHolding without waiting for the post-Burn PSU
|
||||||
|
// stage to catch it.
|
||||||
|
func runPSUSidecar(ctx context.Context, wg *sync.WaitGroup, d Deps) {
|
||||||
|
defer wg.Done()
|
||||||
|
if d.Sensor == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t := time.NewTicker(5 * time.Second)
|
||||||
|
defer t.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case <-t.C:
|
||||||
|
rails := scanPSURails()
|
||||||
|
if len(rails) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
batch := make([]Sample, 0, len(rails))
|
||||||
|
for _, r := range rails {
|
||||||
|
batch = append(batch, Sample{Kind: "psu_volt", Key: r.Label, Value: r.Volts, Unit: "V"})
|
||||||
|
}
|
||||||
|
sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||||
|
if err := d.Sensor(sendCtx, batch); err != nil {
|
||||||
|
d.Warn("Burn: PSU sample post: " + err.Error())
|
||||||
|
}
|
||||||
|
cancel()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolveCPUWorkers turns the cpu_workers knob into a worker count.
// A positive decimal integer is used as-is. Everything else — the
// empty string, "all" in any letter case, zero, negatives, or
// unparsable text — resolves to runtime.NumCPU(), so the result is
// never below one.
func resolveCPUWorkers(raw string) int {
	if raw != "" && !strings.EqualFold(raw, "all") {
		if workers, err := strconv.Atoi(raw); err == nil && workers > 0 {
			return workers
		}
	}
	return runtime.NumCPU()
}
|
||||||
|
|
||||||
|
// clampMemPct normalises the mem_pct knob. Zero or a negative value
// means "not set" and yields the default of 50. Any other value is
// clamped into [10, 90]: the lower bound keeps the memory workload
// meaningful, and the upper bound leaves room for the kernel, the
// agent and the other Burn workloads.
func clampMemPct(pct int) int {
	switch {
	case pct <= 0:
		return 50
	case pct < 10:
		return 10
	case pct > 90:
		return 90
	default:
		return pct
	}
}
|
||||||
|
|
||||||
|
// mustJSON marshals v and never fails: if v cannot be encoded, the
// result is a small JSON object of the form {"marshal_error": "..."}
// carrying the encoder's message. The fallback is itself produced by
// the JSON encoder so that quotes, backslashes or control characters
// in the error text are escaped and the result is always valid JSON.
func mustJSON(v any) json.RawMessage {
	b, err := json.Marshal(v)
	if err == nil {
		return b
	}
	// Encoding a map[string]string cannot fail, so this error is
	// deliberately ignored.
	fallback, _ := json.Marshal(map[string]string{"marshal_error": err.Error()})
	return fallback
}
|
||||||
|
|
||||||
|
// Ensure the probes package import stays anchored — the Burn sidecars
|
||||||
|
// use probes.EDAC + the PSU rail scanner defined in psu.go which
|
||||||
|
// otherwise wouldn't pull probes in on its own.
|
||||||
|
var _ = probes.EDAC
|
||||||
@@ -0,0 +1,58 @@
|
|||||||
|
package tests
|
||||||
|
|
||||||
|
import (
|
||||||
|
"runtime"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestResolveCPUWorkers covers the three parse branches: empty/"all"
|
||||||
|
// falls back to NumCPU, a valid integer is used verbatim, and garbage
|
||||||
|
// also falls back to NumCPU rather than returning zero. Zero workers
|
||||||
|
// would make stress-ng a no-op and silently defeat Burn's CPU load.
|
||||||
|
func TestResolveCPUWorkers(t *testing.T) {
|
||||||
|
np := runtime.NumCPU()
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
in string
|
||||||
|
want int
|
||||||
|
}{
|
||||||
|
{"empty defaults to NumCPU", "", np},
|
||||||
|
{"all defaults to NumCPU", "all", np},
|
||||||
|
{"ALL is case-insensitive", "ALL", np},
|
||||||
|
{"explicit integer", "3", 3},
|
||||||
|
{"negative falls back", "-1", np},
|
||||||
|
{"zero falls back", "0", np},
|
||||||
|
{"garbage falls back", "lots", np},
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
if got := resolveCPUWorkers(tc.in); got != tc.want {
|
||||||
|
t.Errorf("resolveCPUWorkers(%q) = %d, want %d", tc.in, got, tc.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestClampMemPct ensures the mem_pct knob never drives the memory
|
||||||
|
// burner into OOM territory (upper clamp) or into uselessness (lower
|
||||||
|
// clamp). Zero is treated as "use default 50" so a missing knob in an
|
||||||
|
// older orchestrator's claim response doesn't collapse the workload.
|
||||||
|
func TestClampMemPct(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
in, want int
|
||||||
|
}{
|
||||||
|
{0, 50}, // default
|
||||||
|
{-10, 50}, // negative treated as default
|
||||||
|
{5, 10}, // below lower band → clamp up
|
||||||
|
{10, 10},
|
||||||
|
{50, 50},
|
||||||
|
{90, 90},
|
||||||
|
{95, 90}, // above upper band → clamp down
|
||||||
|
{1000, 90},
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
if got := clampMemPct(tc.in); got != tc.want {
|
||||||
|
t.Errorf("clampMemPct(%d) = %d, want %d", tc.in, got, tc.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -11,7 +11,10 @@ import (
|
|||||||
"runtime"
|
"runtime"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"vetting/agent/probes"
|
||||||
)
|
)
|
||||||
|
|
||||||
// CPUStress runs stress-ng as two serial passes. The previous shape
|
// CPUStress runs stress-ng as two serial passes. The previous shape
|
||||||
@@ -55,11 +58,28 @@ func CPUStress(ctx context.Context, d Deps) Outcome {
|
|||||||
extras := map[string]any{"cores": cores}
|
extras := map[string]any{"cores": cores}
|
||||||
var subs []SubStepReport
|
var subs []SubStepReport
|
||||||
|
|
||||||
|
// EDAC sidecar runs for the lifetime of the stage; cancelled on
|
||||||
|
// return. It polls /sys/devices/system/edac/mc/*/{ce,ue}_count and
|
||||||
|
// posts the current counters so the server-side threshold evaluator
|
||||||
|
// can gate edac_ue > 0 → fail the run. Zero-valued poll falls back
|
||||||
|
// to 10s — the same cadence rasdaemon uses by default.
|
||||||
|
sideCtx, sideCancel := context.WithCancel(ctx)
|
||||||
|
defer sideCancel()
|
||||||
|
var sideWG sync.WaitGroup
|
||||||
|
sideWG.Add(1)
|
||||||
|
go runEDACSidecar(sideCtx, &sideWG, d)
|
||||||
|
|
||||||
|
// Per-profile durations come from Deps; zero values (missing knobs
|
||||||
|
// or legacy orchestrator) fall back to the package default so the
|
||||||
|
// stage always has a defined budget.
|
||||||
|
cpuDur := nonzeroDur(d.CPUStressKnobs.CPUPass, cpuPassDuration)
|
||||||
|
memDur := nonzeroDur(d.CPUStressKnobs.MemPass, memPassDuration)
|
||||||
|
|
||||||
// Pass 1: CPU
|
// Pass 1: CPU
|
||||||
cpu := runStressPass(ctx, d, "CPU", cpuPassDuration, []string{
|
cpu := runStressPass(ctx, d, "CPU", cpuDur, []string{
|
||||||
"--cpu", strconv.Itoa(cores),
|
"--cpu", strconv.Itoa(cores),
|
||||||
"--cpu-method", "all",
|
"--cpu-method", "all",
|
||||||
"--timeout", durationSeconds(cpuPassDuration),
|
"--timeout", durationSeconds(cpuDur),
|
||||||
"--metrics-brief",
|
"--metrics-brief",
|
||||||
"--verify",
|
"--verify",
|
||||||
})
|
})
|
||||||
@@ -104,11 +124,11 @@ func CPUStress(ctx context.Context, d Deps) Outcome {
|
|||||||
SubSteps: subs,
|
SubSteps: subs,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
mem := runStressPass(ctx, d, "memory", memPassDuration, []string{
|
mem := runStressPass(ctx, d, "memory", memDur, []string{
|
||||||
"--vm", "1",
|
"--vm", "1",
|
||||||
"--vm-bytes", strconv.FormatInt(cap, 10),
|
"--vm-bytes", strconv.FormatInt(cap, 10),
|
||||||
"--vm-keep",
|
"--vm-keep",
|
||||||
"--timeout", durationSeconds(memPassDuration),
|
"--timeout", durationSeconds(memDur),
|
||||||
"--metrics-brief",
|
"--metrics-brief",
|
||||||
"--verify",
|
"--verify",
|
||||||
})
|
})
|
||||||
@@ -133,6 +153,64 @@ func CPUStress(ctx context.Context, d Deps) Outcome {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// runEDACSidecar polls /sys EDAC counters on d.CPUStressKnobs.EDACPoll
|
||||||
|
// cadence (or 10s fallback) for the lifetime of the stage ctx, emitting
|
||||||
|
// one sample per (memory-controller × {ce,ue}) pair on each tick. A
|
||||||
|
// single failing read is tolerated: the next tick picks up the counter.
|
||||||
|
//
|
||||||
|
// This is where the critical edac_ue threshold becomes a hard-fail: as
|
||||||
|
// soon as a UE counter advances past 0, the server-side evaluator trips
|
||||||
|
// and flips the run into FailedHolding. The sidecar emits whether or
|
||||||
|
// not stress-ng is still running; that keeps the signal live during
|
||||||
|
// inter-pass gaps.
|
||||||
|
//
|
||||||
|
// MCE counts are intentionally not sampled here — they require
|
||||||
|
// rasdaemon or mcelog and vary by live-image packaging. The threshold
|
||||||
|
// rule for mce stays seeded (so the DB shape is stable) but only fires
|
||||||
|
// once a matching kind lands, which is a follow-up.
|
||||||
|
func runEDACSidecar(ctx context.Context, wg *sync.WaitGroup, d Deps) {
|
||||||
|
defer wg.Done()
|
||||||
|
if d.Sensor == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
poll := d.CPUStressKnobs.EDACPoll
|
||||||
|
if poll <= 0 {
|
||||||
|
poll = 10 * time.Second
|
||||||
|
}
|
||||||
|
t := time.NewTicker(poll)
|
||||||
|
defer t.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case <-t.C:
|
||||||
|
edac := probes.EDAC()
|
||||||
|
if len(edac) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
batch := make([]Sample, 0, len(edac))
|
||||||
|
for _, s := range edac {
|
||||||
|
batch = append(batch, Sample{Kind: s.Kind, Key: s.Key, Value: s.Value, Unit: s.Unit})
|
||||||
|
}
|
||||||
|
sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||||
|
if err := d.Sensor(sendCtx, batch); err != nil {
|
||||||
|
d.Warn("CPUStress: edac sample post: " + err.Error())
|
||||||
|
}
|
||||||
|
cancel()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// nonzeroDur picks override over fallback, but only when override is
// strictly positive. Lets callers pass a zero-value duration to mean
// "no override; use fallback" without a separate ok return. Negative
// overrides are treated the same as zero.
func nonzeroDur(override, fallback time.Duration) time.Duration {
	if override > 0 {
		return override
	}
	return fallback
}
|
||||||
|
|
||||||
// subStepFromPass projects a stressPass into a SubStepReport — shared by
|
// subStepFromPass projects a stressPass into a SubStepReport — shared by
|
||||||
// both passes and by the mid-stage early-return paths so the UI always
|
// both passes and by the mid-stage early-return paths so the UI always
|
||||||
// sees exactly one row per pass, even on failure.
|
// sees exactly one row per pass, even on failure.
|
||||||
|
|||||||
@@ -0,0 +1,24 @@
|
|||||||
|
// fake_dmidecode simulates `dmidecode -t bios` for unit tests of the
|
||||||
|
// firmware probe's BIOS parser. Prints deterministic output modeled on
|
||||||
|
// a real Supermicro host; exits 0 regardless of flags.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
// main prints one fixed `dmidecode -t bios` style transcript to stdout
// and returns, so the process exits 0. Command-line flags are never
// inspected.
//
// NOTE(review): real dmidecode indents the fields under "BIOS
// Information" with tabs. The indentation of this literal could not be
// recovered from the diff view — confirm against the parser's
// expectations before relying on leading whitespace.
func main() {
	fmt.Println(`# dmidecode 3.3
Getting SMBIOS data from sysfs.
SMBIOS 3.2.0 present.

Handle 0x0000, DMI type 0, 26 bytes
BIOS Information
Vendor: American Megatrends Inc.
Version: 3.2
Release Date: 07/15/2021
Address: 0xF0000
Runtime Size: 64 kB
ROM Size: 32 MB
Characteristics:
PCI is supported
BIOS is upgradeable`)
}
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
// Package fakes is the umbrella for deterministic stand-ins for
// external probe binaries that Vetting's stage code normally shells
// out to (stress-ng, fio, iperf3, dmidecode, ethtool, nvidia-smi,
// mcelog, nvme). Each real binary gets its own subpackage under
// fakes/<name>/ with `package main` and a main() that prints golden
// output — build with `go build -o <tmp>/<name> ./agent/tests/fakes/<name>`
// and point a test's tests.Deps.LookPath at <tmp>/<name>.
//
// The seam in tests is tests.Deps.LookPath: when non-nil the stage
// code uses it instead of os/exec.LookPath. Outside tests, nil
// LookPath means "use the real binary on $PATH" — stages continue to
// work on production hosts without the fakes package around.
//
// How to add a new fake:
//  1. Create agent/tests/fakes/<binaryname>/main.go.
//  2. Write `package main` with a main() that prints exactly the
//     bytes the real tool would produce for the input you care to
//     simulate. Determinism > completeness — tests want a known
//     sample, not a realistic one.
//  3. In the unit test, compile the fake with `go build -o` into
//     t.TempDir() before the test body runs, then reference the
//     resulting binary from the test.
|
||||||
|
package fakes
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
// fake_stress_ng simulates stress-ng for unit tests. Accepts (and
|
||||||
|
// ignores) any flag, sleeps briefly so callers that measure wall-clock
|
||||||
|
// see a non-zero elapsed, and prints the "passed" lines CPUStress
|
||||||
|
// expects. Exits 0.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// main logs the arguments it was invoked with to stderr, pauses for
// 50 ms so callers that measure wall-clock time see a non-zero elapsed,
// then prints two fixed stress-ng style info lines to stdout. It never
// parses its flags and returns normally, so the exit status is 0.
func main() {
	fmt.Fprintln(os.Stderr, "fake_stress_ng invoked:", os.Args[1:])
	time.Sleep(50 * time.Millisecond)
	fmt.Println("stress-ng: info: [1] dispatching hogs: 1 cpu")
	fmt.Println("stress-ng: info: [1] successful run completed in 0.05s")
}
|
||||||
+130
-16
@@ -9,19 +9,27 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"vetting/agent/probes"
|
||||||
)
|
)
|
||||||
|
|
||||||
// NetworkConfig is what the agent passes to Network: the orchestrator's
|
// NetworkConfig is what the agent passes to Network: the orchestrator's
|
||||||
// iperf3 server address and port. We derive host from OrchestratorURL.
|
// iperf3 server address, port, and the per-profile duration.
|
||||||
type NetworkConfig struct {
|
type NetworkConfig struct {
|
||||||
OrchestratorURL string
|
OrchestratorURL string
|
||||||
IperfPort int // 0 = 5201
|
IperfPort int // 0 = 5201
|
||||||
Duration time.Duration
|
Duration time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
// Network runs iperf3 against the orchestrator's bundled server. Records
|
// Network runs iperf3 against the orchestrator's bundled server for
|
||||||
// bandwidth as a measurement; fails if iperf3 is missing, the server
|
// the profile-configured duration. Records throughput as a measurement;
|
||||||
// isn't reachable, or throughput is zero.
|
// records per-interface rx/tx error-rate deltas as nic_retrans samples
|
||||||
|
// so the server-side threshold gate (`nic_retrans rate < 0.001`) fires
|
||||||
|
// on a flaky PHY or a wire that drops half its packets under load.
|
||||||
|
//
|
||||||
|
// Failure cases: iperf3 missing, server unreachable, zero throughput.
|
||||||
|
// Zero throughput is treated as a hard failure — an iperf that finished
|
||||||
|
// cleanly but pushed zero bytes is indistinguishable from a bad run.
|
||||||
func Network(ctx context.Context, d Deps, cfg NetworkConfig) Outcome {
|
func Network(ctx context.Context, d Deps, cfg NetworkConfig) Outcome {
|
||||||
if _, err := exec.LookPath("iperf3"); err != nil {
|
if _, err := exec.LookPath("iperf3"); err != nil {
|
||||||
// Live image ships iperf3; absence means packaging regression.
|
// Live image ships iperf3; absence means packaging regression.
|
||||||
@@ -51,6 +59,11 @@ func Network(ctx context.Context, d Deps, cfg NetworkConfig) Outcome {
|
|||||||
duration = 10 * time.Second
|
duration = 10 * time.Second
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Snapshot /proc/net/dev before the test so we can attribute any
|
||||||
|
// error-count growth to *this stage's* traffic. The same snapshot
|
||||||
|
// taken after iperf returns is the end of the window.
|
||||||
|
netStart := indexNetDev(probes.NetDev())
|
||||||
|
|
||||||
args := []string{
|
args := []string{
|
||||||
"-c", host,
|
"-c", host,
|
||||||
"-p", strconv.Itoa(port),
|
"-p", strconv.Itoa(port),
|
||||||
@@ -72,7 +85,7 @@ func Network(ctx context.Context, d Deps, cfg NetworkConfig) Outcome {
|
|||||||
Extras: map[string]any{"stderr_tail": tailLines(string(out), 20)},
|
Extras: map[string]any{"stderr_tail": tailLines(string(out), 20)},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
mbps, parsed, err := parseIperfJSON(out)
|
mbps, retrans, bytesSent, parsed, err := parseIperfJSON(out)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
d.Error("Network: parse iperf3 output: " + err.Error())
|
d.Error("Network: parse iperf3 output: " + err.Error())
|
||||||
return Outcome{
|
return Outcome{
|
||||||
@@ -82,12 +95,58 @@ func Network(ctx context.Context, d Deps, cfg NetworkConfig) Outcome {
|
|||||||
Extras: map[string]any{"raw": string(out)},
|
Extras: map[string]any{"raw": string(out)},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
netEnd := indexNetDev(probes.NetDev())
|
||||||
|
netDelta := diffNetDev(netStart, netEnd)
|
||||||
|
|
||||||
|
samples := []Sample{{Kind: "iperf", Key: "throughput_mbps", Value: mbps, Unit: "Mbps"}}
|
||||||
|
|
||||||
|
// iperf-derived retrans rate: retrans_count / packet_count_estimate.
|
||||||
|
// TCP typical MTU 1500; payload ~1460. We divide bytes by 1460 to
|
||||||
|
// approximate packets. The rate is not clamped; [0, 1] is the typical range.
|
||||||
|
if bytesSent > 0 {
|
||||||
|
packets := float64(bytesSent) / 1460.0
|
||||||
|
if packets > 0 {
|
||||||
|
samples = append(samples, Sample{
|
||||||
|
Kind: "nic_retrans",
|
||||||
|
Key: "iperf/rate",
|
||||||
|
Value: float64(retrans) / packets,
|
||||||
|
Unit: "rate",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Per-interface error-rate deltas. A flaky cable typically surfaces
|
||||||
|
// as tx_errs or tx_drop on the originating interface, not inside
|
||||||
|
// iperf's own tally.
|
||||||
|
for iface, delta := range netDelta {
|
||||||
|
if delta.TxBytes > 0 {
|
||||||
|
packets := float64(delta.TxBytes) / 1460.0
|
||||||
|
if packets > 0 {
|
||||||
|
rate := float64(delta.TxErrs+delta.TxDrop) / packets
|
||||||
|
samples = append(samples, Sample{
|
||||||
|
Kind: "nic_retrans", Key: iface + "/rate", Value: rate, Unit: "rate",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Diagnostic raw counts so the report can show which interface
|
||||||
|
// bled. These don't fire a threshold today but are useful for
|
||||||
|
// post-mortem.
|
||||||
|
samples = append(samples,
|
||||||
|
Sample{Kind: "nic_errs", Key: iface + "/rx", Value: float64(delta.RxErrs + delta.RxDrop), Unit: "count"},
|
||||||
|
Sample{Kind: "nic_errs", Key: iface + "/tx", Value: float64(delta.TxErrs + delta.TxDrop), Unit: "count"},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
if d.Sensor != nil {
|
if d.Sensor != nil {
|
||||||
_ = d.Sensor(ctx, []Sample{{Kind: "iperf", Key: "throughput_mbps", Value: mbps, Unit: "Mbps"}})
|
_ = d.Sensor(ctx, samples)
|
||||||
}
|
}
|
||||||
|
|
||||||
extras := map[string]any{
|
extras := map[string]any{
|
||||||
"throughput_mbps": mbps,
|
"throughput_mbps": mbps,
|
||||||
|
"retransmits": retrans,
|
||||||
|
"bytes_sent": bytesSent,
|
||||||
|
"net_delta": netDelta,
|
||||||
"iperf_end": parsed,
|
"iperf_end": parsed,
|
||||||
}
|
}
|
||||||
if mbps <= 0 {
|
if mbps <= 0 {
|
||||||
@@ -98,14 +157,55 @@ func Network(ctx context.Context, d Deps, cfg NetworkConfig) Outcome {
|
|||||||
Extras: extras,
|
Extras: extras,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
d.Info(fmt.Sprintf("Network: iperf3 PASSED: %.1f Mbps", mbps))
|
d.Info(fmt.Sprintf("Network: iperf3 PASSED: %.1f Mbps (retransmits=%d)", mbps, retrans))
|
||||||
return Outcome{
|
return Outcome{
|
||||||
Passed: true,
|
Passed: true,
|
||||||
Summary: fmt.Sprintf("%.1f Mbps to %s", mbps, host),
|
Summary: fmt.Sprintf("%.1f Mbps to %s (retransmits=%d)", mbps, host, retrans),
|
||||||
Extras: extras,
|
Extras: extras,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// indexNetDev flattens a NetDev slice into a map keyed by interface
|
||||||
|
// name so diffNetDev can pair start/end by name without O(n²) scans.
|
||||||
|
func indexNetDev(snaps []probes.NetDevSnapshot) map[string]probes.NetDevSnapshot {
|
||||||
|
out := map[string]probes.NetDevSnapshot{}
|
||||||
|
for _, s := range snaps {
|
||||||
|
out[s.Iface] = s
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// diffNetDev computes end − start for each interface present in both
|
||||||
|
// snapshots. An interface that dropped away mid-run is dropped from
|
||||||
|
// the result (can't compute a delta). Underflow (end < start, rare
|
||||||
|
// after a counter reset) is clamped to 0.
|
||||||
|
func diffNetDev(start, end map[string]probes.NetDevSnapshot) map[string]probes.NetDevSnapshot {
|
||||||
|
out := map[string]probes.NetDevSnapshot{}
|
||||||
|
for iface, e := range end {
|
||||||
|
s, ok := start[iface]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out[iface] = probes.NetDevSnapshot{
|
||||||
|
Iface: iface,
|
||||||
|
RxBytes: subU64(e.RxBytes, s.RxBytes),
|
||||||
|
RxErrs: subU64(e.RxErrs, s.RxErrs),
|
||||||
|
RxDrop: subU64(e.RxDrop, s.RxDrop),
|
||||||
|
TxBytes: subU64(e.TxBytes, s.TxBytes),
|
||||||
|
TxErrs: subU64(e.TxErrs, s.TxErrs),
|
||||||
|
TxDrop: subU64(e.TxDrop, s.TxDrop),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// subU64 returns a − b, clamped to 0 when b exceeds a so the unsigned
// subtraction never wraps around to a huge value.
func subU64(a, b uint64) uint64 {
	if a < b {
		return 0
	}
	return a - b
}
|
||||||
|
|
||||||
// deriveHost pulls the hostname out of an https://host:port base URL.
|
// deriveHost pulls the hostname out of an https://host:port base URL.
|
||||||
func deriveHost(raw string) (string, error) {
|
func deriveHost(raw string) (string, error) {
|
||||||
if raw == "" {
|
if raw == "" {
|
||||||
@@ -119,18 +219,22 @@ func deriveHost(raw string) (string, error) {
|
|||||||
return strings.TrimSpace(h), nil
|
return strings.TrimSpace(h), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseIperfJSON pulls end.sum_sent.bits_per_second out of iperf3 -J.
|
// parseIperfJSON pulls end.sum_sent.bits_per_second and retransmits out
|
||||||
// Returns (Mbps, full-json-map, err).
|
// of iperf3 -J. Returns (Mbps, retransmits, bytes_sent, full-end-map, err).
|
||||||
func parseIperfJSON(b []byte) (float64, map[string]any, error) {
|
func parseIperfJSON(b []byte) (float64, int64, int64, map[string]any, error) {
|
||||||
var top map[string]any
|
var top map[string]any
|
||||||
if err := json.Unmarshal(b, &top); err != nil {
|
if err := json.Unmarshal(b, &top); err != nil {
|
||||||
return 0, nil, err
|
return 0, 0, 0, nil, err
|
||||||
}
|
}
|
||||||
end, ok := top["end"].(map[string]any)
|
end, ok := top["end"].(map[string]any)
|
||||||
if !ok {
|
if !ok {
|
||||||
return 0, top, fmt.Errorf("missing end")
|
return 0, 0, 0, nil, fmt.Errorf("missing end")
|
||||||
}
|
}
|
||||||
// iperf3 reports either sum_sent (when -R not set) or sum_received.
|
// Pull the first sum that carries bits_per_second; retransmits +
|
||||||
|
// bytes live there too for TCP.
|
||||||
|
var mbps float64
|
||||||
|
var retrans int64
|
||||||
|
var bytesSent int64
|
||||||
for _, key := range []string{"sum_sent", "sum_received", "sum"} {
|
for _, key := range []string{"sum_sent", "sum_received", "sum"} {
|
||||||
sum, ok := end[key].(map[string]any)
|
sum, ok := end[key].(map[string]any)
|
||||||
if !ok {
|
if !ok {
|
||||||
@@ -140,7 +244,17 @@ func parseIperfJSON(b []byte) (float64, map[string]any, error) {
|
|||||||
if !ok {
|
if !ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
return bps / 1_000_000, end, nil
|
mbps = bps / 1_000_000
|
||||||
|
if r, ok := sum["retransmits"].(float64); ok {
|
||||||
|
retrans = int64(r)
|
||||||
}
|
}
|
||||||
return 0, end, fmt.Errorf("no bits_per_second in end.sum_*")
|
if bs, ok := sum["bytes"].(float64); ok {
|
||||||
|
bytesSent = int64(bs)
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if mbps == 0 {
|
||||||
|
return 0, 0, 0, end, fmt.Errorf("no bits_per_second in end.sum_*")
|
||||||
|
}
|
||||||
|
return mbps, retrans, bytesSent, end, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,192 @@
|
|||||||
|
package tests
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"vetting/agent/probes"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestParseIperfJSON_SumSent confirms we pull throughput, retransmits,
|
||||||
|
// and bytes_sent from end.sum_sent. Real iperf3 -J output nests these
|
||||||
|
// three under end.sum_sent for TCP streams.
|
||||||
|
func TestParseIperfJSON_SumSent(t *testing.T) {
|
||||||
|
raw := `{
|
||||||
|
"end": {
|
||||||
|
"sum_sent": {
|
||||||
|
"bits_per_second": 950000000,
|
||||||
|
"retransmits": 42,
|
||||||
|
"bytes": 1187500000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}`
|
||||||
|
mbps, retrans, bytesSent, _, err := parseIperfJSON([]byte(raw))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parseIperfJSON: %v", err)
|
||||||
|
}
|
||||||
|
if mbps != 950 {
|
||||||
|
t.Errorf("mbps = %v, want 950", mbps)
|
||||||
|
}
|
||||||
|
if retrans != 42 {
|
||||||
|
t.Errorf("retransmits = %d, want 42", retrans)
|
||||||
|
}
|
||||||
|
if bytesSent != 1187500000 {
|
||||||
|
t.Errorf("bytesSent = %d, want 1187500000", bytesSent)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestParseIperfJSON_MissingEnd fails cleanly when iperf returned
|
||||||
|
// something without an end block (partial/aborted run).
|
||||||
|
func TestParseIperfJSON_MissingEnd(t *testing.T) {
|
||||||
|
raw := `{"start": {}}`
|
||||||
|
if _, _, _, _, err := parseIperfJSON([]byte(raw)); err == nil {
|
||||||
|
t.Errorf("expected error on iperf output missing end block")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestParseIperfJSON_ZeroBps returns an error so the stage can fail
|
||||||
|
// fast. A successful-exit iperf that pushed zero bits is indistinguishable
|
||||||
|
// from a broken run and must not pass.
|
||||||
|
func TestParseIperfJSON_ZeroBps(t *testing.T) {
|
||||||
|
raw := `{"end": {"sum_sent": {"bits_per_second": 0}}}`
|
||||||
|
if _, _, _, _, err := parseIperfJSON([]byte(raw)); err == nil {
|
||||||
|
t.Errorf("expected error when bits_per_second is 0")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestParseIperfJSON_FallsBackToSumReceived: UDP tests and some edge
|
||||||
|
// cases don't populate sum_sent. The parser walks sum_sent → sum_received
|
||||||
|
// → sum and picks the first that has a throughput number.
|
||||||
|
func TestParseIperfJSON_FallsBackToSumReceived(t *testing.T) {
|
||||||
|
raw := `{
|
||||||
|
"end": {
|
||||||
|
"sum_received": {"bits_per_second": 500000000}
|
||||||
|
}
|
||||||
|
}`
|
||||||
|
mbps, _, _, _, err := parseIperfJSON([]byte(raw))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parseIperfJSON: %v", err)
|
||||||
|
}
|
||||||
|
if mbps != 500 {
|
||||||
|
t.Errorf("mbps = %v, want 500", mbps)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDiffNetDev_HappyPath confirms end − start on a shared interface
|
||||||
|
// produces the delta we expect. eth0 pushed 10k bytes and accumulated
|
||||||
|
// 3 tx errors during the window.
|
||||||
|
func TestDiffNetDev_HappyPath(t *testing.T) {
|
||||||
|
start := map[string]probes.NetDevSnapshot{
|
||||||
|
"eth0": {Iface: "eth0", RxBytes: 1000, RxErrs: 0, TxBytes: 5000, TxErrs: 1},
|
||||||
|
}
|
||||||
|
end := map[string]probes.NetDevSnapshot{
|
||||||
|
"eth0": {Iface: "eth0", RxBytes: 2000, RxErrs: 0, TxBytes: 15000, TxErrs: 4},
|
||||||
|
}
|
||||||
|
delta := diffNetDev(start, end)
|
||||||
|
got, ok := delta["eth0"]
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("eth0 missing from diff output")
|
||||||
|
}
|
||||||
|
if got.RxBytes != 1000 {
|
||||||
|
t.Errorf("RxBytes delta=%d, want 1000", got.RxBytes)
|
||||||
|
}
|
||||||
|
if got.TxBytes != 10000 {
|
||||||
|
t.Errorf("TxBytes delta=%d, want 10000", got.TxBytes)
|
||||||
|
}
|
||||||
|
if got.TxErrs != 3 {
|
||||||
|
t.Errorf("TxErrs delta=%d, want 3", got.TxErrs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDiffNetDev_InterfaceVanished: an interface present at start but
|
||||||
|
// gone at end drops from the diff rather than carrying a negative or
|
||||||
|
// stale number.
|
||||||
|
func TestDiffNetDev_InterfaceVanished(t *testing.T) {
|
||||||
|
start := map[string]probes.NetDevSnapshot{
|
||||||
|
"eth0": {Iface: "eth0", TxBytes: 1000},
|
||||||
|
"eth1": {Iface: "eth1", TxBytes: 500},
|
||||||
|
}
|
||||||
|
end := map[string]probes.NetDevSnapshot{
|
||||||
|
"eth0": {Iface: "eth0", TxBytes: 2000},
|
||||||
|
}
|
||||||
|
delta := diffNetDev(start, end)
|
||||||
|
if _, ok := delta["eth1"]; ok {
|
||||||
|
t.Errorf("eth1 should have been dropped (gone at end)")
|
||||||
|
}
|
||||||
|
if delta["eth0"].TxBytes != 1000 {
|
||||||
|
t.Errorf("eth0 TxBytes delta=%d, want 1000", delta["eth0"].TxBytes)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDiffNetDev_CounterReset: if a counter resets between snapshots
|
||||||
|
// (kernel restart, wrap-around on a 32-bit counter) we clamp to 0
|
||||||
|
// rather than underflow a uint64.
|
||||||
|
func TestDiffNetDev_CounterReset(t *testing.T) {
|
||||||
|
start := map[string]probes.NetDevSnapshot{
|
||||||
|
"eth0": {Iface: "eth0", TxBytes: 9999, TxErrs: 5},
|
||||||
|
}
|
||||||
|
end := map[string]probes.NetDevSnapshot{
|
||||||
|
"eth0": {Iface: "eth0", TxBytes: 100, TxErrs: 0},
|
||||||
|
}
|
||||||
|
delta := diffNetDev(start, end)
|
||||||
|
if delta["eth0"].TxBytes != 0 {
|
||||||
|
t.Errorf("reset TxBytes delta=%d, want 0 (clamped)", delta["eth0"].TxBytes)
|
||||||
|
}
|
||||||
|
if delta["eth0"].TxErrs != 0 {
|
||||||
|
t.Errorf("reset TxErrs delta=%d, want 0 (clamped)", delta["eth0"].TxErrs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDeriveHost: orchestrator URL → host extraction is how the agent
|
||||||
|
// picks the iperf3 server target. Handles both https://host and
|
||||||
|
// https://host:port shapes.
|
||||||
|
func TestDeriveHost(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
raw string
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{"https://orch.local", "orch.local"},
|
||||||
|
{"https://orch.local:8443", "orch.local"},
|
||||||
|
{"http://10.0.0.5:8080", "10.0.0.5"},
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
got, err := deriveHost(c.raw)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("deriveHost(%q) error: %v", c.raw, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if got != c.want {
|
||||||
|
t.Errorf("deriveHost(%q) = %q, want %q", c.raw, got, c.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeriveHost_Empty(t *testing.T) {
|
||||||
|
if _, err := deriveHost(""); err == nil {
|
||||||
|
t.Errorf("deriveHost(\"\") should error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestParseIperfJSON_ParsesEndMap confirms the full end map is returned
|
||||||
|
// so extras can show every field iperf produced, not just the three we
|
||||||
|
// extract by hand.
|
||||||
|
func TestParseIperfJSON_ParsesEndMap(t *testing.T) {
|
||||||
|
raw := `{
|
||||||
|
"end": {
|
||||||
|
"sum_sent": {"bits_per_second": 1000000, "retransmits": 0, "bytes": 125000},
|
||||||
|
"cpu_utilization_percent": {"host_total": 12.3}
|
||||||
|
}
|
||||||
|
}`
|
||||||
|
_, _, _, endMap, err := parseIperfJSON([]byte(raw))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parseIperfJSON: %v", err)
|
||||||
|
}
|
||||||
|
if endMap == nil {
|
||||||
|
t.Fatalf("endMap is nil")
|
||||||
|
}
|
||||||
|
// Sanity: both keys round-trip via json.
|
||||||
|
b, _ := json.Marshal(endMap)
|
||||||
|
if len(b) == 0 {
|
||||||
|
t.Errorf("endMap marshaled to empty")
|
||||||
|
}
|
||||||
|
}
|
||||||
+136
-17
@@ -7,12 +7,20 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
// PSU walks /sys/class/hwmon for in*_input (mV) and in*_label to find
|
// PSU walks /sys/class/hwmon for in*_input (mV) and in*_label to find
|
||||||
// PSU rails. In home-lab hosts the kernel surfaces a handful of named
|
// PSU rails, then samples each rail every psuSampleInterval for a
|
||||||
// rails (12V, 5V, 3V3). No rails → auto-skip. Any rail outside a ±10%
|
// window sized by the stage timeout. During Burn a separate sidecar
|
||||||
// window of its nominal value → fail.
|
// (see burn.go) runs the same probe concurrently with workload — the
|
||||||
|
// PSU stage itself catches slow post-load sag that only surfaces once
|
||||||
|
// the 12V rail starts recovering from a brownout under concurrent CPU
|
||||||
|
// + fio + iperf load.
|
||||||
|
//
|
||||||
|
// Any rail outside ±10% of its nominal value at any tick fires the
|
||||||
|
// critical threshold (server-side) and fails the stage. A host with no
|
||||||
|
// PSU rails wired to hwmon auto-skips.
|
||||||
func PSU(ctx context.Context, d Deps) Outcome {
|
func PSU(ctx context.Context, d Deps) Outcome {
|
||||||
rails := scanPSURails()
|
rails := scanPSURails()
|
||||||
if len(rails) == 0 {
|
if len(rails) == 0 {
|
||||||
@@ -24,39 +32,150 @@ func PSU(ctx context.Context, d Deps) Outcome {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var samples []Sample
|
window := resolvePSUWindow(d.StageTimeout)
|
||||||
|
deadline := time.Now().Add(window)
|
||||||
|
interval := psuSampleInterval
|
||||||
|
if window < interval*2 {
|
||||||
|
// Tiny window (tests, pathological stage_timeout) — at least two
|
||||||
|
// ticks so aggregate stats are meaningful.
|
||||||
|
interval = window / 2
|
||||||
|
if interval < time.Second {
|
||||||
|
interval = time.Second
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Per-label tracking: min/max across the window, count of out-of-range
|
||||||
|
// hits, last-observed value (shown in the summary).
|
||||||
|
type railStats struct {
|
||||||
|
label string
|
||||||
|
minV float64
|
||||||
|
maxV float64
|
||||||
|
lastV float64
|
||||||
|
ticks int
|
||||||
|
breaches int
|
||||||
|
reason string
|
||||||
|
}
|
||||||
|
stats := map[string]*railStats{}
|
||||||
|
|
||||||
|
tick := time.NewTicker(interval)
|
||||||
|
defer tick.Stop()
|
||||||
|
// Start with an immediate sample so a window shorter than one tick
|
||||||
|
// interval still produces at least one reading.
|
||||||
|
sampleOnce := func() {
|
||||||
|
cur := scanPSURails()
|
||||||
|
if len(cur) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
batch := make([]Sample, 0, len(cur))
|
||||||
|
for _, r := range cur {
|
||||||
|
s, ok := stats[r.Label]
|
||||||
|
if !ok {
|
||||||
|
s = &railStats{label: r.Label, minV: r.Volts, maxV: r.Volts}
|
||||||
|
stats[r.Label] = s
|
||||||
|
}
|
||||||
|
s.ticks++
|
||||||
|
s.lastV = r.Volts
|
||||||
|
if r.Volts < s.minV {
|
||||||
|
s.minV = r.Volts
|
||||||
|
}
|
||||||
|
if r.Volts > s.maxV {
|
||||||
|
s.maxV = r.Volts
|
||||||
|
}
|
||||||
|
if ok, why := voltageInRange(r); !ok {
|
||||||
|
s.breaches++
|
||||||
|
if s.reason == "" {
|
||||||
|
s.reason = why
|
||||||
|
}
|
||||||
|
}
|
||||||
|
batch = append(batch, Sample{Kind: "psu_volt", Key: r.Label, Value: r.Volts, Unit: "V"})
|
||||||
|
}
|
||||||
|
if d.Sensor != nil && len(batch) > 0 {
|
||||||
|
sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||||
|
_ = d.Sensor(sendCtx, batch)
|
||||||
|
cancel()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sampleOnce()
|
||||||
|
sampling:
|
||||||
|
for time.Now().Before(deadline) {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
break sampling
|
||||||
|
case <-tick.C:
|
||||||
|
sampleOnce()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build the outcome. Extras carry per-rail rollup so the report can
|
||||||
|
// show "12V min=11.1 max=12.05 (3/120 ticks out of range)".
|
||||||
|
type railRollup struct {
|
||||||
|
Label string `json:"label"`
|
||||||
|
MinV float64 `json:"min_v"`
|
||||||
|
MaxV float64 `json:"max_v"`
|
||||||
|
LastV float64 `json:"last_v"`
|
||||||
|
Ticks int `json:"ticks"`
|
||||||
|
Breaches int `json:"breaches"`
|
||||||
|
Reason string `json:"reason,omitempty"`
|
||||||
|
}
|
||||||
|
rollups := make([]railRollup, 0, len(stats))
|
||||||
problems := []string{}
|
problems := []string{}
|
||||||
for _, rail := range rails {
|
for _, s := range stats {
|
||||||
samples = append(samples, Sample{Kind: "psu_volt", Key: rail.Label, Value: rail.Volts, Unit: "V"})
|
rollups = append(rollups, railRollup{
|
||||||
if ok, why := voltageInRange(rail); !ok {
|
Label: s.label, MinV: s.minV, MaxV: s.maxV, LastV: s.lastV,
|
||||||
problems = append(problems, fmt.Sprintf("%s=%.2fV (%s)", rail.Label, rail.Volts, why))
|
Ticks: s.ticks, Breaches: s.breaches, Reason: s.reason,
|
||||||
|
})
|
||||||
|
if s.breaches > 0 {
|
||||||
|
problems = append(problems, fmt.Sprintf("%s min=%.2fV max=%.2fV (%s)", s.label, s.minV, s.maxV, s.reason))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if d.Sensor != nil {
|
|
||||||
_ = d.Sensor(ctx, samples)
|
|
||||||
}
|
|
||||||
|
|
||||||
extras := map[string]any{
|
extras := map[string]any{
|
||||||
"rails": rails,
|
"rails": rollups,
|
||||||
"problems": problems,
|
"problems": problems,
|
||||||
|
"window": window.String(),
|
||||||
|
"interval": interval.String(),
|
||||||
}
|
}
|
||||||
if len(problems) > 0 {
|
if len(problems) > 0 {
|
||||||
d.Error("PSU: out-of-range rails: " + strings.Join(problems, ", "))
|
d.Error("PSU: out-of-range rails: " + strings.Join(problems, "; "))
|
||||||
return Outcome{
|
return Outcome{
|
||||||
Passed: false,
|
Passed: false,
|
||||||
Message: "PSU rails out of range: " + strings.Join(problems, ", "),
|
Message: "PSU rails out of range: " + strings.Join(problems, "; "),
|
||||||
Summary: fmt.Sprintf("%d rails, %d failing", len(rails), len(problems)),
|
Summary: fmt.Sprintf("%d rails, %d failing", len(rollups), len(problems)),
|
||||||
Extras: extras,
|
Extras: extras,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
d.Info(fmt.Sprintf("PSU: %d rails within ±10%% nominal", len(rails)))
|
d.Info(fmt.Sprintf("PSU: %d rails within ±10%% nominal across %s window", len(rollups), window))
|
||||||
return Outcome{
|
return Outcome{
|
||||||
Passed: true,
|
Passed: true,
|
||||||
Summary: fmt.Sprintf("%d rails nominal", len(rails)),
|
Summary: fmt.Sprintf("%d rails nominal (%s)", len(rollups), window),
|
||||||
Extras: extras,
|
Extras: extras,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// psuSampleInterval is the default tick for post-Burn rail sampling.
// Five seconds is slow enough to stay under the HTTP budget and fast
// enough to catch rail recovery transients.
const psuSampleInterval = 5 * time.Second
|
||||||
|
|
||||||
|
// resolvePSUWindow maps the stage timeout to the PSU sampling window.
//
//   - stageTimeout <= 0 (tests / pre-Phase-2 orchestrator): stay
//     snapshot-like at 30 s.
//   - otherwise: stageTimeout - 5 s, leaving headroom for sensor flush +
//     result post, clamped to [30 s, 10 min]. The cap keeps a 24 h soak
//     from spending all day in PSU; the floor keeps the aggregates
//     non-trivial, which means a timeout under 35 s still yields a 30 s
//     window.
func resolvePSUWindow(stageTimeout time.Duration) time.Duration {
	const (
		floor    = 30 * time.Second
		ceiling  = 10 * time.Minute
		headroom = 5 * time.Second
	)
	if stageTimeout <= 0 {
		return floor
	}
	switch w := stageTimeout - headroom; {
	case w < floor:
		return floor
	case w > ceiling:
		return ceiling
	default:
		return w
	}
}
|
||||||
|
|
||||||
type psuRail struct {
|
type psuRail struct {
|
||||||
Label string `json:"label"`
|
Label string `json:"label"`
|
||||||
Volts float64 `json:"volts"`
|
Volts float64 `json:"volts"`
|
||||||
|
|||||||
@@ -0,0 +1,112 @@
|
|||||||
|
package tests
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestIsPSULabel keeps the allowlist narrow enough that CPU VRM rails
|
||||||
|
// don't get misclassified as PSU-out-of-range failures but wide enough
|
||||||
|
// that common SuperMicro/Intel hwmon labels land in the Yes bucket.
|
||||||
|
func TestIsPSULabel(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
label string
|
||||||
|
want bool
|
||||||
|
}{
|
||||||
|
{"+12V", true},
|
||||||
|
{"12V", true},
|
||||||
|
{"+5V", true},
|
||||||
|
{"5V", true},
|
||||||
|
{"+3.3V", true},
|
||||||
|
{"3V3", true},
|
||||||
|
{"VCCIN", true},
|
||||||
|
{"vccin", true},
|
||||||
|
{"Vcore", false},
|
||||||
|
{"CPU VCORE", false},
|
||||||
|
{"AVCC", false},
|
||||||
|
{"", false},
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
if got := isPSULabel(tc.label); got != tc.want {
|
||||||
|
t.Errorf("isPSULabel(%q) = %v, want %v", tc.label, got, tc.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestNominalFor maps rail labels back to expected nominal voltages.
|
||||||
|
// Unknown labels must return 0 so voltageInRange short-circuits — an
|
||||||
|
// accidental nominal would invent out-of-range failures.
|
||||||
|
func TestNominalFor(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
label string
|
||||||
|
want float64
|
||||||
|
}{
|
||||||
|
{"+12V", 12.0},
|
||||||
|
{"12V", 12.0},
|
||||||
|
{"+5V", 5.0},
|
||||||
|
{"+3.3V", 3.3},
|
||||||
|
{"3V3", 3.3},
|
||||||
|
{"VCCIN", 0},
|
||||||
|
{"unknown", 0},
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
if got := nominalFor(tc.label); got != tc.want {
|
||||||
|
t.Errorf("nominalFor(%q) = %v, want %v", tc.label, got, tc.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestVoltageInRange verifies the ±10% band: 12V passes in [10.8,
|
||||||
|
// 13.2], fails anywhere outside. Unknown labels always pass (since
|
||||||
|
// nominalFor returned 0 above).
|
||||||
|
func TestVoltageInRange(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
rail psuRail
|
||||||
|
ok bool
|
||||||
|
}{
|
||||||
|
{psuRail{Label: "+12V", Volts: 12.0}, true},
|
||||||
|
{psuRail{Label: "+12V", Volts: 10.8}, true}, // exactly at the band
|
||||||
|
{psuRail{Label: "+12V", Volts: 13.2}, true}, // exactly at the band
|
||||||
|
{psuRail{Label: "+12V", Volts: 10.7}, false}, // just below
|
||||||
|
{psuRail{Label: "+12V", Volts: 13.3}, false}, // just above
|
||||||
|
{psuRail{Label: "+12V", Volts: 10.5}, false}, // real sag
|
||||||
|
{psuRail{Label: "+5V", Volts: 4.6}, true}, // 8% low on 5V still in band
|
||||||
|
{psuRail{Label: "+5V", Volts: 4.4}, false}, // 12% low on 5V — out of band
|
||||||
|
{psuRail{Label: "+5V", Volts: 5.0}, true},
|
||||||
|
{psuRail{Label: "VCCIN", Volts: 1.8}, true}, // unknown nominal → pass
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
got, _ := voltageInRange(tc.rail)
|
||||||
|
if got != tc.ok {
|
||||||
|
t.Errorf("voltageInRange(%+v) = %v, want %v", tc.rail, got, tc.ok)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestResolvePSUWindow maps stage timeouts to the sampling window.
|
||||||
|
// Quick's 1m stage_timeout → 55s window; deep's 10m → capped at 10m;
|
||||||
|
// missing/zero → 30s (test / legacy orchestrator path); sub-35s → at
|
||||||
|
// least 30s so aggregates are non-trivial.
|
||||||
|
func TestResolvePSUWindow(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
in time.Duration
|
||||||
|
want time.Duration
|
||||||
|
}{
|
||||||
|
{"zero → snapshot fallback", 0, 30 * time.Second},
|
||||||
|
{"negative → snapshot fallback", -1 * time.Second, 30 * time.Second},
|
||||||
|
{"tiny timeout clamps up to 30s floor", 10 * time.Second, 30 * time.Second},
|
||||||
|
{"35s - 5s = 30s", 35 * time.Second, 30 * time.Second},
|
||||||
|
{"1m quick → 55s", time.Minute, 55 * time.Second},
|
||||||
|
{"10m deep → 9m55s", 10 * time.Minute, 9*time.Minute + 55*time.Second},
|
||||||
|
{"15m soak → capped at 10m", 15 * time.Minute, 10 * time.Minute},
|
||||||
|
{"1h → capped at 10m", time.Hour, 10 * time.Minute},
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
if got := resolvePSUWindow(tc.in); got != tc.want {
|
||||||
|
t.Errorf("resolvePSUWindow(%s) = %s, want %s", tc.in, got, tc.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -59,6 +59,11 @@ func (o Outcome) MarshalSummary() (json.RawMessage, error) {
|
|||||||
// Deps bundles what stages need without pulling in the whole agent.
|
// Deps bundles what stages need without pulling in the whole agent.
|
||||||
// Logger methods print to stdout + forward to the orchestrator; Sensor
|
// Logger methods print to stdout + forward to the orchestrator; Sensor
|
||||||
// drops numeric samples; OverrideFlags carries operator-set bypasses.
|
// drops numeric samples; OverrideFlags carries operator-set bypasses.
|
||||||
|
//
|
||||||
|
// CPUStressKnobs / StorageKnobs / NetworkKnobs are Phase-2 profile
|
||||||
|
// knobs. Zero-valued fields mean "fall back to the compile-time
|
||||||
|
// default" — that keeps the stages runnable even when the runner can't
|
||||||
|
// materialize a profile (tests, legacy orchestrator, etc).
|
||||||
type Deps struct {
|
type Deps struct {
|
||||||
Info func(string)
|
Info func(string)
|
||||||
Warn func(string)
|
Warn func(string)
|
||||||
@@ -68,6 +73,58 @@ type Deps struct {
|
|||||||
NonDestructive bool // skip wipe-probe + writes in Storage
|
NonDestructive bool // skip wipe-probe + writes in Storage
|
||||||
ExpectedDisks []ExpectedDisk // serials + sizes from host.expected_spec
|
ExpectedDisks []ExpectedDisk // serials + sizes from host.expected_spec
|
||||||
StageTimeout time.Duration
|
StageTimeout time.Duration
|
||||||
|
CPUStressKnobs CPUStressKnobs
|
||||||
|
StorageKnobs StorageKnobs
|
||||||
|
NetworkKnobs NetworkKnobs
|
||||||
|
BurnKnobs BurnKnobs
|
||||||
|
// LookPath is the unit-test seam for swapping a real external
|
||||||
|
// binary (stress-ng, fio, iperf3, dmidecode, …) for a fake. When
|
||||||
|
// nil the stage falls back to os/exec.LookPath — production and
|
||||||
|
// existing tests keep working unchanged. Tests under
|
||||||
|
// agent/tests/fakes/ populate this to redirect lookups to a built
|
||||||
|
// fake binary in a tempdir.
|
||||||
|
LookPath func(name string) (string, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
// CPUStressKnobs parameterizes the CPUStress stage. Zero durations fall
// back to the package's compile-time defaults (cpuPassDuration etc).
type CPUStressKnobs struct {
	CPUPass  time.Duration
	MemPass  time.Duration
	EDACPoll time.Duration
}
|
||||||
|
|
||||||
|
// StorageKnobs parameterizes the Storage stage. Mode picks between
// "fio_sample" (bounded tempfile inside the device, quick profile) and
// "full_disk" (whole-device write verify, deep/soak). Empty strings
// fall back to the stage's safe defaults; resolveFioOpts applies them.
type StorageKnobs struct {
	Mode    string        // "" → "fio_sample"
	FioSize string        // "" → "1GiB"
	FioTime time.Duration // <= 0 → 3 minutes
	FioBS   string        // "" → "4k"
	FioRW   string        // "" → "randrw"
	Verify  string        // "" → "md5"
}
|
||||||
|
|
||||||
|
// NetworkKnobs parameterizes the Network stage.
type NetworkKnobs struct {
	Duration time.Duration
}
|
||||||
|
|
||||||
|
// BurnKnobs parameterizes the Burn super-stage. Duration is the total
// Burn window; sub-workloads run concurrently inside that window.
// CPUWorkers is "all" (runtime.NumCPU) or a numeric string. MemPct is a
// percentage of MemAvailable to allocate for the memory burner (clamped
// 0-90 by the stage). IperfParallel feeds iperf3 -P to generate sustained
// NIC load. FioOnSpare gates the storage sub-workload: true = fio runs
// against the allow-listed disks for the same window; false = skip fio.
type BurnKnobs struct {
	Duration      time.Duration
	CPUWorkers    string
	MemPct        int
	FioOnSpare    bool
	IperfParallel int
}
|
||||||
|
|
||||||
// Sample mirrors the server's SensorSample but lives in the tests
|
// Sample mirrors the server's SensorSample but lives in the tests
|
||||||
|
|||||||
+305
-92
@@ -5,24 +5,36 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Storage is the destructive stage: badblocks (write-mode sample) + fio
|
// Storage is the destructive stage. Phase 2 replaced the old
|
||||||
// random IO, persisting IOPS + latency as measurements. Pre-gates:
|
// badblocks + 128 MiB fio combo with a single fio run per disk that
|
||||||
|
// writes, verifies md5 of what it wrote, and reports p99 latency.
|
||||||
|
// Modes:
|
||||||
|
//
|
||||||
|
// - fio_sample (quick): bounded 1 GiB write per disk, ~3 min runtime.
|
||||||
|
// - full_disk (deep/soak): writes the whole device, time-bounded by
|
||||||
|
// the fio_time knob (2 h deep, 6 h soak).
|
||||||
|
//
|
||||||
|
// Pre-gates kept from Phase 1:
|
||||||
//
|
//
|
||||||
// 1. Device allowlist: only act on /dev/<X> where the kernel-reported
|
// 1. Device allowlist: only act on /dev/<X> where the kernel-reported
|
||||||
// serial matches one of Deps.ExpectedDisks. This is the operator's
|
// serial matches one of Deps.ExpectedDisks. USB sticks and unexpected
|
||||||
// contract for what can be written to. USB sticks and unexpected
|
|
||||||
// drives are excluded.
|
// drives are excluded.
|
||||||
// 2. Wipe probe: blkid + wipefs --no-act on each target; any filesystem
|
// 2. Wipe probe: blkid + wipefs --no-act on each target; any filesystem
|
||||||
// signatures, partition tables, or LVM metadata → fail with
|
// signature, partition table, or LVM metadata → fail with
|
||||||
// UnexpectedData unless Deps.OverrideWipe is set.
|
// UnexpectedData unless Deps.OverrideWipe is set.
|
||||||
//
|
//
|
||||||
// Only after those pass does the stage run `badblocks -b 4096 -c 64 -w`
|
// After fio, the stage captures a SMART diff (start snapshot taken
|
||||||
// and `fio` in write mode. This matches the plan's "destructive disk
|
// before any writes; end snapshot after all writes finish) and posts
|
||||||
// tests are always-on, gated by layered safety."
|
// deltas on attributes like Reallocated_Sector_Ct and Current_Pending_Sector.
|
||||||
|
// The threshold evaluator isn't seeded to gate smart_delta out of the
|
||||||
|
// box — those samples are diagnostic for the report. Fio's p99 latency
|
||||||
|
// posts as fio_p99_us so the per-stage Storage warning threshold can
|
||||||
|
// fire on a latency cliff.
|
||||||
func Storage(ctx context.Context, d Deps) Outcome {
|
func Storage(ctx context.Context, d Deps) Outcome {
|
||||||
if len(d.ExpectedDisks) == 0 {
|
if len(d.ExpectedDisks) == 0 {
|
||||||
d.Info("Storage: no expected disks in spec — skipping stage")
|
d.Info("Storage: no expected disks in spec — skipping stage")
|
||||||
@@ -44,10 +56,10 @@ func Storage(ctx context.Context, d Deps) Outcome {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Non-destructive runs skip wipe-probe (nothing to refuse), badblocks
|
// Non-destructive runs skip wipe-probe (nothing to refuse), fio
|
||||||
// -w, and write-mode fio. Every expected disk is still asserted
|
// writes, and SMART delta (nothing changed so no delta to report).
|
||||||
// present + readable by listing /sys/block and reading SMART-accessible
|
// Every expected disk is still asserted present so a vanished drive
|
||||||
// identity; the per-disk map flags the shortcut so the report is clear.
|
// still fails the stage.
|
||||||
if d.NonDestructive {
|
if d.NonDestructive {
|
||||||
perDisk := map[string]any{}
|
perDisk := map[string]any{}
|
||||||
for _, t := range targets {
|
for _, t := range targets {
|
||||||
@@ -89,64 +101,80 @@ func Storage(ctx context.Context, d Deps) Outcome {
|
|||||||
d.Warn("Storage: operator override engaged — proceeding despite data on " + strings.Join(dirty, ", "))
|
d.Warn("Storage: operator override engaged — proceeding despite data on " + strings.Join(dirty, ", "))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Per target: short badblocks write sample + fio random-read/write.
|
// Capture start-of-stage SMART attributes before we write anything
|
||||||
|
// so the delta is attributable to *this* stage's writes and not the
|
||||||
|
// host's prior history. Per-disk failures are tolerated (e.g. the
|
||||||
|
// device doesn't expose SMART); we just can't emit a delta for it.
|
||||||
|
startSMART := captureSMARTAttrs(ctx, targets)
|
||||||
|
|
||||||
|
fioOpts := resolveFioOpts(d.StorageKnobs)
|
||||||
|
d.Info(fmt.Sprintf("Storage: fio mode=%s size=%s runtime=%s bs=%s rw=%s verify=%s",
|
||||||
|
fioOpts.Mode, fioOpts.Size, fioOpts.Runtime, fioOpts.BS, fioOpts.RW, fioOpts.Verify))
|
||||||
|
|
||||||
var samples []Sample
|
var samples []Sample
|
||||||
var subs []SubStepReport
|
var subs []SubStepReport
|
||||||
perDisk := map[string]any{}
|
perDisk := map[string]any{}
|
||||||
|
failed := ""
|
||||||
for _, t := range targets {
|
for _, t := range targets {
|
||||||
d.Info("Storage: running badblocks write sample on " + t.Device)
|
d.Info(fmt.Sprintf("Storage: running fio %s on %s", fioOpts.Mode, t.Device))
|
||||||
bbStart := time.Now()
|
|
||||||
bb := runBadblocks(ctx, t.Device)
|
|
||||||
bbEnd := time.Now()
|
|
||||||
bbSummary, _ := json.Marshal(bb)
|
|
||||||
subs = append(subs, SubStepReport{
|
|
||||||
Name: fmt.Sprintf("badblocks %s", t.Device),
|
|
||||||
Passed: bb.OK,
|
|
||||||
StartedAt: bbStart,
|
|
||||||
CompletedAt: bbEnd,
|
|
||||||
SummaryJSON: bbSummary,
|
|
||||||
})
|
|
||||||
|
|
||||||
d.Info(fmt.Sprintf("Storage: running fio random rw on %s", t.Device))
|
|
||||||
fioStart := time.Now()
|
fioStart := time.Now()
|
||||||
fr := runFio(ctx, t.Device)
|
fr := runFioVerify(ctx, t.Device, fioOpts)
|
||||||
fioEnd := time.Now()
|
fioEnd := time.Now()
|
||||||
fioSummary, _ := json.Marshal(fr)
|
fioSummary, _ := json.Marshal(fr)
|
||||||
subs = append(subs, SubStepReport{
|
subs = append(subs, SubStepReport{
|
||||||
Name: fmt.Sprintf("fio %s", t.Device),
|
Name: fmt.Sprintf("fio %s %s", fioOpts.Mode, t.Device),
|
||||||
Passed: fr.Error == "",
|
Passed: fr.Error == "",
|
||||||
StartedAt: fioStart,
|
StartedAt: fioStart,
|
||||||
CompletedAt: fioEnd,
|
CompletedAt: fioEnd,
|
||||||
SummaryJSON: fioSummary,
|
SummaryJSON: fioSummary,
|
||||||
})
|
})
|
||||||
|
perDisk[t.Device] = map[string]any{"fio": fr}
|
||||||
|
|
||||||
perDisk[t.Device] = map[string]any{
|
if fr.Error == "" {
|
||||||
"badblocks": bb,
|
|
||||||
"fio": fr,
|
|
||||||
}
|
|
||||||
samples = append(samples,
|
samples = append(samples,
|
||||||
Sample{Kind: "fio", Key: t.Device + "/read_iops", Value: fr.ReadIOPS, Unit: "iops"},
|
Sample{Kind: "fio", Key: t.Device + "/read_iops", Value: fr.ReadIOPS, Unit: "iops"},
|
||||||
Sample{Kind: "fio", Key: t.Device + "/write_iops", Value: fr.WriteIOPS, Unit: "iops"},
|
Sample{Kind: "fio", Key: t.Device + "/write_iops", Value: fr.WriteIOPS, Unit: "iops"},
|
||||||
)
|
)
|
||||||
if !bb.OK {
|
if fr.ReadP99Us > 0 {
|
||||||
return Outcome{
|
samples = append(samples, Sample{Kind: "fio_p99_us", Key: t.Device + "/read", Value: fr.ReadP99Us, Unit: "us"})
|
||||||
Passed: false,
|
}
|
||||||
Message: "badblocks found errors on " + t.Device,
|
if fr.WriteP99Us > 0 {
|
||||||
Summary: "badblocks failed on " + t.Device,
|
samples = append(samples, Sample{Kind: "fio_p99_us", Key: t.Device + "/write", Value: fr.WriteP99Us, Unit: "us"})
|
||||||
Extras: map[string]any{"per_disk": perDisk, "wipe_probe": probes},
|
}
|
||||||
SubSteps: subs,
|
} else if failed == "" {
|
||||||
|
failed = t.Device
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// End-of-stage SMART snapshot + diff. We capture whether or not fio
|
||||||
|
// succeeded — a mid-run failure still produces attributable deltas,
|
||||||
|
// which is often more interesting than the stage outcome itself.
|
||||||
|
endSMART := captureSMARTAttrs(ctx, targets)
|
||||||
|
deltas := diffSMARTAttrs(startSMART, endSMART)
|
||||||
|
for dev, attrs := range deltas {
|
||||||
|
for attr, delta := range attrs {
|
||||||
|
samples = append(samples, Sample{Kind: "smart_delta", Key: dev + "/" + attr, Value: delta, Unit: "count"})
|
||||||
}
|
}
|
||||||
if d.Sensor != nil {
|
}
|
||||||
|
if d.Sensor != nil && len(samples) > 0 {
|
||||||
_ = d.Sensor(ctx, samples)
|
_ = d.Sensor(ctx, samples)
|
||||||
}
|
}
|
||||||
|
|
||||||
d.Info(fmt.Sprintf("Storage: %d disk(s) passed badblocks + fio", len(targets)))
|
if failed != "" {
|
||||||
|
return Outcome{
|
||||||
|
Passed: false,
|
||||||
|
Message: "fio verify failed on " + failed,
|
||||||
|
Summary: "fio failed on " + failed,
|
||||||
|
Extras: map[string]any{"per_disk": perDisk, "wipe_probe": probes, "smart_delta": deltas, "fio_opts": fioOpts},
|
||||||
|
SubSteps: subs,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
d.Info(fmt.Sprintf("Storage: %d disk(s) passed fio --verify", len(targets)))
|
||||||
return Outcome{
|
return Outcome{
|
||||||
Passed: true,
|
Passed: true,
|
||||||
Summary: fmt.Sprintf("%d disks passed", len(targets)),
|
Summary: fmt.Sprintf("%d disks passed (%s)", len(targets), fioOpts.Mode),
|
||||||
Extras: map[string]any{"per_disk": perDisk, "wipe_probe": probes},
|
Extras: map[string]any{"per_disk": perDisk, "wipe_probe": probes, "smart_delta": deltas, "fio_opts": fioOpts},
|
||||||
SubSteps: subs,
|
SubSteps: subs,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -229,8 +257,8 @@ type wipeProbeResult struct {
|
|||||||
|
|
||||||
// probeWipe runs blkid + wipefs -n. Any non-empty output from either is
|
// probeWipe runs blkid + wipefs -n. Any non-empty output from either is
|
||||||
// a "has data" signal. This is deliberately conservative: we'd rather
|
// a "has data" signal. This is deliberately conservative: we'd rather
|
||||||
// halt on a bare ext4 signature than hand badblocks a disk with real
|
// halt on a bare ext4 signature than hand fio a disk with real bytes on
|
||||||
// bytes on it.
|
// it.
|
||||||
func probeWipe(ctx context.Context, device string) wipeProbeResult {
|
func probeWipe(ctx context.Context, device string) wipeProbeResult {
|
||||||
out := wipeProbeResult{Device: device}
|
out := wipeProbeResult{Device: device}
|
||||||
|
|
||||||
@@ -257,84 +285,269 @@ func probeWipe(ctx context.Context, device string) wipeProbeResult {
|
|||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------- badblocks ----------
|
|
||||||
|
|
||||||
type badblocksResult struct {
|
|
||||||
OK bool `json:"ok"`
|
|
||||||
Elapsed string `json:"elapsed"`
|
|
||||||
Error string `json:"error,omitempty"`
|
|
||||||
OutputTail string `json:"output_tail,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func runBadblocks(ctx context.Context, device string) badblocksResult {
|
|
||||||
// -c 64 blocks per check, -w destructive write, -b 4096 block size,
|
|
||||||
// -t pattern. We only sample 256MiB (65536 × 4k) so the stage stays
|
|
||||||
// bounded. A real burn-in would run the whole disk; that belongs in
|
|
||||||
// a separate "deep" stage.
|
|
||||||
args := []string{"-b", "4096", "-c", "64", "-w", "-t", "random", device, "65536"}
|
|
||||||
start := time.Now()
|
|
||||||
runCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
|
|
||||||
defer cancel()
|
|
||||||
cmd := exec.CommandContext(runCtx, "badblocks", args...)
|
|
||||||
out, err := cmd.CombinedOutput()
|
|
||||||
r := badblocksResult{Elapsed: time.Since(start).Round(time.Second).String(), OutputTail: tailLines(string(out), 10)}
|
|
||||||
if err != nil {
|
|
||||||
r.Error = err.Error()
|
|
||||||
return r
|
|
||||||
}
|
|
||||||
// badblocks prints each bad block to stdout. Empty output = clean.
|
|
||||||
if strings.TrimSpace(string(out)) == "" {
|
|
||||||
r.OK = true
|
|
||||||
} else {
|
|
||||||
r.Error = "bad blocks found"
|
|
||||||
}
|
|
||||||
return r
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---------- fio ----------
|
// ---------- fio ----------
|
||||||
|
|
||||||
|
// fioOpts is the resolved fio configuration: the concrete flag values
// fio needs, after knob defaults have been applied. Defaults match the
// quick profile's fio_sample shape so callers with zero knobs still run
// something bounded.
//
// Runtime is a time.Duration, so encoding/json emits it as an integer
// nanosecond count under "runtime".
type fioOpts struct {
	Mode    string        `json:"mode"`    // "fio_sample" | "full_disk"
	Size    string        `json:"size"`    // "1GiB"; only used for fio_sample
	Runtime time.Duration `json:"runtime"` // bounding time
	BS      string        `json:"bs"`      // "4k"
	RW      string        `json:"rw"`      // "randrw"
	Verify  string        `json:"verify"`  // "md5" | ""
}
|
||||||
|
|
||||||
|
// resolveFioOpts normalizes the knobs into a runnable config. Zero-
|
||||||
|
// valued fields fall back to the quick defaults so a stage that's
|
||||||
|
// missing its knobs still has coherent behavior (safer than refusing).
|
||||||
|
func resolveFioOpts(k StorageKnobs) fioOpts {
|
||||||
|
o := fioOpts{
|
||||||
|
Mode: firstNonEmpty(k.Mode, "fio_sample"),
|
||||||
|
Size: firstNonEmpty(k.FioSize, "1GiB"),
|
||||||
|
Runtime: k.FioTime,
|
||||||
|
BS: firstNonEmpty(k.FioBS, "4k"),
|
||||||
|
RW: firstNonEmpty(k.FioRW, "randrw"),
|
||||||
|
Verify: firstNonEmpty(k.Verify, "md5"),
|
||||||
|
}
|
||||||
|
if o.Runtime <= 0 {
|
||||||
|
o.Runtime = 3 * time.Minute
|
||||||
|
}
|
||||||
|
return o
|
||||||
|
}
|
||||||
|
|
||||||
|
// firstNonEmpty returns the first argument that is not the empty
// string, or "" when every argument is empty (or none are given).
func firstNonEmpty(vs ...string) string {
	for _, v := range vs {
		if v != "" {
			return v
		}
	}
	return ""
}
|
||||||
|
|
||||||
// fioResult is the per-disk outcome of one fio run. The IOPS and
// bandwidth fields are copied from fio's JSON "iops" and "bw" values
// for job 0; the p99 fields are completion-latency 99th percentiles in
// microseconds and are omitted from JSON when zero. Error is empty on
// success; OutputTail keeps the last lines of fio's output for
// diagnosis.
type fioResult struct {
	Mode        string  `json:"mode"`
	ReadIOPS    float64 `json:"read_iops"`
	WriteIOPS   float64 `json:"write_iops"`
	ReadBWKBps  float64 `json:"read_bw_kbps"`
	WriteBWKBps float64 `json:"write_bw_kbps"`
	ReadP99Us   float64 `json:"read_p99_us,omitempty"`
	WriteP99Us  float64 `json:"write_p99_us,omitempty"`
	Error       string  `json:"error,omitempty"`
	OutputTail  string  `json:"output_tail,omitempty"`
}
|
||||||
|
|
||||||
// runFio kicks off a tiny random-rw job: 2 jobs × 64MB × 4k blocks.
|
// runFioVerify invokes fio with md5-verify semantics. fio_sample mode
|
||||||
// This is a health bar, not a benchmark — we want to know the disk
|
// caps the IO at opts.Size; full_disk drives the whole device bounded
|
||||||
// services IO, not how fast it is at p99.
|
// by runtime. Both use direct IO to bypass the page cache — we want
|
||||||
func runFio(ctx context.Context, device string) fioResult {
|
// real disk latency, not Linux' cheerful buffer.
|
||||||
runCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
|
func runFioVerify(ctx context.Context, device string, opts fioOpts) fioResult {
|
||||||
|
// 30s grace over runtime so fio has time to flush + close cleanly.
|
||||||
|
runCtx, cancel := context.WithTimeout(ctx, opts.Runtime+30*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
args := []string{
|
args := []string{
|
||||||
"--name=health", "--filename=" + device, "--rw=randrw",
|
"--name=verify-" + strings.TrimPrefix(device, "/dev/"),
|
||||||
"--bs=4k", "--size=64M", "--numjobs=2", "--time_based=0",
|
"--filename=" + device,
|
||||||
"--group_reporting", "--output-format=json", "--direct=1",
|
"--rw=" + opts.RW,
|
||||||
|
"--bs=" + opts.BS,
|
||||||
|
"--numjobs=1",
|
||||||
|
"--direct=1",
|
||||||
|
"--group_reporting",
|
||||||
|
"--output-format=json",
|
||||||
|
"--runtime=" + strconv.Itoa(int(opts.Runtime.Seconds())),
|
||||||
}
|
}
|
||||||
|
if opts.Verify != "" {
|
||||||
|
args = append(args,
|
||||||
|
"--verify="+opts.Verify,
|
||||||
|
"--verify_pattern=random",
|
||||||
|
"--do_verify=1",
|
||||||
|
)
|
||||||
|
}
|
||||||
|
switch opts.Mode {
|
||||||
|
case "full_disk":
|
||||||
|
// Time-bounded across the full device — fio uses the device's
|
||||||
|
// full size when --size is omitted on a block device.
|
||||||
|
args = append(args, "--time_based=1")
|
||||||
|
default:
|
||||||
|
// fio_sample: bounded write. Setting --size= limits the IO
|
||||||
|
// volume regardless of runtime.
|
||||||
|
args = append(args, "--size="+opts.Size, "--time_based=0")
|
||||||
|
}
|
||||||
|
|
||||||
cmd := exec.CommandContext(runCtx, "fio", args...)
|
cmd := exec.CommandContext(runCtx, "fio", args...)
|
||||||
out, err := cmd.Output()
|
out, err := cmd.Output()
|
||||||
|
r := fioResult{Mode: opts.Mode, OutputTail: tailLines(string(out), 20)}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fioResult{Error: err.Error()}
|
r.Error = err.Error()
|
||||||
|
return r
|
||||||
}
|
}
|
||||||
|
parsed, perr := parseFioJSON(out)
|
||||||
|
if perr != nil {
|
||||||
|
r.Error = "parse fio json: " + perr.Error()
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
r.ReadIOPS = parsed.ReadIOPS
|
||||||
|
r.WriteIOPS = parsed.WriteIOPS
|
||||||
|
r.ReadBWKBps = parsed.ReadBWKBps
|
||||||
|
r.WriteBWKBps = parsed.WriteBWKBps
|
||||||
|
r.ReadP99Us = parsed.ReadP99Us
|
||||||
|
r.WriteP99Us = parsed.WriteP99Us
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseFioJSON extracts the bits we care about from fio's --output-format=json.
|
||||||
|
// Latency percentiles live at .jobs[0].read.clat_ns.percentile["99.000000"];
|
||||||
|
// we convert nanoseconds to microseconds for the fio_p99_us sample.
|
||||||
|
func parseFioJSON(out []byte) (fioResult, error) {
|
||||||
var top struct {
|
var top struct {
|
||||||
Jobs []struct {
|
Jobs []struct {
|
||||||
Read struct {
|
Read struct {
|
||||||
IOPS float64 `json:"iops"`
|
IOPS float64 `json:"iops"`
|
||||||
BW float64 `json:"bw"`
|
BW float64 `json:"bw"`
|
||||||
|
CLat struct {
|
||||||
|
Percentile map[string]float64 `json:"percentile"`
|
||||||
|
} `json:"clat_ns"`
|
||||||
} `json:"read"`
|
} `json:"read"`
|
||||||
Write struct {
|
Write struct {
|
||||||
IOPS float64 `json:"iops"`
|
IOPS float64 `json:"iops"`
|
||||||
BW float64 `json:"bw"`
|
BW float64 `json:"bw"`
|
||||||
|
CLat struct {
|
||||||
|
Percentile map[string]float64 `json:"percentile"`
|
||||||
|
} `json:"clat_ns"`
|
||||||
} `json:"write"`
|
} `json:"write"`
|
||||||
} `json:"jobs"`
|
} `json:"jobs"`
|
||||||
}
|
}
|
||||||
if err := json.Unmarshal(out, &top); err != nil || len(top.Jobs) == 0 {
|
if err := json.Unmarshal(out, &top); err != nil {
|
||||||
return fioResult{Error: "parse fio json: " + fmt.Sprint(err)}
|
return fioResult{}, err
|
||||||
|
}
|
||||||
|
if len(top.Jobs) == 0 {
|
||||||
|
return fioResult{}, fmt.Errorf("no jobs in fio output")
|
||||||
}
|
}
|
||||||
j := top.Jobs[0]
|
j := top.Jobs[0]
|
||||||
return fioResult{
|
r := fioResult{
|
||||||
ReadIOPS: j.Read.IOPS, WriteIOPS: j.Write.IOPS,
|
ReadIOPS: j.Read.IOPS, WriteIOPS: j.Write.IOPS,
|
||||||
ReadBWKBps: j.Read.BW, WriteBWKBps: j.Write.BW,
|
ReadBWKBps: j.Read.BW, WriteBWKBps: j.Write.BW,
|
||||||
}
|
}
|
||||||
|
if p := j.Read.CLat.Percentile["99.000000"]; p > 0 {
|
||||||
|
r.ReadP99Us = p / 1000.0
|
||||||
|
}
|
||||||
|
if p := j.Write.CLat.Percentile["99.000000"]; p > 0 {
|
||||||
|
r.WriteP99Us = p / 1000.0
|
||||||
|
}
|
||||||
|
return r, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------- SMART delta ----------
|
||||||
|
|
||||||
|
// smartAttrMap: device → attribute → raw counter value. ATA drives
// populate named attributes (Reallocated_Sector_Ct etc); NVMe drives
// populate a flatter nvme-specific map. We track a curated whitelist
// of wear indicators — anything else is diagnostic and drops to the raw
// report output.
type smartAttrMap map[string]map[string]float64
|
||||||
|
|
||||||
|
// captureSMARTAttrs runs smartctl -aj on each target and pulls the
|
||||||
|
// whitelisted attributes. Per-device failures (virtio, permission
|
||||||
|
// issues) degrade silently — the delta step just shows no data for
|
||||||
|
// that device.
|
||||||
|
func captureSMARTAttrs(ctx context.Context, targets []diskTarget) smartAttrMap {
|
||||||
|
out := smartAttrMap{}
|
||||||
|
for _, t := range targets {
|
||||||
|
parsed, err := runSmartctl(ctx, t.Device)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
attrs := extractSMARTAttrs(parsed)
|
||||||
|
if len(attrs) > 0 {
|
||||||
|
out[t.Device] = attrs
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// smartAttributeWhitelist is the set of attributes we diff across a
// stage. They're the ones that reflect *this stage's* IO damage, not
// cumulative drive history. Adding attributes is cheap — ones a drive
// doesn't report are simply left out of the extracted map, so no
// delta is emitted for them.
var smartAttributeWhitelist = map[string]bool{
	// ATA SMART attribute names (smartctl normalizes to these)
	"Reallocated_Sector_Ct":  true,
	"Current_Pending_Sector": true,
	"Offline_Uncorrectable":  true,
	"UDMA_CRC_Error_Count":   true,
	"Reported_Uncorrect":     true,
	"Raw_Read_Error_Rate":    true,
	// NVMe log fields (flat keys at top of nvme_smart_health_information_log)
	"media_errors":        true,
	"num_err_log_entries": true,
	"percentage_used":     true,
}
|
||||||
|
|
||||||
|
// extractSMARTAttrs walks smartctl's JSON for whitelisted attribute
|
||||||
|
// values. Handles both the ATA shape (ata_smart_attributes.table[]) and
|
||||||
|
// the NVMe shape (nvme_smart_health_information_log). Returns a map
|
||||||
|
// keyed by the canonical attribute name.
|
||||||
|
func extractSMARTAttrs(raw map[string]any) map[string]float64 {
|
||||||
|
out := map[string]float64{}
|
||||||
|
// ATA attributes are in ata_smart_attributes.table[] — each element
|
||||||
|
// has {"name": "Reallocated_Sector_Ct", "raw": {"value": N}}.
|
||||||
|
if ata, ok := raw["ata_smart_attributes"].(map[string]any); ok {
|
||||||
|
if tbl, ok := ata["table"].([]any); ok {
|
||||||
|
for _, row := range tbl {
|
||||||
|
rm, ok := row.(map[string]any)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
name, _ := rm["name"].(string)
|
||||||
|
if !smartAttributeWhitelist[name] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if r, ok := rm["raw"].(map[string]any); ok {
|
||||||
|
if v, ok := r["value"].(float64); ok {
|
||||||
|
out[name] = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// NVMe attributes live flat under nvme_smart_health_information_log.
|
||||||
|
if nvme, ok := raw["nvme_smart_health_information_log"].(map[string]any); ok {
|
||||||
|
for k, v := range nvme {
|
||||||
|
if !smartAttributeWhitelist[k] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if n, ok := v.(float64); ok {
|
||||||
|
out[k] = n
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// diffSMARTAttrs subtracts start from end per (device, attribute).
|
||||||
|
// Only attributes present in both ends produce a delta; missing
|
||||||
|
// attributes drop out (can't attribute a zero-to-present delta safely).
|
||||||
|
// Negative deltas are kept so a drive that resets a counter is visible.
|
||||||
|
func diffSMARTAttrs(start, end smartAttrMap) map[string]map[string]float64 {
|
||||||
|
out := map[string]map[string]float64{}
|
||||||
|
for dev, endAttrs := range end {
|
||||||
|
startAttrs, ok := start[dev]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
devOut := map[string]float64{}
|
||||||
|
for attr, endV := range endAttrs {
|
||||||
|
startV, ok := startAttrs[attr]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
devOut[attr] = endV - startV
|
||||||
|
}
|
||||||
|
if len(devOut) > 0 {
|
||||||
|
out[dev] = devOut
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,218 @@
|
|||||||
|
package tests
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestParseFioJSON_ATAReadWrite confirms we pull IOPS, BW, and p99
|
||||||
|
// latency from both read and write sides. P99 is read from clat_ns and
|
||||||
|
// converted ns → us (the unit we emit to the threshold evaluator).
|
||||||
|
func TestParseFioJSON_ATAReadWrite(t *testing.T) {
|
||||||
|
raw := `{
|
||||||
|
"jobs": [{
|
||||||
|
"read": {"iops": 1234.5, "bw": 5000, "clat_ns": {"percentile": {"99.000000": 250000}}},
|
||||||
|
"write": {"iops": 432.1, "bw": 2000, "clat_ns": {"percentile": {"99.000000": 500000}}}
|
||||||
|
}]
|
||||||
|
}`
|
||||||
|
r, err := parseFioJSON([]byte(raw))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parseFioJSON: %v", err)
|
||||||
|
}
|
||||||
|
if r.ReadIOPS != 1234.5 {
|
||||||
|
t.Errorf("ReadIOPS = %v, want 1234.5", r.ReadIOPS)
|
||||||
|
}
|
||||||
|
if r.WriteIOPS != 432.1 {
|
||||||
|
t.Errorf("WriteIOPS = %v, want 432.1", r.WriteIOPS)
|
||||||
|
}
|
||||||
|
if r.ReadBWKBps != 5000 {
|
||||||
|
t.Errorf("ReadBWKBps = %v, want 5000", r.ReadBWKBps)
|
||||||
|
}
|
||||||
|
// 250000 ns → 250 us
|
||||||
|
if r.ReadP99Us != 250 {
|
||||||
|
t.Errorf("ReadP99Us = %v, want 250", r.ReadP99Us)
|
||||||
|
}
|
||||||
|
// 500000 ns → 500 us
|
||||||
|
if r.WriteP99Us != 500 {
|
||||||
|
t.Errorf("WriteP99Us = %v, want 500", r.WriteP99Us)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestParseFioJSON_ReadOnlyJob: if only one side has p99 populated the
|
||||||
|
// other stays zero (not emitted as a sample). Mirrors a randread job.
|
||||||
|
func TestParseFioJSON_ReadOnlyJob(t *testing.T) {
|
||||||
|
raw := `{
|
||||||
|
"jobs": [{
|
||||||
|
"read": {"iops": 1000, "bw": 4000, "clat_ns": {"percentile": {"99.000000": 100000}}},
|
||||||
|
"write": {"iops": 0, "bw": 0}
|
||||||
|
}]
|
||||||
|
}`
|
||||||
|
r, err := parseFioJSON([]byte(raw))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parseFioJSON: %v", err)
|
||||||
|
}
|
||||||
|
if r.WriteP99Us != 0 {
|
||||||
|
t.Errorf("WriteP99Us = %v on read-only job, want 0", r.WriteP99Us)
|
||||||
|
}
|
||||||
|
if r.ReadP99Us != 100 {
|
||||||
|
t.Errorf("ReadP99Us = %v, want 100", r.ReadP99Us)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestParseFioJSON_NoJobs fails rather than reporting zeroes silently.
|
||||||
|
// An empty jobs array means fio didn't run anything.
|
||||||
|
func TestParseFioJSON_NoJobs(t *testing.T) {
|
||||||
|
raw := `{"jobs": []}`
|
||||||
|
if _, err := parseFioJSON([]byte(raw)); err == nil {
|
||||||
|
t.Errorf("expected error on empty jobs array")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestExtractSMARTAttrs_ATA picks attributes out of ata_smart_attributes.table
|
||||||
|
// when present. Attributes outside the whitelist drop out silently.
|
||||||
|
func TestExtractSMARTAttrs_ATA(t *testing.T) {
|
||||||
|
raw := map[string]any{}
|
||||||
|
smartJSON := `{
|
||||||
|
"ata_smart_attributes": {
|
||||||
|
"table": [
|
||||||
|
{"name": "Reallocated_Sector_Ct", "raw": {"value": 7}},
|
||||||
|
{"name": "Current_Pending_Sector", "raw": {"value": 3}},
|
||||||
|
{"name": "Spin_Retry_Count", "raw": {"value": 99}}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}`
|
||||||
|
if err := json.Unmarshal([]byte(smartJSON), &raw); err != nil {
|
||||||
|
t.Fatalf("unmarshal fixture: %v", err)
|
||||||
|
}
|
||||||
|
out := extractSMARTAttrs(raw)
|
||||||
|
if out["Reallocated_Sector_Ct"] != 7 {
|
||||||
|
t.Errorf("Reallocated_Sector_Ct = %v, want 7", out["Reallocated_Sector_Ct"])
|
||||||
|
}
|
||||||
|
if out["Current_Pending_Sector"] != 3 {
|
||||||
|
t.Errorf("Current_Pending_Sector = %v, want 3", out["Current_Pending_Sector"])
|
||||||
|
}
|
||||||
|
if _, ok := out["Spin_Retry_Count"]; ok {
|
||||||
|
t.Errorf("Spin_Retry_Count should not appear (not in whitelist)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestExtractSMARTAttrs_NVMe picks media_errors and friends from the
|
||||||
|
// nvme health log shape, which is a flat map at the top of the JSON.
|
||||||
|
func TestExtractSMARTAttrs_NVMe(t *testing.T) {
|
||||||
|
raw := map[string]any{}
|
||||||
|
smartJSON := `{
|
||||||
|
"nvme_smart_health_information_log": {
|
||||||
|
"media_errors": 2,
|
||||||
|
"num_err_log_entries": 15,
|
||||||
|
"percentage_used": 7,
|
||||||
|
"temperature": 42
|
||||||
|
}
|
||||||
|
}`
|
||||||
|
if err := json.Unmarshal([]byte(smartJSON), &raw); err != nil {
|
||||||
|
t.Fatalf("unmarshal fixture: %v", err)
|
||||||
|
}
|
||||||
|
out := extractSMARTAttrs(raw)
|
||||||
|
if out["media_errors"] != 2 {
|
||||||
|
t.Errorf("media_errors = %v, want 2", out["media_errors"])
|
||||||
|
}
|
||||||
|
if out["num_err_log_entries"] != 15 {
|
||||||
|
t.Errorf("num_err_log_entries = %v, want 15", out["num_err_log_entries"])
|
||||||
|
}
|
||||||
|
if out["percentage_used"] != 7 {
|
||||||
|
t.Errorf("percentage_used = %v, want 7", out["percentage_used"])
|
||||||
|
}
|
||||||
|
if _, ok := out["temperature"]; ok {
|
||||||
|
t.Errorf("temperature should not appear (not in whitelist)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDiffSMARTAttrs: end − start per (device, attr). Only attrs in
|
||||||
|
// both snapshots yield a delta; any disappearing attribute just drops
|
||||||
|
// out instead of showing a misleading negative.
|
||||||
|
func TestDiffSMARTAttrs(t *testing.T) {
|
||||||
|
start := smartAttrMap{
|
||||||
|
"/dev/sda": {"Reallocated_Sector_Ct": 5, "Current_Pending_Sector": 0},
|
||||||
|
}
|
||||||
|
end := smartAttrMap{
|
||||||
|
"/dev/sda": {"Reallocated_Sector_Ct": 8, "Current_Pending_Sector": 2, "UDMA_CRC_Error_Count": 1},
|
||||||
|
}
|
||||||
|
out := diffSMARTAttrs(start, end)
|
||||||
|
if out["/dev/sda"]["Reallocated_Sector_Ct"] != 3 {
|
||||||
|
t.Errorf("Reallocated_Sector_Ct delta = %v, want 3", out["/dev/sda"]["Reallocated_Sector_Ct"])
|
||||||
|
}
|
||||||
|
if out["/dev/sda"]["Current_Pending_Sector"] != 2 {
|
||||||
|
t.Errorf("Current_Pending_Sector delta = %v, want 2", out["/dev/sda"]["Current_Pending_Sector"])
|
||||||
|
}
|
||||||
|
if _, ok := out["/dev/sda"]["UDMA_CRC_Error_Count"]; ok {
|
||||||
|
t.Errorf("UDMA_CRC_Error_Count should not appear (missing at start)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDiffSMARTAttrs_DeviceNewAtEnd: a device only present in the end
|
||||||
|
// snapshot (drive hot-plugged mid-run, or SMART read succeeded only at
|
||||||
|
// end) is dropped from the diff — no start baseline to subtract from.
|
||||||
|
func TestDiffSMARTAttrs_DeviceNewAtEnd(t *testing.T) {
|
||||||
|
start := smartAttrMap{}
|
||||||
|
end := smartAttrMap{
|
||||||
|
"/dev/sda": {"Reallocated_Sector_Ct": 10},
|
||||||
|
}
|
||||||
|
out := diffSMARTAttrs(start, end)
|
||||||
|
if _, ok := out["/dev/sda"]; ok {
|
||||||
|
t.Errorf("/dev/sda should drop from diff when absent at start")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestResolveFioOpts_Defaults: zero-valued knobs resolve to the quick
|
||||||
|
// profile's fio_sample shape. Any stage that's missing per-profile
|
||||||
|
// knobs (legacy claim response, test harness) still has coherent
|
||||||
|
// bounded defaults — we won't accidentally fall into unbounded writes.
|
||||||
|
func TestResolveFioOpts_Defaults(t *testing.T) {
|
||||||
|
o := resolveFioOpts(StorageKnobs{})
|
||||||
|
if o.Mode != "fio_sample" {
|
||||||
|
t.Errorf("Mode = %q, want fio_sample", o.Mode)
|
||||||
|
}
|
||||||
|
if o.Size != "1GiB" {
|
||||||
|
t.Errorf("Size = %q, want 1GiB", o.Size)
|
||||||
|
}
|
||||||
|
if o.Runtime != 3*time.Minute {
|
||||||
|
t.Errorf("Runtime = %v, want 3m", o.Runtime)
|
||||||
|
}
|
||||||
|
if o.BS != "4k" {
|
||||||
|
t.Errorf("BS = %q, want 4k", o.BS)
|
||||||
|
}
|
||||||
|
if o.RW != "randrw" {
|
||||||
|
t.Errorf("RW = %q, want randrw", o.RW)
|
||||||
|
}
|
||||||
|
if o.Verify != "md5" {
|
||||||
|
t.Errorf("Verify = %q, want md5", o.Verify)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestResolveFioOpts_FullDiskOverride confirms the deep/soak shape
|
||||||
|
// round-trips. FioTime as 2h overrides the 3-minute default.
|
||||||
|
func TestResolveFioOpts_FullDiskOverride(t *testing.T) {
|
||||||
|
k := StorageKnobs{
|
||||||
|
Mode: "full_disk",
|
||||||
|
FioTime: 2 * time.Hour,
|
||||||
|
FioBS: "64k",
|
||||||
|
FioRW: "write",
|
||||||
|
}
|
||||||
|
o := resolveFioOpts(k)
|
||||||
|
if o.Mode != "full_disk" {
|
||||||
|
t.Errorf("Mode = %q, want full_disk", o.Mode)
|
||||||
|
}
|
||||||
|
if o.Runtime != 2*time.Hour {
|
||||||
|
t.Errorf("Runtime = %v, want 2h", o.Runtime)
|
||||||
|
}
|
||||||
|
if o.BS != "64k" {
|
||||||
|
t.Errorf("BS = %q, want 64k", o.BS)
|
||||||
|
}
|
||||||
|
if o.RW != "write" {
|
||||||
|
t.Errorf("RW = %q, want write", o.RW)
|
||||||
|
}
|
||||||
|
// Verify should fall back to md5 default since knob was empty.
|
||||||
|
if o.Verify != "md5" {
|
||||||
|
t.Errorf("Verify = %q, want md5 (default)", o.Verify)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -60,6 +60,8 @@ func main() {
|
|||||||
artifactStore := &store.Artifacts{DB: conn}
|
artifactStore := &store.Artifacts{DB: conn}
|
||||||
specDiffStore := &store.SpecDiffs{DB: conn}
|
specDiffStore := &store.SpecDiffs{DB: conn}
|
||||||
measurementStore := &store.Measurements{DB: conn}
|
measurementStore := &store.Measurements{DB: conn}
|
||||||
|
thresholdStore := &store.Thresholds{DB: conn}
|
||||||
|
firmwareStore := &store.Firmware{DB: conn}
|
||||||
|
|
||||||
hub := events.NewHub()
|
hub := events.NewHub()
|
||||||
|
|
||||||
@@ -105,6 +107,8 @@ func main() {
|
|||||||
SubSteps: subStepStore,
|
SubSteps: subStepStore,
|
||||||
SpecDiffs: specDiffStore,
|
SpecDiffs: specDiffStore,
|
||||||
Artifacts: artifactStore,
|
Artifacts: artifactStore,
|
||||||
|
Thresholds: thresholdStore,
|
||||||
|
Profiles: cfg.Profiles,
|
||||||
EventHub: hub,
|
EventHub: hub,
|
||||||
Logs: logHub,
|
Logs: logHub,
|
||||||
Runner: runner,
|
Runner: runner,
|
||||||
@@ -157,6 +161,9 @@ func main() {
|
|||||||
Artifacts: artifactStore,
|
Artifacts: artifactStore,
|
||||||
SpecDiffs: specDiffStore,
|
SpecDiffs: specDiffStore,
|
||||||
Measurements: measurementStore,
|
Measurements: measurementStore,
|
||||||
|
Thresholds: thresholdStore,
|
||||||
|
Firmware: firmwareStore,
|
||||||
|
Profiles: cfg.Profiles,
|
||||||
Runner: runner,
|
Runner: runner,
|
||||||
EventHub: hub,
|
EventHub: hub,
|
||||||
Logs: logHub,
|
Logs: logHub,
|
||||||
|
|||||||
@@ -85,3 +85,54 @@ agent:
|
|||||||
|
|
||||||
notifiers: []
|
notifiers: []
|
||||||
routes: []
|
routes: []
|
||||||
|
|
||||||
|
# Vetting pipeline shared defaults. Every profile (quick/deep/soak)
|
||||||
|
# walks the same stage list; only per-stage durations differ.
|
||||||
|
# Thresholds here apply to every profile — a 92°C CPU fails a
|
||||||
|
# 2-minute quick run and a 12-hour soak run alike.
|
||||||
|
vetting:
|
||||||
|
stages: [Inventory, SpecValidate, SMART, CPUStress, Storage, Network, GPU, PSU, Reporting]
|
||||||
|
thresholds:
|
||||||
|
- { stage: "*", kind: temp, key: "cpu/*", op: lt, value: 92, unit: C, severity: critical }
|
||||||
|
- { stage: PSU, kind: psu_volt, key: "+12V", op: within_pct, value: 5, nominal: 12.0, severity: critical }
|
||||||
|
- { stage: PSU, kind: psu_volt, key: "+5V", op: within_pct, value: 5, nominal: 5.0, severity: critical }
|
||||||
|
- { stage: PSU, kind: psu_volt, key: "+3.3V", op: within_pct, value: 5, nominal: 3.3, severity: critical }
|
||||||
|
- { stage: Storage, kind: fio_p99_us, key: "*", op: lt, value: 50000, severity: warning }
|
||||||
|
- { stage: Network, kind: iperf, key: throughput_mbps, op: gte, value: 900, severity: critical }
|
||||||
|
- { stage: Network, kind: nic_retrans, key: "*/rate", op: lt, value: 0.001, severity: warning }
|
||||||
|
- { stage: CPUStress, kind: edac_ue, key: "*", op: lte, value: 0, severity: critical }
|
||||||
|
- { stage: CPUStress, kind: mce, key: "*", op: lte, value: 0, severity: critical }
|
||||||
|
|
||||||
|
# Per-profile durations + probe knobs. Only the *durations* scale across
|
||||||
|
# profiles — every profile exercises every probe and gate. Quick is a
|
||||||
|
# ~10-minute same-day sanity check; deep is the 8–12 h overnight run;
|
||||||
|
# soak is the opt-in 36–40 h extreme run.
|
||||||
|
profiles:
|
||||||
|
quick:
|
||||||
|
stage_timeouts:
|
||||||
|
CPUStress: 5m
|
||||||
|
Storage: 5m
|
||||||
|
Network: 2m
|
||||||
|
defaults:
|
||||||
|
cpustress: { cpu_pass: 2m, mem_pass: 2m, edac_poll: 10s }
|
||||||
|
storage: { mode: fio_sample, fio_size: 1GiB, fio_time: 3m, fio_bs: 4k, fio_rw: randrw, verify: md5 }
|
||||||
|
network: { duration: 60s }
|
||||||
|
deep:
|
||||||
|
stage_timeouts:
|
||||||
|
CPUStress: 2h
|
||||||
|
Storage: 4h
|
||||||
|
Network: 35m
|
||||||
|
defaults:
|
||||||
|
cpustress: { cpu_pass: 60m, mem_pass: 60m, edac_poll: 10s }
|
||||||
|
storage: { mode: full_disk, fio_time: 2h, fio_bs: 4k, fio_rw: randrw, verify: md5 }
|
||||||
|
network: { duration: 30m }
|
||||||
|
soak:
|
||||||
|
inherit: deep
|
||||||
|
stage_timeouts:
|
||||||
|
CPUStress: 14h
|
||||||
|
Storage: 8h
|
||||||
|
Network: 2h30m
|
||||||
|
defaults:
|
||||||
|
cpustress: { cpu_pass: 12h }
|
||||||
|
storage: { mode: full_disk, fio_time: 6h }
|
||||||
|
network: { duration: 2h }
|
||||||
|
|||||||
@@ -75,3 +75,41 @@ agent:
|
|||||||
|
|
||||||
notifiers: []
|
notifiers: []
|
||||||
routes: []
|
routes: []
|
||||||
|
|
||||||
|
# Vetting pipeline shared defaults. Every profile (quick/deep/soak)
|
||||||
|
# walks the same stage list; only per-stage durations differ.
|
||||||
|
# Thresholds apply to every profile — critical breaches fail a run
|
||||||
|
# regardless of which profile the operator picked.
|
||||||
|
vetting:
|
||||||
|
stages: [Inventory, SpecValidate, SMART, CPUStress, Storage, Network, GPU, PSU, Reporting]
|
||||||
|
thresholds:
|
||||||
|
- { stage: "*", kind: temp, key: "cpu/*", op: lt, value: 92, unit: C, severity: critical }
|
||||||
|
- { stage: PSU, kind: psu_volt, key: "+12V", op: within_pct, value: 5, nominal: 12.0, severity: critical }
|
||||||
|
- { stage: PSU, kind: psu_volt, key: "+5V", op: within_pct, value: 5, nominal: 5.0, severity: critical }
|
||||||
|
- { stage: PSU, kind: psu_volt, key: "+3.3V", op: within_pct, value: 5, nominal: 3.3, severity: critical }
|
||||||
|
- { stage: Storage, kind: fio_p99_us, key: "*", op: lt, value: 50000, severity: warning }
|
||||||
|
- { stage: Network, kind: iperf, key: throughput_mbps, op: gte, value: 900, severity: critical }
|
||||||
|
- { stage: Network, kind: nic_retrans, key: "*/rate", op: lt, value: 0.001, severity: warning }
|
||||||
|
- { stage: CPUStress, kind: edac_ue, key: "*", op: lte, value: 0, severity: critical }
|
||||||
|
- { stage: CPUStress, kind: mce, key: "*", op: lte, value: 0, severity: critical }
|
||||||
|
|
||||||
|
profiles:
|
||||||
|
quick:
|
||||||
|
stage_timeouts: { CPUStress: 5m, Storage: 5m, Network: 2m }
|
||||||
|
defaults:
|
||||||
|
cpustress: { cpu_pass: 2m, mem_pass: 2m, edac_poll: 10s }
|
||||||
|
storage: { mode: fio_sample, fio_size: 1GiB, fio_time: 3m, fio_bs: 4k, fio_rw: randrw, verify: md5 }
|
||||||
|
network: { duration: 60s }
|
||||||
|
deep:
|
||||||
|
stage_timeouts: { CPUStress: 2h, Storage: 4h, Network: 35m }
|
||||||
|
defaults:
|
||||||
|
cpustress: { cpu_pass: 60m, mem_pass: 60m, edac_poll: 10s }
|
||||||
|
storage: { mode: full_disk, fio_time: 2h, fio_bs: 4k, fio_rw: randrw, verify: md5 }
|
||||||
|
network: { duration: 30m }
|
||||||
|
soak:
|
||||||
|
inherit: deep
|
||||||
|
stage_timeouts: { CPUStress: 14h, Storage: 8h, Network: 2h30m }
|
||||||
|
defaults:
|
||||||
|
cpustress: { cpu_pass: 12h }
|
||||||
|
storage: { mode: full_disk, fio_time: 6h }
|
||||||
|
network: { duration: 2h }
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ import (
|
|||||||
|
|
||||||
"github.com/go-chi/chi/v5"
|
"github.com/go-chi/chi/v5"
|
||||||
|
|
||||||
|
"vetting/internal/config"
|
||||||
"vetting/internal/events"
|
"vetting/internal/events"
|
||||||
"vetting/internal/hold"
|
"vetting/internal/hold"
|
||||||
"vetting/internal/logs"
|
"vetting/internal/logs"
|
||||||
@@ -41,6 +42,9 @@ type Agent struct {
|
|||||||
Artifacts *store.Artifacts
|
Artifacts *store.Artifacts
|
||||||
SpecDiffs *store.SpecDiffs
|
SpecDiffs *store.SpecDiffs
|
||||||
Measurements *store.Measurements
|
Measurements *store.Measurements
|
||||||
|
Thresholds *store.Thresholds // Phase 1: seeded per run; consulted on each /sensor batch
|
||||||
|
Firmware *store.Firmware // Phase 4: firmware snapshots (unused before then)
|
||||||
|
Profiles *config.ProfileRegistry // Phase 2: /claim resolves the run's profile → stage knobs
|
||||||
Runner *orchestrator.Runner
|
Runner *orchestrator.Runner
|
||||||
EventHub *events.Hub
|
EventHub *events.Hub
|
||||||
Logs *logs.Hub
|
Logs *logs.Hub
|
||||||
@@ -216,6 +220,21 @@ func (a *Agent) Claim(w http.ResponseWriter, r *http.Request) {
|
|||||||
if iperfPort == 0 {
|
if iperfPort == 0 {
|
||||||
iperfPort = 5201
|
iperfPort = 5201
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Resolve the run's profile → agent-visible stage knobs. The agent
|
||||||
|
// reads these to size CPUStress / Storage / Network work. An empty
|
||||||
|
// profile (legacy runs seeded before Phase 1) falls back to "quick".
|
||||||
|
profileName := run.Profile
|
||||||
|
if profileName == "" {
|
||||||
|
profileName = config.ProfileQuick
|
||||||
|
}
|
||||||
|
var stageCfg config.StageConfig
|
||||||
|
if a.Profiles != nil {
|
||||||
|
stageCfg = a.Profiles.ResolveStageConfig(profileName)
|
||||||
|
} else {
|
||||||
|
stageCfg = config.StageConfig{Profile: profileName}
|
||||||
|
}
|
||||||
|
|
||||||
writeJSON(w, http.StatusOK, map[string]any{
|
writeJSON(w, http.StatusOK, map[string]any{
|
||||||
"ok": true,
|
"ok": true,
|
||||||
"run_id": runID,
|
"run_id": runID,
|
||||||
@@ -224,6 +243,7 @@ func (a *Agent) Claim(w http.ResponseWriter, r *http.Request) {
|
|||||||
"iperf_port": iperfPort,
|
"iperf_port": iperfPort,
|
||||||
"non_destructive": run.NonDestructive,
|
"non_destructive": run.NonDestructive,
|
||||||
"current_state": string(currentState),
|
"current_state": string(currentState),
|
||||||
|
"stage_config": stageCfg,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -398,10 +418,24 @@ type StageResult struct {
|
|||||||
Passed bool `json:"passed"`
|
Passed bool `json:"passed"`
|
||||||
Summary json.RawMessage `json:"summary,omitempty"`
|
Summary json.RawMessage `json:"summary,omitempty"`
|
||||||
Inventory *spec.Inventory `json:"inventory,omitempty"`
|
Inventory *spec.Inventory `json:"inventory,omitempty"`
|
||||||
|
Firmware []FirmwareLine `json:"firmware,omitempty"`
|
||||||
Message string `json:"message,omitempty"`
|
Message string `json:"message,omitempty"`
|
||||||
SubSteps []SubStepResultLine `json:"sub_steps,omitempty"`
|
SubSteps []SubStepResultLine `json:"sub_steps,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FirmwareLine is a single firmware snapshot POSTed alongside the
// Firmware stage's /result body. Mirrors agent/probes.FirmwareSnapshot.
// The server converts each line to a store.FirmwareSnapshot and persists
// it under the run — SpecValidate reads these back to diff against the
// host's expected_firmware.
type FirmwareLine struct {
	// Component is the kind of part the version belongs to.
	// NOTE(review): presumably values such as BIOS / BMC / NIC — confirm
	// against the agent probes.
	Component string `json:"component"`
	// Identifier distinguishes one instance of a component from another.
	// NOTE(review): exact format is set by the agent probe — confirm.
	Identifier string `json:"identifier"`
	// Version is the firmware version string as reported by the agent.
	Version string `json:"version"`
	// Vendor is optional; it is omitted from the JSON body when empty.
	Vendor string `json:"vendor,omitempty"`
	// Raw carries optional extra key/value detail from the probe. It is
	// omitted from the JSON body when empty and is stored server-side
	// re-encoded as a JSON object string.
	Raw map[string]string `json:"raw,omitempty"`
}
|
||||||
|
|
||||||
// SubStepResultLine is one entry in StageResult.SubSteps. Ordinal is
|
// SubStepResultLine is one entry in StageResult.SubSteps. Ordinal is
|
||||||
// assigned from slice index server-side; the agent doesn't set it.
|
// assigned from slice index server-side; the agent doesn't set it.
|
||||||
type SubStepResultLine struct {
|
type SubStepResultLine struct {
|
||||||
@@ -476,6 +510,20 @@ func (a *Agent) Result(w http.ResponseWriter, r *http.Request) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Aggregate threshold gate: flip Passed=false server-side when any
|
||||||
|
// critical breach landed for this stage. The agent's verdict is
|
||||||
|
// advisory — a stage-executor can miss a runaway sample that the
|
||||||
|
// sidecar caught. We check this *before* writing the stage state
|
||||||
|
// so the DB reflects the server-side decision.
|
||||||
|
thresholdDetail := ""
|
||||||
|
if body.Passed {
|
||||||
|
if breached, detail := a.stageHadCriticalBreach(r.Context(), runID, body.Stage); breached {
|
||||||
|
body.Passed = false
|
||||||
|
thresholdDetail = detail
|
||||||
|
a.appendLog(runID, "error", fmt.Sprintf("%s reported passed but %s — flipping to failed", body.Stage, detail))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
stageState := model.StagePassed
|
stageState := model.StagePassed
|
||||||
if !body.Passed {
|
if !body.Passed {
|
||||||
stageState = model.StageFailed
|
stageState = model.StageFailed
|
||||||
@@ -488,6 +536,9 @@ func (a *Agent) Result(w http.ResponseWriter, r *http.Request) {
|
|||||||
http.Error(w, "complete stage: "+err.Error(), http.StatusInternalServerError)
|
http.Error(w, "complete stage: "+err.Error(), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
if thresholdDetail != "" && body.Message == "" {
|
||||||
|
body.Message = thresholdDetail
|
||||||
|
}
|
||||||
|
|
||||||
// Agent-authored sub-steps: persist in slice order (ordinal = index)
|
// Agent-authored sub-steps: persist in slice order (ordinal = index)
|
||||||
// and fan out a per-row SSE event each so the detail pane shows them
|
// and fan out a per-row SSE event each so the detail pane shows them
|
||||||
@@ -502,6 +553,14 @@ func (a *Agent) Result(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Firmware-specific: persist each snapshot into firmware_snapshots.
|
||||||
|
// SpecValidate reads them back to diff against expected_firmware.
|
||||||
|
if body.Stage == "Firmware" && len(body.Firmware) > 0 {
|
||||||
|
if err := a.persistFirmware(r.Context(), runID, body.Firmware); err != nil {
|
||||||
|
log.Printf("persist firmware run %d: %v", runID, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if !body.Passed {
|
if !body.Passed {
|
||||||
if err := a.Runs.SetFailedStage(r.Context(), runID, body.Stage); err != nil {
|
if err := a.Runs.SetFailedStage(r.Context(), runID, body.Stage); err != nil {
|
||||||
log.Printf("set failed stage: %v", err)
|
log.Printf("set failed stage: %v", err)
|
||||||
@@ -615,6 +674,34 @@ func parseResultTime(s string) *time.Time {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// persistFirmware writes the reported snapshots. A nil/unset a.Firmware
|
||||||
|
// store is a no-op so tests that don't wire it up stay green; a mid-run
|
||||||
|
// persist error is logged but doesn't fail the stage (Firmware is
|
||||||
|
// advisory — SpecValidate is the gate).
|
||||||
|
func (a *Agent) persistFirmware(ctx context.Context, runID int64, lines []FirmwareLine) error {
|
||||||
|
if a.Firmware == nil || len(lines) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
rows := make([]store.FirmwareSnapshot, 0, len(lines))
|
||||||
|
for _, l := range lines {
|
||||||
|
raw := "{}"
|
||||||
|
if len(l.Raw) > 0 {
|
||||||
|
if b, err := json.Marshal(l.Raw); err == nil {
|
||||||
|
raw = string(b)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rows = append(rows, store.FirmwareSnapshot{
|
||||||
|
RunID: runID,
|
||||||
|
Component: l.Component,
|
||||||
|
Identifier: l.Identifier,
|
||||||
|
Version: l.Version,
|
||||||
|
Vendor: l.Vendor,
|
||||||
|
RawJSON: raw,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return a.Firmware.CreateBatch(ctx, rows)
|
||||||
|
}
|
||||||
|
|
||||||
func (a *Agent) persistInventory(r *http.Request, run *model.Run, inv *spec.Inventory) error {
|
func (a *Agent) persistInventory(r *http.Request, run *model.Run, inv *spec.Inventory) error {
|
||||||
dir := filepath.Join(a.ArtifactsDir, fmt.Sprintf("run-%d", run.ID))
|
dir := filepath.Join(a.ArtifactsDir, fmt.Sprintf("run-%d", run.ID))
|
||||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||||
@@ -667,6 +754,22 @@ func (a *Agent) resolveSpecValidate(r *http.Request, runID int64) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
diffs := spec.Diff(expected, inv)
|
diffs := spec.Diff(expected, inv)
|
||||||
|
if a.Firmware != nil && len(expected.Firmware) > 0 {
|
||||||
|
snaps, err := a.Firmware.ListForRun(r.Context(), runID)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("specvalidate: list firmware: %v", err)
|
||||||
|
} else {
|
||||||
|
observed := make([]spec.FirmwareObserved, 0, len(snaps))
|
||||||
|
for _, s := range snaps {
|
||||||
|
observed = append(observed, spec.FirmwareObserved{
|
||||||
|
Component: s.Component,
|
||||||
|
Identifier: s.Identifier,
|
||||||
|
Version: s.Version,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
diffs = append(diffs, spec.DiffFirmware(expected.Firmware, observed)...)
|
||||||
|
}
|
||||||
|
}
|
||||||
if err := a.SpecDiffs.ReplaceForRun(r.Context(), runID, diffs); err != nil {
|
if err := a.SpecDiffs.ReplaceForRun(r.Context(), runID, diffs); err != nil {
|
||||||
log.Printf("specvalidate: write diffs: %v", err)
|
log.Printf("specvalidate: write diffs: %v", err)
|
||||||
}
|
}
|
||||||
@@ -884,13 +987,17 @@ type SensorSample struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Sensor persists a batch of numeric samples. The thermal sidecar hits
|
// Sensor persists a batch of numeric samples. The thermal sidecar hits
|
||||||
// this on a tick; stage executors (iperf, fio) also drop here.
|
// this on a tick; stage executors (iperf, fio) also drop here. Each
|
||||||
|
// sample is evaluated against the run's seeded thresholds — critical
|
||||||
|
// breaches fail the run immediately (thermal runaway, EDAC UE, voltage
|
||||||
|
// sag); warning breaches are recorded for the report only.
|
||||||
func (a *Agent) Sensor(w http.ResponseWriter, r *http.Request) {
|
func (a *Agent) Sensor(w http.ResponseWriter, r *http.Request) {
|
||||||
runID, ok := runIDFromURL(w, r)
|
runID, ok := runIDFromURL(w, r)
|
||||||
if !ok {
|
if !ok {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if _, ok := a.authenticate(w, r, runID); !ok {
|
run, ok := a.authenticate(w, r, runID)
|
||||||
|
if !ok {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if a.Measurements == nil {
|
if a.Measurements == nil {
|
||||||
@@ -903,8 +1010,12 @@ func (a *Agent) Sensor(w http.ResponseWriter, r *http.Request) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
rows := make([]model.Measurement, 0, len(body.Samples))
|
rows := make([]model.Measurement, 0, len(body.Samples))
|
||||||
|
sampleStages := make([]string, 0, len(body.Samples))
|
||||||
for _, s := range body.Samples {
|
for _, s := range body.Samples {
|
||||||
ts, _ := time.Parse(time.RFC3339Nano, s.TS)
|
ts, _ := time.Parse(time.RFC3339Nano, s.TS)
|
||||||
|
if ts.IsZero() {
|
||||||
|
ts = time.Now().UTC()
|
||||||
|
}
|
||||||
rows = append(rows, model.Measurement{
|
rows = append(rows, model.Measurement{
|
||||||
RunID: runID,
|
RunID: runID,
|
||||||
TS: ts,
|
TS: ts,
|
||||||
@@ -913,12 +1024,139 @@ func (a *Agent) Sensor(w http.ResponseWriter, r *http.Request) {
|
|||||||
Value: s.Value,
|
Value: s.Value,
|
||||||
Unit: s.Unit,
|
Unit: s.Unit,
|
||||||
})
|
})
|
||||||
|
// Stage the sample belongs to drives threshold selector
|
||||||
|
// matching. We use the run's current state — the agent does
|
||||||
|
// not tag samples with a stage.
|
||||||
|
sampleStages = append(sampleStages, orchestrator.StageNameForState(run.State))
|
||||||
}
|
}
|
||||||
if err := a.Measurements.CreateBatch(r.Context(), rows); err != nil {
|
if err := a.Measurements.CreateBatch(r.Context(), rows); err != nil {
|
||||||
http.Error(w, "write samples: "+err.Error(), http.StatusInternalServerError)
|
http.Error(w, "write samples: "+err.Error(), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
writeJSON(w, http.StatusOK, map[string]any{"ok": true, "written": len(rows)})
|
critical := a.evaluateSensorBatch(r.Context(), runID, rows, sampleStages)
|
||||||
|
writeJSON(w, http.StatusOK, map[string]any{
|
||||||
|
"ok": true,
|
||||||
|
"written": len(rows),
|
||||||
|
"breach": critical != "",
|
||||||
|
"breach_kind": critical,
|
||||||
|
})
|
||||||
|
if critical != "" {
|
||||||
|
a.failRunOnCriticalBreach(r, run, critical)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// evaluateSensorBatch runs each sample through the run's thresholds,
|
||||||
|
// persists evaluations, and returns a short human-readable label for
|
||||||
|
// the first critical breach it sees (empty when all samples pass or
|
||||||
|
// only hit warning-severity rules).
|
||||||
|
func (a *Agent) evaluateSensorBatch(ctx context.Context, runID int64, rows []model.Measurement, sampleStages []string) string {
|
||||||
|
if a.Thresholds == nil || len(rows) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
rules, err := a.Thresholds.ListForRun(ctx, runID)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("sensor: list thresholds run %d: %v", runID, err)
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
if len(rules) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
evalRules := make([]orchestrator.Threshold, 0, len(rules))
|
||||||
|
for _, r := range rules {
|
||||||
|
evalRules = append(evalRules, orchestrator.Threshold{
|
||||||
|
ID: r.ID,
|
||||||
|
Stage: r.Stage,
|
||||||
|
Kind: r.Kind,
|
||||||
|
Key: r.Key,
|
||||||
|
Op: orchestrator.ThresholdOp(r.Op),
|
||||||
|
Value: r.Threshold,
|
||||||
|
Nominal: r.Nominal,
|
||||||
|
Severity: orchestrator.ThresholdSeverity(r.Severity),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
evals := make([]store.ThresholdEvaluation, 0, len(rows))
|
||||||
|
critical := ""
|
||||||
|
for i, m := range rows {
|
||||||
|
sample := orchestrator.Sample{
|
||||||
|
Stage: sampleStages[i],
|
||||||
|
Kind: m.Kind,
|
||||||
|
Key: m.Key,
|
||||||
|
Value: m.Value,
|
||||||
|
}
|
||||||
|
for _, res := range orchestrator.Evaluate(sample, evalRules) {
|
||||||
|
evals = append(evals, store.ThresholdEvaluation{
|
||||||
|
RunID: runID,
|
||||||
|
ThresholdID: res.Threshold.ID,
|
||||||
|
Stage: sample.Stage,
|
||||||
|
Kind: sample.Kind,
|
||||||
|
Key: sample.Key,
|
||||||
|
TS: m.TS,
|
||||||
|
Observed: res.Observed,
|
||||||
|
Passed: res.Passed,
|
||||||
|
})
|
||||||
|
if critical == "" && res.CriticalBreach() {
|
||||||
|
critical = fmt.Sprintf("%s %s=%g breached %s %g",
|
||||||
|
res.Threshold.Kind, sample.Key, res.Observed, res.Threshold.Op, res.Threshold.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := a.Thresholds.RecordBatch(ctx, evals); err != nil {
|
||||||
|
log.Printf("sensor: record evals run %d: %v", runID, err)
|
||||||
|
}
|
||||||
|
return critical
|
||||||
|
}
|
||||||
|
|
||||||
|
// stageHadCriticalBreach returns true if any critical-severity
|
||||||
|
// threshold evaluation for this run matched samples attributed to the
|
||||||
|
// given stage (stage selector "*" or exact). Called at /result close
|
||||||
|
// so even an agent that reports Passed=true gets overridden when the
|
||||||
|
// aggregate view says the stage tripped a gate.
|
||||||
|
func (a *Agent) stageHadCriticalBreach(ctx context.Context, runID int64, stage string) (bool, string) {
|
||||||
|
if a.Thresholds == nil {
|
||||||
|
return false, ""
|
||||||
|
}
|
||||||
|
breaches, err := a.Thresholds.CriticalBreaches(ctx, runID)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("result: list breaches run %d: %v", runID, err)
|
||||||
|
return false, ""
|
||||||
|
}
|
||||||
|
for _, b := range breaches {
|
||||||
|
if b.Stage == stage || b.Stage == "" || b.Stage == "*" {
|
||||||
|
return true, fmt.Sprintf("critical threshold breach: %s %s=%g", b.Kind, b.Key, b.Observed)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false, ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// failRunOnCriticalBreach flips the run to FailedHolding in response
|
||||||
|
// to a live threshold breach (thermal runaway, EDAC UE, rail sag).
|
||||||
|
// The agent's pending /result for the current stage may still arrive —
|
||||||
|
// the silent-skip guard handles that by refusing to double-transition.
|
||||||
|
func (a *Agent) failRunOnCriticalBreach(r *http.Request, run *model.Run, detail string) {
|
||||||
|
stage := orchestrator.StageNameForState(run.State)
|
||||||
|
if stage == "" {
|
||||||
|
stage = "threshold"
|
||||||
|
}
|
||||||
|
if err := a.Runs.SetFailedStage(r.Context(), run.ID, stage+" (threshold)"); err != nil {
|
||||||
|
log.Printf("sensor: set failed stage run %d: %v", run.ID, err)
|
||||||
|
}
|
||||||
|
if _, err := a.Runner.Transition(r.Context(), run.ID, orchestrator.TriggerStageFailed); err != nil {
|
||||||
|
// If we're already in FailedHolding the transition errors —
|
||||||
|
// that's fine, the first breach wins.
|
||||||
|
log.Printf("sensor: fail-transition run %d: %v", run.ID, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
hostName := a.hostNameFor(r.Context(), run.HostID)
|
||||||
|
a.dispatchEvent(notify.Event{
|
||||||
|
Kind: notify.KindStageFailed,
|
||||||
|
Severity: notify.SeverityCritical,
|
||||||
|
RunID: run.ID,
|
||||||
|
HostName: hostName,
|
||||||
|
Title: fmt.Sprintf("[vetting] %s FAILED: %s (threshold)", hostName, stage),
|
||||||
|
Body: fmt.Sprintf("Run %d on %s tripped a critical threshold during %s: %s", run.ID, hostName, stage, detail),
|
||||||
|
URL: a.runLinkURL(run.ID),
|
||||||
|
})
|
||||||
|
a.appendLog(run.ID, "error", fmt.Sprintf("threshold breach during %s: %s — run parked in FailedHolding", stage, detail))
|
||||||
}
|
}
|
||||||
|
|
||||||
// resolveReporting runs when the pipeline advances into StateReporting.
|
// resolveReporting runs when the pipeline advances into StateReporting.
|
||||||
@@ -956,12 +1194,20 @@ func (a *Agent) resolveReporting(r *http.Request, runID int64) {
|
|||||||
log.Printf("reporting: list measurements: %v", err)
|
log.Printf("reporting: list measurements: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
var firmware []store.FirmwareSnapshot
|
||||||
|
if a.Firmware != nil {
|
||||||
|
firmware, err = a.Firmware.ListForRun(ctx, runID)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("reporting: list firmware: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
bundle := map[string]any{
|
bundle := map[string]any{
|
||||||
"run": run,
|
"run": run,
|
||||||
"host": host,
|
"host": host,
|
||||||
"stages": stages,
|
"stages": stages,
|
||||||
"spec_diffs": diffs,
|
"spec_diffs": diffs,
|
||||||
"measurements": measurements,
|
"measurements": measurements,
|
||||||
|
"firmware": firmware,
|
||||||
"generated_at": time.Now().UTC().Format(time.RFC3339),
|
"generated_at": time.Now().UTC().Format(time.RFC3339),
|
||||||
}
|
}
|
||||||
buf, err := json.MarshalIndent(bundle, "", " ")
|
buf, err := json.MarshalIndent(bundle, "", " ")
|
||||||
@@ -993,6 +1239,15 @@ func (a *Agent) resolveReporting(r *http.Request, runID int64) {
|
|||||||
// Also render the operator-facing HTML summary alongside the JSON.
|
// Also render the operator-facing HTML summary alongside the JSON.
|
||||||
// Failures here are non-fatal — the JSON is the source of truth.
|
// Failures here are non-fatal — the JSON is the source of truth.
|
||||||
if host != nil {
|
if host != nil {
|
||||||
|
fwRows := make([]report.FirmwareSnapshot, 0, len(firmware))
|
||||||
|
for _, f := range firmware {
|
||||||
|
fwRows = append(fwRows, report.FirmwareSnapshot{
|
||||||
|
Component: f.Component,
|
||||||
|
Identifier: f.Identifier,
|
||||||
|
Version: f.Version,
|
||||||
|
Vendor: f.Vendor,
|
||||||
|
})
|
||||||
|
}
|
||||||
htmlData := report.Data{
|
htmlData := report.Data{
|
||||||
GeneratedAt: time.Now().UTC(),
|
GeneratedAt: time.Now().UTC(),
|
||||||
Run: *run,
|
Run: *run,
|
||||||
@@ -1000,6 +1255,7 @@ func (a *Agent) resolveReporting(r *http.Request, runID int64) {
|
|||||||
Stages: stages,
|
Stages: stages,
|
||||||
SpecDiffs: diffs,
|
SpecDiffs: diffs,
|
||||||
Aggregates: report.AggregateMeasurements(measurements),
|
Aggregates: report.AggregateMeasurements(measurements),
|
||||||
|
Firmware: fwRows,
|
||||||
}
|
}
|
||||||
if htmlBuf, err := report.RenderHTML(htmlData); err != nil {
|
if htmlBuf, err := report.RenderHTML(htmlData); err != nil {
|
||||||
log.Printf("reporting: render html: %v", err)
|
log.Printf("reporting: render html: %v", err)
|
||||||
|
|||||||
@@ -108,7 +108,7 @@ func TestRunPage_DefaultStep_Running(t *testing.T) {
|
|||||||
})
|
})
|
||||||
runID, _ := runs.Create(ctx, id, "rr", false)
|
runID, _ := runs.Create(ctx, id, "rr", false)
|
||||||
_ = ui.Stages.Seed(ctx, runID)
|
_ = ui.Stages.Seed(ctx, runID)
|
||||||
for _, name := range []string{"Inventory", "SpecValidate"} {
|
for _, name := range []string{"Inventory", "Firmware", "SpecValidate"} {
|
||||||
_ = ui.Stages.StartByName(ctx, runID, name)
|
_ = ui.Stages.StartByName(ctx, runID, name)
|
||||||
_ = ui.Stages.CompleteByName(ctx, runID, name, model.StagePassed, "")
|
_ = ui.Stages.CompleteByName(ctx, runID, name, model.StagePassed, "")
|
||||||
}
|
}
|
||||||
@@ -135,7 +135,7 @@ func TestRunPage_DefaultStep_Failed(t *testing.T) {
|
|||||||
})
|
})
|
||||||
runID, _ := runs.Create(ctx, id, "rf", false)
|
runID, _ := runs.Create(ctx, id, "rf", false)
|
||||||
_ = ui.Stages.Seed(ctx, runID)
|
_ = ui.Stages.Seed(ctx, runID)
|
||||||
for _, name := range []string{"Inventory", "SpecValidate", "SMART"} {
|
for _, name := range []string{"Inventory", "Firmware", "SpecValidate", "SMART"} {
|
||||||
_ = ui.Stages.StartByName(ctx, runID, name)
|
_ = ui.Stages.StartByName(ctx, runID, name)
|
||||||
_ = ui.Stages.CompleteByName(ctx, runID, name, model.StagePassed, "")
|
_ = ui.Stages.CompleteByName(ctx, runID, name, model.StagePassed, "")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,169 @@
|
|||||||
|
package api_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"vetting/internal/api"
|
||||||
|
"vetting/internal/db"
|
||||||
|
"vetting/internal/events"
|
||||||
|
"vetting/internal/model"
|
||||||
|
"vetting/internal/orchestrator"
|
||||||
|
"vetting/internal/store"
|
||||||
|
)
|
||||||
|
|
||||||
|
// setupAgentWithThresholds builds an Agent wired up to the thresholds
|
||||||
|
// store + a Runner so the /sensor handler can drive the state machine.
|
||||||
|
// Seeds one critical thermal threshold and parks the run in CPUStress
|
||||||
|
// so the handler will stamp a stage-relevant failed_stage.
|
||||||
|
func setupAgentWithThresholds(t *testing.T) (*api.Agent, int64, string) {
|
||||||
|
t.Helper()
|
||||||
|
path := filepath.Join(t.TempDir(), "vetting.db")
|
||||||
|
conn, err := db.Open(path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("open db: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { _ = conn.Close() })
|
||||||
|
|
||||||
|
hosts := &store.Hosts{DB: conn}
|
||||||
|
runs := &store.Runs{DB: conn}
|
||||||
|
stages := &store.Stages{DB: conn}
|
||||||
|
meas := &store.Measurements{DB: conn}
|
||||||
|
thresholds := &store.Thresholds{DB: conn}
|
||||||
|
hub := events.NewHub()
|
||||||
|
runner := &orchestrator.Runner{Runs: runs, Hosts: hosts, Stages: stages, EventHub: hub}
|
||||||
|
|
||||||
|
hostID, err := hosts.Create(context.Background(), model.Host{
|
||||||
|
Name: "thresh-host",
|
||||||
|
MAC: "aa:bb:cc:dd:ee:aa",
|
||||||
|
WoLBroadcastIP: "10.0.0.255",
|
||||||
|
WoLPort: 9,
|
||||||
|
ExpectedSpecYAML: "memory:\n total_gib: 16\n",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("create host: %v", err)
|
||||||
|
}
|
||||||
|
plain, hash, err := orchestrator.IssueRunToken()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("issue token: %v", err)
|
||||||
|
}
|
||||||
|
runID, err := runs.Create(context.Background(), hostID, hash, false)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("create run: %v", err)
|
||||||
|
}
|
||||||
|
if err := stages.Seed(context.Background(), runID); err != nil {
|
||||||
|
t.Fatalf("seed stages: %v", err)
|
||||||
|
}
|
||||||
|
// Park the run where a real thermal sidecar would be posting samples.
|
||||||
|
if err := runs.SetState(context.Background(), runID, model.StateCPUStress); err != nil {
|
||||||
|
t.Fatalf("set state: %v", err)
|
||||||
|
}
|
||||||
|
// Seed one critical thermal threshold.
|
||||||
|
if _, err := thresholds.SeedForRun(context.Background(), runID, []store.ThresholdSpec{
|
||||||
|
{Stage: "*", Kind: "temp", Key: "cpu/*", Op: "lt", Value: 92, Unit: "C", Severity: "critical", Source: "profile"},
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("seed thresholds: %v", err)
|
||||||
|
}
|
||||||
|
return &api.Agent{
|
||||||
|
Hosts: hosts,
|
||||||
|
Runs: runs,
|
||||||
|
Stages: stages,
|
||||||
|
Measurements: meas,
|
||||||
|
Thresholds: thresholds,
|
||||||
|
Runner: runner,
|
||||||
|
}, runID, plain
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestSensor_ThermalRunawayFailsRun: a sample that breaches a critical
|
||||||
|
// threshold lands in threshold_evaluations (passed=0) and flips the
|
||||||
|
// run into FailedHolding with failed_stage naming the current stage.
|
||||||
|
// This is the Phase-1 behavior gate — without the evaluator, the sample
|
||||||
|
// would just sit in measurements and the run would happily march on.
|
||||||
|
func TestSensor_ThermalRunawayFailsRun(t *testing.T) {
|
||||||
|
a, runID, token := setupAgentWithThresholds(t)
|
||||||
|
batch := api.SensorBatch{Samples: []api.SensorSample{
|
||||||
|
{Kind: "temp", Key: "cpu/0", Value: 95.3, Unit: "C"},
|
||||||
|
}}
|
||||||
|
buf, _ := json.Marshal(batch)
|
||||||
|
req := routedRequest(runID, http.MethodPost,
|
||||||
|
"/api/v1/runs/"+strconv.FormatInt(runID, 10)+"/sensor", buf)
|
||||||
|
req.Header.Set("Authorization", "Bearer "+token)
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
a.Sensor(rr, req)
|
||||||
|
if rr.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status = %d, body = %q", rr.Code, rr.Body.String())
|
||||||
|
}
|
||||||
|
var resp struct {
|
||||||
|
OK bool `json:"ok"`
|
||||||
|
Breach bool `json:"breach"`
|
||||||
|
Kind string `json:"breach_kind"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(rr.Body.Bytes(), &resp); err != nil {
|
||||||
|
t.Fatalf("decode: %v", err)
|
||||||
|
}
|
||||||
|
if !resp.Breach {
|
||||||
|
t.Fatalf("expected breach=true, got %+v", resp)
|
||||||
|
}
|
||||||
|
run, err := a.Runs.Get(context.Background(), runID)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("get run: %v", err)
|
||||||
|
}
|
||||||
|
if run.State != model.StateFailedHolding {
|
||||||
|
t.Fatalf("state = %s, want FailedHolding", run.State)
|
||||||
|
}
|
||||||
|
if run.FailedStage == "" {
|
||||||
|
t.Fatalf("failed_stage empty; want stage-named breach")
|
||||||
|
}
|
||||||
|
evals, err := a.Thresholds.ListEvaluations(context.Background(), runID)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("list evaluations: %v", err)
|
||||||
|
}
|
||||||
|
if len(evals) != 1 {
|
||||||
|
t.Fatalf("want 1 evaluation recorded, got %d", len(evals))
|
||||||
|
}
|
||||||
|
if evals[0].Passed {
|
||||||
|
t.Fatalf("evaluation recorded as passed for 95.3C sample against <92C rule")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestSensor_WithinThresholdPasses: a sample comfortably inside the
|
||||||
|
// threshold writes an evaluation row with passed=1 and leaves the run
|
||||||
|
// state untouched.
|
||||||
|
func TestSensor_WithinThresholdPasses(t *testing.T) {
|
||||||
|
a, runID, token := setupAgentWithThresholds(t)
|
||||||
|
batch := api.SensorBatch{Samples: []api.SensorSample{
|
||||||
|
{Kind: "temp", Key: "cpu/0", Value: 55.0, Unit: "C"},
|
||||||
|
}}
|
||||||
|
buf, _ := json.Marshal(batch)
|
||||||
|
req := routedRequest(runID, http.MethodPost,
|
||||||
|
"/api/v1/runs/"+strconv.FormatInt(runID, 10)+"/sensor", buf)
|
||||||
|
req.Header.Set("Authorization", "Bearer "+token)
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
a.Sensor(rr, req)
|
||||||
|
if rr.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status = %d, body = %q", rr.Code, rr.Body.String())
|
||||||
|
}
|
||||||
|
run, err := a.Runs.Get(context.Background(), runID)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("get run: %v", err)
|
||||||
|
}
|
||||||
|
if run.State != model.StateCPUStress {
|
||||||
|
t.Fatalf("state = %s, want CPUStress unchanged", run.State)
|
||||||
|
}
|
||||||
|
evals, err := a.Thresholds.ListEvaluations(context.Background(), runID)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("list evaluations: %v", err)
|
||||||
|
}
|
||||||
|
if len(evals) != 1 || !evals[0].Passed {
|
||||||
|
t.Fatalf("want 1 passed evaluation, got %+v", evals)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -75,6 +75,12 @@ func newCaptureRegistry(c *captureNotifier) *notify.Registry {
|
|||||||
// (agent, runID, plainTokenForBearer). Caller is responsible for
|
// (agent, runID, plainTokenForBearer). Caller is responsible for
|
||||||
// transitioning the run out of Queued.
|
// transitioning the run out of Queued.
|
||||||
func fullAgent(t *testing.T) (*api.Agent, int64, string) {
|
func fullAgent(t *testing.T) (*api.Agent, int64, string) {
|
||||||
|
return fullAgentWithSpec(t, "")
|
||||||
|
}
|
||||||
|
|
||||||
|
// fullAgentWithSpec is the same as fullAgent but seeds the host with
|
||||||
|
// an ExpectedSpecYAML so SpecValidate can pick up diffs in the test.
|
||||||
|
func fullAgentWithSpec(t *testing.T, expectedSpecYAML string) (*api.Agent, int64, string) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
tmp := t.TempDir()
|
tmp := t.TempDir()
|
||||||
conn, err := db.Open(filepath.Join(tmp, "vetting.db"))
|
conn, err := db.Open(filepath.Join(tmp, "vetting.db"))
|
||||||
@@ -89,6 +95,7 @@ func fullAgent(t *testing.T) (*api.Agent, int64, string) {
|
|||||||
artifactStore := &store.Artifacts{DB: conn}
|
artifactStore := &store.Artifacts{DB: conn}
|
||||||
specDiffStore := &store.SpecDiffs{DB: conn}
|
specDiffStore := &store.SpecDiffs{DB: conn}
|
||||||
measurementStore := &store.Measurements{DB: conn}
|
measurementStore := &store.Measurements{DB: conn}
|
||||||
|
firmwareStore := &store.Firmware{DB: conn}
|
||||||
|
|
||||||
hub := events.NewHub()
|
hub := events.NewHub()
|
||||||
logHub, err := logs.NewHub(filepath.Join(tmp, "logs"), hub)
|
logHub, err := logs.NewHub(filepath.Join(tmp, "logs"), hub)
|
||||||
@@ -109,7 +116,7 @@ func fullAgent(t *testing.T) (*api.Agent, int64, string) {
|
|||||||
MAC: "aa:bb:cc:dd:ee:10",
|
MAC: "aa:bb:cc:dd:ee:10",
|
||||||
WoLBroadcastIP: "10.0.0.255",
|
WoLBroadcastIP: "10.0.0.255",
|
||||||
WoLPort: 9,
|
WoLPort: 9,
|
||||||
ExpectedSpecYAML: "", // empty spec → no diffs
|
ExpectedSpecYAML: expectedSpecYAML,
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("create host: %v", err)
|
t.Fatalf("create host: %v", err)
|
||||||
@@ -132,6 +139,7 @@ func fullAgent(t *testing.T) (*api.Agent, int64, string) {
|
|||||||
Artifacts: artifactStore,
|
Artifacts: artifactStore,
|
||||||
SpecDiffs: specDiffStore,
|
SpecDiffs: specDiffStore,
|
||||||
Measurements: measurementStore,
|
Measurements: measurementStore,
|
||||||
|
Firmware: firmwareStore,
|
||||||
Runner: runner,
|
Runner: runner,
|
||||||
EventHub: hub,
|
EventHub: hub,
|
||||||
Logs: logHub,
|
Logs: logHub,
|
||||||
@@ -195,20 +203,24 @@ func TestFullPipelineToCompleted(t *testing.T) {
|
|||||||
Memory: spec.MemorySpec{TotalGiB: 16},
|
Memory: spec.MemorySpec{TotalGiB: 16},
|
||||||
}
|
}
|
||||||
next := walkStage(t, a, runID, token, "Inventory", true, map[string]any{"inventory": inv})
|
next := walkStage(t, a, runID, token, "Inventory", true, map[string]any{"inventory": inv})
|
||||||
// After Inventory → SpecValidate resolves inline → SMART
|
// After Inventory → Firmware
|
||||||
if next != "SMART" {
|
if next != "Firmware" {
|
||||||
t.Fatalf("after Inventory, next_state = %q, want SMART", next)
|
t.Fatalf("after Inventory, next_state = %q, want Firmware", next)
|
||||||
}
|
}
|
||||||
|
|
||||||
// The remaining stages advance one-for-one in order.
|
// The remaining stages advance one-for-one in order. After Firmware
|
||||||
|
// the inline SpecValidate resolver advances through SpecValidate to
|
||||||
|
// SMART without a dedicated /result POST for SpecValidate.
|
||||||
walkPlan := []struct {
|
walkPlan := []struct {
|
||||||
stage string
|
stage string
|
||||||
expected string
|
expected string
|
||||||
}{
|
}{
|
||||||
|
{"Firmware", "SMART"},
|
||||||
{"SMART", "CPUStress"},
|
{"SMART", "CPUStress"},
|
||||||
{"CPUStress", "Storage"},
|
{"CPUStress", "Storage"},
|
||||||
{"Storage", "Network"},
|
{"Storage", "Network"},
|
||||||
{"Network", "GPU"},
|
{"Network", "Burn"},
|
||||||
|
{"Burn", "GPU"},
|
||||||
{"GPU", "PSU"},
|
{"GPU", "PSU"},
|
||||||
{"PSU", "Completed"}, // PSU → Reporting resolves inline → Completed
|
{"PSU", "Completed"}, // PSU → Reporting resolves inline → Completed
|
||||||
}
|
}
|
||||||
@@ -287,8 +299,11 @@ func TestFaultInjectionSMART(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
inv := spec.Inventory{Memory: spec.MemorySpec{TotalGiB: 16}}
|
inv := spec.Inventory{Memory: spec.MemorySpec{TotalGiB: 16}}
|
||||||
if next := walkStage(t, a, runID, token, "Inventory", true, map[string]any{"inventory": inv}); next != "SMART" {
|
if next := walkStage(t, a, runID, token, "Inventory", true, map[string]any{"inventory": inv}); next != "Firmware" {
|
||||||
t.Fatalf("after Inventory, next = %q want SMART", next)
|
t.Fatalf("after Inventory, next = %q want Firmware", next)
|
||||||
|
}
|
||||||
|
if next := walkStage(t, a, runID, token, "Firmware", true, nil); next != "SMART" {
|
||||||
|
t.Fatalf("after Firmware, next = %q want SMART (inline SpecValidate)", next)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fake SMART failure → expect FailedHolding.
|
// Fake SMART failure → expect FailedHolding.
|
||||||
@@ -316,3 +331,76 @@ func TestFaultInjectionSMART(t *testing.T) {
|
|||||||
t.Errorf("StageFailed severity = %q, want critical", ev.Severity)
|
t.Errorf("StageFailed severity = %q, want critical", ev.Severity)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestFirmwarePersistAndSpecMismatch exercises the Phase 4 firmware
|
||||||
|
// integration: the agent POSTs Firmware snapshots; server persists; the
|
||||||
|
// following SpecValidate diff picks up a firmware mismatch and parks
|
||||||
|
// the run in FailedHolding with FailedStage=SpecValidate.
|
||||||
|
func TestFirmwarePersistAndSpecMismatch(t *testing.T) {
|
||||||
|
// Host demands BIOS 3.3; agent will POST 3.2 → one critical firmware diff.
|
||||||
|
yaml := "firmware:\n - component: bios\n version: \"3.3\"\n"
|
||||||
|
a, runID, token := fullAgentWithSpec(t, yaml)
|
||||||
|
a.Notify = newCaptureRegistry(&captureNotifier{name: "capture"})
|
||||||
|
|
||||||
|
if err := a.Runs.SetState(context.Background(), runID, model.StateInventoryCheck); err != nil {
|
||||||
|
t.Fatalf("set state: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
inv := spec.Inventory{Memory: spec.MemorySpec{TotalGiB: 16}}
|
||||||
|
if next := walkStage(t, a, runID, token, "Inventory", true, map[string]any{"inventory": inv}); next != "Firmware" {
|
||||||
|
t.Fatalf("after Inventory, next = %q want Firmware", next)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Firmware stage: agent reports actual BIOS 3.2 → one row persisted.
|
||||||
|
fw := []map[string]any{
|
||||||
|
{"component": "bios", "identifier": "system", "version": "3.2", "vendor": "AMI"},
|
||||||
|
}
|
||||||
|
next := walkStage(t, a, runID, token, "Firmware", true, map[string]any{"firmware": fw})
|
||||||
|
// Inline SpecValidate should detect the firmware mismatch and send
|
||||||
|
// the run to FailedHolding without the agent posting SpecValidate.
|
||||||
|
if next != "FailedHolding" {
|
||||||
|
t.Fatalf("after Firmware mismatch, next = %q want FailedHolding", next)
|
||||||
|
}
|
||||||
|
|
||||||
|
run, err := a.Runs.Get(context.Background(), runID)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("get run: %v", err)
|
||||||
|
}
|
||||||
|
if run.State != model.StateFailedHolding {
|
||||||
|
t.Fatalf("run.State = %q, want FailedHolding", run.State)
|
||||||
|
}
|
||||||
|
if run.FailedStage != "SpecValidate" {
|
||||||
|
t.Fatalf("run.FailedStage = %q, want SpecValidate", run.FailedStage)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Persistence: row landed in firmware_snapshots.
|
||||||
|
snaps, err := a.Firmware.ListForRun(context.Background(), runID)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ListForRun firmware: %v", err)
|
||||||
|
}
|
||||||
|
if len(snaps) != 1 {
|
||||||
|
t.Fatalf("firmware rows = %d, want 1: %+v", len(snaps), snaps)
|
||||||
|
}
|
||||||
|
if snaps[0].Component != "bios" || snaps[0].Version != "3.2" {
|
||||||
|
t.Errorf("persisted snapshot = %+v", snaps[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
// Diff row: SpecDiffs has a firmware-specific entry (rather than
|
||||||
|
// only CPU/memory/disk rows) and is critical.
|
||||||
|
diffs, err := a.SpecDiffs.ListForRun(context.Background(), runID)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ListForRun specdiffs: %v", err)
|
||||||
|
}
|
||||||
|
found := false
|
||||||
|
for _, d := range diffs {
|
||||||
|
if strings.HasPrefix(d.Field, "firmware[") {
|
||||||
|
found = true
|
||||||
|
if d.Severity != "critical" {
|
||||||
|
t.Errorf("firmware diff severity = %q, want critical", d.Severity)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
t.Fatalf("no firmware[...] entry in spec diffs: %+v", diffs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ import (
|
|||||||
"github.com/go-chi/chi/v5"
|
"github.com/go-chi/chi/v5"
|
||||||
"gopkg.in/yaml.v3"
|
"gopkg.in/yaml.v3"
|
||||||
|
|
||||||
|
"vetting/internal/config"
|
||||||
"vetting/internal/events"
|
"vetting/internal/events"
|
||||||
"vetting/internal/logs"
|
"vetting/internal/logs"
|
||||||
"vetting/internal/model"
|
"vetting/internal/model"
|
||||||
@@ -32,6 +33,8 @@ type UI struct {
|
|||||||
SubSteps *store.SubSteps
|
SubSteps *store.SubSteps
|
||||||
SpecDiffs *store.SpecDiffs
|
SpecDiffs *store.SpecDiffs
|
||||||
Artifacts *store.Artifacts
|
Artifacts *store.Artifacts
|
||||||
|
Thresholds *store.Thresholds // Phase 1: seeded at StartRun from Profiles
|
||||||
|
Profiles *config.ProfileRegistry
|
||||||
EventHub *events.Hub
|
EventHub *events.Hub
|
||||||
Logs *logs.Hub
|
Logs *logs.Hub
|
||||||
Runner *orchestrator.Runner
|
Runner *orchestrator.Runner
|
||||||
@@ -316,23 +319,71 @@ func (u *UI) StartRun(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
nonDestructive := r.PostFormValue("non_destructive") == "1"
|
nonDestructive := r.PostFormValue("non_destructive") == "1"
|
||||||
|
profile := strings.TrimSpace(r.PostFormValue("profile"))
|
||||||
|
if profile == "" {
|
||||||
|
profile = config.ProfileQuick
|
||||||
|
}
|
||||||
|
if !config.IsValidProfile(profile) {
|
||||||
|
http.Error(w, "unknown profile: "+profile, http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
_, hash, err := orchestrator.IssueRunToken()
|
_, hash, err := orchestrator.IssueRunToken()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(w, "token: "+err.Error(), http.StatusInternalServerError)
|
http.Error(w, "token: "+err.Error(), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
runID, err := u.Runs.Create(r.Context(), hostID, hash, nonDestructive)
|
runID, err := u.Runs.CreateWithProfile(r.Context(), hostID, hash, nonDestructive, profile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(w, "create run: "+err.Error(), http.StatusInternalServerError)
|
http.Error(w, "create run: "+err.Error(), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
log.Printf("ui: created run %d for host %d (state=Queued)", runID, hostID)
|
if err := u.seedThresholds(r.Context(), runID, host, profile); err != nil {
|
||||||
|
// A threshold-seed failure shouldn't orphan a run row — log
|
||||||
|
// and continue. Samples will just accumulate without a gate
|
||||||
|
// until the operator retries, same as before Phase 1.
|
||||||
|
log.Printf("ui: seed thresholds run %d: %v", runID, err)
|
||||||
|
}
|
||||||
|
log.Printf("ui: created run %d for host %d profile=%s (state=Queued)", runID, hostID, profile)
|
||||||
// Send the operator straight to the new run — the button they clicked
|
// Send the operator straight to the new run — the button they clicked
|
||||||
// was "Start vetting", the thing they want next is to watch it.
|
// was "Start vetting", the thing they want next is to watch it.
|
||||||
http.Redirect(w, r, fmt.Sprintf("/runs/%d", runID), http.StatusSeeOther)
|
http.Redirect(w, r, fmt.Sprintf("/runs/%d", runID), http.StatusSeeOther)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// seedThresholds materializes the per-run threshold table from the
|
||||||
|
// ProfileRegistry. The shared vetting.thresholds block applies to
|
||||||
|
// every profile; future per-profile overrides will layer on top here,
|
||||||
|
// and per-host overrides (Phase 1 extra) land via ExpectedSpecYAML in
|
||||||
|
// a later iteration. Safe to skip silently when Thresholds or the
|
||||||
|
// registry isn't wired — tests do not always build one.
|
||||||
|
func (u *UI) seedThresholds(ctx context.Context, runID int64, host *model.Host, profile string) error {
|
||||||
|
if u.Thresholds == nil || u.Profiles == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
_ = host // reserved for per-host override layer
|
||||||
|
_ = profile // reserved for per-profile override layer
|
||||||
|
defaults := u.Profiles.Vetting.Thresholds
|
||||||
|
if len(defaults) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
specs := make([]store.ThresholdSpec, 0, len(defaults))
|
||||||
|
for _, d := range defaults {
|
||||||
|
specs = append(specs, store.ThresholdSpec{
|
||||||
|
Stage: d.Stage,
|
||||||
|
Kind: d.Kind,
|
||||||
|
Key: d.Key,
|
||||||
|
Op: d.Op,
|
||||||
|
Value: d.Value,
|
||||||
|
Nominal: d.Nominal,
|
||||||
|
Unit: d.Unit,
|
||||||
|
Severity: d.Severity,
|
||||||
|
Source: "profile",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
_, err := u.Thresholds.SeedForRun(ctx, runID, specs)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
func (u *UI) NewHostForm(w http.ResponseWriter, r *http.Request) {
|
func (u *UI) NewHostForm(w http.ResponseWriter, r *http.Request) {
|
||||||
_ = templates.Registration(templates.RegistrationForm{
|
_ = templates.Registration(templates.RegistrationForm{
|
||||||
QuickRegisterURL: u.baseURL(r),
|
QuickRegisterURL: u.baseURL(r),
|
||||||
|
|||||||
@@ -20,6 +20,13 @@ type Config struct {
|
|||||||
Agent Agent `yaml:"agent"`
|
Agent Agent `yaml:"agent"`
|
||||||
Notifiers []Notifier `yaml:"notifiers"`
|
Notifiers []Notifier `yaml:"notifiers"`
|
||||||
Routes []Route `yaml:"routes"`
|
Routes []Route `yaml:"routes"`
|
||||||
|
|
||||||
|
// Profiles holds the Phase-1 quick/deep/soak registry (stage order,
|
||||||
|
// threshold defaults, per-profile stage timeouts + probe knobs).
|
||||||
|
// Populated from the `vetting:` and `profiles:` top-level blocks
|
||||||
|
// during Load. Nil is never returned — Load installs a default
|
||||||
|
// registry when those blocks are absent.
|
||||||
|
Profiles *ProfileRegistry `yaml:"-"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type Server struct {
|
type Server struct {
|
||||||
@@ -111,6 +118,20 @@ func Load(path string) (*Config, error) {
|
|||||||
if err := yaml.Unmarshal(b, &c); err != nil {
|
if err := yaml.Unmarshal(b, &c); err != nil {
|
||||||
return nil, fmt.Errorf("parse config: %w", err)
|
return nil, fmt.Errorf("parse config: %w", err)
|
||||||
}
|
}
|
||||||
|
// The `vetting:` + `profiles:` blocks live alongside the existing
|
||||||
|
// fields but we decode them into the raw shape because YAML
|
||||||
|
// durations arrive as strings. Reusing the same byte buffer is
|
||||||
|
// safe: yaml.Unmarshal is happy to ignore keys the target doesn't
|
||||||
|
// know about.
|
||||||
|
var rawProfiles rawProfilesBlock
|
||||||
|
if err := yaml.Unmarshal(b, &rawProfiles); err != nil {
|
||||||
|
return nil, fmt.Errorf("parse profiles: %w", err)
|
||||||
|
}
|
||||||
|
reg, err := buildProfileRegistry(rawProfiles)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("profiles: %w", err)
|
||||||
|
}
|
||||||
|
c.Profiles = reg
|
||||||
if c.Server.Bind == "" {
|
if c.Server.Bind == "" {
|
||||||
c.Server.Bind = "127.0.0.1:8080"
|
c.Server.Bind = "127.0.0.1:8080"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,441 @@
|
|||||||
|
package config
|
||||||
|
|
||||||
|
import (
	"fmt"
	"sort"
	"strings"
	"time"
)
|
||||||
|
|
||||||
|
// ProfileName is the set of legal values for a Run's profile column.
|
||||||
|
// Exposed as constants so callers (UI handler, tests, agent) don't
|
||||||
|
// sprinkle literal strings.
|
||||||
|
const (
|
||||||
|
ProfileQuick = "quick"
|
||||||
|
ProfileDeep = "deep"
|
||||||
|
ProfileSoak = "soak"
|
||||||
|
)
|
||||||
|
|
||||||
|
// AllProfiles is the canonical ordering shown in the picker. Leftmost
|
||||||
|
// is the default; rightmost is the longest-running.
|
||||||
|
var AllProfiles = []string{ProfileQuick, ProfileDeep, ProfileSoak}
|
||||||
|
|
||||||
|
// IsValidProfile returns true when name is one of the known profile
|
||||||
|
// identifiers. Used at the UI boundary to reject malformed POSTs and in
|
||||||
|
// store code as a fallback guard.
|
||||||
|
func IsValidProfile(name string) bool {
|
||||||
|
for _, p := range AllProfiles {
|
||||||
|
if p == name {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Vetting holds the stage order + threshold defaults that are shared
|
||||||
|
// across all profiles. Only the per-stage durations/concurrency differ
|
||||||
|
// between quick/deep/soak; gates like "CPU > 92C fails the run" apply
|
||||||
|
// to a 2-minute quick run and a 12-hour soak alike.
|
||||||
|
type Vetting struct {
|
||||||
|
Stages []string `yaml:"stages"`
|
||||||
|
Thresholds []ThresholdDefaults `yaml:"thresholds"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ThresholdDefaults is the YAML shape of a threshold declaration. One
|
||||||
|
// stanza can declare a per-stage rule ("stage: Network") or a global
|
||||||
|
// rule ("stage: *") — the threshold evaluator applies both to samples
|
||||||
|
// with matching (stage, kind, key).
|
||||||
|
type ThresholdDefaults struct {
|
||||||
|
Stage string `yaml:"stage"`
|
||||||
|
Kind string `yaml:"kind"`
|
||||||
|
Key string `yaml:"key"`
|
||||||
|
Op string `yaml:"op"` // lt|lte|gt|gte|within_pct
|
||||||
|
Value float64 `yaml:"value"`
|
||||||
|
Nominal float64 `yaml:"nominal"` // only used by within_pct (e.g. 12.0 for +12V rail)
|
||||||
|
Unit string `yaml:"unit"`
|
||||||
|
Severity string `yaml:"severity"` // critical|warning
|
||||||
|
}
|
||||||
|
|
||||||
|
// ProfileRegistry is the in-memory view of the `profiles:` block in
|
||||||
|
// vetting.yaml. The orchestrator queries it at run creation time to
|
||||||
|
// seed thresholds and (in Phase 3+) to scale per-stage durations.
|
||||||
|
type ProfileRegistry struct {
|
||||||
|
// Shared stage ordering + threshold defaults. Every profile walks
|
||||||
|
// the same list; only durations/concurrency differ.
|
||||||
|
Vetting Vetting
|
||||||
|
|
||||||
|
// Profiles is keyed by name ("quick"/"deep"/"soak"). Inherit is
|
||||||
|
// already resolved at load time — a caller sees a flattened view.
|
||||||
|
Profiles map[string]Profile
|
||||||
|
}
|
||||||
|
|
||||||
|
// Profile is a loaded profile. StageTimeouts is keyed by stage name.
|
||||||
|
// Defaults carries the free-form knobs each probe reads.
|
||||||
|
type Profile struct {
|
||||||
|
Name string
|
||||||
|
Inherit string
|
||||||
|
StageTimeouts map[string]time.Duration
|
||||||
|
Defaults map[string]map[string]any
|
||||||
|
}
|
||||||
|
|
||||||
|
// StageConfig is the flat view of a profile's knobs, shipped on the
|
||||||
|
// claim response so the agent can size CPUStress/Storage/Network/Burn
|
||||||
|
// work without parsing YAML. Empty values mean "fall back to the
|
||||||
|
// agent's compile-time default" — an older orchestrator that doesn't
|
||||||
|
// set these fields keeps working unchanged.
|
||||||
|
type StageConfig struct {
|
||||||
|
Profile string `json:"profile"`
|
||||||
|
StageTimeouts map[string]string `json:"stage_timeouts,omitempty"`
|
||||||
|
CPUStress CPUStressKnobs `json:"cpustress"`
|
||||||
|
Storage StorageKnobs `json:"storage"`
|
||||||
|
Network NetworkKnobs `json:"network"`
|
||||||
|
Burn BurnKnobs `json:"burn"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// CPUStressKnobs parallels the `cpustress:` block under `profiles.<name>.defaults`.
|
||||||
|
// Durations are YAML duration strings ("2m", "60m", "12h").
|
||||||
|
type CPUStressKnobs struct {
|
||||||
|
CPUPass string `json:"cpu_pass,omitempty"`
|
||||||
|
MemPass string `json:"mem_pass,omitempty"`
|
||||||
|
EDACPoll string `json:"edac_poll,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// StorageKnobs parallels `storage:` defaults. Mode is "fio_sample" (quick)
|
||||||
|
// or "full_disk" (deep/soak). Verify names the integrity mode ("md5" or "").
|
||||||
|
type StorageKnobs struct {
|
||||||
|
Mode string `json:"mode,omitempty"`
|
||||||
|
FioSize string `json:"fio_size,omitempty"`
|
||||||
|
FioTime string `json:"fio_time,omitempty"`
|
||||||
|
FioBS string `json:"fio_bs,omitempty"`
|
||||||
|
FioRW string `json:"fio_rw,omitempty"`
|
||||||
|
Verify string `json:"verify,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// NetworkKnobs parallels `network:` defaults. Duration is a YAML string.
|
||||||
|
type NetworkKnobs struct {
|
||||||
|
Duration string `json:"duration,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// BurnKnobs parallels `burn:` defaults. Duration is the total Burn window.
|
||||||
|
// CPUWorkers is "all" (agent picks runtime.NumCPU) or a numeric string.
|
||||||
|
// MemPct is a percentage of MemAvailable to stress. FioOnSpare gates
|
||||||
|
// whether fio runs inside Burn (set false if operator lacks a spare
|
||||||
|
// partition). IperfParallel is the parallel stream count fed to iperf3 -P.
|
||||||
|
type BurnKnobs struct {
|
||||||
|
Duration string `json:"duration,omitempty"`
|
||||||
|
CPUWorkers string `json:"cpu_workers,omitempty"`
|
||||||
|
MemPct int `json:"mem_pct,omitempty"`
|
||||||
|
FioOnSpare bool `json:"fio_on_spare,omitempty"`
|
||||||
|
IperfParallel int `json:"iperf_parallel,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ResolveStageConfig flattens the named profile into the wire shape the
|
||||||
|
// claim handler ships. Missing keys render as empty strings so the agent
|
||||||
|
// falls back to its own defaults.
|
||||||
|
func (pr *ProfileRegistry) ResolveStageConfig(name string) StageConfig {
|
||||||
|
if pr == nil {
|
||||||
|
return StageConfig{Profile: name}
|
||||||
|
}
|
||||||
|
p, err := pr.Lookup(name)
|
||||||
|
if err != nil {
|
||||||
|
return StageConfig{Profile: name}
|
||||||
|
}
|
||||||
|
out := StageConfig{Profile: p.Name}
|
||||||
|
if len(p.StageTimeouts) > 0 {
|
||||||
|
out.StageTimeouts = make(map[string]string, len(p.StageTimeouts))
|
||||||
|
for k, v := range p.StageTimeouts {
|
||||||
|
out.StageTimeouts[k] = v.String()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cpu := p.Defaults["cpustress"]
|
||||||
|
out.CPUStress.CPUPass = yamlString(cpu, "cpu_pass")
|
||||||
|
out.CPUStress.MemPass = yamlString(cpu, "mem_pass")
|
||||||
|
out.CPUStress.EDACPoll = yamlString(cpu, "edac_poll")
|
||||||
|
st := p.Defaults["storage"]
|
||||||
|
out.Storage.Mode = yamlString(st, "mode")
|
||||||
|
out.Storage.FioSize = yamlString(st, "fio_size")
|
||||||
|
out.Storage.FioTime = yamlString(st, "fio_time")
|
||||||
|
out.Storage.FioBS = yamlString(st, "fio_bs")
|
||||||
|
out.Storage.FioRW = yamlString(st, "fio_rw")
|
||||||
|
out.Storage.Verify = yamlString(st, "verify")
|
||||||
|
net := p.Defaults["network"]
|
||||||
|
out.Network.Duration = yamlString(net, "duration")
|
||||||
|
burn := p.Defaults["burn"]
|
||||||
|
out.Burn.Duration = yamlString(burn, "duration")
|
||||||
|
out.Burn.CPUWorkers = yamlString(burn, "cpu_workers")
|
||||||
|
out.Burn.MemPct = yamlInt(burn, "mem_pct")
|
||||||
|
out.Burn.FioOnSpare = yamlBool(burn, "fio_on_spare")
|
||||||
|
out.Burn.IperfParallel = yamlInt(burn, "iperf_parallel")
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// yamlInt coerces a map[string]any entry to int. Accepts native int,
|
||||||
|
// float64 (JSON numbers round-trip as float), or numeric string. Missing
|
||||||
|
// / malformed values return 0 so the agent falls back to its default.
|
||||||
|
func yamlInt(m map[string]any, key string) int {
|
||||||
|
v, ok := m[key]
|
||||||
|
if !ok || v == nil {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
switch x := v.(type) {
|
||||||
|
case int:
|
||||||
|
return x
|
||||||
|
case int64:
|
||||||
|
return int(x)
|
||||||
|
case float64:
|
||||||
|
return int(x)
|
||||||
|
case string:
|
||||||
|
// Best-effort string → int. Empty and non-numeric fall through
|
||||||
|
// to zero.
|
||||||
|
var n int
|
||||||
|
if _, err := fmt.Sscanf(x, "%d", &n); err == nil {
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// yamlBool accepts native bool or "true"/"false" strings. Anything else
|
||||||
|
// (missing key, numeric, typo) returns false — a safer default than
|
||||||
|
// "true" for a destructive knob like fio_on_spare.
|
||||||
|
func yamlBool(m map[string]any, key string) bool {
|
||||||
|
v, ok := m[key]
|
||||||
|
if !ok || v == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
switch x := v.(type) {
|
||||||
|
case bool:
|
||||||
|
return x
|
||||||
|
case string:
|
||||||
|
return strings.EqualFold(x, "true")
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// yamlString coerces a map[string]any entry to its string form. YAML
|
||||||
|
// durations like "2m" parse as strings; numeric literals like 5 parse as
|
||||||
|
// int. We format non-string scalars with fmt.Sprint so the agent can
|
||||||
|
// still interpret them.
|
||||||
|
func yamlString(m map[string]any, key string) string {
|
||||||
|
v, ok := m[key]
|
||||||
|
if !ok || v == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
if s, ok := v.(string); ok {
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
return fmt.Sprint(v)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Lookup returns the profile with the given name. Falls back to the
|
||||||
|
// default profile (quick) if the name is empty. Returns an error when
|
||||||
|
// the name is non-empty but unknown so the caller can surface it.
|
||||||
|
func (pr *ProfileRegistry) Lookup(name string) (Profile, error) {
|
||||||
|
if name == "" {
|
||||||
|
name = ProfileQuick
|
||||||
|
}
|
||||||
|
p, ok := pr.Profiles[name]
|
||||||
|
if !ok {
|
||||||
|
return Profile{}, fmt.Errorf("unknown profile %q", name)
|
||||||
|
}
|
||||||
|
return p, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Names returns the registry's profile names in the canonical
|
||||||
|
// picker order (quick/deep/soak). Profiles present in the config but
|
||||||
|
// unknown to AllProfiles are appended after, alphabetically.
|
||||||
|
func (pr *ProfileRegistry) Names() []string {
|
||||||
|
out := make([]string, 0, len(pr.Profiles))
|
||||||
|
seen := map[string]bool{}
|
||||||
|
for _, n := range AllProfiles {
|
||||||
|
if _, ok := pr.Profiles[n]; ok {
|
||||||
|
out = append(out, n)
|
||||||
|
seen[n] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for n := range pr.Profiles {
|
||||||
|
if !seen[n] {
|
||||||
|
out = append(out, n)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stages returns the shared stage order, or a safe default when the
|
||||||
|
// config didn't declare one — keeps tests that don't build a full
|
||||||
|
// ProfileRegistry from tripping over a nil slice.
|
||||||
|
func (pr *ProfileRegistry) Stages() []string {
|
||||||
|
if len(pr.Vetting.Stages) == 0 {
|
||||||
|
return DefaultStages()
|
||||||
|
}
|
||||||
|
out := make([]string, len(pr.Vetting.Stages))
|
||||||
|
copy(out, pr.Vetting.Stages)
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// DefaultStages is the canonical stage list the orchestrator walks
|
||||||
|
// when no config is loaded. Mirrored in the vetting.yaml shipped with
|
||||||
|
// the repo so edits to the slice and the file stay in sync.
|
||||||
|
func DefaultStages() []string {
|
||||||
|
return []string{
|
||||||
|
"Inventory",
|
||||||
|
"Firmware",
|
||||||
|
"SpecValidate",
|
||||||
|
"SMART",
|
||||||
|
"CPUStress",
|
||||||
|
"Storage",
|
||||||
|
"Network",
|
||||||
|
"Burn",
|
||||||
|
"GPU",
|
||||||
|
"PSU",
|
||||||
|
"Reporting",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// rawProfile is the YAML shape before inherit resolution. Durations
|
||||||
|
// arrive as strings (e.g. "2h") so we can parse them with
|
||||||
|
// time.ParseDuration instead of rolling our own.
|
||||||
|
type rawProfile struct {
|
||||||
|
Inherit string `yaml:"inherit"`
|
||||||
|
StageTimeouts map[string]string `yaml:"stage_timeouts"`
|
||||||
|
Defaults map[string]map[string]any `yaml:"defaults"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type rawProfilesBlock struct {
|
||||||
|
Vetting Vetting `yaml:"vetting"`
|
||||||
|
Profiles map[string]rawProfile `yaml:"profiles"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildProfileRegistry flattens a rawProfilesBlock into a ProfileRegistry.
|
||||||
|
// Resolves `inherit:` by recursive merge (child keys win), parses
|
||||||
|
// stage_timeouts strings into time.Durations, and returns an error if
|
||||||
|
// the inherit chain loops or references an unknown profile.
|
||||||
|
func buildProfileRegistry(raw rawProfilesBlock) (*ProfileRegistry, error) {
|
||||||
|
if len(raw.Profiles) == 0 {
|
||||||
|
raw.Profiles = defaultRawProfiles()
|
||||||
|
}
|
||||||
|
out := &ProfileRegistry{
|
||||||
|
Vetting: raw.Vetting,
|
||||||
|
Profiles: make(map[string]Profile, len(raw.Profiles)),
|
||||||
|
}
|
||||||
|
if len(out.Vetting.Stages) == 0 {
|
||||||
|
out.Vetting.Stages = DefaultStages()
|
||||||
|
}
|
||||||
|
for name := range raw.Profiles {
|
||||||
|
resolved, err := resolveProfile(raw.Profiles, name, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
out.Profiles[name] = resolved
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolveProfile recursively walks inherit chains, depth-first. The
|
||||||
|
// visited slice is a cycle guard — we add the current name before
|
||||||
|
// recursing and bail if we ever see it again.
|
||||||
|
func resolveProfile(all map[string]rawProfile, name string, visited []string) (Profile, error) {
|
||||||
|
for _, v := range visited {
|
||||||
|
if v == name {
|
||||||
|
return Profile{}, fmt.Errorf("profile inherit cycle: %s -> %s", strings.Join(visited, " -> "), name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
raw, ok := all[name]
|
||||||
|
if !ok {
|
||||||
|
return Profile{}, fmt.Errorf("unknown profile %q", name)
|
||||||
|
}
|
||||||
|
base := Profile{
|
||||||
|
Name: name,
|
||||||
|
Inherit: raw.Inherit,
|
||||||
|
StageTimeouts: map[string]time.Duration{},
|
||||||
|
Defaults: map[string]map[string]any{},
|
||||||
|
}
|
||||||
|
if raw.Inherit != "" {
|
||||||
|
parent, err := resolveProfile(all, raw.Inherit, append(visited, name))
|
||||||
|
if err != nil {
|
||||||
|
return Profile{}, err
|
||||||
|
}
|
||||||
|
for k, v := range parent.StageTimeouts {
|
||||||
|
base.StageTimeouts[k] = v
|
||||||
|
}
|
||||||
|
for k, v := range parent.Defaults {
|
||||||
|
copyMap := make(map[string]any, len(v))
|
||||||
|
for kk, vv := range v {
|
||||||
|
copyMap[kk] = vv
|
||||||
|
}
|
||||||
|
base.Defaults[k] = copyMap
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for stage, s := range raw.StageTimeouts {
|
||||||
|
d, err := time.ParseDuration(s)
|
||||||
|
if err != nil {
|
||||||
|
return Profile{}, fmt.Errorf("profile %s stage_timeouts[%s]: %w", name, stage, err)
|
||||||
|
}
|
||||||
|
base.StageTimeouts[stage] = d
|
||||||
|
}
|
||||||
|
for group, kv := range raw.Defaults {
|
||||||
|
dest, ok := base.Defaults[group]
|
||||||
|
if !ok {
|
||||||
|
dest = map[string]any{}
|
||||||
|
base.Defaults[group] = dest
|
||||||
|
}
|
||||||
|
for k, v := range kv {
|
||||||
|
dest[k] = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return base, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// defaultRawProfiles returns sane per-profile durations + probe knobs
|
||||||
|
// used when vetting.yaml omits the `profiles:` block entirely. Matches
|
||||||
|
// the plan's per-stage budget table so the agent still gets coherent
|
||||||
|
// CPUStress/Storage/Network knobs without any operator-visible config.
|
||||||
|
func defaultRawProfiles() map[string]rawProfile {
|
||||||
|
return map[string]rawProfile{
|
||||||
|
ProfileQuick: {
|
||||||
|
StageTimeouts: map[string]string{
|
||||||
|
"CPUStress": "5m",
|
||||||
|
"Storage": "5m",
|
||||||
|
"Network": "2m",
|
||||||
|
"Burn": "3m",
|
||||||
|
"PSU": "1m",
|
||||||
|
},
|
||||||
|
Defaults: map[string]map[string]any{
|
||||||
|
"cpustress": {"cpu_pass": "2m", "mem_pass": "2m", "edac_poll": "10s"},
|
||||||
|
"storage": {"mode": "fio_sample", "fio_size": "1GiB", "fio_time": "3m", "fio_bs": "4k", "fio_rw": "randrw", "verify": "md5"},
|
||||||
|
"network": {"duration": "60s"},
|
||||||
|
"burn": {"duration": "2m", "cpu_workers": "all", "mem_pct": 50, "fio_on_spare": true, "iperf_parallel": 2},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
ProfileDeep: {
|
||||||
|
StageTimeouts: map[string]string{
|
||||||
|
"CPUStress": "2h",
|
||||||
|
"Storage": "4h",
|
||||||
|
"Network": "35m",
|
||||||
|
"Burn": "3h",
|
||||||
|
"PSU": "10m",
|
||||||
|
},
|
||||||
|
Defaults: map[string]map[string]any{
|
||||||
|
"cpustress": {"cpu_pass": "60m", "mem_pass": "60m", "edac_poll": "10s"},
|
||||||
|
"storage": {"mode": "full_disk", "fio_time": "2h", "fio_bs": "4k", "fio_rw": "randrw", "verify": "md5"},
|
||||||
|
"network": {"duration": "30m"},
|
||||||
|
"burn": {"duration": "2h", "cpu_workers": "all", "mem_pct": 70, "fio_on_spare": true, "iperf_parallel": 4},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
ProfileSoak: {
|
||||||
|
Inherit: ProfileDeep,
|
||||||
|
StageTimeouts: map[string]string{
|
||||||
|
"CPUStress": "14h",
|
||||||
|
"Storage": "8h",
|
||||||
|
"Network": "2h30m",
|
||||||
|
"Burn": "20h",
|
||||||
|
"PSU": "15m",
|
||||||
|
},
|
||||||
|
Defaults: map[string]map[string]any{
|
||||||
|
"cpustress": {"cpu_pass": "12h"},
|
||||||
|
"storage": {"mode": "full_disk", "fio_time": "6h"},
|
||||||
|
"network": {"duration": "2h"},
|
||||||
|
"burn": {"duration": "18h", "iperf_parallel": 8},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,57 @@
|
|||||||
|
-- Phase-1 groundwork for profile-aware, threshold-gated vetting.
|
||||||
|
--
|
||||||
|
-- Adds:
|
||||||
|
-- * runs.profile — which profile the run is executing
|
||||||
|
-- (quick|deep|soak; defaults to quick for
|
||||||
|
-- backfill of older rows + tests).
|
||||||
|
-- * thresholds — seeded per run at creation from the
|
||||||
|
-- ProfileRegistry + per-host overrides;
|
||||||
|
-- immutable for that run so a late config
|
||||||
|
-- edit can't retroactively pass/fail it.
|
||||||
|
-- * threshold_evaluations — one row per observed sample vs threshold;
|
||||||
|
-- drives the report + pipeline badges.
|
||||||
|
-- * firmware_snapshots — per-run BIOS/BMC/NIC/HBA/microcode/NVMe
|
||||||
|
-- version captures used by SpecValidate
|
||||||
|
-- diffing in Phase 4.
|
||||||
|
|
||||||
|
ALTER TABLE runs ADD COLUMN profile TEXT NOT NULL DEFAULT 'quick';
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS thresholds (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
run_id INTEGER NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
|
||||||
|
stage_name TEXT NOT NULL, -- "*" matches any stage
|
||||||
|
kind TEXT NOT NULL, -- temp|psu_volt|iperf|fio_p99_us|nic_retrans|edac_ce|edac_ue|mce|...
|
||||||
|
key TEXT NOT NULL, -- "*" or glob-ish match (prefix* / *suffix / exact)
|
||||||
|
op TEXT NOT NULL, -- lt|lte|gt|gte|within_pct
|
||||||
|
threshold REAL NOT NULL,
|
||||||
|
nominal REAL NOT NULL DEFAULT 0, -- used by within_pct; 0 elsewhere
|
||||||
|
unit TEXT NOT NULL DEFAULT '',
|
||||||
|
severity TEXT NOT NULL, -- critical|warning
|
||||||
|
source TEXT NOT NULL -- profile|host_override
|
||||||
|
);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_thresholds_run ON thresholds(run_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_thresholds_kind ON thresholds(run_id, stage_name, kind);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS threshold_evaluations (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
run_id INTEGER NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
|
||||||
|
threshold_id INTEGER NOT NULL REFERENCES thresholds(id) ON DELETE CASCADE,
|
||||||
|
stage_name TEXT NOT NULL,
|
||||||
|
kind TEXT NOT NULL,
|
||||||
|
key TEXT NOT NULL,
|
||||||
|
ts TIMESTAMP NOT NULL,
|
||||||
|
observed REAL NOT NULL,
|
||||||
|
passed INTEGER NOT NULL -- 1 = sample within threshold, 0 = breach
|
||||||
|
);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_threshold_evals_run ON threshold_evaluations(run_id, passed);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS firmware_snapshots (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
run_id INTEGER NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
|
||||||
|
component TEXT NOT NULL, -- bios|bmc|nic|hba|microcode|nvme_fw
|
||||||
|
identifier TEXT NOT NULL, -- slot/serial/device path that distinguishes this component
|
||||||
|
version TEXT NOT NULL,
|
||||||
|
vendor TEXT NOT NULL DEFAULT '',
|
||||||
|
raw_json TEXT NOT NULL DEFAULT '{}'
|
||||||
|
);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_firmware_run ON firmware_snapshots(run_id, component);
|
||||||
@@ -26,11 +26,13 @@ const (
|
|||||||
StateWaitingReboot RunState = "WaitingReboot"
|
StateWaitingReboot RunState = "WaitingReboot"
|
||||||
StateBooting RunState = "Booting"
|
StateBooting RunState = "Booting"
|
||||||
StateInventoryCheck RunState = "InventoryCheck"
|
StateInventoryCheck RunState = "InventoryCheck"
|
||||||
|
StateFirmware RunState = "Firmware"
|
||||||
StateSpecValidate RunState = "SpecValidate"
|
StateSpecValidate RunState = "SpecValidate"
|
||||||
StateSMART RunState = "SMART"
|
StateSMART RunState = "SMART"
|
||||||
StateCPUStress RunState = "CPUStress"
|
StateCPUStress RunState = "CPUStress"
|
||||||
StateStorage RunState = "Storage"
|
StateStorage RunState = "Storage"
|
||||||
StateNetwork RunState = "Network"
|
StateNetwork RunState = "Network"
|
||||||
|
StateBurn RunState = "Burn"
|
||||||
StateGPU RunState = "GPU"
|
StateGPU RunState = "GPU"
|
||||||
StatePSU RunState = "PSU"
|
StatePSU RunState = "PSU"
|
||||||
StateReporting RunState = "Reporting"
|
StateReporting RunState = "Reporting"
|
||||||
@@ -63,6 +65,7 @@ type Run struct {
|
|||||||
HoldIP string
|
HoldIP string
|
||||||
OverrideFlagsJSON string
|
OverrideFlagsJSON string
|
||||||
NonDestructive bool
|
NonDestructive bool
|
||||||
|
Profile string // quick|deep|soak; empty is treated as "quick"
|
||||||
}
|
}
|
||||||
|
|
||||||
type StageState string
|
type StageState string
|
||||||
|
|||||||
@@ -119,9 +119,9 @@ func (d *Dispatcher) pickNext(ctx context.Context) {
|
|||||||
queued = &runs[i]
|
queued = &runs[i]
|
||||||
}
|
}
|
||||||
case model.StateWaitingWoL, model.StateWaitingReboot, model.StateBooting,
|
case model.StateWaitingWoL, model.StateWaitingReboot, model.StateBooting,
|
||||||
model.StateInventoryCheck, model.StateSpecValidate, model.StateSMART,
|
model.StateInventoryCheck, model.StateFirmware, model.StateSpecValidate, model.StateSMART,
|
||||||
model.StateCPUStress, model.StateStorage, model.StateNetwork,
|
model.StateCPUStress, model.StateStorage, model.StateNetwork,
|
||||||
model.StateGPU, model.StatePSU, model.StateReporting:
|
model.StateBurn, model.StateGPU, model.StatePSU, model.StateReporting:
|
||||||
inFlight++
|
inFlight++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -30,11 +30,13 @@ const (
|
|||||||
// "InventoryCheck". Later stages share a name with their state.
|
// "InventoryCheck". Later stages share a name with their state.
|
||||||
var stageStates = map[string]model.RunState{
|
var stageStates = map[string]model.RunState{
|
||||||
"Inventory": model.StateInventoryCheck,
|
"Inventory": model.StateInventoryCheck,
|
||||||
|
"Firmware": model.StateFirmware,
|
||||||
"SpecValidate": model.StateSpecValidate,
|
"SpecValidate": model.StateSpecValidate,
|
||||||
"SMART": model.StateSMART,
|
"SMART": model.StateSMART,
|
||||||
"CPUStress": model.StateCPUStress,
|
"CPUStress": model.StateCPUStress,
|
||||||
"Storage": model.StateStorage,
|
"Storage": model.StateStorage,
|
||||||
"Network": model.StateNetwork,
|
"Network": model.StateNetwork,
|
||||||
|
"Burn": model.StateBurn,
|
||||||
"GPU": model.StateGPU,
|
"GPU": model.StateGPU,
|
||||||
"PSU": model.StatePSU,
|
"PSU": model.StatePSU,
|
||||||
"Reporting": model.StateReporting,
|
"Reporting": model.StateReporting,
|
||||||
@@ -44,11 +46,13 @@ var stageStates = map[string]model.RunState{
|
|||||||
// first stage to Completed. Kept in sync with store.DefaultStageOrder.
|
// first stage to Completed. Kept in sync with store.DefaultStageOrder.
|
||||||
var stageOrder = []model.RunState{
|
var stageOrder = []model.RunState{
|
||||||
model.StateInventoryCheck,
|
model.StateInventoryCheck,
|
||||||
|
model.StateFirmware,
|
||||||
model.StateSpecValidate,
|
model.StateSpecValidate,
|
||||||
model.StateSMART,
|
model.StateSMART,
|
||||||
model.StateCPUStress,
|
model.StateCPUStress,
|
||||||
model.StateStorage,
|
model.StateStorage,
|
||||||
model.StateNetwork,
|
model.StateNetwork,
|
||||||
|
model.StateBurn,
|
||||||
model.StateGPU,
|
model.StateGPU,
|
||||||
model.StatePSU,
|
model.StatePSU,
|
||||||
model.StateReporting,
|
model.StateReporting,
|
||||||
@@ -143,9 +147,9 @@ func nextStageState(current model.RunState) (model.RunState, error) {
|
|||||||
func allActiveStates() []model.RunState {
|
func allActiveStates() []model.RunState {
|
||||||
return []model.RunState{
|
return []model.RunState{
|
||||||
model.StateQueued, model.StateWaitingWoL, model.StateWaitingReboot, model.StateBooting,
|
model.StateQueued, model.StateWaitingWoL, model.StateWaitingReboot, model.StateBooting,
|
||||||
model.StateInventoryCheck, model.StateSpecValidate, model.StateSMART,
|
model.StateInventoryCheck, model.StateFirmware, model.StateSpecValidate, model.StateSMART,
|
||||||
model.StateCPUStress, model.StateStorage, model.StateNetwork,
|
model.StateCPUStress, model.StateStorage, model.StateNetwork,
|
||||||
model.StateGPU, model.StatePSU, model.StateReporting,
|
model.StateBurn, model.StateGPU, model.StatePSU, model.StateReporting,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -80,11 +80,13 @@ func TestTriggerAgentClaimedFromWaitingReboot(t *testing.T) {
|
|||||||
func TestTriggerStageMismatch(t *testing.T) {
|
func TestTriggerStageMismatch(t *testing.T) {
|
||||||
stageStates := []model.RunState{
|
stageStates := []model.RunState{
|
||||||
model.StateInventoryCheck,
|
model.StateInventoryCheck,
|
||||||
|
model.StateFirmware,
|
||||||
model.StateSpecValidate,
|
model.StateSpecValidate,
|
||||||
model.StateSMART,
|
model.StateSMART,
|
||||||
model.StateCPUStress,
|
model.StateCPUStress,
|
||||||
model.StateStorage,
|
model.StateStorage,
|
||||||
model.StateNetwork,
|
model.StateNetwork,
|
||||||
|
model.StateBurn,
|
||||||
model.StateGPU,
|
model.StateGPU,
|
||||||
model.StatePSU,
|
model.StatePSU,
|
||||||
model.StateReporting,
|
model.StateReporting,
|
||||||
@@ -114,11 +116,13 @@ func TestTriggerStageMismatch(t *testing.T) {
|
|||||||
func TestStageNameForState(t *testing.T) {
|
func TestStageNameForState(t *testing.T) {
|
||||||
pairs := map[string]model.RunState{
|
pairs := map[string]model.RunState{
|
||||||
"Inventory": model.StateInventoryCheck,
|
"Inventory": model.StateInventoryCheck,
|
||||||
|
"Firmware": model.StateFirmware,
|
||||||
"SpecValidate": model.StateSpecValidate,
|
"SpecValidate": model.StateSpecValidate,
|
||||||
"SMART": model.StateSMART,
|
"SMART": model.StateSMART,
|
||||||
"CPUStress": model.StateCPUStress,
|
"CPUStress": model.StateCPUStress,
|
||||||
"Storage": model.StateStorage,
|
"Storage": model.StateStorage,
|
||||||
"Network": model.StateNetwork,
|
"Network": model.StateNetwork,
|
||||||
|
"Burn": model.StateBurn,
|
||||||
"GPU": model.StateGPU,
|
"GPU": model.StateGPU,
|
||||||
"PSU": model.StatePSU,
|
"PSU": model.StatePSU,
|
||||||
"Reporting": model.StateReporting,
|
"Reporting": model.StateReporting,
|
||||||
@@ -143,11 +147,13 @@ func TestNextStageWalk(t *testing.T) {
|
|||||||
// one in the canonical order, and from Reporting onto Completed.
|
// one in the canonical order, and from Reporting onto Completed.
|
||||||
chain := []model.RunState{
|
chain := []model.RunState{
|
||||||
model.StateInventoryCheck,
|
model.StateInventoryCheck,
|
||||||
|
model.StateFirmware,
|
||||||
model.StateSpecValidate,
|
model.StateSpecValidate,
|
||||||
model.StateSMART,
|
model.StateSMART,
|
||||||
model.StateCPUStress,
|
model.StateCPUStress,
|
||||||
model.StateStorage,
|
model.StateStorage,
|
||||||
model.StateNetwork,
|
model.StateNetwork,
|
||||||
|
model.StateBurn,
|
||||||
model.StateGPU,
|
model.StateGPU,
|
||||||
model.StatePSU,
|
model.StatePSU,
|
||||||
model.StateReporting,
|
model.StateReporting,
|
||||||
|
|||||||
@@ -0,0 +1,182 @@
|
|||||||
|
package orchestrator
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ThresholdOp is one of the comparison operators a threshold supports.
|
||||||
|
// within_pct is the only one that cares about a "nominal" value for
|
||||||
|
// the key — used for PSU rails ("+12V within 5% of 12.0").
|
||||||
|
type ThresholdOp string
|
||||||
|
|
||||||
|
const (
|
||||||
|
OpLT ThresholdOp = "lt"
|
||||||
|
OpLTE ThresholdOp = "lte"
|
||||||
|
OpGT ThresholdOp = "gt"
|
||||||
|
OpGTE ThresholdOp = "gte"
|
||||||
|
OpWithinPct ThresholdOp = "within_pct"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ThresholdSeverity routes a breach to either "fail the run" or "just
|
||||||
|
// surface a warning in the report". The evaluator returns it alongside
|
||||||
|
// the Pass flag so the caller can decide whether to transition the run.
|
||||||
|
type ThresholdSeverity string
|
||||||
|
|
||||||
|
const (
|
||||||
|
SeverityCritical ThresholdSeverity = "critical"
|
||||||
|
SeverityWarning ThresholdSeverity = "warning"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Threshold is the evaluator's view of a stored threshold row. It's a
|
||||||
|
// flat, already-parsed value-object — the evaluator doesn't look at
|
||||||
|
// the DB and the store doesn't look at the evaluator.
|
||||||
|
type Threshold struct {
|
||||||
|
ID int64
|
||||||
|
Stage string // "*" matches any stage
|
||||||
|
Kind string
|
||||||
|
Key string // glob-ish: "*" / "prefix*" / "*suffix" / exact
|
||||||
|
Op ThresholdOp
|
||||||
|
Value float64
|
||||||
|
Nominal float64 // for within_pct (nominal voltage/frequency)
|
||||||
|
Severity ThresholdSeverity
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sample is a single observation the evaluator tests against matching
|
||||||
|
// thresholds. Stage may be empty when the agent doesn't know which
|
||||||
|
// stage posted it (e.g. the thermal sidecar running across stages) —
|
||||||
|
// empty-stage samples only match thresholds with Stage == "*".
|
||||||
|
type Sample struct {
|
||||||
|
Stage string
|
||||||
|
Kind string
|
||||||
|
Key string
|
||||||
|
Value float64
|
||||||
|
}
|
||||||
|
|
||||||
|
// EvalResult is the per-sample outcome of a threshold evaluation:
|
||||||
|
// which threshold was consulted, whether the sample passed, and the
|
||||||
|
// severity so the caller can fast-fail on critical breaches.
|
||||||
|
type EvalResult struct {
|
||||||
|
Threshold Threshold
|
||||||
|
Passed bool
|
||||||
|
Observed float64
|
||||||
|
}
|
||||||
|
|
||||||
|
// Breached returns true when the sample violated the threshold.
|
||||||
|
func (r EvalResult) Breached() bool { return !r.Passed }
|
||||||
|
|
||||||
|
// CriticalBreach returns true only for critical-severity breaches —
|
||||||
|
// the "fail the run right now" case.
|
||||||
|
func (r EvalResult) CriticalBreach() bool {
|
||||||
|
return r.Breached() && r.Threshold.Severity == SeverityCritical
|
||||||
|
}
|
||||||
|
|
||||||
|
// Evaluate runs a single sample through every threshold that applies
|
||||||
|
// to it. A sample may match more than one threshold (a generic "*"
|
||||||
|
// rule + a stage-specific override); each match produces its own
|
||||||
|
// EvalResult in the returned slice so both get persisted.
|
||||||
|
func Evaluate(sample Sample, thresholds []Threshold) []EvalResult {
|
||||||
|
out := make([]EvalResult, 0, 1)
|
||||||
|
for _, t := range thresholds {
|
||||||
|
if !thresholdMatchesSample(t, sample) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
passed, err := evaluateOp(t.Op, sample.Value, t.Value, t.Nominal)
|
||||||
|
if err != nil {
|
||||||
|
// Unknown operator — skip. The caller could validate on
|
||||||
|
// insert; here we prefer to drop the threshold than to
|
||||||
|
// return an error that forces every Sensor write to 500.
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out = append(out, EvalResult{
|
||||||
|
Threshold: t,
|
||||||
|
Passed: passed,
|
||||||
|
Observed: sample.Value,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// thresholdMatchesSample applies the stage + kind + key filter. Kind
|
||||||
|
// is always literal — there's no "any kind" threshold and if there
|
||||||
|
// ever is we'll add a `kind: *` escape hatch. Stage and key both
|
||||||
|
// support glob-ish matching.
|
||||||
|
func thresholdMatchesSample(t Threshold, s Sample) bool {
|
||||||
|
if t.Kind != s.Kind {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !stageMatches(t.Stage, s.Stage) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !keyMatches(t.Key, s.Key) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// stageMatches reports whether a threshold's stage selector covers the
// stage a sample was stamped with. The selectors "*" and "" both mean
// "every stage" — the empty form is accepted so a threshold declared
// without a stage isn't accidentally inert. Any other selector must
// equal the sample's stage exactly, which means a sample with no stage
// is only ever covered by the catch-all selectors; we don't guess.
func stageMatches(selector, sampleStage string) bool {
	switch selector {
	case "", "*":
		return true
	default:
		return sampleStage == selector
	}
}
|
||||||
|
|
||||||
|
// keyMatches reports whether key satisfies pattern. Supported forms:
//
//	"" or "*"   matches every key
//	"prefix*"   key starts with prefix
//	"*suffix"   key ends with suffix
//	"*infix*"   key contains infix
//	anything else is an exact comparison (a "*" in the middle is literal)
//
// filepath.Match is deliberately not used so Windows `\`-vs-`/` rules
// don't leak into the sample namespace (key "eth0/rx_errors" is not a
// path).
func keyMatches(pattern, key string) bool {
	if pattern == "" || pattern == "*" {
		return true
	}
	// pattern is non-empty from here on, so both ends can be indexed;
	// when both ends are stars it is at least two bytes long.
	last := len(pattern) - 1
	leadingStar := pattern[0] == '*'
	trailingStar := pattern[last] == '*'

	if leadingStar && trailingStar {
		return strings.Contains(key, pattern[1:last])
	}
	if trailingStar {
		return strings.HasPrefix(key, pattern[:last])
	}
	if leadingStar {
		return strings.HasSuffix(key, pattern[1:])
	}
	return key == pattern
}
|
||||||
|
|
||||||
|
// evaluateOp does the numeric comparison. within_pct is the oddball:
|
||||||
|
// it tests |observed - nominal| <= (pct / 100) * nominal. Returns an
|
||||||
|
// error for unknown operators so the caller can log + drop.
|
||||||
|
func evaluateOp(op ThresholdOp, observed, threshold, nominal float64) (bool, error) {
|
||||||
|
switch op {
|
||||||
|
case OpLT:
|
||||||
|
return observed < threshold, nil
|
||||||
|
case OpLTE:
|
||||||
|
return observed <= threshold, nil
|
||||||
|
case OpGT:
|
||||||
|
return observed > threshold, nil
|
||||||
|
case OpGTE:
|
||||||
|
return observed >= threshold, nil
|
||||||
|
case OpWithinPct:
|
||||||
|
if nominal == 0 {
|
||||||
|
// within_pct against a 0 nominal is meaningless. Treat as
|
||||||
|
// pass so a misconfigured rule doesn't spuriously fail.
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
allowed := (threshold / 100.0) * nominal
|
||||||
|
if allowed < 0 {
|
||||||
|
allowed = -allowed
|
||||||
|
}
|
||||||
|
diff := observed - nominal
|
||||||
|
if diff < 0 {
|
||||||
|
diff = -diff
|
||||||
|
}
|
||||||
|
return diff <= allowed, nil
|
||||||
|
default:
|
||||||
|
return false, fmt.Errorf("unknown op %q", op)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,152 @@
|
|||||||
|
package orchestrator
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
// TestEvaluate_Ops covers every operator against the boundary case
|
||||||
|
// (equal to threshold) plus one clearly-inside and one clearly-outside
|
||||||
|
// value. Table-driven because the logic is regular.
|
||||||
|
func TestEvaluate_Ops(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
op ThresholdOp
|
||||||
|
value float64
|
||||||
|
nominal float64
|
||||||
|
observed float64
|
||||||
|
want bool
|
||||||
|
}{
|
||||||
|
{"lt strict below", OpLT, 10, 0, 5, true},
|
||||||
|
{"lt equal fails", OpLT, 10, 0, 10, false},
|
||||||
|
{"lt above fails", OpLT, 10, 0, 15, false},
|
||||||
|
|
||||||
|
{"lte below", OpLTE, 10, 0, 5, true},
|
||||||
|
{"lte equal passes", OpLTE, 10, 0, 10, true},
|
||||||
|
{"lte above fails", OpLTE, 10, 0, 11, false},
|
||||||
|
|
||||||
|
{"gt below fails", OpGT, 900, 0, 800, false},
|
||||||
|
{"gt equal fails", OpGT, 900, 0, 900, false},
|
||||||
|
{"gt above passes", OpGT, 900, 0, 950, true},
|
||||||
|
|
||||||
|
{"gte equal passes", OpGTE, 900, 0, 900, true},
|
||||||
|
{"gte below fails", OpGTE, 900, 0, 800, false},
|
||||||
|
|
||||||
|
{"within_pct exact", OpWithinPct, 5, 12.0, 12.0, true},
|
||||||
|
{"within_pct inside", OpWithinPct, 5, 12.0, 11.7, true},
|
||||||
|
{"within_pct outside low", OpWithinPct, 5, 12.0, 11.0, false},
|
||||||
|
{"within_pct outside high", OpWithinPct, 5, 12.0, 12.7, false},
|
||||||
|
{"within_pct zero nominal passes", OpWithinPct, 5, 0, 99, true},
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
rules := []Threshold{{
|
||||||
|
Stage: "*", Kind: "k", Key: "k", Op: tc.op,
|
||||||
|
Value: tc.value, Nominal: tc.nominal, Severity: SeverityCritical,
|
||||||
|
}}
|
||||||
|
res := Evaluate(Sample{Stage: "Any", Kind: "k", Key: "k", Value: tc.observed}, rules)
|
||||||
|
if len(res) != 1 {
|
||||||
|
t.Fatalf("expected 1 match, got %d", len(res))
|
||||||
|
}
|
||||||
|
if res[0].Passed != tc.want {
|
||||||
|
t.Fatalf("op=%s observed=%v want passed=%v got %v", tc.op, tc.observed, tc.want, res[0].Passed)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestEvaluate_StageMatching: a Network-scoped rule ignores samples
|
||||||
|
// stamped with other stages. Global "*" catches everything.
|
||||||
|
func TestEvaluate_StageMatching(t *testing.T) {
|
||||||
|
rules := []Threshold{
|
||||||
|
{Stage: "*", Kind: "temp", Key: "cpu/*", Op: OpLT, Value: 92, Severity: SeverityCritical},
|
||||||
|
{Stage: "Burn", Kind: "temp", Key: "cpu/*", Op: OpLT, Value: 88, Severity: SeverityCritical},
|
||||||
|
}
|
||||||
|
// Sample from CPUStress — only the global rule applies.
|
||||||
|
res := Evaluate(Sample{Stage: "CPUStress", Kind: "temp", Key: "cpu/0", Value: 89}, rules)
|
||||||
|
if len(res) != 1 {
|
||||||
|
t.Fatalf("cpustress sample: expected 1 match, got %d", len(res))
|
||||||
|
}
|
||||||
|
if res[0].Threshold.Value != 92 {
|
||||||
|
t.Fatalf("cpustress sample matched wrong rule: %+v", res[0].Threshold)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sample from Burn — both rules match. The stricter one breaches.
|
||||||
|
res = Evaluate(Sample{Stage: "Burn", Kind: "temp", Key: "cpu/0", Value: 89}, rules)
|
||||||
|
if len(res) != 2 {
|
||||||
|
t.Fatalf("burn sample: expected 2 matches, got %d", len(res))
|
||||||
|
}
|
||||||
|
var globalPassed, burnPassed bool
|
||||||
|
for _, r := range res {
|
||||||
|
switch r.Threshold.Value {
|
||||||
|
case 92:
|
||||||
|
globalPassed = r.Passed
|
||||||
|
case 88:
|
||||||
|
burnPassed = r.Passed
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !globalPassed {
|
||||||
|
t.Fatalf("global 92C rule should pass at 89C")
|
||||||
|
}
|
||||||
|
if burnPassed {
|
||||||
|
t.Fatalf("burn 88C rule should breach at 89C")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestEvaluate_KeyWildcards covers "*" / "prefix*" / "*suffix".
|
||||||
|
func TestEvaluate_KeyWildcards(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
pattern string
|
||||||
|
key string
|
||||||
|
match bool
|
||||||
|
}{
|
||||||
|
{"*", "anything", true},
|
||||||
|
{"", "anything", true},
|
||||||
|
{"cpu/*", "cpu/0", true},
|
||||||
|
{"cpu/*", "gpu/0", false},
|
||||||
|
{"*/rate", "eth0/rate", true},
|
||||||
|
{"*/rate", "eth0/count", false},
|
||||||
|
{"exact", "exact", true},
|
||||||
|
{"exact", "exactly", false},
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
t.Run(tc.pattern+"_vs_"+tc.key, func(t *testing.T) {
|
||||||
|
got := keyMatches(tc.pattern, tc.key)
|
||||||
|
if got != tc.match {
|
||||||
|
t.Fatalf("keyMatches(%q, %q) = %v, want %v", tc.pattern, tc.key, got, tc.match)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestEvaluate_SeverityDispatch: only critical breaches flip
|
||||||
|
// CriticalBreach; warning-severity breaches stay advisory.
|
||||||
|
func TestEvaluate_SeverityDispatch(t *testing.T) {
|
||||||
|
rules := []Threshold{
|
||||||
|
{Stage: "*", Kind: "temp", Key: "cpu", Op: OpLT, Value: 92, Severity: SeverityCritical},
|
||||||
|
{Stage: "*", Kind: "fio", Key: "p99", Op: OpLT, Value: 50000, Severity: SeverityWarning},
|
||||||
|
}
|
||||||
|
res := Evaluate(Sample{Stage: "CPU", Kind: "temp", Key: "cpu", Value: 95}, rules)
|
||||||
|
if len(res) != 1 || !res[0].CriticalBreach() {
|
||||||
|
t.Fatalf("critical breach not detected: %+v", res)
|
||||||
|
}
|
||||||
|
res = Evaluate(Sample{Stage: "Storage", Kind: "fio", Key: "p99", Value: 80000}, rules)
|
||||||
|
if len(res) != 1 {
|
||||||
|
t.Fatalf("expected 1 match, got %d", len(res))
|
||||||
|
}
|
||||||
|
if res[0].CriticalBreach() {
|
||||||
|
t.Fatalf("warning-severity breach should not be critical")
|
||||||
|
}
|
||||||
|
if !res[0].Breached() {
|
||||||
|
t.Fatalf("warning-severity rule should still show breach=true")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestEvaluate_NoMatchingThreshold: a sample that doesn't hit any rule
|
||||||
|
// produces an empty result slice — callers treat that as "advisory".
|
||||||
|
func TestEvaluate_NoMatchingThreshold(t *testing.T) {
|
||||||
|
rules := []Threshold{
|
||||||
|
{Stage: "*", Kind: "temp", Key: "cpu/*", Op: OpLT, Value: 92, Severity: SeverityCritical},
|
||||||
|
}
|
||||||
|
res := Evaluate(Sample{Stage: "Network", Kind: "iperf", Key: "throughput", Value: 950}, rules)
|
||||||
|
if len(res) != 0 {
|
||||||
|
t.Fatalf("unmatched sample should yield 0 results, got %d", len(res))
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -29,6 +29,16 @@ type Data struct {
|
|||||||
Stages []model.Stage
|
Stages []model.Stage
|
||||||
SpecDiffs []model.SpecDiff
|
SpecDiffs []model.SpecDiff
|
||||||
Aggregates []Aggregate // flattened measurement summary; see Aggregate
|
Aggregates []Aggregate // flattened measurement summary; see Aggregate
|
||||||
|
Firmware []FirmwareSnapshot // captured firmware versions, empty if none
|
||||||
|
}
|
||||||
|
|
||||||
|
// FirmwareSnapshot is the report-facing view of one firmware row.
// Package-local so the HTML template stays decoupled from store types.
// Each field is rendered as one column of the report's Firmware table.
type FirmwareSnapshot struct {
	Component  string // "Component" column
	Identifier string // "Identifier" column, shown in <code>
	Version    string // "Version" column, shown in <code>
	Vendor     string // "Vendor" column
}
|
||||||
|
|
||||||
// Aggregate is a per (kind, key) summary of a run's measurements. Min/
|
// Aggregate is a per (kind, key) summary of a run's measurements. Min/
|
||||||
@@ -196,6 +206,27 @@ const htmlTemplate = `<!doctype html>
|
|||||||
</table>
|
</table>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<h2>Firmware ({{len .Firmware}})</h2>
|
||||||
|
{{if .Firmware}}
|
||||||
|
<table>
|
||||||
|
<thead><tr><th>Component</th><th>Identifier</th><th>Version</th><th>Vendor</th></tr></thead>
|
||||||
|
<tbody>
|
||||||
|
{{range .Firmware}}
|
||||||
|
<tr>
|
||||||
|
<td>{{.Component}}</td>
|
||||||
|
<td><code>{{.Identifier}}</code></td>
|
||||||
|
<td><code>{{.Version}}</code></td>
|
||||||
|
<td>{{.Vendor}}</td>
|
||||||
|
</tr>
|
||||||
|
{{end}}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{{else}}
|
||||||
|
<p>No firmware snapshots captured.</p>
|
||||||
|
{{end}}
|
||||||
|
</section>
|
||||||
|
|
||||||
<section>
|
<section>
|
||||||
<h2>Spec diffs ({{len .SpecDiffs}})</h2>
|
<h2>Spec diffs ({{len .SpecDiffs}})</h2>
|
||||||
{{if .SpecDiffs}}
|
{{if .SpecDiffs}}
|
||||||
|
|||||||
@@ -26,6 +26,31 @@ type Spec struct {
|
|||||||
Disks []DiskSpec `yaml:"disks,omitempty"`
|
Disks []DiskSpec `yaml:"disks,omitempty"`
|
||||||
NICs []NICSpec `yaml:"nics,omitempty"`
|
NICs []NICSpec `yaml:"nics,omitempty"`
|
||||||
GPUs []GPUSpec `yaml:"gpus,omitempty"`
|
GPUs []GPUSpec `yaml:"gpus,omitempty"`
|
||||||
|
Firmware []FirmwareSpec `yaml:"firmware,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// FirmwareSpec is one row in the expected-spec YAML's `firmware:` block.
// Component is one of bios|bmc|nic|hba|microcode|nvme_fw (matches the
// on-wire value from agent/probes.FirmwareSnapshot.Component). Identifier
// is optional — when empty the rule applies to every observed snapshot
// of that component (use for single-instance things like BIOS/microcode);
// when set it pins the check to a specific NIC port / NVMe controller /
// PCI address. Version is the string expected; DiffFirmware compares it
// case-insensitively (strings.EqualFold) after trimming whitespace, and
// skips rules whose Component or Version is blank.
type FirmwareSpec struct {
	Component  string `yaml:"component"`
	Identifier string `yaml:"identifier,omitempty"`
	Version    string `yaml:"version"`
}
|
||||||
|
|
||||||
|
// FirmwareObserved is what the agent reported, in a spec-package-local
// shape so callers don't need to thread store types through the diff.
// The server converts store.FirmwareSnapshot → FirmwareObserved before
// calling DiffFirmware.
type FirmwareObserved struct {
	Component  string // same vocabulary as FirmwareSpec.Component
	Identifier string // which instance of the component; may be empty
	Version    string // version string as reported
}
|
||||||
|
|
||||||
type CPUSpec struct {
|
type CPUSpec struct {
|
||||||
@@ -175,6 +200,73 @@ func diffNICs(expected, actual []NICSpec) []model.SpecDiff {
|
|||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DiffFirmware returns a SpecDiff per firmware expectation that doesn't
|
||||||
|
// find a matching observed snapshot. Matching rules:
|
||||||
|
// - An expected rule with Identifier set matches by (component, id);
|
||||||
|
// a missing observed snapshot yields a "present=false" diff.
|
||||||
|
// - An expected rule with Identifier empty applies to every observed
|
||||||
|
// snapshot of that component — useful for "all NICs must run fw
|
||||||
|
// 8.30" without listing each port. Zero observed snapshots of the
|
||||||
|
// component yields a single "present=false" diff, not N.
|
||||||
|
// - Version mismatch emits an exact-string expected→actual diff.
|
||||||
|
// Case is preserved (firmware versions are case-sensitive in practice).
|
||||||
|
func DiffFirmware(expected []FirmwareSpec, actual []FirmwareObserved) []model.SpecDiff {
|
||||||
|
if len(expected) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
byCompIdent := map[string]FirmwareObserved{}
|
||||||
|
byComp := map[string][]FirmwareObserved{}
|
||||||
|
for _, o := range actual {
|
||||||
|
byCompIdent[fwKey(o.Component, o.Identifier)] = o
|
||||||
|
byComp[o.Component] = append(byComp[o.Component], o)
|
||||||
|
}
|
||||||
|
var out []model.SpecDiff
|
||||||
|
for _, exp := range expected {
|
||||||
|
comp := strings.TrimSpace(exp.Component)
|
||||||
|
if comp == "" || strings.TrimSpace(exp.Version) == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
label := "firmware[" + comp
|
||||||
|
if exp.Identifier != "" {
|
||||||
|
label += "/" + exp.Identifier
|
||||||
|
}
|
||||||
|
label += "]"
|
||||||
|
if exp.Identifier != "" {
|
||||||
|
got, ok := byCompIdent[fwKey(comp, exp.Identifier)]
|
||||||
|
if !ok {
|
||||||
|
out = append(out, diff(label+".present", "true", "false"))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !strings.EqualFold(strings.TrimSpace(got.Version), strings.TrimSpace(exp.Version)) {
|
||||||
|
out = append(out, diff(label+".version", exp.Version, got.Version))
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// No identifier: fan out across every observed snapshot of this
|
||||||
|
// component. Missing is one diff; a mismatching port/controller
|
||||||
|
// emits one diff per mismatch.
|
||||||
|
observed := byComp[comp]
|
||||||
|
if len(observed) == 0 {
|
||||||
|
out = append(out, diff(label+".present", "true", "false"))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
for _, got := range observed {
|
||||||
|
if !strings.EqualFold(strings.TrimSpace(got.Version), strings.TrimSpace(exp.Version)) {
|
||||||
|
slot := got.Identifier
|
||||||
|
if slot == "" {
|
||||||
|
slot = "*"
|
||||||
|
}
|
||||||
|
out = append(out, diff("firmware["+comp+"/"+slot+"].version", exp.Version, got.Version))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// fwKey builds the lookup key for a (component, identifier) pair. Both
// parts are lower-cased so lookups ignore case; "|" separates them.
func fwKey(component, identifier string) string {
	comp, ident := strings.ToLower(component), strings.ToLower(identifier)
	return comp + "|" + ident
}
|
||||||
|
|
||||||
func diffGPUs(expected, actual []GPUSpec) []model.SpecDiff {
|
func diffGPUs(expected, actual []GPUSpec) []model.SpecDiff {
|
||||||
if len(expected) == 0 {
|
if len(expected) == 0 {
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
@@ -119,3 +119,96 @@ func TestDiffSeverityAlwaysCritical(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDiffFirmwareIdentifierMatch(t *testing.T) {
|
||||||
|
exp := []FirmwareSpec{{Component: "bios", Version: "3.2"}}
|
||||||
|
obs := []FirmwareObserved{{Component: "bios", Identifier: "system", Version: "3.2"}}
|
||||||
|
if d := DiffFirmware(exp, obs); len(d) != 0 {
|
||||||
|
t.Fatalf("matching bios version should produce no diff, got %+v", d)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDiffFirmwareVersionMismatch(t *testing.T) {
|
||||||
|
exp := []FirmwareSpec{{Component: "bios", Version: "3.3"}}
|
||||||
|
obs := []FirmwareObserved{{Component: "bios", Identifier: "system", Version: "3.2"}}
|
||||||
|
d := DiffFirmware(exp, obs)
|
||||||
|
if len(d) != 1 {
|
||||||
|
t.Fatalf("want 1 diff, got %d: %+v", len(d), d)
|
||||||
|
}
|
||||||
|
if d[0].Expected != "3.3" || d[0].Actual != "3.2" {
|
||||||
|
t.Fatalf("diff expected/actual = %q/%q, want 3.3/3.2", d[0].Expected, d[0].Actual)
|
||||||
|
}
|
||||||
|
if d[0].Severity != "critical" {
|
||||||
|
t.Errorf("severity = %q, want critical", d[0].Severity)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDiffFirmwareMissingComponentPresent(t *testing.T) {
|
||||||
|
// Expected rule with no identifier + zero observed snapshots →
|
||||||
|
// single "present=false" diff, not N.
|
||||||
|
exp := []FirmwareSpec{{Component: "bmc", Version: "1.74"}}
|
||||||
|
d := DiffFirmware(exp, nil)
|
||||||
|
if len(d) != 1 {
|
||||||
|
t.Fatalf("want 1 diff for missing BMC, got %d: %+v", len(d), d)
|
||||||
|
}
|
||||||
|
if d[0].Field != "firmware[bmc].present" || d[0].Expected != "true" || d[0].Actual != "false" {
|
||||||
|
t.Fatalf("missing-BMC diff = %+v", d[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDiffFirmwareWildcardFanOut(t *testing.T) {
|
||||||
|
// Expected rule with empty identifier fans across every observed
|
||||||
|
// snapshot of the component — one port matches, one doesn't → one diff.
|
||||||
|
exp := []FirmwareSpec{{Component: "nic", Version: "16.32.1010"}}
|
||||||
|
obs := []FirmwareObserved{
|
||||||
|
{Component: "nic", Identifier: "eth0", Version: "16.32.1010"},
|
||||||
|
{Component: "nic", Identifier: "eth1", Version: "14.28.0000"},
|
||||||
|
}
|
||||||
|
d := DiffFirmware(exp, obs)
|
||||||
|
if len(d) != 1 {
|
||||||
|
t.Fatalf("want 1 diff (mismatched eth1 only), got %d: %+v", len(d), d)
|
||||||
|
}
|
||||||
|
if d[0].Field != "firmware[nic/eth1].version" {
|
||||||
|
t.Errorf("field = %q, want firmware[nic/eth1].version", d[0].Field)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDiffFirmwareIdentifierPin(t *testing.T) {
|
||||||
|
// Identifier set: pins the rule to a specific port. Other ports
|
||||||
|
// with mismatched firmware are not evaluated by this rule.
|
||||||
|
exp := []FirmwareSpec{{Component: "nic", Identifier: "eth0", Version: "1.0"}}
|
||||||
|
obs := []FirmwareObserved{
|
||||||
|
{Component: "nic", Identifier: "eth0", Version: "1.0"},
|
||||||
|
{Component: "nic", Identifier: "eth1", Version: "9.9"},
|
||||||
|
}
|
||||||
|
if d := DiffFirmware(exp, obs); len(d) != 0 {
|
||||||
|
t.Fatalf("pinned rule should ignore other ports, got %+v", d)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDiffFirmwareIdentifierPinMissing(t *testing.T) {
|
||||||
|
// Pinned rule with no matching observed snapshot → present=false diff.
|
||||||
|
exp := []FirmwareSpec{{Component: "nic", Identifier: "eth0", Version: "1.0"}}
|
||||||
|
if d := DiffFirmware(exp, nil); len(d) != 1 || d[0].Field != "firmware[nic/eth0].present" {
|
||||||
|
t.Fatalf("want present=false for pinned rule, got %+v", d)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDiffFirmwareEmptyRuleSkipped(t *testing.T) {
|
||||||
|
// Empty component or empty version silently skip rather than panic.
|
||||||
|
exp := []FirmwareSpec{{Component: "", Version: "x"}, {Component: "bios", Version: ""}}
|
||||||
|
obs := []FirmwareObserved{{Component: "bios", Identifier: "system", Version: "3.2"}}
|
||||||
|
if d := DiffFirmware(exp, obs); len(d) != 0 {
|
||||||
|
t.Fatalf("empty rules should skip, got %+v", d)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDiffFirmwareCaseInsensitive(t *testing.T) {
|
||||||
|
// Version match is case-insensitive after trim; avoids spurious diff
|
||||||
|
// from ethtool's "FW1234" vs expected YAML's "fw1234".
|
||||||
|
exp := []FirmwareSpec{{Component: "nvme_fw", Identifier: "nvme0", Version: "fw1234"}}
|
||||||
|
obs := []FirmwareObserved{{Component: "nvme_fw", Identifier: "nvme0", Version: "FW1234"}}
|
||||||
|
if d := DiffFirmware(exp, obs); len(d) != 0 {
|
||||||
|
t.Fatalf("case-insensitive match expected, got %+v", d)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -0,0 +1,97 @@
|
|||||||
|
package store
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"database/sql"
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FirmwareSnapshot is one row in firmware_snapshots. A run captures
// many (one per BIOS/BMC/NIC/HBA/microcode/NVMe) so SpecValidate can
// diff them against the host's expected spec in Phase 4.
type FirmwareSnapshot struct {
	ID         int64  // row id; not supplied on insert
	RunID      int64  // run the snapshot belongs to
	Component  string // bios|bmc|nic|hba|microcode|nvme_fw
	Identifier string // slot/serial/device path
	Version    string
	Vendor     string
	RawJSON    string // stored as "{}" when left empty on insert
}
|
||||||
|
|
||||||
|
// Firmware is the CRUD seam. The agent's Phase-4 probe POSTs captured
// rows; the orchestrator stores them. SpecValidate reads them back.
type Firmware struct {
	DB *sql.DB // handle every method runs its statements against
}
|
||||||
|
|
||||||
|
// Create inserts a single firmware snapshot. One call per (run, component,
|
||||||
|
// identifier) — the agent probe owns dedup/formatting.
|
||||||
|
func (f *Firmware) Create(ctx context.Context, s FirmwareSnapshot) (int64, error) {
|
||||||
|
raw := s.RawJSON
|
||||||
|
if raw == "" {
|
||||||
|
raw = "{}"
|
||||||
|
}
|
||||||
|
res, err := f.DB.ExecContext(ctx, `
|
||||||
|
INSERT INTO firmware_snapshots(run_id, component, identifier, version, vendor, raw_json)
|
||||||
|
VALUES(?,?,?,?,?,?)
|
||||||
|
`, s.RunID, s.Component, s.Identifier, s.Version, s.Vendor, raw)
|
||||||
|
if err != nil {
|
||||||
|
return 0, fmt.Errorf("insert firmware: %w", err)
|
||||||
|
}
|
||||||
|
return res.LastInsertId()
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateBatch persists a slice of snapshots under one transaction.
|
||||||
|
// Agent probe enumerates all components in one pass, so batching wins.
|
||||||
|
func (f *Firmware) CreateBatch(ctx context.Context, rows []FirmwareSnapshot) error {
|
||||||
|
if len(rows) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
tx, err := f.DB.BeginTx(ctx, nil)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer func() { _ = tx.Rollback() }()
|
||||||
|
stmt, err := tx.PrepareContext(ctx, `
|
||||||
|
INSERT INTO firmware_snapshots(run_id, component, identifier, version, vendor, raw_json)
|
||||||
|
VALUES(?,?,?,?,?,?)
|
||||||
|
`)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("prepare firmware insert: %w", err)
|
||||||
|
}
|
||||||
|
defer func() { _ = stmt.Close() }()
|
||||||
|
for _, s := range rows {
|
||||||
|
raw := s.RawJSON
|
||||||
|
if raw == "" {
|
||||||
|
raw = "{}"
|
||||||
|
}
|
||||||
|
if _, err := stmt.ExecContext(ctx, s.RunID, s.Component, s.Identifier, s.Version, s.Vendor, raw); err != nil {
|
||||||
|
return fmt.Errorf("insert firmware %s/%s: %w", s.Component, s.Identifier, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return tx.Commit()
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListForRun returns every firmware snapshot for a run in stable order.
|
||||||
|
// Report page + SpecValidate both read this.
|
||||||
|
func (f *Firmware) ListForRun(ctx context.Context, runID int64) ([]FirmwareSnapshot, error) {
|
||||||
|
rows, err := f.DB.QueryContext(ctx, `
|
||||||
|
SELECT id, run_id, component, identifier, version, vendor, raw_json
|
||||||
|
FROM firmware_snapshots WHERE run_id = ? ORDER BY id
|
||||||
|
`, runID)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
var out []FirmwareSnapshot
|
||||||
|
for rows.Next() {
|
||||||
|
var s FirmwareSnapshot
|
||||||
|
if err := rows.Scan(&s.ID, &s.RunID, &s.Component, &s.Identifier,
|
||||||
|
&s.Version, &s.Vendor, &s.RawJSON); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
out = append(out, s)
|
||||||
|
}
|
||||||
|
return out, rows.Err()
|
||||||
|
}
|
||||||
+30
-12
@@ -14,16 +14,30 @@ type Runs struct {
|
|||||||
DB *sql.DB
|
DB *sql.DB
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create inserts a new run using the default "quick" profile. Older
// call sites (and most tests) target this form — the profile column's
// DEFAULT 'quick' on runs takes care of the backfill.
//
// It is a thin wrapper: all arguments are forwarded unchanged to
// CreateWithProfile with profile "quick", and its results are returned
// as-is.
func (r *Runs) Create(ctx context.Context, hostID int64, tokenHash string, nonDestructive bool) (int64, error) {
	return r.CreateWithProfile(ctx, hostID, tokenHash, nonDestructive, "quick")
}
|
||||||
|
|
||||||
|
// CreateWithProfile inserts a new run with an explicit profile
|
||||||
|
// ("quick"|"deep"|"soak"). The UI handler is the authoritative caller;
|
||||||
|
// empty profile falls back to "quick" so a misconfigured form doesn't
|
||||||
|
// leave a row with a blank profile column.
|
||||||
|
func (r *Runs) CreateWithProfile(ctx context.Context, hostID int64, tokenHash string, nonDestructive bool, profile string) (int64, error) {
|
||||||
|
if profile == "" {
|
||||||
|
profile = "quick"
|
||||||
|
}
|
||||||
now := time.Now().UTC()
|
now := time.Now().UTC()
|
||||||
nd := 0
|
nd := 0
|
||||||
if nonDestructive {
|
if nonDestructive {
|
||||||
nd = 1
|
nd = 1
|
||||||
}
|
}
|
||||||
res, err := r.DB.ExecContext(ctx, `
|
res, err := r.DB.ExecContext(ctx, `
|
||||||
INSERT INTO runs(host_id, state, agent_token_hash, next_boot_target, started_at, non_destructive)
|
INSERT INTO runs(host_id, state, agent_token_hash, next_boot_target, started_at, non_destructive, profile)
|
||||||
VALUES(?,?,?,?,?,?)
|
VALUES(?,?,?,?,?,?,?)
|
||||||
`, hostID, string(model.StateQueued), tokenHash, "linux", now, nd)
|
`, hostID, string(model.StateQueued), tokenHash, "linux", now, nd, profile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, fmt.Errorf("insert run: %w", err)
|
return 0, fmt.Errorf("insert run: %w", err)
|
||||||
}
|
}
|
||||||
@@ -107,14 +121,15 @@ func (r *Runs) Get(ctx context.Context, id int64) (*model.Run, error) {
|
|||||||
SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
|
SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
|
||||||
COALESCE(next_boot_target,''), agent_token_hash, started_at,
|
COALESCE(next_boot_target,''), agent_token_hash, started_at,
|
||||||
completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
|
completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
|
||||||
COALESCE(override_flags_json,''), COALESCE(non_destructive,0)
|
COALESCE(override_flags_json,''), COALESCE(non_destructive,0),
|
||||||
|
COALESCE(profile,'quick')
|
||||||
FROM runs WHERE id = ?
|
FROM runs WHERE id = ?
|
||||||
`, id)
|
`, id)
|
||||||
var run model.Run
|
var run model.Run
|
||||||
var completedAt sql.NullTime
|
var completedAt sql.NullTime
|
||||||
err := row.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
|
err := row.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
|
||||||
&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
|
&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
|
||||||
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive)
|
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive, &run.Profile)
|
||||||
if errors.Is(err, sql.ErrNoRows) {
|
if errors.Is(err, sql.ErrNoRows) {
|
||||||
return nil, ErrNotFound
|
return nil, ErrNotFound
|
||||||
}
|
}
|
||||||
@@ -133,7 +148,8 @@ func (r *Runs) LatestForHost(ctx context.Context, hostID int64) (*model.Run, err
|
|||||||
SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
|
SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
|
||||||
COALESCE(next_boot_target,''), agent_token_hash, started_at,
|
COALESCE(next_boot_target,''), agent_token_hash, started_at,
|
||||||
completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
|
completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
|
||||||
COALESCE(override_flags_json,''), COALESCE(non_destructive,0)
|
COALESCE(override_flags_json,''), COALESCE(non_destructive,0),
|
||||||
|
COALESCE(profile,'quick')
|
||||||
FROM runs WHERE host_id = ?
|
FROM runs WHERE host_id = ?
|
||||||
ORDER BY id DESC LIMIT 1
|
ORDER BY id DESC LIMIT 1
|
||||||
`, hostID)
|
`, hostID)
|
||||||
@@ -141,7 +157,7 @@ func (r *Runs) LatestForHost(ctx context.Context, hostID int64) (*model.Run, err
|
|||||||
var completedAt sql.NullTime
|
var completedAt sql.NullTime
|
||||||
err := row.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
|
err := row.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
|
||||||
&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
|
&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
|
||||||
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive)
|
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive, &run.Profile)
|
||||||
if errors.Is(err, sql.ErrNoRows) {
|
if errors.Is(err, sql.ErrNoRows) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
@@ -165,7 +181,8 @@ func (r *Runs) ListForHost(ctx context.Context, hostID int64, limit int) ([]mode
|
|||||||
SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
|
SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
|
||||||
COALESCE(next_boot_target,''), agent_token_hash, started_at,
|
COALESCE(next_boot_target,''), agent_token_hash, started_at,
|
||||||
completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
|
completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
|
||||||
COALESCE(override_flags_json,''), COALESCE(non_destructive,0)
|
COALESCE(override_flags_json,''), COALESCE(non_destructive,0),
|
||||||
|
COALESCE(profile,'quick')
|
||||||
FROM runs
|
FROM runs
|
||||||
WHERE host_id = ?
|
WHERE host_id = ?
|
||||||
ORDER BY id DESC
|
ORDER BY id DESC
|
||||||
@@ -181,7 +198,7 @@ func (r *Runs) ListForHost(ctx context.Context, hostID int64, limit int) ([]mode
|
|||||||
var completedAt sql.NullTime
|
var completedAt sql.NullTime
|
||||||
if err := rows.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
|
if err := rows.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
|
||||||
&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
|
&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
|
||||||
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive); err != nil {
|
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive, &run.Profile); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if completedAt.Valid {
|
if completedAt.Valid {
|
||||||
@@ -206,7 +223,8 @@ func (r *Runs) Active(ctx context.Context) ([]model.Run, error) {
|
|||||||
SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
|
SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
|
||||||
COALESCE(next_boot_target,''), agent_token_hash, started_at,
|
COALESCE(next_boot_target,''), agent_token_hash, started_at,
|
||||||
completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
|
completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
|
||||||
COALESCE(override_flags_json,''), COALESCE(non_destructive,0)
|
COALESCE(override_flags_json,''), COALESCE(non_destructive,0),
|
||||||
|
COALESCE(profile,'quick')
|
||||||
FROM runs
|
FROM runs
|
||||||
WHERE state NOT IN ('Completed','Released','Cancelled')
|
WHERE state NOT IN ('Completed','Released','Cancelled')
|
||||||
ORDER BY id
|
ORDER BY id
|
||||||
@@ -221,7 +239,7 @@ func (r *Runs) Active(ctx context.Context) ([]model.Run, error) {
|
|||||||
var completedAt sql.NullTime
|
var completedAt sql.NullTime
|
||||||
if err := rows.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
|
if err := rows.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
|
||||||
&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
|
&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
|
||||||
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive); err != nil {
|
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive, &run.Profile); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if completedAt.Valid {
|
if completedAt.Valid {
|
||||||
@@ -275,7 +293,7 @@ func (r *Runs) FindActiveByMAC(ctx context.Context, mac string) (*model.Run, err
|
|||||||
var completedAt sql.NullTime
|
var completedAt sql.NullTime
|
||||||
err := row.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
|
err := row.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
|
||||||
&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
|
&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
|
||||||
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive)
|
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive, &run.Profile)
|
||||||
if errors.Is(err, sql.ErrNoRows) {
|
if errors.Is(err, sql.ErrNoRows) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,11 +17,13 @@ type Stages struct {
|
|||||||
// reaches Inventory; later phases add more executors but the list is fixed.
|
// reaches Inventory; later phases add more executors but the list is fixed.
|
||||||
var DefaultStageOrder = []string{
|
var DefaultStageOrder = []string{
|
||||||
"Inventory",
|
"Inventory",
|
||||||
|
"Firmware",
|
||||||
"SpecValidate",
|
"SpecValidate",
|
||||||
"SMART",
|
"SMART",
|
||||||
"CPUStress",
|
"CPUStress",
|
||||||
"Storage",
|
"Storage",
|
||||||
"Network",
|
"Network",
|
||||||
|
"Burn",
|
||||||
"GPU",
|
"GPU",
|
||||||
"PSU",
|
"PSU",
|
||||||
"Reporting",
|
"Reporting",
|
||||||
|
|||||||
@@ -0,0 +1,280 @@
|
|||||||
|
package store
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"database/sql"
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Threshold is one persisted threshold rule belonging to a single run.
// It mirrors the orchestrator.Threshold value-object, but Op and
// Severity are kept as plain strings so this package never has to
// import orchestrator.
type Threshold struct {
	// ID is the row ID assigned on insert; RunID is the owning run.
	ID, RunID int64
	// Stage maps to the stage_name column; Kind, Key and Op are stored verbatim.
	Stage, Kind, Key, Op string
	// Threshold is the bound itself; Nominal is stored alongside it.
	Threshold, Nominal float64
	// Source is one of profile|host_override.
	Unit, Severity, Source string
}
|
||||||
|
|
||||||
|
// ThresholdEvaluation is a single recorded comparison of an observed
// sample against a threshold. One is written for every sample that
// matched a threshold, regardless of whether it passed or breached,
// and the report page aggregates them to explain why a run failed
// (or was only flagged as a warning).
type ThresholdEvaluation struct {
	// ID is the evaluation row ID; ThresholdID points at the rule it was checked against.
	ID, RunID, ThresholdID int64
	// Stage maps to the stage_name column.
	Stage, Kind, Key string
	// TS is when the sample was observed.
	TS time.Time
	// Observed is the sample value that was compared.
	Observed float64
	// Passed is persisted as 1 (true) or 0 (false).
	Passed bool
}
|
||||||
|
|
||||||
|
// Thresholds is the store-layer seam for threshold rules and their
// evaluations. Its surface is deliberately small: seed rules when a
// run is created, list them for evaluation on each sensor batch,
// record evaluation outcomes, and aggregate them for the report.
type Thresholds struct {
	// DB is the database handle every method issues its statements on.
	DB *sql.DB
}
|
||||||
|
|
||||||
|
// ThresholdSpec is the flat, caller-supplied description of a rule to
// seed. It carries the rule together with its source so that the
// ProfileRegistry-driven seed and per-host overrides share a single
// insert path. It is declared here rather than in config so the store
// layer does not need to import config.
type ThresholdSpec struct {
	// Stage, Kind, Key and Op are copied unchanged onto the Threshold row.
	Stage, Kind, Key, Op string
	// Value becomes Threshold.Threshold; Nominal is copied as-is.
	Value, Nominal float64
	// Unit, Severity and Source are copied unchanged onto the Threshold row.
	Unit, Severity, Source string
}
|
||||||
|
|
||||||
|
// SeedForRun converts the caller's specs into Threshold rows for the
|
||||||
|
// given run and bulk-inserts them. Returns the inserted rows with IDs
|
||||||
|
// populated so the evaluator can pin evaluations without a re-read.
|
||||||
|
func (t *Thresholds) SeedForRun(ctx context.Context, runID int64, specs []ThresholdSpec) ([]Threshold, error) {
|
||||||
|
rows := make([]Threshold, 0, len(specs))
|
||||||
|
for _, s := range specs {
|
||||||
|
rows = append(rows, Threshold{
|
||||||
|
RunID: runID,
|
||||||
|
Stage: s.Stage,
|
||||||
|
Kind: s.Kind,
|
||||||
|
Key: s.Key,
|
||||||
|
Op: s.Op,
|
||||||
|
Threshold: s.Value,
|
||||||
|
Nominal: s.Nominal,
|
||||||
|
Unit: s.Unit,
|
||||||
|
Severity: s.Severity,
|
||||||
|
Source: s.Source,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return t.CreateBatch(ctx, rows)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create inserts a single threshold row — used by the seed path when
|
||||||
|
// the orchestrator materializes per-run rules from the ProfileRegistry.
|
||||||
|
// Returns the row's ID so the evaluator can pin evaluations to it.
|
||||||
|
func (t *Thresholds) Create(ctx context.Context, th Threshold) (int64, error) {
|
||||||
|
res, err := t.DB.ExecContext(ctx, `
|
||||||
|
INSERT INTO thresholds(run_id, stage_name, kind, key, op, threshold, nominal, unit, severity, source)
|
||||||
|
VALUES(?,?,?,?,?,?,?,?,?,?)
|
||||||
|
`, th.RunID, th.Stage, th.Kind, th.Key, th.Op, th.Threshold, th.Nominal, th.Unit, th.Severity, th.Source)
|
||||||
|
if err != nil {
|
||||||
|
return 0, fmt.Errorf("insert threshold: %w", err)
|
||||||
|
}
|
||||||
|
return res.LastInsertId()
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateBatch is the fast path for run seeding — one transaction per
|
||||||
|
// run, one row per threshold. Returns the inserted rows with IDs set
|
||||||
|
// so the caller can drop them into the in-memory evaluator without a
|
||||||
|
// follow-up read.
|
||||||
|
func (t *Thresholds) CreateBatch(ctx context.Context, rows []Threshold) ([]Threshold, error) {
|
||||||
|
if len(rows) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
tx, err := t.DB.BeginTx(ctx, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer func() { _ = tx.Rollback() }()
|
||||||
|
stmt, err := tx.PrepareContext(ctx, `
|
||||||
|
INSERT INTO thresholds(run_id, stage_name, kind, key, op, threshold, nominal, unit, severity, source)
|
||||||
|
VALUES(?,?,?,?,?,?,?,?,?,?)
|
||||||
|
`)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("prepare threshold insert: %w", err)
|
||||||
|
}
|
||||||
|
defer func() { _ = stmt.Close() }()
|
||||||
|
out := make([]Threshold, 0, len(rows))
|
||||||
|
for _, th := range rows {
|
||||||
|
res, err := stmt.ExecContext(ctx, th.RunID, th.Stage, th.Kind, th.Key, th.Op,
|
||||||
|
th.Threshold, th.Nominal, th.Unit, th.Severity, th.Source)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("insert threshold %s/%s: %w", th.Stage, th.Key, err)
|
||||||
|
}
|
||||||
|
id, err := res.LastInsertId()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
th.ID = id
|
||||||
|
out = append(out, th)
|
||||||
|
}
|
||||||
|
if err := tx.Commit(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListForRun returns every threshold seeded for a run, in stable ID
|
||||||
|
// order. Evaluator expects this to be cheap (few tens of rows per run)
|
||||||
|
// and pulls it on each /sensor batch.
|
||||||
|
func (t *Thresholds) ListForRun(ctx context.Context, runID int64) ([]Threshold, error) {
|
||||||
|
rows, err := t.DB.QueryContext(ctx, `
|
||||||
|
SELECT id, run_id, stage_name, kind, key, op, threshold, nominal, unit, severity, source
|
||||||
|
FROM thresholds WHERE run_id = ? ORDER BY id
|
||||||
|
`, runID)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
var out []Threshold
|
||||||
|
for rows.Next() {
|
||||||
|
var th Threshold
|
||||||
|
if err := rows.Scan(&th.ID, &th.RunID, &th.Stage, &th.Kind, &th.Key,
|
||||||
|
&th.Op, &th.Threshold, &th.Nominal, &th.Unit, &th.Severity, &th.Source); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
out = append(out, th)
|
||||||
|
}
|
||||||
|
return out, rows.Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecordEvaluation persists a single evaluation outcome. Called per
|
||||||
|
// matching sample so the run's report has a full audit trail ("temp
|
||||||
|
// hit 95 at 14:22:03" rather than just "temp failed").
|
||||||
|
func (t *Thresholds) RecordEvaluation(ctx context.Context, ev ThresholdEvaluation) error {
|
||||||
|
passed := 0
|
||||||
|
if ev.Passed {
|
||||||
|
passed = 1
|
||||||
|
}
|
||||||
|
if ev.TS.IsZero() {
|
||||||
|
ev.TS = time.Now().UTC()
|
||||||
|
}
|
||||||
|
_, err := t.DB.ExecContext(ctx, `
|
||||||
|
INSERT INTO threshold_evaluations(run_id, threshold_id, stage_name, kind, key, ts, observed, passed)
|
||||||
|
VALUES(?,?,?,?,?,?,?,?)
|
||||||
|
`, ev.RunID, ev.ThresholdID, ev.Stage, ev.Kind, ev.Key, ev.TS, ev.Observed, passed)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("record evaluation: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecordBatch persists a slice of evaluations in one transaction. The
|
||||||
|
// agent-handler hot path builds these one per sample and batches them
|
||||||
|
// under the same Sensor POST so we take one round-trip rather than N.
|
||||||
|
func (t *Thresholds) RecordBatch(ctx context.Context, evals []ThresholdEvaluation) error {
|
||||||
|
if len(evals) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
tx, err := t.DB.BeginTx(ctx, nil)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer func() { _ = tx.Rollback() }()
|
||||||
|
stmt, err := tx.PrepareContext(ctx, `
|
||||||
|
INSERT INTO threshold_evaluations(run_id, threshold_id, stage_name, kind, key, ts, observed, passed)
|
||||||
|
VALUES(?,?,?,?,?,?,?,?)
|
||||||
|
`)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("prepare eval insert: %w", err)
|
||||||
|
}
|
||||||
|
defer func() { _ = stmt.Close() }()
|
||||||
|
for _, ev := range evals {
|
||||||
|
passed := 0
|
||||||
|
if ev.Passed {
|
||||||
|
passed = 1
|
||||||
|
}
|
||||||
|
if ev.TS.IsZero() {
|
||||||
|
ev.TS = time.Now().UTC()
|
||||||
|
}
|
||||||
|
if _, err := stmt.ExecContext(ctx, ev.RunID, ev.ThresholdID, ev.Stage, ev.Kind, ev.Key, ev.TS, ev.Observed, passed); err != nil {
|
||||||
|
return fmt.Errorf("insert eval: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return tx.Commit()
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListEvaluations returns the evaluation history for a run, newest
|
||||||
|
// last. Bounded at a sane cap so a pathological run with a sample-per-
|
||||||
|
// second sidecar doesn't blow up the report page.
|
||||||
|
func (t *Thresholds) ListEvaluations(ctx context.Context, runID int64) ([]ThresholdEvaluation, error) {
|
||||||
|
rows, err := t.DB.QueryContext(ctx, `
|
||||||
|
SELECT id, run_id, threshold_id, stage_name, kind, key, ts, observed, passed
|
||||||
|
FROM threshold_evaluations WHERE run_id = ?
|
||||||
|
ORDER BY id LIMIT 5000
|
||||||
|
`, runID)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
var out []ThresholdEvaluation
|
||||||
|
for rows.Next() {
|
||||||
|
var ev ThresholdEvaluation
|
||||||
|
var passed int
|
||||||
|
if err := rows.Scan(&ev.ID, &ev.RunID, &ev.ThresholdID, &ev.Stage, &ev.Kind,
|
||||||
|
&ev.Key, &ev.TS, &ev.Observed, &passed); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
ev.Passed = passed == 1
|
||||||
|
out = append(out, ev)
|
||||||
|
}
|
||||||
|
return out, rows.Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
// CriticalBreaches returns the evaluations that fire the "fail the
|
||||||
|
// run" gate — critical-severity thresholds with passed=0. The
|
||||||
|
// agent-handler calls this at /result close so an aggregate breach
|
||||||
|
// (p99 latency > bound) still flips the run to FailedHolding even if
|
||||||
|
// no single sample tripped the fast-fail path.
|
||||||
|
func (t *Thresholds) CriticalBreaches(ctx context.Context, runID int64) ([]ThresholdEvaluation, error) {
|
||||||
|
rows, err := t.DB.QueryContext(ctx, `
|
||||||
|
SELECT e.id, e.run_id, e.threshold_id, e.stage_name, e.kind, e.key, e.ts, e.observed, e.passed
|
||||||
|
FROM threshold_evaluations e
|
||||||
|
JOIN thresholds t ON t.id = e.threshold_id
|
||||||
|
WHERE e.run_id = ? AND e.passed = 0 AND t.severity = 'critical'
|
||||||
|
ORDER BY e.id
|
||||||
|
`, runID)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
var out []ThresholdEvaluation
|
||||||
|
for rows.Next() {
|
||||||
|
var ev ThresholdEvaluation
|
||||||
|
var passed int
|
||||||
|
if err := rows.Scan(&ev.ID, &ev.RunID, &ev.ThresholdID, &ev.Stage, &ev.Kind,
|
||||||
|
&ev.Key, &ev.TS, &ev.Observed, &passed); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
ev.Passed = passed == 1
|
||||||
|
out = append(out, ev)
|
||||||
|
}
|
||||||
|
return out, rows.Err()
|
||||||
|
}
|
||||||
@@ -636,6 +636,21 @@ body.bare main { max-width: none; }
|
|||||||
.run-failed-stage { color: var(--danger); }
|
.run-failed-stage { color: var(--danger); }
|
||||||
.run-failed-stage strong { font-family: var(--mono); }
|
.run-failed-stage strong { font-family: var(--mono); }
|
||||||
.run-diffs { color: var(--danger); }
|
.run-diffs { color: var(--danger); }
|
||||||
|
.run-profile-chip {
|
||||||
|
display: inline-block;
|
||||||
|
font-family: var(--mono);
|
||||||
|
font-size: 11px;
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: .04em;
|
||||||
|
padding: 2px 8px;
|
||||||
|
border-radius: 999px;
|
||||||
|
border: 1px solid rgba(255,255,255,.15);
|
||||||
|
background: rgba(255,255,255,.05);
|
||||||
|
color: var(--text-dim);
|
||||||
|
}
|
||||||
|
.run-profile-quick { color: var(--accent); border-color: rgba(60,130,246,.45); background: rgba(60,130,246,.08); }
|
||||||
|
.run-profile-deep { color: #e5b94f; border-color: rgba(229,185,79,.45); background: rgba(229,185,79,.08); }
|
||||||
|
.run-profile-soak { color: #d97a57; border-color: rgba(217,122,87,.45); background: rgba(217,122,87,.08); }
|
||||||
|
|
||||||
.hold-banner {
|
.hold-banner {
|
||||||
background: rgba(229,100,102,.1);
|
background: rgba(229,100,102,.1);
|
||||||
@@ -890,6 +905,17 @@ body.bare main { max-width: none; }
|
|||||||
.host-actions { padding: 0; }
|
.host-actions { padding: 0; }
|
||||||
.host-actions-row { display: flex; gap: 10px; flex-wrap: wrap; align-items: center; }
|
.host-actions-row { display: flex; gap: 10px; flex-wrap: wrap; align-items: center; }
|
||||||
.host-nd-toggle { display: inline-flex; gap: 6px; align-items: center; color: var(--text-dim); font-size: 13px; }
|
.host-nd-toggle { display: inline-flex; gap: 6px; align-items: center; color: var(--text-dim); font-size: 13px; }
|
||||||
|
.host-profile-picker {
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: var(--radius);
|
||||||
|
padding: 6px 10px;
|
||||||
|
display: inline-flex;
|
||||||
|
gap: 12px;
|
||||||
|
align-items: center;
|
||||||
|
margin: 0 8px 0 0;
|
||||||
|
}
|
||||||
|
.host-profile-picker legend { font-size: 11px; color: var(--text-dim); text-transform: uppercase; letter-spacing: .05em; padding: 0 4px; }
|
||||||
|
.host-profile-picker label { display: inline-flex; gap: 4px; align-items: center; font-family: var(--mono); font-size: 13px; cursor: pointer; }
|
||||||
|
|
||||||
.in-flight-banner-wrap { display: contents; }
|
.in-flight-banner-wrap { display: contents; }
|
||||||
.in-flight-banner {
|
.in-flight-banner {
|
||||||
|
|||||||
@@ -65,7 +65,7 @@ func ActiveStep(d ActiveStepData) templ.Component {
|
|||||||
var templ_7745c5c3_Var3 string
|
var templ_7745c5c3_Var3 string
|
||||||
templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var2).String())
|
templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var2).String())
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/active_step.templ`, Line: 1, Col: 0}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `active_step.templ`, Line: 1, Col: 0}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -88,7 +88,7 @@ func ActiveStep(d ActiveStepData) templ.Component {
|
|||||||
var templ_7745c5c3_Var4 string
|
var templ_7745c5c3_Var4 string
|
||||||
templ_7745c5c3_Var4, templ_7745c5c3_Err = templ.JoinStringErrs(d.Stage.Name)
|
templ_7745c5c3_Var4, templ_7745c5c3_Err = templ.JoinStringErrs(d.Stage.Name)
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/active_step.templ`, Line: 28, Col: 102}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `active_step.templ`, Line: 28, Col: 102}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var4))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var4))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -110,7 +110,7 @@ func ActiveStep(d ActiveStepData) templ.Component {
|
|||||||
var templ_7745c5c3_Var6 string
|
var templ_7745c5c3_Var6 string
|
||||||
templ_7745c5c3_Var6, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var5).String())
|
templ_7745c5c3_Var6, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var5).String())
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/active_step.templ`, Line: 1, Col: 0}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `active_step.templ`, Line: 1, Col: 0}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var6))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var6))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -123,7 +123,7 @@ func ActiveStep(d ActiveStepData) templ.Component {
|
|||||||
var templ_7745c5c3_Var7 string
|
var templ_7745c5c3_Var7 string
|
||||||
templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs(stageMarker(string(d.Stage.State)))
|
templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs(stageMarker(string(d.Stage.State)))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/active_step.templ`, Line: 30, Col: 105}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `active_step.templ`, Line: 30, Col: 105}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -136,7 +136,7 @@ func ActiveStep(d ActiveStepData) templ.Component {
|
|||||||
var templ_7745c5c3_Var8 string
|
var templ_7745c5c3_Var8 string
|
||||||
templ_7745c5c3_Var8, templ_7745c5c3_Err = templ.JoinStringErrs(d.Stage.Name)
|
templ_7745c5c3_Var8, templ_7745c5c3_Err = templ.JoinStringErrs(d.Stage.Name)
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/active_step.templ`, Line: 31, Col: 41}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `active_step.templ`, Line: 31, Col: 41}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var8))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var8))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -149,7 +149,7 @@ func ActiveStep(d ActiveStepData) templ.Component {
|
|||||||
var templ_7745c5c3_Var9 string
|
var templ_7745c5c3_Var9 string
|
||||||
templ_7745c5c3_Var9, templ_7745c5c3_Err = templ.JoinStringErrs(stageDurationFromStage(d.Stage))
|
templ_7745c5c3_Var9, templ_7745c5c3_Err = templ.JoinStringErrs(stageDurationFromStage(d.Stage))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/active_step.templ`, Line: 32, Col: 64}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `active_step.templ`, Line: 32, Col: 64}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var9))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var9))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -182,7 +182,7 @@ func ActiveStep(d ActiveStepData) templ.Component {
|
|||||||
var templ_7745c5c3_Var10 string
|
var templ_7745c5c3_Var10 string
|
||||||
templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(d.Stage.Name)
|
templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(d.Stage.Name)
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/active_step.templ`, Line: 43, Col: 99}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `active_step.templ`, Line: 43, Col: 99}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -195,7 +195,7 @@ func ActiveStep(d ActiveStepData) templ.Component {
|
|||||||
var templ_7745c5c3_Var11 string
|
var templ_7745c5c3_Var11 string
|
||||||
templ_7745c5c3_Var11, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("log-%d-%s", d.RunID, d.Stage.Name))
|
templ_7745c5c3_Var11, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("log-%d-%s", d.RunID, d.Stage.Name))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/active_step.templ`, Line: 47, Col: 56}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `active_step.templ`, Line: 47, Col: 56}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var11))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var11))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -208,7 +208,7 @@ func ActiveStep(d ActiveStepData) templ.Component {
|
|||||||
var templ_7745c5c3_Var12 string
|
var templ_7745c5c3_Var12 string
|
||||||
templ_7745c5c3_Var12, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("log-%d-%s", d.RunID, d.Stage.Name))
|
templ_7745c5c3_Var12, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("log-%d-%s", d.RunID, d.Stage.Name))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/active_step.templ`, Line: 48, Col: 62}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `active_step.templ`, Line: 48, Col: 62}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var12))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var12))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
|
|||||||
@@ -102,6 +102,21 @@ templ HostActions(d HostPageData) {
|
|||||||
<div class="host-actions-row">
|
<div class="host-actions-row">
|
||||||
if hostCanStart(d) {
|
if hostCanStart(d) {
|
||||||
<form method="post" action={ templ.SafeURL(fmt.Sprintf("/hosts/%d/start", d.Host.ID)) } class="inline host-start-form">
|
<form method="post" action={ templ.SafeURL(fmt.Sprintf("/hosts/%d/start", d.Host.ID)) } class="inline host-start-form">
|
||||||
|
<fieldset class="host-profile-picker">
|
||||||
|
<legend>Profile</legend>
|
||||||
|
<label title="~10 min — post-repair sanity: all probes + gates, short budgets">
|
||||||
|
<input type="radio" name="profile" value="quick" checked/>
|
||||||
|
quick
|
||||||
|
</label>
|
||||||
|
<label title="~8–12 h — overnight soak: long CPU/RAM, full-disk fio verify, 30 min network">
|
||||||
|
<input type="radio" name="profile" value="deep"/>
|
||||||
|
deep
|
||||||
|
</label>
|
||||||
|
<label title="≥24 h — week-long burn-in; opt-in when you suspect intermittent faults">
|
||||||
|
<input type="radio" name="profile" value="soak"/>
|
||||||
|
soak
|
||||||
|
</label>
|
||||||
|
</fieldset>
|
||||||
<label class="host-nd-toggle">
|
<label class="host-nd-toggle">
|
||||||
<input type="checkbox" name="non_destructive" value="1"/>
|
<input type="checkbox" name="non_destructive" value="1"/>
|
||||||
Non-destructive (skip wipe-probe + disk writes)
|
Non-destructive (skip wipe-probe + disk writes)
|
||||||
@@ -258,6 +273,16 @@ func hostCanStartIfOnline(d HostPageData) bool {
|
|||||||
return d.ActiveRun == nil
|
return d.ActiveRun == nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// profileChipValue picks the label shown on the run page's profile
// chip. Runs created before Phase 1 have an empty profile column; they
// are displayed as "quick", which was the implicit default at the
// time. Any non-empty value is returned unchanged.
func profileChipValue(p string) string {
	switch p {
	case "":
		return "quick"
	default:
		return p
	}
}
|
||||||
|
|
||||||
// runDuration formats the elapsed time for a run using the same buckets
|
// runDuration formats the elapsed time for a run using the same buckets
|
||||||
// as stageDuration. In-flight runs clock from StartedAt to now so the
|
// as stageDuration. In-flight runs clock from StartedAt to now so the
|
||||||
// run-page header + runs-table row keep ticking on each SSE push.
|
// run-page header + runs-table row keep ticking on each SSE push.
|
||||||
|
|||||||
@@ -361,7 +361,7 @@ func HostActions(d HostPageData) templ.Component {
|
|||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 21, "\" class=\"inline host-start-form\"><label class=\"host-nd-toggle\"><input type=\"checkbox\" name=\"non_destructive\" value=\"1\"> Non-destructive (skip wipe-probe + disk writes)</label> <button type=\"submit\" class=\"btn-primary\">Start vetting</button></form>")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 21, "\" class=\"inline host-start-form\"><fieldset class=\"host-profile-picker\"><legend>Profile</legend> <label title=\"~10 min — post-repair sanity: all probes + gates, short budgets\"><input type=\"radio\" name=\"profile\" value=\"quick\" checked> quick</label> <label title=\"~8–12 h — overnight soak: long CPU/RAM, full-disk fio verify, 30 min network\"><input type=\"radio\" name=\"profile\" value=\"deep\"> deep</label> <label title=\"≥24 h — week-long burn-in; opt-in when you suspect intermittent faults\"><input type=\"radio\" name=\"profile\" value=\"soak\"> soak</label></fieldset><label class=\"host-nd-toggle\"><input type=\"checkbox\" name=\"non_destructive\" value=\"1\"> Non-destructive (skip wipe-probe + disk writes)</label> <button type=\"submit\" class=\"btn-primary\">Start vetting</button></form>")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
@@ -383,7 +383,7 @@ func HostActions(d HostPageData) templ.Component {
|
|||||||
var templ_7745c5c3_Var19 templ.SafeURL
|
var templ_7745c5c3_Var19 templ.SafeURL
|
||||||
templ_7745c5c3_Var19, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/delete", d.Host.ID)))
|
templ_7745c5c3_Var19, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/delete", d.Host.ID)))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 116, Col: 89}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 131, Col: 89}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var19))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var19))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -428,7 +428,7 @@ func InFlightBanner(d HostPageData) templ.Component {
|
|||||||
var templ_7745c5c3_Var21 string
|
var templ_7745c5c3_Var21 string
|
||||||
templ_7745c5c3_Var21, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-inflight-%d", d.Host.ID))
|
templ_7745c5c3_Var21, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-inflight-%d", d.Host.ID))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 128, Col: 51}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 143, Col: 51}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var21))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var21))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -441,7 +441,7 @@ func InFlightBanner(d HostPageData) templ.Component {
|
|||||||
var templ_7745c5c3_Var22 string
|
var templ_7745c5c3_Var22 string
|
||||||
templ_7745c5c3_Var22, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-inflight-%d", d.Host.ID))
|
templ_7745c5c3_Var22, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-inflight-%d", d.Host.ID))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 130, Col: 57}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 145, Col: 57}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var22))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var22))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -459,7 +459,7 @@ func InFlightBanner(d HostPageData) templ.Component {
|
|||||||
var templ_7745c5c3_Var23 templ.SafeURL
|
var templ_7745c5c3_Var23 templ.SafeURL
|
||||||
templ_7745c5c3_Var23, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/runs/%d", d.ActiveRun.ID)))
|
templ_7745c5c3_Var23, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/runs/%d", d.ActiveRun.ID)))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 134, Col: 92}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 149, Col: 92}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var23))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var23))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -472,7 +472,7 @@ func InFlightBanner(d HostPageData) templ.Component {
|
|||||||
var templ_7745c5c3_Var24 string
|
var templ_7745c5c3_Var24 string
|
||||||
templ_7745c5c3_Var24, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("%d", d.ActiveRun.ID))
|
templ_7745c5c3_Var24, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("%d", d.ActiveRun.ID))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 135, Col: 74}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 150, Col: 74}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var24))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var24))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -485,7 +485,7 @@ func InFlightBanner(d HostPageData) templ.Component {
|
|||||||
var templ_7745c5c3_Var25 string
|
var templ_7745c5c3_Var25 string
|
||||||
templ_7745c5c3_Var25, templ_7745c5c3_Err = templ.JoinStringErrs(tileStatus(d.ActiveRun))
|
templ_7745c5c3_Var25, templ_7745c5c3_Err = templ.JoinStringErrs(tileStatus(d.ActiveRun))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 136, Col: 59}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 151, Col: 59}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var25))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var25))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -541,7 +541,7 @@ func HostEmptyState(d HostPageData) templ.Component {
|
|||||||
var templ_7745c5c3_Var27 templ.SafeURL
|
var templ_7745c5c3_Var27 templ.SafeURL
|
||||||
templ_7745c5c3_Var27, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/start", d.Host.ID)))
|
templ_7745c5c3_Var27, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/start", d.Host.ID)))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 152, Col: 88}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 167, Col: 88}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var27))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var27))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -655,7 +655,7 @@ func RunRow(d RunRowData) templ.Component {
|
|||||||
var templ_7745c5c3_Var31 string
|
var templ_7745c5c3_Var31 string
|
||||||
templ_7745c5c3_Var31, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("runrow-%d", d.Run.ID))
|
templ_7745c5c3_Var31, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("runrow-%d", d.Run.ID))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 204, Col: 41}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 219, Col: 41}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var31))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var31))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -681,7 +681,7 @@ func RunRow(d RunRowData) templ.Component {
|
|||||||
var templ_7745c5c3_Var33 string
|
var templ_7745c5c3_Var33 string
|
||||||
templ_7745c5c3_Var33, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("runrow-%d", d.Run.ID))
|
templ_7745c5c3_Var33, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("runrow-%d", d.Run.ID))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 206, Col: 47}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 221, Col: 47}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var33))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var33))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -694,7 +694,7 @@ func RunRow(d RunRowData) templ.Component {
|
|||||||
var templ_7745c5c3_Var34 templ.SafeURL
|
var templ_7745c5c3_Var34 templ.SafeURL
|
||||||
templ_7745c5c3_Var34, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/runs/%d", d.Run.ID)))
|
templ_7745c5c3_Var34, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/runs/%d", d.Run.ID)))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 210, Col: 61}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 225, Col: 61}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var34))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var34))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -707,7 +707,7 @@ func RunRow(d RunRowData) templ.Component {
|
|||||||
var templ_7745c5c3_Var35 string
|
var templ_7745c5c3_Var35 string
|
||||||
templ_7745c5c3_Var35, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("#%d", d.Run.ID))
|
templ_7745c5c3_Var35, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("#%d", d.Run.ID))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 210, Col: 94}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 225, Col: 94}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var35))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var35))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -742,7 +742,7 @@ func RunRow(d RunRowData) templ.Component {
|
|||||||
var templ_7745c5c3_Var38 string
|
var templ_7745c5c3_Var38 string
|
||||||
templ_7745c5c3_Var38, templ_7745c5c3_Err = templ.JoinStringErrs(tileStatus(&d.Run))
|
templ_7745c5c3_Var38, templ_7745c5c3_Err = templ.JoinStringErrs(tileStatus(&d.Run))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 213, Col: 92}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 228, Col: 92}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var38))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var38))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -755,7 +755,7 @@ func RunRow(d RunRowData) templ.Component {
|
|||||||
var templ_7745c5c3_Var39 string
|
var templ_7745c5c3_Var39 string
|
||||||
templ_7745c5c3_Var39, templ_7745c5c3_Err = templ.JoinStringErrs(relativeTime(d.Run.StartedAt))
|
templ_7745c5c3_Var39, templ_7745c5c3_Err = templ.JoinStringErrs(relativeTime(d.Run.StartedAt))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 215, Col: 62}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 230, Col: 62}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var39))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var39))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -768,7 +768,7 @@ func RunRow(d RunRowData) templ.Component {
|
|||||||
var templ_7745c5c3_Var40 string
|
var templ_7745c5c3_Var40 string
|
||||||
templ_7745c5c3_Var40, templ_7745c5c3_Err = templ.JoinStringErrs(runDuration(&d.Run))
|
templ_7745c5c3_Var40, templ_7745c5c3_Err = templ.JoinStringErrs(runDuration(&d.Run))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 216, Col: 53}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 231, Col: 53}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var40))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var40))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -805,7 +805,7 @@ func RunRow(d RunRowData) templ.Component {
|
|||||||
var templ_7745c5c3_Var43 string
|
var templ_7745c5c3_Var43 string
|
||||||
templ_7745c5c3_Var43, templ_7745c5c3_Err = templ.JoinStringErrs(name)
|
templ_7745c5c3_Var43, templ_7745c5c3_Err = templ.JoinStringErrs(name)
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 221, Col: 94}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 236, Col: 94}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var43))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var43))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -823,7 +823,7 @@ func RunRow(d RunRowData) templ.Component {
|
|||||||
var templ_7745c5c3_Var44 templ.SafeURL
|
var templ_7745c5c3_Var44 templ.SafeURL
|
||||||
templ_7745c5c3_Var44, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/runs/%d", d.Run.ID)))
|
templ_7745c5c3_Var44, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/runs/%d", d.Run.ID)))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 226, Col: 84}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 241, Col: 84}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var44))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var44))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -867,6 +867,16 @@ func hostCanStartIfOnline(d HostPageData) bool {
|
|||||||
return d.ActiveRun == nil
|
return d.ActiveRun == nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// profileChipValue maps a stored Run.Profile value to the label shown on
// the run page's profile chip.
//
// Runs recorded before Phase 1 have an empty profile column; they are
// labelled "quick", which was the implicit default at the time. Any
// non-empty value is returned unchanged.
func profileChipValue(p string) string {
	switch p {
	case "":
		// Legacy row with no profile recorded.
		return "quick"
	default:
		return p
	}
}
|
||||||
|
|
||||||
// runDuration formats the elapsed time for a run using the same buckets
|
// runDuration formats the elapsed time for a run using the same buckets
|
||||||
// as stageDuration. In-flight runs clock from StartedAt to now so the
|
// as stageDuration. In-flight runs clock from StartedAt to now so the
|
||||||
// run-page header + runs-table row keep ticking on each SSE push.
|
// run-page header + runs-table row keep ticking on each SSE push.
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ func HostTile(t TileData) templ.Component {
|
|||||||
var templ_7745c5c3_Var3 string
|
var templ_7745c5c3_Var3 string
|
||||||
templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("host-%d", t.Host.ID))
|
templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("host-%d", t.Host.ID))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 19, Col: 40}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 19, Col: 40}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -68,7 +68,7 @@ func HostTile(t TileData) templ.Component {
|
|||||||
var templ_7745c5c3_Var4 string
|
var templ_7745c5c3_Var4 string
|
||||||
templ_7745c5c3_Var4, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var2).String())
|
templ_7745c5c3_Var4, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var2).String())
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 1, Col: 0}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 1, Col: 0}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var4))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var4))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -81,7 +81,7 @@ func HostTile(t TileData) templ.Component {
|
|||||||
var templ_7745c5c3_Var5 string
|
var templ_7745c5c3_Var5 string
|
||||||
templ_7745c5c3_Var5, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("tile-%d", t.Host.ID))
|
templ_7745c5c3_Var5, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("tile-%d", t.Host.ID))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 21, Col: 46}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 21, Col: 46}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var5))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var5))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -94,7 +94,7 @@ func HostTile(t TileData) templ.Component {
|
|||||||
var templ_7745c5c3_Var6 templ.SafeURL
|
var templ_7745c5c3_Var6 templ.SafeURL
|
||||||
templ_7745c5c3_Var6, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d", t.Host.ID)))
|
templ_7745c5c3_Var6, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d", t.Host.ID)))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 24, Col: 80}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 24, Col: 80}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var6))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var6))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -107,7 +107,7 @@ func HostTile(t TileData) templ.Component {
|
|||||||
var templ_7745c5c3_Var7 string
|
var templ_7745c5c3_Var7 string
|
||||||
templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs("Open " + t.Host.Name)
|
templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs("Open " + t.Host.Name)
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 24, Col: 117}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 24, Col: 117}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -120,7 +120,7 @@ func HostTile(t TileData) templ.Component {
|
|||||||
var templ_7745c5c3_Var8 string
|
var templ_7745c5c3_Var8 string
|
||||||
templ_7745c5c3_Var8, templ_7745c5c3_Err = templ.JoinStringErrs(t.Host.Name)
|
templ_7745c5c3_Var8, templ_7745c5c3_Err = templ.JoinStringErrs(t.Host.Name)
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 26, Col: 39}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 26, Col: 39}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var8))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var8))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -142,7 +142,7 @@ func HostTile(t TileData) templ.Component {
|
|||||||
var templ_7745c5c3_Var10 string
|
var templ_7745c5c3_Var10 string
|
||||||
templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var9).String())
|
templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var9).String())
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 1, Col: 0}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 1, Col: 0}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -155,7 +155,7 @@ func HostTile(t TileData) templ.Component {
|
|||||||
var templ_7745c5c3_Var11 string
|
var templ_7745c5c3_Var11 string
|
||||||
templ_7745c5c3_Var11, templ_7745c5c3_Err = templ.JoinStringErrs(lastSeenLabel(t.LastSeenAt))
|
templ_7745c5c3_Var11, templ_7745c5c3_Err = templ.JoinStringErrs(lastSeenLabel(t.LastSeenAt))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 28, Col: 95}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 28, Col: 95}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var11))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var11))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -168,7 +168,7 @@ func HostTile(t TileData) templ.Component {
|
|||||||
var templ_7745c5c3_Var12 string
|
var templ_7745c5c3_Var12 string
|
||||||
templ_7745c5c3_Var12, templ_7745c5c3_Err = templ.JoinStringErrs(tileStatus(t.Latest))
|
templ_7745c5c3_Var12, templ_7745c5c3_Err = templ.JoinStringErrs(tileStatus(t.Latest))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 29, Col: 51}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 29, Col: 51}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var12))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var12))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -186,7 +186,7 @@ func HostTile(t TileData) templ.Component {
|
|||||||
var templ_7745c5c3_Var13 templ.SafeURL
|
var templ_7745c5c3_Var13 templ.SafeURL
|
||||||
templ_7745c5c3_Var13, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/start", t.Host.ID)))
|
templ_7745c5c3_Var13, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/start", t.Host.ID)))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 34, Col: 89}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 34, Col: 89}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var13))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var13))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -209,7 +209,7 @@ func HostTile(t TileData) templ.Component {
|
|||||||
var templ_7745c5c3_Var14 templ.SafeURL
|
var templ_7745c5c3_Var14 templ.SafeURL
|
||||||
templ_7745c5c3_Var14, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/cancel", t.Host.ID)))
|
templ_7745c5c3_Var14, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/cancel", t.Host.ID)))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 44, Col: 90}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 44, Col: 90}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var14))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var14))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -227,7 +227,7 @@ func HostTile(t TileData) templ.Component {
|
|||||||
var templ_7745c5c3_Var15 templ.SafeURL
|
var templ_7745c5c3_Var15 templ.SafeURL
|
||||||
templ_7745c5c3_Var15, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/reports/%d", t.Latest.ID)))
|
templ_7745c5c3_Var15, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/reports/%d", t.Latest.ID)))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 48, Col: 88}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 48, Col: 88}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var15))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var15))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ func Layout(title string) templ.Component {
|
|||||||
var templ_7745c5c3_Var2 string
|
var templ_7745c5c3_Var2 string
|
||||||
templ_7745c5c3_Var2, templ_7745c5c3_Err = templ.JoinStringErrs(title)
|
templ_7745c5c3_Var2, templ_7745c5c3_Err = templ.JoinStringErrs(title)
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/layout.templ`, Line: 9, Col: 17}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `layout.templ`, Line: 9, Col: 17}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var2))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var2))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -86,7 +86,7 @@ func BareLayout(title string) templ.Component {
|
|||||||
var templ_7745c5c3_Var4 string
|
var templ_7745c5c3_Var4 string
|
||||||
templ_7745c5c3_Var4, templ_7745c5c3_Err = templ.JoinStringErrs(title)
|
templ_7745c5c3_Var4, templ_7745c5c3_Err = templ.JoinStringErrs(title)
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/layout.templ`, Line: 39, Col: 17}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `layout.templ`, Line: 39, Col: 17}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var4))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var4))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
|
|||||||
@@ -40,11 +40,13 @@ func runStateRank(s model.RunState) int {
|
|||||||
model.StateWaitingReboot,
|
model.StateWaitingReboot,
|
||||||
model.StateBooting,
|
model.StateBooting,
|
||||||
model.StateInventoryCheck,
|
model.StateInventoryCheck,
|
||||||
|
model.StateFirmware,
|
||||||
model.StateSpecValidate,
|
model.StateSpecValidate,
|
||||||
model.StateSMART,
|
model.StateSMART,
|
||||||
model.StateCPUStress,
|
model.StateCPUStress,
|
||||||
model.StateStorage,
|
model.StateStorage,
|
||||||
model.StateNetwork,
|
model.StateNetwork,
|
||||||
|
model.StateBurn,
|
||||||
model.StateGPU,
|
model.StateGPU,
|
||||||
model.StatePSU,
|
model.StatePSU,
|
||||||
model.StateReporting,
|
model.StateReporting,
|
||||||
@@ -205,11 +207,13 @@ func firstStageState(run *model.Run) model.RunState {
|
|||||||
func stageStateByName(name string) (model.RunState, bool) {
|
func stageStateByName(name string) (model.RunState, bool) {
|
||||||
m := map[string]model.RunState{
|
m := map[string]model.RunState{
|
||||||
"Inventory": model.StateInventoryCheck,
|
"Inventory": model.StateInventoryCheck,
|
||||||
|
"Firmware": model.StateFirmware,
|
||||||
"SpecValidate": model.StateSpecValidate,
|
"SpecValidate": model.StateSpecValidate,
|
||||||
"SMART": model.StateSMART,
|
"SMART": model.StateSMART,
|
||||||
"CPUStress": model.StateCPUStress,
|
"CPUStress": model.StateCPUStress,
|
||||||
"Storage": model.StateStorage,
|
"Storage": model.StateStorage,
|
||||||
"Network": model.StateNetwork,
|
"Network": model.StateNetwork,
|
||||||
|
"Burn": model.StateBurn,
|
||||||
"GPU": model.StateGPU,
|
"GPU": model.StateGPU,
|
||||||
"PSU": model.StatePSU,
|
"PSU": model.StatePSU,
|
||||||
"Reporting": model.StateReporting,
|
"Reporting": model.StateReporting,
|
||||||
|
|||||||
@@ -48,11 +48,13 @@ func runStateRank(s model.RunState) int {
|
|||||||
model.StateWaitingReboot,
|
model.StateWaitingReboot,
|
||||||
model.StateBooting,
|
model.StateBooting,
|
||||||
model.StateInventoryCheck,
|
model.StateInventoryCheck,
|
||||||
|
model.StateFirmware,
|
||||||
model.StateSpecValidate,
|
model.StateSpecValidate,
|
||||||
model.StateSMART,
|
model.StateSMART,
|
||||||
model.StateCPUStress,
|
model.StateCPUStress,
|
||||||
model.StateStorage,
|
model.StateStorage,
|
||||||
model.StateNetwork,
|
model.StateNetwork,
|
||||||
|
model.StateBurn,
|
||||||
model.StateGPU,
|
model.StateGPU,
|
||||||
model.StatePSU,
|
model.StatePSU,
|
||||||
model.StateReporting,
|
model.StateReporting,
|
||||||
@@ -213,11 +215,13 @@ func firstStageState(run *model.Run) model.RunState {
|
|||||||
func stageStateByName(name string) (model.RunState, bool) {
|
func stageStateByName(name string) (model.RunState, bool) {
|
||||||
m := map[string]model.RunState{
|
m := map[string]model.RunState{
|
||||||
"Inventory": model.StateInventoryCheck,
|
"Inventory": model.StateInventoryCheck,
|
||||||
|
"Firmware": model.StateFirmware,
|
||||||
"SpecValidate": model.StateSpecValidate,
|
"SpecValidate": model.StateSpecValidate,
|
||||||
"SMART": model.StateSMART,
|
"SMART": model.StateSMART,
|
||||||
"CPUStress": model.StateCPUStress,
|
"CPUStress": model.StateCPUStress,
|
||||||
"Storage": model.StateStorage,
|
"Storage": model.StateStorage,
|
||||||
"Network": model.StateNetwork,
|
"Network": model.StateNetwork,
|
||||||
|
"Burn": model.StateBurn,
|
||||||
"GPU": model.StateGPU,
|
"GPU": model.StateGPU,
|
||||||
"PSU": model.StatePSU,
|
"PSU": model.StatePSU,
|
||||||
"Reporting": model.StateReporting,
|
"Reporting": model.StateReporting,
|
||||||
@@ -312,7 +316,7 @@ func Pipeline(nodes []PipelineNode) templ.Component {
|
|||||||
var templ_7745c5c3_Var3 string
|
var templ_7745c5c3_Var3 string
|
||||||
templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var2).String())
|
templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var2).String())
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 1, Col: 0}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 1, Col: 0}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -339,7 +343,7 @@ func Pipeline(nodes []PipelineNode) templ.Component {
|
|||||||
var templ_7745c5c3_Var5 string
|
var templ_7745c5c3_Var5 string
|
||||||
templ_7745c5c3_Var5, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var4).String())
|
templ_7745c5c3_Var5, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var4).String())
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 1, Col: 0}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 1, Col: 0}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var5))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var5))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -361,7 +365,7 @@ func Pipeline(nodes []PipelineNode) templ.Component {
|
|||||||
var templ_7745c5c3_Var7 string
|
var templ_7745c5c3_Var7 string
|
||||||
templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var6).String())
|
templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var6).String())
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 1, Col: 0}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 1, Col: 0}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -374,7 +378,7 @@ func Pipeline(nodes []PipelineNode) templ.Component {
|
|||||||
var templ_7745c5c3_Var8 string
|
var templ_7745c5c3_Var8 string
|
||||||
templ_7745c5c3_Var8, templ_7745c5c3_Err = templ.JoinStringErrs(stageMarker(n.State))
|
templ_7745c5c3_Var8, templ_7745c5c3_Err = templ.JoinStringErrs(stageMarker(n.State))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 275, Col: 77}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 279, Col: 77}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var8))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var8))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -387,7 +391,7 @@ func Pipeline(nodes []PipelineNode) templ.Component {
|
|||||||
var templ_7745c5c3_Var9 string
|
var templ_7745c5c3_Var9 string
|
||||||
templ_7745c5c3_Var9, templ_7745c5c3_Err = templ.JoinStringErrs(n.Name)
|
templ_7745c5c3_Var9, templ_7745c5c3_Err = templ.JoinStringErrs(n.Name)
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 276, Col: 36}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 280, Col: 36}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var9))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var9))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -400,7 +404,7 @@ func Pipeline(nodes []PipelineNode) templ.Component {
|
|||||||
var templ_7745c5c3_Var10 string
|
var templ_7745c5c3_Var10 string
|
||||||
templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(stageDuration(n))
|
templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(stageDuration(n))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 277, Col: 50}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 281, Col: 50}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -454,7 +458,7 @@ func PipelineSection(run *model.Run, nodes []PipelineNode) templ.Component {
|
|||||||
var templ_7745c5c3_Var12 string
|
var templ_7745c5c3_Var12 string
|
||||||
templ_7745c5c3_Var12, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("pipeline-%d", run.ID))
|
templ_7745c5c3_Var12, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("pipeline-%d", run.ID))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 292, Col: 41}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 296, Col: 41}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var12))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var12))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -467,7 +471,7 @@ func PipelineSection(run *model.Run, nodes []PipelineNode) templ.Component {
|
|||||||
var templ_7745c5c3_Var13 string
|
var templ_7745c5c3_Var13 string
|
||||||
templ_7745c5c3_Var13, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("pipeline-%d", run.ID))
|
templ_7745c5c3_Var13, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("pipeline-%d", run.ID))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 294, Col: 47}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 298, Col: 47}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var13))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var13))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
|
|||||||
@@ -8,26 +8,28 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// node indexes for the default pipeline layout: pre-stages (3) + stage
|
// node indexes for the default pipeline layout: pre-stages (3) + stage
|
||||||
// rows (9) + terminal Completed (1) = 13 nodes.
|
// rows (11) + terminal Completed (1) = 15 nodes.
|
||||||
const (
|
const (
|
||||||
idxQueued = 0
|
idxQueued = 0
|
||||||
idxWaitingReboot = 1
|
idxWaitingReboot = 1
|
||||||
idxBooting = 2
|
idxBooting = 2
|
||||||
idxInventory = 3
|
idxInventory = 3
|
||||||
idxSpecValidate = 4
|
idxFirmware = 4
|
||||||
idxSMART = 5
|
idxSpecValidate = 5
|
||||||
idxCPUStress = 6
|
idxSMART = 6
|
||||||
idxStorage = 7
|
idxCPUStress = 7
|
||||||
idxNetwork = 8
|
idxStorage = 8
|
||||||
idxGPU = 9
|
idxNetwork = 9
|
||||||
idxPSU = 10
|
idxBurn = 10
|
||||||
idxReporting = 11
|
idxGPU = 11
|
||||||
idxCompleted = 12
|
idxPSU = 12
|
||||||
|
idxReporting = 13
|
||||||
|
idxCompleted = 14
|
||||||
)
|
)
|
||||||
|
|
||||||
// seedStages returns a fresh all-pending stage slice in the canonical order.
|
// seedStages returns a fresh all-pending stage slice in the canonical order.
|
||||||
func seedStages() []model.Stage {
|
func seedStages() []model.Stage {
|
||||||
names := []string{"Inventory", "SpecValidate", "SMART", "CPUStress", "Storage", "Network", "GPU", "PSU", "Reporting"}
|
names := []string{"Inventory", "Firmware", "SpecValidate", "SMART", "CPUStress", "Storage", "Network", "Burn", "GPU", "PSU", "Reporting"}
|
||||||
out := make([]model.Stage, len(names))
|
out := make([]model.Stage, len(names))
|
||||||
for i, n := range names {
|
for i, n := range names {
|
||||||
out[i] = model.Stage{Name: n, Ordinal: i, State: model.StagePending}
|
out[i] = model.Stage{Name: n, Ordinal: i, State: model.StagePending}
|
||||||
@@ -37,10 +39,10 @@ func seedStages() []model.Stage {
|
|||||||
|
|
||||||
func TestBuildPipeline_NoRun(t *testing.T) {
|
func TestBuildPipeline_NoRun(t *testing.T) {
|
||||||
nodes := BuildPipeline(nil, nil)
|
nodes := BuildPipeline(nil, nil)
|
||||||
// Ghost pipeline: 3 pre-stages + 9 stage ghosts + 1 terminal = 13
|
// Ghost pipeline: 3 pre-stages + 11 stage ghosts + 1 terminal = 15
|
||||||
// nodes, all pending.
|
// nodes, all pending.
|
||||||
if len(nodes) != 13 {
|
if len(nodes) != 15 {
|
||||||
t.Fatalf("len = %d, want 13", len(nodes))
|
t.Fatalf("len = %d, want 15", len(nodes))
|
||||||
}
|
}
|
||||||
for i, n := range nodes {
|
for i, n := range nodes {
|
||||||
if n.State != "pending" {
|
if n.State != "pending" {
|
||||||
@@ -56,8 +58,8 @@ func TestBuildPipeline_NoRun(t *testing.T) {
|
|||||||
func TestBuildPipeline_GhostStagesBeforeClaim(t *testing.T) {
|
func TestBuildPipeline_GhostStagesBeforeClaim(t *testing.T) {
|
||||||
run := &model.Run{State: model.StateWaitingReboot}
|
run := &model.Run{State: model.StateWaitingReboot}
|
||||||
nodes := BuildPipeline(run, nil)
|
nodes := BuildPipeline(run, nil)
|
||||||
if len(nodes) != 13 {
|
if len(nodes) != 15 {
|
||||||
t.Fatalf("len = %d, want 13", len(nodes))
|
t.Fatalf("len = %d, want 15", len(nodes))
|
||||||
}
|
}
|
||||||
if nodes[idxQueued].State != "passed" {
|
if nodes[idxQueued].State != "passed" {
|
||||||
t.Errorf("Queued = %q, want passed", nodes[idxQueued].State)
|
t.Errorf("Queued = %q, want passed", nodes[idxQueued].State)
|
||||||
@@ -65,7 +67,7 @@ func TestBuildPipeline_GhostStagesBeforeClaim(t *testing.T) {
|
|||||||
if nodes[idxWaitingReboot].State != "running" {
|
if nodes[idxWaitingReboot].State != "running" {
|
||||||
t.Errorf("WaitingReboot = %q, want running", nodes[idxWaitingReboot].State)
|
t.Errorf("WaitingReboot = %q, want running", nodes[idxWaitingReboot].State)
|
||||||
}
|
}
|
||||||
// All 9 stage ghosts must be pending — nothing has started yet.
|
// All 11 stage ghosts must be pending — nothing has started yet.
|
||||||
for i := idxInventory; i <= idxReporting; i++ {
|
for i := idxInventory; i <= idxReporting; i++ {
|
||||||
if nodes[i].State != "pending" {
|
if nodes[i].State != "pending" {
|
||||||
t.Errorf("%s (ghost) = %q, want pending", nodes[i].Name, nodes[i].State)
|
t.Errorf("%s (ghost) = %q, want pending", nodes[i].Name, nodes[i].State)
|
||||||
@@ -81,19 +83,20 @@ func TestBuildPipeline_GhostStagesBeforeClaim(t *testing.T) {
|
|||||||
// pending ghosts rather than silently disappearing.
|
// pending ghosts rather than silently disappearing.
|
||||||
func TestBuildPipeline_GhostStagesDuringStage(t *testing.T) {
|
func TestBuildPipeline_GhostStagesDuringStage(t *testing.T) {
|
||||||
run := &model.Run{State: model.StateSMART}
|
run := &model.Run{State: model.StateSMART}
|
||||||
// Only Inventory + SpecValidate seeded; SMART onwards are ghosts.
|
// Only Inventory + Firmware + SpecValidate seeded; SMART onwards are ghosts.
|
||||||
stages := []model.Stage{
|
stages := []model.Stage{
|
||||||
{Name: "Inventory", Ordinal: 0, State: model.StagePassed},
|
{Name: "Inventory", Ordinal: 0, State: model.StagePassed},
|
||||||
{Name: "SpecValidate", Ordinal: 1, State: model.StagePassed},
|
{Name: "Firmware", Ordinal: 1, State: model.StagePassed},
|
||||||
|
{Name: "SpecValidate", Ordinal: 2, State: model.StagePassed},
|
||||||
}
|
}
|
||||||
nodes := BuildPipeline(run, stages)
|
nodes := BuildPipeline(run, stages)
|
||||||
if len(nodes) != 13 {
|
if len(nodes) != 15 {
|
||||||
t.Fatalf("len = %d, want 13", len(nodes))
|
t.Fatalf("len = %d, want 15", len(nodes))
|
||||||
}
|
}
|
||||||
if nodes[idxSMART].State != "running" {
|
if nodes[idxSMART].State != "running" {
|
||||||
t.Errorf("SMART (ghost) = %q, want running", nodes[idxSMART].State)
|
t.Errorf("SMART (ghost) = %q, want running", nodes[idxSMART].State)
|
||||||
}
|
}
|
||||||
for _, i := range []int{idxCPUStress, idxStorage, idxNetwork, idxGPU, idxPSU, idxReporting} {
|
for _, i := range []int{idxCPUStress, idxStorage, idxNetwork, idxBurn, idxGPU, idxPSU, idxReporting} {
|
||||||
if nodes[i].State != "pending" {
|
if nodes[i].State != "pending" {
|
||||||
t.Errorf("%s (ghost) = %q, want pending", nodes[i].Name, nodes[i].State)
|
t.Errorf("%s (ghost) = %q, want pending", nodes[i].Name, nodes[i].State)
|
||||||
}
|
}
|
||||||
@@ -103,12 +106,13 @@ func TestBuildPipeline_GhostStagesDuringStage(t *testing.T) {
|
|||||||
func TestBuildPipeline_Running(t *testing.T) {
|
func TestBuildPipeline_Running(t *testing.T) {
|
||||||
run := &model.Run{State: model.StateSMART}
|
run := &model.Run{State: model.StateSMART}
|
||||||
stages := seedStages()
|
stages := seedStages()
|
||||||
stages[0].State = model.StagePassed
|
stages[0].State = model.StagePassed // Inventory
|
||||||
stages[1].State = model.StagePassed
|
stages[1].State = model.StagePassed // Firmware
|
||||||
stages[2].State = model.StageRunning
|
stages[2].State = model.StagePassed // SpecValidate
|
||||||
|
stages[3].State = model.StageRunning // SMART
|
||||||
nodes := BuildPipeline(run, stages)
|
nodes := BuildPipeline(run, stages)
|
||||||
if len(nodes) != 13 {
|
if len(nodes) != 15 {
|
||||||
t.Fatalf("len = %d, want 13", len(nodes))
|
t.Fatalf("len = %d, want 15", len(nodes))
|
||||||
}
|
}
|
||||||
// Pre-stages are all past for a run that has reached SMART.
|
// Pre-stages are all past for a run that has reached SMART.
|
||||||
for i := idxQueued; i <= idxBooting; i++ {
|
for i := idxQueued; i <= idxBooting; i++ {
|
||||||
@@ -136,10 +140,10 @@ func TestBuildPipeline_Running(t *testing.T) {
|
|||||||
func TestBuildPipeline_Failed(t *testing.T) {
|
func TestBuildPipeline_Failed(t *testing.T) {
|
||||||
run := &model.Run{State: model.StateFailedHolding, FailedStage: "Storage"}
|
run := &model.Run{State: model.StateFailedHolding, FailedStage: "Storage"}
|
||||||
stages := seedStages()
|
stages := seedStages()
|
||||||
for i := 0; i <= 3; i++ {
|
for i := 0; i <= 4; i++ {
|
||||||
stages[i].State = model.StagePassed
|
stages[i].State = model.StagePassed
|
||||||
}
|
}
|
||||||
stages[4].State = model.StageFailed // Storage
|
stages[5].State = model.StageFailed // Storage
|
||||||
nodes := BuildPipeline(run, stages)
|
nodes := BuildPipeline(run, stages)
|
||||||
// Pre-stages are past a run that reached Storage.
|
// Pre-stages are past a run that reached Storage.
|
||||||
for i := idxQueued; i <= idxBooting; i++ {
|
for i := idxQueued; i <= idxBooting; i++ {
|
||||||
@@ -150,7 +154,7 @@ func TestBuildPipeline_Failed(t *testing.T) {
|
|||||||
if nodes[idxStorage].State != "failed" {
|
if nodes[idxStorage].State != "failed" {
|
||||||
t.Errorf("Storage = %q, want failed", nodes[idxStorage].State)
|
t.Errorf("Storage = %q, want failed", nodes[idxStorage].State)
|
||||||
}
|
}
|
||||||
for _, i := range []int{idxNetwork, idxGPU, idxPSU, idxReporting} {
|
for _, i := range []int{idxNetwork, idxBurn, idxGPU, idxPSU, idxReporting} {
|
||||||
if nodes[i].State != "skipped" {
|
if nodes[i].State != "skipped" {
|
||||||
t.Errorf("%s = %q, want skipped", nodes[i].Name, nodes[i].State)
|
t.Errorf("%s = %q, want skipped", nodes[i].Name, nodes[i].State)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -64,7 +64,7 @@ func Registration(form RegistrationForm) templ.Component {
|
|||||||
var templ_7745c5c3_Var3 string
|
var templ_7745c5c3_Var3 string
|
||||||
templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(form.Error)
|
templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(form.Error)
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/registration.templ`, Line: 22, Col: 35}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `registration.templ`, Line: 22, Col: 35}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -83,7 +83,7 @@ func Registration(form RegistrationForm) templ.Component {
|
|||||||
var templ_7745c5c3_Var4 string
|
var templ_7745c5c3_Var4 string
|
||||||
templ_7745c5c3_Var4, templ_7745c5c3_Err = templ.JoinStringErrs("curl -fsSL " + form.QuickRegisterURL + "/register/quick.sh | sudo bash")
|
templ_7745c5c3_Var4, templ_7745c5c3_Err = templ.JoinStringErrs("curl -fsSL " + form.QuickRegisterURL + "/register/quick.sh | sudo bash")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/registration.templ`, Line: 28, Col: 108}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `registration.templ`, Line: 28, Col: 108}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var4))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var4))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -101,7 +101,7 @@ func Registration(form RegistrationForm) templ.Component {
|
|||||||
var templ_7745c5c3_Var5 string
|
var templ_7745c5c3_Var5 string
|
||||||
templ_7745c5c3_Var5, templ_7745c5c3_Err = templ.JoinStringErrs(form.Name)
|
templ_7745c5c3_Var5, templ_7745c5c3_Err = templ.JoinStringErrs(form.Name)
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/registration.templ`, Line: 38, Col: 55}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `registration.templ`, Line: 38, Col: 55}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var5))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var5))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -114,7 +114,7 @@ func Registration(form RegistrationForm) templ.Component {
|
|||||||
var templ_7745c5c3_Var6 string
|
var templ_7745c5c3_Var6 string
|
||||||
templ_7745c5c3_Var6, templ_7745c5c3_Err = templ.JoinStringErrs(form.MAC)
|
templ_7745c5c3_Var6, templ_7745c5c3_Err = templ.JoinStringErrs(form.MAC)
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/registration.templ`, Line: 42, Col: 53}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `registration.templ`, Line: 42, Col: 53}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var6))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var6))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -127,7 +127,7 @@ func Registration(form RegistrationForm) templ.Component {
|
|||||||
var templ_7745c5c3_Var7 string
|
var templ_7745c5c3_Var7 string
|
||||||
templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs(form.WoLBroadcastIP)
|
templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs(form.WoLBroadcastIP)
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/registration.templ`, Line: 47, Col: 78}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `registration.templ`, Line: 47, Col: 78}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -140,7 +140,7 @@ func Registration(form RegistrationForm) templ.Component {
|
|||||||
var templ_7745c5c3_Var8 string
|
var templ_7745c5c3_Var8 string
|
||||||
templ_7745c5c3_Var8, templ_7745c5c3_Err = templ.JoinStringErrs(defaultPort(form.WoLPort))
|
templ_7745c5c3_Var8, templ_7745c5c3_Err = templ.JoinStringErrs(defaultPort(form.WoLPort))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/registration.templ`, Line: 51, Col: 78}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `registration.templ`, Line: 51, Col: 78}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var8))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var8))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -153,7 +153,7 @@ func Registration(form RegistrationForm) templ.Component {
|
|||||||
var templ_7745c5c3_Var9 string
|
var templ_7745c5c3_Var9 string
|
||||||
templ_7745c5c3_Var9, templ_7745c5c3_Err = templ.JoinStringErrs(form.ExpectedSpecYAML)
|
templ_7745c5c3_Var9, templ_7745c5c3_Err = templ.JoinStringErrs(form.ExpectedSpecYAML)
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/registration.templ`, Line: 56, Col: 127}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `registration.templ`, Line: 56, Col: 127}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var9))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var9))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -166,7 +166,7 @@ func Registration(form RegistrationForm) templ.Component {
|
|||||||
var templ_7745c5c3_Var10 string
|
var templ_7745c5c3_Var10 string
|
||||||
templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(form.Notes)
|
templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(form.Notes)
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/registration.templ`, Line: 60, Col: 51}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `registration.templ`, Line: 60, Col: 51}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
|
|||||||
@@ -83,6 +83,7 @@ templ RunHeader(d RunPageData) {
|
|||||||
<div class="run-header-left">
|
<div class="run-header-left">
|
||||||
<h1 class="run-header-name">{ fmt.Sprintf("Run #%d", d.Run.ID) }</h1>
|
<h1 class="run-header-name">{ fmt.Sprintf("Run #%d", d.Run.ID) }</h1>
|
||||||
<span class={ "run-status-badge", "run-status-" + tileMood(&d.Run) }>{ tileStatus(&d.Run) }</span>
|
<span class={ "run-status-badge", "run-status-" + tileMood(&d.Run) }>{ tileStatus(&d.Run) }</span>
|
||||||
|
<span class={ "run-profile-chip", "run-profile-" + profileChipValue(d.Run.Profile) }>{ profileChipValue(d.Run.Profile) }</span>
|
||||||
<span class="run-duration">{ runDuration(&d.Run) }</span>
|
<span class="run-duration">{ runDuration(&d.Run) }</span>
|
||||||
if d.Run.FailedStage != "" {
|
if d.Run.FailedStage != "" {
|
||||||
<span class="run-failed-stage">failed at <strong>{ d.Run.FailedStage }</strong></span>
|
<span class="run-failed-stage">failed at <strong>{ d.Run.FailedStage }</strong></span>
|
||||||
|
|||||||
@@ -286,142 +286,177 @@ func RunHeader(d RunPageData) templ.Component {
|
|||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 15, "</span> <span class=\"run-duration\">")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 15, "</span> ")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
var templ_7745c5c3_Var15 string
|
var templ_7745c5c3_Var15 = []any{"run-profile-chip", "run-profile-" + profileChipValue(d.Run.Profile)}
|
||||||
templ_7745c5c3_Var15, templ_7745c5c3_Err = templ.JoinStringErrs(runDuration(&d.Run))
|
templ_7745c5c3_Err = templ.RenderCSSItems(ctx, templ_7745c5c3_Buffer, templ_7745c5c3_Var15...)
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 86, Col: 51}
|
|
||||||
}
|
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var15))
|
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 16, "</span> ")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 16, "<span class=\"")
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ_7745c5c3_Err
|
|
||||||
}
|
|
||||||
if d.Run.FailedStage != "" {
|
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 17, "<span class=\"run-failed-stage\">failed at <strong>")
|
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
var templ_7745c5c3_Var16 string
|
var templ_7745c5c3_Var16 string
|
||||||
templ_7745c5c3_Var16, templ_7745c5c3_Err = templ.JoinStringErrs(d.Run.FailedStage)
|
templ_7745c5c3_Var16, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var15).String())
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 88, Col: 72}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 1, Col: 0}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var16))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var16))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 18, "</strong></span> ")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 17, "\">")
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ_7745c5c3_Err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if d.SpecDiffCritical > 0 {
|
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 19, "<span class=\"run-diffs bad\">")
|
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
var templ_7745c5c3_Var17 string
|
var templ_7745c5c3_Var17 string
|
||||||
templ_7745c5c3_Var17, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("%d critical diff", d.SpecDiffCritical))
|
templ_7745c5c3_Var17, templ_7745c5c3_Err = templ.JoinStringErrs(profileChipValue(d.Run.Profile))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 91, Col: 85}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 86, Col: 121}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var17))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var17))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 20, "</span>")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 18, "</span> <span class=\"run-duration\">")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
}
|
var templ_7745c5c3_Var18 string
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 21, "</div><div class=\"run-header-right\">")
|
templ_7745c5c3_Var18, templ_7745c5c3_Err = templ.JoinStringErrs(runDuration(&d.Run))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 87, Col: 51}
|
||||||
}
|
|
||||||
if canCancel(&d.Run) {
|
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 22, "<form method=\"post\" action=\"")
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ_7745c5c3_Err
|
|
||||||
}
|
|
||||||
var templ_7745c5c3_Var18 templ.SafeURL
|
|
||||||
templ_7745c5c3_Var18, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/cancel", d.Host.ID)))
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 96, Col: 90}
|
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var18))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var18))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 23, "\" class=\"inline\" onsubmit=\"return confirm('Cancel run? Destructive stages may leave the host in an intermediate state requiring manual cleanup.');\"><button type=\"submit\" class=\"btn-danger\">Cancel run</button></form>")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 19, "</span> ")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
}
|
if d.Run.FailedStage != "" {
|
||||||
if canOverrideWipe(&d.Run) {
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 20, "<span class=\"run-failed-stage\">failed at <strong>")
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 24, "<form method=\"post\" action=\"")
|
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
var templ_7745c5c3_Var19 templ.SafeURL
|
var templ_7745c5c3_Var19 string
|
||||||
templ_7745c5c3_Var19, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/override-wipe", d.Host.ID)))
|
templ_7745c5c3_Var19, templ_7745c5c3_Err = templ.JoinStringErrs(d.Run.FailedStage)
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 101, Col: 97}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 89, Col: 72}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var19))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var19))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 25, "\" class=\"inline\"><button type=\"submit\" class=\"btn-danger\">Override wipe-probe</button></form>")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 21, "</strong></span> ")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if hasReport(&d.Run) {
|
if d.SpecDiffCritical > 0 {
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 26, "<a class=\"button-like\" href=\"")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 22, "<span class=\"run-diffs bad\">")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
var templ_7745c5c3_Var20 templ.SafeURL
|
var templ_7745c5c3_Var20 string
|
||||||
templ_7745c5c3_Var20, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/reports/%d", d.Run.ID)))
|
templ_7745c5c3_Var20, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("%d critical diff", d.SpecDiffCritical))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 106, Col: 85}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 92, Col: 85}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var20))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var20))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 27, "\" target=\"_blank\" rel=\"noopener\">View report</a> ")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 23, "</span>")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if d.Run.State.IsTerminal() {
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 24, "</div><div class=\"run-header-right\">")
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 28, "<form method=\"post\" action=\"")
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
if canCancel(&d.Run) {
|
||||||
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 25, "<form method=\"post\" action=\"")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
var templ_7745c5c3_Var21 templ.SafeURL
|
var templ_7745c5c3_Var21 templ.SafeURL
|
||||||
templ_7745c5c3_Var21, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/start", d.Host.ID)))
|
templ_7745c5c3_Var21, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/cancel", d.Host.ID)))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 109, Col: 89}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 97, Col: 90}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var21))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var21))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 29, "\" class=\"inline\"><button type=\"submit\" class=\"btn-primary\">Start new run</button></form>")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 26, "\" class=\"inline\" onsubmit=\"return confirm('Cancel run? Destructive stages may leave the host in an intermediate state requiring manual cleanup.');\"><button type=\"submit\" class=\"btn-danger\">Cancel run</button></form>")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 30, "</div></header>")
|
if canOverrideWipe(&d.Run) {
|
||||||
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 27, "<form method=\"post\" action=\"")
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
var templ_7745c5c3_Var22 templ.SafeURL
|
||||||
|
templ_7745c5c3_Var22, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/override-wipe", d.Host.ID)))
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 102, Col: 97}
|
||||||
|
}
|
||||||
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var22))
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 28, "\" class=\"inline\"><button type=\"submit\" class=\"btn-danger\">Override wipe-probe</button></form>")
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if hasReport(&d.Run) {
|
||||||
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 29, "<a class=\"button-like\" href=\"")
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
var templ_7745c5c3_Var23 templ.SafeURL
|
||||||
|
templ_7745c5c3_Var23, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/reports/%d", d.Run.ID)))
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 107, Col: 85}
|
||||||
|
}
|
||||||
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var23))
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 30, "\" target=\"_blank\" rel=\"noopener\">View report</a> ")
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if d.Run.State.IsTerminal() {
|
||||||
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 31, "<form method=\"post\" action=\"")
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
var templ_7745c5c3_Var24 templ.SafeURL
|
||||||
|
templ_7745c5c3_Var24, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/start", d.Host.ID)))
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 110, Col: 89}
|
||||||
|
}
|
||||||
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var24))
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 32, "\" class=\"inline\"><button type=\"submit\" class=\"btn-primary\">Start new run</button></form>")
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 33, "</div></header>")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
@@ -449,83 +484,83 @@ func HoldBanner(d RunPageData) templ.Component {
|
|||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
ctx = templ.InitializeContext(ctx)
|
ctx = templ.InitializeContext(ctx)
|
||||||
templ_7745c5c3_Var22 := templ.GetChildren(ctx)
|
templ_7745c5c3_Var25 := templ.GetChildren(ctx)
|
||||||
if templ_7745c5c3_Var22 == nil {
|
if templ_7745c5c3_Var25 == nil {
|
||||||
templ_7745c5c3_Var22 = templ.NopComponent
|
templ_7745c5c3_Var25 = templ.NopComponent
|
||||||
}
|
}
|
||||||
ctx = templ.ClearChildren(ctx)
|
ctx = templ.ClearChildren(ctx)
|
||||||
if d.Run.State == model.StateFailedHolding && d.Run.HoldIP != "" {
|
if d.Run.State == model.StateFailedHolding && d.Run.HoldIP != "" {
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 31, "<section id=\"")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 34, "<section id=\"")
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ_7745c5c3_Err
|
|
||||||
}
|
|
||||||
var templ_7745c5c3_Var23 string
|
|
||||||
templ_7745c5c3_Var23, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-hold-%d", d.Run.ID))
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 124, Col: 47}
|
|
||||||
}
|
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var23))
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ_7745c5c3_Err
|
|
||||||
}
|
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 32, "\" class=\"hold-banner\" sse-swap=\"")
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ_7745c5c3_Err
|
|
||||||
}
|
|
||||||
var templ_7745c5c3_Var24 string
|
|
||||||
templ_7745c5c3_Var24, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-hold-%d", d.Run.ID))
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 126, Col: 53}
|
|
||||||
}
|
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var24))
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ_7745c5c3_Err
|
|
||||||
}
|
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 33, "\" hx-swap=\"outerHTML\"><span class=\"hold-banner-label\">Host is holding — SSH available:</span> <code class=\"hold-ssh\">")
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ_7745c5c3_Err
|
|
||||||
}
|
|
||||||
var templ_7745c5c3_Var25 string
|
|
||||||
templ_7745c5c3_Var25, templ_7745c5c3_Err = templ.JoinStringErrs(sshInvocation(d.HoldKeyPath, d.Run.HoldIP))
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 130, Col: 70}
|
|
||||||
}
|
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var25))
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ_7745c5c3_Err
|
|
||||||
}
|
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 34, "</code></section>")
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ_7745c5c3_Err
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 35, "<section id=\"")
|
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
var templ_7745c5c3_Var26 string
|
var templ_7745c5c3_Var26 string
|
||||||
templ_7745c5c3_Var26, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-hold-%d", d.Run.ID))
|
templ_7745c5c3_Var26, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-hold-%d", d.Run.ID))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 134, Col: 47}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 125, Col: 47}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var26))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var26))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 36, "\" class=\"detail-hold-placeholder\" sse-swap=\"")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 35, "\" class=\"hold-banner\" sse-swap=\"")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
var templ_7745c5c3_Var27 string
|
var templ_7745c5c3_Var27 string
|
||||||
templ_7745c5c3_Var27, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-hold-%d", d.Run.ID))
|
templ_7745c5c3_Var27, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-hold-%d", d.Run.ID))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 136, Col: 53}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 127, Col: 53}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var27))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var27))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 37, "\" hx-swap=\"outerHTML\"></section>")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 36, "\" hx-swap=\"outerHTML\"><span class=\"hold-banner-label\">Host is holding — SSH available:</span> <code class=\"hold-ssh\">")
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
var templ_7745c5c3_Var28 string
|
||||||
|
templ_7745c5c3_Var28, templ_7745c5c3_Err = templ.JoinStringErrs(sshInvocation(d.HoldKeyPath, d.Run.HoldIP))
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 131, Col: 70}
|
||||||
|
}
|
||||||
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var28))
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 37, "</code></section>")
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 38, "<section id=\"")
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
var templ_7745c5c3_Var29 string
|
||||||
|
templ_7745c5c3_Var29, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-hold-%d", d.Run.ID))
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 135, Col: 47}
|
||||||
|
}
|
||||||
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var29))
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 39, "\" class=\"detail-hold-placeholder\" sse-swap=\"")
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
var templ_7745c5c3_Var30 string
|
||||||
|
templ_7745c5c3_Var30, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-hold-%d", d.Run.ID))
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 137, Col: 53}
|
||||||
|
}
|
||||||
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var30))
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 40, "\" hx-swap=\"outerHTML\"></section>")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
@@ -553,138 +588,138 @@ func RunSpecDiffs(d RunPageData) templ.Component {
|
|||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
ctx = templ.InitializeContext(ctx)
|
ctx = templ.InitializeContext(ctx)
|
||||||
templ_7745c5c3_Var28 := templ.GetChildren(ctx)
|
templ_7745c5c3_Var31 := templ.GetChildren(ctx)
|
||||||
if templ_7745c5c3_Var28 == nil {
|
if templ_7745c5c3_Var31 == nil {
|
||||||
templ_7745c5c3_Var28 = templ.NopComponent
|
templ_7745c5c3_Var31 = templ.NopComponent
|
||||||
}
|
}
|
||||||
ctx = templ.ClearChildren(ctx)
|
ctx = templ.ClearChildren(ctx)
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 38, "<section id=\"")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 41, "<section id=\"")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
var templ_7745c5c3_Var29 string
|
var templ_7745c5c3_Var32 string
|
||||||
templ_7745c5c3_Var29, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-specdiffs-%d", d.Run.ID))
|
templ_7745c5c3_Var32, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-specdiffs-%d", d.Run.ID))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 147, Col: 51}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 148, Col: 51}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var29))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var32))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 39, "\" class=\"detail-section detail-diffs\" sse-swap=\"")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 42, "\" class=\"detail-section detail-diffs\" sse-swap=\"")
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ_7745c5c3_Err
|
|
||||||
}
|
|
||||||
var templ_7745c5c3_Var30 string
|
|
||||||
templ_7745c5c3_Var30, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-specdiffs-%d", d.Run.ID))
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 149, Col: 57}
|
|
||||||
}
|
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var30))
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ_7745c5c3_Err
|
|
||||||
}
|
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 40, "\" hx-swap=\"outerHTML\">")
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ_7745c5c3_Err
|
|
||||||
}
|
|
||||||
if len(d.SpecDiffs) > 0 {
|
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 41, "<details")
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ_7745c5c3_Err
|
|
||||||
}
|
|
||||||
if hasCriticalDiff(d.SpecDiffs) {
|
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 42, " open")
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ_7745c5c3_Err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 43, "><summary><h2>Spec diffs (")
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ_7745c5c3_Err
|
|
||||||
}
|
|
||||||
var templ_7745c5c3_Var31 string
|
|
||||||
templ_7745c5c3_Var31, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("%d", len(d.SpecDiffs)))
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 154, Col: 66}
|
|
||||||
}
|
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var31))
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ_7745c5c3_Err
|
|
||||||
}
|
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 44, ")</h2></summary><ul class=\"diff-list\">")
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ_7745c5c3_Err
|
|
||||||
}
|
|
||||||
for _, diff := range d.SpecDiffs {
|
|
||||||
var templ_7745c5c3_Var32 = []any{"diff-row", "diff-" + diff.Severity}
|
|
||||||
templ_7745c5c3_Err = templ.RenderCSSItems(ctx, templ_7745c5c3_Buffer, templ_7745c5c3_Var32...)
|
|
||||||
if templ_7745c5c3_Err != nil {
|
|
||||||
return templ_7745c5c3_Err
|
|
||||||
}
|
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 45, "<li class=\"")
|
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
var templ_7745c5c3_Var33 string
|
var templ_7745c5c3_Var33 string
|
||||||
templ_7745c5c3_Var33, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var32).String())
|
templ_7745c5c3_Var33, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-specdiffs-%d", d.Run.ID))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 1, Col: 0}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 150, Col: 57}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var33))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var33))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 46, "\"><div class=\"diff-field\">")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 43, "\" hx-swap=\"outerHTML\">")
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
if len(d.SpecDiffs) > 0 {
|
||||||
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 44, "<details")
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
if hasCriticalDiff(d.SpecDiffs) {
|
||||||
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 45, " open")
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 46, "><summary><h2>Spec diffs (")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
var templ_7745c5c3_Var34 string
|
var templ_7745c5c3_Var34 string
|
||||||
templ_7745c5c3_Var34, templ_7745c5c3_Err = templ.JoinStringErrs(diff.Field)
|
templ_7745c5c3_Var34, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("%d", len(d.SpecDiffs)))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 158, Col: 43}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 155, Col: 66}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var34))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var34))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 47, "</div><div class=\"diff-expected\">expected: <code>")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 47, ")</h2></summary><ul class=\"diff-list\">")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
var templ_7745c5c3_Var35 string
|
for _, diff := range d.SpecDiffs {
|
||||||
templ_7745c5c3_Var35, templ_7745c5c3_Err = templ.JoinStringErrs(diff.Expected)
|
var templ_7745c5c3_Var35 = []any{"diff-row", "diff-" + diff.Severity}
|
||||||
if templ_7745c5c3_Err != nil {
|
templ_7745c5c3_Err = templ.RenderCSSItems(ctx, templ_7745c5c3_Buffer, templ_7745c5c3_Var35...)
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 159, Col: 65}
|
|
||||||
}
|
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var35))
|
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 48, "</code></div><div class=\"diff-actual\">actual: <code>")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 48, "<li class=\"")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
var templ_7745c5c3_Var36 string
|
var templ_7745c5c3_Var36 string
|
||||||
templ_7745c5c3_Var36, templ_7745c5c3_Err = templ.JoinStringErrs(diff.Actual)
|
templ_7745c5c3_Var36, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var35).String())
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 160, Col: 59}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 1, Col: 0}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var36))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var36))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 49, "</code></div></li>")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 49, "\"><div class=\"diff-field\">")
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
var templ_7745c5c3_Var37 string
|
||||||
|
templ_7745c5c3_Var37, templ_7745c5c3_Err = templ.JoinStringErrs(diff.Field)
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 159, Col: 43}
|
||||||
|
}
|
||||||
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var37))
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 50, "</div><div class=\"diff-expected\">expected: <code>")
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
var templ_7745c5c3_Var38 string
|
||||||
|
templ_7745c5c3_Var38, templ_7745c5c3_Err = templ.JoinStringErrs(diff.Expected)
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 160, Col: 65}
|
||||||
|
}
|
||||||
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var38))
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 51, "</code></div><div class=\"diff-actual\">actual: <code>")
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
var templ_7745c5c3_Var39 string
|
||||||
|
templ_7745c5c3_Var39, templ_7745c5c3_Err = templ.JoinStringErrs(diff.Actual)
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 161, Col: 59}
|
||||||
|
}
|
||||||
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var39))
|
||||||
|
if templ_7745c5c3_Err != nil {
|
||||||
|
return templ_7745c5c3_Err
|
||||||
|
}
|
||||||
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 52, "</code></div></li>")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 50, "</ul></details>")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 53, "</ul></details>")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 51, "</section>")
|
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 54, "</section>")
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ_7745c5c3_Err
|
return templ_7745c5c3_Err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -99,7 +99,7 @@ func SubStepRow(ss model.SubStep) templ.Component {
|
|||||||
var templ_7745c5c3_Var3 string
|
var templ_7745c5c3_Var3 string
|
||||||
templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("substep-%d-%s-%d", ss.RunID, ss.StageName, ss.Ordinal))
|
templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("substep-%d-%s-%d", ss.RunID, ss.StageName, ss.Ordinal))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/substep_row.templ`, Line: 63, Col: 74}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `substep_row.templ`, Line: 63, Col: 74}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -112,7 +112,7 @@ func SubStepRow(ss model.SubStep) templ.Component {
|
|||||||
var templ_7745c5c3_Var4 string
|
var templ_7745c5c3_Var4 string
|
||||||
templ_7745c5c3_Var4, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var2).String())
|
templ_7745c5c3_Var4, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var2).String())
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/substep_row.templ`, Line: 1, Col: 0}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `substep_row.templ`, Line: 1, Col: 0}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var4))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var4))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -125,7 +125,7 @@ func SubStepRow(ss model.SubStep) templ.Component {
|
|||||||
var templ_7745c5c3_Var5 string
|
var templ_7745c5c3_Var5 string
|
||||||
templ_7745c5c3_Var5, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("substep-%d-%s-%d", ss.RunID, ss.StageName, ss.Ordinal))
|
templ_7745c5c3_Var5, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("substep-%d-%s-%d", ss.RunID, ss.StageName, ss.Ordinal))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/substep_row.templ`, Line: 65, Col: 80}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `substep_row.templ`, Line: 65, Col: 80}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var5))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var5))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -147,7 +147,7 @@ func SubStepRow(ss model.SubStep) templ.Component {
|
|||||||
var templ_7745c5c3_Var7 string
|
var templ_7745c5c3_Var7 string
|
||||||
templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var6).String())
|
templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var6).String())
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/substep_row.templ`, Line: 1, Col: 0}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `substep_row.templ`, Line: 1, Col: 0}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -160,7 +160,7 @@ func SubStepRow(ss model.SubStep) templ.Component {
|
|||||||
var templ_7745c5c3_Var8 string
|
var templ_7745c5c3_Var8 string
|
||||||
templ_7745c5c3_Var8, templ_7745c5c3_Err = templ.JoinStringErrs(subStepMarker(ss.State))
|
templ_7745c5c3_Var8, templ_7745c5c3_Err = templ.JoinStringErrs(subStepMarker(ss.State))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/substep_row.templ`, Line: 68, Col: 96}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `substep_row.templ`, Line: 68, Col: 96}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var8))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var8))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -173,7 +173,7 @@ func SubStepRow(ss model.SubStep) templ.Component {
|
|||||||
var templ_7745c5c3_Var9 string
|
var templ_7745c5c3_Var9 string
|
||||||
templ_7745c5c3_Var9, templ_7745c5c3_Err = templ.JoinStringErrs(ss.Name)
|
templ_7745c5c3_Var9, templ_7745c5c3_Err = templ.JoinStringErrs(ss.Name)
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/substep_row.templ`, Line: 69, Col: 38}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `substep_row.templ`, Line: 69, Col: 38}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var9))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var9))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
@@ -186,7 +186,7 @@ func SubStepRow(ss model.SubStep) templ.Component {
|
|||||||
var templ_7745c5c3_Var10 string
|
var templ_7745c5c3_Var10 string
|
||||||
templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(subStepDuration(ss))
|
templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(subStepDuration(ss))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/substep_row.templ`, Line: 70, Col: 54}
|
return templ.Error{Err: templ_7745c5c3_Err, FileName: `substep_row.templ`, Line: 70, Col: 54}
|
||||||
}
|
}
|
||||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10))
|
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10))
|
||||||
if templ_7745c5c3_Err != nil {
|
if templ_7745c5c3_Err != nil {
|
||||||
|
|||||||
Reference in New Issue
Block a user