package tests import ( "context" "encoding/json" "fmt" "os/exec" "strconv" "strings" "time" ) // Storage is the destructive stage. Phase 2 replaced the old // badblocks + 128 MiB fio combo with a single fio run per disk that // writes, verifies md5 of what it wrote, and reports p99 latency. // Modes: // // - fio_sample (quick): bounded 1 GiB write per disk, ~3 min runtime. // - full_disk (deep/soak): writes the whole device, time-bounded by // the fio_time knob (2 h deep, 6 h soak). // // Pre-gates kept from Phase 1: // // 1. Device allowlist: only act on /dev/ where the kernel-reported // serial matches one of Deps.ExpectedDisks. USB sticks and unexpected // drives are excluded. // 2. Wipe probe: blkid + wipefs --no-act on each target; any filesystem // signature, partition table, or LVM metadata → fail with // UnexpectedData unless Deps.OverrideWipe is set. // // After fio, the stage captures a SMART diff (start snapshot taken // before any writes; end snapshot after all writes finish) and posts // deltas on attributes like Reallocated_Sector_Ct and Current_Pending_Sector. // The threshold evaluator isn't seeded to gate smart_delta out of the // box — those samples are diagnostic for the report. Fio's p99 latency // posts as fio_p99_us so the per-stage Storage warning threshold can // fire on a latency cliff. 
func Storage(ctx context.Context, d Deps) Outcome { if len(d.ExpectedDisks) == 0 { d.Info("Storage: no expected disks in spec — skipping stage") return Outcome{ Passed: true, Summary: "skipped (no expected disks)", Extras: map[string]any{"skipped": true, "reason": "no_expected_disks"}, } } targets := resolveTargets(d.ExpectedDisks) if len(targets) == 0 { d.Error("Storage: none of the expected disks are present on this host") return Outcome{ Passed: false, Message: "device allowlist matched zero disks", Summary: "no allowed disks present", Extras: map[string]any{"expected": d.ExpectedDisks}, } } // Non-destructive runs skip wipe-probe (nothing to refuse), fio // writes, and SMART delta (nothing changed so no delta to report). // Every expected disk is still asserted present so a vanished drive // still fails the stage. if d.NonDestructive { perDisk := map[string]any{} for _, t := range targets { perDisk[t.Device] = map[string]any{"mode": "non_destructive", "serial": t.Serial} } d.Info(fmt.Sprintf("Storage: non-destructive — verified %d disk(s) present", len(targets))) return Outcome{ Passed: true, Summary: fmt.Sprintf("non-destructive: read-only checks only (%d disks)", len(targets)), Extras: map[string]any{"per_disk": perDisk, "non_destructive": true}, } } // Wipe probe on every target. A single dirty disk halts the stage // unless the operator has set OverrideWipe via the UI. 
probes := map[string]wipeProbeResult{} dirty := []string{} for _, t := range targets { probe := probeWipe(ctx, t.Device) probes[t.Device] = probe if probe.HasData { dirty = append(dirty, t.Device) } } if len(dirty) > 0 && !d.OverrideWipe { d.Error("Storage: wipe probe found existing data on: " + strings.Join(dirty, ", ")) return Outcome{ Passed: false, Message: "UnexpectedData: " + strings.Join(dirty, ", ") + " (operator override required)", Summary: fmt.Sprintf("wipe-probe halt (%d disk(s) have data)", len(dirty)), Extras: map[string]any{ "wipe_probe": probes, "override_hint": "click 'Override wipe & retry' in the held tile", "dirty_devices": dirty, }, } } if d.OverrideWipe && len(dirty) > 0 { d.Warn("Storage: operator override engaged — proceeding despite data on " + strings.Join(dirty, ", ")) } // Capture start-of-stage SMART attributes before we write anything // so the delta is attributable to *this* stage's writes and not the // host's prior history. Per-disk failures are tolerated (e.g. the // device doesn't expose SMART); we just can't emit a delta for it. 
startSMART := captureSMARTAttrs(ctx, targets) fioOpts := resolveFioOpts(d.StorageKnobs) d.Info(fmt.Sprintf("Storage: fio mode=%s size=%s runtime=%s bs=%s rw=%s verify=%s", fioOpts.Mode, fioOpts.Size, fioOpts.Runtime, fioOpts.BS, fioOpts.RW, fioOpts.Verify)) var samples []Sample var subs []SubStepReport perDisk := map[string]any{} failed := "" for _, t := range targets { d.Info(fmt.Sprintf("Storage: running fio %s on %s", fioOpts.Mode, t.Device)) fioStart := time.Now() fr := runFioVerify(ctx, t.Device, fioOpts) fioEnd := time.Now() fioSummary, _ := json.Marshal(fr) subs = append(subs, SubStepReport{ Name: fmt.Sprintf("fio %s %s", fioOpts.Mode, t.Device), Passed: fr.Error == "", StartedAt: fioStart, CompletedAt: fioEnd, SummaryJSON: fioSummary, }) perDisk[t.Device] = map[string]any{"fio": fr} if fr.Error == "" { samples = append(samples, Sample{Kind: "fio", Key: t.Device + "/read_iops", Value: fr.ReadIOPS, Unit: "iops"}, Sample{Kind: "fio", Key: t.Device + "/write_iops", Value: fr.WriteIOPS, Unit: "iops"}, ) if fr.ReadP99Us > 0 { samples = append(samples, Sample{Kind: "fio_p99_us", Key: t.Device + "/read", Value: fr.ReadP99Us, Unit: "us"}) } if fr.WriteP99Us > 0 { samples = append(samples, Sample{Kind: "fio_p99_us", Key: t.Device + "/write", Value: fr.WriteP99Us, Unit: "us"}) } } else if failed == "" { failed = t.Device } } // End-of-stage SMART snapshot + diff. We capture whether or not fio // succeeded — a mid-run failure still produces attributable deltas, // which is often more interesting than the stage outcome itself. 
endSMART := captureSMARTAttrs(ctx, targets) deltas := diffSMARTAttrs(startSMART, endSMART) for dev, attrs := range deltas { for attr, delta := range attrs { samples = append(samples, Sample{Kind: "smart_delta", Key: dev + "/" + attr, Value: delta, Unit: "count"}) } } if d.Sensor != nil && len(samples) > 0 { _ = d.Sensor(ctx, samples) } if failed != "" { return Outcome{ Passed: false, Message: "fio verify failed on " + failed, Summary: "fio failed on " + failed, Extras: map[string]any{"per_disk": perDisk, "wipe_probe": probes, "smart_delta": deltas, "fio_opts": fioOpts}, SubSteps: subs, } } d.Info(fmt.Sprintf("Storage: %d disk(s) passed fio --verify", len(targets))) return Outcome{ Passed: true, Summary: fmt.Sprintf("%d disks passed (%s)", len(targets), fioOpts.Mode), Extras: map[string]any{"per_disk": perDisk, "wipe_probe": probes, "smart_delta": deltas, "fio_opts": fioOpts}, SubSteps: subs, } } type diskTarget struct { Serial string Device string } // resolveTargets maps expected-disk serials to /dev/ paths by reading // /sys/block. Uses the same mechanism as probes.inventory to avoid drift. func resolveTargets(expected []ExpectedDisk) []diskTarget { disks, err := listBlockDisks() if err != nil { return nil } // Build serial → device map from /sys. serialOf := map[string]string{} for _, dev := range disks { name := strings.TrimPrefix(dev, "/dev/") s := diskSerialFromSys(name) if s != "" { serialOf[strings.ToLower(s)] = dev } } var out []diskTarget for _, e := range expected { if e.Serial == "" { continue } if dev, ok := serialOf[strings.ToLower(e.Serial)]; ok { out = append(out, diskTarget{Serial: e.Serial, Device: dev}) } } return out } // diskSerialFromSys is a smaller copy of probes.diskSerial; imported // from internal/probes would cause a cycle so we duplicate the short // lookup. If it drifts from the inventory probe, Storage fails because // the serial doesn't match — which is the correct behavior. 
func diskSerialFromSys(name string) string { for _, rel := range []string{ "/sys/block/" + name + "/device/serial", "/sys/block/" + name + "/serial", } { b, err := readFileBytes(rel) if err != nil { continue } s := strings.TrimSpace(string(b)) if s != "" { return s } } // Fall back to udevadm — ID_SERIAL_SHORT is more reliable on SCSI. out, err := exec.Command("udevadm", "info", "--query=property", "--name="+name).Output() if err != nil { return "" } for _, line := range strings.Split(string(out), "\n") { if v, ok := strings.CutPrefix(line, "ID_SERIAL_SHORT="); ok { return strings.TrimSpace(v) } } return "" } func readFileBytes(p string) ([]byte, error) { return readFile(p) } // ---------- wipe probe ---------- type wipeProbeResult struct { Device string `json:"device"` HasData bool `json:"has_data"` Findings []string `json:"findings,omitempty"` } // probeWipe runs blkid + wipefs -n. Any non-empty output from either is // a "has data" signal. This is deliberately conservative: we'd rather // halt on a bare ext4 signature than hand fio a disk with real bytes on // it. func probeWipe(ctx context.Context, device string) wipeProbeResult { out := wipeProbeResult{Device: device} if b, err := exec.CommandContext(ctx, "blkid", "-o", "full", device).Output(); err == nil { s := strings.TrimSpace(string(b)) if s != "" { out.Findings = append(out.Findings, "blkid: "+s) out.HasData = true } } if b, err := exec.CommandContext(ctx, "wipefs", "--no-act", device).Output(); err == nil { s := strings.TrimSpace(string(b)) // wipefs prints a header line even on a clean disk; keep only // lines with actual signature data. 
for _, line := range strings.Split(s, "\n") { line = strings.TrimSpace(line) if line == "" || strings.HasPrefix(line, "DEVICE") || strings.HasPrefix(line, "offset") { continue } out.Findings = append(out.Findings, "wipefs: "+line) out.HasData = true } } return out } // ---------- fio ---------- // fioOpts resolves the probe knobs into the concrete flag values fio // needs. Defaults match the quick profile's fio_sample shape so callers // with zero knobs still run something bounded. type fioOpts struct { Mode string `json:"mode"` // "fio_sample" | "full_disk" Size string `json:"size"` // "1GiB"; only used for fio_sample Runtime time.Duration `json:"runtime"` // bounding time BS string `json:"bs"` // "4k" RW string `json:"rw"` // "randrw" Verify string `json:"verify"` // "md5" | "" } // resolveFioOpts normalizes the knobs into a runnable config. Zero- // valued fields fall back to the quick defaults so a stage that's // missing its knobs still has coherent behavior (safer than refusing). func resolveFioOpts(k StorageKnobs) fioOpts { o := fioOpts{ Mode: firstNonEmpty(k.Mode, "fio_sample"), Size: firstNonEmpty(k.FioSize, "1GiB"), Runtime: k.FioTime, BS: firstNonEmpty(k.FioBS, "4k"), RW: firstNonEmpty(k.FioRW, "randrw"), Verify: firstNonEmpty(k.Verify, "md5"), } if o.Runtime <= 0 { o.Runtime = 3 * time.Minute } return o } func firstNonEmpty(vs ...string) string { for _, v := range vs { if v != "" { return v } } return "" } type fioResult struct { Mode string `json:"mode"` ReadIOPS float64 `json:"read_iops"` WriteIOPS float64 `json:"write_iops"` ReadBWKBps float64 `json:"read_bw_kbps"` WriteBWKBps float64 `json:"write_bw_kbps"` ReadP99Us float64 `json:"read_p99_us,omitempty"` WriteP99Us float64 `json:"write_p99_us,omitempty"` Error string `json:"error,omitempty"` OutputTail string `json:"output_tail,omitempty"` } // runFioVerify invokes fio with md5-verify semantics. fio_sample mode // caps the IO at opts.Size; full_disk drives the whole device bounded // by runtime. 
Both use direct IO to bypass the page cache — we want // real disk latency, not Linux' cheerful buffer. func runFioVerify(ctx context.Context, device string, opts fioOpts) fioResult { // 30s grace over runtime so fio has time to flush + close cleanly. runCtx, cancel := context.WithTimeout(ctx, opts.Runtime+30*time.Second) defer cancel() args := []string{ "--name=verify-" + strings.TrimPrefix(device, "/dev/"), "--filename=" + device, "--rw=" + opts.RW, "--bs=" + opts.BS, "--numjobs=1", "--direct=1", "--group_reporting", "--output-format=json", "--runtime=" + strconv.Itoa(int(opts.Runtime.Seconds())), } if opts.Verify != "" { args = append(args, "--verify="+opts.Verify, "--verify_pattern=random", "--do_verify=1", ) } switch opts.Mode { case "full_disk": // Time-bounded across the full device — fio uses the device's // full size when --size is omitted on a block device. args = append(args, "--time_based=1") default: // fio_sample: bounded write. Setting --size= limits the IO // volume regardless of runtime. args = append(args, "--size="+opts.Size, "--time_based=0") } cmd := exec.CommandContext(runCtx, "fio", args...) out, err := cmd.Output() r := fioResult{Mode: opts.Mode, OutputTail: tailLines(string(out), 20)} if err != nil { r.Error = err.Error() return r } parsed, perr := parseFioJSON(out) if perr != nil { r.Error = "parse fio json: " + perr.Error() return r } r.ReadIOPS = parsed.ReadIOPS r.WriteIOPS = parsed.WriteIOPS r.ReadBWKBps = parsed.ReadBWKBps r.WriteBWKBps = parsed.WriteBWKBps r.ReadP99Us = parsed.ReadP99Us r.WriteP99Us = parsed.WriteP99Us return r } // parseFioJSON extracts the bits we care about from fio's --output-format=json. // Latency percentiles live at .jobs[0].read.clat_ns.percentile["99.000000"]; // we convert nanoseconds to microseconds for the fio_p99_us sample. 
func parseFioJSON(out []byte) (fioResult, error) { var top struct { Jobs []struct { Read struct { IOPS float64 `json:"iops"` BW float64 `json:"bw"` CLat struct { Percentile map[string]float64 `json:"percentile"` } `json:"clat_ns"` } `json:"read"` Write struct { IOPS float64 `json:"iops"` BW float64 `json:"bw"` CLat struct { Percentile map[string]float64 `json:"percentile"` } `json:"clat_ns"` } `json:"write"` } `json:"jobs"` } if err := json.Unmarshal(out, &top); err != nil { return fioResult{}, err } if len(top.Jobs) == 0 { return fioResult{}, fmt.Errorf("no jobs in fio output") } j := top.Jobs[0] r := fioResult{ ReadIOPS: j.Read.IOPS, WriteIOPS: j.Write.IOPS, ReadBWKBps: j.Read.BW, WriteBWKBps: j.Write.BW, } if p := j.Read.CLat.Percentile["99.000000"]; p > 0 { r.ReadP99Us = p / 1000.0 } if p := j.Write.CLat.Percentile["99.000000"]; p > 0 { r.WriteP99Us = p / 1000.0 } return r, nil } // ---------- SMART delta ---------- // smartAttrMap: device → attribute → raw counter value. ATA drives // populate named attributes (Reallocated_Sector_Ct etc); NVMe drives // populate a flatter nvme-specific map. We track a curated whitelist // of wear indicators — anything else is diagnostic and drops to the raw // report output. type smartAttrMap map[string]map[string]float64 // captureSMARTAttrs runs smartctl -aj on each target and pulls the // whitelisted attributes. Per-device failures (virtio, permission // issues) degrade silently — the delta step just shows no data for // that device. func captureSMARTAttrs(ctx context.Context, targets []diskTarget) smartAttrMap { out := smartAttrMap{} for _, t := range targets { parsed, err := runSmartctl(ctx, t.Device) if err != nil { continue } attrs := extractSMARTAttrs(parsed) if len(attrs) > 0 { out[t.Device] = attrs } } return out } // smartAttributeWhitelist is the set of attributes we diff across a // stage. They're the ones that reflect *this stage's* IO damage, not // cumulative drive history. 
// Adding attributes is cheap — missing ones
// just drop to zero.
var smartAttributeWhitelist = map[string]bool{
	// ATA SMART attribute names (smartctl normalizes to these)
	"Reallocated_Sector_Ct":  true,
	"Current_Pending_Sector": true,
	"Offline_Uncorrectable":  true,
	"UDMA_CRC_Error_Count":   true,
	"Reported_Uncorrect":     true,
	"Raw_Read_Error_Rate":    true,
	// NVMe log fields (flat keys at top of nvme_smart_health_information_log)
	"media_errors":        true,
	"num_err_log_entries": true,
	"percentage_used":     true,
}

// extractSMARTAttrs pulls the whitelisted counters out of a decoded
// smartctl JSON document and returns them keyed by attribute name.
//
// Two layouts are read: ATA rows under ata_smart_attributes.table[]
// (value taken from row.raw.value) and NVMe fields directly under
// nvme_smart_health_information_log. Anything with an unexpected shape
// or a non-numeric value is skipped; the result is never nil.
func extractSMARTAttrs(raw map[string]any) map[string]float64 {
	picked := make(map[string]float64)

	// Failed assertions leave nil maps/slices behind, and indexing or
	// ranging over those is a no-op — so absent sections fall through.
	ataSection, _ := raw["ata_smart_attributes"].(map[string]any)
	rows, _ := ataSection["table"].([]any)
	for _, entry := range rows {
		fields, isObject := entry.(map[string]any)
		if !isObject {
			continue
		}
		label, _ := fields["name"].(string)
		if !smartAttributeWhitelist[label] {
			continue
		}
		rawField, _ := fields["raw"].(map[string]any)
		if val, isNumber := rawField["value"].(float64); isNumber {
			picked[label] = val
		}
	}

	nvmeLog, _ := raw["nvme_smart_health_information_log"].(map[string]any)
	for field, anyVal := range nvmeLog {
		if num, isNumber := anyVal.(float64); isNumber && smartAttributeWhitelist[field] {
			picked[field] = num
		}
	}

	return picked
}

// diffSMARTAttrs subtracts start from end per (device, attribute).
// Only attributes present in both ends produce a delta; missing
// attributes drop out (can't attribute a zero-to-present delta safely).
// Negative deltas are kept so a drive that resets a counter is visible.
func diffSMARTAttrs(start, end smartAttrMap) map[string]map[string]float64 { out := map[string]map[string]float64{} for dev, endAttrs := range end { startAttrs, ok := start[dev] if !ok { continue } devOut := map[string]float64{} for attr, endV := range endAttrs { startV, ok := startAttrs[attr] if !ok { continue } devOut[attr] = endV - startV } if len(devOut) > 0 { out[dev] = devOut } } return out }