From e73e31af929271b76b7c2f5d72e4abb105e27193 Mon Sep 17 00:00:00 2001 From: josh Date: Sat, 18 Apr 2026 16:39:28 -0400 Subject: [PATCH] live-image: install stage tools and fail loudly if any are missing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The live image was still carrying the Phase 2 package list, so SMART, CPUStress, and Network each hit a LookPath miss and returned pass-with-skip. A run that skipped every real check still ended in "completed" — nothing on the report said the image was broken. Add smartmontools, stress-ng, fio, iperf3, lshw, lm-sensors, e2fsprogs, and util-linux to mkosi.conf. Flip the three stages from skip-pass to fail when their binary is missing so any future packaging regression blocks the run instead of whispering past it. Legitimate "no hardware" skips (no GPU, no hwmon, no disks, non-destructive) are untouched. Co-Authored-By: Claude Opus 4.7 --- agent/tests/cpustress.go | 12 ++++++++---- agent/tests/network.go | 10 ++++++---- agent/tests/smart.go | 15 +++++++++++++++ live-image/mkosi.conf | 13 +++++++++++-- 4 files changed, 40 insertions(+), 10 deletions(-) diff --git a/agent/tests/cpustress.go b/agent/tests/cpustress.go index b2647e8..a68dc0f 100644 --- a/agent/tests/cpustress.go +++ b/agent/tests/cpustress.go @@ -20,11 +20,15 @@ import ( // pages for the full duration, which is the Phase 4 health bar. func CPUStress(ctx context.Context, d Deps) Outcome { if _, err := exec.LookPath("stress-ng"); err != nil { - d.Warn("CPUStress: stress-ng not found in PATH — skipping stage") + // The live image ships stress-ng; absence is a packaging defect, + // not a benign local-dev scenario. Fail loudly so a regression + // in the image doesn't silently pass runs. + d.Error("CPUStress: stress-ng not found in PATH — live image is missing required tool") return Outcome{ - Passed: true, - Summary: "skipped (stress-ng missing)", - Extras: map[string]any{"skipped": true, "reason": "stress_ng_missing"}, + Passed: false, + Message: "stress-ng binary missing from live image", + Summary: "failed (stress-ng missing)", + Extras: map[string]any{"reason": "stress_ng_missing"}, } } diff --git a/agent/tests/network.go b/agent/tests/network.go index 400d976..089dc89 100644 --- a/agent/tests/network.go +++ b/agent/tests/network.go @@ -24,11 +24,13 @@ type NetworkConfig struct { // isn't reachable, or throughput is zero. func Network(ctx context.Context, d Deps, cfg NetworkConfig) Outcome { if _, err := exec.LookPath("iperf3"); err != nil { - d.Warn("Network: iperf3 not found — skipping stage") + // Live image ships iperf3; absence means packaging regression. + d.Error("Network: iperf3 not found — live image is missing required tool") return Outcome{ - Passed: true, - Summary: "skipped (iperf3 missing)", - Extras: map[string]any{"skipped": true, "reason": "iperf3_missing"}, + Passed: false, + Message: "iperf3 binary missing from live image", + Summary: "failed (iperf3 missing)", + Extras: map[string]any{"reason": "iperf3_missing"}, } } host, err := deriveHost(cfg.OrchestratorURL) diff --git a/agent/tests/smart.go b/agent/tests/smart.go index 987f46d..ca3888d 100644 --- a/agent/tests/smart.go +++ b/agent/tests/smart.go @@ -21,6 +21,21 @@ import ( // surfaces as a per-disk "skipped" entry; the stage only fails if at // least one disk reports !passed. func SMART(ctx context.Context, d Deps) Outcome { + // smartctl absence is a packaging defect, not a per-disk skip. The + // per-disk `err != nil` path below catches "this device doesn't + // support SMART" (virtio-blk, exit 4); pre-checking the binary up + // front keeps that skip legitimate and fails the stage loudly if + // the live image lost its smartmontools package. + if _, err := exec.LookPath("smartctl"); err != nil { + d.Error("SMART: smartctl not found — live image is missing required tool") + return Outcome{ + Passed: false, + Message: "smartctl binary missing from live image", + Summary: "failed (smartctl missing)", + Extras: map[string]any{"reason": "smartctl_missing"}, + } + } + disks, err := listBlockDisks() if err != nil { d.Warn("SMART: failed to enumerate /sys/class/block: " + err.Error()) diff --git a/live-image/mkosi.conf b/live-image/mkosi.conf index 8e08e29..ba96c44 100644 --- a/live-image/mkosi.conf +++ b/live-image/mkosi.conf @@ -46,6 +46,17 @@ Packages= usbutils initramfs-tools zstd + # Stage binaries. Every package here backs a stage the agent runs — + # if any one goes missing the corresponding stage now fails the run + # (was: pass-with-skip). Keep this list in sync with agent/tests. + smartmontools + stress-ng + fio + iperf3 + lshw + lm-sensors + e2fsprogs + util-linux # Firmware. firmware-linux-nonfree on bookworm is a thin metapackage # that does NOT pull i915 GuC/HuC — those live in firmware-misc-nonfree. # Enumerate explicitly so the blob for whatever hardware we boot on @@ -59,7 +70,5 @@ Packages= amd64-microcode firmware-linux-nonfree -# Phase 4 will add: smartmontools stress-ng fio iperf3 lshw lm-sensors - [Host] # Copy the prebuilt Go agent in from the repo root via postinst.