From 481b67fb69675099f9fc5c33210e93043244f27d Mon Sep 17 00:00:00 2001 From: josh Date: Sun, 19 Apr 2026 21:56:18 -0400 Subject: [PATCH] feat(firmware): install probe tools in live image + surface nic/hba gaps mkosi.conf: add ipmitool, ethtool, nvme-cli so the Firmware stage can actually read BMC revisions, NIC firmware versions, and fall back to nvme-cli when sysfs firmware_rev is missing. firmware.go: probeNICFirmware and probeHBAFirmware now return (snapshots, warning) so a missing ethtool/lspci surfaces in the stage log the same way probeBIOS/probeBMC already do. Before, a host without ethtool silently reported "bios=1 nvme_fw=1 microcode=1" with no hint that nic coverage was dropped. Co-Authored-By: Claude Opus 4.7 --- agent/probes/firmware.go | 28 ++++++++++++++++++---------- live-image/mkosi.conf | 6 ++++++ 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/agent/probes/firmware.go b/agent/probes/firmware.go index db4c37e..7ccf52a 100644 --- a/agent/probes/firmware.go +++ b/agent/probes/firmware.go @@ -47,9 +47,17 @@ func Firmware(ctx context.Context) ([]FirmwareSnapshot, []string) { } else if warn != "" { warnings = append(warnings, warn) } - out = append(out, probeNICFirmware(ctx)...) + nicSnaps, nicWarn := probeNICFirmware(ctx) + out = append(out, nicSnaps...) + if nicWarn != "" { + warnings = append(warnings, nicWarn) + } out = append(out, probeNVMeFirmware(ctx)...) - out = append(out, probeHBAFirmware(ctx)...) + hbaSnaps, hbaWarn := probeHBAFirmware(ctx) + out = append(out, hbaSnaps...) + if hbaWarn != "" { + warnings = append(warnings, hbaWarn) + } if snap := probeMicrocode(); snap != nil { out = append(out, *snap) } @@ -214,13 +222,13 @@ func parseIpmitoolMCInfo(r io.Reader) *FirmwareSnapshot { // `ethtool -i ` on each real NIC (skip lo, bridges, virtuals). // One snapshot per interface so a mismatched port lights up in the diff // without silencing sibling ports. 
-func probeNICFirmware(ctx context.Context) []FirmwareSnapshot { +func probeNICFirmware(ctx context.Context) ([]FirmwareSnapshot, string) { if _, err := exec.LookPath("ethtool"); err != nil { - return nil + return nil, "nic: ethtool not installed" } ifaces, err := os.ReadDir("/sys/class/net") if err != nil { - return nil + return nil, "" } var out []FirmwareSnapshot for _, entry := range ifaces { @@ -237,7 +245,7 @@ func probeNICFirmware(ctx context.Context) []FirmwareSnapshot { out = append(out, *snap) } } - return out + return out, "" } // parseEthtoolI extracts driver/firmware-version from `ethtool -i` @@ -353,15 +361,15 @@ var lspciClassHBA = regexp.MustCompile(`(?i)(serial attached scsi|sas controller // "revision" on the device line. We capture what's printed and rely on // SpecValidate to diff — this keeps us off tool-specific CLIs (storcli, // mpt-status) that aren't always installed. -func probeHBAFirmware(ctx context.Context) []FirmwareSnapshot { +func probeHBAFirmware(ctx context.Context) ([]FirmwareSnapshot, string) { if _, err := exec.LookPath("lspci"); err != nil { - return nil + return nil, "hba: lspci not installed" } out, err := runCmd(ctx, "lspci", "-Dvvnn") if err != nil { - return nil + return nil, fmt.Sprintf("hba: lspci failed: %v", trimErr(err, out)) } - return parseLspciHBA(strings.NewReader(out)) + return parseLspciHBA(strings.NewReader(out)), "" } // parseLspciHBA walks `lspci -Dvvnn` stanzas and picks SAS/RAID diff --git a/live-image/mkosi.conf b/live-image/mkosi.conf index ba96c44..cda66d9 100644 --- a/live-image/mkosi.conf +++ b/live-image/mkosi.conf @@ -57,6 +57,12 @@ Packages= lm-sensors e2fsprogs util-linux + # Firmware probe tooling. Without these, the Firmware stage silently + # skips the nvme-cli → nvme fallback, or drops a component with only a + # "not installed" warning (ethtool → nic, ipmitool → bmc). + ipmitool + ethtool + nvme-cli # Firmware. 
firmware-linux-nonfree on bookworm is a thin metapackage # that does NOT pull i915 GuC/HuC — those live in firmware-misc-nonfree. # Enumerate explicitly so the blob for whatever hardware we boot on