package probes

import (
	"bufio"
	"context"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"strings"
	"time"
)

// FirmwareSnapshot is the on-wire shape the agent POSTs alongside the
// Firmware stage result. Mirrors internal/store.FirmwareSnapshot without
// the import — the /result handler converts to the store type and
// persists. One run produces many snapshots (one per BIOS / BMC / NIC
// port / HBA / microcode / NVMe); Identifier distinguishes siblings
// (e.g. "eth0" / "eth1"), Version is the canonical string to diff.
type FirmwareSnapshot struct {
	Component  string            `json:"component"` // bios|bmc|nic|hba|microcode|nvme_fw
	Identifier string            `json:"identifier"`
	Version    string            `json:"version"`
	Vendor     string            `json:"vendor,omitempty"`
	Raw        map[string]string `json:"raw,omitempty"`
}

// Firmware runs every sub-probe in sequence: BIOS, BMC, NIC, NVMe, HBA and
// finally CPU microcode. The probes are best-effort — a missing tool
// yields no snapshot rather than an error.
//
// It returns the aggregated snapshots together with the warnings reported
// by the BIOS and BMC probes (the only two that report one), so the stage
// summary can show which subsystem could not be read. A warning is only
// recorded when the corresponding probe produced no snapshot.
func Firmware(ctx context.Context) ([]FirmwareSnapshot, []string) {
	var (
		snapshots []FirmwareSnapshot
		warnings  []string
	)

	// Probes that yield at most one snapshot, or a warning explaining why not.
	single := []func(context.Context) (*FirmwareSnapshot, string){
		probeBIOS,
		probeBMC,
	}
	for _, probe := range single {
		snap, warn := probe(ctx)
		switch {
		case snap != nil:
			snapshots = append(snapshots, *snap)
		case warn != "":
			warnings = append(warnings, warn)
		}
	}

	// Probes that yield one snapshot per device and never warn.
	multi := []func(context.Context) []FirmwareSnapshot{
		probeNICFirmware,
		probeNVMeFirmware,
		probeHBAFirmware,
	}
	for _, probe := range multi {
		snapshots = append(snapshots, probe(ctx)...)
	}

	if mc := probeMicrocode(); mc != nil {
		snapshots = append(snapshots, *mc)
	}
	return snapshots, warnings
}

// runCmd executes a short-lived command with a per-call timeout.
The // timeout is intentionally aggressive (5 s) because firmware probes // read device registers and occasionally block forever on a wedged // controller — the stage should report "no HBA firmware readable" // rather than hang the pipeline. func runCmd(ctx context.Context, name string, args ...string) (string, error) { cctx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() cmd := exec.CommandContext(cctx, name, args...) out, err := cmd.CombinedOutput() if err != nil { return string(out), err } return string(out), nil } // ----- BIOS -------------------------------------------------------------- // probeBIOS invokes dmidecode -t bios and parses the vendor + version // lines. dmidecode must run as root; we let it fail gracefully when the // agent is mis-deployed without privileges. func probeBIOS(ctx context.Context) (*FirmwareSnapshot, string) { if _, err := exec.LookPath("dmidecode"); err != nil { return nil, "bios: dmidecode not installed" } out, err := runCmd(ctx, "dmidecode", "-t", "bios") if err != nil { return nil, fmt.Sprintf("bios: dmidecode failed: %v", trimErr(err, out)) } snap := parseDmidecodeBIOS(strings.NewReader(out)) if snap == nil { return nil, "bios: dmidecode produced no usable output" } return snap, "" } // parseDmidecodeBIOS consumes `dmidecode -t bios` output and pulls // Vendor / Version / Release Date. Kept as an io.Reader for unit tests. func parseDmidecodeBIOS(r io.Reader) *FirmwareSnapshot { kv := parseDmidecodeSection(r, "BIOS Information") if kv == nil { return nil } snap := &FirmwareSnapshot{ Component: "bios", Identifier: "system", Version: firstNonEmpty(kv["Version"], kv["Firmware Revision"]), Vendor: kv["Vendor"], Raw: kv, } if snap.Version == "" { return nil } return snap } // parseDmidecodeSection returns the key/value map of the first dmidecode // handle whose title matches. dmidecode blocks look like: // Handle 0x0000, ... // BIOS Information // Vendor: American Megatrends // Version: 3.0 // ... 
// With a blank line between blocks. Values like "Characteristics:" // followed by a bulleted sub-list are collapsed into "…" so we don't // accidentally swallow the next handle. func parseDmidecodeSection(r io.Reader, title string) map[string]string { sc := bufio.NewScanner(r) sc.Buffer(make([]byte, 0, 64*1024), 1024*1024) var kv map[string]string var inside, seenTitle bool for sc.Scan() { line := sc.Text() trim := strings.TrimSpace(line) if strings.HasPrefix(line, "Handle ") { if seenTitle && kv != nil { return kv } inside = false kv = nil continue } if !inside { if trim == title { inside = true seenTitle = true kv = map[string]string{} } continue } if trim == "" { continue } if k, v, ok := strings.Cut(trim, ":"); ok { v = strings.TrimSpace(v) if v == "" { continue } kv[strings.TrimSpace(k)] = v } } if seenTitle { return kv } return nil } // ----- BMC / IPMI -------------------------------------------------------- // probeBMC walks `ipmitool mc info`. Home-lab hosts often lack a BMC — // missing binary or a non-zero exit returns a warning without failing // the stage. We capture Firmware Revision + Manufacturer as the version. func probeBMC(ctx context.Context) (*FirmwareSnapshot, string) { if _, err := exec.LookPath("ipmitool"); err != nil { return nil, "bmc: ipmitool not installed" } out, err := runCmd(ctx, "ipmitool", "mc", "info") if err != nil { return nil, fmt.Sprintf("bmc: ipmitool mc info failed: %v", trimErr(err, out)) } snap := parseIpmitoolMCInfo(strings.NewReader(out)) if snap == nil { return nil, "bmc: ipmitool output not parseable" } return snap, "" } // parseIpmitoolMCInfo pulls "Firmware Revision" + "Manufacturer Name" // from the textual output. Format is indented key : value lines. 
func parseIpmitoolMCInfo(r io.Reader) *FirmwareSnapshot { sc := bufio.NewScanner(r) kv := map[string]string{} for sc.Scan() { line := strings.TrimSpace(sc.Text()) if k, v, ok := strings.Cut(line, ":"); ok { kv[strings.TrimSpace(k)] = strings.TrimSpace(v) } } version := firstNonEmpty(kv["Firmware Revision"], kv["Aux Firmware Rev Info"]) if version == "" { return nil } return &FirmwareSnapshot{ Component: "bmc", Identifier: "bmc0", Version: version, Vendor: kv["Manufacturer Name"], Raw: kv, } } // ----- NIC firmware ------------------------------------------------------ // probeNICFirmware enumerates /sys/class/net/*/device and calls // `ethtool -i ` on each real NIC (skip lo, bridges, virtuals). // One snapshot per interface so a mismatched port lights up in the diff // without silencing sibling ports. func probeNICFirmware(ctx context.Context) []FirmwareSnapshot { if _, err := exec.LookPath("ethtool"); err != nil { return nil } ifaces, err := os.ReadDir("/sys/class/net") if err != nil { return nil } var out []FirmwareSnapshot for _, entry := range ifaces { name := entry.Name() if !isRealNIC(name) { continue } raw, err := runCmd(ctx, "ethtool", "-i", name) if err != nil { continue } snap := parseEthtoolI(strings.NewReader(raw), name) if snap != nil { out = append(out, *snap) } } return out } // parseEthtoolI extracts driver/firmware-version from `ethtool -i` // output. Lines are "key: value" with a consistent prefix order. 
func parseEthtoolI(r io.Reader, iface string) *FirmwareSnapshot { sc := bufio.NewScanner(r) kv := map[string]string{} for sc.Scan() { line := sc.Text() if k, v, ok := strings.Cut(line, ":"); ok { kv[strings.TrimSpace(k)] = strings.TrimSpace(v) } } if kv["firmware-version"] == "" && kv["driver"] == "" { return nil } return &FirmwareSnapshot{ Component: "nic", Identifier: iface, Version: kv["firmware-version"], Vendor: kv["driver"], Raw: kv, } } // isRealNIC filters out loopback, bridges, veth, and the handful of // virtual kernel devices ethtool will refuse on. func isRealNIC(name string) bool { if name == "" || name == "lo" { return false } for _, prefix := range []string{"docker", "br-", "veth", "virbr", "tun", "tap", "bond"} { if strings.HasPrefix(name, prefix) { return false } } // Only accept interfaces that have a `device` link — real PCI NICs // do; pure virtuals (dummy0, wg*) don't. if _, err := os.Stat(filepath.Join("/sys/class/net", name, "device")); err != nil { return false } return true } // ----- NVMe -------------------------------------------------------------- // probeNVMeFirmware reads /sys/class/nvme/nvmeN/firmware_rev for every // controller. Falls back to `nvme id-ctrl` if the sysfs file is missing // (older kernels). Identifier is the controller path so a run with two // drives produces two snapshots. func probeNVMeFirmware(ctx context.Context) []FirmwareSnapshot { entries, err := os.ReadDir("/sys/class/nvme") if err != nil { return nil } var out []FirmwareSnapshot for _, e := range entries { ctrl := e.Name() rev := strings.TrimSpace(readFile(filepath.Join("/sys/class/nvme", ctrl, "firmware_rev"))) model := strings.TrimSpace(readFile(filepath.Join("/sys/class/nvme", ctrl, "model"))) if rev == "" { // Fallback: nvme id-ctrl -H /dev/. Available on hosts // where sysfs doesn't export firmware_rev. 
if _, err := exec.LookPath("nvme"); err == nil { raw, _ := runCmd(ctx, "nvme", "id-ctrl", "/dev/"+ctrl) rev = parseNVMeIDCtrl(strings.NewReader(raw), "fr") if model == "" { model = parseNVMeIDCtrl(strings.NewReader(raw), "mn") } } } if rev == "" { continue } out = append(out, FirmwareSnapshot{ Component: "nvme_fw", Identifier: ctrl, Version: rev, Vendor: model, Raw: map[string]string{"model": model, "firmware_rev": rev}, }) } return out } // parseNVMeIDCtrl pulls a single field out of `nvme id-ctrl` output. // Format: "fr : FW1234" / "mn : Samsung SSD 980 PRO". // Leading spaces vary, values may contain spaces. func parseNVMeIDCtrl(r io.Reader, key string) string { sc := bufio.NewScanner(r) prefix := key + " " for sc.Scan() { line := strings.TrimSpace(sc.Text()) if !strings.HasPrefix(line, prefix) { continue } _, v, ok := strings.Cut(line, ":") if !ok { continue } return strings.TrimSpace(v) } return "" } // ----- HBA --------------------------------------------------------------- var lspciClassHBA = regexp.MustCompile(`(?i)(serial attached scsi|sas controller|raid bus controller)`) // probeHBAFirmware looks for SAS/RAID HBAs via `lspci -Dvvnn`. The // firmware string is typically exposed as "Product Name" + // "Capabilities" but in practice the LSI/Broadcom driver writes a // "revision" on the device line. We capture what's printed and rely on // SpecValidate to diff — this keeps us off tool-specific CLIs (storcli, // mpt-status) that aren't always installed. func probeHBAFirmware(ctx context.Context) []FirmwareSnapshot { if _, err := exec.LookPath("lspci"); err != nil { return nil } out, err := runCmd(ctx, "lspci", "-Dvvnn") if err != nil { return nil } return parseLspciHBA(strings.NewReader(out)) } // parseLspciHBA walks `lspci -Dvvnn` stanzas and picks SAS/RAID // controllers. One snapshot per device; identifier is the PCI address. // Version is the device line's revision (rev NN) or the Kernel modules // string when no rev is printed. 
func parseLspciHBA(r io.Reader) []FirmwareSnapshot { sc := bufio.NewScanner(r) sc.Buffer(make([]byte, 0, 64*1024), 1024*1024) var out []FirmwareSnapshot var cur *FirmwareSnapshot revRe := regexp.MustCompile(`\(rev\s+([0-9a-fA-F]+)\)`) flush := func() { if cur != nil && cur.Version != "" { out = append(out, *cur) } cur = nil } for sc.Scan() { line := sc.Text() if !strings.HasPrefix(line, "\t") && strings.Contains(line, " ") { // New device line. flush() if lspciClassHBA.MatchString(line) { addr, rest, _ := strings.Cut(line, " ") cur = &FirmwareSnapshot{ Component: "hba", Identifier: addr, Vendor: strings.TrimSpace(rest), Raw: map[string]string{"device_line": line}, } if m := revRe.FindStringSubmatch(line); len(m) == 2 { cur.Version = "rev " + m[1] } } continue } if cur == nil { continue } trim := strings.TrimSpace(line) if strings.HasPrefix(trim, "Kernel modules:") { cur.Raw["kernel_modules"] = strings.TrimPrefix(trim, "Kernel modules:") } if strings.HasPrefix(trim, "Kernel driver in use:") { cur.Raw["kernel_driver"] = strings.TrimPrefix(trim, "Kernel driver in use:") } } flush() return out } // ----- Microcode --------------------------------------------------------- // probeMicrocode reads /proc/cpuinfo for the "microcode" line. All // cores report the same value post-boot, so one snapshot is enough. 
func probeMicrocode() *FirmwareSnapshot { f, err := os.Open("/proc/cpuinfo") if err != nil { return nil } defer func() { _ = f.Close() }() snap := parseMicrocode(f) return snap } func parseMicrocode(r io.Reader) *FirmwareSnapshot { sc := bufio.NewScanner(r) version := "" vendor := "" for sc.Scan() { line := sc.Text() k, v, ok := strings.Cut(line, ":") if !ok { continue } key := strings.TrimSpace(k) val := strings.TrimSpace(v) switch key { case "microcode": if version == "" { version = val } case "vendor_id": if vendor == "" { vendor = val } } if version != "" && vendor != "" { break } } if version == "" { return nil } return &FirmwareSnapshot{ Component: "microcode", Identifier: "cpu", Version: version, Vendor: vendor, } } // ----- helpers ----------------------------------------------------------- func firstNonEmpty(ss ...string) string { for _, s := range ss { if strings.TrimSpace(s) != "" { return s } } return "" } func readFile(p string) string { b, err := os.ReadFile(p) if err != nil { return "" } return string(b) } // trimErr joins the underlying error with the first line of combined // output so the warning message carries enough diagnostic context // without dumping a screenful of dmidecode/ipmitool noise. func trimErr(err error, out string) string { firstLine := strings.SplitN(strings.TrimSpace(out), "\n", 2)[0] if firstLine == "" { return err.Error() } return fmt.Sprintf("%v (%s)", err, firstLine) }