diff --git a/agent/probes/inventory.go b/agent/probes/inventory.go index a64ba50..e54d964 100644 --- a/agent/probes/inventory.go +++ b/agent/probes/inventory.go @@ -10,10 +10,10 @@ package probes import ( "bufio" - "fmt" "os" "os/exec" "path/filepath" + "regexp" "runtime" "strconv" "strings" @@ -147,13 +147,21 @@ func diskSizeGB(base string) int { func diskSerial(name string) string { // Try a few known paths; the kernel exposes serials differently for // ATA/SCSI vs NVMe. + // + // sysfs reads return raw bytes: vpd_pg80 is a binary SCSI VPD page + // with a 4-byte header, and some SSDs put control/NUL bytes at the + // head of /device/serial. TrimSpace won't strip either, so the + // string survives into the spec map as a garbage key that doesn't + // match the reporter's cleaner read from the same file on a + // different kernel. sanitizeASCII drops everything below 0x20 and + // above 0x7E, which leaves a stable printable serial on both sides. for _, rel := range []string{ filepath.Join("/sys/block", name, "device", "serial"), filepath.Join("/sys/block", name, "device", "vpd_pg80"), filepath.Join("/sys/block", name, "serial"), } { if b, err := os.ReadFile(rel); err == nil { - s := strings.TrimSpace(string(b)) + s := sanitizeASCII(string(b)) if s != "" { return s } @@ -167,7 +175,7 @@ func diskSerial(name string) string { } for _, line := range strings.Split(string(out), "\n") { if v, ok := strings.CutPrefix(line, "ID_SERIAL_SHORT="); ok { - return strings.TrimSpace(v) + return sanitizeASCII(v) } } return "" @@ -210,7 +218,7 @@ func probeNICs() []spec.NICSpec { // the diff engine just won't match any GPU expectations. Phase 4 will // add nvidia-smi for VRAM and firmware. 
func probeGPUs() []spec.GPUSpec { - cmd := exec.Command("lspci", "-mm", "-nnk") + cmd := exec.Command("lspci", "-mm", "-nn") out, err := cmd.Output() if err != nil { return nil @@ -222,10 +230,24 @@ func probeGPUs() []spec.GPUSpec { !strings.Contains(low, "3d controller") { continue } - // `lspci -mm` quotes fields; device name is usually field 3. + // lspci -mm quotes fields. splitQuoted indexes: + // [0] = class (e.g. "VGA compatible controller [0300]") + // [1] = vendor (e.g. "Intel Corporation [8086]") + // [2] = device (e.g. "Alder Lake-N [UHD Graphics] [46d1]") + // [3] = subsys (if present — varies between boards even + // for identical chips; NOT a model identifier) + // We used to concatenate [2] + " " + [3], which made the "model" + // key include subsystem noise and the occasional -rXX revision + // marker, so reporter and live-image runs produced different + // slugs for the same silicon. Use only [2], stripped of the + // trailing PCI device-id "[NNNN]" bracket that lspci -nn adds. fields := splitQuoted(line) - if len(fields) >= 4 { - gpus = append(gpus, spec.GPUSpec{Model: fmt.Sprintf("%s %s", fields[2], fields[3])}) + if len(fields) >= 3 { + model := stripPCIID(fields[2]) + model = sanitizeASCII(model) + if model != "" { + gpus = append(gpus, spec.GPUSpec{Model: model}) + } } } return gpus @@ -262,3 +284,31 @@ func readLine(path string) string { return strings.TrimSpace(string(b)) } +// sanitizeASCII drops bytes below 0x20 (control chars) and above 0x7E +// (high-bit / UTF-8 continuation bytes that come from binary sysfs +// files like vpd_pg80 being read as a Go string) and trims the result. +// Everything the caller cares about — disk serials, GPU model names — +// is ASCII-printable, so this is safe and fixes the reporter-vs-live +// mismatch where the same hardware produced different map keys. 
func sanitizeASCII(s string) string {
	var b strings.Builder
	b.Grow(len(s)) // output is never longer than the input

	// Iterate over bytes, not runes: the input may be raw binary rather
	// than valid UTF-8, and every byte outside the printable ASCII range
	// 0x20..0x7E (control bytes, NULs, high-bit bytes) is dropped.
	for i := 0; i < len(s); i++ {
		if c := s[i]; c >= 0x20 && c <= 0x7E {
			b.WriteByte(c)
		}
	}

	// Trim last, so spaces exposed by removing leading or trailing
	// non-printable bytes are stripped as well.
	return strings.TrimSpace(b.String())
}

// pciIDTail matches one trailing four-hex-digit bracket group such as
// " [46d1]", together with any whitespace around it. It is anchored to
// the end of the string, so bracketed text earlier in the name (for
// example "[UHD Graphics]") never matches.
var pciIDTail = regexp.MustCompile(`\s*\[[0-9a-fA-F]{4}\]\s*$`)

// stripPCIID removes the trailing " [NNNN]" PCI ID marker that
// `lspci -nn` appends to vendor/device strings, and returns the rest of
// the name. Whitespace after the marker (a stray space or newline left
// over from line splitting) is tolerated and removed with it, so the
// result does not depend on whether the caller trimmed the field first.
// Strings without such a trailing marker are returned unchanged.
func stripPCIID(s string) string {
	return pciIDTail.ReplaceAllString(s, "")
}