9bb4b09a04
CI / Lint + build + test (push) Has been cancelled
Post-repair hardware validation pipeline for Proxmox cluster hosts. Go orchestrator + in-image agent + mkosi live image + bundled dnsmasq PXE + SQLite + HTMX/SSE UI + notify registry + janitor + full docs.
265 lines
7.1 KiB
Go
265 lines
7.1 KiB
Go
// Package probes collects hardware facts from a booted Linux system.
|
|
// Phase 3 only needs enough to feed the spec diff: CPU model/cores,
|
|
// total RAM, per-disk serial+size, per-NIC MAC+speed, per-GPU model.
|
|
//
|
|
// Every probe is tolerant of missing files or tools — if /sys isn't
|
|
// available the field is just left empty. The orchestrator's diff
|
|
// engine will surface missing expected fields as failures; missing
|
|
// fields that weren't expected stay silent.
|
|
package probes
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"runtime"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"vetting/internal/spec"
|
|
)
|
|
|
|
// Collect runs every probe and returns the merged inventory. The only
|
|
// errors it surfaces are fatal ones that prevent progress — individual
|
|
// probe failures are logged to the returned Inventory's raw field and
|
|
// do not fail the whole call.
|
|
func Collect() (*spec.Inventory, error) {
|
|
inv := &spec.Inventory{}
|
|
|
|
inv.CPU = probeCPU()
|
|
inv.Memory = probeMemory()
|
|
inv.Disks = probeDisks()
|
|
inv.NICs = probeNICs()
|
|
inv.GPUs = probeGPUs()
|
|
|
|
return inv, nil
|
|
}
|
|
|
|
// ----- CPU --------------------------------------------------------------
|
|
|
|
func probeCPU() spec.CPUSpec {
|
|
// model: first "model name" in /proc/cpuinfo.
|
|
// logical_cores: runtime.NumCPU (Linux respects cpu cgroup; agent
|
|
// runs on bare metal so it will report every HT thread).
|
|
c := spec.CPUSpec{LogicalCores: runtime.NumCPU()}
|
|
f, err := os.Open("/proc/cpuinfo")
|
|
if err != nil {
|
|
return c
|
|
}
|
|
defer func() { _ = f.Close() }()
|
|
scan := bufio.NewScanner(f)
|
|
for scan.Scan() {
|
|
line := scan.Text()
|
|
if strings.HasPrefix(line, "model name") {
|
|
if _, v, ok := strings.Cut(line, ":"); ok {
|
|
c.Model = strings.TrimSpace(v)
|
|
break
|
|
}
|
|
}
|
|
}
|
|
return c
|
|
}
|
|
|
|
// ----- Memory -----------------------------------------------------------
|
|
|
|
func probeMemory() spec.MemorySpec {
|
|
// /proc/meminfo reports MemTotal in kB. Round down to the nearest
|
|
// GiB so the diff's ±2 GiB tolerance is meaningful.
|
|
f, err := os.Open("/proc/meminfo")
|
|
if err != nil {
|
|
return spec.MemorySpec{}
|
|
}
|
|
defer func() { _ = f.Close() }()
|
|
scan := bufio.NewScanner(f)
|
|
for scan.Scan() {
|
|
fields := strings.Fields(scan.Text())
|
|
if len(fields) >= 2 && fields[0] == "MemTotal:" {
|
|
kb, err := strconv.ParseInt(fields[1], 10, 64)
|
|
if err == nil {
|
|
return spec.MemorySpec{TotalGiB: int(kb / 1024 / 1024)}
|
|
}
|
|
}
|
|
}
|
|
return spec.MemorySpec{}
|
|
}
|
|
|
|
// ----- Disks ------------------------------------------------------------
|
|
|
|
// probeDisks walks /sys/class/block and picks out real block devices
|
|
// (no partitions, no loop/ram). For each it reads size (512B sectors)
|
|
// and serial. Virtio disks in QEMU report a serial only when launched
|
|
// with `-drive serial=...`; without that the field is empty, which is
|
|
// fine — the diff skips disks with empty serials anyway.
|
|
func probeDisks() []spec.DiskSpec {
|
|
entries, err := os.ReadDir("/sys/class/block")
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
var out []spec.DiskSpec
|
|
for _, e := range entries {
|
|
name := e.Name()
|
|
if !isRealDisk(name) {
|
|
continue
|
|
}
|
|
base := filepath.Join("/sys/class/block", name)
|
|
size := diskSizeGB(base)
|
|
serial := diskSerial(name)
|
|
// size == 0 means we couldn't read /size; skip rather than
|
|
// emit garbage.
|
|
if size == 0 && serial == "" {
|
|
continue
|
|
}
|
|
out = append(out, spec.DiskSpec{Serial: serial, SizeGB: size})
|
|
}
|
|
return out
|
|
}
|
|
|
|
// isRealDisk reports whether the /sys/class/block entry called name should
// be treated as a whole disk.
//
// Entries whose name starts with "loop", "ram", "zram" or "dm-" are
// rejected outright. Anything else is rejected only if it has a
// "partition" attribute in sysfs, which marks it as a partition of
// another device.
func isRealDisk(name string) bool {
	for _, virtual := range []string{"loop", "ram", "zram", "dm-"} {
		if strings.HasPrefix(name, virtual) {
			return false
		}
	}
	// A successful Stat means the attribute exists, i.e. this is a partition.
	_, err := os.Stat(filepath.Join("/sys/class/block", name, "partition"))
	return err != nil
}
|
|
|
|
// diskSizeGB returns the capacity of the block device whose sysfs
// directory is base, in decimal gigabytes (10^9 bytes), rounded down.
//
// The "size" attribute is read as a count of 512-byte sectors. The result
// is 0 when the attribute cannot be read or is not an integer.
func diskSizeGB(base string) int {
	const (
		sectorBytes = 512
		bytesPerGB  = 1_000_000_000
	)

	raw, readErr := os.ReadFile(filepath.Join(base, "size"))
	if readErr != nil {
		return 0
	}
	count, parseErr := strconv.ParseInt(strings.TrimSpace(string(raw)), 10, 64)
	if parseErr != nil {
		return 0
	}
	return int(count * sectorBytes / bytesPerGB)
}
|
|
|
|
// diskSerial returns the serial number of the block device called name,
// or "" when none can be determined.
//
// Sources are tried in this order and the first non-empty result wins:
//
//  1. /sys/block/<name>/device/serial (plain text)
//  2. /sys/block/<name>/device/vpd_pg80 (raw SCSI VPD page 0x80)
//  3. /sys/block/<name>/serial (plain text)
//  4. the ID_SERIAL_SHORT property printed by `udevadm info`
//
// vpd_pg80 is a binary attribute: the serial is preceded by a 4-byte page
// header, so it is decoded with vpdPage80Serial instead of being returned
// verbatim (which would prefix the serial with the header bytes).
//
// The udevadm fallback is best-effort: if the tool is missing or fails,
// the result is "". It is run without a timeout.
func diskSerial(name string) string {
	for _, rel := range []string{
		filepath.Join("/sys/block", name, "device", "serial"),
		filepath.Join("/sys/block", name, "device", "vpd_pg80"),
		filepath.Join("/sys/block", name, "serial"),
	} {
		b, err := os.ReadFile(rel)
		if err != nil {
			continue
		}
		var s string
		if filepath.Base(rel) == "vpd_pg80" {
			s = vpdPage80Serial(b)
		} else {
			s = strings.TrimSpace(string(b))
		}
		if s != "" {
			return s
		}
	}

	// Fallback: udevadm often knows the serial when sysfs does not.
	cmd := exec.Command("udevadm", "info", "--query=property", "--name="+name)
	out, err := cmd.Output()
	if err != nil {
		return ""
	}
	for _, line := range strings.Split(string(out), "\n") {
		if v, ok := strings.CutPrefix(line, "ID_SERIAL_SHORT="); ok {
			return strings.TrimSpace(v)
		}
	}
	return ""
}

// vpdPage80Serial extracts the serial from a raw SCSI "Unit Serial Number"
// VPD page. Layout: byte 0 is the device type, byte 1 the page code
// (0x80), bytes 2-3 the big-endian payload length, and the ASCII serial
// follows. Padding spaces and NULs around the serial are stripped.
// Returns "" when page is too short or is not a page 0x80.
func vpdPage80Serial(page []byte) string {
	const headerLen = 4
	if len(page) < headerLen || page[1] != 0x80 {
		return ""
	}
	payload := page[headerLen:]
	// Trust the declared length only when it is shorter than what we got.
	if n := int(page[2])<<8 | int(page[3]); n < len(payload) {
		payload = payload[:n]
	}
	return strings.Trim(string(payload), "\x00 \t\r\n")
}
|
|
|
|
// ----- NICs -------------------------------------------------------------
|
|
|
|
func probeNICs() []spec.NICSpec {
|
|
root := "/sys/class/net"
|
|
entries, err := os.ReadDir(root)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
var out []spec.NICSpec
|
|
for _, e := range entries {
|
|
name := e.Name()
|
|
if name == "lo" {
|
|
continue
|
|
}
|
|
base := filepath.Join(root, name)
|
|
mac := readLine(filepath.Join(base, "address"))
|
|
if mac == "" || mac == "00:00:00:00:00:00" {
|
|
continue
|
|
}
|
|
// /sys/class/net/*/speed reports Mbps or -1 if link down.
|
|
speed := 0
|
|
if b, err := os.ReadFile(filepath.Join(base, "speed")); err == nil {
|
|
if mbps, err := strconv.Atoi(strings.TrimSpace(string(b))); err == nil && mbps > 0 {
|
|
speed = mbps / 1000
|
|
}
|
|
}
|
|
out = append(out, spec.NICSpec{MAC: strings.ToLower(mac), SpeedGbps: speed})
|
|
}
|
|
return out
|
|
}
|
|
|
|
// ----- GPUs -------------------------------------------------------------
|
|
|
|
// probeGPUs leans on lspci; if lspci is missing, returns nothing and
|
|
// the diff engine just won't match any GPU expectations. Phase 4 will
|
|
// add nvidia-smi for VRAM and firmware.
|
|
func probeGPUs() []spec.GPUSpec {
|
|
cmd := exec.Command("lspci", "-mm", "-nnk")
|
|
out, err := cmd.Output()
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
var gpus []spec.GPUSpec
|
|
for _, line := range strings.Split(string(out), "\n") {
|
|
low := strings.ToLower(line)
|
|
if !strings.Contains(low, "vga compatible controller") &&
|
|
!strings.Contains(low, "3d controller") {
|
|
continue
|
|
}
|
|
// `lspci -mm` quotes fields; device name is usually field 3.
|
|
fields := splitQuoted(line)
|
|
if len(fields) >= 4 {
|
|
gpus = append(gpus, spec.GPUSpec{Model: fmt.Sprintf("%s %s", fields[2], fields[3])})
|
|
}
|
|
}
|
|
return gpus
|
|
}
|
|
|
|
// splitQuoted tokenizes a line made of space-separated fields, where a
// field is either a bare token (no spaces) or a double-quoted string that
// may contain spaces. This is the shape of `lspci -mm` output, e.g.
//
//	01:00.0 "VGA compatible controller" "Vendor" "Device" -ra1 "SubVendor" "SubDevice"
//
// Every field becomes its own element, in order, with quotes removed.
// Bare tokens such as the slot address and the -rXX revision flag are
// emitted as separate fields rather than being merged into the quoted
// field that follows them. An empty quoted field ("") is preserved as "".
//
// Backslash escapes inside quotes are not interpreted. Text after an
// unterminated opening quote is emitted as a final field if non-empty.
func splitQuoted(line string) []string {
	var (
		fields []string
		cur    strings.Builder
		inQ    bool
	)
	// flush emits the pending token, if there is one.
	flush := func() {
		if cur.Len() > 0 {
			fields = append(fields, cur.String())
			cur.Reset()
		}
	}

	for _, r := range line {
		switch {
		case r == '"' && inQ:
			// Closing quote: emit unconditionally so "" yields an empty field.
			fields = append(fields, cur.String())
			cur.Reset()
			inQ = false
		case r == '"':
			// Opening quote: a bare token directly before it ends here.
			flush()
			inQ = true
		case r == ' ' && !inQ:
			// Separator between fields; runs of spaces collapse.
			flush()
		default:
			cur.WriteRune(r)
		}
	}
	flush()
	return fields
}
|
|
|
|
// ----- shared helpers ---------------------------------------------------
|
|
|
|
// readLine returns the contents of the file at path with leading and
// trailing whitespace removed, or "" if the file cannot be read.
//
// Despite the name, the whole file is returned, not just its first line;
// it is meant for single-value sysfs attributes.
func readLine(path string) string {
	if raw, err := os.ReadFile(path); err == nil {
		return strings.TrimSpace(string(raw))
	}
	return ""
}
|
|
|