Files
Vetting/agent/tests/gpu.go
T
josh 9bb4b09a04
CI / Lint + build + test (push) Has been cancelled
Initial commit: full Phases 1-6 implementation
Post-repair hardware validation pipeline for Proxmox cluster hosts.
Go orchestrator + in-image agent + mkosi live image + bundled dnsmasq
PXE + SQLite + HTMX/SSE UI + notify registry + janitor + full docs.
2026-04-17 21:32:10 -04:00

87 lines
2.2 KiB
Go

package tests
import (
"context"
"os/exec"
"strings"
)
// GPU checks for VGA / 3D PCI devices and reports the result.
//
// When listGPUPCI finds nothing, the stage is logged as skipped and
// still passes: a CPU-only server has nothing to stress here. When
// devices are found, the stage passes on PCI presence alone; any lines
// nvidiaSmiList returns are logged and attached under the "nvidia"
// extras key as additional detail.
func GPU(ctx context.Context, d Deps) Outcome {
	pci := listGPUPCI(ctx)

	// Nothing enumerated: report a clean skip rather than a failure.
	if len(pci) == 0 {
		d.Info("GPU: no VGA/3D PCI devices found — skipping stage")
		return Outcome{
			Passed:  true,
			Summary: "skipped (no GPU present)",
			Extras:  map[string]any{"skipped": true, "reason": "no_gpu_present"},
		}
	}

	d.Info("GPU: found " + joinDevices(pci))

	details := map[string]any{
		"pci_devices": pci,
		"skipped":     false,
	}

	// nvidia-smi output is optional detail; its absence does not fail
	// the stage.
	if smi := nvidiaSmiList(ctx); len(smi) != 0 {
		details["nvidia"] = smi
		d.Info("GPU: nvidia-smi reports: " + strings.Join(smi, ", "))
	}

	return Outcome{
		Passed:  true,
		Summary: formatCount(len(pci), "GPU present"),
		Extras:  details,
	}
}
// listGPUPCI runs `lspci -mm` and returns, whitespace-trimmed, every
// output line that mentions a VGA compatible controller or a 3D
// controller (matched case-insensitively anywhere in the line).
//
// If lspci cannot be run or reports an error the result is nil, which
// the caller treats the same as "no GPU" and auto-skips.
func listGPUPCI(ctx context.Context) []string {
	raw, err := exec.CommandContext(ctx, "lspci", "-mm").Output()
	if err != nil {
		return nil
	}

	// Lower-case device-class markers that identify a GPU line.
	gpuClasses := [...]string{"vga compatible controller", "3d controller"}

	var matches []string
	for _, entry := range strings.Split(string(raw), "\n") {
		folded := strings.ToLower(entry)
		for _, class := range gpuClasses {
			if strings.Contains(folded, class) {
				matches = append(matches, strings.TrimSpace(entry))
				break // record each line at most once
			}
		}
	}
	return matches
}
// nvidiaSmiList runs `nvidia-smi -L` and returns its output as a slice
// of whitespace-trimmed, non-empty lines. The result is nil when
// nvidia-smi isn't installed, fails, or prints nothing.
func nvidiaSmiList(ctx context.Context) []string {
	raw, err := exec.CommandContext(ctx, "nvidia-smi", "-L").Output()
	if err != nil {
		return nil
	}

	var cards []string
	for _, row := range strings.Split(string(raw), "\n") {
		if trimmed := strings.TrimSpace(row); trimmed != "" {
			cards = append(cards, trimmed)
		}
	}
	return cards
}
// joinDevices condenses a device list into one string for logging:
// empty for no devices, the device itself for exactly one, and
// otherwise the first device followed by a parenthesised "+…" suffix
// built from formatCount(len(devs)-1, "more").
func joinDevices(devs []string) string {
	switch n := len(devs); n {
	case 0:
		return ""
	case 1:
		return devs[0]
	default:
		remainder := strings.TrimSpace(formatCount(n-1, "more"))
		return devs[0] + " (+" + remainder + ")"
	}
}