9bb4b09a04
CI / Lint + build + test (push) Has been cancelled
Post-repair hardware validation pipeline for Proxmox cluster hosts. Go orchestrator + in-image agent + mkosi live image + bundled dnsmasq PXE + SQLite + HTMX/SSE UI + notify registry + janitor + full docs.
87 lines
2.2 KiB
Go
87 lines
2.2 KiB
Go
package tests
|
|
|
|
import (
|
|
"context"
|
|
"os/exec"
|
|
"strings"
|
|
)
|
|
|
|
// GPU enumerates VGA / 3D PCI devices. No devices → skip cleanly (a
|
|
// CPU-only server passes this stage by virtue of having nothing to
|
|
// stress). Devices present → try nvidia-smi for NVIDIA cards, else
|
|
// accept PCI presence.
|
|
func GPU(ctx context.Context, d Deps) Outcome {
|
|
devices := listGPUPCI(ctx)
|
|
if len(devices) == 0 {
|
|
d.Info("GPU: no VGA/3D PCI devices found — skipping stage")
|
|
return Outcome{
|
|
Passed: true,
|
|
Summary: "skipped (no GPU present)",
|
|
Extras: map[string]any{"skipped": true, "reason": "no_gpu_present"},
|
|
}
|
|
}
|
|
d.Info("GPU: found " + joinDevices(devices))
|
|
|
|
nvidia := nvidiaSmiList(ctx)
|
|
extras := map[string]any{
|
|
"pci_devices": devices,
|
|
"skipped": false,
|
|
}
|
|
if len(nvidia) > 0 {
|
|
extras["nvidia"] = nvidia
|
|
d.Info("GPU: nvidia-smi reports: " + strings.Join(nvidia, ", "))
|
|
}
|
|
return Outcome{
|
|
Passed: true,
|
|
Summary: formatCount(len(devices), "GPU present"),
|
|
Extras: extras,
|
|
}
|
|
}
|
|
|
|
// listGPUPCI runs `lspci -mm` and returns the whitespace-trimmed output
// lines that mention a "VGA compatible controller" or a "3D controller"
// (matched case-insensitively), one entry per matching line.
//
// Any failure to run lspci (binary missing, non-zero exit, cancelled ctx)
// yields nil, which the caller interprets as "no GPU" and auto-skips.
func listGPUPCI(ctx context.Context) []string {
	raw, err := exec.CommandContext(ctx, "lspci", "-mm").Output()
	if err != nil {
		return nil
	}

	var matches []string
	for _, entry := range strings.Split(string(raw), "\n") {
		// Compare against a lowercased copy; keep the original casing
		// for the string that is returned to the caller.
		lowered := strings.ToLower(entry)
		isVGA := strings.Contains(lowered, "vga compatible controller")
		is3D := strings.Contains(lowered, "3d controller")
		if !isVGA && !is3D {
			continue
		}
		matches = append(matches, strings.TrimSpace(entry))
	}
	return matches
}
|
|
|
|
// nvidiaSmiList runs `nvidia-smi -L` and returns its output as a slice of
// whitespace-trimmed, non-empty lines (nvidia-smi prints one line per
// card in this mode).
//
// The result is nil when nvidia-smi is not installed, exits with an
// error, or prints nothing usable.
func nvidiaSmiList(ctx context.Context) []string {
	raw, err := exec.CommandContext(ctx, "nvidia-smi", "-L").Output()
	if err != nil {
		return nil
	}

	var cards []string
	for _, entry := range strings.Split(string(raw), "\n") {
		trimmed := strings.TrimSpace(entry)
		if trimmed == "" {
			// Skip blank lines, including the one after the final newline.
			continue
		}
		cards = append(cards, trimmed)
	}
	return cards
}
|
|
|
|
func joinDevices(devs []string) string {
|
|
if len(devs) == 0 {
|
|
return ""
|
|
}
|
|
if len(devs) == 1 {
|
|
return devs[0]
|
|
}
|
|
return devs[0] + " (+" + strings.TrimSpace(formatCount(len(devs)-1, "more")) + ")"
|
|
}
|