Post-repair hardware validation pipeline for Proxmox cluster hosts. Go orchestrator + in-image agent + mkosi live image + bundled dnsmasq PXE + SQLite + HTMX/SSE UI + notify registry + janitor + full docs.
This commit is contained in:
@@ -0,0 +1,32 @@
|
||||
# live-image/Makefile — builds the Debian live image that PXE-booted
|
||||
# hosts land in. Requires a Linux host (or WSL) with mkosi installed.
|
||||
# On native Windows this Makefile short-circuits with a clear message.
|
||||
|
||||
# Work out the host platform once, while the Makefile is being parsed.
# When OS is Windows_NT the value is set directly, without calling `uname`;
# on every other host `uname -s` supplies the name.
ifneq ($(OS),Windows_NT)
UNAME_S := $(shell uname -s)
else
UNAME_S := Windows
endif
|
||||
|
||||
# Repository root: the parent of the directory make is running in.
REPO_ROOT := $(abspath $(CURDIR)/..)
# Cross-compiled agent binary (linux/amd64), kept under <repo>/bin.
AGENT_BIN := $(REPO_ROOT)/bin/vetting-agent.linux-amd64
|
||||
|
||||
# None of these goals names a file on disk, so each is declared phony.
.PHONY: all
.PHONY: agent
.PHONY: check-linux clean

# Default goal: validate the host, make sure the agent binary is built,
# then run mkosi with --force (replace any existing output).
all: check-linux agent
	mkosi --force build

# Convenience alias for the cross-compiled agent binary.
agent: $(AGENT_BIN)
|
||||
|
||||
# Build the agent binary, cross-compiled for linux/amd64 whatever the host is.
#
# The rule depends on the always-out-of-date FORCE target so that `go build`
# runs on every invocation. Make has no view of the Go sources behind
# ./cmd/vetting-agent; without a prerequisite an existing binary would be
# considered up to date forever, and a stale agent would be used after
# source changes. Freshness is left to the Go toolchain instead.
$(AGENT_BIN): FORCE
	cd "$(REPO_ROOT)" && GOOS=linux GOARCH=amd64 go build -o "$@" ./cmd/vetting-agent

# Target with neither prerequisites nor recipe: anything that depends on it
# is always remade.
.PHONY: FORCE
FORCE:
|
||||
|
||||
# Guard target: stop unless the host is Linux and mkosi is on PATH.
# The ifneq is resolved while the Makefile is parsed, so on a Linux host the
# platform error lines are not part of the recipe at all.
check-linux:
ifneq ($(UNAME_S),Linux)
	@echo "ERROR: live-image must be built on Linux (you're on $(UNAME_S))."
	@echo "Run 'wsl make -C live-image all' from Windows instead."
	@exit 1
endif
	@if ! command -v mkosi >/dev/null 2>&1; then echo "ERROR: mkosi not installed. Try: apt install mkosi"; exit 1; fi
|
||||
|
||||
# Directories removed by `make clean`.
clean_dirs := build mkosi.output mkosi.cache

# Remove the directories listed above; missing ones are ignored by rm -f.
clean:
	rm -rf $(clean_dirs)
|
||||
@@ -0,0 +1,36 @@
|
||||
# Vetting live image
|
||||
|
||||
Debian-based Linux live image that PXE-booted hosts drop into. Runs the
|
||||
`vetting-agent` binary under systemd and reaches back to the orchestrator
|
||||
over HTTP+SSE.
|
||||
|
||||
## Building
|
||||
|
||||
Must be built on Linux (or WSL). On Windows:
|
||||
|
||||
```sh
|
||||
wsl make -C live-image all
|
||||
```
|
||||
|
||||
On Linux:
|
||||
|
||||
```sh
|
||||
make -C live-image all
|
||||
```
|
||||
|
||||
This produces `live-image/build/vmlinuz` and `live-image/build/initrd.img`.
|
||||
Copy (or symlink) them into the directory configured as `pxe.live_dir` in
|
||||
`deploy/vetting.yaml`; the orchestrator serves them at `/live/*`.
|
||||
|
||||
## iPXE binaries
|
||||
|
||||
The dnsmasq supervisor expects `ipxe.efi` and `undionly.kpxe` to live in
|
||||
`pxe.tftp_root`. Fetch the latest release binaries from
|
||||
https://boot.ipxe.org and drop them in that directory. The Makefile does
|
||||
not download them automatically, so that the operator can verify their SHA256 checksums before use.
|
||||
|
||||
## WSL prerequisites (Windows dev)
|
||||
|
||||
```sh
|
||||
sudo apt install mkosi debootstrap squashfs-tools dosfstools
|
||||
```
|
||||
@@ -0,0 +1,38 @@
|
||||
# Vetting live image (Phase 2 skeleton).
|
||||
#
|
||||
# Produces a Debian-based rootfs packaged as squashfs plus a kernel
|
||||
# image, ready to be served over HTTP to iPXE. The image is deliberately
|
||||
# small: only what the agent needs to run Phase 2 (the Hello / Claim /
|
||||
# Heartbeat loop). Phase 4+ adds smartctl, stress-ng, fio, iperf3, etc.
|
||||
|
||||
[Distribution]
|
||||
Distribution=debian
|
||||
Release=bookworm
|
||||
Repositories=main
|
||||
|
||||
[Output]
|
||||
Format=directory
|
||||
Output=build
|
||||
|
||||
[Content]
|
||||
Bootable=yes
|
||||
BuildPackages=
|
||||
Packages=
|
||||
systemd
|
||||
systemd-sysv
|
||||
udev
|
||||
linux-image-amd64
|
||||
live-boot
|
||||
iproute2
|
||||
iputils-ping
|
||||
openssh-server
|
||||
ca-certificates
|
||||
curl
|
||||
dmidecode
|
||||
pciutils
|
||||
usbutils
|
||||
|
||||
# Phase 4 will add: smartmontools stress-ng fio iperf3 lshw lm-sensors
|
||||
|
||||
[Host]
|
||||
# Copy the prebuilt Go agent in from the repo root via postinst.
|
||||
@@ -0,0 +1,15 @@
|
||||
#!/bin/sh
|
||||
# mkosi postinst: install the vetting-agent binary and its systemd unit
|
||||
# into the image. The binary must already be built for linux-amd64 at
|
||||
# repo root under bin/vetting-agent.linux-amd64 (the top-level Makefile
|
||||
# does this via `make agent-linux`).
|
||||
set -eu

# BUILDROOT is the root of the image being populated. Fail with an explicit
# message if it is missing rather than a bare "parameter not set".
: "${BUILDROOT:?BUILDROOT is not set; this script is meant to be run by mkosi}"

# Resolve the source root once and use it for every input path, so the ".."
# fallback applies to the unit file as well as to the agent binary. With
# `set -u`, a bare $SRCDIR would abort the script half-way when it is unset.
# NOTE(review): the Makefile writes the agent to <repo root>/bin, while the
# unit is looked up under mkosi.skeleton/ in the same root. Confirm that the
# SRCDIR mkosi passes in really contains both.
SRC_ROOT="${SRCDIR:-..}"

AGENT_BIN="$SRC_ROOT/bin/vetting-agent.linux-amd64"
UNIT_SRC="$SRC_ROOT/mkosi.skeleton/etc/systemd/system/vetting-agent.service"
UNIT_DIR="$BUILDROOT/etc/systemd/system"

# Give a actionable error when the agent has not been built yet.
if [ ! -f "$AGENT_BIN" ]; then
    echo "ERROR: $AGENT_BIN not found; build the agent before running mkosi." >&2
    exit 1
fi

install -D -m 0755 "$AGENT_BIN" "$BUILDROOT/usr/local/sbin/vetting-agent"
install -D -m 0644 "$UNIT_SRC" "$UNIT_DIR/vetting-agent.service"

# Hook the unit into multi-user.target so it starts at boot. `ln` does not
# create parent directories, so make sure the .wants directory exists first.
mkdir -p "$UNIT_DIR/multi-user.target.wants"
ln -sf /etc/systemd/system/vetting-agent.service \
    "$UNIT_DIR/multi-user.target.wants/vetting-agent.service"
|
||||
@@ -0,0 +1,18 @@
|
||||
[Unit]
|
||||
Description=Vetting hardware-validation agent
|
||||
# Wait until networking is minimally up (the agent itself retries
|
||||
# dial failures, but no point hammering before DHCP finishes).
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
ExecStart=/usr/local/sbin/vetting-agent
|
||||
Restart=on-failure
|
||||
RestartSec=5s
|
||||
# The agent reads /proc/cmdline; it needs no extra env.
|
||||
StandardOutput=journal+console
|
||||
StandardError=journal+console
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
Reference in New Issue
Block a user