9bb4b09a04
CI / Lint + build + test (push) Has been cancelled
Post-repair hardware validation pipeline for Proxmox cluster hosts. Go orchestrator + in-image agent + mkosi live image + bundled dnsmasq PXE + SQLite + HTMX/SSE UI + notify registry + janitor + full docs.
226 lines
7.2 KiB
Go
226 lines
7.2 KiB
Go
//go:build e2e
|
|
|
|
// Package e2e exercises the orchestrator end-to-end against a real QEMU
// VM PXE-booting from the orchestrator-supervised dnsmasq into the
// mkosi-built live image.
//
// This test is gated behind the `e2e` build tag because:
//   - it requires root (for bridge + qemu-system-x86_64 network setup),
//   - it needs a pre-built live image at live-image/out/{vmlinuz,initrd.img},
//   - it only runs on Linux (mkosi + qemu-kvm).
//
// Run with:
//
//	sudo go test -tags=e2e -run TestQEMUFullRun ./test/e2e/...
//
// See docs/operations.md for the manual QEMU invocation equivalent.
|
|
package e2e
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"runtime"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
// Tunables — overridable via env for CI, defaults match the manual
|
|
// setup documented in docs/operations.md.
|
|
var (
|
|
bridgeName = envOr("VETTING_E2E_BRIDGE", "br-vetting")
|
|
liveKernel = envOr("VETTING_E2E_KERNEL", "live-image/out/vmlinuz")
|
|
liveInitrd = envOr("VETTING_E2E_INITRD", "live-image/out/initrd.img")
|
|
testMAC = envOr("VETTING_E2E_MAC", "52:54:00:12:34:56")
|
|
publicURL = envOr("VETTING_E2E_URL", "http://10.77.0.1:8080")
|
|
// Overall budget for the run to reach Completed. Stage timeouts in
|
|
// the config should be tuned down for E2E to well under this.
|
|
runBudget = 10 * time.Minute
|
|
)
|
|
|
|
// envOr returns the value of environment variable k, or d when the
// variable is unset or set to the empty string.
func envOr(k, d string) string {
	value := os.Getenv(k)
	if value == "" {
		return d
	}
	return value
}
|
|
|
|
// TestQEMUFullRun boots a QEMU VM against a running orchestrator and
|
|
// waits for the Run state to reach Completed.
|
|
//
|
|
// Preconditions (test skips unless all are true):
|
|
// - Linux host
|
|
// - Running as root (bridge networking + qemu-kvm)
|
|
// - `qemu-system-x86_64` on PATH
|
|
// - Live image built (kernel + initrd exist)
|
|
// - An orchestrator is already running at $VETTING_E2E_URL with a
|
|
// host registered for $VETTING_E2E_MAC and a run already queued
|
|
// (start the run via the UI before invoking this test, or via the
|
|
// orchestrator's /hosts/{id}/start endpoint).
|
|
//
|
|
// The test exercises the real PXE path. It does NOT embed its own
|
|
// orchestrator because dnsmasq needs CAP_NET_ADMIN and the test binary
|
|
// should stay focused on the "did the run complete?" assertion.
|
|
func TestQEMUFullRun(t *testing.T) {
|
|
if runtime.GOOS != "linux" {
|
|
t.Skip("E2E test requires Linux")
|
|
}
|
|
if os.Geteuid() != 0 {
|
|
t.Skip("E2E test requires root (sudo go test -tags=e2e ...)")
|
|
}
|
|
if _, err := exec.LookPath("qemu-system-x86_64"); err != nil {
|
|
t.Skip("qemu-system-x86_64 not on PATH")
|
|
}
|
|
if _, err := os.Stat(liveKernel); err != nil {
|
|
t.Skipf("live kernel missing at %s (run `make live-image`)", liveKernel)
|
|
}
|
|
if _, err := os.Stat(liveInitrd); err != nil {
|
|
t.Skipf("live initrd missing at %s", liveInitrd)
|
|
}
|
|
if err := pingOrchestrator(publicURL); err != nil {
|
|
t.Skipf("orchestrator not reachable at %s: %v", publicURL, err)
|
|
}
|
|
|
|
runID, err := findQueuedRunForMAC(publicURL, testMAC)
|
|
if err != nil {
|
|
t.Fatalf("no queued run for %s: %v (register the host and click Start Vetting first)", testMAC, err)
|
|
}
|
|
t.Logf("driving run %d for MAC %s", runID, testMAC)
|
|
|
|
disk, cleanup := makeThrowawayDisk(t)
|
|
defer cleanup()
|
|
|
|
qemuCtx, cancel := context.WithTimeout(context.Background(), runBudget)
|
|
defer cancel()
|
|
|
|
cmd := exec.CommandContext(qemuCtx, "qemu-system-x86_64",
|
|
"-enable-kvm", "-cpu", "host", "-smp", "4", "-m", "4096",
|
|
"-netdev", "bridge,id=n0,br="+bridgeName,
|
|
"-device", "virtio-net-pci,netdev=n0,mac="+testMAC,
|
|
"-drive", "file="+disk+",format=raw,if=virtio",
|
|
"-boot", "n", "-serial", "file:"+filepath.Join(os.TempDir(), fmt.Sprintf("vetting-e2e-%d.serial", runID)),
|
|
"-display", "none",
|
|
)
|
|
cmd.Stdout = testLogger{t}
|
|
cmd.Stderr = testLogger{t}
|
|
if err := cmd.Start(); err != nil {
|
|
t.Fatalf("start qemu: %v", err)
|
|
}
|
|
defer func() {
|
|
_ = cmd.Process.Kill()
|
|
_ = cmd.Wait()
|
|
}()
|
|
|
|
// Poll the orchestrator until the run reaches a terminal state.
|
|
poll := time.NewTicker(5 * time.Second)
|
|
defer poll.Stop()
|
|
for {
|
|
select {
|
|
case <-qemuCtx.Done():
|
|
t.Fatalf("run %d did not complete within %s", runID, runBudget)
|
|
case <-poll.C:
|
|
state, err := getRunState(publicURL, runID)
|
|
if err != nil {
|
|
t.Logf("poll state: %v (will retry)", err)
|
|
continue
|
|
}
|
|
t.Logf("run %d state = %s", runID, state)
|
|
switch state {
|
|
case "Completed":
|
|
return // green path
|
|
case "FailedHolding", "Failed", "Released":
|
|
t.Fatalf("run %d ended in non-success state %q", runID, state)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// ---- helpers ------------------------------------------------------------
|
|
|
|
// pingOrchestrator checks that something answers HTTP at url by issuing
// GET url+"/login". Any response with a status below 500 counts as
// reachable; a transport error or a 5xx status is returned as an error.
//
// The request is bounded by a 10-second timeout so that an orchestrator
// which accepts the connection but never answers cannot hang the
// precondition check indefinitely.
func pingOrchestrator(url string) error {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url+"/login", nil)
	if err != nil {
		return err
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode >= 500 {
		return fmt.Errorf("status %d", resp.StatusCode)
	}
	return nil
}
|
|
|
|
// findQueuedRunForMAC returns the ID of the run the test should drive.
//
// There is no debug endpoint for looking a run up by MAC yet (the
// orchestrator stays browser-session-gated for UI routes), so baseURL and
// mac are currently unused and the caller must supply the run ID through
// the VETTING_E2E_RUN_ID environment variable. This is a pragmatic hack:
// the E2E harness is developer-facing and the alternative would be
// scraping HTML.
//
// The variable must hold a single base-10 integer, optionally surrounded
// by whitespace. An unset/empty variable, a non-numeric value, or a
// number followed by other text (e.g. "12abc") yields an error and a
// zero ID.
func findQueuedRunForMAC(baseURL, mac string) (int64, error) {
	raw := os.Getenv("VETTING_E2E_RUN_ID")
	if raw == "" {
		return 0, fmt.Errorf("set VETTING_E2E_RUN_ID (no debug API for MAC lookup yet)")
	}

	var (
		id       int64
		trailing string
	)
	// The extra %s only matches when non-space input follows the number,
	// so the count of scanned items tells the three cases apart:
	// 0 = no number, 1 = exactly a number, 2 = number plus garbage.
	// The error is only meaningful for the 0 case; with 1 item scanned it
	// merely reports that %s found nothing, which is what we want.
	scanned, err := fmt.Sscanf(raw, "%d%s", &id, &trailing)
	switch scanned {
	case 0:
		return 0, fmt.Errorf("parse VETTING_E2E_RUN_ID %q: %w", raw, err)
	case 2:
		return 0, fmt.Errorf("parse VETTING_E2E_RUN_ID %q: unexpected trailing %q", raw, trailing)
	}
	return id, nil
}
|
|
|
|
// getRunState reads the run's current state via the report route's
// fall-through: /reports/{id} returns 404 until Completed, which gives
// us a cheap terminal-check without a JSON API. For intermediate
// states we need a debug endpoint — deliberately left as a TODO so
// the test doesn't depend on an API surface that isn't stable.
//
// Status mapping:
//   - 200      -> "Completed"
//   - 404      -> "InProgress"
//   - 401, 403 -> error asking for VETTING_E2E_COOKIE
//   - other    -> error carrying the status code
//
// When VETTING_E2E_COOKIE is set, its value is sent verbatim as the
// request's Cookie header (e.g. "session=abc123") so the session-gated
// route can be reached. Each request is bounded by a 10-second timeout
// so a stalled orchestrator cannot block the caller's poll loop past
// its own deadline.
//
// NOTE(review): redirects are followed, so an orchestrator that answers
// unauthenticated requests with a redirect to a 200 login page would be
// reported as "Completed" — confirm the route really returns 401/403.
func getRunState(baseURL string, runID int64) (string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// Proxy: if /reports/{id} returns 200, the run is Completed.
	reportURL := fmt.Sprintf("%s/reports/%d", baseURL, runID)
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, reportURL, nil)
	if err != nil {
		return "", err
	}
	if cookie := os.Getenv("VETTING_E2E_COOKIE"); cookie != "" {
		req.Header.Set("Cookie", cookie)
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	// Drain the body so the transport can reuse the connection for the
	// next poll; the content itself is irrelevant here.
	_, _ = io.Copy(io.Discard, resp.Body)

	switch resp.StatusCode {
	case http.StatusOK:
		return "Completed", nil
	case http.StatusUnauthorized, http.StatusForbidden:
		// Session-gated; caller must export VETTING_E2E_COOKIE to bypass.
		return "", fmt.Errorf("auth required; set VETTING_E2E_COOKIE")
	case http.StatusNotFound:
		return "InProgress", nil
	default:
		return "", fmt.Errorf("unexpected %d", resp.StatusCode)
	}
}
|
|
|
|
// makeThrowawayDisk creates a 4G raw disk image named test-disk.img in a
// fresh per-test temporary directory by running `qemu-img create`. If the
// command fails, the test is aborted with the command's combined output.
//
// It returns the image path together with a function that removes the
// image file (removal errors are ignored).
func makeThrowawayDisk(t *testing.T) (string, func()) {
	t.Helper()

	imgPath := filepath.Join(t.TempDir(), "test-disk.img")
	output, err := exec.Command("qemu-img", "create", "-f", "raw", imgPath, "4G").CombinedOutput()
	if err != nil {
		t.Fatalf("qemu-img create: %v\n%s", err, strings.TrimSpace(string(output)))
	}

	remove := func() { _ = os.Remove(imgPath) }
	return imgPath, remove
}
|
|
|
|
// testLogger adapts a *testing.T to an io.Writer so that exec.Cmd output
// (QEMU's stdout/stderr) lands in the test's own log stream, attributed
// to the test, rather than appearing as an orphaned blob.
type testLogger struct{ t *testing.T }

// Write logs p with a "qemu: " prefix after stripping trailing CR/LF
// characters. It never fails and always reports all of p as consumed.
func (tl testLogger) Write(p []byte) (int, error) {
	text := strings.TrimRight(string(p), "\r\n")
	tl.t.Logf("qemu: %s", text)
	return len(p), nil
}
|
|
|
|
// Blank-identifier reference that keeps the encoding/json import in use
// (Go rejects unused imports). The package is imported ahead of time so
// that future expansions can parse the orchestrator's response bodies
// once a debug API lands.
var _ = json.Marshal
|