//go:build e2e // Package e2e exercises the orchestrator end-to-end against a real QEMU // VM PXE-booting from the orchestrator-supervised dnsmasq into the // mkosi-built live image. // // This test is gated behind the `e2e` build tag because: // - it requires root (for bridge + qemu-system-x86_64 network setup), // - it needs a pre-built live image at live-image/out/{vmlinuz,initrd.img}, // - it only runs on Linux (mkosi + qemu-kvm). // // Run with: // // sudo go test -tags=e2e -run TestQEMUFullRun ./test/e2e/... // // See docs/operations.md for the manual QEMU invocation equivalent. package e2e import ( "context" "encoding/json" "fmt" "io" "net/http" "os" "os/exec" "path/filepath" "runtime" "strings" "testing" "time" ) // Tunables — overridable via env for CI, defaults match the manual // setup documented in docs/operations.md. var ( bridgeName = envOr("VETTING_E2E_BRIDGE", "br-vetting") liveKernel = envOr("VETTING_E2E_KERNEL", "live-image/out/vmlinuz") liveInitrd = envOr("VETTING_E2E_INITRD", "live-image/out/initrd.img") testMAC = envOr("VETTING_E2E_MAC", "52:54:00:12:34:56") publicURL = envOr("VETTING_E2E_URL", "http://10.77.0.1:8080") // Overall budget for the run to reach Completed. Stage timeouts in // the config should be tuned down for E2E to well under this. runBudget = 10 * time.Minute ) func envOr(k, d string) string { if v := os.Getenv(k); v != "" { return v } return d } // TestQEMUFullRun boots a QEMU VM against a running orchestrator and // waits for the Run state to reach Completed. // // Preconditions (test skips unless all are true): // - Linux host // - Running as root (bridge networking + qemu-kvm) // - `qemu-system-x86_64` on PATH // - Live image built (kernel + initrd exist) // - An orchestrator is already running at $VETTING_E2E_URL with a // host registered for $VETTING_E2E_MAC and a run already queued // (start the run via the UI before invoking this test, or via the // orchestrator's /hosts/{id}/start endpoint). // // The test exercises the real PXE path. It does NOT embed its own // orchestrator because dnsmasq needs CAP_NET_ADMIN and the test binary // should stay focused on the "did the run complete?" assertion. func TestQEMUFullRun(t *testing.T) { if runtime.GOOS != "linux" { t.Skip("E2E test requires Linux") } if os.Geteuid() != 0 { t.Skip("E2E test requires root (sudo go test -tags=e2e ...)") } if _, err := exec.LookPath("qemu-system-x86_64"); err != nil { t.Skip("qemu-system-x86_64 not on PATH") } if _, err := os.Stat(liveKernel); err != nil { t.Skipf("live kernel missing at %s (run `make live-image`)", liveKernel) } if _, err := os.Stat(liveInitrd); err != nil { t.Skipf("live initrd missing at %s", liveInitrd) } if err := pingOrchestrator(publicURL); err != nil { t.Skipf("orchestrator not reachable at %s: %v", publicURL, err) } runID, err := findQueuedRunForMAC(publicURL, testMAC) if err != nil { t.Fatalf("no queued run for %s: %v (register the host and click Start Vetting first)", testMAC, err) } t.Logf("driving run %d for MAC %s", runID, testMAC) disk, cleanup := makeThrowawayDisk(t) defer cleanup() qemuCtx, cancel := context.WithTimeout(context.Background(), runBudget) defer cancel() cmd := exec.CommandContext(qemuCtx, "qemu-system-x86_64", "-enable-kvm", "-cpu", "host", "-smp", "4", "-m", "4096", "-netdev", "bridge,id=n0,br="+bridgeName, "-device", "virtio-net-pci,netdev=n0,mac="+testMAC, "-drive", "file="+disk+",format=raw,if=virtio", "-boot", "n", "-serial", "file:"+filepath.Join(os.TempDir(), fmt.Sprintf("vetting-e2e-%d.serial", runID)), "-display", "none", ) cmd.Stdout = testLogger{t} cmd.Stderr = testLogger{t} if err := cmd.Start(); err != nil { t.Fatalf("start qemu: %v", err) } defer func() { _ = cmd.Process.Kill() _ = cmd.Wait() }() // Poll the orchestrator until the run reaches a terminal state. poll := time.NewTicker(5 * time.Second) defer poll.Stop() for { select { case <-qemuCtx.Done(): t.Fatalf("run %d did not complete within %s", runID, runBudget) case <-poll.C: state, err := getRunState(publicURL, runID) if err != nil { t.Logf("poll state: %v (will retry)", err) continue } t.Logf("run %d state = %s", runID, state) switch state { case "Completed": return // green path case "FailedHolding", "Failed", "Released": t.Fatalf("run %d ended in non-success state %q", runID, state) } } } } // ---- helpers ------------------------------------------------------------ func pingOrchestrator(url string) error { req, err := http.NewRequest(http.MethodGet, url+"/login", nil) if err != nil { return err } resp, err := http.DefaultClient.Do(req) if err != nil { return err } defer resp.Body.Close() if resp.StatusCode >= 500 { return fmt.Errorf("status %d", resp.StatusCode) } return nil } // findQueuedRunForMAC hits a hypothetical /api/v1/runs?mac=... debug // endpoint. Since Phase 6 doesn't add that endpoint (orchestrator stays // browser-session-gated for UI routes), we fall back to requiring the // caller to set VETTING_E2E_RUN_ID if the orchestrator hasn't been // extended with a debug listing. This is a pragmatic hack — the E2E // harness is developer-facing and the alternative would be scraping // HTML. func findQueuedRunForMAC(baseURL, mac string) (int64, error) { if s := os.Getenv("VETTING_E2E_RUN_ID"); s != "" { var id int64 _, err := fmt.Sscanf(s, "%d", &id) return id, err } return 0, fmt.Errorf("set VETTING_E2E_RUN_ID (no debug API for MAC lookup yet)") } // getRunState reads the run's current state via the report route's // fall-through: /reports/{id} returns 404 until Completed, which gives // us a cheap terminal-check without a JSON API. For intermediate // states we need a debug endpoint — deliberately left as a TODO so // the test doesn't depend on an API surface that isn't stable. func getRunState(baseURL string, runID int64) (string, error) { // Proxy: if /reports/{id} returns 200, the run is Completed. resp, err := http.Get(fmt.Sprintf("%s/reports/%d", baseURL, runID)) if err != nil { return "", err } defer resp.Body.Close() _, _ = io.Copy(io.Discard, resp.Body) switch resp.StatusCode { case 200: return "Completed", nil case 401, 403: // Session-gated; caller must export VETTING_E2E_COOKIE to bypass. return "", fmt.Errorf("auth required; set VETTING_E2E_COOKIE") case 404: return "InProgress", nil default: return "", fmt.Errorf("unexpected %d", resp.StatusCode) } } func makeThrowawayDisk(t *testing.T) (string, func()) { t.Helper() path := filepath.Join(t.TempDir(), "test-disk.img") cmd := exec.Command("qemu-img", "create", "-f", "raw", path, "4G") if out, err := cmd.CombinedOutput(); err != nil { t.Fatalf("qemu-img create: %v\n%s", err, strings.TrimSpace(string(out))) } return path, func() { _ = os.Remove(path) } } // testLogger lets exec.Cmd write into the test's log stream so QEMU's // stderr shows up with the test name, not as an orphaned blob. type testLogger struct{ t *testing.T } func (w testLogger) Write(p []byte) (int, error) { w.t.Logf("qemu: %s", strings.TrimRight(string(p), "\r\n")) return len(p), nil } // Compile-time reminder: json is imported so future expansions can // parse the orchestrator's response bodies when a debug API lands. var _ = json.Marshal