Files
Vetting/internal/api/heartbeat_test.go
T
josh d0bfae14c8
CI / Lint + build + test (push) Has been cancelled
Heartbeat-first dispatch: retire WoL-as-default, add WaitingReboot
Every supported host runs vetting-reporter in-OS and heartbeats every
30s. WoL was never the thing that started vetting — the heartbeat
response's reboot_for_vetting command was. Firing WoL first only
crowded the run log with misleading diagnostics when the real failure
mode is "reporter isn't installed."

- StartRun 409s if the host hasn't heartbeated within 60s, pointing
  the operator at /register/quick.sh.
- Dispatcher re-checks LastSeenAt at dispatch time (run may sit in
  Queued long enough for the host to go offline); stale hosts mark
  the run Failed with failed_stage=dispatch instead of looping.
- New StateWaitingReboot + TriggerRebootCommanded capture the actual
  semantics. StateWaitingWoL kept as the hook point for a future
  manual-override button.
- Tile disables the Start button with a quick.sh tooltip when the
  host is offline, matching the server-side 409.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-18 01:10:34 -04:00

258 lines
7.6 KiB
Go

package api_test
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"path/filepath"
"strings"
"testing"
"time"
"github.com/go-chi/chi/v5"
"vetting/internal/api"
"vetting/internal/db"
"vetting/internal/events"
"vetting/internal/model"
"vetting/internal/orchestrator"
"vetting/internal/store"
)
// setupHeartbeat builds the smallest UI that can serve the heartbeat
// handler: a Hosts store on top of a fresh database file in the test's
// temp directory. Runner is deliberately left nil — the Phase-2 command
// path short-circuits to idle when Runner is absent, which is all the
// "no run yet" happy path needs. Tests that must drive the Phase-2
// transition should call setupHeartbeatWithRunner instead.
func setupHeartbeat(t *testing.T) (*api.UI, *store.Hosts) {
	t.Helper()

	dbPath := filepath.Join(t.TempDir(), "vetting.db")
	handle, err := db.Open(dbPath)
	if err != nil {
		t.Fatalf("open db: %v", err)
	}
	t.Cleanup(func() {
		// Best-effort close; the test has already finished by now.
		_ = handle.Close()
	})

	hostStore := &store.Hosts{DB: handle}
	ui := &api.UI{Hosts: hostStore}
	return ui, hostStore
}
// setupHeartbeatWithRunner is the fuller fixture: on top of the Hosts
// store it wires a Runs store and an orchestrator Runner (with Stages
// and a new event hub), all sharing one fresh database. Phase-2 tests
// use it to exercise the Queued → WaitingReboot transition and the
// 10-minute retry window on waiting states.
func setupHeartbeatWithRunner(t *testing.T) (*api.UI, *store.Hosts, *store.Runs) {
	t.Helper()

	handle, err := db.Open(filepath.Join(t.TempDir(), "vetting.db"))
	if err != nil {
		t.Fatalf("open db: %v", err)
	}
	t.Cleanup(func() {
		// Best-effort close; the test has already finished by now.
		_ = handle.Close()
	})

	hostStore := &store.Hosts{DB: handle}
	runStore := &store.Runs{DB: handle}
	stageStore := &store.Stages{DB: handle}

	runner := &orchestrator.Runner{
		Runs:     runStore,
		Hosts:    hostStore,
		Stages:   stageStore,
		EventHub: events.NewHub(),
	}
	return &api.UI{Hosts: hostStore, Runs: runStore, Runner: runner}, hostStore, runStore
}
// heartbeatReq returns a bodiless POST request aimed at the heartbeat
// URL for mac. A chi route context carrying the "mac" URL parameter is
// attached to the request context, so the handler can be invoked
// directly (presumably it reads the parameter through chi — confirm
// against the handler).
func heartbeatReq(mac string) *http.Request {
	routeCtx := chi.NewRouteContext()
	routeCtx.URLParams.Add("mac", mac)

	target := "/api/v1/hosts/" + mac + "/heartbeat"
	base := httptest.NewRequest(http.MethodPost, target, nil)
	ctx := context.WithValue(base.Context(), chi.RouteCtxKey, routeCtx)
	return base.WithContext(ctx)
}
// TestUIHeartbeat_Success checks that a heartbeat for a registered host
// is answered with 200 and ok:true, and that the host's LastSeenAt is
// stamped no earlier than just before the call.
func TestUIHeartbeat_Success(t *testing.T) {
	const mac = "aa:bb:cc:dd:ee:10"

	ctx := context.Background()
	ui, hosts := setupHeartbeat(t)

	hostID, err := hosts.Create(ctx, model.Host{
		Name:             "hb-host",
		MAC:              mac,
		WoLBroadcastIP:   "10.0.0.255",
		WoLPort:          9,
		ExpectedSpecYAML: "memory:\n total_gib: 16\n",
	})
	if err != nil {
		t.Fatalf("create: %v", err)
	}

	// Lower bound for the stamp, backed off by one second (presumably to
	// tolerate coarse timestamp storage — confirm against the store).
	floor := time.Now().UTC().Add(-time.Second)

	rec := httptest.NewRecorder()
	ui.Heartbeat(rec, heartbeatReq(mac))
	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, body = %q", rec.Code, rec.Body.String())
	}

	var payload map[string]any
	if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if payload["ok"] != true {
		t.Fatalf("response = %v, want ok:true", payload)
	}

	stored, err := hosts.Get(ctx, hostID)
	if err != nil {
		t.Fatalf("Get: %v", err)
	}
	switch {
	case stored.LastSeenAt == nil:
		t.Fatalf("LastSeenAt not stamped")
	case stored.LastSeenAt.Before(floor):
		t.Fatalf("LastSeenAt = %v, want >= %v", stored.LastSeenAt, floor)
	}
}
// TestUIHeartbeat_UnknownMAC checks that a heartbeat for a well-formed
// MAC with no matching host is answered with 404 and a JSON body whose
// "error" field is non-empty.
func TestUIHeartbeat_UnknownMAC(t *testing.T) {
	ui, _ := setupHeartbeat(t)

	rr := httptest.NewRecorder()
	ui.Heartbeat(rr, heartbeatReq("aa:bb:cc:dd:ee:ff"))
	if rr.Code != http.StatusNotFound {
		t.Fatalf("status = %d, want 404", rr.Code)
	}

	// Snapshot the body before decoding: the decoder drains rr.Body, and
	// a decode failure is only useful if it shows what was written.
	body := rr.Body.String()

	// Decode into a struct rather than map[string]string so unrelated
	// fields of other JSON types cannot produce a spurious decode error.
	var resp struct {
		Error string `json:"error"`
	}
	if err := json.NewDecoder(rr.Body).Decode(&resp); err != nil {
		t.Fatalf("decode: %v (body = %q)", err, body)
	}
	if resp.Error == "" {
		t.Fatalf("missing error body")
	}
}
// TestUIHeartbeat_BadMAC checks that a heartbeat whose MAC parameter is
// not a MAC address at all is rejected with 400.
func TestUIHeartbeat_BadMAC(t *testing.T) {
	ui, _ := setupHeartbeat(t)

	rec := httptest.NewRecorder()
	ui.Heartbeat(rec, heartbeatReq("not-a-mac"))

	if got := rec.Code; got != http.StatusBadRequest {
		t.Fatalf("status = %d, want 400", got)
	}
}
// TestUIHeartbeat_QueuedDispatches checks that a heartbeat from a host
// with a newly created run is answered with the reboot_for_vetting
// command for that run, and that the run is in WaitingReboot afterwards.
func TestUIHeartbeat_QueuedDispatches(t *testing.T) {
	const mac = "aa:bb:cc:dd:ee:20"

	ui, hosts, runs := setupHeartbeatWithRunner(t)
	ctx := context.Background()

	hostID, err := hosts.Create(ctx, model.Host{
		Name:             "hb-dispatch",
		MAC:              mac,
		WoLBroadcastIP:   "10.0.0.255",
		WoLPort:          9,
		ExpectedSpecYAML: "memory:\n total_gib: 16\n",
	})
	if err != nil {
		t.Fatalf("create host: %v", err)
	}

	runID, err := runs.Create(ctx, hostID, "deadbeef")
	if err != nil {
		t.Fatalf("create run: %v", err)
	}

	rec := httptest.NewRecorder()
	ui.Heartbeat(rec, heartbeatReq(mac))
	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, body = %q", rec.Code, rec.Body.String())
	}

	var resp struct {
		OK    bool   `json:"ok"`
		Cmd   string `json:"cmd"`
		RunID int64  `json:"run_id"`
	}
	if err := json.Unmarshal(rec.Body.Bytes(), &resp); err != nil {
		t.Fatalf("decode: %v", err)
	}
	wantCmd := resp.Cmd == "reboot_for_vetting"
	wantRun := resp.RunID == runID
	if !(wantCmd && wantRun) {
		t.Fatalf("response = %+v, want cmd=reboot_for_vetting run_id=%d", resp, runID)
	}

	// The heartbeat should have moved the run Queued → WaitingReboot
	// through the state machine.
	run, err := runs.Get(ctx, runID)
	if err != nil {
		t.Fatalf("get run: %v", err)
	}
	if run.State != model.StateWaitingReboot {
		t.Fatalf("state = %s, want WaitingReboot", run.State)
	}
}
// TestUIHeartbeat_WaitingRebootRetries covers the reporter crashing
// mid-reboot and coming back: the heartbeat must keep returning the
// reboot command while the run is still young (<10min). The run is
// forced straight into WaitingReboot, then a single heartbeat must be
// answered with 200 and reboot_for_vetting for that same run.
func TestUIHeartbeat_WaitingRebootRetries(t *testing.T) {
	ui, hosts, runs := setupHeartbeatWithRunner(t)
	ctx := context.Background()

	hostID, err := hosts.Create(ctx, model.Host{
		Name:             "hb-retry",
		MAC:              "aa:bb:cc:dd:ee:21",
		WoLBroadcastIP:   "10.0.0.255",
		WoLPort:          9,
		ExpectedSpecYAML: "memory:\n total_gib: 16\n",
	})
	if err != nil {
		t.Fatalf("create host: %v", err)
	}
	runID, err := runs.Create(ctx, hostID, "deadbeef")
	if err != nil {
		t.Fatalf("create run: %v", err)
	}
	if err := runs.SetState(ctx, runID, model.StateWaitingReboot); err != nil {
		t.Fatalf("set state: %v", err)
	}

	rr := httptest.NewRecorder()
	ui.Heartbeat(rr, heartbeatReq("aa:bb:cc:dd:ee:21"))

	// Check status and decode errors first, as the Queued dispatch test
	// does, so a handler failure is reported as such rather than as an
	// empty command.
	if rr.Code != http.StatusOK {
		t.Fatalf("status = %d, body = %q", rr.Code, rr.Body.String())
	}
	var resp struct {
		Cmd   string `json:"cmd"`
		RunID int64  `json:"run_id"`
	}
	if err := json.Unmarshal(rr.Body.Bytes(), &resp); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if resp.Cmd != "reboot_for_vetting" || resp.RunID != runID {
		t.Fatalf("response = %+v, want reboot_for_vetting retry", resp)
	}
}
// TestUIHeartbeat_NoRunIsIdle checks that a registered host with no run
// gets a successful heartbeat response that carries no command at all.
func TestUIHeartbeat_NoRunIsIdle(t *testing.T) {
	ui, hosts, _ := setupHeartbeatWithRunner(t)
	if _, err := hosts.Create(context.Background(), model.Host{
		Name:             "hb-idle",
		MAC:              "aa:bb:cc:dd:ee:22",
		WoLBroadcastIP:   "10.0.0.255",
		WoLPort:          9,
		ExpectedSpecYAML: "memory:\n total_gib: 16\n",
	}); err != nil {
		t.Fatalf("create host: %v", err)
	}

	rr := httptest.NewRecorder()
	ui.Heartbeat(rr, heartbeatReq("aa:bb:cc:dd:ee:22"))
	body := rr.Body.String()

	// The checks below only assert that substrings are absent, which any
	// error response would satisfy too; require success first so the
	// test cannot pass vacuously.
	if rr.Code != http.StatusOK {
		t.Fatalf("status = %d, body = %q", rr.Code, body)
	}

	// Idle = cmd omitted entirely; the agent's heartbeatResponse
	// decodes that as "", and handleResponse bails early.
	if strings.Contains(body, "reboot_for_vetting") {
		t.Fatalf("idle host got reboot cmd: %s", body)
	}
	if strings.Contains(body, `"cmd"`) {
		t.Fatalf("idle response should omit cmd, got: %s", body)
	}
}
// TestUIHeartbeat_CompletedRunIsIdle checks that a host whose only run
// has been set to Completed gets a successful heartbeat response that
// does not tell it to reboot for vetting.
func TestUIHeartbeat_CompletedRunIsIdle(t *testing.T) {
	ui, hosts, runs := setupHeartbeatWithRunner(t)
	ctx := context.Background()

	hostID, err := hosts.Create(ctx, model.Host{
		Name:             "hb-done",
		MAC:              "aa:bb:cc:dd:ee:23",
		WoLBroadcastIP:   "10.0.0.255",
		WoLPort:          9,
		ExpectedSpecYAML: "memory:\n total_gib: 16\n",
	})
	if err != nil {
		t.Fatalf("create host: %v", err)
	}
	runID, err := runs.Create(ctx, hostID, "deadbeef")
	if err != nil {
		t.Fatalf("create run: %v", err)
	}
	if err := runs.SetState(ctx, runID, model.StateCompleted); err != nil {
		t.Fatalf("set state: %v", err)
	}

	rr := httptest.NewRecorder()
	ui.Heartbeat(rr, heartbeatReq("aa:bb:cc:dd:ee:23"))
	body := rr.Body.String()

	// An error response would also lack the reboot command; require
	// success first so the absence check below is meaningful.
	if rr.Code != http.StatusOK {
		t.Fatalf("status = %d, body = %q", rr.Code, body)
	}
	if strings.Contains(body, "reboot_for_vetting") {
		t.Fatalf("completed run returned reboot cmd: %s", body)
	}
}