Heartbeat-first dispatch: retire WoL-as-default, add WaitingReboot
CI / Lint + build + test (push) Has been cancelled
CI / Lint + build + test (push) Has been cancelled
Every supported host runs vetting-reporter in-OS and heartbeats every 30s. WoL was never the thing that started vetting — the heartbeat response's reboot_for_vetting command was. Firing WoL first only crowded the run log with misleading diagnostics when the real failure mode is "reporter isn't installed."

- StartRun 409s if the host hasn't heartbeated within 60s, pointing the operator at /register/quick.sh.
- Dispatcher re-checks LastSeenAt at dispatch time (run may sit in Queued long enough for the host to go offline); stale hosts mark the run Failed with failed_stage=dispatch instead of looping.
- New StateWaitingReboot + TriggerRebootCommanded capture the actual semantics. StateWaitingWoL kept as the hook point for a future manual-override button.
- Tile disables the Start button with a quick.sh tooltip when the host is offline, matching the server-side 409.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -9,19 +9,19 @@ import (
|
||||
// node indexes for the default pipeline layout: pre-stages (3) + stage
|
||||
// rows (9) + terminal Completed (1) = 13 nodes.
|
||||
const (
|
||||
idxQueued = 0
|
||||
idxWaitingWoL = 1
|
||||
idxBooting = 2
|
||||
idxInventory = 3
|
||||
idxSpecValidate = 4
|
||||
idxSMART = 5
|
||||
idxCPUStress = 6
|
||||
idxStorage = 7
|
||||
idxNetwork = 8
|
||||
idxGPU = 9
|
||||
idxPSU = 10
|
||||
idxReporting = 11
|
||||
idxCompleted = 12
|
||||
idxQueued = 0
|
||||
idxWaitingReboot = 1
|
||||
idxBooting = 2
|
||||
idxInventory = 3
|
||||
idxSpecValidate = 4
|
||||
idxSMART = 5
|
||||
idxCPUStress = 6
|
||||
idxStorage = 7
|
||||
idxNetwork = 8
|
||||
idxGPU = 9
|
||||
idxPSU = 10
|
||||
idxReporting = 11
|
||||
idxCompleted = 12
|
||||
)
|
||||
|
||||
// seedStages returns a fresh all-pending stage slice in the canonical order.
|
||||
@@ -48,12 +48,12 @@ func TestBuildPipeline_NoRun(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestBuildPipeline_GhostStagesBeforeClaim models the real WaitingWoL
|
||||
// TestBuildPipeline_GhostStagesBeforeClaim models the real WaitingReboot
|
||||
// case: the run exists but agent hasn't called /claim yet, so there are
|
||||
// no stage rows. Pipeline must still render all 9 stage nodes as ghosts
|
||||
// so the operator sees the full timeline ahead of them.
|
||||
func TestBuildPipeline_GhostStagesBeforeClaim(t *testing.T) {
|
||||
run := &model.Run{State: model.StateWaitingWoL}
|
||||
run := &model.Run{State: model.StateWaitingReboot}
|
||||
nodes := BuildPipeline(run, nil)
|
||||
if len(nodes) != 13 {
|
||||
t.Fatalf("len = %d, want 13", len(nodes))
|
||||
@@ -61,8 +61,8 @@ func TestBuildPipeline_GhostStagesBeforeClaim(t *testing.T) {
|
||||
if nodes[idxQueued].State != "passed" {
|
||||
t.Errorf("Queued = %q, want passed", nodes[idxQueued].State)
|
||||
}
|
||||
if nodes[idxWaitingWoL].State != "running" {
|
||||
t.Errorf("WaitingWoL = %q, want running", nodes[idxWaitingWoL].State)
|
||||
if nodes[idxWaitingReboot].State != "running" {
|
||||
t.Errorf("WaitingReboot = %q, want running", nodes[idxWaitingReboot].State)
|
||||
}
|
||||
// All 9 stage ghosts must be pending — nothing has started yet.
|
||||
for i := idxInventory; i <= idxReporting; i++ {
|
||||
@@ -179,7 +179,24 @@ func TestBuildPipeline_QueuedNow(t *testing.T) {
|
||||
if nodes[idxQueued].State != "running" {
|
||||
t.Errorf("Queued = %q, want running", nodes[idxQueued].State)
|
||||
}
|
||||
if nodes[idxWaitingWoL].State != "pending" {
|
||||
t.Errorf("WaitingWoL = %q, want pending", nodes[idxWaitingWoL].State)
|
||||
if nodes[idxWaitingReboot].State != "pending" {
|
||||
t.Errorf("WaitingReboot = %q, want pending", nodes[idxWaitingReboot].State)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBuildPipeline_PreStageRunning_WaitingReboot confirms the pre-stage
|
||||
// node for WaitingReboot lights up while the run sits there — the new
|
||||
// happy-path state must map onto its pipeline slot.
|
||||
func TestBuildPipeline_PreStageRunning_WaitingReboot(t *testing.T) {
|
||||
run := &model.Run{State: model.StateWaitingReboot}
|
||||
nodes := BuildPipeline(run, seedStages())
|
||||
if nodes[idxQueued].State != "passed" {
|
||||
t.Errorf("Queued = %q, want passed", nodes[idxQueued].State)
|
||||
}
|
||||
if nodes[idxWaitingReboot].State != "running" {
|
||||
t.Errorf("WaitingReboot = %q, want running", nodes[idxWaitingReboot].State)
|
||||
}
|
||||
if nodes[idxBooting].State != "pending" {
|
||||
t.Errorf("Booting = %q, want pending", nodes[idxBooting].State)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user