Heartbeat-first dispatch: retire WoL-as-default, add WaitingReboot
CI / Lint + build + test (push) Has been cancelled
CI / Lint + build + test (push) Has been cancelled
Every supported host runs vetting-reporter in-OS and heartbeats every 30s. WoL was never the thing that started vetting — the heartbeat response's reboot_for_vetting command was. Firing WoL first only crowded the run log with misleading diagnostics when the real failure mode is "reporter isn't installed."

- StartRun returns 409 if the host hasn't heartbeated within 60s, pointing the operator at /register/quick.sh.
- Dispatcher re-checks LastSeenAt at dispatch time (a run may sit in Queued long enough for the host to go offline); stale hosts mark the run Failed with failed_stage=dispatch instead of looping.
- New StateWaitingReboot + TriggerRebootCommanded capture the actual semantics. StateWaitingWoL is kept as the hook point for a future manual-override button.
- Tile disables the Start button with a quick.sh tooltip when the host is offline, matching the server-side 409.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -40,6 +40,40 @@ func TestNextForOverride(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestTriggerRebootCommanded exercises the new heartbeat-first trigger:
|
||||
// Queued → WaitingReboot, and any other current state is an error.
|
||||
func TestTriggerRebootCommanded(t *testing.T) {
|
||||
got, err := orchestrator.Next(model.StateQueued, orchestrator.TriggerRebootCommanded)
|
||||
if err != nil {
|
||||
t.Fatalf("Queued + RebootCommanded: %v", err)
|
||||
}
|
||||
if got != model.StateWaitingReboot {
|
||||
t.Fatalf("got %q, want %q", got, model.StateWaitingReboot)
|
||||
}
|
||||
for _, bad := range []model.RunState{
|
||||
model.StateRegistered, model.StateBooting, model.StateInventoryCheck, model.StateCompleted,
|
||||
} {
|
||||
if _, err := orchestrator.Next(bad, orchestrator.TriggerRebootCommanded); err == nil {
|
||||
t.Fatalf("RebootCommanded from %q: expected error", bad)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestTriggerAgentClaimedFromWaitingReboot: the agent's /claim must
|
||||
// advance the run out of WaitingReboot (new happy path) AND out of
|
||||
// legacy WaitingWoL, otherwise live boots wouldn't be recognised.
|
||||
func TestTriggerAgentClaimedFromWaitingReboot(t *testing.T) {
|
||||
for _, from := range []model.RunState{model.StateWaitingReboot, model.StateWaitingWoL, model.StateBooting} {
|
||||
got, err := orchestrator.Next(from, orchestrator.TriggerAgentClaimed)
|
||||
if err != nil {
|
||||
t.Fatalf("AgentClaimed from %q: %v", from, err)
|
||||
}
|
||||
if got != model.StateInventoryCheck {
|
||||
t.Fatalf("AgentClaimed from %q = %q, want InventoryCheck", from, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNextStageWalk(t *testing.T) {
|
||||
// Walking StageCompleted from each stage should land on the next
|
||||
// one in the canonical order, and from Reporting onto Completed.
|
||||
|
||||
Reference in New Issue
Block a user