Heartbeat-first dispatch: retire WoL-as-default, add WaitingReboot
CI / Lint + build + test (push) Has been cancelled
CI / Lint + build + test (push) Has been cancelled
Every supported host runs vetting-reporter in-OS and heartbeats every 30s. WoL was never the thing that started vetting — the heartbeat response's reboot_for_vetting command was. Firing WoL first only crowded the run log with misleading diagnostics when the real failure mode is "reporter isn't installed."

- StartRun returns 409 if the host hasn't heartbeated within 60s, pointing the operator at /register/quick.sh.
- Dispatcher re-checks LastSeenAt at dispatch time (a run may sit in Queued long enough for the host to go offline); stale hosts mark the run Failed with failed_stage=dispatch instead of looping.
- New StateWaitingReboot + TriggerRebootCommanded capture the actual semantics. StateWaitingWoL is kept as the hook point for a future manual-override button.
- Tile disables the Start button with a quick.sh tooltip when the host is offline, matching the server-side 409.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -176,7 +176,7 @@ func HostTile(t TileData) templ.Component {
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
if canStart(t.Latest) {
|
||||
if canStart(t) {
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 12, "<form method=\"post\" action=\"")
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
@@ -194,26 +194,31 @@ func HostTile(t TileData) templ.Component {
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
} else if canStartIfOnline(t.Latest) {
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 14, "<button type=\"button\" disabled title=\"host is not heartbeating — install the reporter via /register/quick.sh on the target host\">Start vetting</button>")
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
} else if hasReport(t.Latest) {
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 14, "<a class=\"button-like\" href=\"")
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 15, "<a class=\"button-like\" href=\"")
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
var templ_7745c5c3_Var14 templ.SafeURL
|
||||
templ_7745c5c3_Var14, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/reports/%d", t.Latest.ID)))
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 36, Col: 88}
|
||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 38, Col: 88}
|
||||
}
|
||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var14))
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 15, "\" target=\"_blank\" rel=\"noopener\">View report</a>")
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 16, "\" target=\"_blank\" rel=\"noopener\">View report</a>")
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
}
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 16, "</div></article>")
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 17, "</div></article>")
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
@@ -235,12 +240,29 @@ func hasReport(r *model.Run) bool {
|
||||
return r != nil && r.State == model.StateCompleted
|
||||
}
|
||||
|
||||
func canStart(r *model.Run) bool {
|
||||
// canStart gates the Start button on two things: the run is in a state
|
||||
// that accepts a fresh start, AND the host is currently heartbeating.
|
||||
// The heartbeat check mirrors the StartRun handler's preflight so the
|
||||
// button never offers a click that the server would reject with 409.
|
||||
func canStart(t TileData) bool {
|
||||
if !canStartIfOnline(t.Latest) {
|
||||
return false
|
||||
}
|
||||
if t.LastSeenAt == nil {
|
||||
return false
|
||||
}
|
||||
return time.Since(*t.LastSeenAt) <= 60*time.Second
|
||||
}
|
||||
|
||||
// canStartIfOnline is the run-state half of canStart, split out so the
|
||||
// template can distinguish "waiting on run to end" (no button) from
|
||||
// "run is done but host is offline" (disabled button with tooltip).
|
||||
func canStartIfOnline(r *model.Run) bool {
|
||||
if r == nil {
|
||||
return true
|
||||
}
|
||||
switch r.State {
|
||||
case model.StateCompleted, model.StateReleased, model.StateFailedHolding:
|
||||
case model.StateCompleted, model.StateReleased, model.StateFailed, model.StateFailedHolding:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
@@ -250,6 +272,10 @@ func tileStatus(r *model.Run) string {
|
||||
if r == nil {
|
||||
return "Idle"
|
||||
}
|
||||
switch r.State {
|
||||
case model.StateWaitingReboot:
|
||||
return "Waiting for reboot"
|
||||
}
|
||||
return string(r.State)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user