Files
Vetting/internal/api/ui_handlers.go
T
josh 9b16ed80e6
CI / Lint + build + test (push) Failing after 5m13s
Heartbeat command channel: reboot_for_vetting skips WoL
When the operator clicks Start vetting and the host is heartbeating,
the heartbeat response now carries cmd=reboot_for_vetting + run_id.
The handler drives the Queued → WaitingWoL transition via the existing
state machine, so a benign race with the 2s dispatcher poll is refused
by the state machine (not double-dispatched). WaitingWoL retries for
10 minutes to cover a crashed-mid-reboot case, then falls back to
operator action.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-17 23:37:01 -04:00

464 lines
15 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package api
import (
"context"
"encoding/json"
"errors"
"log"
"net/http"
"regexp"
"strconv"
"strings"
"text/template"
"time"
"github.com/go-chi/chi/v5"
"gopkg.in/yaml.v3"
"vetting/internal/events"
"vetting/internal/model"
"vetting/internal/orchestrator"
"vetting/internal/store"
"vetting/internal/web"
"vetting/internal/web/templates"
)
// UI bundles the stores and services that the HTTP handlers in this
// file call into; each handler below is a method on *UI.
type UI struct {
// Hosts is the host store: handlers list, fetch, create, delete and
// last-seen-stamp hosts through it.
Hosts *store.Hosts
// Runs is the run store: used to create runs and to look up the
// latest run for a host.
Runs *store.Runs
// Artifacts is queried by Report to find a run's report_html artifact.
Artifacts *store.Artifacts
// EventHub serves the server-sent-events stream (see SSE).
EventHub *events.Hub
// Runner performs run transitions, overrides and tile-update
// publishing. Heartbeat and pickHostCommand tolerate a nil Runner;
// OverrideWipeStorage dereferences it unconditionally.
Runner *orchestrator.Runner
// Tiles builds the per-host tile data rendered by Dashboard.
Tiles *TileEnricher
PublicURL string // user-visible base URL baked into the quick-register one-liner
}
// macRe matches a MAC address written as six colon-separated pairs of
// lowercase hex digits (aa:bb:cc:dd:ee:ff). It does not accept
// uppercase, so callers lowercase their input before matching.
var macRe = regexp.MustCompile(`^[0-9a-f]{2}(:[0-9a-f]{2}){5}$`)
// quickRegisterTmpl is the quick-register shell script template,
// loaded from register/quick.sh.tmpl in the web.Register filesystem.
// It is parsed once at startup — a malformed template should fail the
// binary at init, not on a visitor's first hit. The file imports
// text/template (not html/template), so values are substituted
// without HTML escaping.
var quickRegisterTmpl = template.Must(
template.ParseFS(web.Register, "register/quick.sh.tmpl"),
)
// baseURL yields the orchestrator URL that gets embedded in generated
// artefacts (the quick-register one-liner and its rendered script).
//
// When the operator has configured PublicURL it wins, with any
// trailing slashes removed. Otherwise the URL is reconstructed from
// the incoming request: "https" if the request arrived over TLS,
// "http" if not, followed by the request's Host. That fallback keeps a
// dev run on http://127.0.0.1:8080 producing a usable command.
func (u *UI) baseURL(r *http.Request) string {
	if configured := u.PublicURL; configured != "" {
		return strings.TrimRight(configured, "/")
	}
	if r.TLS == nil {
		return "http://" + r.Host
	}
	return "https://" + r.Host
}
// Dashboard renders the main page: one tile per registered host.
//
// For every host returned by Hosts.List it looks up the host's latest
// run and hands both to Tiles.Build. Any store error aborts the whole
// page with a 500 carrying the error text. A failure while rendering
// the template is logged rather than dropped; by then part of the
// response may already be on the wire, so no error status is attempted.
func (u *UI) Dashboard(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	hosts, err := u.Hosts.List(ctx)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	tiles := make([]templates.TileData, 0, len(hosts))
	for _, h := range hosts {
		latest, err := u.Runs.LatestForHost(ctx, h.ID)
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		tiles = append(tiles, u.Tiles.Build(ctx, h, latest))
	}

	if err := templates.Dashboard(tiles).Render(ctx, w); err != nil {
		log.Printf("dashboard render: %v", err)
	}
}
// StartRun creates a new run for the host named by the {id} URL
// parameter and redirects back to the dashboard.
//
// Steps, in order:
//   - parse the host id (400 on a non-integer);
//   - confirm the host exists (404 if not, 500 on other store errors);
//   - refuse with 409 if the host's latest run is in any state other
//     than Completed, Released or FailedHolding;
//   - issue a run token and create the run with the token's hash.
//
// The run is logged as state=Queued; the dispatcher goroutine (or a
// heartbeat from the host, see pickHostCommand) moves it on from there.
//
// A failure to look up the latest run is reported as a 500 instead of
// being ignored: skipping the guard on a store error could create a
// second run while one is still active.
func (u *UI) StartRun(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	hostID, err := strconv.ParseInt(chi.URLParam(r, "id"), 10, 64)
	if err != nil {
		http.Error(w, "bad host id", http.StatusBadRequest)
		return
	}
	if _, err := u.Hosts.Get(ctx, hostID); err != nil {
		if errors.Is(err, store.ErrNotFound) {
			http.NotFound(w, r)
			return
		}
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// Guard: refuse to start a second run while one is still active.
	// A host with no runs yet yields a nil run and passes straight
	// through.
	latest, err := u.Runs.LatestForHost(ctx, hostID)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	if latest != nil {
		switch latest.State {
		case model.StateCompleted, model.StateReleased, model.StateFailedHolding:
			// ok to start fresh
		default:
			http.Error(w, "host already has an active run", http.StatusConflict)
			return
		}
	}

	// Only the hash is persisted with the run; the first return value
	// of IssueRunToken is not used by this handler.
	_, hash, err := orchestrator.IssueRunToken()
	if err != nil {
		http.Error(w, "token: "+err.Error(), http.StatusInternalServerError)
		return
	}
	runID, err := u.Runs.Create(ctx, hostID, hash)
	if err != nil {
		http.Error(w, "create run: "+err.Error(), http.StatusInternalServerError)
		return
	}
	log.Printf("ui: created run %d for host %d (state=Queued)", runID, hostID)
	http.Redirect(w, r, "/", http.StatusSeeOther)
}
// NewHostForm renders an empty host-registration page. The only field
// pre-populated is QuickRegisterURL, so the page can show the
// quick-register one-liner pointing at this orchestrator. Render
// errors are intentionally ignored.
func (u *UI) NewHostForm(w http.ResponseWriter, r *http.Request) {
	blank := templates.RegistrationForm{QuickRegisterURL: u.baseURL(r)}
	page := templates.Registration(blank)
	_ = page.Render(r.Context(), w)
}
// QuickRegisterScript serves the shell script behind the
// quick-register one-liner that an operator pastes on the target host
// (hardware autodetect + POST to /api/v1/hosts). The orchestrator URL
// from baseURL is substituted into the template so the script is
// self-contained.
//
// The response is typed text/x-shellscript and marked no-store. A
// template execution failure is logged only.
func (u *UI) QuickRegisterScript(w http.ResponseWriter, r *http.Request) {
	hdr := w.Header()
	hdr.Set("Content-Type", "text/x-shellscript; charset=utf-8")
	hdr.Set("Cache-Control", "no-store")

	data := struct{ OrchestratorURL string }{OrchestratorURL: u.baseURL(r)}
	err := quickRegisterTmpl.Execute(w, data)
	if err == nil {
		return
	}
	log.Printf("quick-register script render: %v", err)
}
// CreateHost handles the browser registration form POST.
//
// Name, MAC, broadcast IP and notes are whitespace-trimmed (the MAC is
// also lowercased); the WoL port and the expected-spec YAML are taken
// verbatim. On a validation failure the form is re-rendered with the
// error and a 400; on a store failure it is re-rendered with a 409 and
// the friendlyDBError text. An empty WoL port defaults to 9. Success
// redirects to the dashboard.
func (u *UI) CreateHost(w http.ResponseWriter, r *http.Request) {
	if err := r.ParseForm(); err != nil {
		http.Error(w, "bad form", http.StatusBadRequest)
		return
	}

	raw := func(key string) string { return r.PostForm.Get(key) }
	trimmed := func(key string) string { return strings.TrimSpace(raw(key)) }

	form := templates.RegistrationForm{
		Name:             trimmed("name"),
		MAC:              strings.ToLower(trimmed("mac")),
		WoLBroadcastIP:   trimmed("wol_broadcast_ip"),
		WoLPort:          raw("wol_port"),
		ExpectedSpecYAML: raw("expected_spec_yaml"),
		Notes:            trimmed("notes"),
		QuickRegisterURL: u.baseURL(r),
	}

	// reject re-renders the submitted form with msg shown to the user.
	reject := func(status int, msg string) {
		form.Error = msg
		w.WriteHeader(status)
		_ = templates.Registration(form).Render(r.Context(), w)
	}

	if problem := validateHostForm(&form); problem != "" {
		reject(http.StatusBadRequest, problem)
		return
	}

	// Validation has already vetted a non-empty port, so the only way
	// Atoi yields 0 here is an empty field: fall back to port 9.
	port := 9
	if n, _ := strconv.Atoi(form.WoLPort); n != 0 {
		port = n
	}

	host := model.Host{
		Name:             form.Name,
		MAC:              form.MAC,
		WoLBroadcastIP:   form.WoLBroadcastIP,
		WoLPort:          port,
		ExpectedSpecYAML: form.ExpectedSpecYAML,
		Notes:            form.Notes,
	}
	if _, err := u.Hosts.Create(r.Context(), host); err != nil {
		reject(http.StatusConflict, friendlyDBError(err))
		return
	}
	http.Redirect(w, r, "/", http.StatusSeeOther)
}
// quickRegisterPayload is the POST body accepted by /api/v1/hosts —
// the shape the quick-register bash one-liner emits. It carries the
// same fields as the browser registration form, except that the WoL
// port is a JSON number rather than a string; CreateHostJSON treats a
// zero (or absent) port as "use the default".
type quickRegisterPayload struct {
Name string `json:"name"`
MAC string `json:"mac"`
WoLBroadcastIP string `json:"wol_broadcast_ip"`
WoLPort int `json:"wol_port"`
ExpectedSpecYAML string `json:"expected_spec_yaml"`
Notes string `json:"notes"`
}
// CreateHostJSON is the API counterpart to CreateHost. It accepts the
// same fields as the form but as a JSON body (capped at 256 KiB), so a
// target host can POST its own registration over curl from the
// quick-register one-liner. Same validation as the form; no auth
// (LAN-only).
//
// Responses are JSON throughout:
//   - 400 {"error": ...} for undecodable JSON or a validation failure;
//   - 409 {"error": ...} when the store rejects the host;
//   - 201 {"id", "name", "mac"} on success.
//
// A wol_port of 0 (or omitted) means "use the default", port 9. Every
// other value, negative numbers included, goes through the same range
// check as the form, so an out-of-range port can never reach the store.
func (u *UI) CreateHostJSON(w http.ResponseWriter, r *http.Request) {
	var p quickRegisterPayload
	if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 256*1024)).Decode(&p); err != nil {
		writeJSONError(w, http.StatusBadRequest, "bad json: "+err.Error())
		return
	}

	form := templates.RegistrationForm{
		Name:             strings.TrimSpace(p.Name),
		MAC:              strings.ToLower(strings.TrimSpace(p.MAC)),
		WoLBroadcastIP:   strings.TrimSpace(p.WoLBroadcastIP),
		ExpectedSpecYAML: p.ExpectedSpecYAML,
		Notes:            strings.TrimSpace(p.Notes),
	}
	// Hand any explicitly supplied port to the validator. Testing for
	// != 0 rather than > 0 matters: a negative port must be rejected,
	// not silently skipped and then persisted.
	if p.WoLPort != 0 {
		form.WoLPort = strconv.Itoa(p.WoLPort)
	}
	if errMsg := validateHostForm(&form); errMsg != "" {
		writeJSONError(w, http.StatusBadRequest, errMsg)
		return
	}

	wolPort := p.WoLPort
	if wolPort == 0 {
		wolPort = 9
	}
	id, err := u.Hosts.Create(r.Context(), model.Host{
		Name:             form.Name,
		MAC:              form.MAC,
		WoLBroadcastIP:   form.WoLBroadcastIP,
		WoLPort:          wolPort,
		ExpectedSpecYAML: form.ExpectedSpecYAML,
		Notes:            form.Notes,
	})
	if err != nil {
		writeJSONError(w, http.StatusConflict, friendlyDBError(err))
		return
	}

	log.Printf("api: registered host %d (%s, %s)", id, form.Name, form.MAC)
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusCreated)
	_ = json.NewEncoder(w).Encode(map[string]any{
		"id":   id,
		"name": form.Name,
		"mac":  form.MAC,
	})
}
// Heartbeat is the endpoint a host-mode vetting-agent (a systemd
// service on the registered host) calls roughly every 30s. It is
// LAN-trusted with no auth — the same threat model as the browser UI
// and quick-register.
//
// The {mac} URL parameter is trimmed, lowercased and checked against
// macRe (400 on mismatch). An unregistered MAC gets 404; store
// failures get 500. Otherwise the handler stamps last_seen_at with the
// current UTC time, publishes a tile update when a Runner is
// configured, and replies with a heartbeatResponse whose command (if
// any) comes from pickHostCommand — e.g. cmd=reboot_for_vetting after
// the operator has clicked Start vetting, so the host boots into PXE
// without WoL.
func (u *UI) Heartbeat(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	mac := strings.TrimSpace(chi.URLParam(r, "mac"))
	mac = strings.ToLower(mac)
	if !macRe.MatchString(mac) {
		writeJSONError(w, http.StatusBadRequest,
			"MAC address must be in the form aa:bb:cc:dd:ee:ff")
		return
	}

	host, err := u.Hosts.GetByMAC(ctx, mac)
	switch {
	case errors.Is(err, store.ErrNotFound):
		writeJSONError(w, http.StatusNotFound, "unknown host")
		return
	case err != nil:
		writeJSONError(w, http.StatusInternalServerError, err.Error())
		return
	}

	seenAt := time.Now().UTC()
	if err := u.Hosts.UpdateLastSeen(ctx, mac, seenAt); err != nil {
		writeJSONError(w, http.StatusInternalServerError, err.Error())
		return
	}
	if u.Runner != nil {
		u.Runner.PublishTileUpdate(ctx, host.ID)
	}

	var reply heartbeatResponse
	reply.OK = true
	reply.Cmd, reply.RunID = u.pickHostCommand(ctx, host.ID)

	w.Header().Set("Content-Type", "application/json")
	_ = json.NewEncoder(w).Encode(reply)
}
// heartbeatResponse is the JSON the host-mode agent decodes on every
// heartbeat. `cmd` is "" (omitted) in the idle case so the wire shape
// stays `{"ok": true}` when nothing is happening; `run_id` is likewise
// omitted when zero.
type heartbeatResponse struct {
// OK is set to true by Heartbeat on every successful reply.
OK bool `json:"ok"`
// Cmd is the command chosen by pickHostCommand, or "" for none.
Cmd string `json:"cmd,omitempty"`
// RunID is the run the command refers to, or 0 when there is none.
RunID int64 `json:"run_id,omitempty"`
}
// pickHostCommand chooses the command to send back to the host-mode
// agent in reply to a heartbeat. The result is (command, run id);
// ("", 0) means "nothing to do".
//
// Decision table, keyed on the state of the host's latest run:
//
//   - Queued: fire the Dispatched trigger through the Runner. On
//     success the agent is told to reboot, beating the dispatcher's
//     WoL to the punch. If the transition is refused the result is
//     idle.
//   - WaitingWoL with StartedAt less than 10 minutes ago: tell the
//     agent to reboot again, covering "host crashed mid-reboot,
//     systemd brought the reporter back".
//   - anything else (including no run, a lookup error, or a UI with
//     no Runs/Runner configured): idle.
func (u *UI) pickHostCommand(ctx context.Context, hostID int64) (string, int64) {
	// rebootRetryWindow bounds the WaitingWoL retry so a
	// perpetually-broken PXE doesn't reboot-loop the box.
	const rebootRetryWindow = 10 * time.Minute

	if u.Runs == nil || u.Runner == nil {
		return "", 0
	}

	latest, err := u.Runs.LatestForHost(ctx, hostID)
	if err != nil {
		log.Printf("heartbeat: latest run for host %d: %v", hostID, err)
		return "", 0
	}
	if latest == nil {
		return "", 0
	}

	if latest.State == model.StateQueued {
		_, err := u.Runner.Transition(ctx, latest.ID, orchestrator.TriggerDispatched)
		if err != nil {
			// Typically a benign race with the dispatcher's own 2s
			// poll: the state machine refuses the second transition.
			// Log it and stay idle so the agent doesn't reboot on a
			// run that another path is already driving.
			log.Printf("heartbeat: transition run %d: %v", latest.ID, err)
			return "", 0
		}
		log.Printf("heartbeat: dispatched run %d for host %d via heartbeat (no WoL)", latest.ID, hostID)
		return cmdRebootForVetting, latest.ID
	}

	// The reporter is the only thing that could be telling us about
	// this host right now, and WoL is only the fallback anyway, so a
	// repeat reboot command inside the window is tolerated.
	if latest.State == model.StateWaitingWoL && time.Since(latest.StartedAt) < rebootRetryWindow {
		return cmdRebootForVetting, latest.ID
	}
	return "", 0
}
// cmdRebootForVetting is the heartbeat `cmd` value pickHostCommand
// returns when the host should reboot to start (or retry) a vetting run.
const cmdRebootForVetting = "reboot_for_vetting"
// writeJSONError sends an error reply for the JSON endpoints: it sets
// Content-Type to application/json, writes the given HTTP status, and
// encodes the body {"error": msg}. An encoding or write failure is
// ignored, since nothing more can be sent to the client at that point.
func writeJSONError(w http.ResponseWriter, status int, msg string) {
	body := map[string]string{"error": msg}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)

	enc := json.NewEncoder(w)
	_ = enc.Encode(body)
}
// OverrideWipeStorage backs the operator's explicit "yes, wipe the
// disk even though we found filesystem signatures" button for the host
// named by the {id} URL parameter.
//
// It is only accepted when the host's latest run is FailedHolding with
// FailedStage "Storage"; any other situation (including a host with no
// run at all) is answered with 409. When accepted, the override
// payload {"wipe":true} is handed to the Runner — per the original
// design note, the agent's next heartbeat then receives retry_stage
// with wipe=true and re-enters the Storage stage bypassing the
// wipe-probe guard — and the operator is redirected to the dashboard.
func (u *UI) OverrideWipeStorage(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	hostID, err := strconv.ParseInt(chi.URLParam(r, "id"), 10, 64)
	if err != nil {
		http.Error(w, "bad host id", http.StatusBadRequest)
		return
	}

	run, err := u.Runs.LatestForHost(ctx, hostID)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	switch {
	case run == nil:
		http.Error(w, "no run for host", http.StatusConflict)
		return
	case run.State != model.StateFailedHolding || run.FailedStage != "Storage":
		http.Error(w, "override only valid when holding on Storage", http.StatusConflict)
		return
	}

	const wipePayload = `{"wipe":true}`
	if _, err := u.Runner.Override(ctx, run.ID, wipePayload); err != nil {
		http.Error(w, "override: "+err.Error(), http.StatusInternalServerError)
		return
	}
	http.Redirect(w, r, "/", http.StatusSeeOther)
}
// DeleteHost removes the host named by the {id} URL parameter and
// redirects to the dashboard. A non-integer id is a 400, an unknown
// host a 404, and any other store error a 500 carrying the error text.
func (u *UI) DeleteHost(w http.ResponseWriter, r *http.Request) {
	hostID, parseErr := strconv.ParseInt(chi.URLParam(r, "id"), 10, 64)
	if parseErr != nil {
		http.Error(w, "bad id", http.StatusBadRequest)
		return
	}

	delErr := u.Hosts.Delete(r.Context(), hostID)
	switch {
	case delErr == nil:
		http.Redirect(w, r, "/", http.StatusSeeOther)
	case errors.Is(delErr, store.ErrNotFound):
		http.NotFound(w, r)
	default:
		http.Error(w, delErr.Error(), http.StatusInternalServerError)
	}
}
// SSE exposes the event hub's server-sent-events stream as an HTTP
// handler by delegating the request straight to EventHub.ServeSSE.
func (u *UI) SSE(w http.ResponseWriter, r *http.Request) {
u.EventHub.ServeSSE(w, r)
}
// Report serves the HTML report artifact for the run named by the
// {runID} URL parameter.
//
// It lists the run's artifacts and picks the path of the last one
// whose Kind is "report_html"; if none exists (or its path is empty)
// the response is 404, which is the normal answer while a run hasn't
// produced a report yet. The file is then streamed with
// http.ServeFile as text/html.
//
// NOTE(review): this handler serves the stored artifact path as-is.
// It performs no check of its own that the path lies under the
// artifacts directory — confirm that containment is enforced where
// artifact rows are written.
func (u *UI) Report(w http.ResponseWriter, r *http.Request) {
	runID, err := strconv.ParseInt(chi.URLParam(r, "runID"), 10, 64)
	if err != nil {
		http.Error(w, "bad run id", http.StatusBadRequest)
		return
	}

	artifacts, err := u.Artifacts.ListForRun(r.Context(), runID)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// No early break: when several report_html rows exist the last
	// one listed wins.
	reportPath := ""
	for _, art := range artifacts {
		if art.Kind != "report_html" {
			continue
		}
		reportPath = art.Path
	}
	if reportPath == "" {
		http.NotFound(w, r)
		return
	}

	w.Header().Set("Content-Type", "text/html; charset=utf-8")
	http.ServeFile(w, r, reportPath)
}
// validateHostForm checks a registration form shared by the browser
// and JSON registration paths. It returns "" when the form is
// acceptable, otherwise a user-facing message describing the first
// problem found. Checks run in this order:
//
//  1. Name must be non-empty.
//  2. MAC must match macRe (lowercase aa:bb:cc:dd:ee:ff).
//  3. WoL broadcast IP must be non-empty (its format is not checked).
//  4. Expected spec YAML must be non-empty and parse as YAML; only
//     syntax is checked, not the document's shape.
//  5. WoL port is optional, but when present must be an integer in
//     the range 1–65535.
//
// The form is not modified.
func validateHostForm(form *templates.RegistrationForm) string {
	if form.Name == "" {
		return "Name is required."
	}
	if !macRe.MatchString(form.MAC) {
		return "MAC address must be in the form aa:bb:cc:dd:ee:ff."
	}
	if form.WoLBroadcastIP == "" {
		return "WoL broadcast IP is required."
	}
	if form.ExpectedSpecYAML == "" {
		return "Expected spec YAML is required."
	}
	// Decode into an untyped value: this is purely a well-formedness
	// check, the result is discarded.
	var anything any
	if err := yaml.Unmarshal([]byte(form.ExpectedSpecYAML), &anything); err != nil {
		return "Expected spec YAML is not valid YAML: " + err.Error()
	}
	if form.WoLPort != "" {
		port, err := strconv.Atoi(form.WoLPort)
		if err != nil || port < 1 || port > 65535 {
			return "WoL port must be 1–65535."
		}
	}
	return ""
}
// friendlyDBError turns a store error into text suitable for showing
// to the operator. Unique-constraint violations on hosts.name and
// hosts.mac are recognised by substring match on the error text and
// mapped to a plain-English sentence (name is checked first); any
// other error is passed through as its raw message. err must be
// non-nil.
func friendlyDBError(err error) string {
	msg := err.Error()
	if strings.Contains(msg, "UNIQUE constraint failed: hosts.name") {
		return "A host with that name already exists."
	}
	if strings.Contains(msg, "UNIQUE constraint failed: hosts.mac") {
		return "A host with that MAC already exists."
	}
	return msg
}