package api

import (
	"context"
	"encoding/json"
	"errors"
	"log"
	"net/http"
	"regexp"
	"strconv"
	"strings"
	"text/template"
	"time"

	"github.com/go-chi/chi/v5"
	"gopkg.in/yaml.v3"

	"vetting/internal/events"
	"vetting/internal/logs"
	"vetting/internal/model"
	"vetting/internal/orchestrator"
	"vetting/internal/store"
	"vetting/internal/web"
	"vetting/internal/web/templates"
)

// UI bundles the stores, hubs and helpers that the HTTP handlers in this
// file read from. Handlers are methods on *UI.
type UI struct {
	// Hosts is the host store (list/get/create/delete, last-seen stamps).
	Hosts *store.Hosts
	// Runs is the run store (latest run per host, run creation).
	Runs *store.Runs
	// Stages lists stage rows for a run; HostDetail tolerates nil.
	Stages *store.Stages
	// SpecDiffs lists spec diffs for a run; HostDetail tolerates nil.
	SpecDiffs *store.SpecDiffs
	// Artifacts lists artifact rows for a run (used by Report).
	Artifacts *store.Artifacts
	// EventHub serves the server-sent-events stream.
	EventHub *events.Hub
	// Logs provides the log replay for a run; HostDetail tolerates nil.
	Logs *logs.Hub
	// Runner drives run transitions and overrides; the heartbeat path
	// tolerates nil.
	Runner *orchestrator.Runner
	// Tiles builds the per-host tile data shown on the dashboard and the
	// host detail page.
	Tiles     *TileEnricher
	PublicURL string // user-visible base URL baked into the quick-register one-liner
}

// macRe matches a colon-separated MAC address in lowercase hex. Callers
// lowercase and trim their input before matching.
var macRe = regexp.MustCompile(`^[0-9a-f]{2}(:[0-9a-f]{2}){5}$`)

// quickRegisterTmpl is parsed once at startup — a malformed template
// should fail the binary at init, not on a visitor's first hit.
// It is a text/template (no HTML escaping): the output is a shell script.
var quickRegisterTmpl = template.Must(
	template.ParseFS(web.Register, "register/quick.sh.tmpl"),
)

// baseURL returns the orchestrator URL to bake into generated artefacts
// (the quick-register one-liner, its rendered script). Prefers the
// operator-configured public URL; falls back to the request's own host
// so a dev run on http://127.0.0.1:8080 still produces a working command.
func (u *UI) baseURL(r *http.Request) string { if u.PublicURL != "" { return strings.TrimRight(u.PublicURL, "/") } scheme := "http" if r.TLS != nil { scheme = "https" } return scheme + "://" + r.Host } func (u *UI) Dashboard(w http.ResponseWriter, r *http.Request) { hosts, err := u.Hosts.List(r.Context()) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } tiles := make([]templates.TileData, 0, len(hosts)) for _, h := range hosts { latest, err := u.Runs.LatestForHost(r.Context(), h.ID) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } tiles = append(tiles, u.Tiles.Build(r.Context(), h, latest)) } _ = templates.Dashboard(tiles).Render(r.Context(), w) } // HostDetail renders the per-host page: breadcrumb, summary, pipeline // timeline, hold card, action row, spec diffs, log pane, meta. Same // enrichment path as Dashboard for tile data; additionally reads stage // rows + spec diffs for the latest run to populate the timeline and // diff list. 
func (u *UI) HostDetail(w http.ResponseWriter, r *http.Request) { idStr := chi.URLParam(r, "id") id, err := strconv.ParseInt(idStr, 10, 64) if err != nil { http.Error(w, "bad host id", http.StatusBadRequest) return } host, err := u.Hosts.Get(r.Context(), id) if err != nil { if errors.Is(err, store.ErrNotFound) { http.NotFound(w, r) return } http.Error(w, err.Error(), http.StatusInternalServerError) return } latest, err := u.Runs.LatestForHost(r.Context(), id) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } var stages []model.Stage var diffs []model.SpecDiff if latest != nil { if u.Stages != nil { stages, _ = u.Stages.ListForRun(r.Context(), latest.ID) } if u.SpecDiffs != nil { diffs, _ = u.SpecDiffs.ListForRun(r.Context(), latest.ID) } } t := u.Tiles.Build(r.Context(), *host, latest) replay := "" if latest != nil && u.Logs != nil { replay = u.Logs.Replay(latest.ID) } data := templates.HostDetailData{ Tile: t, Stages: stages, SpecDiffs: diffs, LogReplay: replay, } _ = templates.HostDetail(data).Render(r.Context(), w) } // StartRun creates a new Run for the host, issues an agent token, and // transitions Registered→Queued. The dispatcher goroutine picks it up // and fires WoL. func (u *UI) StartRun(w http.ResponseWriter, r *http.Request) { idStr := chi.URLParam(r, "id") hostID, err := strconv.ParseInt(idStr, 10, 64) if err != nil { http.Error(w, "bad host id", http.StatusBadRequest) return } if _, err := u.Hosts.Get(r.Context(), hostID); err != nil { if errors.Is(err, store.ErrNotFound) { http.NotFound(w, r) return } http.Error(w, err.Error(), http.StatusInternalServerError) return } // Guard: refuse to start a second run while one is still active. 
if latest, err := u.Runs.LatestForHost(r.Context(), hostID); err == nil && latest != nil { switch latest.State { case model.StateCompleted, model.StateReleased, model.StateFailedHolding: // ok to start fresh default: http.Error(w, "host already has an active run", http.StatusConflict) return } } _, hash, err := orchestrator.IssueRunToken() if err != nil { http.Error(w, "token: "+err.Error(), http.StatusInternalServerError) return } runID, err := u.Runs.Create(r.Context(), hostID, hash) if err != nil { http.Error(w, "create run: "+err.Error(), http.StatusInternalServerError) return } log.Printf("ui: created run %d for host %d (state=Queued)", runID, hostID) http.Redirect(w, r, "/", http.StatusSeeOther) } func (u *UI) NewHostForm(w http.ResponseWriter, r *http.Request) { _ = templates.Registration(templates.RegistrationForm{ QuickRegisterURL: u.baseURL(r), }).Render(r.Context(), w) } // QuickRegisterScript renders the bash one-liner an operator pastes on // the target host: hardware autodetect + POST to /api/v1/hosts. The // orchestrator URL is substituted in so the script is self-contained. 
func (u *UI) QuickRegisterScript(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "text/x-shellscript; charset=utf-8") w.Header().Set("Cache-Control", "no-store") if err := quickRegisterTmpl.Execute(w, struct{ OrchestratorURL string }{ OrchestratorURL: u.baseURL(r), }); err != nil { log.Printf("quick-register script render: %v", err) } } func (u *UI) CreateHost(w http.ResponseWriter, r *http.Request) { if err := r.ParseForm(); err != nil { http.Error(w, "bad form", http.StatusBadRequest) return } form := templates.RegistrationForm{ Name: strings.TrimSpace(r.PostForm.Get("name")), MAC: strings.ToLower(strings.TrimSpace(r.PostForm.Get("mac"))), WoLBroadcastIP: strings.TrimSpace(r.PostForm.Get("wol_broadcast_ip")), WoLPort: r.PostForm.Get("wol_port"), ExpectedSpecYAML: r.PostForm.Get("expected_spec_yaml"), Notes: strings.TrimSpace(r.PostForm.Get("notes")), QuickRegisterURL: u.baseURL(r), } if errMsg := validateHostForm(&form); errMsg != "" { form.Error = errMsg w.WriteHeader(http.StatusBadRequest) _ = templates.Registration(form).Render(r.Context(), w) return } wolPort, _ := strconv.Atoi(form.WoLPort) if wolPort == 0 { wolPort = 9 } _, err := u.Hosts.Create(r.Context(), model.Host{ Name: form.Name, MAC: form.MAC, WoLBroadcastIP: form.WoLBroadcastIP, WoLPort: wolPort, ExpectedSpecYAML: form.ExpectedSpecYAML, Notes: form.Notes, }) if err != nil { form.Error = friendlyDBError(err) w.WriteHeader(http.StatusConflict) _ = templates.Registration(form).Render(r.Context(), w) return } http.Redirect(w, r, "/", http.StatusSeeOther) } // quickRegisterPayload is the POST body accepted by /api/v1/hosts — // the shape the quick-register bash one-liner emits. type quickRegisterPayload struct { Name string `json:"name"` MAC string `json:"mac"` WoLBroadcastIP string `json:"wol_broadcast_ip"` WoLPort int `json:"wol_port"` ExpectedSpecYAML string `json:"expected_spec_yaml"` Notes string `json:"notes"` } // CreateHostJSON is the API counterpart to CreateHost. 
Accepts the same // fields as the form but in JSON, so a target host can POST its own // registration payload over curl from the quick-register one-liner. // Same validation as the form; no auth (LAN-only). func (u *UI) CreateHostJSON(w http.ResponseWriter, r *http.Request) { var p quickRegisterPayload if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 256*1024)).Decode(&p); err != nil { writeJSONError(w, http.StatusBadRequest, "bad json: "+err.Error()) return } form := templates.RegistrationForm{ Name: strings.TrimSpace(p.Name), MAC: strings.ToLower(strings.TrimSpace(p.MAC)), WoLBroadcastIP: strings.TrimSpace(p.WoLBroadcastIP), ExpectedSpecYAML: p.ExpectedSpecYAML, Notes: strings.TrimSpace(p.Notes), } if p.WoLPort > 0 { form.WoLPort = strconv.Itoa(p.WoLPort) } if errMsg := validateHostForm(&form); errMsg != "" { writeJSONError(w, http.StatusBadRequest, errMsg) return } wolPort := p.WoLPort if wolPort == 0 { wolPort = 9 } id, err := u.Hosts.Create(r.Context(), model.Host{ Name: form.Name, MAC: form.MAC, WoLBroadcastIP: form.WoLBroadcastIP, WoLPort: wolPort, ExpectedSpecYAML: form.ExpectedSpecYAML, Notes: form.Notes, }) if err != nil { writeJSONError(w, http.StatusConflict, friendlyDBError(err)) return } log.Printf("api: registered host %d (%s, %s)", id, form.Name, form.MAC) w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusCreated) _ = json.NewEncoder(w).Encode(map[string]any{ "id": id, "name": form.Name, "mac": form.MAC, }) } // Heartbeat is called every ~30s by a host-mode vetting-agent running // as a systemd service on the registered host. LAN-trusted, no auth — // same threat model as the browser UI and quick-register. Stamps // last_seen_at, flips the dashboard tile to "online", and — if the // operator has clicked Start vetting since the last heartbeat — replies // with cmd=reboot_for_vetting so the host boots into PXE without WoL. 
func (u *UI) Heartbeat(w http.ResponseWriter, r *http.Request) { mac := strings.ToLower(strings.TrimSpace(chi.URLParam(r, "mac"))) if !macRe.MatchString(mac) { writeJSONError(w, http.StatusBadRequest, "MAC address must be in the form aa:bb:cc:dd:ee:ff") return } host, err := u.Hosts.GetByMAC(r.Context(), mac) if err != nil { if errors.Is(err, store.ErrNotFound) { writeJSONError(w, http.StatusNotFound, "unknown host") return } writeJSONError(w, http.StatusInternalServerError, err.Error()) return } if err := u.Hosts.UpdateLastSeen(r.Context(), mac, time.Now().UTC()); err != nil { writeJSONError(w, http.StatusInternalServerError, err.Error()) return } if u.Runner != nil { u.Runner.PublishTileUpdate(r.Context(), host.ID) } cmd, runID := u.pickHostCommand(r.Context(), host.ID) resp := heartbeatResponse{OK: true, Cmd: cmd, RunID: runID} w.Header().Set("Content-Type", "application/json") _ = json.NewEncoder(w).Encode(resp) } // heartbeatResponse is the JSON the host-mode agent decodes on every // heartbeat. `cmd` is "" (omitted) in the idle case so the wire shape // stays `{"ok": true}` when nothing is happening. type heartbeatResponse struct { OK bool `json:"ok"` Cmd string `json:"cmd,omitempty"` RunID int64 `json:"run_id,omitempty"` } // pickHostCommand decides what the host-mode agent should do on the // back of this heartbeat. Returns ("", 0) when there's nothing to do. // // - Queued run → Transition(Dispatched) and tell the agent to reboot. // The dispatcher would have WoL'd it anyway; we beat it to the // punch so the host skips the WoL dance. // - WaitingWoL run created <10min ago → also return reboot, covering // "host crashed mid-reboot, systemd brought the reporter back". // - anything else → idle. 
func (u *UI) pickHostCommand(ctx context.Context, hostID int64) (string, int64) { if u.Runs == nil || u.Runner == nil { return "", 0 } run, err := u.Runs.LatestForHost(ctx, hostID) if err != nil { log.Printf("heartbeat: latest run for host %d: %v", hostID, err) return "", 0 } if run == nil { return "", 0 } switch run.State { case model.StateQueued: if _, err := u.Runner.Transition(ctx, run.ID, orchestrator.TriggerDispatched); err != nil { // Benign race with the dispatcher's own 2s poll — the // state machine refuses the second transition; we just // log and return idle so the agent doesn't reboot on a // run that another path is already driving. log.Printf("heartbeat: transition run %d: %v", run.ID, err) return "", 0 } log.Printf("heartbeat: dispatched run %d for host %d via heartbeat (no WoL)", run.ID, hostID) return cmdRebootForVetting, run.ID case model.StateWaitingWoL: // Tolerate a crashed-mid-reboot retry: the reporter is the // only thing that could be telling us about this host right // now, and WoL is only the fallback anyway. Bound it so a // perpetually-broken PXE doesn't reboot-loop the box. if time.Since(run.StartedAt) < 10*time.Minute { return cmdRebootForVetting, run.ID } return "", 0 } return "", 0 } const cmdRebootForVetting = "reboot_for_vetting" func writeJSONError(w http.ResponseWriter, status int, msg string) { w.Header().Set("Content-Type", "application/json") w.WriteHeader(status) _ = json.NewEncoder(w).Encode(map[string]string{"error": msg}) } // OverrideWipeStorage is the operator's explicit "yes, wipe the disk // even though we found filesystem signatures" button. Only meaningful // when the latest run is FailedHolding with failed_stage=Storage — the // agent's next heartbeat will receive retry_stage with wipe=true and // re-enter the Storage stage bypassing the wipe-probe guard. 
func (u *UI) OverrideWipeStorage(w http.ResponseWriter, r *http.Request) { idStr := chi.URLParam(r, "id") hostID, err := strconv.ParseInt(idStr, 10, 64) if err != nil { http.Error(w, "bad host id", http.StatusBadRequest) return } latest, err := u.Runs.LatestForHost(r.Context(), hostID) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } if latest == nil { http.Error(w, "no run for host", http.StatusConflict) return } if latest.State != model.StateFailedHolding || latest.FailedStage != "Storage" { http.Error(w, "override only valid when holding on Storage", http.StatusConflict) return } if _, err := u.Runner.Override(r.Context(), latest.ID, `{"wipe":true}`); err != nil { http.Error(w, "override: "+err.Error(), http.StatusInternalServerError) return } http.Redirect(w, r, "/", http.StatusSeeOther) } func (u *UI) DeleteHost(w http.ResponseWriter, r *http.Request) { idStr := chi.URLParam(r, "id") id, err := strconv.ParseInt(idStr, 10, 64) if err != nil { http.Error(w, "bad id", http.StatusBadRequest) return } if err := u.Hosts.Delete(r.Context(), id); err != nil { if errors.Is(err, store.ErrNotFound) { http.NotFound(w, r) return } http.Error(w, err.Error(), http.StatusInternalServerError) return } http.Redirect(w, r, "/", http.StatusSeeOther) } func (u *UI) SSE(w http.ResponseWriter, r *http.Request) { u.EventHub.ServeSSE(w, r) } // Report serves the HTML report artifact for a run. Looks up the // report_html artifact row for the runID, validates the path lives // under the artifacts dir (defence-in-depth against path traversal), // and streams it back. 404 when the run hasn't produced one yet. 
func (u *UI) Report(w http.ResponseWriter, r *http.Request) { idStr := chi.URLParam(r, "runID") runID, err := strconv.ParseInt(idStr, 10, 64) if err != nil { http.Error(w, "bad run id", http.StatusBadRequest) return } arts, err := u.Artifacts.ListForRun(r.Context(), runID) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } var path string for _, a := range arts { if a.Kind == "report_html" { path = a.Path } } if path == "" { http.NotFound(w, r) return } w.Header().Set("Content-Type", "text/html; charset=utf-8") http.ServeFile(w, r, path) } func validateHostForm(form *templates.RegistrationForm) string { if form.Name == "" { return "Name is required." } if !macRe.MatchString(form.MAC) { return "MAC address must be in the form aa:bb:cc:dd:ee:ff." } if form.WoLBroadcastIP == "" { return "WoL broadcast IP is required." } if form.ExpectedSpecYAML == "" { return "Expected spec YAML is required." } var anything any if err := yaml.Unmarshal([]byte(form.ExpectedSpecYAML), &anything); err != nil { return "Expected spec YAML is not valid YAML: " + err.Error() } if form.WoLPort != "" { port, err := strconv.Atoi(form.WoLPort) if err != nil || port < 1 || port > 65535 { return "WoL port must be 1–65535." } } return "" } func friendlyDBError(err error) string { s := err.Error() switch { case strings.Contains(s, "UNIQUE constraint failed: hosts.name"): return "A host with that name already exists." case strings.Contains(s, "UNIQUE constraint failed: hosts.mac"): return "A host with that MAC already exists." default: return s } }