Files
Vetting/internal/api/ui_handlers.go
T
josh bb658a8435
CI / Lint + build + test (push) Has been cancelled
Host detail page + pipeline timeline
Click a tile to open /hosts/{id} — the canonical control surface per
host. Timeline renders every pre-stage, stage, and terminal node in
order, with the current one pulsing, failed ones flagged, and
downstream ones dimmed as skipped. Detail page shows summary, hold
card (when holding), all action buttons, spec diffs, a full-height
log pane, and a collapsed expected-spec YAML.

Tile slims to name, last-seen, status, and one primary action; a
CSS-overlay <a> makes the whole card clickable while buttons stay
receptive via z-index.

Runner.publishTileUpdate now also emits pipeline-{runID} fragments,
and CompleteStage wraps Stages.CompleteByName so stage completions
advance the timeline live — without this the dots only moved on
state transitions.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-17 23:59:43 -04:00

511 lines
16 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package api
import (
"context"
"encoding/json"
"errors"
"log"
"net/http"
"regexp"
"strconv"
"strings"
"text/template"
"time"
"github.com/go-chi/chi/v5"
"gopkg.in/yaml.v3"
"vetting/internal/events"
"vetting/internal/model"
"vetting/internal/orchestrator"
"vetting/internal/store"
"vetting/internal/web"
"vetting/internal/web/templates"
)
// UI bundles the dependencies shared by the browser-facing handlers and
// the small JSON endpoints defined in this file.
type UI struct {
// Hosts is the host store: list/get/create/delete, MAC lookup and
// last-seen stamping.
Hosts *store.Hosts
// Runs is the run store, used for latest-run lookups and run creation.
Runs *store.Runs
// Stages and SpecDiffs feed the host detail page; HostDetail skips
// either lookup when the corresponding field is nil.
Stages *store.Stages
SpecDiffs *store.SpecDiffs
// Artifacts is queried by Report to find a run's report_html row.
Artifacts *store.Artifacts
// EventHub serves the server-sent-events stream (see SSE).
EventHub *events.Hub
// Runner drives run transitions, overrides and tile updates.
// Heartbeat and pickHostCommand tolerate a nil Runner.
Runner *orchestrator.Runner
// Tiles builds the per-host tile view model used by Dashboard and
// HostDetail.
Tiles *TileEnricher
PublicURL string // user-visible base URL baked into the quick-register one-liner
}
// macRe matches a colon-separated MAC address written in lowercase hex
// (aa:bb:cc:dd:ee:ff). Uppercase digits do not match, so callers in
// this file lowercase their input before testing it.
var macRe = regexp.MustCompile(`^[0-9a-f]{2}(:[0-9a-f]{2}){5}$`)
// quickRegisterTmpl is parsed once at startup — a malformed template
// should fail the binary at init, not on a visitor's first hit.
// NOTE(review): this is text/template, which applies no escaping to
// substituted values; the only value executed into it in this file is
// the URL produced by baseURL.
var quickRegisterTmpl = template.Must(
template.ParseFS(web.Register, "register/quick.sh.tmpl"),
)
// baseURL picks the orchestrator URL embedded in generated artefacts
// (the quick-register one-liner and its rendered script). An
// operator-configured PublicURL wins, with any trailing slashes
// removed; otherwise the URL is rebuilt from the incoming request so a
// dev run on http://127.0.0.1:8080 still yields a working command.
func (u *UI) baseURL(r *http.Request) string {
	if configured := u.PublicURL; configured != "" {
		return strings.TrimRight(configured, "/")
	}
	// No configured URL: the scheme follows whether the request arrived
	// over TLS, and the request's Host value is reused verbatim.
	if r.TLS != nil {
		return "https://" + r.Host
	}
	return "http://" + r.Host
}
// Dashboard renders the landing page: one tile per host returned by the
// host store, each built from that host and its most recent run. Any
// store error aborts the page with a 500 carrying the error text.
func (u *UI) Dashboard(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	all, err := u.Hosts.List(ctx)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	cards := make([]templates.TileData, 0, len(all))
	for _, host := range all {
		run, runErr := u.Runs.LatestForHost(ctx, host.ID)
		if runErr != nil {
			http.Error(w, runErr.Error(), http.StatusInternalServerError)
			return
		}
		cards = append(cards, u.Tiles.Build(ctx, host, run))
	}

	// The render error is intentionally dropped, as in the rest of this
	// file's page handlers.
	_ = templates.Dashboard(cards).Render(ctx, w)
}
// HostDetail renders the per-host page: breadcrumb, summary, pipeline
// timeline, hold card, action row, spec diffs, log pane, meta. Same
// enrichment path as Dashboard for tile data; additionally reads stage
// rows + spec diffs for the latest run to populate the timeline and
// diff list.
//
// Responses: 400 for a non-numeric id, 404 when the host store reports
// ErrNotFound, 500 for other host/run lookup failures. Stage and
// spec-diff lookups are best-effort: a failure there is logged and the
// page still renders with whatever the store returned.
func (u *UI) HostDetail(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	id, err := strconv.ParseInt(chi.URLParam(r, "id"), 10, 64)
	if err != nil {
		http.Error(w, "bad host id", http.StatusBadRequest)
		return
	}

	host, err := u.Hosts.Get(ctx, id)
	if err != nil {
		if errors.Is(err, store.ErrNotFound) {
			http.NotFound(w, r)
			return
		}
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	latest, err := u.Runs.LatestForHost(ctx, id)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	var stages []model.Stage
	var diffs []model.SpecDiff
	if latest != nil {
		// Either store may be unset; skip its section rather than panic.
		if u.Stages != nil {
			if stages, err = u.Stages.ListForRun(ctx, latest.ID); err != nil {
				// Previously swallowed: an empty timeline was
				// indistinguishable from a failed query.
				log.Printf("ui: host %d: list stages for run %d: %v", id, latest.ID, err)
			}
		}
		if u.SpecDiffs != nil {
			if diffs, err = u.SpecDiffs.ListForRun(ctx, latest.ID); err != nil {
				log.Printf("ui: host %d: list spec diffs for run %d: %v", id, latest.ID, err)
			}
		}
	}

	data := templates.HostDetailData{
		Tile:      u.Tiles.Build(ctx, *host, latest),
		Stages:    stages,
		SpecDiffs: diffs,
	}
	_ = templates.HostDetail(data).Render(ctx, w)
}
// StartRun creates a new Run for the host and redirects back to the
// dashboard. Per the original design note, the new run goes
// Registered→Queued and the dispatcher goroutine picks it up and fires
// WoL. The token minted here is stored on the run only as its hash; the
// first value returned by IssueRunToken is discarded in this handler.
//
// Responses: 400 for a non-numeric id, 404 for an unknown host, 409
// when the host's latest run is still active, 500 on store or token
// errors, 303 to "/" on success.
func (u *UI) StartRun(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	hostID, err := strconv.ParseInt(chi.URLParam(r, "id"), 10, 64)
	if err != nil {
		http.Error(w, "bad host id", http.StatusBadRequest)
		return
	}
	if _, err := u.Hosts.Get(ctx, hostID); err != nil {
		if errors.Is(err, store.ErrNotFound) {
			http.NotFound(w, r)
			return
		}
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// Guard: refuse to start a second run while one is still active.
	// A lookup failure must fail the request; treating it as "no run"
	// would let a second run be created whenever the store hiccups.
	latest, err := u.Runs.LatestForHost(ctx, hostID)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	if latest != nil {
		switch latest.State {
		case model.StateCompleted, model.StateReleased, model.StateFailedHolding:
			// ok to start fresh
		default:
			http.Error(w, "host already has an active run", http.StatusConflict)
			return
		}
	}

	_, hash, err := orchestrator.IssueRunToken()
	if err != nil {
		http.Error(w, "token: "+err.Error(), http.StatusInternalServerError)
		return
	}
	runID, err := u.Runs.Create(ctx, hostID, hash)
	if err != nil {
		http.Error(w, "create run: "+err.Error(), http.StatusInternalServerError)
		return
	}
	log.Printf("ui: created run %d for host %d (state=Queued)", runID, hostID)
	http.Redirect(w, r, "/", http.StatusSeeOther)
}
// NewHostForm serves the empty registration page. The only field
// populated is the base URL used for the quick-register one-liner.
func (u *UI) NewHostForm(w http.ResponseWriter, r *http.Request) {
	blank := templates.RegistrationForm{QuickRegisterURL: u.baseURL(r)}
	page := templates.Registration(blank)
	_ = page.Render(r.Context(), w)
}
// QuickRegisterScript serves the quick-register shell script rendered
// from the embedded template, with the orchestrator base URL filled in
// so the script is self-contained. The response is marked no-store.
// A render failure is logged only; by then part of the body may
// already have been written.
func (u *UI) QuickRegisterScript(w http.ResponseWriter, r *http.Request) {
	hdr := w.Header()
	hdr.Set("Content-Type", "text/x-shellscript; charset=utf-8")
	hdr.Set("Cache-Control", "no-store")

	params := struct{ OrchestratorURL string }{OrchestratorURL: u.baseURL(r)}
	err := quickRegisterTmpl.Execute(w, params)
	if err != nil {
		log.Printf("quick-register script render: %v", err)
	}
}
// CreateHost handles the HTML registration form. Free-text fields are
// whitespace-trimmed (the MAC is also lowercased) before validation;
// the spec YAML is passed through untouched.
//
// Responses: 400 with the form re-rendered on a parse or validation
// problem, 409 with the form re-rendered when the store rejects the
// insert, 303 to "/" on success.
func (u *UI) CreateHost(w http.ResponseWriter, r *http.Request) {
	if err := r.ParseForm(); err != nil {
		http.Error(w, "bad form", http.StatusBadRequest)
		return
	}
	field := func(key string) string { return strings.TrimSpace(r.PostForm.Get(key)) }
	form := templates.RegistrationForm{
		Name:           field("name"),
		MAC:            strings.ToLower(field("mac")),
		WoLBroadcastIP: field("wol_broadcast_ip"),
		// Trimmed like its siblings: a stray space around the number
		// used to fail the numeric check in validateHostForm.
		WoLPort:          field("wol_port"),
		ExpectedSpecYAML: r.PostForm.Get("expected_spec_yaml"),
		Notes:            field("notes"),
		QuickRegisterURL: u.baseURL(r),
	}
	if errMsg := validateHostForm(&form); errMsg != "" {
		form.Error = errMsg
		w.WriteHeader(http.StatusBadRequest)
		_ = templates.Registration(form).Render(r.Context(), w)
		return
	}

	// Validation has already rejected any non-empty port outside
	// 1..65535, so a zero here can only mean the field was left blank;
	// the Atoi error for "" is therefore safe to ignore.
	wolPort, _ := strconv.Atoi(form.WoLPort)
	if wolPort == 0 {
		wolPort = 9
	}

	_, err := u.Hosts.Create(r.Context(), model.Host{
		Name:             form.Name,
		MAC:              form.MAC,
		WoLBroadcastIP:   form.WoLBroadcastIP,
		WoLPort:          wolPort,
		ExpectedSpecYAML: form.ExpectedSpecYAML,
		Notes:            form.Notes,
	})
	if err != nil {
		form.Error = friendlyDBError(err)
		w.WriteHeader(http.StatusConflict)
		_ = templates.Registration(form).Render(r.Context(), w)
		return
	}
	http.Redirect(w, r, "/", http.StatusSeeOther)
}
// quickRegisterPayload is the POST body accepted by /api/v1/hosts —
// the shape the quick-register bash one-liner emits.
// Field meanings mirror the registration form; CreateHostJSON trims
// Name, MAC, WoLBroadcastIP and Notes before use.
type quickRegisterPayload struct {
Name string `json:"name"`
MAC string `json:"mac"`
WoLBroadcastIP string `json:"wol_broadcast_ip"`
// WoLPort of 0 (including an omitted field) makes CreateHostJSON fall
// back to port 9.
WoLPort int `json:"wol_port"`
ExpectedSpecYAML string `json:"expected_spec_yaml"`
Notes string `json:"notes"`
}
// CreateHostJSON is the API counterpart to CreateHost. Accepts the same
// fields as the form but in JSON, so a target host can POST its own
// registration payload over curl from the quick-register one-liner.
// Same validation as the form; no auth (LAN-only).
//
// The request body is capped at 256 KiB. Responses are JSON: 400 for a
// malformed body or a validation failure, 409 when the store rejects
// the insert, 201 with {id, name, mac} on success.
func (u *UI) CreateHostJSON(w http.ResponseWriter, r *http.Request) {
	var p quickRegisterPayload
	if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 256*1024)).Decode(&p); err != nil {
		writeJSONError(w, http.StatusBadRequest, "bad json: "+err.Error())
		return
	}
	form := templates.RegistrationForm{
		Name:             strings.TrimSpace(p.Name),
		MAC:              strings.ToLower(strings.TrimSpace(p.MAC)),
		WoLBroadcastIP:   strings.TrimSpace(p.WoLBroadcastIP),
		ExpectedSpecYAML: p.ExpectedSpecYAML,
		Notes:            strings.TrimSpace(p.Notes),
	}
	// Every explicitly supplied port must pass validateHostForm's range
	// check. Forwarding only positive values let a negative port skip
	// validation and reach the store unchanged, because the default
	// below replaces 0 only. Zero still means "not supplied".
	if p.WoLPort != 0 {
		form.WoLPort = strconv.Itoa(p.WoLPort)
	}
	if errMsg := validateHostForm(&form); errMsg != "" {
		writeJSONError(w, http.StatusBadRequest, errMsg)
		return
	}

	wolPort := p.WoLPort
	if wolPort == 0 {
		wolPort = 9
	}

	id, err := u.Hosts.Create(r.Context(), model.Host{
		Name:             form.Name,
		MAC:              form.MAC,
		WoLBroadcastIP:   form.WoLBroadcastIP,
		WoLPort:          wolPort,
		ExpectedSpecYAML: form.ExpectedSpecYAML,
		Notes:            form.Notes,
	})
	if err != nil {
		writeJSONError(w, http.StatusConflict, friendlyDBError(err))
		return
	}
	log.Printf("api: registered host %d (%s, %s)", id, form.Name, form.MAC)

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusCreated)
	_ = json.NewEncoder(w).Encode(map[string]any{
		"id":   id,
		"name": form.Name,
		"mac":  form.MAC,
	})
}
// Heartbeat is the endpoint a host-mode vetting-agent calls
// periodically (about every 30s, per the original note) from the
// registered host. LAN-trusted, no auth — the same threat model as the
// browser UI and quick-register. It stamps last-seen for the MAC, asks
// the runner (when present) to publish a tile update, and answers with
// whatever pickHostCommand decides, e.g. cmd=reboot_for_vetting.
//
// Responses are JSON: 400 for a malformed MAC, 404 for an unknown
// host, 500 on store errors, otherwise a heartbeatResponse.
func (u *UI) Heartbeat(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	mac := strings.ToLower(strings.TrimSpace(chi.URLParam(r, "mac")))
	if !macRe.MatchString(mac) {
		writeJSONError(w, http.StatusBadRequest,
			"MAC address must be in the form aa:bb:cc:dd:ee:ff")
		return
	}

	host, err := u.Hosts.GetByMAC(ctx, mac)
	switch {
	case errors.Is(err, store.ErrNotFound):
		writeJSONError(w, http.StatusNotFound, "unknown host")
		return
	case err != nil:
		writeJSONError(w, http.StatusInternalServerError, err.Error())
		return
	}

	seenAt := time.Now().UTC()
	if err := u.Hosts.UpdateLastSeen(ctx, mac, seenAt); err != nil {
		writeJSONError(w, http.StatusInternalServerError, err.Error())
		return
	}

	// The runner is optional; without it there is nothing to publish.
	if u.Runner != nil {
		u.Runner.PublishTileUpdate(ctx, host.ID)
	}

	cmd, runID := u.pickHostCommand(ctx, host.ID)
	w.Header().Set("Content-Type", "application/json")
	_ = json.NewEncoder(w).Encode(heartbeatResponse{OK: true, Cmd: cmd, RunID: runID})
}
// heartbeatResponse is the JSON the host-mode agent decodes on every
// heartbeat. `cmd` is "" (omitted) in the idle case so the wire shape
// stays `{"ok": true}` when nothing is happening.
type heartbeatResponse struct {
// OK is set to true on every reply Heartbeat encodes.
OK bool `json:"ok"`
// Cmd is the action for the agent; the only non-empty value produced
// in this file is cmdRebootForVetting.
Cmd string `json:"cmd,omitempty"`
// RunID accompanies Cmd and is omitted from the JSON when zero.
RunID int64 `json:"run_id,omitempty"`
}
// pickHostCommand works out what, if anything, the host-mode agent
// should be told in reply to a heartbeat. The result is a command name
// plus the run it refers to, or ("", 0) for "stay idle".
//
//   - Latest run Queued → attempt Transition(Dispatched); on success
//     the agent is told to reboot, so the host skips the WoL dance the
//     dispatcher would otherwise perform.
//   - Latest run WaitingWoL with StartedAt under 10 minutes ago →
//     reboot again, covering "host crashed mid-reboot, systemd brought
//     the reporter back".
//   - Anything else, a missing store/runner, or a lookup error → idle.
func (u *UI) pickHostCommand(ctx context.Context, hostID int64) (string, int64) {
	if u.Runs == nil || u.Runner == nil {
		return "", 0
	}

	run, err := u.Runs.LatestForHost(ctx, hostID)
	if err != nil {
		log.Printf("heartbeat: latest run for host %d: %v", hostID, err)
		return "", 0
	}
	if run == nil {
		return "", 0
	}

	if run.State == model.StateQueued {
		_, transErr := u.Runner.Transition(ctx, run.ID, orchestrator.TriggerDispatched)
		if transErr != nil {
			// Benign race with the dispatcher's own poll: the state
			// machine refuses the second transition. Log it and stay
			// idle so the agent does not reboot for a run another path
			// is already driving.
			log.Printf("heartbeat: transition run %d: %v", run.ID, transErr)
			return "", 0
		}
		log.Printf("heartbeat: dispatched run %d for host %d via heartbeat (no WoL)", run.ID, hostID)
		return cmdRebootForVetting, run.ID
	}

	// Crashed-mid-reboot retry. The window is bounded so a permanently
	// broken PXE setup cannot reboot-loop the box.
	if run.State == model.StateWaitingWoL && time.Since(run.StartedAt) < 10*time.Minute {
		return cmdRebootForVetting, run.ID
	}

	return "", 0
}
// cmdRebootForVetting is the heartbeat reply command returned by
// pickHostCommand when the host should reboot to begin vetting.
const cmdRebootForVetting = "reboot_for_vetting"
// writeJSONError replies with the given HTTP status and a JSON object
// of the form {"error": msg}. The encode error is ignored: the status
// line has already been sent by then.
func writeJSONError(w http.ResponseWriter, status int, msg string) {
	body := map[string]string{"error": msg}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)

	enc := json.NewEncoder(w)
	_ = enc.Encode(body)
}
// OverrideWipeStorage is the operator's explicit "yes, wipe the disk
// even though we found filesystem signatures" button. Only meaningful
// when the latest run is FailedHolding with failed_stage=Storage — per
// the original design note, the agent's next heartbeat will receive
// retry_stage with wipe=true and re-enter the Storage stage bypassing
// the wipe-probe guard.
//
// Responses: 400 for a non-numeric id, 409 when there is no run or the
// run is not holding on Storage, 503 when no runner is configured, 500
// on store or override errors, 303 to "/" on success.
func (u *UI) OverrideWipeStorage(w http.ResponseWriter, r *http.Request) {
	hostID, err := strconv.ParseInt(chi.URLParam(r, "id"), 10, 64)
	if err != nil {
		http.Error(w, "bad host id", http.StatusBadRequest)
		return
	}
	latest, err := u.Runs.LatestForHost(r.Context(), hostID)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	if latest == nil {
		http.Error(w, "no run for host", http.StatusConflict)
		return
	}
	if latest.State != model.StateFailedHolding || latest.FailedStage != "Storage" {
		http.Error(w, "override only valid when holding on Storage", http.StatusConflict)
		return
	}
	// Other handlers in this file treat Runner as optional; answer
	// cleanly here too instead of dereferencing a nil runner.
	if u.Runner == nil {
		http.Error(w, "runner unavailable", http.StatusServiceUnavailable)
		return
	}
	if _, err := u.Runner.Override(r.Context(), latest.ID, `{"wipe":true}`); err != nil {
		http.Error(w, "override: "+err.Error(), http.StatusInternalServerError)
		return
	}
	http.Redirect(w, r, "/", http.StatusSeeOther)
}
// DeleteHost removes the host named by the "id" URL parameter and
// redirects to the dashboard. A non-numeric id yields 400, a host the
// store does not know yields 404, and any other store error yields 500.
func (u *UI) DeleteHost(w http.ResponseWriter, r *http.Request) {
	id, parseErr := strconv.ParseInt(chi.URLParam(r, "id"), 10, 64)
	if parseErr != nil {
		http.Error(w, "bad id", http.StatusBadRequest)
		return
	}

	switch err := u.Hosts.Delete(r.Context(), id); {
	case err == nil:
		http.Redirect(w, r, "/", http.StatusSeeOther)
	case errors.Is(err, store.ErrNotFound):
		http.NotFound(w, r)
	default:
		http.Error(w, err.Error(), http.StatusInternalServerError)
	}
}
// SSE hands the request straight to the event hub's ServeSSE; all
// streaming behaviour lives in the hub.
func (u *UI) SSE(w http.ResponseWriter, r *http.Request) {
u.EventHub.ServeSSE(w, r)
}
// Report serves the HTML report artifact for a run. It lists the run's
// artifacts, takes the path of the last one whose kind is report_html,
// and streams that file back as text/html. 404 when the run has no such
// artifact (or its path is empty); 400 for a non-numeric run id.
//
// NOTE(review): the stored path is passed to http.ServeFile as-is. No
// check that it lives under an artifacts directory is performed in this
// handler — confirm that containment is enforced where the artifact row
// is written.
func (u *UI) Report(w http.ResponseWriter, r *http.Request) {
	runID, err := strconv.ParseInt(chi.URLParam(r, "runID"), 10, 64)
	if err != nil {
		http.Error(w, "bad run id", http.StatusBadRequest)
		return
	}

	arts, err := u.Artifacts.ListForRun(r.Context(), runID)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// No early exit: when several report rows exist, the last one wins.
	reportPath := ""
	for _, art := range arts {
		if art.Kind != "report_html" {
			continue
		}
		reportPath = art.Path
	}
	if reportPath == "" {
		http.NotFound(w, r)
		return
	}

	w.Header().Set("Content-Type", "text/html; charset=utf-8")
	http.ServeFile(w, r, reportPath)
}
// validateHostForm checks the registration fields shared by the HTML
// form and the JSON API. It returns a user-facing message for the first
// problem found, or "" when the form is acceptable.
//
// Checks, in order: Name non-empty; MAC matches macRe (lowercase,
// colon-separated); WoL broadcast IP non-empty; expected-spec YAML
// non-empty and parseable; WoL port, when supplied, an integer in
// 1..65535. An empty WoL port is accepted — callers apply the default.
func validateHostForm(form *templates.RegistrationForm) string {
	if form.Name == "" {
		return "Name is required."
	}
	if !macRe.MatchString(form.MAC) {
		return "MAC address must be in the form aa:bb:cc:dd:ee:ff."
	}
	if form.WoLBroadcastIP == "" {
		return "WoL broadcast IP is required."
	}
	if form.ExpectedSpecYAML == "" {
		return "Expected spec YAML is required."
	}
	// Syntax check only: the document is decoded into an untyped value
	// and then discarded.
	var anything any
	if err := yaml.Unmarshal([]byte(form.ExpectedSpecYAML), &anything); err != nil {
		return "Expected spec YAML is not valid YAML: " + err.Error()
	}
	if form.WoLPort != "" {
		port, err := strconv.Atoi(form.WoLPort)
		if err != nil || port < 1 || port > 65535 {
			// The message previously read "165535": the range
			// separator between 1 and 65535 had been lost.
			return "WoL port must be 1-65535."
		}
	}
	return ""
}
// friendlyDBError turns a store error into text suitable for showing
// to the operator. Uniqueness violations on hosts.name and hosts.mac
// (recognised by substring) get a plain-language message; every other
// error is returned as its raw text. err must be non-nil.
func friendlyDBError(err error) string {
	raw := err.Error()
	if strings.Contains(raw, "UNIQUE constraint failed: hosts.name") {
		return "A host with that name already exists."
	}
	if strings.Contains(raw, "UNIQUE constraint failed: hosts.mac") {
		return "A host with that MAC already exists."
	}
	return raw
}