Files
Vetting/internal/api/ui_handlers.go
T
josh 23c689aa5b
CI / Lint + build + test (push) Failing after 1m57s
Release / release (push) Has been cancelled
deep profile + threshold gating + firmware stage + Burn super-stage
Ships all five phases of the deep-profile overhaul together. Runs now
carry a profile (quick/deep/soak); every profile walks the same
11-stage order — Inventory → Firmware → SpecValidate → SMART →
CPUStress → Storage → Network → Burn → GPU → PSU → Reporting —
with only per-stage durations and concurrency scaled.

Phase 1: profiles.ProfileRegistry loaded from vetting.yaml; runs.profile
column + CreateWithProfile; threshold table + evaluator seeded per-run
from the shared vetting.thresholds block; breach flips result at
/sensor + /result.

Phase 2: upgraded CPUStress (stress-ng --cpu-method=all --verify +
EDAC/MCE poll), Storage (fio --verify=md5 + SMART start/end delta),
Network (sustained iperf + /proc/net/dev deltas) with per-profile
knobs from Deps.

Phase 3: Burn super-stage with goroutine fan-out for CPU + memory +
fio + iperf, PSU rails sampled across the Burn window, SensorMux
(2 s flush, 500-sample cap) to absorb backpressure.

Phase 4: Firmware stage + firmware_snapshots table; probes dmidecode
(BIOS), ipmitool (BMC), ethtool -i (NIC), nvme (sysfs + id-ctrl),
lspci (HBA), /proc/cpuinfo (microcode). spec.DiffFirmware folds into
SpecValidate with pin-by-identifier and fan-out-across-component
matching; mismatches park the run in FailedHolding.

Phase 5: profile radio on the host start form, profile chip on the
run header, Firmware section in the HTML report, coverage artifact
uploaded from CI, agent/tests/fakes/ scaffold with Deps.LookPath
seam + stress_ng and dmidecode example fakes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-18 22:50:57 -04:00

765 lines
25 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package api
import (
"context"
"encoding/json"
"errors"
"fmt"
"log"
"net/http"
"regexp"
"strconv"
"strings"
"text/template"
"time"
"github.com/go-chi/chi/v5"
"gopkg.in/yaml.v3"
"vetting/internal/config"
"vetting/internal/events"
"vetting/internal/logs"
"vetting/internal/model"
"vetting/internal/orchestrator"
"vetting/internal/store"
"vetting/internal/web"
"vetting/internal/web/templates"
)
// UI bundles the stores, hubs and helpers used by the HTTP handlers in
// this file (browser pages plus the LAN-facing JSON endpoints). The
// page loaders nil-check Runs, Stages, SubSteps, SpecDiffs, Logs and
// Tiles before use; the remaining fields are dereferenced directly by
// the handlers that need them.
type UI struct {
// Hosts is the host registry: list/get/create/delete, lookup by MAC,
// and last-seen stamping.
Hosts *store.Hosts
// Runs stores vetting runs (latest-for-host, full per-host list,
// get by id, create with profile).
Runs *store.Runs
// Stages, SubSteps and SpecDiffs are listed per run when building
// the host and run pages.
Stages *store.Stages
SubSteps *store.SubSteps
SpecDiffs *store.SpecDiffs
// Artifacts is queried per run to locate the report_html artifact.
Artifacts *store.Artifacts
Thresholds *store.Thresholds // Phase 1: seeded at StartRun from Profiles
// Profiles supplies the shared Vetting.Thresholds defaults that are
// copied into each new run's threshold table.
Profiles *config.ProfileRegistry
// EventHub serves the SSE endpoint.
EventHub *events.Hub
// Logs provides the per-stage log replay shown on the run page.
Logs *logs.Hub
// Runner drives run state transitions, overrides and tile updates.
Runner *orchestrator.Runner
// Tiles builds dashboard tile data for a host and its latest run.
Tiles *TileEnricher
PublicURL string // user-visible base URL baked into the quick-register one-liner
// PXE, when non-nil, gets Reload()ed after host create/delete so
// dnsmasq's dhcp-host= allowlist reflects the current registry.
// Without this, a newly-registered host PXE-boots and gets
// "proxy-ignored" because its MAC isn't tagged `known`.
PXE PXEReloader
}
// PXEReloader rewrites dnsmasq.conf with the current host list and
// SIGHUPs the subprocess. Satisfied by *pxe.Supervisor.
//
// In this file it is only invoked through (*UI).reloadPXE, which passes
// the complete host list and logs (rather than propagates) any error
// Reload returns.
type PXEReloader interface {
// Reload receives the full, current set of registered hosts.
Reload(hosts []model.Host) error
}
// reloadPXE pushes the current host registry to the configured PXE
// reloader. It is a no-op when no reloader is wired. The call is
// best-effort: both a failed host listing and a failed reload are only
// logged, because the HTTP request that changed the host set has
// already succeeded by the time this runs.
func (u *UI) reloadPXE(ctx context.Context) {
	reloader := u.PXE
	if reloader == nil {
		return
	}

	all, listErr := u.Hosts.List(ctx)
	if listErr != nil {
		log.Printf("pxe reload: list hosts: %v", listErr)
		return
	}

	reloadErr := reloader.Reload(all)
	if reloadErr == nil {
		return
	}
	log.Printf("pxe reload: %v", reloadErr)
}
// macRe matches a MAC address written as six colon-separated pairs of
// lowercase hex digits (aa:bb:cc:dd:ee:ff). Uppercase digits do not
// match, so callers in this file lowercase the input before testing it.
var macRe = regexp.MustCompile(`^[0-9a-f]{2}(:[0-9a-f]{2}){5}$`)
// quickRegisterTmpl is parsed once at startup — a malformed template
// should fail the binary at init, not on a visitor's first hit.
// It is loaded from register/quick.sh.tmpl inside the web.Register
// filesystem and executed by QuickRegisterScript with a single
// OrchestratorURL field. template.Must panics if parsing fails.
var quickRegisterTmpl = template.Must(
template.ParseFS(web.Register, "register/quick.sh.tmpl"),
)
// baseURL yields the orchestrator base URL that gets embedded in
// generated artefacts (the quick-register one-liner and the script it
// fetches). An operator-configured PublicURL wins, with trailing
// slashes trimmed. Otherwise the URL is derived from the incoming
// request: https when the request arrived over TLS, http if not,
// followed by the request's Host — so a dev instance on
// http://127.0.0.1:8080 still produces a working command.
func (u *UI) baseURL(r *http.Request) string {
	if configured := u.PublicURL; configured != "" {
		return strings.TrimRight(configured, "/")
	}
	if r.TLS == nil {
		return "http://" + r.Host
	}
	return "https://" + r.Host
}
// Dashboard renders the landing page: one tile per registered host,
// each built from the host and its latest run. Any store error while
// listing hosts or fetching a host's latest run aborts the page with a
// 500. The template render error is intentionally discarded — by then
// the response has already started.
func (u *UI) Dashboard(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()
	serverError := func(err error) {
		http.Error(w, err.Error(), http.StatusInternalServerError)
	}

	hosts, err := u.Hosts.List(ctx)
	if err != nil {
		serverError(err)
		return
	}

	tiles := make([]templates.TileData, 0, len(hosts))
	for i := range hosts {
		latestRun, runErr := u.Runs.LatestForHost(ctx, hosts[i].ID)
		if runErr != nil {
			serverError(runErr)
			return
		}
		tiles = append(tiles, u.Tiles.Build(ctx, hosts[i], latestRun))
	}

	_ = templates.Dashboard(tiles).Render(ctx, w)
}
// HostPage serves /hosts/{id}: host summary, actions, the in-flight
// banner and the runs table. Everything run-scoped (pipeline, logs,
// sub-steps, spec diffs, hold banner) is on /runs/{runID} instead, so a
// host permalink never drifts onto whichever run is currently active.
//
// Responses: 400 for a non-numeric id, 404 when the loader reports
// store.ErrNotFound, 500 for any other loader error.
func (u *UI) HostPage(w http.ResponseWriter, r *http.Request) {
	hostID, parseErr := strconv.ParseInt(chi.URLParam(r, "id"), 10, 64)
	if parseErr != nil {
		http.Error(w, "bad host id", http.StatusBadRequest)
		return
	}

	page, loadErr := u.LoadHostPageData(r.Context(), hostID)
	switch {
	case loadErr == nil:
		_ = templates.HostPage(page).Render(r.Context(), w)
	case errors.Is(loadErr, store.ErrNotFound):
		http.NotFound(w, r)
	default:
		http.Error(w, loadErr.Error(), http.StatusInternalServerError)
	}
}
// LoadHostPageData assembles the HostPageData payload for hostID: host
// metadata, the host's runs list, the first non-terminal run in that
// list (if any) for the in-flight banner, and a per-run stages map so
// the runs table can paint its compact stage-strips without re-querying
// inside the template.
//
// Errors: the host lookup error is returned as-is (store.ErrNotFound
// when the host doesn't exist). The runs list and the per-run stage
// lookups are fail-soft: a failure is logged and the page renders with
// an empty runs table or an empty strip for that row, rather than
// blanking the whole page. Previously these failures were dropped
// without any trace.
func (u *UI) LoadHostPageData(ctx context.Context, hostID int64) (templates.HostPageData, error) {
	host, err := u.Hosts.Get(ctx, hostID)
	if err != nil {
		return templates.HostPageData{}, err
	}

	var runs []model.Run
	if u.Runs != nil {
		// Keep whatever the store returned even on error; only add the log.
		if runs, err = u.Runs.ListForHostAll(ctx, hostID); err != nil {
			log.Printf("host page: list runs for host %d: %v", hostID, err)
		}
	}

	// The banner points into the runs slice so it shares the row's data.
	var active *model.Run
	for i := range runs {
		if !runs[i].State.IsTerminal() {
			active = &runs[i]
			break
		}
	}

	runStages := make(map[int64][]model.Stage, len(runs))
	if u.Stages != nil {
		for i := range runs {
			runID := runs[i].ID
			stages, stageErr := u.Stages.ListForRun(ctx, runID)
			if stageErr != nil {
				log.Printf("host page: list stages for run %d: %v", runID, stageErr)
				continue
			}
			runStages[runID] = stages
		}
	}

	return templates.HostPageData{
		Host:       *host,
		LastSeenAt: host.LastSeenAt,
		Runs:       runs,
		ActiveRun:  active,
		RunStages:  runStages,
	}, nil
}
// RunPage serves /runs/{runID}: breadcrumb, run header, hold banner,
// pipeline, per-stage active-step panels and spec diffs. The owning
// host is resolved from the run so the breadcrumb and the action POST
// targets (cancel/override still live under /hosts/{hostID}/...) can be
// rendered.
//
// Responses: 400 for a non-numeric run id, 404 when the loader reports
// store.ErrNotFound, 500 for any other loader error.
func (u *UI) RunPage(w http.ResponseWriter, r *http.Request) {
	runID, parseErr := strconv.ParseInt(chi.URLParam(r, "runID"), 10, 64)
	if parseErr != nil {
		http.Error(w, "bad run id", http.StatusBadRequest)
		return
	}

	page, loadErr := u.LoadRunPageData(r.Context(), runID)
	switch {
	case loadErr == nil:
		_ = templates.RunPage(page).Render(r.Context(), w)
	case errors.Is(loadErr, store.ErrNotFound):
		http.NotFound(w, r)
	default:
		http.Error(w, loadErr.Error(), http.StatusInternalServerError)
	}
}
// LoadRunPageData assembles the RunPageData payload for runID. It
// resolves the run and its owning host, then reads stages, sub-steps,
// spec diffs and the per-stage log replay. The orchestrator's
// PublishRunPage path uses the same loader so SSE fragments render from
// identical inputs as the initial GET.
//
// Errors: store.ErrNotFound when the Runs store isn't wired or the run
// lookup yields nil; run and host lookup errors are returned as-is.
// Stage, sub-step and spec-diff listings are fail-soft — the page still
// renders with whatever the store returned — but a failure is now
// logged instead of being silently discarded.
func (u *UI) LoadRunPageData(ctx context.Context, runID int64) (templates.RunPageData, error) {
	if u.Runs == nil {
		return templates.RunPageData{}, store.ErrNotFound
	}
	run, err := u.Runs.Get(ctx, runID)
	if err != nil {
		return templates.RunPageData{}, err
	}
	if run == nil {
		return templates.RunPageData{}, store.ErrNotFound
	}
	host, err := u.Hosts.Get(ctx, run.HostID)
	if err != nil {
		return templates.RunPageData{}, err
	}

	var (
		stages   []model.Stage
		subSteps []model.SubStep
		diffs    []model.SpecDiff
	)
	if u.Stages != nil {
		if stages, err = u.Stages.ListForRun(ctx, runID); err != nil {
			log.Printf("run page: list stages for run %d: %v", runID, err)
		}
	}
	if u.SubSteps != nil {
		if subSteps, err = u.SubSteps.ListForRun(ctx, runID); err != nil {
			log.Printf("run page: list sub-steps for run %d: %v", runID, err)
		}
	}
	if u.SpecDiffs != nil {
		if diffs, err = u.SpecDiffs.ListForRun(ctx, runID); err != nil {
			log.Printf("run page: list spec diffs for run %d: %v", runID, err)
		}
	}

	replayByStage := map[string]string{}
	if u.Logs != nil {
		replayByStage = u.Logs.ReplayByStage(runID)
	}

	// Critical-diff count + hold-key path reuse the tile enricher so the
	// run header shows the same numbers the dashboard tile + runs-table
	// row show. Fail-soft if tiles isn't wired (test setups can skip it).
	critical := 0
	holdKeyPath := ""
	if u.Tiles != nil {
		t := u.Tiles.Build(ctx, *host, run)
		critical = t.SpecDiffCritical
		holdKeyPath = t.HoldKeyPath
	}

	return templates.RunPageData{
		Host:             *host,
		Run:              *run,
		Stages:           stages,
		SubSteps:         subSteps,
		SpecDiffs:        diffs,
		DefaultStepStage: pickDefaultStep(stages),
		LogReplayByStage: replayByStage,
		HoldKeyPath:      holdKeyPath,
		SpecDiffCritical: critical,
	}, nil
}
// pickDefaultStep decides which stage the run page opens expanded.
// Priority: the first running stage, else the first failed stage, else
// the literal "Reporting". The operator usually cares most about what
// is happening right now (or what just broke); Reporting is the
// terminal fallback because that is where the report link lives.
func pickDefaultStep(stages []model.Stage) string {
	firstFailed, sawFailed := "", false
	for i := range stages {
		switch stages[i].State {
		case model.StageRunning:
			// A running stage anywhere in the list beats any failed one.
			return stages[i].Name
		case model.StageFailed:
			if !sawFailed {
				firstFailed, sawFailed = stages[i].Name, true
			}
		}
	}
	if sawFailed {
		return firstFailed
	}
	return "Reporting"
}
// StartRun creates a new Run for the host, issues an agent token, and
// transitions Registered→Queued. The dispatcher goroutine picks it up
// on its next tick; the happy path is heartbeat-driven (the reporter's
// next heartbeat fetches reboot_for_vetting). Refuses the click outright
// if the host isn't currently heartbeating — there is no path from
// Queued to live-image without an in-OS reporter on the target.
//
// Form fields: non_destructive ("1" enables it) and profile (defaults
// to config.ProfileQuick when blank).
//
// Responses: 400 bad host id or unknown profile; 404 unknown host;
// 409 host not heartbeating (JSON body) or a run is already active;
// 500 on store/token failures; 303 to /runs/{id} on success.
func (u *UI) StartRun(w http.ResponseWriter, r *http.Request) {
	idStr := chi.URLParam(r, "id")
	hostID, err := strconv.ParseInt(idStr, 10, 64)
	if err != nil {
		http.Error(w, "bad host id", http.StatusBadRequest)
		return
	}
	host, err := u.Hosts.Get(r.Context(), hostID)
	if err != nil {
		if errors.Is(err, store.ErrNotFound) {
			http.NotFound(w, r)
			return
		}
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// Preflight: host must be heartbeating. The dispatcher re-checks at
	// dispatch time (belt-and-braces for the gap between click and tick),
	// but rejecting here gives the operator an immediate, actionable
	// error instead of a mysterious Failed run 2s later.
	if host.LastSeenAt == nil || time.Since(*host.LastSeenAt) > orchestrator.HostHeartbeatStaleAfter {
		writeJSONError(w, http.StatusConflict,
			"host is not heartbeating — install the reporter via /register/quick.sh on the target host, then retry")
		return
	}

	// Guard: refuse to start a second run while one is still active.
	// Fail closed — if the lookup itself errors we cannot prove the
	// host is idle, so do not create a run. Previously an error here
	// silently skipped the guard.
	latest, err := u.Runs.LatestForHost(r.Context(), hostID)
	if err != nil {
		http.Error(w, "check active run: "+err.Error(), http.StatusInternalServerError)
		return
	}
	if latest != nil && !latest.State.IsTerminal() {
		http.Error(w, "host already has an active run", http.StatusConflict)
		return
	}

	nonDestructive := r.PostFormValue("non_destructive") == "1"
	profile := strings.TrimSpace(r.PostFormValue("profile"))
	if profile == "" {
		profile = config.ProfileQuick
	}
	if !config.IsValidProfile(profile) {
		http.Error(w, "unknown profile: "+profile, http.StatusBadRequest)
		return
	}

	// Only the token hash is stored with the run; the first return
	// value is not used here.
	_, hash, err := orchestrator.IssueRunToken()
	if err != nil {
		http.Error(w, "token: "+err.Error(), http.StatusInternalServerError)
		return
	}
	runID, err := u.Runs.CreateWithProfile(r.Context(), hostID, hash, nonDestructive, profile)
	if err != nil {
		http.Error(w, "create run: "+err.Error(), http.StatusInternalServerError)
		return
	}
	if err := u.seedThresholds(r.Context(), runID, host, profile); err != nil {
		// A threshold-seed failure shouldn't orphan a run row — log
		// and continue. Samples will just accumulate without a gate
		// until the operator retries, same as before Phase 1.
		log.Printf("ui: seed thresholds run %d: %v", runID, err)
	}
	log.Printf("ui: created run %d for host %d profile=%s (state=Queued)", runID, hostID, profile)

	// Send the operator straight to the new run — the button they clicked
	// was "Start vetting", the thing they want next is to watch it.
	http.Redirect(w, r, fmt.Sprintf("/runs/%d", runID), http.StatusSeeOther)
}
// seedThresholds copies the shared vetting.thresholds defaults from the
// ProfileRegistry into the per-run threshold table, tagging every row
// with Source "profile". The same defaults apply to every profile
// today; host and profile are accepted so per-host and per-profile
// override layers can be added later without a signature change.
//
// Returns nil without touching the store when Thresholds or Profiles
// isn't wired (tests do not always build them) or when there are no
// defaults; otherwise returns the SeedForRun error.
func (u *UI) seedThresholds(ctx context.Context, runID int64, host *model.Host, profile string) error {
	// Reserved for the per-host and per-profile override layers.
	_, _ = host, profile

	if u.Thresholds == nil {
		return nil
	}
	if u.Profiles == nil {
		return nil
	}

	shared := u.Profiles.Vetting.Thresholds
	count := len(shared)
	if count == 0 {
		return nil
	}

	rows := make([]store.ThresholdSpec, 0, count)
	for _, def := range shared {
		row := store.ThresholdSpec{Source: "profile"}
		row.Stage = def.Stage
		row.Kind = def.Kind
		row.Key = def.Key
		row.Op = def.Op
		row.Value = def.Value
		row.Nominal = def.Nominal
		row.Unit = def.Unit
		row.Severity = def.Severity
		rows = append(rows, row)
	}

	_, seedErr := u.Thresholds.SeedForRun(ctx, runID, rows)
	return seedErr
}
// NewHostForm renders the blank host-registration page. The only
// pre-filled value is the quick-register base URL derived from the
// request (or the configured PublicURL).
func (u *UI) NewHostForm(w http.ResponseWriter, r *http.Request) {
	blank := templates.RegistrationForm{QuickRegisterURL: u.baseURL(r)}
	_ = templates.Registration(blank).Render(r.Context(), w)
}
// QuickRegisterScript serves the shell script an operator pipes into
// bash on the target host (hardware autodetect + POST to
// /api/v1/hosts). The orchestrator base URL is substituted into the
// template so the script is self-contained. The response is marked
// no-store; a template execution failure is only logged because part
// of the body may already have been written.
func (u *UI) QuickRegisterScript(w http.ResponseWriter, r *http.Request) {
	type scriptVars struct{ OrchestratorURL string }

	hdr := w.Header()
	hdr.Set("Content-Type", "text/x-shellscript; charset=utf-8")
	hdr.Set("Cache-Control", "no-store")

	err := quickRegisterTmpl.Execute(w, scriptVars{OrchestratorURL: u.baseURL(r)})
	if err != nil {
		log.Printf("quick-register script render: %v", err)
	}
}
// CreateHost handles the browser registration form. It normalises the
// submitted fields (trimming, lowercasing the MAC), validates them, and
// inserts the host. An omitted WoL port defaults to 9.
//
// Responses: 400 "bad form" when the body can't be parsed; 400 with the
// form re-rendered and an error message on validation failure; 409 with
// the form re-rendered when the insert fails; 303 to "/" on success,
// after the PXE allowlist has been refreshed.
func (u *UI) CreateHost(w http.ResponseWriter, r *http.Request) {
	if err := r.ParseForm(); err != nil {
		http.Error(w, "bad form", http.StatusBadRequest)
		return
	}

	raw := r.PostForm.Get
	trimmed := func(key string) string { return strings.TrimSpace(raw(key)) }

	form := templates.RegistrationForm{
		Name:             trimmed("name"),
		MAC:              strings.ToLower(trimmed("mac")),
		WoLBroadcastIP:   trimmed("wol_broadcast_ip"),
		WoLPort:          raw("wol_port"),
		ExpectedSpecYAML: raw("expected_spec_yaml"),
		Notes:            trimmed("notes"),
		QuickRegisterURL: u.baseURL(r),
	}

	// rerender shows the form again with the operator's input intact.
	rerender := func(status int, msg string) {
		form.Error = msg
		w.WriteHeader(status)
		_ = templates.Registration(form).Render(r.Context(), w)
	}

	if msg := validateHostForm(&form); msg != "" {
		rerender(http.StatusBadRequest, msg)
		return
	}

	// Validation has already vetted a non-empty port; blank parses to 0.
	wolPort := 9
	if parsed, _ := strconv.Atoi(form.WoLPort); parsed != 0 {
		wolPort = parsed
	}

	newHost := model.Host{
		Name:             form.Name,
		MAC:              form.MAC,
		WoLBroadcastIP:   form.WoLBroadcastIP,
		WoLPort:          wolPort,
		ExpectedSpecYAML: form.ExpectedSpecYAML,
		Notes:            form.Notes,
	}
	if _, err := u.Hosts.Create(r.Context(), newHost); err != nil {
		rerender(http.StatusConflict, friendlyDBError(err))
		return
	}

	u.reloadPXE(r.Context())
	http.Redirect(w, r, "/", http.StatusSeeOther)
}
// quickRegisterPayload is the POST body accepted by /api/v1/hosts —
// the shape the quick-register bash one-liner emits. It carries the
// same fields as the browser registration form; the only difference is
// that wol_port is a JSON number here rather than a string.
type quickRegisterPayload struct {
Name string `json:"name"`
// MAC is trimmed and lowercased by CreateHostJSON before validation.
MAC string `json:"mac"`
WoLBroadcastIP string `json:"wol_broadcast_ip"`
// WoLPort of 0 (or omitted) makes CreateHostJSON fall back to port 9.
WoLPort int `json:"wol_port"`
ExpectedSpecYAML string `json:"expected_spec_yaml"`
Notes string `json:"notes"`
}
// CreateHostJSON is the API counterpart to CreateHost. Accepts the same
// fields as the form but in JSON, so a target host can POST its own
// registration payload over curl from the quick-register one-liner.
// Same validation as the form; no auth (LAN-only). The request body is
// capped at 256 KiB.
//
// wol_port: 0 or omitted means "use the default" (9). Any other value,
// including negative numbers, goes through the same 1-65535 range check
// as the form. Previously a negative port skipped validation and was
// stored verbatim.
//
// Responses: 400 with {"error": ...} for malformed JSON or validation
// failure; 409 with {"error": ...} when the insert fails; 201 with
// {"id", "name", "mac"} on success, after the PXE allowlist refresh.
func (u *UI) CreateHostJSON(w http.ResponseWriter, r *http.Request) {
	var p quickRegisterPayload
	if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 256*1024)).Decode(&p); err != nil {
		writeJSONError(w, http.StatusBadRequest, "bad json: "+err.Error())
		return
	}

	form := templates.RegistrationForm{
		Name:             strings.TrimSpace(p.Name),
		MAC:              strings.ToLower(strings.TrimSpace(p.MAC)),
		WoLBroadcastIP:   strings.TrimSpace(p.WoLBroadcastIP),
		ExpectedSpecYAML: p.ExpectedSpecYAML,
		Notes:            strings.TrimSpace(p.Notes),
	}
	// Hand every explicitly-set port to the validator, not just the
	// positive ones, so out-of-range values on either side are rejected.
	if p.WoLPort != 0 {
		form.WoLPort = strconv.Itoa(p.WoLPort)
	}
	if errMsg := validateHostForm(&form); errMsg != "" {
		writeJSONError(w, http.StatusBadRequest, errMsg)
		return
	}

	wolPort := p.WoLPort
	if wolPort == 0 {
		wolPort = 9
	}

	id, err := u.Hosts.Create(r.Context(), model.Host{
		Name:             form.Name,
		MAC:              form.MAC,
		WoLBroadcastIP:   form.WoLBroadcastIP,
		WoLPort:          wolPort,
		ExpectedSpecYAML: form.ExpectedSpecYAML,
		Notes:            form.Notes,
	})
	if err != nil {
		writeJSONError(w, http.StatusConflict, friendlyDBError(err))
		return
	}
	log.Printf("api: registered host %d (%s, %s)", id, form.Name, form.MAC)
	u.reloadPXE(r.Context())

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusCreated)
	// The status line is already sent; an encode failure can't be reported.
	_ = json.NewEncoder(w).Encode(map[string]any{
		"id":   id,
		"name": form.Name,
		"mac":  form.MAC,
	})
}
// Heartbeat is the endpoint a host-mode vetting-agent (a systemd
// service on the registered host) hits roughly every 30s. It is
// LAN-trusted with no auth — the same threat model as the browser UI
// and quick-register. Each call stamps last_seen_at, nudges the
// dashboard tile, and — when the operator has clicked Start vetting
// since the previous heartbeat — answers with cmd=reboot_for_vetting so
// the host boots into PXE without WoL.
//
// Responses (all JSON): 400 for a malformed MAC, 404 for an unknown
// host, 500 on store failure, otherwise 200 with a heartbeatResponse.
func (u *UI) Heartbeat(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	mac := strings.TrimSpace(chi.URLParam(r, "mac"))
	mac = strings.ToLower(mac)
	if !macRe.MatchString(mac) {
		writeJSONError(w, http.StatusBadRequest,
			"MAC address must be in the form aa:bb:cc:dd:ee:ff")
		return
	}

	host, lookupErr := u.Hosts.GetByMAC(ctx, mac)
	switch {
	case lookupErr == nil:
		// Known host; carry on.
	case errors.Is(lookupErr, store.ErrNotFound):
		writeJSONError(w, http.StatusNotFound, "unknown host")
		return
	default:
		writeJSONError(w, http.StatusInternalServerError, lookupErr.Error())
		return
	}

	if stampErr := u.Hosts.UpdateLastSeen(ctx, mac, time.Now().UTC()); stampErr != nil {
		writeJSONError(w, http.StatusInternalServerError, stampErr.Error())
		return
	}
	if u.Runner != nil {
		u.Runner.PublishTileUpdate(ctx, host.ID)
	}

	cmd, runID := u.pickHostCommand(ctx, host.ID)
	w.Header().Set("Content-Type", "application/json")
	_ = json.NewEncoder(w).Encode(heartbeatResponse{OK: true, Cmd: cmd, RunID: runID})
}
// heartbeatResponse is the JSON the host-mode agent decodes on every
// heartbeat. `cmd` is "" (omitted) in the idle case so the wire shape
// stays `{"ok": true}` when nothing is happening.
type heartbeatResponse struct {
// OK is always emitted; Heartbeat sets it to true on the success path.
OK bool `json:"ok"`
// Cmd is the command for the agent, or empty (and omitted) when idle.
Cmd string `json:"cmd,omitempty"`
// RunID is the run the command refers to; zero is omitted from the JSON.
RunID int64 `json:"run_id,omitempty"`
}
// pickHostCommand works out what, if anything, the host-mode agent
// should do in response to the current heartbeat. It returns ("", 0)
// for "nothing to do", including when Runs or Runner isn't wired, when
// the latest-run lookup fails (logged), or when the host has no runs.
//
//   - Latest run Queued → fire the RebootCommanded transition and tell
//     the agent to reboot. This can beat the dispatcher's 2s poll, but
//     both paths end at WaitingReboot. If the transition is refused the
//     agent is told to idle.
//   - Latest run WaitingReboot (or legacy WaitingWoL) and started less
//     than 10 minutes ago → reboot again, covering "host crashed
//     mid-reboot, systemd brought the reporter back".
//   - Anything else → idle.
func (u *UI) pickHostCommand(ctx context.Context, hostID int64) (string, int64) {
	// Upper bound on re-issuing the reboot so a perpetually-broken PXE
	// doesn't reboot-loop the box.
	const rebootRetryWindow = 10 * time.Minute

	if u.Runs == nil || u.Runner == nil {
		return "", 0
	}

	latest, err := u.Runs.LatestForHost(ctx, hostID)
	if err != nil {
		log.Printf("heartbeat: latest run for host %d: %v", hostID, err)
		return "", 0
	}
	if latest == nil {
		return "", 0
	}

	state := latest.State
	if state == model.StateQueued {
		_, transErr := u.Runner.Transition(ctx, latest.ID, orchestrator.TriggerRebootCommanded)
		if transErr != nil {
			// Benign race with the dispatcher's own 2s poll — the
			// state machine refuses the second transition, so stay
			// idle rather than reboot on a run another path already
			// drives.
			log.Printf("heartbeat: transition run %d: %v", latest.ID, transErr)
			return "", 0
		}
		log.Printf("heartbeat: dispatched run %d for host %d (reboot commanded)", latest.ID, hostID)
		return cmdRebootForVetting, latest.ID
	}

	// Crashed-mid-reboot retry: the reporter is the only thing that
	// could be telling us about this host right now.
	awaitingReboot := state == model.StateWaitingReboot || state == model.StateWaitingWoL
	if awaitingReboot && time.Since(latest.StartedAt) < rebootRetryWindow {
		return cmdRebootForVetting, latest.ID
	}
	return "", 0
}
// cmdRebootForVetting is the heartbeat `cmd` value that pickHostCommand
// returns when the agent should reboot the host for a vetting run.
const cmdRebootForVetting = "reboot_for_vetting"
// writeJSONError sends an error response with the given HTTP status and
// a JSON body of the form {"error": msg}, with Content-Type set to
// application/json. The encode error is discarded: the status line has
// already been written, so there is nothing left to report it with.
func writeJSONError(w http.ResponseWriter, status int, msg string) {
	body := struct {
		Error string `json:"error"`
	}{Error: msg}

	hdr := w.Header()
	hdr.Set("Content-Type", "application/json")
	w.WriteHeader(status)

	enc := json.NewEncoder(w)
	_ = enc.Encode(body)
}
// OverrideWipeStorage backs the operator's explicit "yes, wipe the disk
// even though we found filesystem signatures" button. It is accepted
// only while the host's latest run is FailedHolding with
// failed_stage=Storage; the agent's next heartbeat then receives
// retry_stage with wipe=true and re-enters the Storage stage, bypassing
// the wipe-probe guard.
//
// Responses: 400 bad host id; 409 when there is no run or the run isn't
// holding on Storage; 500 on store/override failure; 303 back to
// /runs/{id} on success.
func (u *UI) OverrideWipeStorage(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	hostID, parseErr := strconv.ParseInt(chi.URLParam(r, "id"), 10, 64)
	if parseErr != nil {
		http.Error(w, "bad host id", http.StatusBadRequest)
		return
	}

	run, lookupErr := u.Runs.LatestForHost(ctx, hostID)
	if lookupErr != nil {
		http.Error(w, lookupErr.Error(), http.StatusInternalServerError)
		return
	}
	if run == nil {
		http.Error(w, "no run for host", http.StatusConflict)
		return
	}

	holdingOnStorage := run.State == model.StateFailedHolding && run.FailedStage == "Storage"
	if !holdingOnStorage {
		http.Error(w, "override only valid when holding on Storage", http.StatusConflict)
		return
	}

	_, overrideErr := u.Runner.Override(ctx, run.ID, `{"wipe":true}`)
	if overrideErr != nil {
		http.Error(w, "override: "+overrideErr.Error(), http.StatusInternalServerError)
		return
	}

	// The operator clicked from /runs/{run.ID}; send them back there so
	// they can watch the override take effect.
	target := fmt.Sprintf("/runs/%d", run.ID)
	http.Redirect(w, r, target, http.StatusSeeOther)
}
// CancelRun stops the host's in-flight run by firing the
// OperatorCancelled transition (run ends in StateCancelled). The next
// agent heartbeat receives cmd=cancel_stage, which cancels the stage
// ctx on the agent side. A destructive stage interrupted mid-run can
// leave the host in an intermediate state — the UI's confirm dialog
// warns the operator about that.
//
// Responses: 400 bad host id; 409 when there is no non-terminal run;
// 500 on store/transition failure; 303 to /runs/{id} on success.
func (u *UI) CancelRun(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	hostID, parseErr := strconv.ParseInt(chi.URLParam(r, "id"), 10, 64)
	if parseErr != nil {
		http.Error(w, "bad host id", http.StatusBadRequest)
		return
	}

	run, lookupErr := u.Runs.LatestForHost(ctx, hostID)
	if lookupErr != nil {
		http.Error(w, lookupErr.Error(), http.StatusInternalServerError)
		return
	}

	cancellable := run != nil && !run.State.IsTerminal()
	if !cancellable {
		http.Error(w, "no active run to cancel", http.StatusConflict)
		return
	}

	_, cancelErr := u.Runner.Transition(ctx, run.ID, orchestrator.TriggerOperatorCancelled)
	if cancelErr != nil {
		http.Error(w, "cancel: "+cancelErr.Error(), http.StatusInternalServerError)
		return
	}

	log.Printf("ui: cancelled run %d for host %d", run.ID, hostID)
	target := fmt.Sprintf("/runs/%d", run.ID)
	http.Redirect(w, r, target, http.StatusSeeOther)
}
// DeleteHost removes a host from the registry, refreshes the PXE
// allowlist and redirects to the dashboard.
//
// Responses: 400 for a non-numeric id; 404 when the store reports
// store.ErrNotFound; 500 for any other store error; 303 to "/" on
// success.
func (u *UI) DeleteHost(w http.ResponseWriter, r *http.Request) {
	hostID, parseErr := strconv.ParseInt(chi.URLParam(r, "id"), 10, 64)
	if parseErr != nil {
		http.Error(w, "bad id", http.StatusBadRequest)
		return
	}

	delErr := u.Hosts.Delete(r.Context(), hostID)
	switch {
	case delErr == nil:
		u.reloadPXE(r.Context())
		http.Redirect(w, r, "/", http.StatusSeeOther)
	case errors.Is(delErr, store.ErrNotFound):
		http.NotFound(w, r)
	default:
		http.Error(w, delErr.Error(), http.StatusInternalServerError)
	}
}
// SSE hands the request to the event hub, which owns the whole
// server-sent-events response.
func (u *UI) SSE(w http.ResponseWriter, r *http.Request) {
	hub := u.EventHub
	hub.ServeSSE(w, r)
}
// Report serves the HTML report artifact for a run. It lists the run's
// artifacts, picks the path of the report_html entry (when several
// exist, the last one in the listing wins) and streams that file via
// http.ServeFile with an explicit text/html content type.
//
// Responses: 400 for a non-numeric run id; 500 when the artifact
// listing fails; 404 when the run has not produced a report yet.
//
// NOTE(review): the stored path is passed to http.ServeFile as-is —
// this function does not itself check that it lives under the
// artifacts directory. Presumably that is guaranteed where the artifact
// row is written; confirm.
func (u *UI) Report(w http.ResponseWriter, r *http.Request) {
	runID, parseErr := strconv.ParseInt(chi.URLParam(r, "runID"), 10, 64)
	if parseErr != nil {
		http.Error(w, "bad run id", http.StatusBadRequest)
		return
	}

	artifacts, listErr := u.Artifacts.ListForRun(r.Context(), runID)
	if listErr != nil {
		http.Error(w, listErr.Error(), http.StatusInternalServerError)
		return
	}

	var reportPath string
	for _, art := range artifacts {
		if art.Kind != "report_html" {
			continue
		}
		reportPath = art.Path
	}
	if reportPath == "" {
		http.NotFound(w, r)
		return
	}

	w.Header().Set("Content-Type", "text/html; charset=utf-8")
	http.ServeFile(w, r, reportPath)
}
// validateHostForm checks a (pre-normalised) registration form and
// returns a user-facing message for the first problem found, or "" when
// the form is acceptable. Checks, in order:
//
//   - Name is non-empty.
//   - MAC matches macRe (lowercase aa:bb:cc:dd:ee:ff).
//   - WoL broadcast IP is non-empty.
//   - Expected spec YAML is non-empty and parses as YAML.
//   - WoL port, when supplied, is an integer in 1-65535. An empty port
//     is allowed; callers substitute the default.
//
// The form is only read, never modified.
func validateHostForm(form *templates.RegistrationForm) string {
	if form.Name == "" {
		return "Name is required."
	}
	if !macRe.MatchString(form.MAC) {
		return "MAC address must be in the form aa:bb:cc:dd:ee:ff."
	}
	if form.WoLBroadcastIP == "" {
		return "WoL broadcast IP is required."
	}
	if form.ExpectedSpecYAML == "" {
		return "Expected spec YAML is required."
	}
	// Only well-formedness is checked here, not the spec's schema.
	var anything any
	if err := yaml.Unmarshal([]byte(form.ExpectedSpecYAML), &anything); err != nil {
		return "Expected spec YAML is not valid YAML: " + err.Error()
	}
	if form.WoLPort != "" {
		port, err := strconv.Atoi(form.WoLPort)
		if err != nil || port < 1 || port > 65535 {
			// The range separator was missing, so the message read "165535".
			return "WoL port must be 1-65535."
		}
	}
	return ""
}
// friendlyDBError turns a host-insert error into text suitable for the
// operator. Unique-constraint violations on hosts.name and hosts.mac
// are recognised by substring (name is checked first) and mapped to a
// plain-English sentence; any other error is returned verbatim.
func friendlyDBError(err error) string {
	raw := err.Error()

	known := [...]struct{ needle, friendly string }{
		{"UNIQUE constraint failed: hosts.name", "A host with that name already exists."},
		{"UNIQUE constraint failed: hosts.mac", "A host with that MAC already exists."},
	}
	for _, k := range known {
		if strings.Contains(raw, k.needle) {
			return k.friendly
		}
	}
	return raw
}