19608bef1b
Host page owns host metadata, full runs table with per-row stage strip, in-flight banner, and empty-state CTA. Run page owns pipeline, active step, logs, sub-steps, spec diffs, and hold banner with a breadcrumb back to the host. Dashboard tile reverts to host-only. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
714 lines
23 KiB
Go
714 lines
23 KiB
Go
package api
|
||
|
||
import (
|
||
"context"
|
||
"encoding/json"
|
||
"errors"
|
||
"fmt"
|
||
"log"
|
||
"net/http"
|
||
"regexp"
|
||
"strconv"
|
||
"strings"
|
||
"text/template"
|
||
"time"
|
||
|
||
"github.com/go-chi/chi/v5"
|
||
"gopkg.in/yaml.v3"
|
||
|
||
"vetting/internal/events"
|
||
"vetting/internal/logs"
|
||
"vetting/internal/model"
|
||
"vetting/internal/orchestrator"
|
||
"vetting/internal/store"
|
||
"vetting/internal/web"
|
||
"vetting/internal/web/templates"
|
||
)
|
||
|
||
type UI struct {
|
||
Hosts *store.Hosts
|
||
Runs *store.Runs
|
||
Stages *store.Stages
|
||
SubSteps *store.SubSteps
|
||
SpecDiffs *store.SpecDiffs
|
||
Artifacts *store.Artifacts
|
||
EventHub *events.Hub
|
||
Logs *logs.Hub
|
||
Runner *orchestrator.Runner
|
||
Tiles *TileEnricher
|
||
PublicURL string // user-visible base URL baked into the quick-register one-liner
|
||
// PXE, when non-nil, gets Reload()ed after host create/delete so
|
||
// dnsmasq's dhcp-host= allowlist reflects the current registry.
|
||
// Without this, a newly-registered host PXE-boots and gets
|
||
// "proxy-ignored" because its MAC isn't tagged `known`.
|
||
PXE PXEReloader
|
||
}
|
||
|
||
// PXEReloader rewrites dnsmasq.conf with the current host list and
|
||
// SIGHUPs the subprocess. Satisfied by *pxe.Supervisor.
|
||
type PXEReloader interface {
|
||
Reload(hosts []model.Host) error
|
||
}
|
||
|
||
// reloadPXE reads the full host list and hands it to the reloader.
|
||
// Logs on failure; never returns an error — the HTTP request that
|
||
// triggered the host change has already succeeded.
|
||
func (u *UI) reloadPXE(ctx context.Context) {
|
||
if u.PXE == nil {
|
||
return
|
||
}
|
||
hosts, err := u.Hosts.List(ctx)
|
||
if err != nil {
|
||
log.Printf("pxe reload: list hosts: %v", err)
|
||
return
|
||
}
|
||
if err := u.PXE.Reload(hosts); err != nil {
|
||
log.Printf("pxe reload: %v", err)
|
||
}
|
||
}
|
||
|
||
// macRe matches a MAC address written as six two-digit lowercase hex
// octets separated by colons (e.g. aa:bb:cc:dd:ee:ff). It does not accept
// uppercase digits; the callers in this file lowercase their input first.
var macRe = regexp.MustCompile(`^[0-9a-f]{2}(:[0-9a-f]{2}){5}$`)
|
||
|
||
// quickRegisterTmpl is parsed once at startup — a malformed template
|
||
// should fail the binary at init, not on a visitor's first hit.
|
||
var quickRegisterTmpl = template.Must(
|
||
template.ParseFS(web.Register, "register/quick.sh.tmpl"),
|
||
)
|
||
|
||
// baseURL returns the orchestrator URL to bake into generated artefacts
|
||
// (the quick-register one-liner, its rendered script). Prefers the
|
||
// operator-configured public URL; falls back to the request's own host
|
||
// so a dev run on http://127.0.0.1:8080 still produces a working command.
|
||
func (u *UI) baseURL(r *http.Request) string {
|
||
if u.PublicURL != "" {
|
||
return strings.TrimRight(u.PublicURL, "/")
|
||
}
|
||
scheme := "http"
|
||
if r.TLS != nil {
|
||
scheme = "https"
|
||
}
|
||
return scheme + "://" + r.Host
|
||
}
|
||
|
||
func (u *UI) Dashboard(w http.ResponseWriter, r *http.Request) {
|
||
hosts, err := u.Hosts.List(r.Context())
|
||
if err != nil {
|
||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||
return
|
||
}
|
||
tiles := make([]templates.TileData, 0, len(hosts))
|
||
for _, h := range hosts {
|
||
latest, err := u.Runs.LatestForHost(r.Context(), h.ID)
|
||
if err != nil {
|
||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||
return
|
||
}
|
||
tiles = append(tiles, u.Tiles.Build(r.Context(), h, latest))
|
||
}
|
||
_ = templates.Dashboard(tiles).Render(r.Context(), w)
|
||
}
|
||
|
||
// HostPage renders /hosts/{id}: summary + actions + in-flight banner +
|
||
// runs table. Run-level detail (pipeline, logs, sub-steps, spec diffs,
|
||
// hold banner) lives on /runs/{runID}. The split keeps host-scoped and
|
||
// run-scoped work on distinct URLs so permalinks don't wander onto
|
||
// whichever run happens to be active.
|
||
func (u *UI) HostPage(w http.ResponseWriter, r *http.Request) {
|
||
idStr := chi.URLParam(r, "id")
|
||
id, err := strconv.ParseInt(idStr, 10, 64)
|
||
if err != nil {
|
||
http.Error(w, "bad host id", http.StatusBadRequest)
|
||
return
|
||
}
|
||
data, err := u.LoadHostPageData(r.Context(), id)
|
||
if err != nil {
|
||
if errors.Is(err, store.ErrNotFound) {
|
||
http.NotFound(w, r)
|
||
return
|
||
}
|
||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||
return
|
||
}
|
||
_ = templates.HostPage(data).Render(r.Context(), w)
|
||
}
|
||
|
||
// LoadHostPageData assembles the HostPageData payload for hostID — host
|
||
// metadata, the full newest-first runs list, the currently non-terminal
|
||
// run (if any) for the in-flight banner, and a per-run stages map so
|
||
// the runs table can paint its compact stage-strips without re-querying
|
||
// inside the template. Returns store.ErrNotFound when the host doesn't
|
||
// exist; other store errors are surfaced. Stage lookups are fail-soft:
|
||
// a transient DB error on one run's stages yields an empty strip for
|
||
// that row rather than blanking the whole page.
|
||
func (u *UI) LoadHostPageData(ctx context.Context, hostID int64) (templates.HostPageData, error) {
|
||
host, err := u.Hosts.Get(ctx, hostID)
|
||
if err != nil {
|
||
return templates.HostPageData{}, err
|
||
}
|
||
var runs []model.Run
|
||
if u.Runs != nil {
|
||
runs, _ = u.Runs.ListForHostAll(ctx, hostID)
|
||
}
|
||
var active *model.Run
|
||
for i := range runs {
|
||
if !runs[i].State.IsTerminal() {
|
||
active = &runs[i]
|
||
break
|
||
}
|
||
}
|
||
runStages := make(map[int64][]model.Stage, len(runs))
|
||
if u.Stages != nil {
|
||
for _, r := range runs {
|
||
if stages, err := u.Stages.ListForRun(ctx, r.ID); err == nil {
|
||
runStages[r.ID] = stages
|
||
}
|
||
}
|
||
}
|
||
return templates.HostPageData{
|
||
Host: *host,
|
||
LastSeenAt: host.LastSeenAt,
|
||
Runs: runs,
|
||
ActiveRun: active,
|
||
RunStages: runStages,
|
||
}, nil
|
||
}
|
||
|
||
// RunPage renders /runs/{runID}: breadcrumb, run header, hold banner,
|
||
// pipeline, per-stage active-step panels, and spec diffs. Host metadata
|
||
// is resolved from run.HostID for the breadcrumb and for action POST
|
||
// targets (cancel/override still live under /hosts/{hostID}/...).
|
||
func (u *UI) RunPage(w http.ResponseWriter, r *http.Request) {
|
||
idStr := chi.URLParam(r, "runID")
|
||
runID, err := strconv.ParseInt(idStr, 10, 64)
|
||
if err != nil {
|
||
http.Error(w, "bad run id", http.StatusBadRequest)
|
||
return
|
||
}
|
||
data, err := u.LoadRunPageData(r.Context(), runID)
|
||
if err != nil {
|
||
if errors.Is(err, store.ErrNotFound) {
|
||
http.NotFound(w, r)
|
||
return
|
||
}
|
||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||
return
|
||
}
|
||
_ = templates.RunPage(data).Render(r.Context(), w)
|
||
}
|
||
|
||
// LoadRunPageData assembles the RunPageData payload for runID. Resolves
|
||
// the owning host, then reads stages, sub-steps, spec diffs, and log
|
||
// replay. Returns store.ErrNotFound when the run or host is gone. The
|
||
// orchestrator's PublishRunPage path uses the same loader so SSE fragments
|
||
// render from identical inputs as the initial GET.
|
||
func (u *UI) LoadRunPageData(ctx context.Context, runID int64) (templates.RunPageData, error) {
|
||
if u.Runs == nil {
|
||
return templates.RunPageData{}, store.ErrNotFound
|
||
}
|
||
run, err := u.Runs.Get(ctx, runID)
|
||
if err != nil {
|
||
return templates.RunPageData{}, err
|
||
}
|
||
if run == nil {
|
||
return templates.RunPageData{}, store.ErrNotFound
|
||
}
|
||
host, err := u.Hosts.Get(ctx, run.HostID)
|
||
if err != nil {
|
||
return templates.RunPageData{}, err
|
||
}
|
||
var stages []model.Stage
|
||
var subSteps []model.SubStep
|
||
var diffs []model.SpecDiff
|
||
if u.Stages != nil {
|
||
stages, _ = u.Stages.ListForRun(ctx, runID)
|
||
}
|
||
if u.SubSteps != nil {
|
||
subSteps, _ = u.SubSteps.ListForRun(ctx, runID)
|
||
}
|
||
if u.SpecDiffs != nil {
|
||
diffs, _ = u.SpecDiffs.ListForRun(ctx, runID)
|
||
}
|
||
replayByStage := map[string]string{}
|
||
if u.Logs != nil {
|
||
replayByStage = u.Logs.ReplayByStage(runID)
|
||
}
|
||
// Critical-diff count + hold-key path reuse the tile enricher so the
|
||
// run header shows the same numbers the dashboard tile + runs-table
|
||
// row show. Fail-soft if tiles isn't wired (test setups can skip it).
|
||
critical := 0
|
||
holdKeyPath := ""
|
||
if u.Tiles != nil {
|
||
t := u.Tiles.Build(ctx, *host, run)
|
||
critical = t.SpecDiffCritical
|
||
holdKeyPath = t.HoldKeyPath
|
||
}
|
||
return templates.RunPageData{
|
||
Host: *host,
|
||
Run: *run,
|
||
Stages: stages,
|
||
SubSteps: subSteps,
|
||
SpecDiffs: diffs,
|
||
DefaultStepStage: pickDefaultStep(stages),
|
||
LogReplayByStage: replayByStage,
|
||
HoldKeyPath: holdKeyPath,
|
||
SpecDiffCritical: critical,
|
||
}, nil
|
||
}
|
||
|
||
// pickDefaultStep chooses which stage the detail page opens expanded by
|
||
// default. Rule: running → first-failed → Reporting. The operator is
|
||
// almost always most interested in the thing currently happening (or
|
||
// the thing that just failed); Reporting is the sensible terminal fallback
|
||
// because it's where the report link lives.
|
||
func pickDefaultStep(stages []model.Stage) string {
|
||
for _, s := range stages {
|
||
if s.State == model.StageRunning {
|
||
return s.Name
|
||
}
|
||
}
|
||
for _, s := range stages {
|
||
if s.State == model.StageFailed {
|
||
return s.Name
|
||
}
|
||
}
|
||
return "Reporting"
|
||
}
|
||
|
||
// StartRun creates a new Run for the host, issues an agent token, and
|
||
// transitions Registered→Queued. The dispatcher goroutine picks it up
|
||
// on its next tick; the happy path is heartbeat-driven (the reporter's
|
||
// next heartbeat fetches reboot_for_vetting). Refuses the click outright
|
||
// if the host isn't currently heartbeating — there is no path from
|
||
// Queued to live-image without an in-OS reporter on the target.
|
||
func (u *UI) StartRun(w http.ResponseWriter, r *http.Request) {
|
||
idStr := chi.URLParam(r, "id")
|
||
hostID, err := strconv.ParseInt(idStr, 10, 64)
|
||
if err != nil {
|
||
http.Error(w, "bad host id", http.StatusBadRequest)
|
||
return
|
||
}
|
||
host, err := u.Hosts.Get(r.Context(), hostID)
|
||
if err != nil {
|
||
if errors.Is(err, store.ErrNotFound) {
|
||
http.NotFound(w, r)
|
||
return
|
||
}
|
||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||
return
|
||
}
|
||
|
||
// Preflight: host must be heartbeating. The dispatcher re-checks at
|
||
// dispatch time (belt-and-braces for the gap between click and tick),
|
||
// but rejecting here gives the operator an immediate, actionable
|
||
// error instead of a mysterious Failed run 2s later.
|
||
if host.LastSeenAt == nil || time.Since(*host.LastSeenAt) > orchestrator.HostHeartbeatStaleAfter {
|
||
writeJSONError(w, http.StatusConflict,
|
||
"host is not heartbeating — install the reporter via /register/quick.sh on the target host, then retry")
|
||
return
|
||
}
|
||
|
||
// Guard: refuse to start a second run while one is still active.
|
||
if latest, err := u.Runs.LatestForHost(r.Context(), hostID); err == nil && latest != nil {
|
||
if !latest.State.IsTerminal() {
|
||
http.Error(w, "host already has an active run", http.StatusConflict)
|
||
return
|
||
}
|
||
}
|
||
|
||
nonDestructive := r.PostFormValue("non_destructive") == "1"
|
||
|
||
_, hash, err := orchestrator.IssueRunToken()
|
||
if err != nil {
|
||
http.Error(w, "token: "+err.Error(), http.StatusInternalServerError)
|
||
return
|
||
}
|
||
runID, err := u.Runs.Create(r.Context(), hostID, hash, nonDestructive)
|
||
if err != nil {
|
||
http.Error(w, "create run: "+err.Error(), http.StatusInternalServerError)
|
||
return
|
||
}
|
||
log.Printf("ui: created run %d for host %d (state=Queued)", runID, hostID)
|
||
// Send the operator straight to the new run — the button they clicked
|
||
// was "Start vetting", the thing they want next is to watch it.
|
||
http.Redirect(w, r, fmt.Sprintf("/runs/%d", runID), http.StatusSeeOther)
|
||
}
|
||
|
||
func (u *UI) NewHostForm(w http.ResponseWriter, r *http.Request) {
|
||
_ = templates.Registration(templates.RegistrationForm{
|
||
QuickRegisterURL: u.baseURL(r),
|
||
}).Render(r.Context(), w)
|
||
}
|
||
|
||
// QuickRegisterScript renders the bash one-liner an operator pastes on
|
||
// the target host: hardware autodetect + POST to /api/v1/hosts. The
|
||
// orchestrator URL is substituted in so the script is self-contained.
|
||
func (u *UI) QuickRegisterScript(w http.ResponseWriter, r *http.Request) {
|
||
w.Header().Set("Content-Type", "text/x-shellscript; charset=utf-8")
|
||
w.Header().Set("Cache-Control", "no-store")
|
||
if err := quickRegisterTmpl.Execute(w, struct{ OrchestratorURL string }{
|
||
OrchestratorURL: u.baseURL(r),
|
||
}); err != nil {
|
||
log.Printf("quick-register script render: %v", err)
|
||
}
|
||
}
|
||
|
||
func (u *UI) CreateHost(w http.ResponseWriter, r *http.Request) {
|
||
if err := r.ParseForm(); err != nil {
|
||
http.Error(w, "bad form", http.StatusBadRequest)
|
||
return
|
||
}
|
||
form := templates.RegistrationForm{
|
||
Name: strings.TrimSpace(r.PostForm.Get("name")),
|
||
MAC: strings.ToLower(strings.TrimSpace(r.PostForm.Get("mac"))),
|
||
WoLBroadcastIP: strings.TrimSpace(r.PostForm.Get("wol_broadcast_ip")),
|
||
WoLPort: r.PostForm.Get("wol_port"),
|
||
ExpectedSpecYAML: r.PostForm.Get("expected_spec_yaml"),
|
||
Notes: strings.TrimSpace(r.PostForm.Get("notes")),
|
||
QuickRegisterURL: u.baseURL(r),
|
||
}
|
||
|
||
if errMsg := validateHostForm(&form); errMsg != "" {
|
||
form.Error = errMsg
|
||
w.WriteHeader(http.StatusBadRequest)
|
||
_ = templates.Registration(form).Render(r.Context(), w)
|
||
return
|
||
}
|
||
|
||
wolPort, _ := strconv.Atoi(form.WoLPort)
|
||
if wolPort == 0 {
|
||
wolPort = 9
|
||
}
|
||
|
||
_, err := u.Hosts.Create(r.Context(), model.Host{
|
||
Name: form.Name,
|
||
MAC: form.MAC,
|
||
WoLBroadcastIP: form.WoLBroadcastIP,
|
||
WoLPort: wolPort,
|
||
ExpectedSpecYAML: form.ExpectedSpecYAML,
|
||
Notes: form.Notes,
|
||
})
|
||
if err != nil {
|
||
form.Error = friendlyDBError(err)
|
||
w.WriteHeader(http.StatusConflict)
|
||
_ = templates.Registration(form).Render(r.Context(), w)
|
||
return
|
||
}
|
||
u.reloadPXE(r.Context())
|
||
http.Redirect(w, r, "/", http.StatusSeeOther)
|
||
}
|
||
|
||
// quickRegisterPayload is the JSON body CreateHostJSON decodes — the
// shape the quick-register shell script POSTs to /api/v1/hosts. It
// mirrors the registration form, except that the port is a number.
type quickRegisterPayload struct {
	Name             string `json:"name"`
	MAC              string `json:"mac"`
	WoLBroadcastIP   string `json:"wol_broadcast_ip"`
	WoLPort          int    `json:"wol_port"` // 0 or absent means "use the default"
	ExpectedSpecYAML string `json:"expected_spec_yaml"`
	Notes            string `json:"notes"`
}
|
||
|
||
// CreateHostJSON is the API counterpart to CreateHost. Accepts the same
|
||
// fields as the form but in JSON, so a target host can POST its own
|
||
// registration payload over curl from the quick-register one-liner.
|
||
// Same validation as the form; no auth (LAN-only).
|
||
func (u *UI) CreateHostJSON(w http.ResponseWriter, r *http.Request) {
|
||
var p quickRegisterPayload
|
||
if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 256*1024)).Decode(&p); err != nil {
|
||
writeJSONError(w, http.StatusBadRequest, "bad json: "+err.Error())
|
||
return
|
||
}
|
||
form := templates.RegistrationForm{
|
||
Name: strings.TrimSpace(p.Name),
|
||
MAC: strings.ToLower(strings.TrimSpace(p.MAC)),
|
||
WoLBroadcastIP: strings.TrimSpace(p.WoLBroadcastIP),
|
||
ExpectedSpecYAML: p.ExpectedSpecYAML,
|
||
Notes: strings.TrimSpace(p.Notes),
|
||
}
|
||
if p.WoLPort > 0 {
|
||
form.WoLPort = strconv.Itoa(p.WoLPort)
|
||
}
|
||
if errMsg := validateHostForm(&form); errMsg != "" {
|
||
writeJSONError(w, http.StatusBadRequest, errMsg)
|
||
return
|
||
}
|
||
wolPort := p.WoLPort
|
||
if wolPort == 0 {
|
||
wolPort = 9
|
||
}
|
||
id, err := u.Hosts.Create(r.Context(), model.Host{
|
||
Name: form.Name,
|
||
MAC: form.MAC,
|
||
WoLBroadcastIP: form.WoLBroadcastIP,
|
||
WoLPort: wolPort,
|
||
ExpectedSpecYAML: form.ExpectedSpecYAML,
|
||
Notes: form.Notes,
|
||
})
|
||
if err != nil {
|
||
writeJSONError(w, http.StatusConflict, friendlyDBError(err))
|
||
return
|
||
}
|
||
log.Printf("api: registered host %d (%s, %s)", id, form.Name, form.MAC)
|
||
u.reloadPXE(r.Context())
|
||
w.Header().Set("Content-Type", "application/json")
|
||
w.WriteHeader(http.StatusCreated)
|
||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||
"id": id,
|
||
"name": form.Name,
|
||
"mac": form.MAC,
|
||
})
|
||
}
|
||
|
||
// Heartbeat is called every ~30s by a host-mode vetting-agent running
|
||
// as a systemd service on the registered host. LAN-trusted, no auth —
|
||
// same threat model as the browser UI and quick-register. Stamps
|
||
// last_seen_at, flips the dashboard tile to "online", and — if the
|
||
// operator has clicked Start vetting since the last heartbeat — replies
|
||
// with cmd=reboot_for_vetting so the host boots into PXE without WoL.
|
||
func (u *UI) Heartbeat(w http.ResponseWriter, r *http.Request) {
|
||
mac := strings.ToLower(strings.TrimSpace(chi.URLParam(r, "mac")))
|
||
if !macRe.MatchString(mac) {
|
||
writeJSONError(w, http.StatusBadRequest,
|
||
"MAC address must be in the form aa:bb:cc:dd:ee:ff")
|
||
return
|
||
}
|
||
host, err := u.Hosts.GetByMAC(r.Context(), mac)
|
||
if err != nil {
|
||
if errors.Is(err, store.ErrNotFound) {
|
||
writeJSONError(w, http.StatusNotFound, "unknown host")
|
||
return
|
||
}
|
||
writeJSONError(w, http.StatusInternalServerError, err.Error())
|
||
return
|
||
}
|
||
if err := u.Hosts.UpdateLastSeen(r.Context(), mac, time.Now().UTC()); err != nil {
|
||
writeJSONError(w, http.StatusInternalServerError, err.Error())
|
||
return
|
||
}
|
||
if u.Runner != nil {
|
||
u.Runner.PublishTileUpdate(r.Context(), host.ID)
|
||
}
|
||
cmd, runID := u.pickHostCommand(r.Context(), host.ID)
|
||
resp := heartbeatResponse{OK: true, Cmd: cmd, RunID: runID}
|
||
w.Header().Set("Content-Type", "application/json")
|
||
_ = json.NewEncoder(w).Encode(resp)
|
||
}
|
||
|
||
// heartbeatResponse is the JSON reply to a heartbeat. Cmd and RunID are
// omitted when empty/zero, so the idle reply on the wire is just
// {"ok": true}.
type heartbeatResponse struct {
	OK    bool   `json:"ok"`
	Cmd   string `json:"cmd,omitempty"`
	RunID int64  `json:"run_id,omitempty"`
}
|
||
|
||
// pickHostCommand decides what the host-mode agent should do on the
|
||
// back of this heartbeat. Returns ("", 0) when there's nothing to do.
|
||
//
|
||
// - Queued run → Transition(RebootCommanded) and tell the agent to
|
||
// reboot. Beats the dispatcher's 2s poll to the punch, but either
|
||
// path ends at WaitingReboot.
|
||
// - WaitingReboot (or legacy WaitingWoL) run <10min old → also return
|
||
// reboot, covering "host crashed mid-reboot, systemd brought the
|
||
// reporter back".
|
||
// - anything else → idle.
|
||
func (u *UI) pickHostCommand(ctx context.Context, hostID int64) (string, int64) {
|
||
if u.Runs == nil || u.Runner == nil {
|
||
return "", 0
|
||
}
|
||
run, err := u.Runs.LatestForHost(ctx, hostID)
|
||
if err != nil {
|
||
log.Printf("heartbeat: latest run for host %d: %v", hostID, err)
|
||
return "", 0
|
||
}
|
||
if run == nil {
|
||
return "", 0
|
||
}
|
||
switch run.State {
|
||
case model.StateQueued:
|
||
if _, err := u.Runner.Transition(ctx, run.ID, orchestrator.TriggerRebootCommanded); err != nil {
|
||
// Benign race with the dispatcher's own 2s poll — the
|
||
// state machine refuses the second transition; we just
|
||
// log and return idle so the agent doesn't reboot on a
|
||
// run that another path is already driving.
|
||
log.Printf("heartbeat: transition run %d: %v", run.ID, err)
|
||
return "", 0
|
||
}
|
||
log.Printf("heartbeat: dispatched run %d for host %d (reboot commanded)", run.ID, hostID)
|
||
return cmdRebootForVetting, run.ID
|
||
case model.StateWaitingReboot, model.StateWaitingWoL:
|
||
// Tolerate a crashed-mid-reboot retry: the reporter is the
|
||
// only thing that could be telling us about this host right
|
||
// now. Bound it so a perpetually-broken PXE doesn't
|
||
// reboot-loop the box.
|
||
if time.Since(run.StartedAt) < 10*time.Minute {
|
||
return cmdRebootForVetting, run.ID
|
||
}
|
||
return "", 0
|
||
}
|
||
return "", 0
|
||
}
|
||
|
||
// cmdRebootForVetting is the heartbeat `cmd` value pickHostCommand
// returns when the host should reboot for a queued or waiting run.
const cmdRebootForVetting = "reboot_for_vetting"
|
||
|
||
// writeJSONError writes a JSON error response: it sets the Content-Type
// to application/json, sends the given status code, and emits the body
// {"error": msg}. An encode failure is ignored, since the header has
// already been sent by then.
func writeJSONError(w http.ResponseWriter, status int, msg string) {
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)

	body := struct {
		Error string `json:"error"`
	}{Error: msg}
	_ = json.NewEncoder(w).Encode(body)
}
|
||
|
||
// OverrideWipeStorage is the operator's explicit "yes, wipe the disk
|
||
// even though we found filesystem signatures" button. Only meaningful
|
||
// when the latest run is FailedHolding with failed_stage=Storage — the
|
||
// agent's next heartbeat will receive retry_stage with wipe=true and
|
||
// re-enter the Storage stage bypassing the wipe-probe guard.
|
||
func (u *UI) OverrideWipeStorage(w http.ResponseWriter, r *http.Request) {
|
||
idStr := chi.URLParam(r, "id")
|
||
hostID, err := strconv.ParseInt(idStr, 10, 64)
|
||
if err != nil {
|
||
http.Error(w, "bad host id", http.StatusBadRequest)
|
||
return
|
||
}
|
||
latest, err := u.Runs.LatestForHost(r.Context(), hostID)
|
||
if err != nil {
|
||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||
return
|
||
}
|
||
if latest == nil {
|
||
http.Error(w, "no run for host", http.StatusConflict)
|
||
return
|
||
}
|
||
if latest.State != model.StateFailedHolding || latest.FailedStage != "Storage" {
|
||
http.Error(w, "override only valid when holding on Storage", http.StatusConflict)
|
||
return
|
||
}
|
||
if _, err := u.Runner.Override(r.Context(), latest.ID, `{"wipe":true}`); err != nil {
|
||
http.Error(w, "override: "+err.Error(), http.StatusInternalServerError)
|
||
return
|
||
}
|
||
// Operator was on /runs/{latest.ID} when they clicked — land them
|
||
// back there so they can see the override take effect.
|
||
http.Redirect(w, r, fmt.Sprintf("/runs/%d", latest.ID), http.StatusSeeOther)
|
||
}
|
||
|
||
// CancelRun halts an in-flight run. Transitions the run to
|
||
// StateCancelled; the next agent heartbeat receives cmd=cancel_stage
|
||
// which cancels the stage ctx on the agent side. Destructive stages
|
||
// mid-run can leave the host in an intermediate state — the confirm
|
||
// dialog in the UI warns the operator.
|
||
func (u *UI) CancelRun(w http.ResponseWriter, r *http.Request) {
|
||
idStr := chi.URLParam(r, "id")
|
||
hostID, err := strconv.ParseInt(idStr, 10, 64)
|
||
if err != nil {
|
||
http.Error(w, "bad host id", http.StatusBadRequest)
|
||
return
|
||
}
|
||
latest, err := u.Runs.LatestForHost(r.Context(), hostID)
|
||
if err != nil {
|
||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||
return
|
||
}
|
||
if latest == nil || latest.State.IsTerminal() {
|
||
http.Error(w, "no active run to cancel", http.StatusConflict)
|
||
return
|
||
}
|
||
if _, err := u.Runner.Transition(r.Context(), latest.ID, orchestrator.TriggerOperatorCancelled); err != nil {
|
||
http.Error(w, "cancel: "+err.Error(), http.StatusInternalServerError)
|
||
return
|
||
}
|
||
log.Printf("ui: cancelled run %d for host %d", latest.ID, hostID)
|
||
http.Redirect(w, r, fmt.Sprintf("/runs/%d", latest.ID), http.StatusSeeOther)
|
||
}
|
||
|
||
func (u *UI) DeleteHost(w http.ResponseWriter, r *http.Request) {
|
||
idStr := chi.URLParam(r, "id")
|
||
id, err := strconv.ParseInt(idStr, 10, 64)
|
||
if err != nil {
|
||
http.Error(w, "bad id", http.StatusBadRequest)
|
||
return
|
||
}
|
||
if err := u.Hosts.Delete(r.Context(), id); err != nil {
|
||
if errors.Is(err, store.ErrNotFound) {
|
||
http.NotFound(w, r)
|
||
return
|
||
}
|
||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||
return
|
||
}
|
||
u.reloadPXE(r.Context())
|
||
http.Redirect(w, r, "/", http.StatusSeeOther)
|
||
}
|
||
|
||
func (u *UI) SSE(w http.ResponseWriter, r *http.Request) {
|
||
u.EventHub.ServeSSE(w, r)
|
||
}
|
||
|
||
// Report serves the HTML report artifact for a run. Looks up the
|
||
// report_html artifact row for the runID, validates the path lives
|
||
// under the artifacts dir (defence-in-depth against path traversal),
|
||
// and streams it back. 404 when the run hasn't produced one yet.
|
||
func (u *UI) Report(w http.ResponseWriter, r *http.Request) {
|
||
idStr := chi.URLParam(r, "runID")
|
||
runID, err := strconv.ParseInt(idStr, 10, 64)
|
||
if err != nil {
|
||
http.Error(w, "bad run id", http.StatusBadRequest)
|
||
return
|
||
}
|
||
arts, err := u.Artifacts.ListForRun(r.Context(), runID)
|
||
if err != nil {
|
||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||
return
|
||
}
|
||
var path string
|
||
for _, a := range arts {
|
||
if a.Kind == "report_html" {
|
||
path = a.Path
|
||
}
|
||
}
|
||
if path == "" {
|
||
http.NotFound(w, r)
|
||
return
|
||
}
|
||
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
||
http.ServeFile(w, r, path)
|
||
}
|
||
|
||
func validateHostForm(form *templates.RegistrationForm) string {
|
||
if form.Name == "" {
|
||
return "Name is required."
|
||
}
|
||
if !macRe.MatchString(form.MAC) {
|
||
return "MAC address must be in the form aa:bb:cc:dd:ee:ff."
|
||
}
|
||
if form.WoLBroadcastIP == "" {
|
||
return "WoL broadcast IP is required."
|
||
}
|
||
if form.ExpectedSpecYAML == "" {
|
||
return "Expected spec YAML is required."
|
||
}
|
||
var anything any
|
||
if err := yaml.Unmarshal([]byte(form.ExpectedSpecYAML), &anything); err != nil {
|
||
return "Expected spec YAML is not valid YAML: " + err.Error()
|
||
}
|
||
if form.WoLPort != "" {
|
||
port, err := strconv.Atoi(form.WoLPort)
|
||
if err != nil || port < 1 || port > 65535 {
|
||
return "WoL port must be 1–65535."
|
||
}
|
||
}
|
||
return ""
|
||
}
|
||
|
||
// friendlyDBError turns a store error into a message fit for the
// registration form. Unique-constraint violations on hosts.name and
// hosts.mac (matched by substring, name checked first) get a friendly
// sentence; any other error is returned as its raw text. err must be
// non-nil.
func friendlyDBError(err error) string {
	raw := err.Error()

	known := []struct {
		needle   string
		friendly string
	}{
		{"UNIQUE constraint failed: hosts.name", "A host with that name already exists."},
		{"UNIQUE constraint failed: hosts.mac", "A host with that MAC already exists."},
	}
	for _, k := range known {
		if strings.Contains(raw, k.needle) {
			return k.friendly
		}
	}
	return raw
}
|