Files
Vetting/internal/web/templates/host_page.templ
T
josh 23c689aa5b
CI / Lint + build + test (push) Failing after 1m57s
Release / release (push) Has been cancelled
deep profile + threshold gating + firmware stage + Burn super-stage
Ships all five phases of the deep-profile overhaul together. Runs now
carry a profile (quick/deep/soak); every profile walks the same
11-stage order — Inventory → Firmware → SpecValidate → SMART →
CPUStress → Storage → Network → Burn → GPU → PSU → Reporting —
with only per-stage durations and concurrency scaled.

Phase 1: profiles.ProfileRegistry loaded from vetting.yaml; runs.profile
column + CreateWithProfile; threshold table + evaluator seeded per-run
from the shared vetting.thresholds block; breach flips result at
/sensor + /result.

Phase 2: upgraded CPUStress (stress-ng --cpu-method=all --verify +
EDAC/MCE poll), Storage (fio --verify=md5 + SMART start/end delta),
Network (sustained iperf + /proc/net/dev deltas) with per-profile
knobs from Deps.

Phase 3: Burn super-stage with goroutine fan-out for CPU + memory +
fio + iperf, PSU rails sampled across the Burn window, SensorMux
(2 s flush, 500-sample cap) to absorb backpressure.

Phase 4: Firmware stage + firmware_snapshots table; probes dmidecode
(BIOS), ipmitool (BMC), ethtool -i (NIC), nvme (sysfs + id-ctrl),
lspci (HBA), /proc/cpuinfo (microcode). spec.DiffFirmware folds into
SpecValidate with pin-by-identifier and fan-out-across-component
matching; mismatches park the run in FailedHolding.

Phase 5: profile radio on the host start form, profile chip on the
run header, Firmware section in the HTML report, coverage artifact
uploaded from CI, agent/tests/fakes/ scaffold with Deps.LookPath
seam + stress_ng and dmidecode example fakes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-18 22:50:57 -04:00

391 lines
13 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package templates
import (
"bytes"
"context"
"fmt"
"time"
"vetting/internal/model"
"vetting/internal/store"
)
// HostPageData is the payload HostPage renders. Host + LastSeenAt drive
// the summary card; Runs is the full newest-first run list for this
// host; ActiveRun is the non-terminal run (if any) that fills the sticky
// in-flight banner and highlights one row in the runs table; RunStages
// maps runID → stage rows so each row can paint its own stage-dot strip
// (one dot per entry in store.DefaultStageOrder) without a per-render
// query ladder in the template.
type HostPageData struct {
// Host is the host record the page is about.
Host model.Host
// LastSeenAt feeds the last-seen chip and hostCanStart; nil makes
// hostCanStart return false.
LastSeenAt *time.Time
// Runs is the list iterated by RunsTable; when it is empty HostPage
// renders HostEmptyState instead.
Runs []model.Run
// ActiveRun is nil when no run is in flight.
ActiveRun *model.Run
// RunStages is keyed by run ID. A missing key yields a nil slice, which
// stageForName turns into pending stubs.
RunStages map[int64][]model.Stage
}
// HostPage is the host-focused URL: breadcrumb + summary + actions +
// in-flight banner + runs table (or the empty state when d.Runs is
// empty). Everything run-specific (pipeline, logs, sub-steps, spec
// diffs, hold banner) lives on /runs/{runID} instead. The outer section
// opens one SSE connection to /events; the summary, actions, banner and
// individual run rows each declare their own sse-swap target so live
// refreshes don't reflow the whole page.
templ HostPage(d HostPageData) {
@Layout(d.Host.Name) {
<section class="host-page" hx-ext="sse" sse-connect="/events">
<nav class="breadcrumb">
<a href="/">Dashboard</a>
<span class="breadcrumb-sep">/</span>
<span>{ d.Host.Name }</span>
</nav>
@HostSummary(d)
@HostActions(d)
@InFlightBanner(d)
if len(d.Runs) == 0 {
@HostEmptyState(d)
} else {
@RunsTable(d)
}
</section>
}
}
// HostSummary is the compact meta card at the top of the host page:
// hostname, last-seen chip, MAC, WoL target (broadcast IP:port), the
// operator notes (only when non-empty) and the expected spec YAML inside
// a collapsed <details>. The section's id and sse-swap event name are
// both detail-summary-{hostID} with an outerHTML swap, so an operator
// edit / heartbeat arriving mid-view replaces the card without a reload.
templ HostSummary(d HostPageData) {
<section
id={ fmt.Sprintf("detail-summary-%d", d.Host.ID) }
class="host-summary"
sse-swap={ fmt.Sprintf("detail-summary-%d", d.Host.ID) }
hx-swap="outerHTML"
>
<div class="host-summary-head">
<h1 class="host-summary-name">{ d.Host.Name }</h1>
<span class={ "tile-last-seen", lastSeenClass(d.LastSeenAt) }>{ lastSeenLabel(d.LastSeenAt) }</span>
</div>
<dl class="host-summary-meta">
<div>
<dt>MAC</dt>
<dd>{ d.Host.MAC }</dd>
</div>
<div>
<dt>WoL</dt>
<dd>{ fmt.Sprintf("%s:%d", d.Host.WoLBroadcastIP, d.Host.WoLPort) }</dd>
</div>
</dl>
if d.Host.Notes != "" {
<div class="host-summary-notes">
<h3>Notes</h3>
<p>{ d.Host.Notes }</p>
</div>
}
<details class="host-summary-spec">
<summary>Expected spec</summary>
<pre class="host-summary-spec-yaml">{ d.Host.ExpectedSpecYAML }</pre>
</details>
</section>
}
// HostActions is the primary-action row: Start vetting (enabled only when
// no active run AND host is heartbeating), Delete host. Run-level actions
// (Cancel / Override / View report) live on the run page — the host page
// only exposes things scoped to the host itself.
//
// Three mutually exclusive states are rendered for the start control:
//   - hostCanStart: a form posting to /hosts/{id}/start carrying the
//     "profile" radio (quick is pre-checked) and the optional
//     "non_destructive" checkbox.
//   - no active run but host offline: a disabled Start button whose
//     tooltip explains how to install the reporter.
//   - otherwise: a disabled "Run in flight" button.
//
// The Delete form is always rendered and is guarded by a browser
// confirm() dialog. The section's id and sse-swap event are both
// detail-actions-{hostID} with an outerHTML swap.
templ HostActions(d HostPageData) {
	<section
		id={ fmt.Sprintf("detail-actions-%d", d.Host.ID) }
		class="host-actions"
		sse-swap={ fmt.Sprintf("detail-actions-%d", d.Host.ID) }
		hx-swap="outerHTML"
	>
		<div class="host-actions-row">
			if hostCanStart(d) {
				<form method="post" action={ templ.SafeURL(fmt.Sprintf("/hosts/%d/start", d.Host.ID)) } class="inline host-start-form">
					<fieldset class="host-profile-picker">
						<legend>Profile</legend>
						<label title="~10 min — post-repair sanity: all probes + gates, short budgets">
							<input type="radio" name="profile" value="quick" checked/>
							quick
						</label>
						<label title="~8–12 h — overnight soak: long CPU/RAM, full-disk fio verify, 30 min network">
							<input type="radio" name="profile" value="deep"/>
							deep
						</label>
						<label title="≥24 h — week-long burn-in; opt-in when you suspect intermittent faults">
							<input type="radio" name="profile" value="soak"/>
							soak
						</label>
					</fieldset>
					<label class="host-nd-toggle">
						<input type="checkbox" name="non_destructive" value="1"/>
						Non-destructive (skip wipe-probe + disk writes)
					</label>
					<button type="submit" class="btn-primary">Start vetting</button>
				</form>
			} else if hostCanStartIfOnline(d) {
				<button type="button" disabled title="host is not heartbeating — install the reporter via /register/quick.sh on the target host">Start vetting</button>
			} else {
				<button type="button" disabled>Run in flight</button>
			}
			<form method="post" action={ templ.SafeURL(fmt.Sprintf("/hosts/%d/delete", d.Host.ID)) } class="inline" onsubmit="return confirm('Delete host and all its runs?');">
				<button type="submit" class="btn-danger">Delete host</button>
			</form>
		</div>
	</section>
}
// InFlightBanner is the sticky "Run #N in progress — open →" strip that
// shows only when an active (non-terminal) run exists. The wrapping
// section is always emitted — even when d.ActiveRun is nil — so the
// detail-inflight-{hostID} SSE target stays in the DOM and a run
// starting or ending can flip the banner live. The link inside points at
// /runs/{runID} and shows tileStatus for the active run.
templ InFlightBanner(d HostPageData) {
<section
id={ fmt.Sprintf("detail-inflight-%d", d.Host.ID) }
class="in-flight-banner-wrap"
sse-swap={ fmt.Sprintf("detail-inflight-%d", d.Host.ID) }
hx-swap="outerHTML"
>
if d.ActiveRun != nil {
<a class="in-flight-banner" href={ templ.SafeURL(fmt.Sprintf("/runs/%d", d.ActiveRun.ID)) }>
<span class="in-flight-label">Run #{ fmt.Sprintf("%d", d.ActiveRun.ID) } in progress —</span>
<span class="in-flight-state">{ tileStatus(d.ActiveRun) }</span>
<span class="in-flight-open">open →</span>
</a>
}
</section>
}
// HostEmptyState replaces the runs table with a big call-to-action when
// this host has no runs (HostPage picks it whenever d.Runs is empty,
// regardless of reachability). When hostCanStart(d) is true it renders a
// form posting to /hosts/{id}/start; otherwise it renders a disabled
// Start button whose tooltip says the host is not heartbeating.
//
// NOTE(review): unlike the HostActions form, this form submits neither a
// "profile" nor a "non_destructive" field — presumably the start handler
// falls back to its default profile; confirm against the handler.
templ HostEmptyState(d HostPageData) {
<section class="host-empty-state">
<p class="host-empty-title">No runs yet.</p>
<p class="host-empty-sub">Kick off the first vetting run whenever the host is heartbeating.</p>
if hostCanStart(d) {
<form method="post" action={ templ.SafeURL(fmt.Sprintf("/hosts/%d/start", d.Host.ID)) } class="inline">
<button type="submit" class="btn-primary big">Start vetting</button>
</form>
} else {
<button type="button" class="btn-primary big" disabled title="host is not heartbeating — install the reporter via /register/quick.sh on the target host">Start vetting</button>
}
</section>
}
// RunsTable is one row per run, in the order d.Runs supplies them
// (newest first per HostPageData). Each row is rendered by RunRow and
// carries its own SSE-swap target so live state changes (a running row
// flipping to passed) update one <tr> without re-rendering the whole
// table. A row is marked Live when its ID matches d.ActiveRun; stage
// rows come from d.RunStages keyed by run ID.
templ RunsTable(d HostPageData) {
<section class="host-runs">
<h2>Runs</h2>
<table class="runs-table">
<thead>
<tr>
<th>Run</th>
<th>State</th>
<th>Started</th>
<th>Duration</th>
<th>Stages</th>
<th></th>
</tr>
</thead>
<tbody>
for _, r := range d.Runs {
@RunRow(RunRowData{
Run: r,
Stages: d.RunStages[r.ID],
Live: d.ActiveRun != nil && d.ActiveRun.ID == r.ID,
})
}
</tbody>
</table>
</section>
}
// RunRowData is a single row's payload. Live is true for the currently
// non-terminal run so CSS can highlight it at the top of the table.
type RunRowData struct {
// Run is the run this row describes.
Run model.Run
// Stages holds the persisted stage rows for Run; it may be nil or
// partial, in which case stageForName supplies pending stubs.
Stages []model.Stage
// Live adds the "runs-row-live" class via runRowLiveClass.
Live bool
}
// RunRow renders one <tr> keyed by runrow-{runID}. State changes fire
// runrow-{runID} from the orchestrator so the single row re-renders with
// its updated state + stage-strip without reloading the host page.
//
// Columns: run link, status badge (class and label from tileMood /
// tileStatus), relative start time, duration, the stage strip, and an
// "open →" link. The strip emits one dot per name in
// store.DefaultStageOrder, using stageForName so stages without a
// persisted row still render as pending.
templ RunRow(d RunRowData) {
<tr
id={ fmt.Sprintf("runrow-%d", d.Run.ID) }
class={ "runs-row", "runs-row-" + tileMood(&d.Run), runRowLiveClass(d.Live) }
sse-swap={ fmt.Sprintf("runrow-%d", d.Run.ID) }
hx-swap="outerHTML"
>
<td class="runs-col-id">
<a href={ templ.SafeURL(fmt.Sprintf("/runs/%d", d.Run.ID)) }>{ fmt.Sprintf("#%d", d.Run.ID) }</a>
</td>
<td class="runs-col-state">
<span class={ "run-status-badge", "run-status-" + tileMood(&d.Run) }>{ tileStatus(&d.Run) }</span>
</td>
<td class="runs-col-started">{ relativeTime(d.Run.StartedAt) }</td>
<td class="runs-col-duration">{ runDuration(&d.Run) }</td>
<td class="runs-col-strip">
<div class="stage-strip">
for _, name := range store.DefaultStageOrder {
{{ st := stageForName(d.Stages, name) }}
<span class={ "stage-dot", "stage-dot-sm", "stage-dot-" + string(st.State) } title={ name }></span>
}
</div>
</td>
<td class="runs-col-open">
<a class="runs-open-link" href={ templ.SafeURL(fmt.Sprintf("/runs/%d", d.Run.ID)) }>open →</a>
</td>
</tr>
}
// runRowLiveClass returns the extra CSS class for a runs-table row:
// "runs-row-live" when live is true, and the empty string otherwise so
// the row gains no additional class.
func runRowLiveClass(live bool) string {
	if !live {
		return ""
	}
	return "runs-row-live"
}
// hostCanStart reports whether the Start button should be live. Both
// conditions must hold: no run is active (hostCanStartIfOnline) and the
// host has a recorded heartbeat no older than 60 seconds. A nil
// LastSeenAt counts as offline. Mirrors the StartRun handler's preflight
// so the button never offers a click the server rejects.
func hostCanStart(d HostPageData) bool {
	seen := d.LastSeenAt
	if seen == nil || !hostCanStartIfOnline(d) {
		return false
	}
	age := time.Since(*seen)
	return age <= 60*time.Second
}
// hostCanStartIfOnline is the run-state half of hostCanStart: it is true
// exactly when d.ActiveRun is nil. Split out so HostActions can tell
// "run in flight" (disabled "Run in flight" button) apart from "no
// active run but host offline" (disabled "Start vetting" button).
func hostCanStartIfOnline(d HostPageData) bool {
	hasActiveRun := d.ActiveRun != nil
	return !hasActiveRun
}
// profileChipValue normalizes a Run.Profile string for display on the
// run page chip. An empty value — runs whose profile column predates
// Phase 1 — is shown as "quick" (the prior implicit default); any other
// value is returned unchanged.
func profileChipValue(p string) string {
	switch p {
	case "":
		return "quick"
	default:
		return p
	}
}
// runDuration formats the elapsed time of a run. A nil run or a zero
// StartedAt yields "". Finished runs measure StartedAt → CompletedAt;
// in-flight runs measure StartedAt → now, so the run-page header +
// runs-table row keep ticking on each SSE push. Negative spans are
// clamped to zero.
//
// Output buckets: "<n>ms" under 1s, "<n.n>s" under 10s, "<n>s" under a
// minute, "<m>m <s>s" under an hour, "<h>h <m>m" beyond that.
// NOTE(review): intended to match stageDuration's buckets — that helper
// is defined elsewhere; confirm they stay in sync.
func runDuration(r *model.Run) string {
	if r == nil || r.StartedAt.IsZero() {
		return ""
	}

	var elapsed time.Duration
	if r.CompletedAt != nil {
		elapsed = r.CompletedAt.Sub(r.StartedAt)
	} else {
		elapsed = time.Now().Sub(r.StartedAt)
	}
	if elapsed < 0 {
		elapsed = 0
	}

	if elapsed < time.Second {
		return fmt.Sprintf("%dms", int(elapsed/time.Millisecond))
	}
	if elapsed < 10*time.Second {
		return fmt.Sprintf("%.1fs", elapsed.Seconds())
	}
	if elapsed < time.Minute {
		return fmt.Sprintf("%ds", int(elapsed/time.Second))
	}
	if elapsed < time.Hour {
		mins := int(elapsed / time.Minute)
		secs := int((elapsed % time.Minute) / time.Second)
		return fmt.Sprintf("%dm %ds", mins, secs)
	}
	hours := int(elapsed / time.Hour)
	mins := int((elapsed % time.Hour) / time.Minute)
	return fmt.Sprintf("%dh %dm", hours, mins)
}
// stageForName looks up the persisted Stage whose Name equals name and
// returns the first match. When none exists (e.g. a run still in a
// pre-stage, or a nil slice) it returns a synthetic stub carrying the
// requested name and model.StagePending, so the template always gets a
// concrete Stage and needs no nil checks.
func stageForName(stages []model.Stage, name string) model.Stage {
	for i := range stages {
		if stages[i].Name == name {
			return stages[i]
		}
	}
	stub := model.Stage{Name: name, State: model.StagePending}
	return stub
}
// hasCriticalDiff reports whether any diff has Severity "critical" and
// is not marked Ignored. Used to open the spec-diff <details> by default
// — the operator shouldn't have to click to see the blocker.
func hasCriticalDiff(diffs []model.SpecDiff) bool {
	for i := range diffs {
		if diffs[i].Ignored {
			continue
		}
		if diffs[i].Severity == "critical" {
			return true
		}
	}
	return false
}
// relativeTime renders a past time as a coarse age: "just now" under a
// minute, then "<n>m ago", "<n>h ago", and "<n>d ago" from 24 hours up.
// The zero time renders as "". Future times (clock skew) render as
// "now" so the runs table never shows nonsense when a host's clock is
// ahead of the orchestrator.
func relativeTime(t time.Time) string {
	if t.IsZero() {
		return ""
	}

	const day = 24 * time.Hour
	age := time.Since(t)

	switch {
	case age < 0:
		return "now"
	case age < time.Minute:
		return "just now"
	case age < time.Hour:
		return fmt.Sprintf("%dm ago", int(age/time.Minute))
	case age < day:
		return fmt.Sprintf("%dh ago", int(age/time.Hour))
	default:
		return fmt.Sprintf("%dd ago", int(age/day))
	}
}
// RenderHostSummaryString renders the HostSummary region to a string for
// the orchestrator's SSE publish path. Matches the RenderTileString
// pattern. The render error is deliberately discarded: whatever was
// written to the buffer before a failure is returned as-is.
func RenderHostSummaryString(d HostPageData) string {
	out := new(bytes.Buffer)
	_ = HostSummary(d).Render(context.Background(), out)
	return out.String()
}
// RenderHostActionsString renders the HostActions region to a string
// for the orchestrator's SSE publish path. The render error is
// deliberately discarded: whatever was written to the buffer before a
// failure is returned as-is.
func RenderHostActionsString(d HostPageData) string {
	out := new(bytes.Buffer)
	_ = HostActions(d).Render(context.Background(), out)
	return out.String()
}
// RenderInFlightBannerString renders the InFlightBanner region to a
// string for the orchestrator's SSE publish path. The render error is
// deliberately discarded: whatever was written to the buffer before a
// failure is returned as-is.
func RenderInFlightBannerString(d HostPageData) string {
	out := new(bytes.Buffer)
	_ = InFlightBanner(d).Render(context.Background(), out)
	return out.String()
}
// RenderRunRowString renders one runs-table row to a string so it can be
// pushed over SSE when a run's state changes. The orchestrator fires
// runrow-{runID} at every site that already fires tile-{hostID} +
// pipeline-{runID}. The render error is deliberately discarded: whatever
// was written to the buffer before a failure is returned as-is.
func RenderRunRowString(d RunRowData) string {
	out := new(bytes.Buffer)
	_ = RunRow(d).Render(context.Background(), out)
	return out.String()
}