diff --git a/Makefile b/Makefile index 2544194..d4ee8b6 100644 --- a/Makefile +++ b/Makefile @@ -67,8 +67,8 @@ run: orchestrator ## Build and run orchestrator with example config ./bin/vetting$(if $(filter Windows%,$(UNAME_S)),.exe,) --config deploy/vetting.example.yaml .PHONY: install -install: orchestrator-linux ## Run deploy/install.sh (must be run on the target LXC as root) - sudo ./deploy/install.sh --binary ./bin/vetting-linux-amd64 +install: orchestrator-linux agent-linux ## Run deploy/install.sh (must be run on the target LXC as root) + sudo ./deploy/install.sh --binary ./bin/vetting-linux-amd64 --agent-binary ./bin/vetting-agent.linux-amd64 .PHONY: clean clean: ## Remove build artifacts diff --git a/agent/hostmode/boot.go b/agent/hostmode/boot.go new file mode 100644 index 0000000..78ea356 --- /dev/null +++ b/agent/hostmode/boot.go @@ -0,0 +1,65 @@ +package hostmode + +import ( + "bufio" + "context" + "log" + "os" + "os/exec" + "strings" +) + +// setPXEBootNext points the next boot at a PXE-capable BootOrder +// entry via efibootmgr --bootnext. Best-effort: absent efibootmgr, +// non-UEFI firmware, or zero PXE entries all fall through silently — +// the operator's BIOS/DHCP chain will still PXE-boot on most hosts. 
// setPXEBootNext asks the firmware to boot a network entry on the next
// boot only, by running `efibootmgr --bootnext <num>`.
//
// It is deliberately best-effort: a missing /sys/firmware/efi, a missing
// efibootmgr binary, a failing efibootmgr call, or the absence of any
// network boot entry is logged and then ignored, so the caller can still
// go ahead and reboot. Both efibootmgr invocations are bound to ctx.
func setPXEBootNext(ctx context.Context) {
	// The code treats a missing /sys/firmware/efi as "not a UEFI system".
	if _, err := os.Stat("/sys/firmware/efi"); err != nil {
		log.Printf("hostmode: not a UEFI system; skipping efibootmgr")
		return
	}
	bin, err := exec.LookPath("efibootmgr")
	if err != nil {
		log.Printf("hostmode: efibootmgr not installed; skipping")
		return
	}
	boots, err := exec.CommandContext(ctx, bin, "-v").Output()
	if err != nil {
		log.Printf("hostmode: efibootmgr -v: %v", err)
		return
	}
	num := findPXEBootNum(string(boots))
	if num == "" {
		log.Printf("hostmode: no PXE boot entry found")
		return
	}
	if err := exec.CommandContext(ctx, bin, "--bootnext", num).Run(); err != nil {
		log.Printf("hostmode: efibootmgr --bootnext %s: %v", num, err)
		return
	}
	log.Printf("hostmode: efibootmgr --bootnext %s", num)
}

// findPXEBootNum returns the four-hex-digit number of the first BootXXXX
// entry in `efibootmgr -v` output whose line mentions a network-boot
// keyword ("pxe", "ipv4", "ipv6" or "network", matched case-insensitively
// anywhere on the line). It returns "" when no entry qualifies.
//
// efibootmgr -v output lines look like:
//
//	Boot0003* UEFI: IPv4 Intel I225-V PciRoot(0x0)/Pci(...)/MAC(...)
//	Boot0001* ubuntu HD(1,GPT,...)/File(\EFI\ubuntu\shimx64.efi)
//
// Lines that merely start with "Boot" (BootCurrent:, BootOrder:,
// BootNext:) are skipped: only a "Boot" prefix followed by exactly four
// hex digits counts as an entry, so text such as "Orde" can never be
// handed to `efibootmgr --bootnext`.
func findPXEBootNum(out string) string {
	scan := bufio.NewScanner(strings.NewReader(out))
	for scan.Scan() {
		line := scan.Text()
		num, ok := bootEntryNum(line)
		if !ok {
			continue
		}
		if mentionsNetBoot(strings.ToLower(line)) {
			return num
		}
	}
	return ""
}

// bootEntryNum extracts the XXXX of a line starting with "BootXXXX",
// reporting false unless all four characters are hexadecimal digits.
func bootEntryNum(line string) (string, bool) {
	if !strings.HasPrefix(line, "Boot") || len(line) < 8 {
		return "", false
	}
	num := line[4:8]
	for i := 0; i < len(num); i++ {
		c := num[i]
		isHex := (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
		if !isHex {
			return "", false
		}
	}
	return num, true
}

// mentionsNetBoot reports whether an already lower-cased line contains
// one of the keywords used to recognise a network boot entry.
func mentionsNetBoot(low string) bool {
	for _, hint := range []string{"pxe", "ipv4", "ipv6", "network"} {
		if strings.Contains(low, hint) {
			return true
		}
	}
	return false
}
// heartbeatResponse is the JSON document the orchestrator returns for a
// heartbeat. Phase 1 only populates Ok; Phase 2 adds Cmd and RunID.
type heartbeatResponse struct {
	Ok    bool   `json:"ok"`
	Cmd   string `json:"cmd,omitempty"`
	RunID int64  `json:"run_id,omitempty"`
}

// hostClient posts heartbeats to a single orchestrator base URL.
type hostClient struct {
	base string       // orchestrator base URL, joined with the API path as-is
	h    *http.Client // shared client so connections can be reused
}

// newHostClient returns a client for base whose requests time out after
// five seconds.
func newHostClient(base string) *hostClient {
	return &hostClient{
		base: base,
		h:    &http.Client{Timeout: 5 * time.Second},
	}
}

// maxHeartbeatBody bounds how much of a heartbeat response is decoded or
// drained; a well-formed reply is a few dozen bytes.
const maxHeartbeatBody = 64 << 10

// heartbeat POSTs an empty JSON object to
// <base>/api/v1/hosts/<mac>/heartbeat and decodes the reply.
//
// It returns an error when the request cannot be built or sent, when the
// status is not 200 (the error carries the status code and up to 1024
// bytes of the body), or when the body is not valid JSON.
func (c *hostClient) heartbeat(ctx context.Context, mac string) (*heartbeatResponse, error) {
	url := fmt.Sprintf("%s/api/v1/hosts/%s/heartbeat", c.base, mac)
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url,
		bytes.NewReader([]byte(`{}`)))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")
	resp, err := c.h.Do(req)
	if err != nil {
		return nil, err
	}
	defer func() {
		// net/http can only return a keep-alive connection to the pool
		// when the body was read to EOF before Close. Drain what is left
		// (bounded) so periodic heartbeats do not redial every time.
		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, maxHeartbeatBody))
		resp.Body.Close()
	}()
	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 1024))
		return nil, fmt.Errorf("status %d: %s", resp.StatusCode, string(body))
	}
	var out heartbeatResponse
	// Cap the decode so a misbehaving endpoint cannot feed an unbounded body.
	if err := json.NewDecoder(io.LimitReader(resp.Body, maxHeartbeatBody)).Decode(&out); err != nil {
		return nil, fmt.Errorf("decode: %w", err)
	}
	return &out, nil
}

// cmdRebootForVetting is the Phase 2 command the orchestrator sends
// when the operator clicked "Start vetting" and the host is actively
// heartbeating — the agent redirects next boot to PXE and reboots
// itself, obviating WoL.
const cmdRebootForVetting = "reboot_for_vetting"
+func handleResponse(ctx context.Context, resp *heartbeatResponse) { + if resp == nil || resp.Cmd == "" { + return + } + switch resp.Cmd { + case cmdRebootForVetting: + log.Printf("hostmode: orchestrator requested reboot_for_vetting (run=%d)", resp.RunID) + rebootForVetting(ctx) + default: + log.Printf("hostmode: unknown cmd %q, ignoring", resp.Cmd) + } +} + +// rebootForVetting redirects next boot to PXE (best-effort on UEFI +// via efibootmgr) and triggers a clean reboot. BIOS/legacy hosts +// typically PXE-boot via DHCP chain on every boot, so efibootmgr +// missing is non-fatal. +func rebootForVetting(ctx context.Context) { + setPXEBootNext(ctx) + log.Printf("hostmode: executing systemctl reboot") + if err := exec.CommandContext(ctx, "systemctl", "reboot").Run(); err != nil { + log.Printf("hostmode: systemctl reboot failed: %v", err) + } +} diff --git a/agent/hostmode/iface.go b/agent/hostmode/iface.go new file mode 100644 index 0000000..d71a302 --- /dev/null +++ b/agent/hostmode/iface.go @@ -0,0 +1,47 @@ +package hostmode + +import ( + "bufio" + "errors" + "fmt" + "os" + "os/exec" + "strings" +) + +// primaryMAC resolves the MAC of the iface that carries the default +// IPv4 route. Mirrors quick.sh.tmpl's primary_iface so the agent +// reports the same MAC that was registered (important on Proxmox +// where vmbr0 inherits its physical NIC's MAC). +func primaryMAC() (string, error) { + iface, err := defaultRouteIface() + if err != nil { + return "", err + } + raw, err := os.ReadFile(fmt.Sprintf("/sys/class/net/%s/address", iface)) + if err != nil { + return "", fmt.Errorf("read mac for %s: %w", iface, err) + } + return strings.ToLower(strings.TrimSpace(string(raw))), nil +} + +// defaultRouteIface shells out to `ip` because reading /proc/net/route +// requires hex-swap logic and still misses the IPv4-only "dev" +// qualification. The service runs as root on a Linux box; `ip` is +// always present. 
// defaultRouteIface returns the name of the interface that carries the
// default IPv4 route, obtained by running
// `ip -o -4 route show default` and taking the token after "dev" on the
// first line that has one.
//
// It returns an error when `ip` cannot be run or exits non-zero (the
// error includes whatever `ip` wrote to stderr, when anything was
// captured), or when the output names no interface.
func defaultRouteIface() (string, error) {
	out, err := exec.Command("ip", "-o", "-4", "route", "show", "default").Output()
	if err != nil {
		// Output() stores the child's stderr on *exec.ExitError; surface
		// it so the log says why `ip` failed, not just "exit status N".
		var exitErr *exec.ExitError
		if errors.As(err, &exitErr) {
			if msg := strings.TrimSpace(string(exitErr.Stderr)); msg != "" {
				return "", fmt.Errorf("ip route: %w: %s", err, msg)
			}
		}
		return "", fmt.Errorf("ip route: %w", err)
	}
	if iface, ok := parseDefaultRouteIface(string(out)); ok {
		return iface, nil
	}
	return "", errors.New("no default IPv4 route")
}

// parseDefaultRouteIface scans `ip route` output line by line and
// returns the field that follows the first "dev" token. It reports false
// when no line contains "dev" followed by another field.
func parseDefaultRouteIface(out string) (string, bool) {
	scan := bufio.NewScanner(strings.NewReader(out))
	for scan.Scan() {
		fields := strings.Fields(scan.Text())
		for i, f := range fields {
			if f == "dev" && i+1 < len(fields) {
				return fields[i+1], true
			}
		}
	}
	return "", false
}
+type Config struct { + OrchestratorURL string `yaml:"orchestrator_url"` + MAC string `yaml:"mac,omitempty"` + Interval time.Duration `yaml:"-"` + IntervalRaw string `yaml:"interval,omitempty"` +} + +func LoadConfig(path string) (*Config, error) { + b, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("read %s: %w", path, err) + } + var c Config + if err := yaml.Unmarshal(b, &c); err != nil { + return nil, fmt.Errorf("parse %s: %w", path, err) + } + c.OrchestratorURL = strings.TrimRight(strings.TrimSpace(c.OrchestratorURL), "/") + if c.OrchestratorURL == "" { + return nil, errors.New("orchestrator_url is required") + } + c.MAC = strings.ToLower(strings.TrimSpace(c.MAC)) + if c.IntervalRaw == "" { + c.Interval = 30 * time.Second + } else { + d, err := time.ParseDuration(c.IntervalRaw) + if err != nil { + return nil, fmt.Errorf("parse interval: %w", err) + } + if d < time.Second { + return nil, fmt.Errorf("interval %s is too aggressive", d) + } + c.Interval = d + } + return &c, nil +} + +// Run blocks until ctx is cancelled, heartbeating on an interval. +// Errors never abort the loop — the service is `Restart=on-failure` +// in systemd, and a transient HTTP failure is not a reason to exit. +func Run(ctx context.Context, cfgPath string) error { + cfg, err := LoadConfig(cfgPath) + if err != nil { + return err + } + if cfg.MAC == "" { + mac, err := primaryMAC() + if err != nil { + return fmt.Errorf("resolve primary MAC: %w", err) + } + cfg.MAC = mac + } + log.Printf("hostmode: reporting to %s as %s every %s", + cfg.OrchestratorURL, cfg.MAC, cfg.Interval) + + client := newHostClient(cfg.OrchestratorURL) + + // Fire one heartbeat immediately so the dashboard lights up on + // service start, without waiting for the first tick. 
+ tick(ctx, client, cfg) + + t := time.NewTicker(cfg.Interval) + defer t.Stop() + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-t.C: + tick(ctx, client, cfg) + } + } +} + +func tick(ctx context.Context, c *hostClient, cfg *Config) { + resp, err := c.heartbeat(ctx, cfg.MAC) + if err != nil { + log.Printf("hostmode: heartbeat: %v", err) + return + } + handleResponse(ctx, resp) +} diff --git a/cmd/vetting-agent/main.go b/cmd/vetting-agent/main.go index 44e0b60..81a3834 100644 --- a/cmd/vetting-agent/main.go +++ b/cmd/vetting-agent/main.go @@ -2,6 +2,7 @@ package main import ( "context" + "errors" "flag" "log" "os" @@ -10,18 +11,10 @@ import ( "vetting/agent" "vetting/agent/bootstate" + "vetting/agent/hostmode" ) func main() { - cmdlinePath := flag.String("cmdline", "/proc/cmdline", "path to kernel cmdline (override for local testing)") - flag.Parse() - - p, err := bootstate.ParseCmdline(*cmdlinePath) - if err != nil { - log.Fatalf("bootstate: %v", err) - } - log.Printf("vetting-agent starting: run=%d mac=%s orchestrator=%s", p.RunID, p.MAC, p.OrchestratorURL) - ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -33,7 +26,34 @@ func main() { cancel() }() - if err := agent.Run(ctx, p); err != nil && err != context.Canceled { + // `vetting-agent host` = persistent reporter (systemd service on + // the installed host). No-arg = live-image agent that parses the + // boot cmdline — keeping the default preserves PXE/initrd scripts. 
+ if len(os.Args) >= 2 && os.Args[1] == "host" { + runHost(ctx, os.Args[2:]) + return + } + runLive(ctx) +} + +func runLive(ctx context.Context) { + cmdlinePath := flag.String("cmdline", "/proc/cmdline", "path to kernel cmdline (override for local testing)") + flag.Parse() + p, err := bootstate.ParseCmdline(*cmdlinePath) + if err != nil { + log.Fatalf("bootstate: %v", err) + } + log.Printf("vetting-agent starting: run=%d mac=%s orchestrator=%s", p.RunID, p.MAC, p.OrchestratorURL) + if err := agent.Run(ctx, p); err != nil && !errors.Is(err, context.Canceled) { log.Fatalf("agent: %v", err) } } + +func runHost(ctx context.Context, args []string) { + fs := flag.NewFlagSet("host", flag.ExitOnError) + cfgPath := fs.String("config", "/etc/vetting/host-agent.yaml", "path to host-agent.yaml") + _ = fs.Parse(args) + if err := hostmode.Run(ctx, *cfgPath); err != nil && !errors.Is(err, context.Canceled) { + log.Fatalf("hostmode: %v", err) + } +} diff --git a/cmd/vetting/main.go b/cmd/vetting/main.go index 5ada34a..c3c55a9 100644 --- a/cmd/vetting/main.go +++ b/cmd/vetting/main.go @@ -149,9 +149,10 @@ func main() { } router := httpserver.NewRouter(httpserver.Deps{ - UI: ui, - Agent: agentAPI, - LiveDir: cfg.PXE.LiveDir, + UI: ui, + Agent: agentAPI, + LiveDir: cfg.PXE.LiveDir, + AgentAssetDir: cfg.Agent.AssetDir, }) srv := &http.Server{ diff --git a/deploy/install.sh b/deploy/install.sh index 09e3360..60a83b6 100644 --- a/deploy/install.sh +++ b/deploy/install.sh @@ -25,30 +25,36 @@ set -euo pipefail BINARY="" +AGENT_BINARY="" CONFIG_DIR="/etc/vetting" STATE_DIR="/var/lib/vetting" LOG_DIR="/var/log/vetting" +ASSET_DIR="/var/lib/vetting/assets" SERVICE_USER="vetting" usage() { cat <&2; usage; exit 2 ;; + --binary) BINARY="$2"; shift 2 ;; + --agent-binary) AGENT_BINARY="$2"; shift 2 ;; + --config-dir) CONFIG_DIR="$2"; shift 2 ;; + -h|--help) usage; exit 0 ;; + *) echo "unknown arg: $1" >&2; usage; exit 2 ;; esac done @@ -73,6 +79,19 @@ if [[ -z "${BINARY}" || ! 
-x "${BINARY}" ]]; then exit 1 fi +if [[ -z "${AGENT_BINARY}" ]]; then + for cand in \ + "${REPO_ROOT}/bin/vetting-agent.linux-amd64" \ + "${REPO_ROOT}/bin/vetting-agent-linux-amd64" \ + "${SCRIPT_DIR}/vetting-agent-linux-amd64"; do + if [[ -x "${cand}" ]]; then AGENT_BINARY="${cand}"; break; fi + done +fi +if [[ -z "${AGENT_BINARY}" || ! -x "${AGENT_BINARY}" ]]; then + echo "could not find a vetting-agent binary; pass --agent-binary PATH or run 'make agent-linux' first" >&2 + exit 1 +fi + echo "==> installing runtime dependencies" export DEBIAN_FRONTEND=noninteractive apt-get update -qq @@ -90,10 +109,12 @@ fi echo "==> preparing directories" install -d -m 0755 -o "${SERVICE_USER}" -g "${SERVICE_USER}" "${STATE_DIR}" install -d -m 0755 -o "${SERVICE_USER}" -g "${SERVICE_USER}" "${LOG_DIR}" +install -d -m 0755 -o "${SERVICE_USER}" -g "${SERVICE_USER}" "${ASSET_DIR}" install -d -m 0755 "${CONFIG_DIR}" echo "==> installing binary" install -m 0755 "${BINARY}" /usr/local/bin/vetting +install -m 0755 "${AGENT_BINARY}" "${ASSET_DIR}/vetting-agent-linux-amd64" echo "==> installing config and systemd unit" # vetting.production.yaml uses absolute /var/lib/vetting + /var/log/vetting diff --git a/deploy/vetting.example.yaml b/deploy/vetting.example.yaml index f1d6670..b18e000 100644 --- a/deploy/vetting.example.yaml +++ b/deploy/vetting.example.yaml @@ -41,6 +41,13 @@ pxe: tftp_root: "" # holds ipxe.efi + undionly.kpxe live_dir: "" # holds vmlinuz + initrd.img; served at /live/* +agent: + # Directory containing vetting-agent-linux-amd64. The quick-register + # one-liner downloads from /assets/vetting-agent-linux-amd64 and + # installs it as a systemd service so the host keeps heartbeating. + # Leave empty to disable the /assets/* route. + asset_dir: "./var/assets" + # Notifications fire on StageFailed, SpecMismatch, HoldingOpened, # RunCompleted. Declare one or more notifiers and route each event # kind (and optionally severity) to a notifier by name. 
Delivery is diff --git a/deploy/vetting.production.yaml b/deploy/vetting.production.yaml index 6f584a2..a2026e0 100644 --- a/deploy/vetting.production.yaml +++ b/deploy/vetting.production.yaml @@ -39,6 +39,11 @@ pxe: tftp_root: "/var/lib/vetting/tftp" # holds ipxe.efi + undionly.kpxe live_dir: "/var/lib/vetting/live" # holds vmlinuz + initrd.img; served at /live/* +agent: + # Directory holding vetting-agent-linux-amd64, served at + # /assets/vetting-agent-linux-amd64. install.sh drops the binary here. + asset_dir: "/var/lib/vetting/assets" + # Notifications fire on StageFailed, SpecMismatch, HoldingOpened, # RunCompleted. Declare one or more notifiers and route each event # kind (and optionally severity) to a notifier by name. Delivery is diff --git a/internal/api/heartbeat_test.go b/internal/api/heartbeat_test.go new file mode 100644 index 0000000..1ca79ba --- /dev/null +++ b/internal/api/heartbeat_test.go @@ -0,0 +1,102 @@ +package api_test + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "path/filepath" + "testing" + "time" + + "github.com/go-chi/chi/v5" + + "vetting/internal/api" + "vetting/internal/db" + "vetting/internal/model" + "vetting/internal/store" +) + +// setupHeartbeat wires just enough of UI to exercise the heartbeat +// handler. Runner is left nil — the handler no-ops the SSE publish in +// that case, which matches "tests don't assert on SSE" (covered by +// integration-style runner tests). 
+func setupHeartbeat(t *testing.T) (*api.UI, *store.Hosts) { + t.Helper() + conn, err := db.Open(filepath.Join(t.TempDir(), "vetting.db")) + if err != nil { + t.Fatalf("open db: %v", err) + } + t.Cleanup(func() { _ = conn.Close() }) + hosts := &store.Hosts{DB: conn} + return &api.UI{Hosts: hosts}, hosts +} + +func heartbeatReq(mac string) *http.Request { + req := httptest.NewRequest(http.MethodPost, "/api/v1/hosts/"+mac+"/heartbeat", nil) + rctx := chi.NewRouteContext() + rctx.URLParams.Add("mac", mac) + return req.WithContext(context.WithValue(req.Context(), chi.RouteCtxKey, rctx)) +} + +func TestUIHeartbeat_Success(t *testing.T) { + ui, hosts := setupHeartbeat(t) + id, err := hosts.Create(context.Background(), model.Host{ + Name: "hb-host", + MAC: "aa:bb:cc:dd:ee:10", + WoLBroadcastIP: "10.0.0.255", + WoLPort: 9, + ExpectedSpecYAML: "memory:\n total_gib: 16\n", + }) + if err != nil { + t.Fatalf("create: %v", err) + } + + before := time.Now().UTC().Add(-time.Second) + rr := httptest.NewRecorder() + ui.Heartbeat(rr, heartbeatReq("aa:bb:cc:dd:ee:10")) + if rr.Code != http.StatusOK { + t.Fatalf("status = %d, body = %q", rr.Code, rr.Body.String()) + } + var resp map[string]any + if err := json.Unmarshal(rr.Body.Bytes(), &resp); err != nil { + t.Fatalf("decode: %v", err) + } + if resp["ok"] != true { + t.Fatalf("response = %v, want ok:true", resp) + } + + got, err := hosts.Get(context.Background(), id) + if err != nil { + t.Fatalf("Get: %v", err) + } + if got.LastSeenAt == nil { + t.Fatalf("LastSeenAt not stamped") + } + if got.LastSeenAt.Before(before) { + t.Fatalf("LastSeenAt = %v, want >= %v", got.LastSeenAt, before) + } +} + +func TestUIHeartbeat_UnknownMAC(t *testing.T) { + ui, _ := setupHeartbeat(t) + rr := httptest.NewRecorder() + ui.Heartbeat(rr, heartbeatReq("aa:bb:cc:dd:ee:ff")) + if rr.Code != http.StatusNotFound { + t.Fatalf("status = %d, want 404", rr.Code) + } + var resp map[string]string + _ = json.NewDecoder(rr.Body).Decode(&resp) + if resp["error"] == 
"" { + t.Fatalf("missing error body") + } +} + +func TestUIHeartbeat_BadMAC(t *testing.T) { + ui, _ := setupHeartbeat(t) + rr := httptest.NewRecorder() + ui.Heartbeat(rr, heartbeatReq("not-a-mac")) + if rr.Code != http.StatusBadRequest { + t.Fatalf("status = %d, want 400", rr.Code) + } +} diff --git a/internal/api/tile.go b/internal/api/tile.go index a1020d8..5151668 100644 --- a/internal/api/tile.go +++ b/internal/api/tile.go @@ -27,7 +27,7 @@ type TileEnricher struct { // fall back to a tile without the extra fields rather than breaking // the whole dashboard. func (e *TileEnricher) Build(ctx context.Context, host model.Host, latest *model.Run) templates.TileData { - t := templates.TileData{Host: host, Latest: latest} + t := templates.TileData{Host: host, Latest: latest, LastSeenAt: host.LastSeenAt} if latest == nil { return t } diff --git a/internal/api/ui_handlers.go b/internal/api/ui_handlers.go index d430fe8..d8c0687 100644 --- a/internal/api/ui_handlers.go +++ b/internal/api/ui_handlers.go @@ -9,6 +9,7 @@ import ( "strconv" "strings" "text/template" + "time" "github.com/go-chi/chi/v5" "gopkg.in/yaml.v3" @@ -240,6 +241,37 @@ func (u *UI) CreateHostJSON(w http.ResponseWriter, r *http.Request) { }) } +// Heartbeat is called every ~30s by a host-mode vetting-agent running +// as a systemd service on the registered host. LAN-trusted, no auth — +// same threat model as the browser UI and quick-register. Phase 1 +// just stamps last_seen_at and flips the dashboard tile to "online". 
+func (u *UI) Heartbeat(w http.ResponseWriter, r *http.Request) { + mac := strings.ToLower(strings.TrimSpace(chi.URLParam(r, "mac"))) + if !macRe.MatchString(mac) { + writeJSONError(w, http.StatusBadRequest, + "MAC address must be in the form aa:bb:cc:dd:ee:ff") + return + } + host, err := u.Hosts.GetByMAC(r.Context(), mac) + if err != nil { + if errors.Is(err, store.ErrNotFound) { + writeJSONError(w, http.StatusNotFound, "unknown host") + return + } + writeJSONError(w, http.StatusInternalServerError, err.Error()) + return + } + if err := u.Hosts.UpdateLastSeen(r.Context(), mac, time.Now().UTC()); err != nil { + writeJSONError(w, http.StatusInternalServerError, err.Error()) + return + } + if u.Runner != nil { + u.Runner.PublishTileUpdate(r.Context(), host.ID) + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{"ok": true}) +} + func writeJSONError(w http.ResponseWriter, status int, msg string) { w.Header().Set("Content-Type", "application/json") w.WriteHeader(status) diff --git a/internal/config/config.go b/internal/config/config.go index 9fe2acf..bb6965d 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -16,6 +16,7 @@ type Config struct { Janitor Janitor `yaml:"janitor"` PXE PXE `yaml:"pxe"` Network Network `yaml:"network"` + Agent Agent `yaml:"agent"` Notifiers []Notifier `yaml:"notifiers"` Routes []Route `yaml:"routes"` } @@ -70,6 +71,14 @@ type PXE struct { LiveDir string `yaml:"live_dir"` // holds vmlinuz + initrd.img; served at /live } +// Agent holds settings related to the host-mode vetting-agent binary +// that operators install on their hosts. AssetDir is served at +// /assets/*, which is where the quick-register script downloads +// `vetting-agent-linux-amd64` from. 
+type Agent struct { + AssetDir string `yaml:"asset_dir"` // directory containing vetting-agent-linux-amd64; "" disables /assets +} + type Notifier struct { Name string `yaml:"name"` Type string `yaml:"type"` diff --git a/internal/db/migrations/0002_add_hosts_last_seen_at.sql b/internal/db/migrations/0002_add_hosts_last_seen_at.sql new file mode 100644 index 0000000..e670f24 --- /dev/null +++ b/internal/db/migrations/0002_add_hosts_last_seen_at.sql @@ -0,0 +1,2 @@ +-- Persistent host-mode agent heartbeats land here. NULL = never seen. +ALTER TABLE hosts ADD COLUMN last_seen_at TIMESTAMP; diff --git a/internal/httpserver/router.go b/internal/httpserver/router.go index 389ace4..7f6d5c5 100644 --- a/internal/httpserver/router.go +++ b/internal/httpserver/router.go @@ -15,9 +15,10 @@ import ( ) type Deps struct { - UI *api.UI - Agent *api.Agent - LiveDir string // directory containing vmlinuz + initrd.img; "" disables /live + UI *api.UI + Agent *api.Agent + LiveDir string // directory containing vmlinuz + initrd.img; "" disables /live + AgentAssetDir string // directory containing vetting-agent-linux-amd64; "" disables /assets } func NewRouter(d Deps) http.Handler { @@ -36,6 +37,12 @@ func NewRouter(d Deps) http.Handler { r.Handle("/live/*", http.StripPrefix("/live/", http.FileServer(http.Dir(d.LiveDir)))) } + // Host-mode agent binary is served here so the quick-register + // one-liner can curl it without the operator pre-staging anything. + if d.AgentAssetDir != "" { + r.Handle("/assets/*", http.StripPrefix("/assets/", http.FileServer(http.Dir(d.AgentAssetDir)))) + } + // Agent / PXE endpoints — authenticated per-request by bearer token // or by the unforgeable MAC path parameter. r.Get("/ipxe/{mac}", d.Agent.IPXEScript) @@ -54,6 +61,10 @@ func NewRouter(d Deps) http.Handler { // as the browser UI. r.Post("/api/v1/hosts", d.UI.CreateHostJSON) + // Host-mode agent heartbeat. Keyed by MAC (no bearer token), same + // LAN-trust model as /api/v1/hosts. 
+ r.Post("/api/v1/hosts/{mac}/heartbeat", d.UI.Heartbeat) + // Browser UI — no auth; bind to loopback or LAN only, or front // with a reverse proxy if you need a password. r.Get("/", d.UI.Dashboard) diff --git a/internal/model/model.go b/internal/model/model.go index e643336..f7a5103 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -14,6 +14,7 @@ type Host struct { Notes string CreatedAt time.Time UpdatedAt time.Time + LastSeenAt *time.Time // host-mode agent heartbeat; nil = never seen } type RunState string diff --git a/internal/orchestrator/runner.go b/internal/orchestrator/runner.go index 40f7399..85f6c40 100644 --- a/internal/orchestrator/runner.go +++ b/internal/orchestrator/runner.go @@ -50,6 +50,13 @@ func (r *Runner) StartStage(ctx context.Context, runID int64, name string) error return nil } +// PublishTileUpdate is the exported entry point for non-orchestrator +// callers (the UI heartbeat handler) that change tile-visible state +// without going through Transition. 
+func (r *Runner) PublishTileUpdate(ctx context.Context, hostID int64) { + r.publishTileUpdate(ctx, hostID) +} + func (r *Runner) publishTileUpdate(ctx context.Context, hostID int64) { host, err := r.Hosts.Get(ctx, hostID) if err != nil { diff --git a/internal/store/hosts.go b/internal/store/hosts.go index 2a80cba..7f70396 100644 --- a/internal/store/hosts.go +++ b/internal/store/hosts.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "strings" + "time" "vetting/internal/model" ) @@ -16,6 +17,26 @@ type Hosts struct { var ErrNotFound = errors.New("not found") +const hostColumns = `id, name, mac, wol_broadcast_ip, wol_port, expected_spec_yaml, + COALESCE(pdu_config_json,''), COALESCE(ipmi_config_json,''), + notes, created_at, updated_at, last_seen_at` + +func scanHost(row interface { + Scan(dest ...any) error +}, h *model.Host) error { + var lastSeen sql.NullTime + if err := row.Scan(&h.ID, &h.Name, &h.MAC, &h.WoLBroadcastIP, &h.WoLPort, + &h.ExpectedSpecYAML, &h.PDUConfigJSON, &h.IPMIConfigJSON, + &h.Notes, &h.CreatedAt, &h.UpdatedAt, &lastSeen); err != nil { + return err + } + if lastSeen.Valid { + t := lastSeen.Time + h.LastSeenAt = &t + } + return nil +} + func (h *Hosts) Create(ctx context.Context, in model.Host) (int64, error) { in.MAC = normalizeMAC(in.MAC) res, err := h.DB.ExecContext(ctx, ` @@ -30,9 +51,7 @@ func (h *Hosts) Create(ctx context.Context, in model.Host) (int64, error) { func (h *Hosts) List(ctx context.Context) ([]model.Host, error) { rows, err := h.DB.QueryContext(ctx, ` - SELECT id, name, mac, wol_broadcast_ip, wol_port, expected_spec_yaml, - COALESCE(pdu_config_json,''), COALESCE(ipmi_config_json,''), - notes, created_at, updated_at + SELECT `+hostColumns+` FROM hosts ORDER BY name COLLATE NOCASE `) @@ -44,9 +63,7 @@ func (h *Hosts) List(ctx context.Context) ([]model.Host, error) { var out []model.Host for rows.Next() { var host model.Host - if err := rows.Scan(&host.ID, &host.Name, &host.MAC, &host.WoLBroadcastIP, &host.WoLPort, - 
&host.ExpectedSpecYAML, &host.PDUConfigJSON, &host.IPMIConfigJSON, - &host.Notes, &host.CreatedAt, &host.UpdatedAt); err != nil { + if err := scanHost(rows, &host); err != nil { return nil, fmt.Errorf("scan host: %w", err) } out = append(out, host) @@ -56,15 +73,11 @@ func (h *Hosts) List(ctx context.Context) ([]model.Host, error) { func (h *Hosts) Get(ctx context.Context, id int64) (*model.Host, error) { row := h.DB.QueryRowContext(ctx, ` - SELECT id, name, mac, wol_broadcast_ip, wol_port, expected_spec_yaml, - COALESCE(pdu_config_json,''), COALESCE(ipmi_config_json,''), - notes, created_at, updated_at + SELECT `+hostColumns+` FROM hosts WHERE id = ? `, id) var host model.Host - err := row.Scan(&host.ID, &host.Name, &host.MAC, &host.WoLBroadcastIP, &host.WoLPort, - &host.ExpectedSpecYAML, &host.PDUConfigJSON, &host.IPMIConfigJSON, - &host.Notes, &host.CreatedAt, &host.UpdatedAt) + err := scanHost(row, &host) if errors.Is(err, sql.ErrNoRows) { return nil, ErrNotFound } @@ -74,6 +87,40 @@ func (h *Hosts) Get(ctx context.Context, id int64) (*model.Host, error) { return &host, nil } +// GetByMAC looks up a host by its normalized MAC. Used by the host-mode +// heartbeat endpoint, which only has a MAC to go on. +func (h *Hosts) GetByMAC(ctx context.Context, mac string) (*model.Host, error) { + row := h.DB.QueryRowContext(ctx, ` + SELECT `+hostColumns+` + FROM hosts WHERE mac = ? + `, normalizeMAC(mac)) + var host model.Host + err := scanHost(row, &host) + if errors.Is(err, sql.ErrNoRows) { + return nil, ErrNotFound + } + if err != nil { + return nil, fmt.Errorf("get host by mac: %w", err) + } + return &host, nil +} + +// UpdateLastSeen stamps the host row with the most recent heartbeat. +// Targeted UPDATE so it doesn't race with UI edits of other fields. +func (h *Hosts) UpdateLastSeen(ctx context.Context, mac string, t time.Time) error { + res, err := h.DB.ExecContext(ctx, + `UPDATE hosts SET last_seen_at = ? 
WHERE mac = ?`, + t.UTC(), normalizeMAC(mac)) + if err != nil { + return fmt.Errorf("update last_seen_at: %w", err) + } + n, _ := res.RowsAffected() + if n == 0 { + return ErrNotFound + } + return nil +} + func (h *Hosts) Delete(ctx context.Context, id int64) error { res, err := h.DB.ExecContext(ctx, `DELETE FROM hosts WHERE id = ?`, id) if err != nil { diff --git a/internal/store/hosts_test.go b/internal/store/hosts_test.go new file mode 100644 index 0000000..3b3feb6 --- /dev/null +++ b/internal/store/hosts_test.go @@ -0,0 +1,105 @@ +package store_test + +import ( + "context" + "errors" + "path/filepath" + "testing" + "time" + + "vetting/internal/db" + "vetting/internal/model" + "vetting/internal/store" +) + +func newHosts(t *testing.T) *store.Hosts { + t.Helper() + conn, err := db.Open(filepath.Join(t.TempDir(), "vetting.db")) + if err != nil { + t.Fatalf("open db: %v", err) + } + t.Cleanup(func() { _ = conn.Close() }) + return &store.Hosts{DB: conn} +} + +func TestHostsGetByMAC(t *testing.T) { + hosts := newHosts(t) + ctx := context.Background() + + id, err := hosts.Create(ctx, model.Host{ + Name: "mac-host", + MAC: "AA:BB:CC:DD:EE:01", + WoLBroadcastIP: "10.0.0.255", + WoLPort: 9, + ExpectedSpecYAML: "memory:\n total_gib: 16\n", + }) + if err != nil { + t.Fatalf("create: %v", err) + } + + // Lookup normalizes case — upper-case MAC resolves same row. 
+ got, err := hosts.GetByMAC(ctx, "Aa:Bb:Cc:Dd:Ee:01") + if err != nil { + t.Fatalf("GetByMAC: %v", err) + } + if got.ID != id || got.Name != "mac-host" { + t.Fatalf("wrong row: %+v", got) + } + if got.LastSeenAt != nil { + t.Fatalf("LastSeenAt = %v, want nil on fresh host", got.LastSeenAt) + } + + if _, err := hosts.GetByMAC(ctx, "aa:bb:cc:dd:ee:99"); !errors.Is(err, store.ErrNotFound) { + t.Fatalf("GetByMAC unknown = %v, want ErrNotFound", err) + } +} + +func TestHostsUpdateLastSeen(t *testing.T) { + hosts := newHosts(t) + ctx := context.Background() + + id, err := hosts.Create(ctx, model.Host{ + Name: "ls-host", + MAC: "aa:bb:cc:dd:ee:02", + WoLBroadcastIP: "10.0.0.255", + WoLPort: 9, + ExpectedSpecYAML: "memory:\n total_gib: 8\n", + Notes: "keep me", + }) + if err != nil { + t.Fatalf("create: %v", err) + } + + stamp := time.Date(2026, 4, 17, 12, 0, 0, 0, time.UTC) + if err := hosts.UpdateLastSeen(ctx, "AA:BB:CC:DD:EE:02", stamp); err != nil { + t.Fatalf("UpdateLastSeen: %v", err) + } + + got, err := hosts.Get(ctx, id) + if err != nil { + t.Fatalf("Get: %v", err) + } + if got.LastSeenAt == nil || !got.LastSeenAt.Equal(stamp) { + t.Fatalf("LastSeenAt = %v, want %v", got.LastSeenAt, stamp) + } + // Other fields untouched — targeted UPDATE must not stomp anything. + if got.Name != "ls-host" || got.Notes != "keep me" || got.WoLPort != 9 { + t.Fatalf("row damaged: %+v", got) + } + + // A second update advances the timestamp. + later := stamp.Add(45 * time.Second) + if err := hosts.UpdateLastSeen(ctx, got.MAC, later); err != nil { + t.Fatalf("second UpdateLastSeen: %v", err) + } + got, _ = hosts.Get(ctx, id) + if !got.LastSeenAt.Equal(later) { + t.Fatalf("LastSeenAt not advanced: %v", got.LastSeenAt) + } + + // Unknown MAC is an error, not a silent no-op — a stale agent on a + // re-registered box should complain loudly. 
+ if err := hosts.UpdateLastSeen(ctx, "aa:bb:cc:dd:ee:ff", later); !errors.Is(err, store.ErrNotFound) { + t.Fatalf("UpdateLastSeen unknown = %v, want ErrNotFound", err) + } +} diff --git a/internal/web/register/quick.sh.tmpl b/internal/web/register/quick.sh.tmpl index b4e21d8..6385323 100644 --- a/internal/web/register/quick.sh.tmpl +++ b/internal/web/register/quick.sh.tmpl @@ -16,6 +16,9 @@ # WOL_PORT WoL UDP port (default: 9) # NOTES Free-text notes # ORCH_URL Override orchestrator base URL +# INSTALL_AGENT 1=install vetting-reporter systemd service (default) +# 0=skip the agent install (registration only) +# Pass via: curl ... | sudo INSTALL_AGENT=0 bash set -euo pipefail ORCH_URL="${ORCH_URL:-{{.OrchestratorURL}}}" @@ -175,5 +178,52 @@ resp="$(curl -fsS -X POST \ -d "${payload}" \ "${ORCH_URL}/api/v1/hosts")" echo "OK: ${resp}" + +# --- Optional: install the vetting-reporter systemd service so the +# host keeps heartbeating to the orchestrator long-term. Skipped when +# INSTALL_AGENT=0 or when systemctl isn't present (non-systemd hosts). +install_agent() { + if [[ "${INSTALL_AGENT:-1}" == "0" ]]; then + echo "Skipping agent install (INSTALL_AGENT=0)." + return + fi + if ! command -v systemctl >/dev/null 2>&1; then + echo "systemctl not found — skipping agent install." + return + fi + echo "Installing vetting-reporter service..." + install -d /etc/vetting /usr/local/bin + if ! curl -fsSL "${ORCH_URL}/assets/vetting-agent-linux-amd64" \ + -o /usr/local/bin/vetting-agent; then + echo "WARN: could not download agent from ${ORCH_URL}/assets/vetting-agent-linux-amd64" + echo "WARN: registration succeeded but the host won't heartbeat." 
+ return + fi + chmod +x /usr/local/bin/vetting-agent + cat >/etc/vetting/host-agent.yaml </etc/systemd/system/vetting-reporter.service <<'UNIT' +[Unit] +Description=Vetting host-mode reporter +After=network-online.target +Wants=network-online.target + +[Service] +ExecStart=/usr/local/bin/vetting-agent host -config /etc/vetting/host-agent.yaml +Restart=on-failure +RestartSec=5 + +[Install] +WantedBy=multi-user.target +UNIT + systemctl daemon-reload + systemctl enable --now vetting-reporter.service + echo "vetting-reporter.service enabled." +} +install_agent + echo echo "Open ${ORCH_URL}/ and click 'Start vetting' on ${NAME}." diff --git a/internal/web/static/app.css b/internal/web/static/app.css index ccc74b3..8cb7600 100644 --- a/internal/web/static/app.css +++ b/internal/web/static/app.css @@ -107,9 +107,30 @@ button.danger:hover { background: rgba(229,100,102,.1); } } .tile-head { display: flex; justify-content: space-between; align-items: center; } .tile-name { font-weight: 600; } +.tile-header-right { display: flex; align-items: center; gap: 10px; } .tile-status { font-size: 12px; color: var(--text-dim); text-transform: uppercase; letter-spacing: .5px; } .tile-idle .tile-status { color: var(--text-dim); } +.tile-last-seen { + font-family: var(--mono); + font-size: 11px; + color: var(--text-dim); + display: inline-flex; + align-items: center; + gap: 5px; +} +.tile-last-seen::before { + content: ""; + width: 6px; + height: 6px; + border-radius: 50%; + background: var(--text-dim); +} +.tile-last-seen.online { color: var(--success); } +.tile-last-seen.online::before { background: var(--success); } +.tile-last-seen.stale::before { background: var(--warn); } +.tile-last-seen.offline::before { background: var(--text-dim); opacity: .5; } + .tile-meta { display: grid; grid-template-columns: 1fr 1fr; gap: 4px 16px; margin: 0; font-size: 13px; } .tile-meta div { display: flex; justify-content: space-between; align-items: baseline; } .tile-meta dt { color: 
var(--text-dim); } diff --git a/internal/web/templates/dashboard.templ b/internal/web/templates/dashboard.templ index 7b12481..b2fe522 100644 --- a/internal/web/templates/dashboard.templ +++ b/internal/web/templates/dashboard.templ @@ -1,15 +1,21 @@ package templates -import "vetting/internal/model" +import ( + "time" + + "vetting/internal/model" +) // TileData pairs a host with its latest run and the derived fields the // tile needs to render: spec-diff count (server-side diff result) and // the on-disk path to the hold-key artifact when the run is holding. +// LastSeenAt is the host-mode agent's most recent heartbeat. type TileData struct { Host model.Host Latest *model.Run SpecDiffCritical int HoldKeyPath string + LastSeenAt *time.Time } templ Dashboard(tiles []TileData) { diff --git a/internal/web/templates/dashboard_templ.go b/internal/web/templates/dashboard_templ.go index 40f8d2e..32deb7f 100644 --- a/internal/web/templates/dashboard_templ.go +++ b/internal/web/templates/dashboard_templ.go @@ -8,16 +8,22 @@ package templates import "github.com/a-h/templ" import templruntime "github.com/a-h/templ/runtime" -import "vetting/internal/model" +import ( + "time" + + "vetting/internal/model" +) // TileData pairs a host with its latest run and the derived fields the // tile needs to render: spec-diff count (server-side diff result) and // the on-disk path to the hold-key artifact when the run is holding. +// LastSeenAt is the host-mode agent's most recent heartbeat. 
type TileData struct { Host model.Host Latest *model.Run SpecDiffCritical int HoldKeyPath string + LastSeenAt *time.Time } func Dashboard(tiles []TileData) templ.Component { diff --git a/internal/web/templates/host_tile.templ b/internal/web/templates/host_tile.templ index aab4b11..bd37361 100644 --- a/internal/web/templates/host_tile.templ +++ b/internal/web/templates/host_tile.templ @@ -4,6 +4,7 @@ import ( "bytes" "context" "fmt" + "time" "vetting/internal/model" ) @@ -19,7 +20,10 @@ templ HostTile(t TileData) { >
{ t.Host.Name }
-
{ tileStatus(t.Latest) }
+
+ { lastSeenLabel(t.LastSeenAt) } +
{ tileStatus(t.Latest) }
+
@@ -142,3 +146,46 @@ func RenderTileString(t TileData) string { _ = HostTile(t).Render(context.Background(), &buf) return buf.String() } + +// lastSeenLabel renders the host-mode agent's liveness into a short +// badge: "never" if the host has never heartbeated, "online" within +// a 2×heartbeat grace window (60s, since agents heartbeat every 30s), +// "Nm ago" / "Nh ago" / "Nd ago" otherwise. +func lastSeenLabel(t *time.Time) string { + if t == nil { + return "never" + } + return humanAgoFrom(time.Now(), *t) +} + +// lastSeenClass pairs with lastSeenLabel to drive the badge color +// without the template having to carry its own logic. +func lastSeenClass(t *time.Time) string { + if t == nil { + return "offline" + } + if time.Since(*t) < 60*time.Second { + return "online" + } + return "stale" +} + +// humanAgoFrom formats (now - t) as a short "Nm ago" style string. +// Buckets: <60s -> "online", <60m -> minutes, <24h -> hours, else days. +// Split on `now` so callers can hold time for tests. 
+func humanAgoFrom(now time.Time, t time.Time) string { + d := now.Sub(t) + if d < 0 { + d = 0 + } + if d < 60*time.Second { + return "online" + } + if d < time.Hour { + return fmt.Sprintf("%dm ago", int(d/time.Minute)) + } + if d < 24*time.Hour { + return fmt.Sprintf("%dh ago", int(d/time.Hour)) + } + return fmt.Sprintf("%dd ago", int(d/(24*time.Hour))) +} diff --git a/internal/web/templates/host_tile_templ.go b/internal/web/templates/host_tile_templ.go index cec25e1..6e9269c 100644 --- a/internal/web/templates/host_tile_templ.go +++ b/internal/web/templates/host_tile_templ.go @@ -12,6 +12,7 @@ import ( "bytes" "context" "fmt" + "time" "vetting/internal/model" ) @@ -51,7 +52,7 @@ func HostTile(t TileData) templ.Component { var templ_7745c5c3_Var3 string templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("host-%d", t.Host.ID)) if templ_7745c5c3_Err != nil { - return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 15, Col: 40} + return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 16, Col: 40} } _, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3)) if templ_7745c5c3_Err != nil { @@ -77,7 +78,7 @@ func HostTile(t TileData) templ.Component { var templ_7745c5c3_Var5 string templ_7745c5c3_Var5, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("tile-%d", t.Host.ID)) if templ_7745c5c3_Err != nil { - return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 17, Col: 46} + return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 18, Col: 46} } _, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var5)) if templ_7745c5c3_Err != nil { @@ -90,228 +91,263 @@ func HostTile(t TileData) templ.Component { var templ_7745c5c3_Var6 string templ_7745c5c3_Var6, templ_7745c5c3_Err = templ.JoinStringErrs(t.Host.Name) if templ_7745c5c3_Err != nil { - return templ.Error{Err: 
templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 21, Col: 39} + return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 22, Col: 39} } _, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var6)) if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 5, "
") + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 5, "
") if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } - var templ_7745c5c3_Var7 string - templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs(tileStatus(t.Latest)) - if templ_7745c5c3_Err != nil { - return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 22, Col: 50} - } - _, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7)) + var templ_7745c5c3_Var7 = []any{"tile-last-seen", lastSeenClass(t.LastSeenAt)} + templ_7745c5c3_Err = templ.RenderCSSItems(ctx, templ_7745c5c3_Buffer, templ_7745c5c3_Var7...) if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 6, "
MAC
") + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 6, "
WoL
") + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 7, "\">") if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } var templ_7745c5c3_Var9 string - templ_7745c5c3_Var9, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("%s:%d", t.Host.WoLBroadcastIP, t.Host.WoLPort)) + templ_7745c5c3_Var9, templ_7745c5c3_Err = templ.JoinStringErrs(lastSeenLabel(t.LastSeenAt)) if templ_7745c5c3_Err != nil { - return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 31, Col: 69} + return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 24, Col: 95} } _, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var9)) if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 8, "
") + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 8, "
") + if templ_7745c5c3_Err != nil { + return templ_7745c5c3_Err + } + var templ_7745c5c3_Var10 string + templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(tileStatus(t.Latest)) + if templ_7745c5c3_Err != nil { + return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 25, Col: 51} + } + _, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10)) + if templ_7745c5c3_Err != nil { + return templ_7745c5c3_Err + } + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 9, "
MAC
") + if templ_7745c5c3_Err != nil { + return templ_7745c5c3_Err + } + var templ_7745c5c3_Var11 string + templ_7745c5c3_Var11, templ_7745c5c3_Err = templ.JoinStringErrs(t.Host.MAC) + if templ_7745c5c3_Err != nil { + return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 31, Col: 20} + } + _, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var11)) + if templ_7745c5c3_Err != nil { + return templ_7745c5c3_Err + } + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 10, "
WoL
") + if templ_7745c5c3_Err != nil { + return templ_7745c5c3_Err + } + var templ_7745c5c3_Var12 string + templ_7745c5c3_Var12, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("%s:%d", t.Host.WoLBroadcastIP, t.Host.WoLPort)) + if templ_7745c5c3_Err != nil { + return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 35, Col: 69} + } + _, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var12)) + if templ_7745c5c3_Err != nil { + return templ_7745c5c3_Err + } + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 11, "
") if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } if t.Latest != nil && t.Latest.FailedStage != "" { - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 9, "
Failed at
") - if templ_7745c5c3_Err != nil { - return templ_7745c5c3_Err - } - var templ_7745c5c3_Var10 string - templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(t.Latest.FailedStage) - if templ_7745c5c3_Err != nil { - return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 36, Col: 31} - } - _, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10)) - if templ_7745c5c3_Err != nil { - return templ_7745c5c3_Err - } - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 10, "
") - if templ_7745c5c3_Err != nil { - return templ_7745c5c3_Err - } - } - if t.SpecDiffCritical > 0 { - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 11, "
Spec diffs
") - if templ_7745c5c3_Err != nil { - return templ_7745c5c3_Err - } - var templ_7745c5c3_Var11 string - templ_7745c5c3_Var11, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("%d critical", t.SpecDiffCritical)) - if templ_7745c5c3_Err != nil { - return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 42, Col: 69} - } - _, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var11)) - if templ_7745c5c3_Err != nil { - return templ_7745c5c3_Err - } - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 12, "
") - if templ_7745c5c3_Err != nil { - return templ_7745c5c3_Err - } - } - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 13, "
") - if templ_7745c5c3_Err != nil { - return templ_7745c5c3_Err - } - if t.Latest != nil && t.Latest.State == model.StateFailedHolding && t.Latest.HoldIP != "" { - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 14, "
Host is holding — SSH available
") - if templ_7745c5c3_Err != nil { - return templ_7745c5c3_Err - } - var templ_7745c5c3_Var12 string - templ_7745c5c3_Var12, templ_7745c5c3_Err = templ.JoinStringErrs(sshInvocation(t.HoldKeyPath, t.Latest.HoldIP)) - if templ_7745c5c3_Err != nil { - return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 49, Col: 74} - } - _, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var12)) - if templ_7745c5c3_Err != nil { - return templ_7745c5c3_Err - } - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 15, "
") - if templ_7745c5c3_Err != nil { - return templ_7745c5c3_Err - } - } - if t.Latest != nil { - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 16, "
Failed at
") if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } var templ_7745c5c3_Var13 string - templ_7745c5c3_Var13, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("log-%d", t.Latest.ID)) + templ_7745c5c3_Var13, templ_7745c5c3_Err = templ.JoinStringErrs(t.Latest.FailedStage) if templ_7745c5c3_Err != nil { - return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 55, Col: 43} + return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 40, Col: 31} } _, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var13)) if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 17, "\" sse-swap=\"") + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 13, "
") + if templ_7745c5c3_Err != nil { + return templ_7745c5c3_Err + } + } + if t.SpecDiffCritical > 0 { + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 14, "
Spec diffs
") if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } var templ_7745c5c3_Var14 string - templ_7745c5c3_Var14, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("log-%d", t.Latest.ID)) + templ_7745c5c3_Var14, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("%d critical", t.SpecDiffCritical)) if templ_7745c5c3_Err != nil { - return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 56, Col: 49} + return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 46, Col: 69} } _, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var14)) if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 18, "\" hx-swap=\"beforeend\">
") + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 15, "") if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } } - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 19, "
") + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 16, "
") if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } - if canStart(t.Latest) { - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 20, "
Host is holding — SSH available
") if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } - var templ_7745c5c3_Var15 templ.SafeURL - templ_7745c5c3_Var15, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/start", t.Host.ID))) + var templ_7745c5c3_Var15 string + templ_7745c5c3_Var15, templ_7745c5c3_Err = templ.JoinStringErrs(sshInvocation(t.HoldKeyPath, t.Latest.HoldIP)) if templ_7745c5c3_Err != nil { - return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 62, Col: 89} + return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 53, Col: 74} } _, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var15)) if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 21, "\" class=\"inline\">") - if templ_7745c5c3_Err != nil { - return templ_7745c5c3_Err - } - } else { - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 22, " ") + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 18, "
") if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } } - if canOverrideWipe(t.Latest) { - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 23, "
") + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 20, "\" sse-swap=\"") if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } - } - if hasReport(t.Latest) { - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 25, "View report") + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 21, "\" hx-swap=\"beforeend\">") if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } } - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 27, "
") if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } - var templ_7745c5c3_Var18 templ.SafeURL - templ_7745c5c3_Var18, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/delete", t.Host.ID))) - if templ_7745c5c3_Err != nil { - return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 76, Col: 89} + if canStart(t.Latest) { + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 23, "
") + if templ_7745c5c3_Err != nil { + return templ_7745c5c3_Err + } + } else { + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 25, " ") + if templ_7745c5c3_Err != nil { + return templ_7745c5c3_Err + } } - _, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var18)) + if canOverrideWipe(t.Latest) { + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 26, "
") + if templ_7745c5c3_Err != nil { + return templ_7745c5c3_Err + } + } + if hasReport(t.Latest) { + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 28, "View report") + if templ_7745c5c3_Err != nil { + return templ_7745c5c3_Err + } + } + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 30, "
") + var templ_7745c5c3_Var21 templ.SafeURL + templ_7745c5c3_Var21, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/delete", t.Host.ID))) + if templ_7745c5c3_Err != nil { + return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 80, Col: 89} + } + _, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var21)) + if templ_7745c5c3_Err != nil { + return templ_7745c5c3_Err + } + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 31, "\" class=\"inline\">") if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } @@ -382,4 +418,47 @@ func RenderTileString(t TileData) string { return buf.String() } +// lastSeenLabel renders the host-mode agent's liveness into a short +// badge: "never" if the host has never heartbeated, "online" within +// a 2×heartbeat grace window (60s, since agents heartbeat every 30s), +// "Nm ago" / "Nh ago" / "Nd ago" otherwise. +func lastSeenLabel(t *time.Time) string { + if t == nil { + return "never" + } + return humanAgoFrom(time.Now(), *t) +} + +// lastSeenClass pairs with lastSeenLabel to drive the badge color +// without the template having to carry its own logic. +func lastSeenClass(t *time.Time) string { + if t == nil { + return "offline" + } + if time.Since(*t) < 60*time.Second { + return "online" + } + return "stale" +} + +// humanAgoFrom formats (now - t) as a short "Nm ago" style string. +// Buckets: <60s -> "online", <60m -> minutes, <24h -> hours, else days. +// Split on `now` so callers can hold time for tests. 
+func humanAgoFrom(now time.Time, t time.Time) string { + d := now.Sub(t) + if d < 0 { + d = 0 + } + if d < 60*time.Second { + return "online" + } + if d < time.Hour { + return fmt.Sprintf("%dm ago", int(d/time.Minute)) + } + if d < 24*time.Hour { + return fmt.Sprintf("%dh ago", int(d/time.Hour)) + } + return fmt.Sprintf("%dd ago", int(d/(24*time.Hour))) +} + var _ = templruntime.GeneratedTemplate diff --git a/internal/web/templates/host_tile_test.go b/internal/web/templates/host_tile_test.go new file mode 100644 index 0000000..e9adbff --- /dev/null +++ b/internal/web/templates/host_tile_test.go @@ -0,0 +1,53 @@ +package templates + +import ( + "testing" + "time" +) + +func TestHumanAgoFrom(t *testing.T) { + now := time.Date(2026, 4, 17, 12, 0, 0, 0, time.UTC) + cases := []struct { + name string + ago time.Duration + want string + }{ + {"just now", 5 * time.Second, "online"}, + {"edge-just-under-minute", 59 * time.Second, "online"}, + {"one minute", 60 * time.Second, "1m ago"}, + {"five minutes", 5 * time.Minute, "5m ago"}, + {"fifty-nine minutes", 59 * time.Minute, "59m ago"}, + {"one hour", 1 * time.Hour, "1h ago"}, + {"eight hours", 8 * time.Hour, "8h ago"}, + {"one day", 24 * time.Hour, "1d ago"}, + {"three days", 72 * time.Hour, "3d ago"}, + // Clock skew: "future" heartbeat clamps to "online" rather than + // printing "-3m ago" or panicking. 
+ {"future clamps to online", -5 * time.Second, "online"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := humanAgoFrom(now, now.Add(-tc.ago)) + if got != tc.want { + t.Fatalf("humanAgoFrom(%v) = %q, want %q", tc.ago, got, tc.want) + } + }) + } +} + +func TestLastSeenLabelAndClass(t *testing.T) { + if got := lastSeenLabel(nil); got != "never" { + t.Fatalf("label nil = %q, want never", got) + } + if got := lastSeenClass(nil); got != "offline" { + t.Fatalf("class nil = %q, want offline", got) + } + recent := time.Now().Add(-5 * time.Second) + if got := lastSeenClass(&recent); got != "online" { + t.Fatalf("class recent = %q, want online", got) + } + stale := time.Now().Add(-10 * time.Minute) + if got := lastSeenClass(&stale); got != "stale" { + t.Fatalf("class stale = %q, want stale", got) + } +}