Initial commit: full Phases 1-6 implementation
CI / Lint + build + test (push) Has been cancelled

Post-repair hardware validation pipeline for Proxmox cluster hosts.
Go orchestrator + in-image agent + mkosi live image + bundled dnsmasq
PXE + SQLite + HTMX/SSE UI + notify registry + janitor + full docs.
This commit is contained in:
2026-04-17 21:32:10 -04:00
commit 9bb4b09a04
98 changed files with 11960 additions and 0 deletions
+56
View File
@@ -0,0 +1,56 @@
package notify
import (
"fmt"
"time"
"vetting/internal/config"
)
// BuildRegistry translates the config surface into a live Registry.
//
// Notifier entries with an empty type are skipped so commented-out
// examples can stay in the config. Unknown notifier types, routes with
// no notifier name, and routes naming a notifier that was never
// registered all produce an error, so typos fail startup loudly rather
// than silently dropping events at dispatch time.
func BuildRegistry(notifiers []config.Notifier, routes []config.Route) (*Registry, error) {
	reg := NewRegistry(10 * time.Second)

	for _, n := range notifiers {
		switch n.Type {
		case "":
			continue // skip blank entries; useful for commented-out examples
		case "ntfy":
			reg.Register(NewNtfy(n.Name, n.Server, n.Topic))
		case "discord":
			reg.Register(NewDiscord(n.Name, n.WebhookURL))
		case "smtp":
			reg.Register(NewSMTP(n.Name, n.SMTP.Host, n.SMTP.Port, n.SMTP.From, n.SMTP.To))
		default:
			return nil, fmt.Errorf("notify: unknown notifier type %q (name=%q)", n.Type, n.Name)
		}
	}

	for i, r := range routes {
		if r.Notifier == "" {
			return nil, fmt.Errorf("notify: route has no notifier name")
		}
		// A route that points at nothing would only surface as a log line
		// when an event fires; reject it here while the operator is looking.
		if _, ok := reg.notifiers[r.Notifier]; !ok {
			return nil, fmt.Errorf("notify: route %d references unknown notifier %q", i, r.Notifier)
		}
		reg.AddRoute(Route{
			MatchKind:     toKinds(r.MatchKind),
			MatchSeverity: toSeverities(r.MatchSeverity),
			Notifier:      r.Notifier,
		})
	}
	return reg, nil
}
// toKinds converts config strings into Kind values one-for-one, keeping
// order. The result is always a non-nil slice, even for empty input.
func toKinds(ss []string) []Kind {
	kinds := make([]Kind, len(ss))
	for i := range ss {
		kinds[i] = Kind(ss[i])
	}
	return kinds
}
// toSeverities converts config strings into Severity values one-for-one,
// keeping order. The result is always a non-nil slice, even for empty
// input.
func toSeverities(ss []string) []Severity {
	levels := make([]Severity, len(ss))
	for i := range ss {
		levels[i] = Severity(ss[i])
	}
	return levels
}
+87
View File
@@ -0,0 +1,87 @@
package notify
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
)
// DiscordNotifier posts to a Discord incoming webhook. Body is rendered
// as a single embed so Discord shows a colored sidebar matching event
// severity. Discord rejects empty content+embeds; we always include the
// embed so that never happens.
type DiscordNotifier struct {
// NameStr is the value returned by Name.
NameStr string
// WebhookURL is the endpoint Send POSTs to; Send returns an error when
// it is empty.
WebhookURL string
// HTTP is the client Send uses for delivery. NewDiscord installs one
// with a 10s timeout.
HTTP *http.Client
}
// NewDiscord returns a DiscordNotifier with the given name and webhook
// URL, backed by its own HTTP client with a 10s timeout.
func NewDiscord(name, webhookURL string) *DiscordNotifier {
	client := &http.Client{Timeout: 10 * time.Second}

	d := new(DiscordNotifier)
	d.NameStr = name
	d.WebhookURL = webhookURL
	d.HTTP = client
	return d
}
// Name returns the notifier's configured name (NameStr).
func (d *DiscordNotifier) Name() string {
	return d.NameStr
}
// discordPayload is the JSON body Send POSTs to the webhook. Only the
// "embeds" array is populated; Send always fills it with exactly one
// embed.
type discordPayload struct {
Embeds []discordEmbed `json:"embeds"`
}
// discordEmbed is the single embed carried in a discordPayload. Send
// fills it from an Event: Title, Body and URL map to title, description
// and url, and Color comes from discordColor. Every field is omitted from
// the JSON when it holds its zero value.
type discordEmbed struct {
Title string `json:"title,omitempty"`
Description string `json:"description,omitempty"`
URL string `json:"url,omitempty"`
Color int `json:"color,omitempty"`
}
// Send delivers ev to the configured webhook as a JSON payload carrying
// one embed (title, description, URL and a severity-derived color).
//
// It returns an error when no webhook URL is configured, when the request
// cannot be built or sent, or when the server answers with a status of
// 300 or above; in the last case the error includes the status code and a
// bounded excerpt of the response body. ctx bounds the request.
func (d *DiscordNotifier) Send(ctx context.Context, ev Event) error {
	if d.WebhookURL == "" {
		return fmt.Errorf("discord: no webhook_url configured")
	}

	payload := discordPayload{Embeds: []discordEmbed{{
		Title:       ev.Title,
		Description: ev.Body,
		URL:         ev.URL,
		Color:       discordColor(ev.Severity),
	}}}
	buf, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("discord: encode payload: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, d.WebhookURL, bytes.NewReader(buf))
	if err != nil {
		return fmt.Errorf("discord: build request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	// A DiscordNotifier built as a struct literal has no client; fall back
	// to the default one instead of dereferencing nil. ctx still bounds
	// the call.
	client := d.HTTP
	if client == nil {
		client = http.DefaultClient
	}
	resp, err := client.Do(req)
	if err != nil {
		return fmt.Errorf("discord: %w", err)
	}
	defer func() { _ = resp.Body.Close() }()

	if resp.StatusCode >= 300 {
		// The body only decorates the error message; cap it so a
		// misbehaving endpoint cannot make us buffer an arbitrary amount.
		b, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
		return fmt.Errorf("discord: %d: %s", resp.StatusCode, strings.TrimSpace(string(b)))
	}
	return nil
}
// discordColor picks the embed sidebar color for a severity: red for
// critical, yellow for warning, and green for everything else (including
// unknown severities). Values are 24-bit RGB integers.
func discordColor(s Severity) int {
	const (
		red    = 0xE74C3C
		yellow = 0xF1C40F
		green  = 0x2ECC71
	)
	if s == SeverityCritical {
		return red
	}
	if s == SeverityWarning {
		return yellow
	}
	return green
}
+179
View File
@@ -0,0 +1,179 @@
// Package notify owns outbound operator notifications. The orchestrator
// fires Events at well-known points (stage failure, hold opened, run
// completed, spec mismatch); a Registry matches each Event against
// config-declared routes and dispatches to the matching Notifiers.
//
// Delivery is fire-and-forget: a single HTTP/SMTP attempt per notifier
// with a bounded timeout. Failures are logged and nothing is persisted
// — on a solo LAN deployment the orchestrator UI is the source of truth
// and we don't want to build a durable queue for a convenience feature.
package notify
import (
"context"
"log"
"sync"
"time"
)
// Kind enumerates the event types the orchestrator can fire. Names are
// stable: they appear in config files' match_kind lists.
type Kind string
// The known event kinds. Each string value is the literal that routes
// compare against, so renaming one changes what configs must contain.
const (
// KindStageFailed: a stage failure.
KindStageFailed Kind = "StageFailed"
// KindSpecMismatch: a spec mismatch.
KindSpecMismatch Kind = "SpecMismatch"
// KindHoldingOpened: a hold was opened.
KindHoldingOpened Kind = "HoldingOpened"
// KindRunCompleted: a run completed.
KindRunCompleted Kind = "RunCompleted"
)
// Severity is classification for filtering routes. "critical" pairs
// with StageFailed/SpecMismatch/HoldingOpened; RunCompleted uses "info".
type Severity string
// The known severities. Routes compare these string values exactly
// against Event.Severity; notifiers also map them to transport-specific
// presentation (embed color, push priority).
const (
SeverityInfo Severity = "info"
SeverityWarning Severity = "warning"
SeverityCritical Severity = "critical"
)
// Event is the payload passed to each Notifier's Send method. Title and
// Body are pre-rendered; notifiers shape them for their own transport
// (e.g. Discord embed vs SMTP body) but shouldn't re-compose semantics.
//
// URL links back to the orchestrator UI so a push notification can be
// clicked through for full context.
type Event struct {
// Kind is matched against Route.MatchKind during dispatch.
Kind Kind
// Severity is matched against Route.MatchSeverity during dispatch.
Severity Severity
// RunID appears in the Registry's log line when delivery fails.
RunID int64
HostName string
// Title is the pre-rendered headline (embed title, push title, mail subject).
Title string
// Body is the pre-rendered message text.
Body string
URL string // optional; UI link for this run/host
}
// Notifier is one delivery target. Implementations must not block on
// remote-side failure any longer than their own timeout — the Registry
// calls Send from a goroutine but still wants the goroutine to exit.
type Notifier interface {
// Name returns the key the notifier is registered under and that
// routes refer to.
Name() string
// Send makes one delivery attempt for ev. The Registry passes a ctx
// carrying its per-notification timeout and logs any returned error.
Send(ctx context.Context, ev Event) error
}
// Route binds an event selector to a notifier name. A route matches an
// event when every non-empty field is satisfied; empty fields are wildcards.
type Route struct {
// MatchKind lists the accepted event kinds; an empty list accepts any kind.
MatchKind []Kind
// MatchSeverity lists the accepted severities; an empty list accepts any.
MatchSeverity []Severity
Notifier string // name of a registered Notifier
}
// Registry holds notifiers + routes and fans events out. Safe for
// concurrent Dispatch. It's built once at startup from config.
//
// Register and AddRoute write the notifiers map and routes slice without
// taking any lock, so all registration is expected to finish before the
// first concurrent Dispatch.
type Registry struct {
// notifiers maps a notifier's Name() to the notifier.
notifiers map[string]Notifier
// routes is the ordered route table walked on every Dispatch.
routes []Route
// timeout is the context deadline applied to each individual Send.
timeout time.Duration
// NOTE(review): mu is not referenced by any method visible in this file.
mu sync.Mutex // guards in-flight goroutine count (future-use metrics)
}
// NewRegistry returns an empty Registry that gives each notification the
// supplied timeout budget. A zero or negative timeout is replaced by 10s
// so tests and prod both get a sane default.
func NewRegistry(timeout time.Duration) *Registry {
	const fallback = 10 * time.Second

	reg := &Registry{notifiers: make(map[string]Notifier)}
	reg.timeout = timeout
	if reg.timeout <= 0 {
		reg.timeout = fallback
	}
	return reg
}
// Register stores n under its Name(). A nil Notifier is ignored.
// Registering a second notifier with the same name silently replaces the
// first, which lets a config shadow an entry by listing the name twice.
func (r *Registry) Register(n Notifier) {
	if n != nil {
		key := n.Name()
		r.notifiers[key] = n
	}
}
// AddRoute adds rt to the end of the route table. Insertion order is
// kept, so multi-match dispatch is deterministic.
func (r *Registry) AddRoute(rt Route) {
	table := append(r.routes, rt)
	r.routes = table
}
// Dispatch resolves the notifiers whose routes match ev and starts one
// goroutine per notifier to deliver it. It returns without waiting for
// any delivery. Each Send gets a fresh context bounded by the Registry's
// timeout, and a failed Send is only logged.
func (r *Registry) Dispatch(ev Event) {
	for _, target := range r.match(ev) {
		go func(n Notifier) {
			ctx, cancel := context.WithTimeout(context.Background(), r.timeout)
			defer cancel()

			err := n.Send(ctx, ev)
			if err == nil {
				return
			}
			log.Printf("notify: %s send(%s run=%d): %v", n.Name(), ev.Kind, ev.RunID, err)
		}(target)
	}
}
// match scans the route table in order and collects the notifiers to
// fire for ev. A notifier named by several matching routes is returned
// once, at the position of its first match. A matching route whose
// notifier is not registered is logged and skipped. The result is never
// nil.
func (r *Registry) match(ev Event) []Notifier {
	targets := []Notifier{}
	picked := make(map[string]struct{})

	for i := range r.routes {
		rt := &r.routes[i]
		if !matchesKind(rt.MatchKind, ev.Kind) || !matchesSeverity(rt.MatchSeverity, ev.Severity) {
			continue
		}
		if _, dup := picked[rt.Notifier]; dup {
			continue
		}
		n, known := r.notifiers[rt.Notifier]
		if !known {
			log.Printf("notify: route references unknown notifier %q", rt.Notifier)
			continue
		}
		picked[rt.Notifier] = struct{}{}
		targets = append(targets, n)
	}
	return targets
}
// matchesKind reports whether got passes the allow list. An empty allow
// list is a wildcard and accepts every kind.
func matchesKind(allow []Kind, got Kind) bool {
	matched := len(allow) == 0
	for i := 0; !matched && i < len(allow); i++ {
		matched = allow[i] == got
	}
	return matched
}
// matchesSeverity reports whether got passes the allow list. An empty
// allow list is a wildcard and accepts every severity.
func matchesSeverity(allow []Severity, got Severity) bool {
	matched := len(allow) == 0
	for i := 0; !matched && i < len(allow); i++ {
		matched = allow[i] == got
	}
	return matched
}
+268
View File
@@ -0,0 +1,268 @@
package notify
import (
"context"
"io"
"net/http"
"net/http/httptest"
"net/smtp"
"strings"
"sync"
"sync/atomic"
"testing"
"time"
)
// stubNotifier records every Send call; it's the test harness for
// Registry routing logic without hitting network.
type stubNotifier struct {
// name is returned by Name and is the key the stub registers under.
name string
// calls holds every event passed to Send, in arrival order.
calls []Event
// mu guards calls; Send runs on the Registry's goroutines while the
// test goroutine reads through seen.
mu sync.Mutex
failOn Kind // if non-empty, returns an error when ev.Kind == failOn
}
// Name returns the stub's configured name.
func (s *stubNotifier) Name() string {
	return s.name
}
// Send records ev and then reports the forced failure when failOn is set
// and equals ev.Kind. The event is recorded even when Send fails.
func (s *stubNotifier) Send(_ context.Context, ev Event) error {
	func() {
		s.mu.Lock()
		defer s.mu.Unlock()
		s.calls = append(s.calls, ev)
	}()

	if s.failOn == "" || ev.Kind != s.failOn {
		return nil
	}
	return errFake("forced failure")
}
// seen returns a copy of the events recorded so far, taken under the
// lock, so callers can inspect it while Send keeps appending.
func (s *stubNotifier) seen() []Event {
	var snapshot []Event
	s.mu.Lock()
	snapshot = append(snapshot, s.calls...)
	s.mu.Unlock()
	return snapshot
}
// errFake is a string-backed error type; stubNotifier returns it for
// forced failures.
type errFake string

// Error returns the underlying string as the error message.
func (e errFake) Error() string {
	msg := string(e)
	return msg
}
// awaitCalls polls until every stub has recorded at least the expected
// number of calls or a 2s deadline elapses — Dispatch uses goroutines so
// the test must wait.
//
// On timeout it reports only the stubs that are still short and then
// stops the calling test with FailNow, so callers can safely index into
// seen() afterwards. It must be called from the test's own goroutine.
func awaitCalls(t *testing.T, want map[*stubNotifier]int) {
	t.Helper()

	type shortfall struct {
		stub      *stubNotifier
		got, want int
	}

	deadline := time.Now().Add(2 * time.Second)
	for {
		// Take one snapshot per poll so the pass/fail decision and the
		// reported numbers come from the same observation.
		var missing []shortfall
		for s, n := range want {
			if got := len(s.seen()); got < n {
				missing = append(missing, shortfall{stub: s, got: got, want: n})
			}
		}
		if len(missing) == 0 {
			return
		}
		if time.Now().After(deadline) {
			for _, m := range missing {
				t.Errorf("notifier %q: got %d calls, want %d", m.stub.name, m.got, m.want)
			}
			t.FailNow()
		}
		time.Sleep(5 * time.Millisecond)
	}
}
func TestRegistryRoutesByKind(t *testing.T) {
reg := NewRegistry(time.Second)
a := &stubNotifier{name: "fails-only"}
b := &stubNotifier{name: "everything"}
reg.Register(a)
reg.Register(b)
reg.AddRoute(Route{MatchKind: []Kind{KindStageFailed}, Notifier: "fails-only"})
reg.AddRoute(Route{Notifier: "everything"})
reg.Dispatch(Event{Kind: KindStageFailed, Severity: SeverityCritical})
reg.Dispatch(Event{Kind: KindRunCompleted, Severity: SeverityInfo})
awaitCalls(t, map[*stubNotifier]int{a: 1, b: 2})
if got := a.seen()[0].Kind; got != KindStageFailed {
t.Fatalf("a got %q, want StageFailed", got)
}
}
func TestRegistryRoutesBySeverity(t *testing.T) {
reg := NewRegistry(time.Second)
crit := &stubNotifier{name: "crit-only"}
reg.Register(crit)
reg.AddRoute(Route{MatchSeverity: []Severity{SeverityCritical}, Notifier: "crit-only"})
reg.Dispatch(Event{Kind: KindRunCompleted, Severity: SeverityInfo})
reg.Dispatch(Event{Kind: KindHoldingOpened, Severity: SeverityCritical})
awaitCalls(t, map[*stubNotifier]int{crit: 1})
if got := crit.seen()[0].Severity; got != SeverityCritical {
t.Fatalf("got severity %q, want critical", got)
}
}
// TestRegistryDeduplicatesNotifiers checks that a notifier named by two
// matching routes is fired exactly once per Dispatch.
func TestRegistryDeduplicatesNotifiers(t *testing.T) {
	reg := NewRegistry(time.Second)
	n := &stubNotifier{name: "only"}
	reg.Register(n)
	// Two routes naming the same notifier — a single Dispatch should
	// fire once, not twice.
	reg.AddRoute(Route{MatchKind: []Kind{KindStageFailed}, Notifier: "only"})
	reg.AddRoute(Route{MatchSeverity: []Severity{SeverityCritical}, Notifier: "only"})

	reg.Dispatch(Event{Kind: KindStageFailed, Severity: SeverityCritical})
	awaitCalls(t, map[*stubNotifier]int{n: 1})

	// awaitCalls returns as soon as the first call lands, so on its own it
	// cannot detect a duplicate. Give a would-be second goroutine time to
	// run, then require exactly one delivery.
	time.Sleep(50 * time.Millisecond)
	if got := len(n.seen()); got != 1 {
		t.Fatalf("notifier fired %d times, want exactly 1", got)
	}
}
func TestRegistryUnknownNotifierIsNoop(t *testing.T) {
reg := NewRegistry(time.Second)
reg.AddRoute(Route{Notifier: "does-not-exist"})
// Should not panic or block.
reg.Dispatch(Event{Kind: KindRunCompleted})
}
func TestRegistryFailureDoesNotPoisonOthers(t *testing.T) {
reg := NewRegistry(time.Second)
bad := &stubNotifier{name: "bad", failOn: KindStageFailed}
good := &stubNotifier{name: "good"}
reg.Register(bad)
reg.Register(good)
reg.AddRoute(Route{Notifier: "bad"})
reg.AddRoute(Route{Notifier: "good"})
reg.Dispatch(Event{Kind: KindStageFailed, Severity: SeverityCritical})
awaitCalls(t, map[*stubNotifier]int{bad: 1, good: 1})
}
func TestNtfyNotifierPOSTsBodyAndHeaders(t *testing.T) {
var captured *http.Request
var body string
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
captured = r
b, _ := io.ReadAll(r.Body)
body = string(b)
w.WriteHeader(http.StatusOK)
}))
defer srv.Close()
n := NewNtfy("n", srv.URL, "vetting")
err := n.Send(context.Background(), Event{
Kind: KindStageFailed,
Severity: SeverityCritical,
Title: "host-01 FAILED",
Body: "SMART failed",
URL: "https://vetting.example/reports/42",
})
if err != nil {
t.Fatalf("send: %v", err)
}
if captured.Method != http.MethodPost {
t.Fatalf("method = %s, want POST", captured.Method)
}
if captured.URL.Path != "/vetting" {
t.Fatalf("path = %s, want /vetting", captured.URL.Path)
}
if got := captured.Header.Get("X-Title"); got != "host-01 FAILED" {
t.Fatalf("X-Title = %q", got)
}
if got := captured.Header.Get("X-Click"); got != "https://vetting.example/reports/42" {
t.Fatalf("X-Click = %q", got)
}
if got := captured.Header.Get("X-Priority"); got != "5" {
t.Fatalf("X-Priority = %q, want 5 for critical", got)
}
if body != "SMART failed" {
t.Fatalf("body = %q, want %q", body, "SMART failed")
}
}
// TestNtfyNotifierNon2xxErrors checks that a 429 response surfaces as an
// error that mentions the status code.
func TestNtfyNotifierNon2xxErrors(t *testing.T) {
	rateLimited := func(w http.ResponseWriter, _ *http.Request) {
		http.Error(w, "rate limited", http.StatusTooManyRequests)
	}
	srv := httptest.NewServer(http.HandlerFunc(rateLimited))
	defer srv.Close()

	notifier := NewNtfy("n", srv.URL, "t")
	err := notifier.Send(context.Background(), Event{Kind: KindRunCompleted, Body: "x"})

	mentions429 := err != nil && strings.Contains(err.Error(), "429")
	if !mentions429 {
		t.Fatalf("want 429 error, got %v", err)
	}
}
func TestDiscordNotifierPOSTsEmbed(t *testing.T) {
var body string
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
b, _ := io.ReadAll(r.Body)
body = string(b)
w.WriteHeader(http.StatusNoContent)
}))
defer srv.Close()
d := NewDiscord("d", srv.URL)
err := d.Send(context.Background(), Event{
Kind: KindRunCompleted,
Severity: SeverityInfo,
Title: "host-01 passed",
Body: "all green",
URL: "https://vetting.example/reports/1",
})
if err != nil {
t.Fatalf("send: %v", err)
}
// Body should be a JSON payload containing an embeds array with our
// title/description/URL.
for _, want := range []string{`"embeds"`, `"host-01 passed"`, `"all green"`, `reports/1`} {
if !strings.Contains(body, want) {
t.Errorf("body missing %q: %s", want, body)
}
}
}
// TestSMTPNotifierInvokesSendMail swaps in a capturing SendMailFn and
// checks the address, envelope and rendered message passed to it.
func TestSMTPNotifierInvokesSendMail(t *testing.T) {
	var calls int32
	var sent struct {
		addr, from string
		to         []string
		msg        []byte
	}

	notifier := NewSMTP("s", "mail.example", 2525, "vetting@example", []string{"ops@example"})
	notifier.SendMailFn = func(addr string, _ smtp.Auth, from string, to []string, msg []byte) error {
		atomic.AddInt32(&calls, 1)
		sent.addr, sent.from, sent.to, sent.msg = addr, from, to, msg
		return nil
	}

	ev := Event{
		Kind:  KindStageFailed,
		Title: "subj",
		Body:  "failure body",
		URL:   "https://vetting.example/reports/9",
	}
	if err := notifier.Send(context.Background(), ev); err != nil {
		t.Fatalf("send: %v", err)
	}

	if atomic.LoadInt32(&calls) != 1 {
		t.Fatal("SendMailFn not called")
	}
	if sent.addr != "mail.example:2525" {
		t.Fatalf("addr = %q", sent.addr)
	}
	if sent.from != "vetting@example" {
		t.Fatalf("from = %q", sent.from)
	}
	if len(sent.to) != 1 || sent.to[0] != "ops@example" {
		t.Fatalf("to = %v", sent.to)
	}

	rendered := string(sent.msg)
	expected := []string{"Subject: subj", "failure body", "Link: https://vetting.example/reports/9"}
	for _, want := range expected {
		if strings.Contains(rendered, want) {
			continue
		}
		t.Errorf("message missing %q", want)
	}
}
// TestSMTPNotifierRejectsIncompleteConfig checks that Send refuses to run
// when host, from and to are all unset.
func TestSMTPNotifierRejectsIncompleteConfig(t *testing.T) {
	bare := &SMTPNotifier{NameStr: "s"}
	err := bare.Send(context.Background(), Event{Kind: KindRunCompleted})
	if err == nil {
		t.Fatal("want error, got nil")
	}
}
+90
View File
@@ -0,0 +1,90 @@
package notify
import (
"context"
"fmt"
"io"
"net/http"
"strings"
"time"
)
// NtfyNotifier posts to ntfy.sh (or a self-hosted ntfy server). Message
// body is the plain text body; title and URL are passed via X-Title and
// X-Click headers so ntfy renders them as the push title + deep link.
type NtfyNotifier struct {
// NameStr is the value returned by Name.
NameStr string
Server string // e.g. "https://ntfy.sh" or self-hosted
// Topic is appended to Server as the request path; Send returns an error
// when it is empty.
Topic string
// HTTP is the client Send uses for delivery. NewNtfy installs one with
// a 10s timeout.
HTTP *http.Client
}
// NewNtfy returns an NtfyNotifier for the given server and topic. An
// empty server falls back to "https://ntfy.sh"; trailing slashes are
// trimmed so Send can join server and topic with a single "/". The
// notifier gets its own HTTP client with a 10s timeout.
func NewNtfy(name, server, topic string) *NtfyNotifier {
	base := server
	if base == "" {
		base = "https://ntfy.sh"
	}
	base = strings.TrimRight(base, "/")

	client := &http.Client{Timeout: 10 * time.Second}
	return &NtfyNotifier{NameStr: name, Server: base, Topic: topic, HTTP: client}
}
// Name returns the notifier's configured name (NameStr).
func (n *NtfyNotifier) Name() string {
	return n.NameStr
}
// Send publishes ev to <Server>/<Topic> with the event body as the plain
// text request body. Title and URL, when set, travel in the X-Title and
// X-Click headers; X-Priority and X-Tags are derived from the event's
// severity and kind.
//
// It returns an error when no topic is configured, when the request
// cannot be built or sent, or when the server answers with a status of
// 300 or above; in the last case the error includes the status code and a
// bounded excerpt of the response body. ctx bounds the request.
func (n *NtfyNotifier) Send(ctx context.Context, ev Event) error {
	if n.Topic == "" {
		return fmt.Errorf("ntfy: no topic configured")
	}

	endpoint := n.Server + "/" + n.Topic
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, strings.NewReader(ev.Body))
	if err != nil {
		return fmt.Errorf("ntfy: build request: %w", err)
	}
	if ev.Title != "" {
		req.Header.Set("X-Title", ev.Title)
	}
	if ev.URL != "" {
		req.Header.Set("X-Click", ev.URL)
	}
	req.Header.Set("X-Priority", priorityForSeverity(ev.Severity))
	req.Header.Set("X-Tags", ntfyTag(ev.Kind, ev.Severity))

	// An NtfyNotifier built as a struct literal has no client; fall back
	// to the default one instead of dereferencing nil. ctx still bounds
	// the call.
	client := n.HTTP
	if client == nil {
		client = http.DefaultClient
	}
	resp, err := client.Do(req)
	if err != nil {
		return fmt.Errorf("ntfy: %w", err)
	}
	defer func() { _ = resp.Body.Close() }()

	if resp.StatusCode >= 300 {
		// The body only decorates the error message; cap it so a
		// misbehaving endpoint cannot make us buffer an arbitrary amount.
		b, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
		return fmt.Errorf("ntfy: %d: %s", resp.StatusCode, strings.TrimSpace(string(b)))
	}
	return nil
}
// priorityForSeverity maps our severities onto ntfy's 1-5 priority
// scale: critical → "5", warning → "4", and anything else (including
// "info") → "3", ntfy's default.
func priorityForSeverity(s Severity) string {
	if s == SeverityCritical {
		return "5"
	}
	if s == SeverityWarning {
		return "4"
	}
	return "3"
}
// ntfyTag builds the X-Tags header value: an optional emoji tag followed
// by the kind name. Critical severity always wins ("rotating_light");
// otherwise RunCompleted gets "white_check_mark" and HoldingOpened gets
// "construction". Any other combination yields the bare kind name.
func ntfyTag(k Kind, s Severity) string {
	emoji := ""
	if s == SeverityCritical {
		emoji = "rotating_light"
	} else if k == KindRunCompleted {
		emoji = "white_check_mark"
	} else if k == KindHoldingOpened {
		emoji = "construction"
	}

	if emoji == "" {
		return string(k)
	}
	return emoji + "," + string(k)
}
+81
View File
@@ -0,0 +1,81 @@
package notify
import (
"context"
"fmt"
"net/smtp"
"strconv"
"strings"
)
// SMTPNotifier sends a plaintext email. Authentication is left at zero
// (LAN-only relay assumed); if the configured server requires auth the
// Send call will return an error and the Registry will log it.
//
// SendMailFn is overridable so tests can capture the outgoing message
// without needing a live SMTP server.
type SMTPNotifier struct {
// NameStr is the value returned by Name.
NameStr string
// Host and Port are joined as "host:port" to form the server address.
// NewSMTP turns a zero port into 25.
Host string
Port int
// From is used both as the envelope sender and in the From: header.
From string
// To is used both as the envelope recipients and in the To: header.
To []string
// SendMailFn performs the delivery; NewSMTP sets it to smtp.SendMail.
SendMailFn func(addr string, a smtp.Auth, from string, to []string, msg []byte) error
}
// NewSMTP returns an SMTPNotifier for the given relay and addresses. A
// zero port defaults to 25, and delivery goes through smtp.SendMail
// unless the caller replaces SendMailFn afterwards.
func NewSMTP(name, host string, port int, from string, to []string) *SMTPNotifier {
	n := &SMTPNotifier{
		NameStr:    name,
		Host:       host,
		Port:       25,
		From:       from,
		To:         to,
		SendMailFn: smtp.SendMail,
	}
	if port != 0 {
		n.Port = port
	}
	return n
}
// Name returns the notifier's configured name (NameStr).
func (s *SMTPNotifier) Name() string {
	return s.NameStr
}
// Send renders ev as a plaintext email and hands it to SendMailFn with no
// authentication.
//
// It returns an error when host, from or to is missing, when ctx is
// already cancelled or expired, or when the underlying send fails. A
// notifier built as a struct literal still works: a nil SendMailFn falls
// back to smtp.SendMail and a zero Port falls back to 25, mirroring
// NewSMTP.
func (s *SMTPNotifier) Send(ctx context.Context, ev Event) error {
	if s.Host == "" || s.From == "" || len(s.To) == 0 {
		return fmt.Errorf("smtp: incomplete config (host/from/to required)")
	}
	// net/smtp.SendMail doesn't accept a context, so an in-flight send
	// cannot be interrupted; for a LAN relay with a short TCP timeout the
	// Registry's goroutine may outlive its deadline, but only by seconds.
	// The least we can do is not start a send whose budget is already gone.
	if err := ctx.Err(); err != nil {
		return fmt.Errorf("smtp: %w", err)
	}

	send := s.SendMailFn
	if send == nil {
		send = smtp.SendMail
	}
	port := s.Port
	if port == 0 {
		port = 25
	}

	addr := s.Host + ":" + strconv.Itoa(port)
	msg := buildEmail(s.From, s.To, ev)
	return send(addr, nil, s.From, s.To, msg)
}
// buildEmail renders a minimal plaintext message: From, To, Subject and
// Content-Type headers, a blank line, the event body, and — when the
// event has a URL — a trailing "Link: <url>" paragraph so the recipient
// can click through from a text mail client. No MIME for now — keeps it
// robust.
//
// The subject is ev.Title, or "[vetting] <Kind>" when the title is empty.
// CR and LF inside any header value are replaced with spaces so a title
// or address containing a line break cannot terminate the header early
// or inject additional headers.
func buildEmail(from string, to []string, ev Event) []byte {
	// "\r\n" is listed first so a CRLF pair collapses to a single space.
	oneLine := strings.NewReplacer("\r\n", " ", "\r", " ", "\n", " ")

	subject := ev.Title
	if subject == "" {
		subject = "[vetting] " + string(ev.Kind)
	}

	var b strings.Builder
	b.WriteString("From: ")
	b.WriteString(oneLine.Replace(from))
	b.WriteString("\r\n")
	b.WriteString("To: ")
	b.WriteString(oneLine.Replace(strings.Join(to, ", ")))
	b.WriteString("\r\n")
	b.WriteString("Subject: ")
	b.WriteString(oneLine.Replace(subject))
	b.WriteString("\r\n")
	b.WriteString("Content-Type: text/plain; charset=UTF-8\r\n")
	b.WriteString("\r\n")

	b.WriteString(ev.Body)
	if ev.URL != "" {
		b.WriteString("\r\n\r\nLink: ")
		b.WriteString(ev.URL)
	}
	b.WriteString("\r\n")
	return []byte(b.String())
}