Initial commit: full Phases 1-6 implementation
CI / Lint + build + test (push) Has been cancelled

Post-repair hardware validation pipeline for Proxmox cluster hosts.
Go orchestrator + in-image agent + mkosi live image + bundled dnsmasq
PXE + SQLite + HTMX/SSE UI + notify registry + janitor + full docs.
This commit is contained in:
2026-04-17 21:32:10 -04:00
commit 9bb4b09a04
98 changed files with 11960 additions and 0 deletions
+179
View File
@@ -0,0 +1,179 @@
// Package notify owns outbound operator notifications. The orchestrator
// fires Events at well-known points (stage failure, hold opened, run
// completed, spec mismatch); a Registry matches each Event against
// config-declared routes and dispatches to the matching Notifiers.
//
// Delivery is fire-and-forget: a single HTTP/SMTP attempt per notifier
// with a bounded timeout. Failures are logged and nothing is persisted
// — on a solo LAN deployment the orchestrator UI is the source of truth
// and we don't want to build a durable queue for a convenience feature.
package notify
import (
"context"
"log"
"sync"
"time"
)
// Kind names the type of event the orchestrator fires. The string values
// are stable identifiers: config files reference them in match_kind lists.
type Kind string

// Event kinds the orchestrator can fire.
const (
	// KindStageFailed is fired on a stage failure.
	KindStageFailed = Kind("StageFailed")
	// KindSpecMismatch is fired on a spec mismatch.
	KindSpecMismatch = Kind("SpecMismatch")
	// KindHoldingOpened is fired when a hold is opened.
	KindHoldingOpened = Kind("HoldingOpened")
	// KindRunCompleted is fired when a run completes.
	KindRunCompleted = Kind("RunCompleted")
)
// Severity classifies an event so routes can filter on it. "critical"
// pairs with StageFailed/SpecMismatch/HoldingOpened, while RunCompleted
// uses "info".
type Severity string

// Severity levels accepted in route filters.
const (
	SeverityInfo     = Severity("info")
	SeverityWarning  = Severity("warning")
	SeverityCritical = Severity("critical")
)
// Event is the payload passed to each Notifier's Send method. Title and
// Body are pre-rendered; notifiers shape them for their own transport
// (e.g. Discord embed vs SMTP body) but shouldn't re-compose semantics.
//
// URL links back to the orchestrator UI so a push notification can be
// clicked through for full context.
type Event struct {
// Kind is the event type; routes filter on it through Route.MatchKind.
Kind Kind
// Severity is the event's classification; routes filter on it through
// Route.MatchSeverity.
Severity Severity
// RunID identifies the run the event belongs to. Dispatch includes it
// in the log line written when a send fails.
RunID int64
// HostName presumably names the host the run targets — confirm against
// the code that builds Events.
HostName string
// Title is the pre-rendered headline.
Title string
// Body is the pre-rendered message text.
Body string
URL string // optional; UI link for this run/host
}
// Notifier is one delivery target. Implementations must not block on
// remote-side failure any longer than their own timeout — the Registry
// calls Send from a goroutine but still wants the goroutine to exit.
type Notifier interface {
// Name returns the key the notifier is registered under. Route.Notifier
// refers to it, and Dispatch prints it when a send fails.
Name() string
// Send delivers ev. When called by Registry.Dispatch, ctx carries the
// registry's per-notification timeout. A non-nil error is logged by
// Dispatch; Dispatch makes no second attempt.
Send(ctx context.Context, ev Event) error
}
// Route binds an event selector to a notifier name. A route matches an
// event when every non-empty field is satisfied; empty fields are wildcards.
type Route struct {
// MatchKind lists the event kinds this route accepts. An empty list
// accepts every kind.
MatchKind []Kind
// MatchSeverity lists the severities this route accepts. An empty list
// accepts every severity.
MatchSeverity []Severity
// Notifier is looked up in the Registry when an event matches; a name
// with no registered Notifier is logged and the route is skipped.
Notifier string // name of a registered Notifier
}
// Registry holds notifiers + routes and fans events out. Safe for
// concurrent Dispatch. It's built once at startup from config.
//
// Register and AddRoute write to the notifier map and route table without
// taking a lock, so they must not run concurrently with each other or
// with Dispatch; finish all setup before events start flowing.
type Registry struct {
notifiers map[string]Notifier // keyed by Notifier.Name(); written by Register
routes []Route // in the order AddRoute received them
timeout time.Duration // per-Send context timeout applied by Dispatch
mu sync.Mutex // reserved for a future in-flight goroutine count; never locked by the methods shown in this file
}
// NewRegistry returns an empty Registry that gives each notification
// timeout as its delivery budget. A zero or negative timeout is replaced
// by 10s so tests and prod both end up with a usable deadline.
func NewRegistry(timeout time.Duration) *Registry {
	budget := timeout
	if budget <= 0 {
		budget = 10 * time.Second
	}

	reg := new(Registry)
	reg.notifiers = make(map[string]Notifier)
	reg.timeout = budget
	return reg
}
// Register adds n under the key n.Name(). Re-registering a name silently
// replaces the earlier Notifier, so configs can shadow an entry by
// listing the same name twice. A nil Notifier is ignored.
//
// The notifier map is created on first use, so a Registry that was not
// built by NewRegistry (a zero value) does not panic on a nil-map write.
func (r *Registry) Register(n Notifier) {
	if n == nil {
		return
	}
	if r.notifiers == nil {
		r.notifiers = make(map[string]Notifier)
	}
	r.notifiers[n.Name()] = n
}
// AddRoute places rt at the end of the route table. Routes are consulted
// in insertion order, which keeps multi-match dispatch deterministic.
func (r *Registry) AddRoute(rt Route) {
	table := append(r.routes, rt)
	r.routes = table
}
// Dispatch finds every route matching ev and fires each targeted
// notifier on its own goroutine. It returns immediately — the caller does
// not wait on delivery — and every failure is logged rather than returned.
//
// Each goroutine gets a fresh context bounded by the registry timeout. A
// Registry that was not built by NewRegistry has a zero timeout, which
// would hand every notifier an already-expired context, so a non-positive
// timeout falls back to 10s here.
//
// A panic raised inside a notifier is recovered and logged: notification
// is a best-effort convenience and must not take the orchestrator down.
func (r *Registry) Dispatch(ev Event) {
	// Mirrors the default NewRegistry applies.
	const fallbackTimeout = 10 * time.Second

	targets := r.match(ev)
	if len(targets) == 0 {
		return
	}

	timeout := r.timeout
	if timeout <= 0 {
		timeout = fallbackTimeout
	}

	for _, n := range targets {
		// n is passed as an argument so each goroutine owns its notifier
		// regardless of the Go version's loop-variable semantics.
		go func(n Notifier) {
			defer func() {
				if p := recover(); p != nil {
					log.Printf("notify: %s send(%s run=%d): panic: %v", n.Name(), ev.Kind, ev.RunID, p)
				}
			}()

			ctx, cancel := context.WithTimeout(context.Background(), timeout)
			defer cancel()
			if err := n.Send(ctx, ev); err != nil {
				log.Printf("notify: %s send(%s run=%d): %v", n.Name(), ev.Kind, ev.RunID, err)
			}
		}(n)
	}
}
// match scans the route table in insertion order and collects the
// notifiers ev should be delivered to. A notifier named by several
// matching routes appears once, at the position of its first matching
// route — the operator intent is delivery, not duplicate delivery.
// Routes naming an unregistered notifier are logged and skipped. The
// result is empty (never nil) when nothing matches.
func (r *Registry) match(ev Event) []Notifier {
	picked := make([]Notifier, 0)
	already := make(map[string]struct{})

	for i := range r.routes {
		route := &r.routes[i]

		selected := matchesKind(route.MatchKind, ev.Kind) &&
			matchesSeverity(route.MatchSeverity, ev.Severity)
		if !selected {
			continue
		}

		name := route.Notifier
		if _, dup := already[name]; dup {
			continue
		}

		target, known := r.notifiers[name]
		if !known {
			log.Printf("notify: route references unknown notifier %q", name)
			continue
		}

		already[name] = struct{}{}
		picked = append(picked, target)
	}
	return picked
}
// matchesKind reports whether got passes the allow-list. An empty or nil
// allow-list is a wildcard that admits every Kind.
func matchesKind(allow []Kind, got Kind) bool {
	matched := len(allow) == 0
	for i := 0; !matched && i < len(allow); i++ {
		matched = allow[i] == got
	}
	return matched
}
// matchesSeverity reports whether got passes the allow-list. An empty or
// nil allow-list is a wildcard that admits every Severity.
func matchesSeverity(allow []Severity, got Severity) bool {
	matched := len(allow) == 0
	for i := 0; !matched && i < len(allow); i++ {
		matched = allow[i] == got
	}
	return matched
}