Post-repair hardware validation pipeline for Proxmox cluster hosts. Go orchestrator + in-image agent + mkosi live image + bundled dnsmasq PXE + SQLite + HTMX/SSE UI + notify registry + janitor + full docs.
This commit is contained in:
@@ -0,0 +1,179 @@
|
||||
// Package notify owns outbound operator notifications. The orchestrator
|
||||
// fires Events at well-known points (stage failure, hold opened, run
|
||||
// completed, spec mismatch); a Registry matches each Event against
|
||||
// config-declared routes and dispatches to the matching Notifiers.
|
||||
//
|
||||
// Delivery is fire-and-forget: a single HTTP/SMTP attempt per notifier
|
||||
// with a bounded timeout. Failures are logged and nothing is persisted
|
||||
// — on a solo LAN deployment the orchestrator UI is the source of truth
|
||||
// and we don't want to build a durable queue for a convenience feature.
|
||||
package notify
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Kind names an event type the orchestrator can fire. The string values
// are part of the configuration surface — they are what a config file's
// match_kind list refers to — so they must stay stable.
type Kind string

// The event kinds this package defines.
const (
	KindStageFailed   = Kind("StageFailed")
	KindSpecMismatch  = Kind("SpecMismatch")
	KindHoldingOpened = Kind("HoldingOpened")
	KindRunCompleted  = Kind("RunCompleted")
)
|
||||
|
||||
// Severity classifies an event so that routes can filter on it. By the
// package's convention "critical" goes with StageFailed, SpecMismatch and
// HoldingOpened, while RunCompleted is reported as "info".
type Severity string

// The severity levels this package defines.
const (
	SeverityInfo     = Severity("info")
	SeverityWarning  = Severity("warning")
	SeverityCritical = Severity("critical")
)
|
||||
|
||||
// Event is the payload passed to each Notifier's Send method. Title and
|
||||
// Body are pre-rendered; notifiers shape them for their own transport
|
||||
// (e.g. Discord embed vs SMTP body) but shouldn't re-compose semantics.
|
||||
//
|
||||
// URL links back to the orchestrator UI so a push notification can be
|
||||
// clicked through for full context.
|
||||
type Event struct {
|
||||
Kind Kind
|
||||
Severity Severity
|
||||
RunID int64
|
||||
HostName string
|
||||
Title string
|
||||
Body string
|
||||
URL string // optional; UI link for this run/host
|
||||
}
|
||||
|
||||
// Notifier is one delivery target. Implementations must not block on
|
||||
// remote-side failure any longer than their own timeout — the Registry
|
||||
// calls Send from a goroutine but still wants the goroutine to exit.
|
||||
type Notifier interface {
|
||||
Name() string
|
||||
Send(ctx context.Context, ev Event) error
|
||||
}
|
||||
|
||||
// Route binds an event selector to a notifier name. A route matches an
|
||||
// event when every non-empty field is satisfied; empty fields are wildcards.
|
||||
type Route struct {
|
||||
MatchKind []Kind
|
||||
MatchSeverity []Severity
|
||||
Notifier string // name of a registered Notifier
|
||||
}
|
||||
|
||||
// Registry holds notifiers + routes and fans events out. Safe for
|
||||
// concurrent Dispatch. It's built once at startup from config.
|
||||
type Registry struct {
|
||||
notifiers map[string]Notifier
|
||||
routes []Route
|
||||
timeout time.Duration
|
||||
|
||||
mu sync.Mutex // guards in-flight goroutine count (future-use metrics)
|
||||
}
|
||||
|
||||
// NewRegistry builds a Registry with its per-notification timeout budget.
|
||||
// A zero timeout becomes 10s so tests and prod both get sane defaults.
|
||||
func NewRegistry(timeout time.Duration) *Registry {
|
||||
if timeout <= 0 {
|
||||
timeout = 10 * time.Second
|
||||
}
|
||||
return &Registry{
|
||||
notifiers: map[string]Notifier{},
|
||||
timeout: timeout,
|
||||
}
|
||||
}
|
||||
|
||||
// Register adds a Notifier. Re-registering a name overwrites silently —
|
||||
// configs can shadow by listing the same name twice.
|
||||
func (r *Registry) Register(n Notifier) {
|
||||
if n == nil {
|
||||
return
|
||||
}
|
||||
r.notifiers[n.Name()] = n
|
||||
}
|
||||
|
||||
// AddRoute appends a route rule. Order is preserved for deterministic
|
||||
// multi-match dispatch.
|
||||
func (r *Registry) AddRoute(rt Route) {
|
||||
r.routes = append(r.routes, rt)
|
||||
}
|
||||
|
||||
// Dispatch finds every route matching ev and fires each targeted
|
||||
// notifier on its own goroutine. Returns immediately — the caller does
|
||||
// not wait on delivery. Errors are logged.
|
||||
func (r *Registry) Dispatch(ev Event) {
|
||||
targets := r.match(ev)
|
||||
if len(targets) == 0 {
|
||||
return
|
||||
}
|
||||
for _, n := range targets {
|
||||
n := n
|
||||
go func() {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), r.timeout)
|
||||
defer cancel()
|
||||
if err := n.Send(ctx, ev); err != nil {
|
||||
log.Printf("notify: %s send(%s run=%d): %v", n.Name(), ev.Kind, ev.RunID, err)
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
// match walks the route table in order and returns the unique notifiers
|
||||
// that should be fired for ev. Duplicates (same notifier named by two
|
||||
// matching routes) collapse — the operator intent is delivery, not
|
||||
// duplicate delivery.
|
||||
func (r *Registry) match(ev Event) []Notifier {
|
||||
seen := map[string]bool{}
|
||||
out := []Notifier{}
|
||||
for _, rt := range r.routes {
|
||||
if !matchesKind(rt.MatchKind, ev.Kind) {
|
||||
continue
|
||||
}
|
||||
if !matchesSeverity(rt.MatchSeverity, ev.Severity) {
|
||||
continue
|
||||
}
|
||||
if seen[rt.Notifier] {
|
||||
continue
|
||||
}
|
||||
n, ok := r.notifiers[rt.Notifier]
|
||||
if !ok {
|
||||
log.Printf("notify: route references unknown notifier %q", rt.Notifier)
|
||||
continue
|
||||
}
|
||||
seen[rt.Notifier] = true
|
||||
out = append(out, n)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func matchesKind(allow []Kind, got Kind) bool {
|
||||
if len(allow) == 0 {
|
||||
return true
|
||||
}
|
||||
for _, k := range allow {
|
||||
if k == got {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func matchesSeverity(allow []Severity, got Severity) bool {
|
||||
if len(allow) == 0 {
|
||||
return true
|
||||
}
|
||||
for _, s := range allow {
|
||||
if s == got {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
Reference in New Issue
Block a user