9bb4b09a04
CI / Lint + build + test (push) Has been cancelled
Post-repair hardware validation pipeline for Proxmox cluster hosts. Go orchestrator + in-image agent + mkosi live image + bundled dnsmasq PXE + SQLite + HTMX/SSE UI + notify registry + janitor + full docs.
180 lines
4.9 KiB
Go
180 lines
4.9 KiB
Go
// Package notify owns outbound operator notifications. The orchestrator
// fires Events at well-known points (stage failure, hold opened, run
// completed, spec mismatch); a Registry matches each Event against
// config-declared routes and dispatches to the matching Notifiers.
//
// Delivery is fire-and-forget: a single HTTP/SMTP attempt per notifier
// with a bounded timeout. Failures are logged and nothing is persisted
// — on a solo LAN deployment the orchestrator UI is the source of truth
// and we don't want to build a durable queue for a convenience feature.
|
|
package notify
|
|
|
|
import (
|
|
"context"
|
|
"log"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// Kind identifies which orchestrator event is being reported. The string
// values are stable because config files name them in match_kind lists.
type Kind string

// Event kinds the orchestrator can fire.
const (
	KindStageFailed   = Kind("StageFailed")
	KindSpecMismatch  = Kind("SpecMismatch")
	KindHoldingOpened = Kind("HoldingOpened")
	KindRunCompleted  = Kind("RunCompleted")
)
|
|
|
|
// Severity classifies an Event so routes can filter on it. "critical"
// pairs with StageFailed, SpecMismatch and HoldingOpened; RunCompleted
// uses "info".
type Severity string

// Severity levels a route's MatchSeverity list may name.
const (
	SeverityInfo     = Severity("info")
	SeverityWarning  = Severity("warning")
	SeverityCritical = Severity("critical")
)
|
|
|
|
// Event is the payload passed to each Notifier's Send method. Title and
|
|
// Body are pre-rendered; notifiers shape them for their own transport
|
|
// (e.g. Discord embed vs SMTP body) but shouldn't re-compose semantics.
|
|
//
|
|
// URL links back to the orchestrator UI so a push notification can be
|
|
// clicked through for full context.
|
|
type Event struct {
|
|
Kind Kind
|
|
Severity Severity
|
|
RunID int64
|
|
HostName string
|
|
Title string
|
|
Body string
|
|
URL string // optional; UI link for this run/host
|
|
}
|
|
|
|
// Notifier is one delivery target. Implementations must not block on
|
|
// remote-side failure any longer than their own timeout — the Registry
|
|
// calls Send from a goroutine but still wants the goroutine to exit.
|
|
type Notifier interface {
|
|
Name() string
|
|
Send(ctx context.Context, ev Event) error
|
|
}
|
|
|
|
// Route binds an event selector to a notifier name. A route matches an
|
|
// event when every non-empty field is satisfied; empty fields are wildcards.
|
|
type Route struct {
|
|
MatchKind []Kind
|
|
MatchSeverity []Severity
|
|
Notifier string // name of a registered Notifier
|
|
}
|
|
|
|
// Registry holds notifiers + routes and fans events out. Safe for
|
|
// concurrent Dispatch. It's built once at startup from config.
|
|
type Registry struct {
|
|
notifiers map[string]Notifier
|
|
routes []Route
|
|
timeout time.Duration
|
|
|
|
mu sync.Mutex // guards in-flight goroutine count (future-use metrics)
|
|
}
|
|
|
|
// NewRegistry builds a Registry with its per-notification timeout budget.
|
|
// A zero timeout becomes 10s so tests and prod both get sane defaults.
|
|
func NewRegistry(timeout time.Duration) *Registry {
|
|
if timeout <= 0 {
|
|
timeout = 10 * time.Second
|
|
}
|
|
return &Registry{
|
|
notifiers: map[string]Notifier{},
|
|
timeout: timeout,
|
|
}
|
|
}
|
|
|
|
// Register adds a Notifier. Re-registering a name overwrites silently —
|
|
// configs can shadow by listing the same name twice.
|
|
func (r *Registry) Register(n Notifier) {
|
|
if n == nil {
|
|
return
|
|
}
|
|
r.notifiers[n.Name()] = n
|
|
}
|
|
|
|
// AddRoute appends a route rule. Order is preserved for deterministic
|
|
// multi-match dispatch.
|
|
func (r *Registry) AddRoute(rt Route) {
|
|
r.routes = append(r.routes, rt)
|
|
}
|
|
|
|
// Dispatch finds every route matching ev and fires each targeted
|
|
// notifier on its own goroutine. Returns immediately — the caller does
|
|
// not wait on delivery. Errors are logged.
|
|
func (r *Registry) Dispatch(ev Event) {
|
|
targets := r.match(ev)
|
|
if len(targets) == 0 {
|
|
return
|
|
}
|
|
for _, n := range targets {
|
|
n := n
|
|
go func() {
|
|
ctx, cancel := context.WithTimeout(context.Background(), r.timeout)
|
|
defer cancel()
|
|
if err := n.Send(ctx, ev); err != nil {
|
|
log.Printf("notify: %s send(%s run=%d): %v", n.Name(), ev.Kind, ev.RunID, err)
|
|
}
|
|
}()
|
|
}
|
|
}
|
|
|
|
// match walks the route table in order and returns the unique notifiers
|
|
// that should be fired for ev. Duplicates (same notifier named by two
|
|
// matching routes) collapse — the operator intent is delivery, not
|
|
// duplicate delivery.
|
|
func (r *Registry) match(ev Event) []Notifier {
|
|
seen := map[string]bool{}
|
|
out := []Notifier{}
|
|
for _, rt := range r.routes {
|
|
if !matchesKind(rt.MatchKind, ev.Kind) {
|
|
continue
|
|
}
|
|
if !matchesSeverity(rt.MatchSeverity, ev.Severity) {
|
|
continue
|
|
}
|
|
if seen[rt.Notifier] {
|
|
continue
|
|
}
|
|
n, ok := r.notifiers[rt.Notifier]
|
|
if !ok {
|
|
log.Printf("notify: route references unknown notifier %q", rt.Notifier)
|
|
continue
|
|
}
|
|
seen[rt.Notifier] = true
|
|
out = append(out, n)
|
|
}
|
|
return out
|
|
}
|
|
|
|
func matchesKind(allow []Kind, got Kind) bool {
|
|
if len(allow) == 0 {
|
|
return true
|
|
}
|
|
for _, k := range allow {
|
|
if k == got {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func matchesSeverity(allow []Severity, got Severity) bool {
|
|
if len(allow) == 0 {
|
|
return true
|
|
}
|
|
for _, s := range allow {
|
|
if s == got {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|