package orchestrator

import (
	"fmt"
	"strings"
)

// ThresholdOp is one of the comparison operators a threshold supports.
// within_pct is the only one that cares about a "nominal" value for
// the key — used for PSU rails ("+12V within 5% of 12.0").
type ThresholdOp string

// Operators understood by evaluateOp. Any other value makes evaluateOp
// return an error, and Evaluate then skips that threshold.
const (
	OpLT        ThresholdOp = "lt"         // passes when observed < Value
	OpLTE       ThresholdOp = "lte"        // passes when observed <= Value
	OpGT        ThresholdOp = "gt"         // passes when observed > Value
	OpGTE       ThresholdOp = "gte"        // passes when observed >= Value
	OpWithinPct ThresholdOp = "within_pct" // passes when |observed - Nominal| <= Value percent of |Nominal|
)

// ThresholdSeverity routes a breach to either "fail the run" or "just
// surface a warning in the report". The evaluator returns it (inside
// EvalResult.Threshold) alongside the Passed flag so the caller can
// decide whether to transition the run.
type ThresholdSeverity string

// Severity levels. SeverityCritical is the only one for which
// EvalResult.CriticalBreach can report true.
const (
	SeverityCritical ThresholdSeverity = "critical"
	SeverityWarning  ThresholdSeverity = "warning"
)

// Threshold is the evaluator's view of a stored threshold row. It's a
// flat, already-parsed value-object — the evaluator doesn't look at
// the DB and the store doesn't look at the evaluator.
type Threshold struct {
	// ID is not interpreted by the evaluator; it is only carried through
	// into EvalResult.Threshold. Presumably the stored row's ID — confirm
	// against the store.
	ID int64

	// Stage selects which sample stages the threshold applies to. "*"
	// matches any stage, and an empty Stage is treated like "*".
	Stage string

	// Kind must equal Sample.Kind exactly; there is no wildcard for it.
	Kind string

	// Key is glob-ish: "*" / "prefix*" / "*suffix" / exact. keyMatches
	// additionally accepts "*infix*" (substring match) and treats an
	// empty Key like "*".
	Key string

	// Op is the comparison applied to the sample value.
	Op ThresholdOp

	// Value is the comparison operand for lt/lte/gt/gte, and the allowed
	// deviation in percent for within_pct.
	Value float64

	// Nominal is only read for within_pct (nominal voltage/frequency).
	// A zero Nominal makes a within_pct rule always pass.
	Nominal float64

	// Severity says whether a breach is critical or only a warning.
	Severity ThresholdSeverity
}

// Sample is a single observation the evaluator tests against matching
// thresholds. Stage may be empty when the agent doesn't know which
// stage posted it (e.g. the thermal sidecar running across stages) —
// empty-stage samples only match thresholds whose Stage is "*" (or
// empty, which the evaluator treats the same as "*").
type Sample struct {
	// Stage is compared against Threshold.Stage; may be empty.
	Stage string

	// Kind must equal Threshold.Kind exactly for a threshold to apply.
	Kind string

	// Key is matched against the Threshold.Key pattern.
	Key string

	// Value is the observed number fed to the threshold's operator.
	Value float64
}

// EvalResult is the per-sample outcome of a threshold evaluation:
// which threshold was consulted (including its severity, so the caller
// can fast-fail on critical breaches), whether the sample passed, and
// the value that was observed.
type EvalResult struct {
	// Threshold is a copy of the rule that was evaluated.
	Threshold Threshold

	// Passed is true when the sample satisfied the threshold's operator.
	Passed bool

	// Observed is the Sample.Value that was tested.
	Observed float64
}

// Breached returns true when the sample violated the threshold, i.e.
// when Passed is false.
func (r EvalResult) Breached() bool {
	return !r.Passed
}

// CriticalBreach reports whether this result is a breach of a
// critical-severity threshold — the "fail the run right now" case.
// Passing results and warning-level breaches both yield false.
func (r EvalResult) CriticalBreach() bool {
	if r.Passed {
		return false
	}
	return r.Threshold.Severity == SeverityCritical
}

// Evaluate tests one sample against every threshold that applies to it
// and returns one EvalResult per applicable threshold, in the order the
// thresholds were given. Several thresholds may apply to the same
// sample (for instance a generic "*" rule plus a stage-specific
// override); each gets its own entry so both can be persisted.
//
// The returned slice is never nil; it is empty when nothing applies.
func Evaluate(sample Sample, thresholds []Threshold) []EvalResult {
	results := make([]EvalResult, 0, 1)
	for i := range thresholds {
		rule := thresholds[i]
		if !thresholdMatchesSample(rule, sample) {
			continue
		}
		ok, err := evaluateOp(rule.Op, sample.Value, rule.Value, rule.Nominal)
		// An error means the operator is unknown. Validation belongs at
		// insert time; here the rule is dropped rather than surfacing an
		// error that would turn every sensor write into a 500.
		if err == nil {
			results = append(results, EvalResult{
				Threshold: rule,
				Passed:    ok,
				Observed:  sample.Value,
			})
		}
	}
	return results
}

// thresholdMatchesSample is the stage + kind + key filter deciding
// whether a threshold applies to a sample. Kind is compared literally —
// there is no "any kind" threshold, and if one is ever needed it will
// get an explicit `kind: *` escape hatch. Stage and key go through
// their glob-ish matchers. The checks short-circuit in the order
// kind, stage, key.
func thresholdMatchesSample(t Threshold, s Sample) bool {
	return t.Kind == s.Kind &&
		stageMatches(t.Stage, s.Stage) &&
		keyMatches(t.Key, s.Key)
}

// stageMatches reports whether a threshold's stage selector applies to
// the sample's stage. "*" matches every stage, and an empty selector is
// handled the same way so a threshold declared without a stage key
// isn't accidentally inert. A sample with no stage therefore only
// matches those wildcard selectors — we don't guess.
func stageMatches(selector, sampleStage string) bool { if selector == "" || selector == "*" { return true } return selector == sampleStage } // keyMatches handles "*", "prefix*", "*suffix", and exact match. We // avoid pulling in filepath.Match so Windows `\`-vs-`/` rules don't // leak into the sample namespace (key "eth0/rx_errors" is not a path). func keyMatches(pattern, key string) bool { if pattern == "" || pattern == "*" { return true } hasPrefix := strings.HasPrefix(pattern, "*") hasSuffix := strings.HasSuffix(pattern, "*") switch { case hasPrefix && hasSuffix: inner := strings.TrimPrefix(strings.TrimSuffix(pattern, "*"), "*") return strings.Contains(key, inner) case hasSuffix: return strings.HasPrefix(key, strings.TrimSuffix(pattern, "*")) case hasPrefix: return strings.HasSuffix(key, strings.TrimPrefix(pattern, "*")) default: return pattern == key } } // evaluateOp does the numeric comparison. within_pct is the oddball: // it tests |observed - nominal| <= (pct / 100) * nominal. Returns an // error for unknown operators so the caller can log + drop. func evaluateOp(op ThresholdOp, observed, threshold, nominal float64) (bool, error) { switch op { case OpLT: return observed < threshold, nil case OpLTE: return observed <= threshold, nil case OpGT: return observed > threshold, nil case OpGTE: return observed >= threshold, nil case OpWithinPct: if nominal == 0 { // within_pct against a 0 nominal is meaningless. Treat as // pass so a misconfigured rule doesn't spuriously fail. return true, nil } allowed := (threshold / 100.0) * nominal if allowed < 0 { allowed = -allowed } diff := observed - nominal if diff < 0 { diff = -diff } return diff <= allowed, nil default: return false, fmt.Errorf("unknown op %q", op) } }