// Vetting/internal/store/thresholds.go

package store

import (
	"context"
	"database/sql"
	"fmt"
	"time"
)

// Threshold is the storage-layer shape of one row in the thresholds
// table. Op and Severity stay plain strings here so this package does
// not need to import the orchestrator package that owns the richer
// value-object.
type Threshold struct {
	// ID is the row id (populated from LastInsertId on insert); RunID
	// is the run the rule was seeded for.
	ID, RunID int64
	// Stage (column stage_name), Kind and Key identify what the rule
	// applies to; Op is the comparison operator in string form.
	Stage, Kind, Key, Op string
	// Threshold is the bound stored in the threshold column. Nominal
	// is stored alongside it — presumably the expected healthy value;
	// confirm against the evaluator.
	Threshold, Nominal float64
	// Unit is stored as-is. Severity is matched against 'critical' by
	// CriticalBreaches.
	Unit, Severity string
	Source string // profile|host_override
}

// ThresholdEvaluation is one recorded comparison of an observed sample
// against a threshold. A row is written for every sample that matched
// a threshold, whether it passed or breached, so the report page can
// show the operator why a run failed (or was flagged warning-only).
type ThresholdEvaluation struct {
	// ID is the evaluation row id, RunID the owning run, and
	// ThresholdID the thresholds row this sample was compared against.
	ID, RunID, ThresholdID int64
	// Stage (column stage_name), Kind and Key are stored on the
	// evaluation row itself, next to the threshold reference.
	Stage, Kind, Key string
	// TS is the sample timestamp; the Record* methods substitute the
	// current UTC time when it is the zero value.
	TS time.Time
	// Observed is the measured value that was compared.
	Observed float64
	// Passed is persisted as 1 (true) or 0 (false).
	Passed bool
}

// Thresholds is the CRUD seam for the thresholds and
// threshold_evaluations tables. Kept intentionally narrow: seed at run
// creation, list for evaluation on each sensor batch, record eval
// results, aggregate for the report.
type Thresholds struct {
	// DB is the handle every method issues its statements against.
	// Methods dereference it without a nil check, so it must be set
	// before the first call.
	DB *sql.DB
}

// ThresholdSpec is what a caller hands to SeedForRun: a flat
// description of one threshold rule together with where it came from,
// so profile-derived rules and per-host overrides share a single
// insert path. It is declared in this package (rather than in config)
// so the store layer does not have to import config.
type ThresholdSpec struct {
	// Stage, Kind, Key and Op are copied verbatim onto the Threshold
	// row.
	Stage, Kind, Key, Op string
	// Value becomes Threshold.Threshold; Nominal is copied as-is.
	Value, Nominal float64
	// Unit, Severity and Source are copied verbatim onto the row.
	Unit, Severity, Source string
}

// SeedForRun builds one Threshold per spec for the given run and hands
// the whole set to CreateBatch. Each row gets RunID set to runID,
// ThresholdSpec.Value is stored as Threshold.Threshold, and every other
// field is copied across unchanged.
//
// The result is exactly what CreateBatch returns: the inserted rows
// with their IDs filled in, or the insert error.
func (t *Thresholds) SeedForRun(ctx context.Context, runID int64, specs []ThresholdSpec) ([]Threshold, error) {
	pending := make([]Threshold, len(specs))
	for i := range specs {
		spec := &specs[i]
		pending[i] = Threshold{
			RunID:     runID,
			Stage:     spec.Stage,
			Kind:      spec.Kind,
			Key:       spec.Key,
			Op:        spec.Op,
			Threshold: spec.Value,
			Nominal:   spec.Nominal,
			Unit:      spec.Unit,
			Severity:  spec.Severity,
			Source:    spec.Source,
		}
	}
	return t.CreateBatch(ctx, pending)
}

// Create inserts a single threshold row and returns the id the
// database assigned to it, so evaluations can be pinned to that row.
// It is the one-row counterpart to CreateBatch and runs outside any
// explicit transaction. th.ID is ignored on input.
//
// A failed insert is reported as "insert threshold: ..." wrapping the
// driver error; an error from LastInsertId is returned as-is.
func (t *Thresholds) Create(ctx context.Context, th Threshold) (int64, error) {
	const insertSQL = `
		INSERT INTO thresholds(run_id, stage_name, kind, key, op, threshold, nominal, unit, severity, source)
		VALUES(?,?,?,?,?,?,?,?,?,?)
	`
	result, execErr := t.DB.ExecContext(ctx, insertSQL,
		th.RunID, th.Stage, th.Kind, th.Key, th.Op,
		th.Threshold, th.Nominal, th.Unit, th.Severity, th.Source,
	)
	if execErr != nil {
		return 0, fmt.Errorf("insert threshold: %w", execErr)
	}
	return result.LastInsertId()
}

// CreateBatch is the fast path for run seeding: it inserts every row
// inside a single transaction using one prepared statement, and
// returns copies of the rows with ID set to the database-assigned id
// so the caller can feed them to the in-memory evaluator without a
// follow-up read. The input slice is not modified, and any ID already
// present on an input row is ignored.
//
// An empty input returns (nil, nil) without opening a transaction. On
// any failure the transaction is rolled back, no rows are returned,
// and the error is wrapped with the phase that failed (begin, prepare,
// insert, reading the insert id, or commit); the underlying error
// stays reachable through errors.Is / errors.As.
func (t *Thresholds) CreateBatch(ctx context.Context, rows []Threshold) ([]Threshold, error) {
	if len(rows) == 0 {
		return nil, nil
	}
	tx, err := t.DB.BeginTx(ctx, nil)
	if err != nil {
		return nil, fmt.Errorf("begin threshold batch: %w", err)
	}
	// Safety net for every early return. After a successful Commit the
	// Rollback only reports sql.ErrTxDone, which is why its result is
	// deliberately discarded.
	defer func() { _ = tx.Rollback() }()
	stmt, err := tx.PrepareContext(ctx, `
		INSERT INTO thresholds(run_id, stage_name, kind, key, op, threshold, nominal, unit, severity, source)
		VALUES(?,?,?,?,?,?,?,?,?,?)
	`)
	if err != nil {
		return nil, fmt.Errorf("prepare threshold insert: %w", err)
	}
	defer func() { _ = stmt.Close() }()
	out := make([]Threshold, 0, len(rows))
	for _, th := range rows {
		res, err := stmt.ExecContext(ctx, th.RunID, th.Stage, th.Kind, th.Key, th.Op,
			th.Threshold, th.Nominal, th.Unit, th.Severity, th.Source)
		if err != nil {
			return nil, fmt.Errorf("insert threshold %s/%s: %w", th.Stage, th.Key, err)
		}
		id, err := res.LastInsertId()
		if err != nil {
			return nil, fmt.Errorf("read id of threshold %s/%s: %w", th.Stage, th.Key, err)
		}
		// th is the loop's copy, so the caller's slice keeps its
		// original IDs.
		th.ID = id
		out = append(out, th)
	}
	if err := tx.Commit(); err != nil {
		return nil, fmt.Errorf("commit threshold batch: %w", err)
	}
	return out, nil
}

// ListForRun loads every threshold row whose run_id matches runID,
// ordered by ascending id. The evaluator pulls this on each /sensor
// batch and expects it to stay cheap (a few tens of rows per run).
//
// A run with no thresholds yields a nil slice and a nil error. Query
// and scan failures are returned unwrapped with a nil slice; an
// iteration error surfaced by rows.Err is returned together with
// whatever rows were read before it.
func (t *Thresholds) ListForRun(ctx context.Context, runID int64) ([]Threshold, error) {
	const selectSQL = `
		SELECT id, run_id, stage_name, kind, key, op, threshold, nominal, unit, severity, source
		FROM thresholds WHERE run_id = ? ORDER BY id
	`
	cursor, queryErr := t.DB.QueryContext(ctx, selectSQL, runID)
	if queryErr != nil {
		return nil, queryErr
	}
	defer cursor.Close()

	var found []Threshold
	for cursor.Next() {
		var row Threshold
		scanErr := cursor.Scan(
			&row.ID, &row.RunID, &row.Stage, &row.Kind, &row.Key, &row.Op,
			&row.Threshold, &row.Nominal, &row.Unit, &row.Severity, &row.Source,
		)
		if scanErr != nil {
			return nil, scanErr
		}
		found = append(found, row)
	}
	return found, cursor.Err()
}

// RecordEvaluation writes one evaluation outcome to
// threshold_evaluations. It is meant to be called per matching sample
// so the report carries a full audit trail ("temp hit 95 at 14:22:03"
// rather than just "temp failed").
//
// A zero ev.TS is replaced with the current UTC time; ev is passed by
// value, so the caller's copy is untouched. ev.Passed is stored as the
// integer 1 or 0 and ev.ID is ignored. Failures come back as
// "record evaluation: ..." wrapping the driver error.
func (t *Thresholds) RecordEvaluation(ctx context.Context, ev ThresholdEvaluation) error {
	const insertSQL = `
		INSERT INTO threshold_evaluations(run_id, threshold_id, stage_name, kind, key, ts, observed, passed)
		VALUES(?,?,?,?,?,?,?,?)
	`
	var passedFlag int
	if ev.Passed {
		passedFlag = 1
	}
	stamp := ev.TS
	if stamp.IsZero() {
		stamp = time.Now().UTC()
	}
	_, execErr := t.DB.ExecContext(ctx, insertSQL,
		ev.RunID, ev.ThresholdID, ev.Stage, ev.Kind, ev.Key, stamp, ev.Observed, passedFlag,
	)
	if execErr != nil {
		return fmt.Errorf("record evaluation: %w", execErr)
	}
	return nil
}

// RecordBatch persists a slice of evaluations inside one transaction
// using a single prepared statement. The agent-handler hot path builds
// one evaluation per sample and batches them under the same Sensor
// POST, so the write costs one commit rather than N.
//
// Per element, a zero TS is replaced with the current UTC time (the
// caller's slice is not modified) and Passed is stored as 1 or 0. An
// empty slice is a no-op that returns nil. On any failure the
// transaction is rolled back, so either every evaluation is stored or
// none is, and the returned error is wrapped with the phase that
// failed (begin, prepare, insert, or commit) while keeping the
// underlying error reachable through errors.Is / errors.As.
func (t *Thresholds) RecordBatch(ctx context.Context, evals []ThresholdEvaluation) error {
	if len(evals) == 0 {
		return nil
	}
	tx, err := t.DB.BeginTx(ctx, nil)
	if err != nil {
		return fmt.Errorf("begin eval batch: %w", err)
	}
	// Safety net for every early return. After a successful Commit the
	// Rollback only reports sql.ErrTxDone, which is why its result is
	// deliberately discarded.
	defer func() { _ = tx.Rollback() }()
	stmt, err := tx.PrepareContext(ctx, `
		INSERT INTO threshold_evaluations(run_id, threshold_id, stage_name, kind, key, ts, observed, passed)
		VALUES(?,?,?,?,?,?,?,?)
	`)
	if err != nil {
		return fmt.Errorf("prepare eval insert: %w", err)
	}
	defer func() { _ = stmt.Close() }()
	for _, ev := range evals {
		passed := 0
		if ev.Passed {
			passed = 1
		}
		// ev is the loop's copy; defaulting TS here does not leak back
		// into the caller's slice.
		if ev.TS.IsZero() {
			ev.TS = time.Now().UTC()
		}
		if _, err := stmt.ExecContext(ctx, ev.RunID, ev.ThresholdID, ev.Stage, ev.Kind, ev.Key, ev.TS, ev.Observed, passed); err != nil {
			return fmt.Errorf("insert eval: %w", err)
		}
	}
	if err := tx.Commit(); err != nil {
		return fmt.Errorf("commit eval batch: %w", err)
	}
	return nil
}

// ListEvaluations returns the evaluation rows recorded for a run in
// ascending id order, capped at 5000 rows so a pathological run with a
// sample-per-second sidecar doesn't blow up the report page.
//
// NOTE(review): the cap is applied after ordering by ascending id, so
// when a run has more than 5000 evaluations it is the highest-id rows
// (presumably the most recent ones) that are left out — confirm that
// is what the report wants.
//
// Passed is true only when the stored flag equals 1. A run with no
// evaluations yields a nil slice. Query and scan failures are returned
// unwrapped with a nil slice; an iteration error from rows.Err is
// returned together with the rows read before it.
func (t *Thresholds) ListEvaluations(ctx context.Context, runID int64) ([]ThresholdEvaluation, error) {
	const selectSQL = `
		SELECT id, run_id, threshold_id, stage_name, kind, key, ts, observed, passed
		FROM threshold_evaluations WHERE run_id = ?
		ORDER BY id LIMIT 5000
	`
	cursor, queryErr := t.DB.QueryContext(ctx, selectSQL, runID)
	if queryErr != nil {
		return nil, queryErr
	}
	defer cursor.Close()

	var history []ThresholdEvaluation
	for cursor.Next() {
		var (
			rec  ThresholdEvaluation
			flag int
		)
		scanErr := cursor.Scan(
			&rec.ID, &rec.RunID, &rec.ThresholdID, &rec.Stage, &rec.Kind,
			&rec.Key, &rec.TS, &rec.Observed, &flag,
		)
		if scanErr != nil {
			return nil, scanErr
		}
		rec.Passed = flag == 1
		history = append(history, rec)
	}
	return history, cursor.Err()
}

// CriticalBreaches returns the evaluations that trip the "fail the
// run" gate: rows for runID with passed = 0 whose joined threshold has
// severity 'critical', in ascending evaluation id order and without a
// row cap. The agent-handler calls this at /result close so an
// aggregate breach (p99 latency > bound) still flips the run to
// FailedHolding even if no single sample tripped the fast-fail path.
//
// The join matches on threshold id only; the run filter applies to the
// evaluation row. No breaches yields a nil slice. Query and scan
// failures are returned unwrapped with a nil slice; an iteration error
// from rows.Err is returned together with the rows read before it.
func (t *Thresholds) CriticalBreaches(ctx context.Context, runID int64) ([]ThresholdEvaluation, error) {
	const selectSQL = `
		SELECT e.id, e.run_id, e.threshold_id, e.stage_name, e.kind, e.key, e.ts, e.observed, e.passed
		FROM threshold_evaluations e
		JOIN thresholds t ON t.id = e.threshold_id
		WHERE e.run_id = ? AND e.passed = 0 AND t.severity = 'critical'
		ORDER BY e.id
	`
	cursor, queryErr := t.DB.QueryContext(ctx, selectSQL, runID)
	if queryErr != nil {
		return nil, queryErr
	}
	defer cursor.Close()

	var breaches []ThresholdEvaluation
	for cursor.Next() {
		var (
			rec  ThresholdEvaluation
			flag int
		)
		scanErr := cursor.Scan(
			&rec.ID, &rec.RunID, &rec.ThresholdID, &rec.Stage, &rec.Kind,
			&rec.Key, &rec.TS, &rec.Observed, &flag,
		)
		if scanErr != nil {
			return nil, scanErr
		}
		// The WHERE clause already restricts to passed = 0; the flag is
		// still decoded so the struct mirrors the stored row.
		rec.Passed = flag == 1
		breaches = append(breaches, rec)
	}
	return breaches, cursor.Err()
}