// Vetting/internal/api/sensor_thresholds_test.go

package api_test

import (
	"context"
	"encoding/json"
	"net/http"
	"net/http/httptest"
	"path/filepath"
	"strconv"
	"testing"

	"vetting/internal/api"
	"vetting/internal/db"
	"vetting/internal/events"
	"vetting/internal/model"
	"vetting/internal/orchestrator"
	"vetting/internal/store"
)

// setupAgentWithThresholds prepares the fixture shared by the sensor
// threshold tests. It opens a fresh database under the test's temp
// directory, creates a host and a run, seeds the run's stages, moves
// the run to CPUStress, and seeds one critical "temp" threshold
// (key "cpu/*", op "lt", value 92 C, stage "*").
//
// It returns an Agent carrying the stores plus a Runner, the ID of the
// new run, and the plaintext run token to use as the bearer credential.
// Any setup failure aborts the calling test via t.Fatalf.
func setupAgentWithThresholds(t *testing.T) (*api.Agent, int64, string) {
	t.Helper()
	ctx := context.Background()

	conn, err := db.Open(filepath.Join(t.TempDir(), "vetting.db"))
	if err != nil {
		t.Fatalf("open db: %v", err)
	}
	// Close errors are deliberately ignored; the temp dir is discarded anyway.
	t.Cleanup(func() { _ = conn.Close() })

	// Every store shares the same connection.
	hostStore := &store.Hosts{DB: conn}
	runStore := &store.Runs{DB: conn}
	stageStore := &store.Stages{DB: conn}
	measStore := &store.Measurements{DB: conn}
	thresholdStore := &store.Thresholds{DB: conn}
	runner := &orchestrator.Runner{
		Runs:     runStore,
		Hosts:    hostStore,
		Stages:   stageStore,
		EventHub: events.NewHub(),
	}

	host := model.Host{
		Name:             "thresh-host",
		MAC:              "aa:bb:cc:dd:ee:aa",
		WoLBroadcastIP:   "10.0.0.255",
		WoLPort:          9,
		ExpectedSpecYAML: "memory:\n  total_gib: 16\n",
	}
	hostID, err := hostStore.Create(ctx, host)
	if err != nil {
		t.Fatalf("create host: %v", err)
	}

	token, tokenHash, err := orchestrator.IssueRunToken()
	if err != nil {
		t.Fatalf("issue token: %v", err)
	}

	runID, err := runStore.Create(ctx, hostID, tokenHash, false)
	if err != nil {
		t.Fatalf("create run: %v", err)
	}

	err = stageStore.Seed(ctx, runID)
	if err != nil {
		t.Fatalf("seed stages: %v", err)
	}

	// Put the run in the state where a thermal sidecar would be posting
	// samples, so a breach is attributed to a concrete stage.
	err = runStore.SetState(ctx, runID, model.StateCPUStress)
	if err != nil {
		t.Fatalf("set state: %v", err)
	}

	// The single critical thermal rule the tests exercise.
	cpuTempLimit := store.ThresholdSpec{
		Stage:    "*",
		Kind:     "temp",
		Key:      "cpu/*",
		Op:       "lt",
		Value:    92,
		Unit:     "C",
		Severity: "critical",
		Source:   "profile",
	}
	_, err = thresholdStore.SeedForRun(ctx, runID, []store.ThresholdSpec{cpuTempLimit})
	if err != nil {
		t.Fatalf("seed thresholds: %v", err)
	}

	agent := &api.Agent{
		Hosts:        hostStore,
		Runs:         runStore,
		Stages:       stageStore,
		Measurements: measStore,
		Thresholds:   thresholdStore,
		Runner:       runner,
	}
	return agent, runID, token
}

// TestSensor_ThermalRunawayFailsRun: a sample that breaches a critical
// threshold lands in threshold_evaluations (passed=0) and flips the
// run into FailedHolding with a non-empty failed_stage.
// This is the Phase-1 behavior gate — without the evaluator, the sample
// would just sit in measurements and the run would happily march on.
//
// The test posts one 95.3 C "cpu/0" sample against the fixture's
// "cpu/* lt 92" critical rule and then checks, in order: the HTTP
// status, the breach flag in the response body, the persisted run
// state and failed_stage, and the single recorded evaluation.
func TestSensor_ThermalRunawayFailsRun(t *testing.T) {
	a, runID, token := setupAgentWithThresholds(t)
	batch := api.SensorBatch{Samples: []api.SensorSample{
		{Kind: "temp", Key: "cpu/0", Value: 95.3, Unit: "C"},
	}}
	// Fail here if the payload cannot be encoded, rather than sending an
	// empty body and getting a confusing handler error further down.
	buf, err := json.Marshal(batch)
	if err != nil {
		t.Fatalf("marshal batch: %v", err)
	}
	req := routedRequest(runID, http.MethodPost,
		"/api/v1/runs/"+strconv.FormatInt(runID, 10)+"/sensor", buf)
	req.Header.Set("Authorization", "Bearer "+token)
	req.Header.Set("Content-Type", "application/json")

	rr := httptest.NewRecorder()
	a.Sensor(rr, req)
	if rr.Code != http.StatusOK {
		t.Fatalf("status = %d, body = %q", rr.Code, rr.Body.String())
	}
	// Only Breach is asserted; the other fields are decoded so they show
	// up in the %+v failure output.
	var resp struct {
		OK     bool   `json:"ok"`
		Breach bool   `json:"breach"`
		Kind   string `json:"breach_kind"`
	}
	if err := json.Unmarshal(rr.Body.Bytes(), &resp); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if !resp.Breach {
		t.Fatalf("expected breach=true, got %+v", resp)
	}
	// The breach must be reflected in the persisted run, not just the response.
	run, err := a.Runs.Get(context.Background(), runID)
	if err != nil {
		t.Fatalf("get run: %v", err)
	}
	if run.State != model.StateFailedHolding {
		t.Fatalf("state = %s, want FailedHolding", run.State)
	}
	if run.FailedStage == "" {
		t.Fatalf("failed_stage empty; want stage-named breach")
	}
	// Exactly one sample was posted against one rule, so exactly one
	// failed evaluation row is expected.
	evals, err := a.Thresholds.ListEvaluations(context.Background(), runID)
	if err != nil {
		t.Fatalf("list evaluations: %v", err)
	}
	if len(evals) != 1 {
		t.Fatalf("want 1 evaluation recorded, got %d", len(evals))
	}
	if evals[0].Passed {
		t.Fatalf("evaluation recorded as passed for 95.3C sample against <92C rule")
	}
}

// TestSensor_WithinThresholdPasses: a sample comfortably inside the
// threshold writes an evaluation row with passed=1 and leaves the run
// state untouched.
//
// The test posts one 55.0 C "cpu/0" sample against the fixture's
// "cpu/* lt 92" critical rule, then checks that the handler returns
// 200, the run is still in CPUStress, and exactly one passing
// evaluation was recorded.
func TestSensor_WithinThresholdPasses(t *testing.T) {
	a, runID, token := setupAgentWithThresholds(t)
	batch := api.SensorBatch{Samples: []api.SensorSample{
		{Kind: "temp", Key: "cpu/0", Value: 55.0, Unit: "C"},
	}}
	// Fail here if the payload cannot be encoded, rather than sending an
	// empty body and getting a confusing handler error further down.
	buf, err := json.Marshal(batch)
	if err != nil {
		t.Fatalf("marshal batch: %v", err)
	}
	req := routedRequest(runID, http.MethodPost,
		"/api/v1/runs/"+strconv.FormatInt(runID, 10)+"/sensor", buf)
	req.Header.Set("Authorization", "Bearer "+token)
	req.Header.Set("Content-Type", "application/json")

	rr := httptest.NewRecorder()
	a.Sensor(rr, req)
	if rr.Code != http.StatusOK {
		t.Fatalf("status = %d, body = %q", rr.Code, rr.Body.String())
	}
	// A passing sample must not move the run out of the state the fixture set.
	run, err := a.Runs.Get(context.Background(), runID)
	if err != nil {
		t.Fatalf("get run: %v", err)
	}
	if run.State != model.StateCPUStress {
		t.Fatalf("state = %s, want CPUStress unchanged", run.State)
	}
	// The evaluation is still recorded, with Passed set.
	evals, err := a.Thresholds.ListEvaluations(context.Background(), runID)
	if err != nil {
		t.Fatalf("list evaluations: %v", err)
	}
	if len(evals) != 1 || !evals[0].Passed {
		t.Fatalf("want 1 passed evaluation, got %+v", evals)
	}
}