Initial commit: full Phases 1-6 implementation
CI / Lint + build + test (push) Has been cancelled

Post-repair hardware validation pipeline for Proxmox cluster hosts.
Go orchestrator + in-image agent + mkosi live image + bundled dnsmasq
PXE + SQLite + HTMX/SSE UI + notify registry + janitor + full docs.
This commit is contained in:
2026-04-17 21:32:10 -04:00
commit 9bb4b09a04
98 changed files with 11960 additions and 0 deletions
+126
View File
@@ -0,0 +1,126 @@
package store
import (
"context"
"database/sql"
"fmt"
"vetting/internal/model"
)
// Artifact mirrors one row of the artifacts table: a file recorded
// against a run and, optionally, against one stage of that run.
type Artifact struct {
ID int64
RunID int64
StageID *int64 // nil is written as SQL NULL and read back as nil
Kind string // inventory|spec_diff|hold_key|report|log|fio|iperf|smart
Path string
SHA256 string
SizeBytes int64
}
// Artifacts reads and writes rows of the artifacts table through DB.
type Artifacts struct {
DB *sql.DB
}
// Create inserts a single artifact row and returns the id the database
// reports for it. A nil StageID is stored as NULL.
func (a *Artifacts) Create(ctx context.Context, art Artifact) (int64, error) {
	const insertArtifact = `
INSERT INTO artifacts(run_id, stage_id, kind, path, sha256, size_bytes)
VALUES(?,?,?,?,?,?)
`
	result, execErr := a.DB.ExecContext(ctx, insertArtifact,
		art.RunID, nullInt64(art.StageID), art.Kind, art.Path, art.SHA256, art.SizeBytes)
	if execErr != nil {
		return 0, fmt.Errorf("insert artifact: %w", execErr)
	}
	return result.LastInsertId()
}
// DeleteForRun removes every artifact row for a run. Returns the rows
// that were deleted so the caller can unlink the on-disk files. Used by
// the janitor; ordinary flow treats artifacts as append-only.
//
// The listing and the delete run inside one transaction so that the
// returned slice describes exactly the rows that were removed: a row
// inserted between two independent statements would otherwise be
// deleted without being reported, leaving its file orphaned on disk.
// On any error nothing is deleted and a nil slice is returned.
func (a *Artifacts) DeleteForRun(ctx context.Context, runID int64) ([]Artifact, error) {
	tx, err := a.DB.BeginTx(ctx, nil)
	if err != nil {
		return nil, fmt.Errorf("delete artifacts for run %d: begin: %w", runID, err)
	}
	// After a successful Commit, Rollback only returns sql.ErrTxDone.
	defer func() { _ = tx.Rollback() }()

	// The closure guarantees the result set is closed before the DELETE
	// is issued on the same transaction.
	arts, err := func() ([]Artifact, error) {
		rows, err := tx.QueryContext(ctx, `
SELECT id, run_id, stage_id, kind, path, sha256, size_bytes
FROM artifacts WHERE run_id = ? ORDER BY id
`, runID)
		if err != nil {
			return nil, err
		}
		defer rows.Close()

		var out []Artifact
		for rows.Next() {
			var ar Artifact
			var stageID sql.NullInt64
			if err := rows.Scan(&ar.ID, &ar.RunID, &stageID, &ar.Kind, &ar.Path, &ar.SHA256, &ar.SizeBytes); err != nil {
				return nil, err
			}
			if stageID.Valid {
				v := stageID.Int64
				ar.StageID = &v
			}
			out = append(out, ar)
		}
		if err := rows.Err(); err != nil {
			return nil, err
		}
		return out, nil
	}()
	if err != nil {
		return nil, fmt.Errorf("list artifacts for run %d: %w", runID, err)
	}

	if _, err := tx.ExecContext(ctx, `DELETE FROM artifacts WHERE run_id = ?`, runID); err != nil {
		return nil, fmt.Errorf("delete artifacts for run %d: %w", runID, err)
	}
	if err := tx.Commit(); err != nil {
		return nil, fmt.Errorf("delete artifacts for run %d: commit: %w", runID, err)
	}
	return arts, nil
}
// ListForRun returns the artifact rows recorded for runID, ordered by
// id. A NULL stage_id comes back as a nil StageID. The result is a nil
// slice when the run has no artifacts.
func (a *Artifacts) ListForRun(ctx context.Context, runID int64) ([]Artifact, error) {
	const selectArtifacts = `
SELECT id, run_id, stage_id, kind, path, sha256, size_bytes
FROM artifacts WHERE run_id = ? ORDER BY id
`
	cursor, err := a.DB.QueryContext(ctx, selectArtifacts, runID)
	if err != nil {
		return nil, err
	}
	defer cursor.Close()

	var found []Artifact
	for cursor.Next() {
		var (
			item  Artifact
			stage sql.NullInt64
		)
		scanErr := cursor.Scan(&item.ID, &item.RunID, &stage, &item.Kind, &item.Path, &item.SHA256, &item.SizeBytes)
		if scanErr != nil {
			return nil, scanErr
		}
		if stage.Valid {
			// Copy so each artifact owns its own int64.
			stageID := stage.Int64
			item.StageID = &stageID
		}
		found = append(found, item)
	}
	return found, cursor.Err()
}
// SpecDiffs reads and writes rows of the spec_diffs table through DB.
type SpecDiffs struct {
DB *sql.DB
}
// ReplaceForRun swaps the stored spec diffs of runID for diffs in a
// single transaction: existing rows for the run are deleted, then one
// row per element is inserted. Either everything is committed or, on
// any error, nothing changes.
//
// Rows are written under the runID argument (not each element's RunID)
// and the ignored column is always written as 0.
// NOTE(review): the Ignored field of the input is therefore not
// persisted — confirm that is intended.
func (s *SpecDiffs) ReplaceForRun(ctx context.Context, runID int64, diffs []model.SpecDiff) error {
	const (
		clearDiffs = `DELETE FROM spec_diffs WHERE run_id = ?`
		insertDiff = `
INSERT INTO spec_diffs(run_id, field, expected, actual, severity, ignored)
VALUES(?,?,?,?,?,?)
`
	)

	txn, err := s.DB.BeginTx(ctx, nil)
	if err != nil {
		return err
	}
	// Harmless after a successful Commit.
	defer func() { _ = txn.Rollback() }()

	_, err = txn.ExecContext(ctx, clearDiffs, runID)
	if err != nil {
		return err
	}
	for i := range diffs {
		diff := &diffs[i]
		_, err = txn.ExecContext(ctx, insertDiff,
			runID, diff.Field, diff.Expected, diff.Actual, diff.Severity, 0)
		if err != nil {
			return err
		}
	}
	return txn.Commit()
}
// ListForRun returns the spec diffs stored for runID, ordered by id.
// NULL expected/actual values are read as empty strings and the integer
// ignored column is converted to a bool (non-zero means true).
func (s *SpecDiffs) ListForRun(ctx context.Context, runID int64) ([]model.SpecDiff, error) {
	const selectDiffs = `
SELECT id, run_id, field, COALESCE(expected,''), COALESCE(actual,''), severity, ignored
FROM spec_diffs WHERE run_id = ? ORDER BY id
`
	cursor, err := s.DB.QueryContext(ctx, selectDiffs, runID)
	if err != nil {
		return nil, err
	}
	defer cursor.Close()

	var diffs []model.SpecDiff
	for cursor.Next() {
		var (
			diff        model.SpecDiff
			ignoredFlag int
		)
		scanErr := cursor.Scan(&diff.ID, &diff.RunID, &diff.Field, &diff.Expected, &diff.Actual, &diff.Severity, &ignoredFlag)
		if scanErr != nil {
			return nil, scanErr
		}
		diff.Ignored = ignoredFlag != 0
		diffs = append(diffs, diff)
	}
	return diffs, cursor.Err()
}
// nullInt64 turns an optional int64 into a query argument: the pointed-to
// value when p is set, or an untyped nil (bound as SQL NULL) when it is not.
func nullInt64(p *int64) any {
	if p != nil {
		return *p
	}
	return nil
}
+98
View File
@@ -0,0 +1,98 @@
package store
import (
"context"
"database/sql"
"errors"
"fmt"
"strings"
"vetting/internal/model"
)
// Hosts reads and writes rows of the hosts table through DB.
type Hosts struct {
DB *sql.DB
}
// ErrNotFound is returned by store lookups and deletes when no row
// matches the requested id. Compare with errors.Is.
var ErrNotFound = errors.New("not found")
// Create inserts a host and returns the id the database reports for it.
// The MAC is stored in normalized form (see normalizeMAC); empty PDU and
// IPMI configuration strings are stored as NULL.
func (h *Hosts) Create(ctx context.Context, in model.Host) (int64, error) {
	const insertHost = `
INSERT INTO hosts(name, mac, wol_broadcast_ip, wol_port, expected_spec_yaml, pdu_config_json, ipmi_config_json, notes)
VALUES(?,?,?,?,?,?,?,?)
`
	mac := normalizeMAC(in.MAC)
	result, execErr := h.DB.ExecContext(ctx, insertHost,
		in.Name,
		mac,
		in.WoLBroadcastIP,
		in.WoLPort,
		in.ExpectedSpecYAML,
		nullIfEmpty(in.PDUConfigJSON),
		nullIfEmpty(in.IPMIConfigJSON),
		in.Notes,
	)
	if execErr != nil {
		return 0, fmt.Errorf("insert host: %w", execErr)
	}
	return result.LastInsertId()
}
// List returns every host, sorted by name without regard to case. NULL
// PDU/IPMI configuration columns are read as empty strings.
func (h *Hosts) List(ctx context.Context) ([]model.Host, error) {
	const selectHosts = `
SELECT id, name, mac, wol_broadcast_ip, wol_port, expected_spec_yaml,
COALESCE(pdu_config_json,''), COALESCE(ipmi_config_json,''),
notes, created_at, updated_at
FROM hosts
ORDER BY name COLLATE NOCASE
`
	cursor, err := h.DB.QueryContext(ctx, selectHosts)
	if err != nil {
		return nil, fmt.Errorf("list hosts: %w", err)
	}
	defer cursor.Close()

	var hosts []model.Host
	for cursor.Next() {
		var item model.Host
		scanErr := cursor.Scan(
			&item.ID, &item.Name, &item.MAC, &item.WoLBroadcastIP, &item.WoLPort,
			&item.ExpectedSpecYAML, &item.PDUConfigJSON, &item.IPMIConfigJSON,
			&item.Notes, &item.CreatedAt, &item.UpdatedAt,
		)
		if scanErr != nil {
			return nil, fmt.Errorf("scan host: %w", scanErr)
		}
		hosts = append(hosts, item)
	}
	return hosts, cursor.Err()
}
// Get loads the host with the given id. It returns ErrNotFound when no
// such row exists; any other failure is wrapped with "get host".
func (h *Hosts) Get(ctx context.Context, id int64) (*model.Host, error) {
	const selectHost = `
SELECT id, name, mac, wol_broadcast_ip, wol_port, expected_spec_yaml,
COALESCE(pdu_config_json,''), COALESCE(ipmi_config_json,''),
notes, created_at, updated_at
FROM hosts WHERE id = ?
`
	var found model.Host
	scanErr := h.DB.QueryRowContext(ctx, selectHost, id).Scan(
		&found.ID, &found.Name, &found.MAC, &found.WoLBroadcastIP, &found.WoLPort,
		&found.ExpectedSpecYAML, &found.PDUConfigJSON, &found.IPMIConfigJSON,
		&found.Notes, &found.CreatedAt, &found.UpdatedAt,
	)
	switch {
	case errors.Is(scanErr, sql.ErrNoRows):
		return nil, ErrNotFound
	case scanErr != nil:
		return nil, fmt.Errorf("get host: %w", scanErr)
	}
	return &found, nil
}
// Delete removes the host with the given id. It returns ErrNotFound when
// the statement affected no rows. A failure to read the affected-row
// count is reported as its own error rather than being mistaken for
// "not found".
func (h *Hosts) Delete(ctx context.Context, id int64) error {
	res, err := h.DB.ExecContext(ctx, `DELETE FROM hosts WHERE id = ?`, id)
	if err != nil {
		return fmt.Errorf("delete host: %w", err)
	}
	n, err := res.RowsAffected()
	if err != nil {
		return fmt.Errorf("delete host: rows affected: %w", err)
	}
	if n == 0 {
		return ErrNotFound
	}
	return nil
}
// normalizeMAC returns m with surrounding whitespace removed and all
// letters lower-cased. Separators are left untouched.
func normalizeMAC(m string) string {
	trimmed := strings.TrimSpace(m)
	return strings.ToLower(trimmed)
}
// nullIfEmpty turns a string into a query argument: the string itself
// when it is non-empty, or an untyped nil (bound as SQL NULL) otherwise.
func nullIfEmpty(s string) any {
	if len(s) > 0 {
		return s
	}
	return nil
}
+85
View File
@@ -0,0 +1,85 @@
package store
import (
"context"
"database/sql"
"fmt"
"time"
"vetting/internal/model"
)
// Measurements persists timestamped numeric samples: temps, fan speeds,
// PSU voltages, fio IOPS, iperf throughput, SMART attributes. The schema
// stores (kind, key, value, unit) so Phase 5 reports can group freely
// without new tables per source. All access goes through DB.
type Measurements struct {
DB *sql.DB
}
// Create inserts one measurement and returns the id the database reports
// for it. A zero timestamp is replaced with the current UTC time; a nil
// StageID is stored as NULL.
func (m *Measurements) Create(ctx context.Context, in model.Measurement) (int64, error) {
	const insertMeasurement = `
INSERT INTO measurements(run_id, stage_id, ts, kind, key, value, unit)
VALUES(?,?,?,?,?,?,?)
`
	stamp := in.TS
	if stamp.IsZero() {
		stamp = time.Now().UTC()
	}
	result, execErr := m.DB.ExecContext(ctx, insertMeasurement,
		in.RunID, nullInt64(in.StageID), stamp, in.Kind, in.Key, in.Value, in.Unit)
	if execErr != nil {
		return 0, fmt.Errorf("insert measurement: %w", execErr)
	}
	return result.LastInsertId()
}
// CreateBatch inserts a batch in one transaction. The sensor endpoint
// hands us ~520 samples per tick; a single commit keeps SQLite happy.
//
// An empty batch is a no-op. Rows with a zero timestamp all receive the
// same "now" (UTC) captured once before the loop. The caller's slice is
// not modified. If any insert fails the whole batch is rolled back.
func (m *Measurements) CreateBatch(ctx context.Context, rows []model.Measurement) error {
	if len(rows) == 0 {
		return nil
	}
	tx, err := m.DB.BeginTx(ctx, nil)
	if err != nil {
		return fmt.Errorf("begin measurement batch: %w", err)
	}
	// After a successful Commit, Rollback only returns sql.ErrTxDone.
	defer func() { _ = tx.Rollback() }()

	// Prepare once: every row uses the same statement, so there is no
	// reason to hand the SQL text to the driver again per sample.
	stmt, err := tx.PrepareContext(ctx, `
INSERT INTO measurements(run_id, stage_id, ts, kind, key, value, unit)
VALUES(?,?,?,?,?,?,?)
`)
	if err != nil {
		return fmt.Errorf("prepare measurement insert: %w", err)
	}
	defer stmt.Close()

	now := time.Now().UTC()
	for i := range rows {
		r := &rows[i]
		ts := r.TS
		if ts.IsZero() {
			ts = now
		}
		if _, err := stmt.ExecContext(ctx, r.RunID, nullInt64(r.StageID), ts, r.Kind, r.Key, r.Value, r.Unit); err != nil {
			return fmt.Errorf("insert measurement: %w", err)
		}
	}
	if err := tx.Commit(); err != nil {
		return fmt.Errorf("commit measurement batch: %w", err)
	}
	return nil
}
// ListForRun returns all measurements for a run, ordered by timestamp
// and then id. Callers filter by kind in memory; the row count is small
// per run (≈thousands). A NULL unit is read as an empty string and a
// NULL stage_id as a nil StageID.
func (m *Measurements) ListForRun(ctx context.Context, runID int64) ([]model.Measurement, error) {
	const selectMeasurements = `
SELECT id, run_id, stage_id, ts, kind, key, value, COALESCE(unit,'')
FROM measurements WHERE run_id = ? ORDER BY ts, id
`
	cursor, err := m.DB.QueryContext(ctx, selectMeasurements, runID)
	if err != nil {
		return nil, err
	}
	defer cursor.Close()

	var samples []model.Measurement
	for cursor.Next() {
		var (
			sample model.Measurement
			stage  sql.NullInt64
		)
		scanErr := cursor.Scan(&sample.ID, &sample.RunID, &stage, &sample.TS, &sample.Kind, &sample.Key, &sample.Value, &sample.Unit)
		if scanErr != nil {
			return nil, scanErr
		}
		if stage.Valid {
			// Copy so each sample owns its own int64.
			stageID := stage.Int64
			sample.StageID = &stageID
		}
		samples = append(samples, sample)
	}
	return samples, cursor.Err()
}
+226
View File
@@ -0,0 +1,226 @@
package store
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
"vetting/internal/model"
)
// Runs reads and writes rows of the runs table through DB.
type Runs struct {
DB *sql.DB
}
// Create inserts a new run for hostID in the queued state, with the
// given agent token hash, a next boot target of "linux", and started_at
// set to the current UTC time. It returns the id the database reports.
func (r *Runs) Create(ctx context.Context, hostID int64, tokenHash string) (int64, error) {
	const insertRun = `
INSERT INTO runs(host_id, state, agent_token_hash, next_boot_target, started_at)
VALUES(?,?,?,?,?)
`
	startedAt := time.Now().UTC()
	initialState := string(model.StateQueued)
	result, execErr := r.DB.ExecContext(ctx, insertRun, hostID, initialState, tokenHash, "linux", startedAt)
	if execErr != nil {
		return 0, fmt.Errorf("insert run: %w", execErr)
	}
	return result.LastInsertId()
}
// SetState overwrites the state column of the run. No error is reported
// when runID matches no row.
func (r *Runs) SetState(ctx context.Context, runID int64, state model.RunState) error {
	const updateState = `UPDATE runs SET state = ? WHERE id = ?`
	if _, execErr := r.DB.ExecContext(ctx, updateState, string(state), runID); execErr != nil {
		return execErr
	}
	return nil
}
// RotateTokenHash overwrites the run's stored agent token hash. It is
// called on each iPXE fetch so that only the most-recently-booted agent
// can claim the run.
func (r *Runs) RotateTokenHash(ctx context.Context, runID int64, hash string) error {
	const updateHash = `UPDATE runs SET agent_token_hash = ? WHERE id = ?`
	if _, execErr := r.DB.ExecContext(ctx, updateHash, hash, runID); execErr != nil {
		return execErr
	}
	return nil
}
// SetHoldIP stores the agent's LAN IP on the run so the UI can show the
// ssh command. Called when the agent POSTs /hold.
func (r *Runs) SetHoldIP(ctx context.Context, runID int64, ip string) error {
	const updateHoldIP = `UPDATE runs SET hold_ip = ? WHERE id = ?`
	if _, execErr := r.DB.ExecContext(ctx, updateHoldIP, ip, runID); execErr != nil {
		return execErr
	}
	return nil
}
// SetFailedStage stores the name of the stage that tripped the run; the
// tile and the reports read it. The run's state is left unchanged.
func (r *Runs) SetFailedStage(ctx context.Context, runID int64, stage string) error {
	const updateFailedStage = `UPDATE runs SET failed_stage = ? WHERE id = ?`
	if _, execErr := r.DB.ExecContext(ctx, updateFailedStage, stage, runID); execErr != nil {
		return execErr
	}
	return nil
}
// ClearFailedStage resets failed_stage to NULL. Called when the operator
// overrides a stage and the run re-enters the pipeline. Other columns
// are not touched.
func (r *Runs) ClearFailedStage(ctx context.Context, runID int64) error {
	const clearFailedStage = `UPDATE runs SET failed_stage = NULL WHERE id = ?`
	if _, execErr := r.DB.ExecContext(ctx, clearFailedStage, runID); execErr != nil {
		return execErr
	}
	return nil
}
// SetOverrideFlags stores the operator's override decisions as a JSON
// blob (for example `{"wipe":true}`). The blob is passed back to the
// agent on the next heartbeat so it can resume the held stage with the
// gate bypassed. The string is stored as given.
func (r *Runs) SetOverrideFlags(ctx context.Context, runID int64, flagsJSON string) error {
	const updateFlags = `UPDATE runs SET override_flags_json = ? WHERE id = ?`
	if _, execErr := r.DB.ExecContext(ctx, updateFlags, flagsJSON, runID); execErr != nil {
		return execErr
	}
	return nil
}
// MarkFailed moves the run to the failed-holding state in one UPDATE:
// result becomes 'fail', failed_stage and hold_ip are set from the
// arguments, and completed_at is set to the current UTC time.
func (r *Runs) MarkFailed(ctx context.Context, runID int64, failedStage, holdIP string) error {
	const markFailed = `
UPDATE runs SET state = ?, result = 'fail', failed_stage = ?, hold_ip = ?, completed_at = ?
WHERE id = ?
`
	finishedAt := time.Now().UTC()
	newState := string(model.StateFailedHolding)
	if _, execErr := r.DB.ExecContext(ctx, markFailed, newState, failedStage, holdIP, finishedAt, runID); execErr != nil {
		return execErr
	}
	return nil
}
// MarkCompleted moves the run to the completed state in one UPDATE:
// result becomes 'pass', report_path is set from the argument, and
// completed_at is set to the current UTC time.
func (r *Runs) MarkCompleted(ctx context.Context, runID int64, reportPath string) error {
	const markCompleted = `
UPDATE runs SET state = ?, result = 'pass', report_path = ?, completed_at = ?
WHERE id = ?
`
	finishedAt := time.Now().UTC()
	newState := string(model.StateCompleted)
	if _, execErr := r.DB.ExecContext(ctx, markCompleted, newState, reportPath, finishedAt, runID); execErr != nil {
		return execErr
	}
	return nil
}
// Get loads the run with the given id. It returns ErrNotFound when no
// such row exists. NULL text columns are read as empty strings and a
// NULL completed_at leaves CompletedAt nil.
func (r *Runs) Get(ctx context.Context, id int64) (*model.Run, error) {
	const selectRun = `
SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
COALESCE(next_boot_target,''), agent_token_hash, started_at,
completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
COALESCE(override_flags_json,'')
FROM runs WHERE id = ?
`
	var (
		found    model.Run
		finished sql.NullTime
	)
	scanErr := r.DB.QueryRowContext(ctx, selectRun, id).Scan(
		&found.ID, &found.HostID, &found.State, &found.Result, &found.FailedStage,
		&found.NextBootTarget, &found.AgentTokenHash, &found.StartedAt,
		&finished, &found.ReportPath, &found.HoldIP, &found.OverrideFlagsJSON,
	)
	switch {
	case errors.Is(scanErr, sql.ErrNoRows):
		return nil, ErrNotFound
	case scanErr != nil:
		return nil, fmt.Errorf("get run: %w", scanErr)
	}
	if finished.Valid {
		doneAt := finished.Time
		found.CompletedAt = &doneAt
	}
	return &found, nil
}
// LatestForHost returns the run with the highest id for the host. When
// the host has no runs it returns (nil, nil) rather than ErrNotFound.
func (r *Runs) LatestForHost(ctx context.Context, hostID int64) (*model.Run, error) {
	const selectLatest = `
SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
COALESCE(next_boot_target,''), agent_token_hash, started_at,
completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
COALESCE(override_flags_json,'')
FROM runs WHERE host_id = ?
ORDER BY id DESC LIMIT 1
`
	var (
		latest   model.Run
		finished sql.NullTime
	)
	scanErr := r.DB.QueryRowContext(ctx, selectLatest, hostID).Scan(
		&latest.ID, &latest.HostID, &latest.State, &latest.Result, &latest.FailedStage,
		&latest.NextBootTarget, &latest.AgentTokenHash, &latest.StartedAt,
		&finished, &latest.ReportPath, &latest.HoldIP, &latest.OverrideFlagsJSON,
	)
	switch {
	case errors.Is(scanErr, sql.ErrNoRows):
		return nil, nil
	case scanErr != nil:
		return nil, fmt.Errorf("latest run: %w", scanErr)
	}
	if finished.Valid {
		doneAt := finished.Time
		latest.CompletedAt = &doneAt
	}
	return &latest, nil
}
// Active returns all runs in non-terminal states, ordered by id. The
// query treats every state other than 'Completed' and 'Released' as
// non-terminal.
func (r *Runs) Active(ctx context.Context) ([]model.Run, error) {
	const selectActive = `
SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
COALESCE(next_boot_target,''), agent_token_hash, started_at,
completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
COALESCE(override_flags_json,'')
FROM runs
WHERE state NOT IN ('Completed','Released')
ORDER BY id
`
	cursor, err := r.DB.QueryContext(ctx, selectActive)
	if err != nil {
		return nil, err
	}
	defer cursor.Close()

	var active []model.Run
	for cursor.Next() {
		var (
			item     model.Run
			finished sql.NullTime
		)
		scanErr := cursor.Scan(
			&item.ID, &item.HostID, &item.State, &item.Result, &item.FailedStage,
			&item.NextBootTarget, &item.AgentTokenHash, &item.StartedAt,
			&finished, &item.ReportPath, &item.HoldIP, &item.OverrideFlagsJSON,
		)
		if scanErr != nil {
			return nil, scanErr
		}
		if finished.Valid {
			// Copy so each run owns its own time value.
			doneAt := finished.Time
			item.CompletedAt = &doneAt
		}
		active = append(active, item)
	}
	return active, cursor.Err()
}
// CompletedOlderThan returns, in ascending order, the ids of terminal
// runs (Completed, Released or FailedHolding) whose completed_at is
// earlier than cutoff. Rows with a NULL completed_at are compared on
// started_at instead, so a stuck run doesn't get garbage-collected out
// from under its own logs. Used by the janitor.
func (r *Runs) CompletedOlderThan(ctx context.Context, cutoff time.Time) ([]int64, error) {
	const selectExpired = `
SELECT id FROM runs
WHERE state IN ('Completed','Released','FailedHolding')
AND COALESCE(completed_at, started_at) < ?
ORDER BY id
`
	cursor, err := r.DB.QueryContext(ctx, selectExpired, cutoff)
	if err != nil {
		return nil, err
	}
	defer cursor.Close()

	var ids []int64
	for cursor.Next() {
		var runID int64
		scanErr := cursor.Scan(&runID)
		if scanErr != nil {
			return nil, scanErr
		}
		ids = append(ids, runID)
	}
	return ids, cursor.Err()
}
// FindActiveByMAC returns the current active run for the host with the
// given MAC, or (nil, nil) if the MAC is unknown or has no active run.
// "Active" means any state other than 'Completed' and 'Released'; when
// several match, the run with the highest id wins.
//
// The MAC is normalized the same way Hosts.Create normalizes it before
// storing, so differences in letter case or surrounding whitespace do
// not prevent a match.
func (r *Runs) FindActiveByMAC(ctx context.Context, mac string) (*model.Run, error) {
	row := r.DB.QueryRowContext(ctx, `
SELECT r.id, r.host_id, r.state, COALESCE(r.result,''), COALESCE(r.failed_stage,''),
COALESCE(r.next_boot_target,''), r.agent_token_hash, r.started_at,
r.completed_at, COALESCE(r.report_path,''), COALESCE(r.hold_ip,''),
COALESCE(r.override_flags_json,'')
FROM runs r
JOIN hosts h ON h.id = r.host_id
WHERE h.mac = ? AND r.state NOT IN ('Completed','Released')
ORDER BY r.id DESC LIMIT 1
`, normalizeMAC(mac))
	var run model.Run
	var completedAt sql.NullTime
	err := row.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
		&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
		&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON)
	if errors.Is(err, sql.ErrNoRows) {
		return nil, nil
	}
	if err != nil {
		return nil, fmt.Errorf("find active run by mac: %w", err)
	}
	if completedAt.Valid {
		run.CompletedAt = &completedAt.Time
	}
	return &run, nil
}
+91
View File
@@ -0,0 +1,91 @@
package store
import (
"context"
"database/sql"
"fmt"
"time"
"vetting/internal/model"
)
// Stages reads and writes rows of the stages table through DB.
type Stages struct {
DB *sql.DB
}
// DefaultStageOrder is the canonical sequence for every run. Phase 2 only
// reaches Inventory; later phases add more executors but the list is fixed.
// Seed inserts one row per entry and uses the slice index as the stage's
// ordinal, so reordering this list changes the order new runs are seeded in.
var DefaultStageOrder = []string{
"Inventory",
"SpecValidate",
"SMART",
"CPUStress",
"Storage",
"Network",
"GPU",
"PSU",
"Reporting",
}
// Seed inserts one pending stage row per entry of DefaultStageOrder for
// the given run, using the entry's index as its ordinal. All rows are
// written in a single transaction; on any error none are kept.
func (s *Stages) Seed(ctx context.Context, runID int64) error {
	const insertStage = `INSERT INTO stages(run_id, name, ordinal, state) VALUES(?,?,?,?)`

	txn, err := s.DB.BeginTx(ctx, nil)
	if err != nil {
		return err
	}
	// Harmless after a successful Commit.
	defer func() { _ = txn.Rollback() }()

	pending := string(model.StagePending)
	for ordinal, stageName := range DefaultStageOrder {
		_, execErr := txn.ExecContext(ctx, insertStage, runID, stageName, ordinal, pending)
		if execErr != nil {
			return fmt.Errorf("seed stage %s: %w", stageName, execErr)
		}
	}
	return txn.Commit()
}
// ListForRun returns the stages of runID ordered by ordinal. NULL
// started_at/completed_at leave the corresponding pointer nil and a
// NULL summary_json is read as an empty string.
func (s *Stages) ListForRun(ctx context.Context, runID int64) ([]model.Stage, error) {
	const selectStages = `
SELECT id, run_id, name, ordinal, state, started_at, completed_at, COALESCE(summary_json,'')
FROM stages WHERE run_id = ? ORDER BY ordinal
`
	cursor, err := s.DB.QueryContext(ctx, selectStages, runID)
	if err != nil {
		return nil, err
	}
	defer cursor.Close()

	var stages []model.Stage
	for cursor.Next() {
		var (
			stage    model.Stage
			began    sql.NullTime
			finished sql.NullTime
		)
		scanErr := cursor.Scan(&stage.ID, &stage.RunID, &stage.Name, &stage.Ordinal, &stage.State,
			&began, &finished, &stage.SummaryJSON)
		if scanErr != nil {
			return nil, scanErr
		}
		if began.Valid {
			startedAt := began.Time
			stage.StartedAt = &startedAt
		}
		if finished.Valid {
			completedAt := finished.Time
			stage.CompletedAt = &completedAt
		}
		stages = append(stages, stage)
	}
	return stages, cursor.Err()
}
// StartByName marks the named stage of the run as running and sets its
// started_at to the current UTC time. No error is reported when no row
// matches runID and name.
func (s *Stages) StartByName(ctx context.Context, runID int64, name string) error {
	const startStage = `
UPDATE stages SET state = ?, started_at = ?
WHERE run_id = ? AND name = ?
`
	startedAt := time.Now().UTC()
	running := string(model.StageRunning)
	if _, execErr := s.DB.ExecContext(ctx, startStage, running, startedAt, runID, name); execErr != nil {
		return execErr
	}
	return nil
}
// CompleteByName records the final state of the named stage, sets its
// completed_at to the current UTC time and stores summaryJSON (an empty
// summary is stored as NULL). No error is reported when no row matches
// runID and name.
func (s *Stages) CompleteByName(ctx context.Context, runID int64, name string, state model.StageState, summaryJSON string) error {
	const completeStage = `
UPDATE stages SET state = ?, completed_at = ?, summary_json = ?
WHERE run_id = ? AND name = ?
`
	finishedAt := time.Now().UTC()
	summary := nullIfEmpty(summaryJSON)
	if _, execErr := s.DB.ExecContext(ctx, completeStage, string(state), finishedAt, summary, runID, name); execErr != nil {
		return execErr
	}
	return nil
}
+229
View File
@@ -0,0 +1,229 @@
package store_test
import (
"context"
"path/filepath"
"testing"
"vetting/internal/db"
"vetting/internal/model"
"vetting/internal/store"
)
// newDB opens a fresh database file inside the test's temp directory and
// returns a Runs store bound to it. The connection is closed when the
// test finishes. Other stores in these tests share it via the DB field.
func newDB(t *testing.T) *store.Runs {
	t.Helper()

	dbFile := filepath.Join(t.TempDir(), "vetting.db")
	handle, openErr := db.Open(dbFile)
	if openErr != nil {
		t.Fatalf("open db: %v", openErr)
	}
	t.Cleanup(func() {
		// Close errors are irrelevant once the test is over.
		_ = handle.Close()
	})

	runs := store.Runs{DB: handle}
	return &runs
}
// seedRun inserts one host and one run for it and returns
// (hostID, runID). The store tests build on it so that run_id foreign
// keys resolve. Any failure aborts the calling test.
func seedRun(t *testing.T, runs *store.Runs) (int64, int64) {
	t.Helper()
	ctx := context.Background()

	fixture := model.Host{
		Name:             "t-host",
		MAC:              "aa:bb:cc:dd:ee:ff",
		WoLBroadcastIP:   "10.0.0.255",
		WoLPort:          9,
		ExpectedSpecYAML: "memory:\n total_gib: 16\n",
	}
	hostStore := store.Hosts{DB: runs.DB}
	hostID, err := hostStore.Create(ctx, fixture)
	if err != nil {
		t.Fatalf("create host: %v", err)
	}

	runID, err := runs.Create(ctx, hostID, "deadbeef")
	if err != nil {
		t.Fatalf("create run: %v", err)
	}
	return hostID, runID
}
// TestArtifactsRoundtrip creates two artifacts on one run and checks
// that ListForRun returns both, in insertion order, with the path intact.
func TestArtifactsRoundtrip(t *testing.T) {
	runStore := newDB(t)
	_, runID := seedRun(t, runStore)
	ctx := context.Background()
	artifacts := &store.Artifacts{DB: runStore.DB}

	inventory := store.Artifact{
		RunID:     runID,
		Kind:      "inventory",
		Path:      "/var/artifacts/run-1/inventory.json",
		SHA256:    "abc123",
		SizeBytes: 42,
	}
	firstID, err := artifacts.Create(ctx, inventory)
	if err != nil {
		t.Fatalf("Create: %v", err)
	}
	if firstID == 0 {
		t.Fatalf("expected non-zero id")
	}

	// Hold key on the same run — ListForRun should return both in
	// insertion order and TileEnricher picks the hold_key row.
	holdKey := store.Artifact{
		RunID:     runID,
		Kind:      "hold_key",
		Path:      "/var/artifacts/run-1/hold.key",
		SHA256:    "def456",
		SizeBytes: 400,
	}
	if _, err := artifacts.Create(ctx, holdKey); err != nil {
		t.Fatalf("Create hold_key: %v", err)
	}

	got, err := artifacts.ListForRun(ctx, runID)
	if err != nil {
		t.Fatalf("ListForRun: %v", err)
	}
	if len(got) != 2 {
		t.Fatalf("ListForRun returned %d, want 2", len(got))
	}
	first, second := got[0], got[1]
	if first.Kind != "inventory" || second.Kind != "hold_key" {
		t.Fatalf("unexpected order: %+v", got)
	}
	if second.Path != "/var/artifacts/run-1/hold.key" {
		t.Fatalf("hold_key path lost: %q", second.Path)
	}
}
// TestSpecDiffsReplaceForRun writes three diffs, then one, and checks
// that the second write replaced the first instead of appending to it.
func TestSpecDiffsReplaceForRun(t *testing.T) {
	runStore := newDB(t)
	_, runID := seedRun(t, runStore)
	ctx := context.Background()
	diffStore := &store.SpecDiffs{DB: runStore.DB}

	// First write: three diffs.
	initial := []model.SpecDiff{
		{RunID: runID, Field: "cpu.model", Expected: "Xeon", Actual: "EPYC", Severity: "critical"},
		{RunID: runID, Field: "memory.total_gib", Expected: "16", Actual: "8", Severity: "critical"},
		{RunID: runID, Field: "note", Expected: "", Actual: "dusty", Severity: "info"},
	}
	if err := diffStore.ReplaceForRun(ctx, runID, initial); err != nil {
		t.Fatalf("ReplaceForRun: %v", err)
	}
	stored, err := diffStore.ListForRun(ctx, runID)
	if err != nil {
		t.Fatalf("ListForRun: %v", err)
	}
	if len(stored) != 3 {
		t.Fatalf("got %d rows, want 3", len(stored))
	}

	// Second write replaces, doesn't append — otherwise a re-run would
	// double-count spec diffs and the tile badge would grow without bound.
	replacement := []model.SpecDiff{
		{RunID: runID, Field: "cpu.model", Expected: "Xeon", Actual: "Xeon Gold", Severity: "info"},
	}
	if err := diffStore.ReplaceForRun(ctx, runID, replacement); err != nil {
		t.Fatalf("second ReplaceForRun: %v", err)
	}
	stored, err = diffStore.ListForRun(ctx, runID)
	if err != nil {
		t.Fatalf("ListForRun after replace: %v", err)
	}
	if len(stored) != 1 {
		t.Fatalf("expected 1 row after replace, got %d", len(stored))
	}
	if sev := stored[0].Severity; sev != "info" {
		t.Fatalf("expected severity info, got %q", sev)
	}
}
// TestMeasurementsBatchAndList inserts a three-sample batch, checks that
// an empty batch is accepted, and checks that ListForRun returns the
// three samples including the iperf one with its value.
func TestMeasurementsBatchAndList(t *testing.T) {
	runStore := newDB(t)
	_, runID := seedRun(t, runStore)
	ctx := context.Background()
	measurements := &store.Measurements{DB: runStore.DB}

	batch := []model.Measurement{
		{RunID: runID, Kind: "thermal", Key: "cpu", Value: 52.5, Unit: "C"},
		{RunID: runID, Kind: "iperf", Key: "throughput_mbps", Value: 940.1, Unit: "Mbps"},
		{RunID: runID, Kind: "psu", Key: "in0", Value: 12.04, Unit: "V"},
	}
	if err := measurements.CreateBatch(ctx, batch); err != nil {
		t.Fatalf("CreateBatch: %v", err)
	}

	// Zero-length batch must be a no-op, not an error.
	if err := measurements.CreateBatch(ctx, nil); err != nil {
		t.Fatalf("empty CreateBatch: %v", err)
	}

	stored, err := measurements.ListForRun(ctx, runID)
	if err != nil {
		t.Fatalf("ListForRun: %v", err)
	}
	if len(stored) != 3 {
		t.Fatalf("got %d rows, want 3", len(stored))
	}

	sawIperf := false
	for i := range stored {
		sample := stored[i]
		if sample.Kind == "iperf" && sample.Key == "throughput_mbps" && sample.Value > 900 {
			sawIperf = true
			break
		}
	}
	if !sawIperf {
		t.Fatalf("iperf row missing or wrong value: %+v", stored)
	}
}
// TestRunsOverrideFlagsAndClearFailedStage checks that the failed stage
// and override flags round-trip through Get, and that ClearFailedStage
// wipes only the failed stage.
func TestRunsOverrideFlagsAndClearFailedStage(t *testing.T) {
	runStore := newDB(t)
	_, runID := seedRun(t, runStore)
	ctx := context.Background()

	if err := runStore.SetFailedStage(ctx, runID, "Storage"); err != nil {
		t.Fatalf("SetFailedStage: %v", err)
	}
	if err := runStore.SetOverrideFlags(ctx, runID, `{"wipe":true}`); err != nil {
		t.Fatalf("SetOverrideFlags: %v", err)
	}

	before, err := runStore.Get(ctx, runID)
	if err != nil {
		t.Fatalf("Get: %v", err)
	}
	if before.OverrideFlagsJSON != `{"wipe":true}` {
		t.Fatalf("OverrideFlagsJSON = %q, want {\"wipe\":true}", before.OverrideFlagsJSON)
	}
	if before.FailedStage != "Storage" {
		t.Fatalf("FailedStage = %q, want Storage", before.FailedStage)
	}

	if err := runStore.ClearFailedStage(ctx, runID); err != nil {
		t.Fatalf("ClearFailedStage: %v", err)
	}
	after, err := runStore.Get(ctx, runID)
	if err != nil {
		t.Fatalf("Get after clear: %v", err)
	}
	if after.FailedStage != "" {
		t.Fatalf("FailedStage not cleared: %q", after.FailedStage)
	}
	// override_flags_json should persist across ClearFailedStage so the
	// agent can still read it on its next heartbeat.
	if after.OverrideFlagsJSON != `{"wipe":true}` {
		t.Fatalf("OverrideFlagsJSON lost after ClearFailedStage: %q", after.OverrideFlagsJSON)
	}
}
// TestRunsHoldAndFailedStage checks that the hold IP and failed stage
// set on a run are returned by Get.
func TestRunsHoldAndFailedStage(t *testing.T) {
	runStore := newDB(t)
	_, runID := seedRun(t, runStore)
	ctx := context.Background()

	if err := runStore.SetHoldIP(ctx, runID, "10.0.0.42"); err != nil {
		t.Fatalf("SetHoldIP: %v", err)
	}
	if err := runStore.SetFailedStage(ctx, runID, "SpecValidate"); err != nil {
		t.Fatalf("SetFailedStage: %v", err)
	}

	loaded, err := runStore.Get(ctx, runID)
	if err != nil {
		t.Fatalf("Get: %v", err)
	}
	if got := loaded.HoldIP; got != "10.0.0.42" {
		t.Fatalf("HoldIP = %q, want 10.0.0.42", got)
	}
	if got := loaded.FailedStage; got != "SpecValidate" {
		t.Fatalf("FailedStage = %q, want SpecValidate", got)
	}
}