Post-repair hardware validation pipeline for Proxmox cluster hosts. Go orchestrator + in-image agent + mkosi live image + bundled dnsmasq PXE + SQLite + HTMX/SSE UI + notify registry + janitor + full docs.
This commit is contained in:
@@ -0,0 +1,126 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
|
||||
"vetting/internal/model"
|
||||
)
|
||||
|
||||
// Artifact mirrors one row of the artifacts table. DeleteForRun hands these
// back to the janitor so it can unlink the file named by Path.
type Artifact struct {
	ID        int64  // primary key of the row
	RunID     int64  // run the artifact belongs to
	StageID   *int64 // nil is written to / read from the database as NULL
	Kind      string // inventory|spec_diff|hold_key|report|log|fio|iperf|smart
	Path      string // on-disk location of the artifact file
	SHA256    string // value of the sha256 column
	SizeBytes int64  // value of the size_bytes column
}
|
||||
|
||||
// Artifacts is the store for the artifacts table. All of its methods run
// their SQL against DB.
type Artifacts struct {
	DB *sql.DB
}
|
||||
|
||||
func (a *Artifacts) Create(ctx context.Context, art Artifact) (int64, error) {
|
||||
res, err := a.DB.ExecContext(ctx, `
|
||||
INSERT INTO artifacts(run_id, stage_id, kind, path, sha256, size_bytes)
|
||||
VALUES(?,?,?,?,?,?)
|
||||
`, art.RunID, nullInt64(art.StageID), art.Kind, art.Path, art.SHA256, art.SizeBytes)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("insert artifact: %w", err)
|
||||
}
|
||||
return res.LastInsertId()
|
||||
}
|
||||
|
||||
// DeleteForRun removes every artifact row for a run. Returns the rows
|
||||
// that were deleted so the caller can unlink the on-disk files. Used by
|
||||
// the janitor; ordinary flow treats artifacts as append-only.
|
||||
func (a *Artifacts) DeleteForRun(ctx context.Context, runID int64) ([]Artifact, error) {
|
||||
arts, err := a.ListForRun(ctx, runID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if _, err := a.DB.ExecContext(ctx, `DELETE FROM artifacts WHERE run_id = ?`, runID); err != nil {
|
||||
return nil, fmt.Errorf("delete artifacts for run %d: %w", runID, err)
|
||||
}
|
||||
return arts, nil
|
||||
}
|
||||
|
||||
func (a *Artifacts) ListForRun(ctx context.Context, runID int64) ([]Artifact, error) {
|
||||
rows, err := a.DB.QueryContext(ctx, `
|
||||
SELECT id, run_id, stage_id, kind, path, sha256, size_bytes
|
||||
FROM artifacts WHERE run_id = ? ORDER BY id
|
||||
`, runID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
var out []Artifact
|
||||
for rows.Next() {
|
||||
var ar Artifact
|
||||
var stageID sql.NullInt64
|
||||
if err := rows.Scan(&ar.ID, &ar.RunID, &stageID, &ar.Kind, &ar.Path, &ar.SHA256, &ar.SizeBytes); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if stageID.Valid {
|
||||
v := stageID.Int64
|
||||
ar.StageID = &v
|
||||
}
|
||||
out = append(out, ar)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// SpecDiffs is the store for the spec_diffs table. All of its methods run
// their SQL against DB.
type SpecDiffs struct {
	DB *sql.DB
}
|
||||
|
||||
func (s *SpecDiffs) ReplaceForRun(ctx context.Context, runID int64, diffs []model.SpecDiff) error {
|
||||
tx, err := s.DB.BeginTx(ctx, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() { _ = tx.Rollback() }()
|
||||
if _, err := tx.ExecContext(ctx, `DELETE FROM spec_diffs WHERE run_id = ?`, runID); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, d := range diffs {
|
||||
if _, err := tx.ExecContext(ctx, `
|
||||
INSERT INTO spec_diffs(run_id, field, expected, actual, severity, ignored)
|
||||
VALUES(?,?,?,?,?,?)
|
||||
`, runID, d.Field, d.Expected, d.Actual, d.Severity, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return tx.Commit()
|
||||
}
|
||||
|
||||
func (s *SpecDiffs) ListForRun(ctx context.Context, runID int64) ([]model.SpecDiff, error) {
|
||||
rows, err := s.DB.QueryContext(ctx, `
|
||||
SELECT id, run_id, field, COALESCE(expected,''), COALESCE(actual,''), severity, ignored
|
||||
FROM spec_diffs WHERE run_id = ? ORDER BY id
|
||||
`, runID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
var out []model.SpecDiff
|
||||
for rows.Next() {
|
||||
var d model.SpecDiff
|
||||
var ignored int
|
||||
if err := rows.Scan(&d.ID, &d.RunID, &d.Field, &d.Expected, &d.Actual, &d.Severity, &ignored); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
d.Ignored = ignored != 0
|
||||
out = append(out, d)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// nullInt64 converts an optional int64 into a query argument: the pointed-to
// value when p is set, or an untyped nil (stored as SQL NULL) when it is not.
func nullInt64(p *int64) any {
	if p != nil {
		return *p
	}
	return nil
}
|
||||
@@ -0,0 +1,98 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"vetting/internal/model"
|
||||
)
|
||||
|
||||
// Hosts is the store for the hosts table. All of its methods run their SQL
// against DB.
type Hosts struct {
	DB *sql.DB
}
|
||||
|
||||
// ErrNotFound is the sentinel returned when a lookup or delete by id matches
// no row. Compare with errors.Is.
var ErrNotFound = errors.New("not found")
|
||||
|
||||
func (h *Hosts) Create(ctx context.Context, in model.Host) (int64, error) {
|
||||
in.MAC = normalizeMAC(in.MAC)
|
||||
res, err := h.DB.ExecContext(ctx, `
|
||||
INSERT INTO hosts(name, mac, wol_broadcast_ip, wol_port, expected_spec_yaml, pdu_config_json, ipmi_config_json, notes)
|
||||
VALUES(?,?,?,?,?,?,?,?)
|
||||
`, in.Name, in.MAC, in.WoLBroadcastIP, in.WoLPort, in.ExpectedSpecYAML, nullIfEmpty(in.PDUConfigJSON), nullIfEmpty(in.IPMIConfigJSON), in.Notes)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("insert host: %w", err)
|
||||
}
|
||||
return res.LastInsertId()
|
||||
}
|
||||
|
||||
func (h *Hosts) List(ctx context.Context) ([]model.Host, error) {
|
||||
rows, err := h.DB.QueryContext(ctx, `
|
||||
SELECT id, name, mac, wol_broadcast_ip, wol_port, expected_spec_yaml,
|
||||
COALESCE(pdu_config_json,''), COALESCE(ipmi_config_json,''),
|
||||
notes, created_at, updated_at
|
||||
FROM hosts
|
||||
ORDER BY name COLLATE NOCASE
|
||||
`)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("list hosts: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var out []model.Host
|
||||
for rows.Next() {
|
||||
var host model.Host
|
||||
if err := rows.Scan(&host.ID, &host.Name, &host.MAC, &host.WoLBroadcastIP, &host.WoLPort,
|
||||
&host.ExpectedSpecYAML, &host.PDUConfigJSON, &host.IPMIConfigJSON,
|
||||
&host.Notes, &host.CreatedAt, &host.UpdatedAt); err != nil {
|
||||
return nil, fmt.Errorf("scan host: %w", err)
|
||||
}
|
||||
out = append(out, host)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
func (h *Hosts) Get(ctx context.Context, id int64) (*model.Host, error) {
|
||||
row := h.DB.QueryRowContext(ctx, `
|
||||
SELECT id, name, mac, wol_broadcast_ip, wol_port, expected_spec_yaml,
|
||||
COALESCE(pdu_config_json,''), COALESCE(ipmi_config_json,''),
|
||||
notes, created_at, updated_at
|
||||
FROM hosts WHERE id = ?
|
||||
`, id)
|
||||
var host model.Host
|
||||
err := row.Scan(&host.ID, &host.Name, &host.MAC, &host.WoLBroadcastIP, &host.WoLPort,
|
||||
&host.ExpectedSpecYAML, &host.PDUConfigJSON, &host.IPMIConfigJSON,
|
||||
&host.Notes, &host.CreatedAt, &host.UpdatedAt)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return nil, ErrNotFound
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("get host: %w", err)
|
||||
}
|
||||
return &host, nil
|
||||
}
|
||||
|
||||
func (h *Hosts) Delete(ctx context.Context, id int64) error {
|
||||
res, err := h.DB.ExecContext(ctx, `DELETE FROM hosts WHERE id = ?`, id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("delete host: %w", err)
|
||||
}
|
||||
n, _ := res.RowsAffected()
|
||||
if n == 0 {
|
||||
return ErrNotFound
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// normalizeMAC returns m with surrounding whitespace removed and all letters
// lower-cased. Separators and length are not validated or changed.
func normalizeMAC(m string) string {
	trimmed := strings.TrimSpace(m)
	return strings.ToLower(trimmed)
}
|
||||
|
||||
// nullIfEmpty converts a string into a query argument: the string itself
// when it is non-empty, or an untyped nil (stored as SQL NULL) when it is
// empty.
func nullIfEmpty(s string) any {
	if len(s) > 0 {
		return s
	}
	return nil
}
|
||||
@@ -0,0 +1,85 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"vetting/internal/model"
|
||||
)
|
||||
|
||||
// Measurements is the store for timestamped numeric samples such as
// temperatures, fan speeds, PSU voltages, fio IOPS, iperf throughput and
// SMART attributes. Every sample is a (kind, key, value, unit) tuple in one
// shared table, so Phase 5 reports can group them freely without a new table
// per source.
type Measurements struct {
	DB *sql.DB
}
|
||||
|
||||
func (m *Measurements) Create(ctx context.Context, in model.Measurement) (int64, error) {
|
||||
if in.TS.IsZero() {
|
||||
in.TS = time.Now().UTC()
|
||||
}
|
||||
res, err := m.DB.ExecContext(ctx, `
|
||||
INSERT INTO measurements(run_id, stage_id, ts, kind, key, value, unit)
|
||||
VALUES(?,?,?,?,?,?,?)
|
||||
`, in.RunID, nullInt64(in.StageID), in.TS, in.Kind, in.Key, in.Value, in.Unit)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("insert measurement: %w", err)
|
||||
}
|
||||
return res.LastInsertId()
|
||||
}
|
||||
|
||||
// CreateBatch inserts a batch in one transaction. The sensor endpoint
|
||||
// hands us ~5–20 samples per tick; a single commit keeps SQLite happy.
|
||||
func (m *Measurements) CreateBatch(ctx context.Context, rows []model.Measurement) error {
|
||||
if len(rows) == 0 {
|
||||
return nil
|
||||
}
|
||||
tx, err := m.DB.BeginTx(ctx, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() { _ = tx.Rollback() }()
|
||||
now := time.Now().UTC()
|
||||
for _, r := range rows {
|
||||
if r.TS.IsZero() {
|
||||
r.TS = now
|
||||
}
|
||||
if _, err := tx.ExecContext(ctx, `
|
||||
INSERT INTO measurements(run_id, stage_id, ts, kind, key, value, unit)
|
||||
VALUES(?,?,?,?,?,?,?)
|
||||
`, r.RunID, nullInt64(r.StageID), r.TS, r.Kind, r.Key, r.Value, r.Unit); err != nil {
|
||||
return fmt.Errorf("insert measurement: %w", err)
|
||||
}
|
||||
}
|
||||
return tx.Commit()
|
||||
}
|
||||
|
||||
// ListForRun returns all measurements for a run. Callers filter by kind
|
||||
// in memory; the row count is small per run (≈thousands).
|
||||
func (m *Measurements) ListForRun(ctx context.Context, runID int64) ([]model.Measurement, error) {
|
||||
rows, err := m.DB.QueryContext(ctx, `
|
||||
SELECT id, run_id, stage_id, ts, kind, key, value, COALESCE(unit,'')
|
||||
FROM measurements WHERE run_id = ? ORDER BY ts, id
|
||||
`, runID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
var out []model.Measurement
|
||||
for rows.Next() {
|
||||
var meas model.Measurement
|
||||
var stageID sql.NullInt64
|
||||
if err := rows.Scan(&meas.ID, &meas.RunID, &stageID, &meas.TS, &meas.Kind, &meas.Key, &meas.Value, &meas.Unit); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if stageID.Valid {
|
||||
v := stageID.Int64
|
||||
meas.StageID = &v
|
||||
}
|
||||
out = append(out, meas)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
@@ -0,0 +1,226 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"vetting/internal/model"
|
||||
)
|
||||
|
||||
// Runs is the store for the runs table. All of its methods run their SQL
// against DB.
type Runs struct {
	DB *sql.DB
}
|
||||
|
||||
func (r *Runs) Create(ctx context.Context, hostID int64, tokenHash string) (int64, error) {
|
||||
now := time.Now().UTC()
|
||||
res, err := r.DB.ExecContext(ctx, `
|
||||
INSERT INTO runs(host_id, state, agent_token_hash, next_boot_target, started_at)
|
||||
VALUES(?,?,?,?,?)
|
||||
`, hostID, string(model.StateQueued), tokenHash, "linux", now)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("insert run: %w", err)
|
||||
}
|
||||
return res.LastInsertId()
|
||||
}
|
||||
|
||||
func (r *Runs) SetState(ctx context.Context, runID int64, state model.RunState) error {
|
||||
_, err := r.DB.ExecContext(ctx, `UPDATE runs SET state = ? WHERE id = ?`, string(state), runID)
|
||||
return err
|
||||
}
|
||||
|
||||
// RotateTokenHash replaces the stored token hash. Called on each iPXE
|
||||
// fetch so only the most-recently-booted agent can claim the run.
|
||||
func (r *Runs) RotateTokenHash(ctx context.Context, runID int64, hash string) error {
|
||||
_, err := r.DB.ExecContext(ctx, `UPDATE runs SET agent_token_hash = ? WHERE id = ?`, hash, runID)
|
||||
return err
|
||||
}
|
||||
|
||||
// SetHoldIP records the agent's LAN IP so the UI can show the ssh
|
||||
// command. Called when the agent POSTs /hold.
|
||||
func (r *Runs) SetHoldIP(ctx context.Context, runID int64, ip string) error {
|
||||
_, err := r.DB.ExecContext(ctx, `UPDATE runs SET hold_ip = ? WHERE id = ?`, ip, runID)
|
||||
return err
|
||||
}
|
||||
|
||||
// SetFailedStage records which stage tripped the run; used by the tile
|
||||
// and by reports. Does not change state.
|
||||
func (r *Runs) SetFailedStage(ctx context.Context, runID int64, stage string) error {
|
||||
_, err := r.DB.ExecContext(ctx, `UPDATE runs SET failed_stage = ? WHERE id = ?`, stage, runID)
|
||||
return err
|
||||
}
|
||||
|
||||
// ClearFailedStage wipes the failed_stage marker. Called when the
|
||||
// operator overrides a stage and the run re-enters the pipeline.
|
||||
func (r *Runs) ClearFailedStage(ctx context.Context, runID int64) error {
|
||||
_, err := r.DB.ExecContext(ctx, `UPDATE runs SET failed_stage = NULL WHERE id = ?`, runID)
|
||||
return err
|
||||
}
|
||||
|
||||
// SetOverrideFlags persists the operator's override decisions (JSON blob
|
||||
// like `{"wipe":true}`). Passed back to the agent on the next heartbeat
|
||||
// so it can resume the held stage with the gate bypassed.
|
||||
func (r *Runs) SetOverrideFlags(ctx context.Context, runID int64, flagsJSON string) error {
|
||||
_, err := r.DB.ExecContext(ctx, `UPDATE runs SET override_flags_json = ? WHERE id = ?`, flagsJSON, runID)
|
||||
return err
|
||||
}
|
||||
|
||||
func (r *Runs) MarkFailed(ctx context.Context, runID int64, failedStage, holdIP string) error {
|
||||
now := time.Now().UTC()
|
||||
_, err := r.DB.ExecContext(ctx, `
|
||||
UPDATE runs SET state = ?, result = 'fail', failed_stage = ?, hold_ip = ?, completed_at = ?
|
||||
WHERE id = ?
|
||||
`, string(model.StateFailedHolding), failedStage, holdIP, now, runID)
|
||||
return err
|
||||
}
|
||||
|
||||
func (r *Runs) MarkCompleted(ctx context.Context, runID int64, reportPath string) error {
|
||||
now := time.Now().UTC()
|
||||
_, err := r.DB.ExecContext(ctx, `
|
||||
UPDATE runs SET state = ?, result = 'pass', report_path = ?, completed_at = ?
|
||||
WHERE id = ?
|
||||
`, string(model.StateCompleted), reportPath, now, runID)
|
||||
return err
|
||||
}
|
||||
|
||||
func (r *Runs) Get(ctx context.Context, id int64) (*model.Run, error) {
|
||||
row := r.DB.QueryRowContext(ctx, `
|
||||
SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
|
||||
COALESCE(next_boot_target,''), agent_token_hash, started_at,
|
||||
completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
|
||||
COALESCE(override_flags_json,'')
|
||||
FROM runs WHERE id = ?
|
||||
`, id)
|
||||
var run model.Run
|
||||
var completedAt sql.NullTime
|
||||
err := row.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
|
||||
&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
|
||||
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return nil, ErrNotFound
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("get run: %w", err)
|
||||
}
|
||||
if completedAt.Valid {
|
||||
run.CompletedAt = &completedAt.Time
|
||||
}
|
||||
return &run, nil
|
||||
}
|
||||
|
||||
// LatestForHost returns the most recent run for a host, or nil if none.
|
||||
func (r *Runs) LatestForHost(ctx context.Context, hostID int64) (*model.Run, error) {
|
||||
row := r.DB.QueryRowContext(ctx, `
|
||||
SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
|
||||
COALESCE(next_boot_target,''), agent_token_hash, started_at,
|
||||
completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
|
||||
COALESCE(override_flags_json,'')
|
||||
FROM runs WHERE host_id = ?
|
||||
ORDER BY id DESC LIMIT 1
|
||||
`, hostID)
|
||||
var run model.Run
|
||||
var completedAt sql.NullTime
|
||||
err := row.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
|
||||
&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
|
||||
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("latest run: %w", err)
|
||||
}
|
||||
if completedAt.Valid {
|
||||
run.CompletedAt = &completedAt.Time
|
||||
}
|
||||
return &run, nil
|
||||
}
|
||||
|
||||
// Active returns all runs in non-terminal states.
|
||||
func (r *Runs) Active(ctx context.Context) ([]model.Run, error) {
|
||||
rows, err := r.DB.QueryContext(ctx, `
|
||||
SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
|
||||
COALESCE(next_boot_target,''), agent_token_hash, started_at,
|
||||
completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
|
||||
COALESCE(override_flags_json,'')
|
||||
FROM runs
|
||||
WHERE state NOT IN ('Completed','Released')
|
||||
ORDER BY id
|
||||
`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
var out []model.Run
|
||||
for rows.Next() {
|
||||
var run model.Run
|
||||
var completedAt sql.NullTime
|
||||
if err := rows.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
|
||||
&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
|
||||
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if completedAt.Valid {
|
||||
run.CompletedAt = &completedAt.Time
|
||||
}
|
||||
out = append(out, run)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// CompletedOlderThan returns run IDs for terminal (Completed/Released/
|
||||
// FailedHolding) runs whose completed_at is older than cutoff. Runs with
|
||||
// a NULL completed_at fall back to started_at so a stuck run doesn't get
|
||||
// garbage-collected out from under its own logs. Used by the janitor.
|
||||
func (r *Runs) CompletedOlderThan(ctx context.Context, cutoff time.Time) ([]int64, error) {
|
||||
rows, err := r.DB.QueryContext(ctx, `
|
||||
SELECT id FROM runs
|
||||
WHERE state IN ('Completed','Released','FailedHolding')
|
||||
AND COALESCE(completed_at, started_at) < ?
|
||||
ORDER BY id
|
||||
`, cutoff)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
var out []int64
|
||||
for rows.Next() {
|
||||
var id int64
|
||||
if err := rows.Scan(&id); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, id)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// FindByMAC returns the current active run for the host with the given MAC,
|
||||
// or nil if the MAC is unknown or has no active run.
|
||||
func (r *Runs) FindActiveByMAC(ctx context.Context, mac string) (*model.Run, error) {
|
||||
row := r.DB.QueryRowContext(ctx, `
|
||||
SELECT r.id, r.host_id, r.state, COALESCE(r.result,''), COALESCE(r.failed_stage,''),
|
||||
COALESCE(r.next_boot_target,''), r.agent_token_hash, r.started_at,
|
||||
r.completed_at, COALESCE(r.report_path,''), COALESCE(r.hold_ip,''),
|
||||
COALESCE(r.override_flags_json,'')
|
||||
FROM runs r
|
||||
JOIN hosts h ON h.id = r.host_id
|
||||
WHERE h.mac = ? AND r.state NOT IN ('Completed','Released')
|
||||
ORDER BY r.id DESC LIMIT 1
|
||||
`, mac)
|
||||
var run model.Run
|
||||
var completedAt sql.NullTime
|
||||
err := row.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
|
||||
&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
|
||||
&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if completedAt.Valid {
|
||||
run.CompletedAt = &completedAt.Time
|
||||
}
|
||||
return &run, nil
|
||||
}
|
||||
@@ -0,0 +1,91 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"vetting/internal/model"
|
||||
)
|
||||
|
||||
// Stages is the store for the stages table. All of its methods run their
// SQL against DB.
type Stages struct {
	DB *sql.DB
}
|
||||
|
||||
// DefaultStageOrder lists the stage names of every run in their canonical
// order; Seed uses each entry's index as the stage's ordinal. Phase 2 only
// reaches Inventory. Later phases add more executors, but this list itself
// stays fixed.
var DefaultStageOrder = []string{
	"Inventory",
	"SpecValidate",
	"SMART",
	"CPUStress",
	"Storage",
	"Network",
	"GPU",
	"PSU",
	"Reporting",
}
|
||||
|
||||
// Seed creates one pending row per stage for the given run.
|
||||
func (s *Stages) Seed(ctx context.Context, runID int64) error {
|
||||
tx, err := s.DB.BeginTx(ctx, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() { _ = tx.Rollback() }()
|
||||
for i, name := range DefaultStageOrder {
|
||||
if _, err := tx.ExecContext(ctx,
|
||||
`INSERT INTO stages(run_id, name, ordinal, state) VALUES(?,?,?,?)`,
|
||||
runID, name, i, string(model.StagePending)); err != nil {
|
||||
return fmt.Errorf("seed stage %s: %w", name, err)
|
||||
}
|
||||
}
|
||||
return tx.Commit()
|
||||
}
|
||||
|
||||
func (s *Stages) ListForRun(ctx context.Context, runID int64) ([]model.Stage, error) {
|
||||
rows, err := s.DB.QueryContext(ctx, `
|
||||
SELECT id, run_id, name, ordinal, state, started_at, completed_at, COALESCE(summary_json,'')
|
||||
FROM stages WHERE run_id = ? ORDER BY ordinal
|
||||
`, runID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
var out []model.Stage
|
||||
for rows.Next() {
|
||||
var st model.Stage
|
||||
var started, completed sql.NullTime
|
||||
if err := rows.Scan(&st.ID, &st.RunID, &st.Name, &st.Ordinal, &st.State,
|
||||
&started, &completed, &st.SummaryJSON); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if started.Valid {
|
||||
st.StartedAt = &started.Time
|
||||
}
|
||||
if completed.Valid {
|
||||
st.CompletedAt = &completed.Time
|
||||
}
|
||||
out = append(out, st)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
func (s *Stages) StartByName(ctx context.Context, runID int64, name string) error {
|
||||
now := time.Now().UTC()
|
||||
_, err := s.DB.ExecContext(ctx, `
|
||||
UPDATE stages SET state = ?, started_at = ?
|
||||
WHERE run_id = ? AND name = ?
|
||||
`, string(model.StageRunning), now, runID, name)
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *Stages) CompleteByName(ctx context.Context, runID int64, name string, state model.StageState, summaryJSON string) error {
|
||||
now := time.Now().UTC()
|
||||
_, err := s.DB.ExecContext(ctx, `
|
||||
UPDATE stages SET state = ?, completed_at = ?, summary_json = ?
|
||||
WHERE run_id = ? AND name = ?
|
||||
`, string(state), now, nullIfEmpty(summaryJSON), runID, name)
|
||||
return err
|
||||
}
|
||||
@@ -0,0 +1,229 @@
|
||||
package store_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"vetting/internal/db"
|
||||
"vetting/internal/model"
|
||||
"vetting/internal/store"
|
||||
)
|
||||
|
||||
func newDB(t *testing.T) *store.Runs {
|
||||
t.Helper()
|
||||
path := filepath.Join(t.TempDir(), "vetting.db")
|
||||
conn, err := db.Open(path)
|
||||
if err != nil {
|
||||
t.Fatalf("open db: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = conn.Close() })
|
||||
return &store.Runs{DB: conn}
|
||||
}
|
||||
|
||||
// seedRun inserts a host + a run and returns (hostID, runID). Every
|
||||
// subsequent store test builds on this so run_id foreign keys resolve.
|
||||
func seedRun(t *testing.T, runs *store.Runs) (int64, int64) {
|
||||
t.Helper()
|
||||
hosts := &store.Hosts{DB: runs.DB}
|
||||
hostID, err := hosts.Create(context.Background(), model.Host{
|
||||
Name: "t-host",
|
||||
MAC: "aa:bb:cc:dd:ee:ff",
|
||||
WoLBroadcastIP: "10.0.0.255",
|
||||
WoLPort: 9,
|
||||
ExpectedSpecYAML: "memory:\n total_gib: 16\n",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("create host: %v", err)
|
||||
}
|
||||
runID, err := runs.Create(context.Background(), hostID, "deadbeef")
|
||||
if err != nil {
|
||||
t.Fatalf("create run: %v", err)
|
||||
}
|
||||
return hostID, runID
|
||||
}
|
||||
|
||||
func TestArtifactsRoundtrip(t *testing.T) {
|
||||
runs := newDB(t)
|
||||
_, runID := seedRun(t, runs)
|
||||
arts := &store.Artifacts{DB: runs.DB}
|
||||
|
||||
id, err := arts.Create(context.Background(), store.Artifact{
|
||||
RunID: runID,
|
||||
Kind: "inventory",
|
||||
Path: "/var/artifacts/run-1/inventory.json",
|
||||
SHA256: "abc123",
|
||||
SizeBytes: 42,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Create: %v", err)
|
||||
}
|
||||
if id == 0 {
|
||||
t.Fatalf("expected non-zero id")
|
||||
}
|
||||
|
||||
// Hold key on the same run — ListForRun should return both in
|
||||
// insertion order and TileEnricher picks the hold_key row.
|
||||
if _, err := arts.Create(context.Background(), store.Artifact{
|
||||
RunID: runID, Kind: "hold_key", Path: "/var/artifacts/run-1/hold.key", SHA256: "def456", SizeBytes: 400,
|
||||
}); err != nil {
|
||||
t.Fatalf("Create hold_key: %v", err)
|
||||
}
|
||||
|
||||
list, err := arts.ListForRun(context.Background(), runID)
|
||||
if err != nil {
|
||||
t.Fatalf("ListForRun: %v", err)
|
||||
}
|
||||
if len(list) != 2 {
|
||||
t.Fatalf("ListForRun returned %d, want 2", len(list))
|
||||
}
|
||||
if list[0].Kind != "inventory" || list[1].Kind != "hold_key" {
|
||||
t.Fatalf("unexpected order: %+v", list)
|
||||
}
|
||||
if list[1].Path != "/var/artifacts/run-1/hold.key" {
|
||||
t.Fatalf("hold_key path lost: %q", list[1].Path)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpecDiffsReplaceForRun(t *testing.T) {
|
||||
runs := newDB(t)
|
||||
_, runID := seedRun(t, runs)
|
||||
sd := &store.SpecDiffs{DB: runs.DB}
|
||||
ctx := context.Background()
|
||||
|
||||
// First write: three diffs.
|
||||
err := sd.ReplaceForRun(ctx, runID, []model.SpecDiff{
|
||||
{RunID: runID, Field: "cpu.model", Expected: "Xeon", Actual: "EPYC", Severity: "critical"},
|
||||
{RunID: runID, Field: "memory.total_gib", Expected: "16", Actual: "8", Severity: "critical"},
|
||||
{RunID: runID, Field: "note", Expected: "", Actual: "dusty", Severity: "info"},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("ReplaceForRun: %v", err)
|
||||
}
|
||||
|
||||
list, err := sd.ListForRun(ctx, runID)
|
||||
if err != nil {
|
||||
t.Fatalf("ListForRun: %v", err)
|
||||
}
|
||||
if len(list) != 3 {
|
||||
t.Fatalf("got %d rows, want 3", len(list))
|
||||
}
|
||||
|
||||
// Second write replaces, doesn't append — otherwise a re-run would
|
||||
// double-count spec diffs and the tile badge would grow without bound.
|
||||
err = sd.ReplaceForRun(ctx, runID, []model.SpecDiff{
|
||||
{RunID: runID, Field: "cpu.model", Expected: "Xeon", Actual: "Xeon Gold", Severity: "info"},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("second ReplaceForRun: %v", err)
|
||||
}
|
||||
list, err = sd.ListForRun(ctx, runID)
|
||||
if err != nil {
|
||||
t.Fatalf("ListForRun after replace: %v", err)
|
||||
}
|
||||
if len(list) != 1 {
|
||||
t.Fatalf("expected 1 row after replace, got %d", len(list))
|
||||
}
|
||||
if list[0].Severity != "info" {
|
||||
t.Fatalf("expected severity info, got %q", list[0].Severity)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeasurementsBatchAndList(t *testing.T) {
|
||||
runs := newDB(t)
|
||||
_, runID := seedRun(t, runs)
|
||||
meas := &store.Measurements{DB: runs.DB}
|
||||
ctx := context.Background()
|
||||
|
||||
err := meas.CreateBatch(ctx, []model.Measurement{
|
||||
{RunID: runID, Kind: "thermal", Key: "cpu", Value: 52.5, Unit: "C"},
|
||||
{RunID: runID, Kind: "iperf", Key: "throughput_mbps", Value: 940.1, Unit: "Mbps"},
|
||||
{RunID: runID, Kind: "psu", Key: "in0", Value: 12.04, Unit: "V"},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("CreateBatch: %v", err)
|
||||
}
|
||||
|
||||
// Zero-length batch must be a no-op, not an error.
|
||||
if err := meas.CreateBatch(ctx, nil); err != nil {
|
||||
t.Fatalf("empty CreateBatch: %v", err)
|
||||
}
|
||||
|
||||
rows, err := meas.ListForRun(ctx, runID)
|
||||
if err != nil {
|
||||
t.Fatalf("ListForRun: %v", err)
|
||||
}
|
||||
if len(rows) != 3 {
|
||||
t.Fatalf("got %d rows, want 3", len(rows))
|
||||
}
|
||||
foundIperf := false
|
||||
for _, r := range rows {
|
||||
if r.Kind == "iperf" && r.Key == "throughput_mbps" && r.Value > 900 {
|
||||
foundIperf = true
|
||||
}
|
||||
}
|
||||
if !foundIperf {
|
||||
t.Fatalf("iperf row missing or wrong value: %+v", rows)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunsOverrideFlagsAndClearFailedStage(t *testing.T) {
|
||||
runs := newDB(t)
|
||||
_, runID := seedRun(t, runs)
|
||||
ctx := context.Background()
|
||||
|
||||
if err := runs.SetFailedStage(ctx, runID, "Storage"); err != nil {
|
||||
t.Fatalf("SetFailedStage: %v", err)
|
||||
}
|
||||
if err := runs.SetOverrideFlags(ctx, runID, `{"wipe":true}`); err != nil {
|
||||
t.Fatalf("SetOverrideFlags: %v", err)
|
||||
}
|
||||
run, err := runs.Get(ctx, runID)
|
||||
if err != nil {
|
||||
t.Fatalf("Get: %v", err)
|
||||
}
|
||||
if run.OverrideFlagsJSON != `{"wipe":true}` {
|
||||
t.Fatalf("OverrideFlagsJSON = %q, want {\"wipe\":true}", run.OverrideFlagsJSON)
|
||||
}
|
||||
if run.FailedStage != "Storage" {
|
||||
t.Fatalf("FailedStage = %q, want Storage", run.FailedStage)
|
||||
}
|
||||
if err := runs.ClearFailedStage(ctx, runID); err != nil {
|
||||
t.Fatalf("ClearFailedStage: %v", err)
|
||||
}
|
||||
run, err = runs.Get(ctx, runID)
|
||||
if err != nil {
|
||||
t.Fatalf("Get after clear: %v", err)
|
||||
}
|
||||
if run.FailedStage != "" {
|
||||
t.Fatalf("FailedStage not cleared: %q", run.FailedStage)
|
||||
}
|
||||
// override_flags_json should persist across ClearFailedStage so the
|
||||
// agent can still read it on its next heartbeat.
|
||||
if run.OverrideFlagsJSON != `{"wipe":true}` {
|
||||
t.Fatalf("OverrideFlagsJSON lost after ClearFailedStage: %q", run.OverrideFlagsJSON)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunsHoldAndFailedStage(t *testing.T) {
|
||||
runs := newDB(t)
|
||||
_, runID := seedRun(t, runs)
|
||||
ctx := context.Background()
|
||||
|
||||
if err := runs.SetHoldIP(ctx, runID, "10.0.0.42"); err != nil {
|
||||
t.Fatalf("SetHoldIP: %v", err)
|
||||
}
|
||||
if err := runs.SetFailedStage(ctx, runID, "SpecValidate"); err != nil {
|
||||
t.Fatalf("SetFailedStage: %v", err)
|
||||
}
|
||||
run, err := runs.Get(ctx, runID)
|
||||
if err != nil {
|
||||
t.Fatalf("Get: %v", err)
|
||||
}
|
||||
if run.HoldIP != "10.0.0.42" {
|
||||
t.Fatalf("HoldIP = %q, want 10.0.0.42", run.HoldIP)
|
||||
}
|
||||
if run.FailedStage != "SpecValidate" {
|
||||
t.Fatalf("FailedStage = %q, want SpecValidate", run.FailedStage)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user