diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml
index abf2ac7..adda700 100644
--- a/.gitea/workflows/ci.yml
+++ b/.gitea/workflows/ci.yml
@@ -42,4 +42,20 @@ jobs:
           GOOS=linux GOARCH=amd64 go build ./...
 
       - name: Test
-        run: go test -race -count=1 ./...
+        run: go test -race -count=1 -coverprofile=coverage.out ./...
+
+      - name: Coverage summary
+        run: |
+          go tool cover -func=coverage.out | tee coverage.txt
+          go tool cover -html=coverage.out -o coverage.html
+
+      - name: Upload coverage artifact
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: coverage
+          path: |
+            coverage.out
+            coverage.txt
+            coverage.html
+          retention-days: 14
diff --git a/agent/client.go b/agent/client.go
index 56dc64c..223996e 100644
--- a/agent/client.go
+++ b/agent/client.go
@@ -124,6 +124,56 @@ type ClaimResponse struct {
 	// at the right stage instead of silently replaying Inventory and
 	// letting the orchestrator advance past the crashed stage.
 	CurrentState string `json:"current_state"`
+	// StageConfig carries per-profile stage knobs (Phase 2): stage-level
+	// timeouts and probe-level durations/modes. Empty when the agent
+	// talks to a pre-Phase-2 orchestrator; the agent applies compile-
+	// time defaults in that case.
+	StageConfig ClaimStageConfig `json:"stage_config"`
+}
+
+// ClaimStageConfig mirrors config.StageConfig server-side — duplicated so
+// the agent doesn't need to import internal/config. Durations arrive as
+// strings ("2m", "2h") and are parsed by the tests package at the point
+// of use. An empty field means "use the agent-side default" so a missing
+// knob doesn't silently turn CPUStress / Storage into a no-op.
+type ClaimStageConfig struct {
+	Profile       string              `json:"profile"`
+	StageTimeouts map[string]string   `json:"stage_timeouts,omitempty"`
+	CPUStress     ClaimCPUStressKnobs `json:"cpustress"`
+	Storage       ClaimStorageKnobs   `json:"storage"`
+	Network       ClaimNetworkKnobs   `json:"network"`
+	Burn          ClaimBurnKnobs      `json:"burn"`
+}
+
+type ClaimCPUStressKnobs struct {
+	CPUPass  string `json:"cpu_pass,omitempty"`  // "" = agent-side default; NOTE(review): presumably a duration string ("2m") — confirm
+	MemPass  string `json:"mem_pass,omitempty"`  // "" = agent-side default; NOTE(review): presumably a duration string ("2m") — confirm
+	EDACPoll string `json:"edac_poll,omitempty"` // "" = agent-side default; NOTE(review): presumably the EDAC poll interval — confirm
+}
+
+type ClaimStorageKnobs struct {
+	Mode    string `json:"mode,omitempty"`     // "" = agent-side default; NOTE(review): valid mode values are not visible here
+	FioSize string `json:"fio_size,omitempty"` // NOTE(review): presumably fio's size option — confirm
+	FioTime string `json:"fio_time,omitempty"` // NOTE(review): presumably fio's runtime, as a duration string — confirm
+	FioBS   string `json:"fio_bs,omitempty"`   // NOTE(review): presumably fio's block size — confirm
+	FioRW   string `json:"fio_rw,omitempty"`   // NOTE(review): presumably fio's rw pattern — confirm
+	Verify  string `json:"verify,omitempty"`   // NOTE(review): presumably fio's verify setting — confirm
+}
+
+type ClaimNetworkKnobs struct {
+	Duration string `json:"duration,omitempty"` // "" = agent-side default; NOTE(review): presumably the Network stage's test window — confirm
+}
+
+// ClaimBurnKnobs mirrors config.BurnKnobs. Duration/CPUWorkers arrive as
+// strings so the agent can treat empty as "use compile-time default".
+// MemPct is a percentage (0-100); IperfParallel is the parallel stream
+// count fed to iperf3 -P. FioOnSpare gates whether fio runs inside Burn.
+type ClaimBurnKnobs struct {
+	Duration      string `json:"duration,omitempty"`
+	CPUWorkers    string `json:"cpu_workers,omitempty"`
+	MemPct        int    `json:"mem_pct,omitempty"`        // omitempty: a 0 is left out when this struct is marshalled
+	FioOnSpare    bool   `json:"fio_on_spare,omitempty"`   // omitempty: false is left out when this struct is marshalled
+	IperfParallel int    `json:"iperf_parallel,omitempty"` // omitempty: a 0 is left out when this struct is marshalled
 }
 
 type ClaimExpectedDiskSpec struct {
diff --git a/agent/probes/edac.go b/agent/probes/edac.go
new file mode 100644
index 0000000..45f98c0
--- /dev/null
+++ b/agent/probes/edac.go
@@ -0,0 +1,70 @@
+package probes
+
+import (
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+)
+
+// EDACSample is one counter reading from /sys/devices/system/edac/mc/.
+// Kind is "edac_ce" (correctable ECC errors) or "edac_ue" (uncorrectable
+// — always a critical signal). Key identifies the memory controller
+// (e.g. "mc0"). Value is the cumulative count since boot; the threshold
+// evaluator flags it the moment it exceeds 0. EDAC sets Unit to "count".
+type EDACSample struct {
+	Kind  string
+	Key   string
+	Value float64
+	Unit  string
+}
+
+// EDAC collects the correctable ("edac_ce") and uncorrectable ("edac_ue")
+// error counters of every mc* memory controller under
+// /sys/devices/system/edac/mc, one EDACSample per readable counter file.
+// A missing EDAC tree yields nil and unreadable counters are skipped, so
+// callers must read an empty result as "no data" rather than "passed";
+// values are raw cumulative counts, and a bad read heals on the next poll.
+func EDAC() []EDACSample {
+	const mcRoot = "/sys/devices/system/edac/mc"
+	dirents, err := os.ReadDir(mcRoot)
+	if err != nil {
+		return nil
+	}
+	// Each counter file maps onto the Kind it is reported under.
+	counters := []struct{ file, kind string }{
+		{"ce_count", "edac_ce"},
+		{"ue_count", "edac_ue"},
+	}
+	var samples []EDACSample
+	for _, d := range dirents {
+		mc := d.Name()
+		if !strings.HasPrefix(mc, "mc") {
+			continue
+		}
+		for _, c := range counters {
+			v, ok := readCount(filepath.Join(mcRoot, mc, c.file))
+			if !ok {
+				continue
+			}
+			samples = append(samples, EDACSample{Kind: c.kind, Key: mc, Value: v, Unit: "count"})
+		}
+	}
+	return samples
+}
+
+// readCount parses the sysfs file at path as one base-10 integer and
+// returns it as a float64. It yields (0, false) when the file cannot be
+// read or holds no valid integer, so callers can skip the sample quietly.
+func readCount(path string) (float64, bool) {
+	raw, readErr := os.ReadFile(path)
+	if readErr != nil {
+		return 0, false
+	}
+	// Surrounding whitespace (such as a trailing newline) is stripped first.
+	count, parseErr := strconv.ParseInt(strings.TrimSpace(string(raw)), 10, 64)
+	if parseErr != nil {
+		return 0, false
+	}
+	return float64(count), true
+}
diff --git a/agent/probes/firmware.go b/agent/probes/firmware.go
new file mode 100644
index 0000000..db4c37e
--- /dev/null
+++ b/agent/probes/firmware.go
@@ -0,0 +1,496 @@
+package probes
+
+import (
+	"bufio"
+	"context"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"regexp"
+	"strings"
+	"time"
+)
+
+// FirmwareSnapshot is the on-wire shape the agent POSTs alongside the
+// Firmware stage result. Mirrors internal/store.FirmwareSnapshot without
+// the import — the /result handler converts to the store type and
+// persists. One run produces many snapshots (one per BIOS / BMC / NIC
+// port / HBA / microcode / NVMe); Identifier distinguishes siblings
+// (e.g. "eth0" / "eth1"), Version is the canonical string to diff.
+type FirmwareSnapshot struct {
+	Component  string            `json:"component"` // bios|bmc|nic|hba|microcode|nvme_fw
+	Identifier string            `json:"identifier"`
+	Version    string            `json:"version"`
+	Vendor     string            `json:"vendor,omitempty"` // left out of the JSON when empty
+	Raw        map[string]string `json:"raw,omitempty"`    // key/value pairs the probe parsed; left out when empty
+}
+
+// Firmware runs the BIOS, BMC, NIC, NVMe, HBA and microcode sub-probes
+// one after another and returns every snapshot they produced, plus the
+// warnings raised by the BIOS/BMC probes when they yield nothing. The
+// probes are best-effort: a missing tool adds no snapshot, not an error.
+func Firmware(ctx context.Context) ([]FirmwareSnapshot, []string) {
+	var (
+		snaps []FirmwareSnapshot
+		warns []string
+	)
+	// BIOS and BMC are single-snapshot probes that report a warning
+	// string when they come back empty-handed.
+	single := []func(context.Context) (*FirmwareSnapshot, string){probeBIOS, probeBMC}
+	for _, probe := range single {
+		snap, warn := probe(ctx)
+		switch {
+		case snap != nil:
+			snaps = append(snaps, *snap)
+		case warn != "":
+			warns = append(warns, warn)
+		}
+	}
+	snaps = append(snaps, probeNICFirmware(ctx)...)
+	snaps = append(snaps, probeNVMeFirmware(ctx)...)
+	snaps = append(snaps, probeHBAFirmware(ctx)...)
+	if mc := probeMicrocode(); mc != nil {
+		snaps = append(snaps, *mc)
+	}
+	return snaps, warns
+}
+
+// runCmd runs name with args under a 5-second deadline layered on top
+// of ctx and returns the command's combined stdout+stderr as a string
+// together with the error from running it. The deadline is deliberately
+// tight: firmware tools read device registers and can block forever on
+// a wedged controller, and the stage should report "no HBA firmware
+// readable" instead of hanging the pipeline. Output is returned on both
+// the success and the failure path.
+func runCmd(ctx context.Context, name string, args ...string) (string, error) {
+	const probeTimeout = 5 * time.Second
+	bounded, stop := context.WithTimeout(ctx, probeTimeout)
+	defer stop()
+	combined, err := exec.CommandContext(bounded, name, args...).CombinedOutput()
+	// The output is handed back even on failure so callers can quote it.
+	return string(combined), err
+}
+
+// ----- BIOS --------------------------------------------------------------
+
+// probeBIOS shells out to `dmidecode -t bios` and returns the parsed
+// BIOS snapshot, or nil plus a "bios: ..." warning when the tool is
+// absent, fails (it needs root), or prints nothing usable.
+func probeBIOS(ctx context.Context) (*FirmwareSnapshot, string) {
+	const tool = "dmidecode"
+	if _, lookErr := exec.LookPath(tool); lookErr != nil {
+		return nil, "bios: dmidecode not installed"
+	}
+	text, runErr := runCmd(ctx, tool, "-t", "bios")
+	if runErr != nil {
+		return nil, fmt.Sprintf("bios: dmidecode failed: %v", trimErr(runErr, text))
+	}
+	if parsed := parseDmidecodeBIOS(strings.NewReader(text)); parsed != nil {
+		return parsed, ""
+	}
+	return nil, "bios: dmidecode produced no usable output"
+}
+
+// parseDmidecodeBIOS builds the "bios"/"system" snapshot from `dmidecode
+// -t bios` text; nil when the BIOS Information block or its version is missing.
+func parseDmidecodeBIOS(r io.Reader) *FirmwareSnapshot {
+	fields := parseDmidecodeSection(r, "BIOS Information")
+	if fields == nil {
+		return nil
+	}
+	ver := firstNonEmpty(fields["Version"], fields["Firmware Revision"])
+	if ver == "" {
+		return nil
+	}
+	return &FirmwareSnapshot{
+		Component:  "bios",
+		Identifier: "system",
+		Version:    ver,
+		Vendor:     fields["Vendor"],
+		Raw:        fields,
+	}
+}
+
+// parseDmidecodeSection returns the key/value map of the first dmidecode
+// handle whose title matches. dmidecode blocks look like:
+//   Handle 0x0000, ...
+//           BIOS Information
+//           Vendor: American Megatrends
+//           Version: 3.0
+//           ...
+// with a blank line between blocks. A key with an empty value (e.g.
+// "Characteristics:") and the colon-less sub-list lines under it are
+// dropped. Returns nil when no block carries the requested title.
+func parseDmidecodeSection(r io.Reader, title string) map[string]string {
+	sc := bufio.NewScanner(r)
+	sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)
+	var kv map[string]string
+	var inside, seenTitle bool
+	for sc.Scan() {
+		line := sc.Text()
+		trim := strings.TrimSpace(line)
+		if strings.HasPrefix(line, "Handle ") {
+			if seenTitle && kv != nil { // the matched block has ended — done
+				return kv
+			}
+			inside = false
+			kv = nil
+			continue
+		}
+		if !inside {
+			if trim == title { // title line opens the block we want
+				inside = true
+				seenTitle = true
+				kv = map[string]string{}
+			}
+			continue
+		}
+		if trim == "" {
+			continue
+		}
+		if k, v, ok := strings.Cut(trim, ":"); ok {
+			v = strings.TrimSpace(v)
+			if v == "" { // list header such as "Characteristics:" — nothing to store
+				continue
+			}
+			kv[strings.TrimSpace(k)] = v
+		}
+	}
+	if seenTitle { // the matched block ran to end of input
+		return kv
+	}
+	return nil
+}
+
+// ----- BMC / IPMI --------------------------------------------------------
+
+// probeBMC asks `ipmitool mc info` for the BMC firmware and returns the
+// parsed snapshot. Hosts without a BMC are expected: a missing binary,
+// a failing command, or unparseable output each produce nil plus a
+// "bmc: ..." warning rather than failing the stage.
+func probeBMC(ctx context.Context) (*FirmwareSnapshot, string) {
+	if _, lookErr := exec.LookPath("ipmitool"); lookErr != nil {
+		return nil, "bmc: ipmitool not installed"
+	}
+	text, runErr := runCmd(ctx, "ipmitool", "mc", "info")
+	if runErr != nil {
+		return nil, fmt.Sprintf("bmc: ipmitool mc info failed: %v", trimErr(runErr, text))
+	}
+	if parsed := parseIpmitoolMCInfo(strings.NewReader(text)); parsed != nil {
+		return parsed, ""
+	}
+	return nil, "bmc: ipmitool output not parseable"
+}
+
+// parseIpmitoolMCInfo reads the "key : value" lines of `ipmitool mc info`
+// into a "bmc"/"bmc0" snapshot; nil when no firmware revision is present.
+func parseIpmitoolMCInfo(r io.Reader) *FirmwareSnapshot {
+	fields := make(map[string]string)
+	for lines := bufio.NewScanner(r); lines.Scan(); {
+		key, val, found := strings.Cut(strings.TrimSpace(lines.Text()), ":")
+		if !found {
+			continue
+		}
+		fields[strings.TrimSpace(key)] = strings.TrimSpace(val)
+	}
+	rev := firstNonEmpty(fields["Firmware Revision"], fields["Aux Firmware Rev Info"])
+	if rev == "" {
+		return nil
+	}
+	return &FirmwareSnapshot{
+		Component:  "bmc",
+		Identifier: "bmc0",
+		Version:    rev,
+		Vendor:     fields["Manufacturer Name"],
+		Raw:        fields,
+	}
+}
+
+// ----- NIC firmware ------------------------------------------------------
+
+// probeNICFirmware lists /sys/class/net and runs `ethtool -i <iface>`
+// on every entry isRealNIC accepts, yielding one snapshot per port so
+// each interface can be compared on its own. Returns nil when ethtool
+// is not installed or /sys/class/net cannot be read.
+func probeNICFirmware(ctx context.Context) []FirmwareSnapshot {
+	if _, lookErr := exec.LookPath("ethtool"); lookErr != nil {
+		return nil
+	}
+	links, readErr := os.ReadDir("/sys/class/net")
+	if readErr != nil {
+		return nil
+	}
+	var snaps []FirmwareSnapshot
+	for _, link := range links {
+		iface := link.Name()
+		if !isRealNIC(iface) {
+			continue
+		}
+		// A port ethtool cannot query is skipped; its siblings still report.
+		info, runErr := runCmd(ctx, "ethtool", "-i", iface)
+		if runErr != nil {
+			continue
+		}
+		if snap := parseEthtoolI(strings.NewReader(info), iface); snap != nil {
+			snaps = append(snaps, *snap)
+		}
+	}
+	return snaps
+}
+
+// parseEthtoolI turns the "key: value" lines of `ethtool -i` into the
+// "nic" snapshot for iface: firmware-version becomes Version and the
+// driver name stands in for Vendor. Nil when both fields are empty.
+func parseEthtoolI(r io.Reader, iface string) *FirmwareSnapshot {
+	fields := make(map[string]string)
+	for lines := bufio.NewScanner(r); lines.Scan(); {
+		if key, val, found := strings.Cut(lines.Text(), ":"); found {
+			fields[strings.TrimSpace(key)] = strings.TrimSpace(val)
+		}
+	}
+	fw, drv := fields["firmware-version"], fields["driver"]
+	if fw == "" && drv == "" {
+		return nil
+	}
+	return &FirmwareSnapshot{
+		Component:  "nic",
+		Identifier: iface,
+		Version:    fw,
+		Vendor:     drv,
+		Raw:        fields,
+	}
+}
+
+// isRealNIC reports whether name looks like a physical NIC: not loopback,
+// not a known virtual prefix, and backed by a /sys/class/net/<name>/device link.
+func isRealNIC(name string) bool {
+	switch name {
+	case "", "lo":
+		return false
+	}
+	virtualPrefixes := [...]string{"docker", "br-", "veth", "virbr", "tun", "tap", "bond"}
+	for _, p := range virtualPrefixes {
+		if strings.HasPrefix(name, p) {
+			return false
+		}
+	}
+	// A `device` link under sysfs marks hardware-backed interfaces;
+	// pure virtuals (dummy0, wg*) lack it.
+	_, statErr := os.Stat(filepath.Join("/sys/class/net", name, "device"))
+	return statErr == nil
+}
+
+// ----- NVMe --------------------------------------------------------------
+
+// probeNVMeFirmware reads /sys/class/nvme/<ctrl>/firmware_rev (and model)
+// for every controller, falling back to `nvme id-ctrl /dev/<ctrl>` when
+// sysfs yields no revision. Identifier is the controller's sysfs entry
+// name, so two drives produce two snapshots; the model goes into Vendor.
+func probeNVMeFirmware(ctx context.Context) []FirmwareSnapshot {
+	entries, err := os.ReadDir("/sys/class/nvme")
+	if err != nil {
+		return nil
+	}
+	var out []FirmwareSnapshot
+	for _, e := range entries {
+		ctrl := e.Name()
+		rev := strings.TrimSpace(readFile(filepath.Join("/sys/class/nvme", ctrl, "firmware_rev")))
+		model := strings.TrimSpace(readFile(filepath.Join("/sys/class/nvme", ctrl, "model")))
+		if rev == "" {
+			// Fallback: nvme id-ctrl /dev/<ctrl>, for hosts where sysfs
+			// doesn't export firmware_rev (or exports it empty).
+			if _, err := exec.LookPath("nvme"); err == nil {
+				raw, _ := runCmd(ctx, "nvme", "id-ctrl", "/dev/"+ctrl) // error ignored: whatever was printed is still parsed
+				rev = parseNVMeIDCtrl(strings.NewReader(raw), "fr")
+				if model == "" {
+					model = parseNVMeIDCtrl(strings.NewReader(raw), "mn")
+				}
+			}
+		}
+		if rev == "" {
+			continue
+		}
+		out = append(out, FirmwareSnapshot{
+			Component:  "nvme_fw",
+			Identifier: ctrl,
+			Version:    rev,
+			Vendor:     model,
+			Raw:        map[string]string{"model": model, "firmware_rev": rev},
+		})
+	}
+	return out
+}
+
+// parseNVMeIDCtrl pulls a single field out of `nvme id-ctrl` output.
+// Format: "fr        : FW1234" / "mn        : Samsung SSD 980 PRO".
+// The text before the first ':' must equal key once trimmed, so any
+// spacing (spaces, tabs, or none) between key and ':' is accepted and a
+// longer field sharing the prefix never matches. Values may contain
+// spaces and further colons. Returns "" when the key is absent.
+func parseNVMeIDCtrl(r io.Reader, key string) string {
+	sc := bufio.NewScanner(r)
+	for sc.Scan() {
+		// Compare the whole trimmed key rather than a "key " prefix:
+		// "fr:X" and tab-padded keys must match too.
+		k, v, ok := strings.Cut(sc.Text(), ":")
+		if !ok || strings.TrimSpace(k) != key {
+			continue
+		}
+		return strings.TrimSpace(v)
+	}
+	return ""
+}
+
+// ----- HBA ---------------------------------------------------------------
+
+var lspciClassHBA = regexp.MustCompile(`(?i)(serial attached scsi|sas controller|raid bus controller)`)
+
+// probeHBAFirmware scans `lspci -Dvvnn` for SAS/RAID controllers and
+// returns whatever parseLspciHBA extracts from the listing. Only lspci
+// is used, which keeps the probe independent of vendor CLIs (storcli,
+// mpt-status) that aren't always installed; SpecValidate is relied on
+// to diff the captured strings. Returns nil when lspci is missing or
+// the command fails.
+func probeHBAFirmware(ctx context.Context) []FirmwareSnapshot {
+	_, lookErr := exec.LookPath("lspci")
+	if lookErr != nil {
+		return nil
+	}
+	if listing, runErr := runCmd(ctx, "lspci", "-Dvvnn"); runErr == nil {
+		return parseLspciHBA(strings.NewReader(listing))
+	}
+	return nil
+}
+
+// parseLspciHBA walks `lspci -Dvvnn` stanzas and picks SAS/RAID
+// controllers. One snapshot per device; identifier is the PCI address.
+// Version is the device line's revision ("rev NN"); lspci prints none
+// for revision 00, so the Kernel modules string and then the driver in
+// use serve as fallbacks. A device offering none of these is dropped.
+func parseLspciHBA(r io.Reader) []FirmwareSnapshot {
+	sc := bufio.NewScanner(r)
+	sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)
+	var out []FirmwareSnapshot
+	var cur *FirmwareSnapshot
+	revRe := regexp.MustCompile(`\(rev\s+([0-9a-fA-F]+)\)`)
+	flush := func() {
+		if cur != nil {
+			cur.Version = firstNonEmpty(cur.Version, cur.Raw["kernel_modules"], cur.Raw["kernel_driver"])
+			if cur.Version != "" {
+				out = append(out, *cur)
+			}
+		}
+		cur = nil
+	}
+	for sc.Scan() {
+		line := sc.Text()
+		if !strings.HasPrefix(line, "\t") && strings.Contains(line, " ") {
+			flush() // a new device line closes the previous stanza
+			if lspciClassHBA.MatchString(line) {
+				addr, rest, _ := strings.Cut(line, " ")
+				cur = &FirmwareSnapshot{
+					Component:  "hba",
+					Identifier: addr,
+					Vendor:     strings.TrimSpace(rest),
+					Raw:        map[string]string{"device_line": line},
+				}
+				if m := revRe.FindStringSubmatch(line); len(m) == 2 {
+					cur.Version = "rev " + m[1]
+				}
+			}
+			continue
+		}
+		switch trim := strings.TrimSpace(line); {
+		case cur == nil:
+		case strings.HasPrefix(trim, "Kernel modules:"):
+			cur.Raw["kernel_modules"] = strings.TrimSpace(strings.TrimPrefix(trim, "Kernel modules:"))
+		case strings.HasPrefix(trim, "Kernel driver in use:"):
+			cur.Raw["kernel_driver"] = strings.TrimSpace(strings.TrimPrefix(trim, "Kernel driver in use:"))
+		}
+	}
+	flush()
+	return out
+}
+
+// ----- Microcode ---------------------------------------------------------
+
+// probeMicrocode feeds /proc/cpuinfo to parseMicrocode and returns its
+// snapshot; nil when the file cannot be opened or has no microcode line.
+func probeMicrocode() *FirmwareSnapshot {
+	cpuinfo, openErr := os.Open("/proc/cpuinfo")
+	if openErr != nil {
+		return nil
+	}
+	// The Close error is deliberately discarded; the file was only read.
+	defer func() { _ = cpuinfo.Close() }()
+	return parseMicrocode(cpuinfo)
+}
+
+// parseMicrocode scans /proc/cpuinfo-formatted text for the first
+// non-empty "microcode" and "vendor_id" values and wraps them in the
+// "microcode"/"cpu" snapshot. Scanning stops once both are known, since
+// every core repeats the same pair. Returns nil when no microcode value
+// is present; a missing vendor_id just leaves Vendor empty.
+func parseMicrocode(r io.Reader) *FirmwareSnapshot {
+	var ucode, cpuVendor string
+	lines := bufio.NewScanner(r)
+	// Keep reading only while a value is still missing; the check runs
+	// before each Scan, so no extra line is consumed once both are set.
+	for (ucode == "" || cpuVendor == "") && lines.Scan() {
+		name, value, found := strings.Cut(lines.Text(), ":")
+		if !found {
+			continue
+		}
+		name = strings.TrimSpace(name)
+		value = strings.TrimSpace(value)
+		// The first non-empty occurrence of each key wins.
+		if name == "microcode" && ucode == "" {
+			ucode = value
+		}
+		if name == "vendor_id" && cpuVendor == "" {
+			cpuVendor = value
+		}
+	}
+	// Without a microcode version there is nothing to report.
+	if ucode == "" {
+		return nil
+	}
+	return &FirmwareSnapshot{
+		Component:  "microcode",
+		Identifier: "cpu",
+		Version:    ucode,
+		Vendor:     cpuVendor,
+	}
+}
+
+// ----- helpers -----------------------------------------------------------
+
+// firstNonEmpty returns the first non-blank argument unchanged, else "".
+func firstNonEmpty(ss ...string) string {
+	for i := range ss {
+		if strings.TrimSpace(ss[i]) != "" {
+			return ss[i]
+		}
+	}
+	return ""
+}
+
+// readFile returns the contents of p as a string, or "" on any read error.
+func readFile(p string) string {
+	if data, err := os.ReadFile(p); err == nil {
+		return string(data)
+	}
+	return ""
+}
+
+// trimErr renders err for a warning message. When out has any
+// non-whitespace content, the first line of it is appended in
+// parentheses — enough diagnostic context without dumping a screenful
+// of dmidecode/ipmitool noise; otherwise err.Error() is returned alone.
+func trimErr(err error, out string) string {
+	head, _, _ := strings.Cut(strings.TrimSpace(out), "\n")
+	if head != "" {
+		return fmt.Sprintf("%v (%s)", err, head)
+	}
+	return err.Error()
+}
diff --git a/agent/probes/firmware_test.go b/agent/probes/firmware_test.go
new file mode 100644
index 0000000..44ed938
--- /dev/null
+++ b/agent/probes/firmware_test.go
@@ -0,0 +1,232 @@
+package probes
+
+import (
+	"strings"
+	"testing"
+)
+
+// Golden dmidecode -t bios output (trimmed, representative). A real
+// host will have more lines; parse must tolerate the unknown fields.
+const dmidecodeBIOS = `# dmidecode 3.3
+Getting SMBIOS data from sysfs.
+SMBIOS 3.2.0 present.
+
+Handle 0x0000, DMI type 0, 26 bytes
+BIOS Information
+	Vendor: American Megatrends Inc.
+	Version: 3.2
+	Release Date: 07/15/2021
+	Address: 0xF0000
+	Runtime Size: 64 kB
+	ROM Size: 32 MB
+	Characteristics:
+		PCI is supported
+		BIOS is upgradeable
+
+Handle 0x0001, DMI type 1, 27 bytes
+System Information
+	Manufacturer: Supermicro
+	Product Name: X11SSL-F
+`
+
+func TestParseDmidecodeBIOS(t *testing.T) {
+	bios := parseDmidecodeBIOS(strings.NewReader(dmidecodeBIOS))
+	if bios == nil {
+		t.Fatal("parseDmidecodeBIOS returned nil")
+	}
+	if got := bios.Component; got != "bios" {
+		t.Errorf("component = %q, want bios", got)
+	}
+	if got := bios.Version; got != "3.2" {
+		t.Errorf("version = %q, want 3.2", got)
+	}
+	if got := bios.Vendor; got != "American Megatrends Inc." {
+		t.Errorf("vendor = %q, want American Megatrends Inc.", got)
+	}
+	if got := bios.Raw["Release Date"]; got != "07/15/2021" {
+		t.Errorf("release date = %q, want 07/15/2021", got)
+	}
+}
+
+func TestParseDmidecodeBIOSMissingBlock(t *testing.T) {
+	// Only a System Information handle is present: expect nil, not a panic.
+	const onlySystem = "Handle 0x0001, DMI type 1, 27 bytes\nSystem Information\n\tManufacturer: Acme\n"
+	if got := parseDmidecodeBIOS(strings.NewReader(onlySystem)); got != nil {
+		t.Fatalf("expected nil when BIOS block absent, got %+v", got)
+	}
+}
+
+const ipmitoolMCInfo = `Device ID                 : 32
+Device Revision           : 1
+Firmware Revision         : 1.74
+IPMI Version              : 2.0
+Manufacturer ID           : 10876
+Manufacturer Name         : Supermicro
+Product ID                : 2051 (0x0803)
+Product Name              : Unknown (0x803)
+`
+
+func TestParseIpmitoolMCInfo(t *testing.T) {
+	bmc := parseIpmitoolMCInfo(strings.NewReader(ipmitoolMCInfo))
+	if bmc == nil {
+		t.Fatal("parseIpmitoolMCInfo returned nil")
+	}
+	if got := bmc.Component; got != "bmc" {
+		t.Errorf("component = %q, want bmc", got)
+	}
+	if got := bmc.Version; got != "1.74" {
+		t.Errorf("version = %q, want 1.74", got)
+	}
+	if got := bmc.Vendor; got != "Supermicro" {
+		t.Errorf("vendor = %q, want Supermicro", got)
+	}
+}
+
+func TestParseIpmitoolMCInfoEmpty(t *testing.T) {
+	if got := parseIpmitoolMCInfo(strings.NewReader("")); got != nil {
+		t.Fatalf("expected nil on empty input, got %+v", got)
+	}
+}
+
+const ethtoolEth0 = `driver: mlx5_core
+version: 5.15.0
+firmware-version: 16.32.1010 (MT_0000000008)
+expansion-rom-version:
+bus-info: 0000:5e:00.0
+supports-statistics: yes
+`
+
+func TestParseEthtoolI(t *testing.T) {
+	nic := parseEthtoolI(strings.NewReader(ethtoolEth0), "eth0")
+	if nic == nil {
+		t.Fatal("parseEthtoolI returned nil")
+	}
+	if !(nic.Component == "nic" && nic.Identifier == "eth0") {
+		t.Errorf("component/id = %q/%q, want nic/eth0", nic.Component, nic.Identifier)
+	}
+	if got := nic.Version; got != "16.32.1010 (MT_0000000008)" {
+		t.Errorf("version = %q, want 16.32.1010 (MT_0000000008)", got)
+	}
+	if got := nic.Vendor; got != "mlx5_core" {
+		t.Errorf("vendor = %q, want mlx5_core", got)
+	}
+}
+
+func TestParseEthtoolIEmpty(t *testing.T) {
+	if got := parseEthtoolI(strings.NewReader("not a valid output"), "eth0"); got != nil {
+		t.Fatalf("expected nil on garbage input, got %+v", got)
+	}
+}
+
+const nvmeIDCtrl = `NVME Identify Controller:
+vid       : 0x144d
+ssvid     : 0x144d
+sn        : S5GYNX0R500123X
+mn        : Samsung SSD 980 PRO 1TB
+fr        : 5B2QGXA7
+rab       : 2
+`
+
+func TestParseNVMeIDCtrl(t *testing.T) {
+	if fr := parseNVMeIDCtrl(strings.NewReader(nvmeIDCtrl), "fr"); fr != "5B2QGXA7" {
+		t.Errorf("fr = %q, want 5B2QGXA7", fr)
+	}
+	if mn := parseNVMeIDCtrl(strings.NewReader(nvmeIDCtrl), "mn"); mn != "Samsung SSD 980 PRO 1TB" {
+		t.Errorf("mn = %q, want Samsung SSD 980 PRO 1TB", mn)
+	}
+	if absent := parseNVMeIDCtrl(strings.NewReader(nvmeIDCtrl), "missing"); absent != "" {
+		t.Errorf("missing key should be empty, got %q", absent)
+	}
+}
+
+const lspciHBA = `0000:01:00.0 Ethernet controller [0200]: Intel Corporation I350 [8086:1521] (rev 01)
+	Subsystem: Intel Corporation I350 [8086:0001]
+	Kernel driver in use: igb
+	Kernel modules: igb
+
+0000:03:00.0 Serial Attached SCSI controller [0107]: Broadcom / LSI SAS3008 PCI-Express Fusion-MPT SAS-3 [1000:0097] (rev 02)
+	Subsystem: Broadcom / LSI SAS9300-8i [1000:30e0]
+	Kernel driver in use: mpt3sas
+	Kernel modules: mpt3sas
+
+0000:04:00.0 RAID bus controller [0104]: LSI MegaRAID SAS-3 3108 [1000:005d] (rev 02)
+	Subsystem: LSI MegaRAID SAS 9361-8i [1000:9361]
+	Kernel driver in use: megaraid_sas
+	Kernel modules: megaraid_sas
+`
+
+func TestParseLspciHBA(t *testing.T) {
+	hbas := parseLspciHBA(strings.NewReader(lspciHBA))
+	if n := len(hbas); n != 2 {
+		t.Fatalf("got %d HBA snapshots, want 2 (SAS + RAID; Ethernet must be skipped)", n)
+	}
+	for i := range hbas {
+		if c := hbas[i].Component; c != "hba" {
+			t.Errorf("component = %q, want hba", c)
+		}
+		if v := hbas[i].Version; v != "rev 02" {
+			t.Errorf("version = %q, want 'rev 02'", v)
+		}
+	}
+	if id := hbas[0].Identifier; id != "0000:03:00.0" {
+		t.Errorf("first identifier = %q, want 0000:03:00.0", id)
+	}
+	if id := hbas[1].Identifier; id != "0000:04:00.0" {
+		t.Errorf("second identifier = %q, want 0000:04:00.0", id)
+	}
+}
+
+const cpuinfo = `processor	: 0
+vendor_id	: GenuineIntel
+cpu family	: 6
+model		: 85
+model name	: Intel(R) Xeon(R) Gold 6230 CPU @ 2.10GHz
+stepping	: 7
+microcode	: 0x5003006
+cpu MHz		: 2100.000
+`
+
+func TestParseMicrocode(t *testing.T) {
+	ucode := parseMicrocode(strings.NewReader(cpuinfo))
+	if ucode == nil {
+		t.Fatal("parseMicrocode returned nil")
+	}
+	if got := ucode.Version; got != "0x5003006" {
+		t.Errorf("version = %q, want 0x5003006", got)
+	}
+	if got := ucode.Vendor; got != "GenuineIntel" {
+		t.Errorf("vendor = %q, want GenuineIntel", got)
+	}
+	if got := ucode.Identifier; got != "cpu" {
+		t.Errorf("identifier = %q, want cpu", got)
+	}
+}
+
+func TestParseMicrocodeMissing(t *testing.T) {
+	// cpuinfo text lacking any microcode line must produce nil.
+	const noMicrocode = "processor\t: 0\nvendor_id\t: GenuineIntel\n"
+	if got := parseMicrocode(strings.NewReader(noMicrocode)); got != nil {
+		t.Fatalf("expected nil when microcode line absent, got %+v", got)
+	}
+}
+
+func TestIsRealNIC(t *testing.T) {
+	// Every name below is rejected by name alone (loopback, empty, or a
+	// virtual prefix), so the expected answer is false for all of them.
+	const want = false
+	rejected := []string{
+		"lo",
+		"",
+		"docker0",
+		"br-abc",
+		"veth1234",
+		"virbr0",
+		"bond0",
+		"tun0",
+	}
+	for _, name := range rejected {
+		if got := isRealNIC(name); got != want {
+			t.Errorf("isRealNIC(%q) = %v, want %v", name, got, want)
+		}
+	}
+}
diff --git a/agent/probes/netdev.go b/agent/probes/netdev.go
new file mode 100644
index 0000000..f059d9a
--- /dev/null
+++ b/agent/probes/netdev.go
@@ -0,0 +1,85 @@
+package probes
+
+import (
+	"bufio"
+	"io"
+	"os"
+	"strconv"
+	"strings"
+)
+
+// NetDevSnapshot is the per-interface counter row from /proc/net/dev at
+// a single instant. Used by the Network stage to compute deltas across
+// an iperf window — a rising rx_errors or tx_dropped during a loaded
+// link is a real NIC problem, not general noise.
+type NetDevSnapshot struct {
+	Iface   string // interface name: the text before ':' in the row
+	RxBytes uint64 // counter column 0 (rx_bytes)
+	RxErrs  uint64 // counter column 2 (rx_errs)
+	RxDrop  uint64 // counter column 3 (rx_drop)
+	TxBytes uint64 // counter column 8 (tx_bytes)
+	TxErrs  uint64 // counter column 10 (tx_errs)
+	TxDrop  uint64 // counter column 11 (tx_drop)
+}
+
+// NetDev snapshots the counters in /proc/net/dev, one entry per
+// non-loopback interface (see parseNetDev). It is best-effort: when the
+// file cannot be opened it returns nil and the caller is expected to
+// skip delta reporting for that tick.
+func NetDev() []NetDevSnapshot {
+	if procFile, openErr := os.Open("/proc/net/dev"); openErr == nil {
+		defer func() { _ = procFile.Close() }()
+		return parseNetDev(procFile)
+	}
+	return nil
+}
+
+// parseNetDev turns /proc/net/dev-formatted text into one snapshot per
+// non-loopback interface; it is separate from NetDev so tests can feed
+// a fixture instead of the real /proc. The first two lines are headers;
+// each remaining row is "iface:" followed by 16 whitespace-separated
+// counters, of which six (bytes, errs and drop for rx and tx) are kept.
+// Rows without a colon or with fewer than 16 counters are skipped.
+func parseNetDev(r io.Reader) []NetDevSnapshot {
+	var snaps []NetDevSnapshot
+	lines := bufio.NewScanner(r)
+	for lineNo := 0; lines.Scan(); lineNo++ {
+		if lineNo < 2 {
+			// Header lines (iface || bytes ... || bytes ...).
+			continue
+		}
+		name, rest, found := strings.Cut(strings.TrimSpace(lines.Text()), ":")
+		if !found {
+			continue
+		}
+		name = strings.TrimSpace(name)
+		if name == "" || name == "lo" {
+			continue
+		}
+		// Counter columns after the colon:
+		//   0 rx_bytes  1 rx_packets  2 rx_errs  3 rx_drop  4 fifo  5 frame  6 compressed  7 multicast
+		//   8 tx_bytes  9 tx_packets 10 tx_errs 11 tx_drop 12 fifo 13 colls 14 carrier 15 compressed
+		cols := strings.Fields(rest)
+		if len(cols) < 16 {
+			continue
+		}
+		snaps = append(snaps, NetDevSnapshot{
+			Iface:   name,
+			RxBytes: parseU64(cols[0]),
+			RxErrs:  parseU64(cols[2]),
+			RxDrop:  parseU64(cols[3]),
+			TxBytes: parseU64(cols[8]),
+			TxErrs:  parseU64(cols[10]),
+			TxDrop:  parseU64(cols[11]),
+		})
+	}
+	return snaps
+}
+
+// parseU64 parses s as a base-10 uint64, yielding 0 when it is not one.
+func parseU64(s string) uint64 {
+	if n, err := strconv.ParseUint(s, 10, 64); err == nil {
+		return n
+	}
+	return 0
+}
diff --git a/agent/probes/netdev_test.go b/agent/probes/netdev_test.go
new file mode 100644
index 0000000..f443a5d
--- /dev/null
+++ b/agent/probes/netdev_test.go
@@ -0,0 +1,84 @@
+package probes
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestParseNetDev_RealSample exercises parseNetDev against a synthetic
+// /proc/net/dev fixture with the full 16-column layout. Confirms the
+// loopback interface is dropped, headers are skipped, and each of the
+// six curated counters lands in the right field (field values are checked on eth0).
+func TestParseNetDev_RealSample(t *testing.T) {
+	// Columns after "iface:":
+	//   0 rx_bytes  1 rx_packets  2 rx_errs  3 rx_drop
+	//   4 fifo  5 frame  6 compressed  7 multicast
+	//   8 tx_bytes  9 tx_packets 10 tx_errs 11 tx_drop
+	//  12 fifo 13 colls 14 carrier 15 compressed
+	fixture := `Inter-|   Receive                                                |  Transmit
+ face |bytes    packets errs drop fifo frame compressed multicast|bytes    packets errs drop fifo colls carrier compressed
+    lo: 1000000   10000    0    0    0     0          0         0  1000000   10000    0    0    0     0       0          0
+  eth0: 50000000  100000   7   12    0     0          0         0  40000000   90000   3    5    0     0       0          0
+  eth1: 12345      200     0    0    0     0          0         0    54321     180    0    0    0     0       0          0
+`
+	snaps := parseNetDev(strings.NewReader(fixture))
+	if len(snaps) != 2 { // eth0 + eth1; the lo row must not be counted
+		t.Fatalf("got %d snapshots, want 2 (lo should be dropped)", len(snaps))
+	}
+	byIface := map[string]NetDevSnapshot{} // index by name so assertions don't depend on row order
+	for _, s := range snaps {
+		byIface[s.Iface] = s
+	}
+	eth0, ok := byIface["eth0"]
+	if !ok {
+		t.Fatalf("eth0 missing from parsed snapshots")
+	}
+	if eth0.RxBytes != 50000000 { // column 0
+		t.Errorf("eth0 RxBytes=%d, want 50000000", eth0.RxBytes)
+	}
+	if eth0.RxErrs != 7 { // column 2
+		t.Errorf("eth0 RxErrs=%d, want 7", eth0.RxErrs)
+	}
+	if eth0.RxDrop != 12 { // column 3
+		t.Errorf("eth0 RxDrop=%d, want 12", eth0.RxDrop)
+	}
+	if eth0.TxBytes != 40000000 { // column 8
+		t.Errorf("eth0 TxBytes=%d, want 40000000", eth0.TxBytes)
+	}
+	if eth0.TxErrs != 3 { // column 10
+		t.Errorf("eth0 TxErrs=%d, want 3", eth0.TxErrs)
+	}
+	if eth0.TxDrop != 5 { // column 11
+		t.Errorf("eth0 TxDrop=%d, want 5", eth0.TxDrop)
+	}
+	if _, ok := byIface["lo"]; ok {
+		t.Errorf("lo should have been filtered out")
+	}
+}
+
+// TestParseNetDev_Empty: an empty reader returns no snapshots, not a
+// crash. Only the length is asserted, so nil and empty both pass.
+func TestParseNetDev_Empty(t *testing.T) {
+	snaps := parseNetDev(strings.NewReader(""))
+	if len(snaps) != 0 {
+		t.Errorf("got %d snapshots from empty reader, want 0", len(snaps))
+	}
+}
+
+// TestParseNetDev_MalformedRow skips rows with fewer than the expected
+// 16 columns rather than panicking. A truncated line shouldn't hide the
+// good rows that follow.
+func TestParseNetDev_MalformedRow(t *testing.T) {
+	fixture := `header line 1
+header line 2
+  bad0: 123 456
+  eth0: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
+`
+	snaps := parseNetDev(strings.NewReader(fixture))
+	if len(snaps) != 1 { // bad0 has only 2 counters; eth0 has all 16
+		t.Fatalf("got %d snapshots, want 1 (bad0 should be dropped)", len(snaps))
+	}
+	if snaps[0].Iface != "eth0" {
+		t.Errorf("got iface=%q, want eth0", snaps[0].Iface)
+	}
+}
diff --git a/agent/runner.go b/agent/runner.go
index 5141cd2..b567d54 100644
--- a/agent/runner.go
+++ b/agent/runner.go
@@ -26,6 +26,7 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
+	"strings"
 	"sync"
 	"sync/atomic"
 	"time"
@@ -71,7 +72,10 @@ func Run(ctx context.Context, p *bootstate.Params) error {
 	}
 	fwd.info(fmt.Sprintf("claimed run; stages=%v current_state=%s", claim.Stages, claim.CurrentState))
 
-	go thermalSidecar(ctx, c, fwd)
+	mux := NewSensorMux(ctx, c)
+	defer mux.Close()
+
+	go thermalSidecar(ctx, mux, fwd)
 
 	hbCh := make(chan HeartbeatResponse, 4)
 	go heartbeatLoop(ctx, c, fwd, hbCh)
@@ -101,7 +105,7 @@ func Run(ctx context.Context, p *bootstate.Params) error {
 		default:
 		}
 		fwd.info("stage: starting " + nextStage)
-		outcome := runStageCancellable(ctx, nextStage, claim, fwd, c, overrideFlags{})
+		outcome := runStageCancellable(ctx, nextStage, claim, fwd, c, mux, overrideFlags{})
 		if outcome.Cancelled {
 			fwd.warn("stage cancelled by operator; posting result and exiting")
 			_, _ = postResult(ctx, c, nextStage, outcome)
@@ -119,7 +123,7 @@ func Run(ctx context.Context, p *bootstate.Params) error {
 				return err
 			}
 			// Park and wait for an override directive.
-			return waitForOverride(ctx, c, fwd, hbCh, claim)
+			return waitForOverride(ctx, c, fwd, mux, hbCh, claim)
 		}
 		if resp.NextState == "Completed" || resp.NextState == "" {
 			fwd.info("pipeline complete")
@@ -144,10 +148,10 @@ func Run(ctx context.Context, p *bootstate.Params) error {
 // it runs the inventory probe and passes the result as the /result body
 // (the orchestrator persists it as an artifact). Every other stage
 // returns a tests.Outcome which postResult marshals generically.
-func runStage(ctx context.Context, stage string, claim *ClaimResponse, fwd *logForwarder, c *Client, ovr overrideFlags) stageOutcome {
+func runStage(ctx context.Context, stage string, claim *ClaimResponse, fwd *logForwarder, c *Client, mux *SensorMux, ovr overrideFlags) stageOutcome {
 	fwd.SetStage(stage)
 	defer fwd.ClearStage()
-	deps := newDeps(ctx, c, fwd, ovr, claim)
+	deps := newDeps(ctx, c, fwd, mux, ovr, claim, stage)
 	switch stage {
 	case "Inventory":
 		fwd.info("Inventory: probing host hardware")
@@ -163,6 +167,25 @@ func runStage(ctx context.Context, stage string, claim *ClaimResponse, fwd *logF
 			},
 			Inventory: inv,
 		}
+	case "Firmware":
+		fwd.info("Firmware: probing firmware versions")
+		snaps, warns := probes.Firmware(ctx)
+		for _, w := range warns {
+			fwd.warn(w)
+		}
+		summary := firmwareSummary(snaps)
+		fwd.info("Firmware: " + summary)
+		return stageOutcome{
+			Outcome: tests.Outcome{
+				Passed:  true,
+				Summary: summary,
+				Extras: map[string]any{
+					"warnings":  warns,
+					"snapshots": len(snaps),
+				},
+			},
+			Firmware: snaps,
+		}
 	case "SMART":
 		return stageOutcome{Outcome: tests.SMART(ctx, deps)}
 	case "CPUStress":
@@ -170,10 +193,19 @@ func runStage(ctx context.Context, stage string, claim *ClaimResponse, fwd *logF
 	case "Storage":
 		return stageOutcome{Outcome: tests.Storage(ctx, deps)}
 	case "Network":
+		duration := deps.NetworkKnobs.Duration
+		if duration <= 0 {
+			duration = 10 * time.Second
+		}
 		return stageOutcome{Outcome: tests.Network(ctx, deps, tests.NetworkConfig{
 			OrchestratorURL: c.BaseURL,
 			IperfPort:       claim.IperfPort,
-			Duration:        10 * time.Second,
+			Duration:        duration,
+		})}
+	case "Burn":
+		return stageOutcome{Outcome: tests.Burn(ctx, deps, tests.BurnConfig{
+			OrchestratorURL: c.BaseURL,
+			IperfPort:       claim.IperfPort,
 		})}
 	case "GPU":
 		return stageOutcome{Outcome: tests.GPU(ctx, deps)}
@@ -188,8 +220,9 @@ func runStage(ctx context.Context, stage string, claim *ClaimResponse, fwd *logF
 
 type stageOutcome struct {
 	Outcome   tests.Outcome
-	Inventory *spec.Inventory // only for Inventory stage
-	Cancelled bool            // set when the stage was cut short by operator cancel
+	Inventory *spec.Inventory           // only for Inventory stage
+	Firmware  []probes.FirmwareSnapshot // only for Firmware stage
+	Cancelled bool                      // set when the stage was cut short by operator cancel
 }
 
 // runStageCancellable wraps runStage in a per-stage context so the
@@ -197,14 +230,14 @@ type stageOutcome struct {
 // is currently running. If the derived context was cancelled while the
 // stage executed, the outcome is rewritten as a cancellation record so
 // the orchestrator has something to persist.
-func runStageCancellable(parent context.Context, stage string, claim *ClaimResponse, fwd *logForwarder, c *Client, ovr overrideFlags) stageOutcome {
+func runStageCancellable(parent context.Context, stage string, claim *ClaimResponse, fwd *logForwarder, c *Client, mux *SensorMux, ovr overrideFlags) stageOutcome {
 	stageCtx, cancel := context.WithCancel(parent)
 	stageCancel.Store(cancel)
 	defer func() {
 		cancel()
 		stageCancel.Store(context.CancelFunc(nil))
 	}()
-	out := runStage(stageCtx, stage, claim, fwd, c, ovr)
+	out := runStage(stageCtx, stage, claim, fwd, c, mux, ovr)
 	// If the parent is still live but the stage ctx was cancelled, the
 	// operator fired a cancel — mark the outcome so the caller can exit
 	// the pipeline cleanly. Plain ctx-cancel on ctx.Done (e.g. shutdown)
@@ -235,7 +268,7 @@ type overrideFlags struct {
 	Wipe bool `json:"wipe"`
 }
 
-func newDeps(ctx context.Context, c *Client, fwd *logForwarder, ovr overrideFlags, claim *ClaimResponse) tests.Deps {
+func newDeps(ctx context.Context, c *Client, fwd *logForwarder, mux *SensorMux, ovr overrideFlags, claim *ClaimResponse, stage string) tests.Deps {
 	var expected []tests.ExpectedDisk
 	for _, e := range claim.ExpectedDisks {
 		expected = append(expected, tests.ExpectedDisk{Serial: e.Serial, SizeGB: e.SizeGB})
@@ -247,17 +280,73 @@ func newDeps(ctx context.Context, c *Client, fwd *logForwarder, ovr overrideFlag
 		OverrideWipe:   ovr.Wipe,
 		NonDestructive: claim.NonDestructive,
 		ExpectedDisks:  expected,
-		StageTimeout:   2 * time.Minute,
-		Sensor: func(ctx context.Context, samples []tests.Sample) error {
+		StageTimeout:   stageTimeout(claim, stage),
+		CPUStressKnobs: tests.CPUStressKnobs{
+			CPUPass:  parseDur(claim.StageConfig.CPUStress.CPUPass),
+			MemPass:  parseDur(claim.StageConfig.CPUStress.MemPass),
+			EDACPoll: parseDur(claim.StageConfig.CPUStress.EDACPoll),
+		},
+		StorageKnobs: tests.StorageKnobs{
+			Mode:    claim.StageConfig.Storage.Mode,
+			FioSize: claim.StageConfig.Storage.FioSize,
+			FioTime: parseDur(claim.StageConfig.Storage.FioTime),
+			FioBS:   claim.StageConfig.Storage.FioBS,
+			FioRW:   claim.StageConfig.Storage.FioRW,
+			Verify:  claim.StageConfig.Storage.Verify,
+		},
+		NetworkKnobs: tests.NetworkKnobs{
+			Duration: parseDur(claim.StageConfig.Network.Duration),
+		},
+		BurnKnobs: tests.BurnKnobs{
+			Duration:      parseDur(claim.StageConfig.Burn.Duration),
+			CPUWorkers:    claim.StageConfig.Burn.CPUWorkers,
+			MemPct:        claim.StageConfig.Burn.MemPct,
+			FioOnSpare:    claim.StageConfig.Burn.FioOnSpare,
+			IperfParallel: claim.StageConfig.Burn.IperfParallel,
+		},
+		Sensor: func(_ context.Context, samples []tests.Sample) error {
 			out := make([]SensorSample, 0, len(samples))
 			for _, s := range samples {
 				out = append(out, SensorSample{Kind: s.Kind, Key: s.Key, Value: s.Value, Unit: s.Unit})
 			}
-			return c.Sensor(ctx, out)
+			mux.Send(out)
+			return nil
 		},
 	}
 }
 
+// stageTimeout reads claim.StageConfig.StageTimeouts[stage] and falls
+// back to 2 minutes (the pre-Phase-2 default). Missing, malformed or
+// non-positive entries fall back silently — we'd rather run the stage than refuse on a typo.
+func stageTimeout(claim *ClaimResponse, stage string) time.Duration {
+	if claim == nil || claim.StageConfig.StageTimeouts == nil {
+		return 2 * time.Minute
+	}
+	raw, ok := claim.StageConfig.StageTimeouts[stage]
+	if !ok || raw == "" {
+		return 2 * time.Minute
+	}
+	d, err := time.ParseDuration(raw)
+	if err != nil || d <= 0 {
+		return 2 * time.Minute // NOTE(review): nothing is logged here; no logger is in scope
+	}
+	return d
+}
+
+// parseDur is the permissive duration parser for the knob wire shape.
+// Empty strings, parse failures and negative durations all yield 0 so
+// callers can treat a zero value as "use the compile-time default".
+func parseDur(s string) time.Duration {
+	if s == "" {
+		return 0
+	}
+	d, err := time.ParseDuration(s)
+	if err != nil || d < 0 {
+		return 0
+	}
+	return d
+}
+
 // postResult marshals stageOutcome for the /result endpoint. The
 // Inventory shape is special-cased: it includes the inventory blob so
 // the orchestrator can persist it and run server-side spec diff.
@@ -276,6 +365,9 @@ func postResult(ctx context.Context, c *Client, stage string, s stageOutcome) (*
 	if s.Inventory != nil {
 		body["inventory"] = s.Inventory
 	}
+	if len(s.Firmware) > 0 {
+		body["firmware"] = s.Firmware
+	}
 	if len(s.Outcome.SubSteps) > 0 {
 		wire := make([]SubStepReport, 0, len(s.Outcome.SubSteps))
 		for _, ss := range s.Outcome.SubSteps {
@@ -304,7 +396,7 @@ func stageForState(state string) string {
 	switch state {
 	case "InventoryCheck":
 		return "Inventory"
-	case "SMART", "CPUStress", "Storage", "Network", "GPU", "PSU":
+	case "Firmware", "SMART", "CPUStress", "Storage", "Network", "Burn", "GPU", "PSU":
 		return state
 	}
 	// SpecValidate and Reporting are orchestrator-owned; we never see
@@ -315,7 +407,7 @@ func stageForState(state string) string {
 // waitForOverride parks the agent in FailedHolding. It listens for a
 // heartbeat directive that tells it to retry a stage (e.g. Storage
 // with wipe-override armed) and re-enters runStage from that point.
-func waitForOverride(ctx context.Context, c *Client, fwd *logForwarder, hb <-chan HeartbeatResponse, claim *ClaimResponse) error {
+func waitForOverride(ctx context.Context, c *Client, fwd *logForwarder, mux *SensorMux, hb <-chan HeartbeatResponse, claim *ClaimResponse) error {
 	fwd.info("holding: awaiting operator decision (heartbeat directive or ctx cancel)")
 	for {
 		select {
@@ -333,7 +425,7 @@ func waitForOverride(ctx context.Context, c *Client, fwd *logForwarder, hb <-cha
 			if len(cmd.OverrideFlags) > 0 {
 				_ = json.Unmarshal(cmd.OverrideFlags, &ovr)
 			}
-			outcome := runStageCancellable(ctx, cmd.Stage, claim, fwd, c, ovr)
+			outcome := runStageCancellable(ctx, cmd.Stage, claim, fwd, c, mux, ovr)
 			if outcome.Cancelled {
 				fwd.warn("stage cancelled by operator; posting result and exiting")
 				_, _ = postResult(ctx, c, cmd.Stage, outcome)
@@ -362,7 +454,7 @@ func waitForOverride(ctx context.Context, c *Client, fwd *logForwarder, hb <-cha
 					default:
 					}
 					fwd.info("stage: starting " + nextStage)
-					out := runStageCancellable(ctx, nextStage, claim, fwd, c, overrideFlags{})
+					out := runStageCancellable(ctx, nextStage, claim, fwd, c, mux, overrideFlags{})
 					if out.Cancelled {
 						fwd.warn("stage cancelled by operator; posting result and exiting")
 						_, _ = postResult(ctx, c, nextStage, out)
@@ -417,11 +509,32 @@ func inventorySummary(inv *spec.Inventory) string {
 		len(inv.Disks), len(inv.NICs), len(inv.GPUs))
 }
 
+// firmwareSummary renders the one-liner surfaced in the stage tile:
+// per-component counts so an operator can see "bios=1 nic=2 nvme_fw=1"
+// without opening the report. Output order follows the fixed key list below.
+func firmwareSummary(snaps []probes.FirmwareSnapshot) string {
+	counts := map[string]int{}
+	for _, s := range snaps {
+		counts[s.Component]++
+	}
+	if len(counts) == 0 {
+		return "no firmware readable"
+	}
+	keys := []string{"bios", "bmc", "nic", "hba", "nvme_fw", "microcode"} // NOTE(review): components outside this list are counted but never rendered
+	parts := make([]string, 0, len(keys))
+	for _, k := range keys {
+		if n := counts[k]; n > 0 {
+			parts = append(parts, fmt.Sprintf("%s=%d", k, n))
+		}
+	}
+	return strings.Join(parts, " ") // can be "" when only unlisted components were seen
+}
+
 // thermalSidecar posts a batch of /sys/class/hwmon samples every 5s.
 // Idempotent: a dead sensor just drops out of the next batch. Errors
 // are logged but never fatal — we'd rather have a run with partial
 // thermal data than kill the agent over an I/O hiccup.
-func thermalSidecar(ctx context.Context, c *Client, fwd *logForwarder) {
+func thermalSidecar(ctx context.Context, mux *SensorMux, fwd *logForwarder) {
 	t := time.NewTicker(5 * time.Second)
 	defer t.Stop()
 	for {
@@ -437,11 +550,7 @@ func thermalSidecar(ctx context.Context, c *Client, fwd *logForwarder) {
 			for _, s := range samples {
 				out = append(out, SensorSample{Kind: s.Kind, Key: s.Key, Value: s.Value, Unit: s.Unit})
 			}
-			sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
-			if err := c.Sensor(sendCtx, out); err != nil {
-				fwd.warn("thermal sidecar: " + err.Error())
-			}
-			cancel()
+			mux.Send(out)
 		}
 	}
 }
diff --git a/agent/sensor_mux.go b/agent/sensor_mux.go
new file mode 100644
index 0000000..fd4ab7a
--- /dev/null
+++ b/agent/sensor_mux.go
@@ -0,0 +1,139 @@
+package agent
+
+import (
+	"context"
+	"log"
+	"sync"
+	"time"
+)
+
+// SensorMux coalesces sensor samples from every stage + sidecar into a
+// single batched HTTP POST stream. Without it, a Burn run that fans out
+// four concurrent workloads + thermal + PSU + EDAC sidecars can push ~50
+// samples/sec, each as a separate /sensor request — enough to either
+// saturate the orchestrator's request budget or stall a stage on its
+// own sensor-forwarding path.
+//
+// Contract:
+//   - Send is non-blocking; a full input channel drops a batch on the
+//     floor and logs a warning. That's preferred over back-pressuring
+//     a workload goroutine and skewing its timing.
+//   - Flush happens every flushInterval *or* as soon as the pending
+//     buffer reaches maxBatch samples. Chunk-at-flush keeps each HTTP
+//     request bounded regardless of the incoming rate.
+//   - Close flushes whatever is in the buffer. Callers that need the
+//     final flush to reach the server should defer Close before other
+//     deferred shutdown work.
+type SensorMux struct {
+	c             *Client             // destination; flushes go through c.Sensor
+	in            chan []SensorSample // batches queued by Send, received by loop
+	flushInterval time.Duration       // period of the timer-driven flush
+	maxBatch      int                 // flush threshold and per-request sample cap
+
+	ctx    context.Context    // cancelled by Close (or the parent) to stop loop
+	cancel context.CancelFunc // cancels ctx
+	wg     sync.WaitGroup     // tracks the loop goroutine so Close can wait for it
+}
+
+// NewSensorMux starts the flush loop. Callers hand the returned mux to
+// every code path that previously called Client.Sensor directly (stage
+// Deps.Sensor, thermal sidecar, EDAC sidecar). The mux lives for the
+// duration of the agent run; cancelling parent stops the loop just as Close does.
+func NewSensorMux(parent context.Context, c *Client) *SensorMux {
+	ctx, cancel := context.WithCancel(parent)
+	m := &SensorMux{
+		c:             c,
+		in:            make(chan []SensorSample, 32), // capacity counts batches, not samples
+		flushInterval: 2 * time.Second,
+		maxBatch:      500,
+		ctx:           ctx,
+		cancel:        cancel,
+	}
+	m.wg.Add(1) // matched by the deferred Done in loop
+	go m.loop()
+	return m
+}
+
+// Send enqueues a batch for the next flush tick. Empty batches are
+// silently ignored so callers with conditional sample lists don't need
+// to guard the call site. A nil receiver is a no-op as well.
+func (m *SensorMux) Send(samples []SensorSample) {
+	if m == nil || len(samples) == 0 {
+		return
+	}
+	// Copy so caller mutations don't race with the flush loop.
+	out := make([]SensorSample, len(samples))
+	copy(out, samples)
+	select {
+	case m.in <- out: // NOTE(review): still accepted after Close, when loop no longer drains the channel
+	default:
+		log.Printf("sensor mux: input channel full, dropping %d samples", len(out))
+	}
+}
+
+// Close stops the flush loop and blocks until the residual buffer and
+// any still-queued batches have been flushed. Safe on a nil receiver and
+// safe to call twice (the internal context is already cancelled by then).
+func (m *SensorMux) Close() {
+	if m == nil {
+		return
+	}
+	m.cancel()
+	m.wg.Wait() // returns once loop has finished its final flush and drain
+}
+
+func (m *SensorMux) loop() { // flush goroutine: accumulates batches from m.in until m.ctx is cancelled
+	defer m.wg.Done()
+	buf := make([]SensorSample, 0, m.maxBatch)
+	t := time.NewTicker(m.flushInterval)
+	defer t.Stop()
+	for {
+		select {
+		case <-m.ctx.Done():
+			m.flushChunks(buf) // residual samples first; a no-op when buf is empty
+			buf = nil
+			// Drain whatever is still sitting in the channel so a
+			// workload that pushed right before Close doesn't lose
+			// those final samples.
+			for {
+				select {
+				case batch := <-m.in:
+					m.flushChunks(batch) // each queued batch is flushed on its own; no coalescing on shutdown
+				default:
+					return
+				}
+			}
+		case batch := <-m.in:
+			buf = append(buf, batch...)
+			if len(buf) >= m.maxBatch { // size-triggered flush
+				m.flushChunks(buf)
+				buf = buf[:0] // keep the backing array for the next fill
+			}
+		case <-t.C:
+			if len(buf) > 0 { // timer-triggered flush; skipped when nothing is pending
+				m.flushChunks(buf)
+				buf = buf[:0]
+			}
+		}
+	}
+}
+
+// flushChunks splits a potentially-large slice into maxBatch-sized
+// HTTP requests so no single POST carries more than the configured cap.
+// A 10-second per-chunk timeout keeps a stalled orchestrator from
+// freezing the flush loop. Failed chunks are logged and not retried.
+func (m *SensorMux) flushChunks(all []SensorSample) {
+	for len(all) > 0 {
+		n := len(all)
+		if n > m.maxBatch {
+			n = m.maxBatch
+		}
+		chunk := all[:n]
+		all = all[n:]
+		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) // not m.ctx: the final flush runs after it is cancelled
+		if err := m.c.Sensor(ctx, chunk); err != nil {
+			log.Printf("sensor mux: flush of %d samples failed: %v", len(chunk), err)
+		}
+		cancel() // called per chunk rather than deferred, so timers don't pile up in the loop
+	}
+}
diff --git a/agent/sensor_mux_test.go b/agent/sensor_mux_test.go
new file mode 100644
index 0000000..35c3394
--- /dev/null
+++ b/agent/sensor_mux_test.go
@@ -0,0 +1,144 @@
+package agent
+
+import (
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// TestSensorMux_CloseFlushesBuffer confirms Close() empties the
+// pending buffer through the HTTP client before returning. Without
+// this guarantee a Burn run would drop the last 2 s of samples when
+// the stage tears down, which is exactly the window that contains the
+// peak-load PSU / thermal readings we care about.
+func TestSensorMux_CloseFlushesBuffer(t *testing.T) {
+	var batches int32
+	var totalSamples int32
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if !strings.HasSuffix(r.URL.Path, "/sensor") {
+			t.Errorf("unexpected path %s", r.URL.Path)
+		}
+		body, _ := io.ReadAll(r.Body)
+		var env struct {
+			Samples []SensorSample `json:"samples"`
+		}
+		if err := json.Unmarshal(body, &env); err != nil {
+			t.Errorf("decode: %v", err)
+		}
+		atomic.AddInt32(&batches, 1) // atomics: the handler runs on the server's goroutines
+		atomic.AddInt32(&totalSamples, int32(len(env.Samples)))
+		w.WriteHeader(http.StatusOK)
+	}))
+	defer srv.Close()
+
+	c := &Client{
+		BaseURL: srv.URL,
+		RunID:   1,
+		Token:   "t",
+		HTTP:    srv.Client(),
+	}
+	mux := NewSensorMux(context.Background(), c)
+	mux.Send([]SensorSample{
+		{Kind: "temp", Key: "cpu/0", Value: 72.5, Unit: "C"},
+		{Kind: "psu_volt", Key: "+12V", Value: 12.05, Unit: "V"},
+	})
+	mux.Send([]SensorSample{
+		{Kind: "mce", Key: "0", Value: 0, Unit: "count"},
+	})
+	mux.Close() // no wait for the 2 s ticker: Close itself must push the samples out
+
+	if got := atomic.LoadInt32(&totalSamples); got != 3 { // 2 + 1 samples from the two Sends
+		t.Errorf("expected 3 samples flushed, got %d across %d batch(es)", got, atomic.LoadInt32(&batches))
+	}
+	if atomic.LoadInt32(&batches) == 0 {
+		t.Errorf("expected at least one batch HTTP post")
+	}
+}
+
+// TestSensorMux_ChunksOversizedBatch verifies flushChunks splits a
+// single oversized input into maxBatch-sized HTTP requests. The plan's
+// Burn stage can legitimately push a single input larger than the cap
+// (e.g. a workload goroutine dumping a backlog), and a single giant
+// POST would defeat the point of the multiplexer.
+func TestSensorMux_ChunksOversizedBatch(t *testing.T) {
+	var batchSizes []int
+	var mu sync.Mutex // guards batchSizes against concurrent handler calls
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		body, _ := io.ReadAll(r.Body)
+		var env struct {
+			Samples []SensorSample `json:"samples"`
+		}
+		_ = json.Unmarshal(body, &env) // a decode failure shows up as a wrong total below
+		mu.Lock()
+		batchSizes = append(batchSizes, len(env.Samples))
+		mu.Unlock()
+		w.WriteHeader(http.StatusOK)
+	}))
+	defer srv.Close()
+
+	c := &Client{BaseURL: srv.URL, RunID: 1, Token: "t", HTTP: srv.Client()}
+	mux := NewSensorMux(context.Background(), c)
+
+	// One input with 1200 samples → expect chunks of 500 + 500 + 200
+	// given the default maxBatch of 500.
+	big := make([]SensorSample, 1200)
+	for i := range big {
+		big[i] = SensorSample{Kind: "burn/throughput_mbps", Key: "eth0", Value: float64(i), Unit: "Mbps"}
+	}
+	mux.Send(big)
+	mux.Close() // blocks until every chunk has been posted
+
+	mu.Lock()
+	defer mu.Unlock()
+	total := 0
+	for _, n := range batchSizes {
+		total += n
+		if n > 500 {
+			t.Errorf("batch size %d exceeds maxBatch=500", n)
+		}
+	}
+	if total != 1200 { // nothing lost or duplicated across chunks
+		t.Errorf("sum of batch sizes = %d, want 1200 (sizes=%v)", total, batchSizes)
+	}
+	if len(batchSizes) < 3 {
+		t.Errorf("expected at least 3 chunks for a 1200-sample input, got %d (%v)", len(batchSizes), batchSizes)
+	}
+}
+
+// TestSensorMux_EmptyAndNilSafe covers the defensive guards around
+// Send(nil) / Send([]) / a nil *SensorMux. Callers with conditional
+// sample lists (storage probe that skipped a disk, GPU stage with no
+// devices) should be able to call Send unconditionally without adding
+// their own nil check.
+func TestSensorMux_EmptyAndNilSafe(t *testing.T) {
+	var batches int32
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		atomic.AddInt32(&batches, 1) // any request at all is a failure for this test
+		w.WriteHeader(http.StatusOK)
+	}))
+	defer srv.Close()
+
+	// Nil receiver must be a no-op.
+	var nilMux *SensorMux
+	nilMux.Send([]SensorSample{{Kind: "x", Key: "y"}})
+	nilMux.Close()
+
+	c := &Client{BaseURL: srv.URL, RunID: 1, Token: "t", HTTP: srv.Client()}
+	mux := NewSensorMux(context.Background(), c)
+	mux.Send(nil)
+	mux.Send([]SensorSample{})
+	mux.Close()
+
+	// Give any spurious goroutine a chance to surprise us.
+	time.Sleep(50 * time.Millisecond) // belt and braces: Close has already waited for the loop
+	if atomic.LoadInt32(&batches) != 0 {
+		t.Errorf("empty/nil Send must not produce HTTP batches, got %d", atomic.LoadInt32(&batches))
+	}
+}
diff --git a/agent/tests/burn.go b/agent/tests/burn.go
new file mode 100644
index 0000000..625bdef
--- /dev/null
+++ b/agent/tests/burn.go
@@ -0,0 +1,486 @@
+package tests
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os/exec"
+	"runtime"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+
+	"vetting/agent/probes"
+)
+
+// BurnConfig is what the agent passes to Burn: the orchestrator's iperf3
+// server address and port. Durations + concurrency knobs come from
+// Deps.BurnKnobs so they scale with profile.
+type BurnConfig struct {
+	OrchestratorURL string // empty → Burn records the iperf workload as skipped
+	IperfPort       int    // 0 = 5201
+}
+
+// Burn is the concurrent soak stage. Unlike CPUStress (serial
+// CPU→memory) or Storage (serial per disk) it fans out every workload
+// at once: stress-ng hammers CPU + memory, fio drives the allow-listed
+// disks, iperf3 pushes sustained NIC traffic, and two sidecars poll
+// EDAC + PSU rails for the duration of the window.
+//
+// This is where PSU rails actually matter: 12V sag under simultaneous
+// CPU + disk + NIC load is exactly the failure a thermal/power
+// regression produces, and it's invisible to any stage that loads one
+// subsystem at a time. The PSU stage that follows Burn in the pipeline
+// re-samples rails post-window to confirm they settle back to nominal.
+//
+// Burn stays inside the stage framework — it doesn't spawn a parallel
+// stage runner. The goroutine fan-out is local; the stage converges
+// before returning an Outcome so every invariant the orchestrator
+// relies on (serial stage order, single in-flight stage per run) still
+// holds.
+func Burn(ctx context.Context, d Deps, cfg BurnConfig) Outcome {
+	duration := d.BurnKnobs.Duration
+	if duration <= 0 {
+		duration = 2 * time.Minute // default window when the knob is unset
+	}
+	cpuWorkers := resolveCPUWorkers(d.BurnKnobs.CPUWorkers)
+	memPct := clampMemPct(d.BurnKnobs.MemPct)
+	iperfParallel := d.BurnKnobs.IperfParallel
+	if iperfParallel <= 0 {
+		iperfParallel = 2 // default when the knob is unset
+	}
+	d.Info(fmt.Sprintf("Burn: window=%s cpu_workers=%d mem_pct=%d iperf_parallel=%d fio_on_spare=%v",
+		duration, cpuWorkers, memPct, iperfParallel, d.BurnKnobs.FioOnSpare))
+
+	// Sidecars run for the lifetime of the window and are cancelled on
+	// return so the main stage converges cleanly. EDAC catches DIMM
+	// bit-flips that appear only under concurrent load; PSU catches
+	// rail sag that only appears when CPU + disk + NIC pull current
+	// simultaneously.
+	sideCtx, sideCancel := context.WithCancel(ctx)
+	defer sideCancel()
+	var sideWG sync.WaitGroup
+	sideWG.Add(2) // one per sidecar below
+	go runEDACSidecar(sideCtx, &sideWG, d)
+	go runPSUSidecar(sideCtx, &sideWG, d)
+
+	runCtx, cancel := context.WithTimeout(ctx, duration+30*time.Second) // window plus 30 s of slack before workloads are cut off
+	defer cancel()
+
+	results := make(chan burnSubResult, 4) // one slot per workload: skip rows are sent below before anything receives
+	var wg sync.WaitGroup
+
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		results <- runBurnCPU(runCtx, d, duration, cpuWorkers)
+	}()
+
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		results <- runBurnMemory(runCtx, d, duration, memPct)
+	}()
+
+	// fio runs only when explicitly enabled *and* there are allow-listed
+	// disks *and* the run wasn't marked non-destructive. Any of those
+	// missing records a Skipped sub-step so the operator sees why.
+	if d.BurnKnobs.FioOnSpare && len(d.ExpectedDisks) > 0 && !d.NonDestructive {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			results <- runBurnFio(runCtx, d, duration)
+		}()
+	} else {
+		reason := burnFioSkipReason(d)
+		results <- burnSubResult{Name: "Burn fio", Skipped: true, Reason: reason}
+	}
+
+	// iperf requires an orchestrator host. Lab hosts run with the
+	// bundled iperf3 server; without a base URL we can't derive a
+	// target so we skip rather than fail the stage.
+	if cfg.OrchestratorURL != "" {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			results <- runBurnIperf(runCtx, d, duration, cfg.OrchestratorURL, cfg.IperfPort, iperfParallel)
+		}()
+	} else {
+		results <- burnSubResult{Name: "Burn iperf", Skipped: true, Reason: "no orchestrator host"}
+	}
+
+	wg.Wait()
+	sideCancel() // stop the sidecars only after every workload has returned
+	sideWG.Wait()
+	close(results) // all senders are done; lets the range in collectBurnResults terminate
+
+	subs, samples, failures := collectBurnResults(results)
+	if d.Sensor != nil && len(samples) > 0 {
+		_ = d.Sensor(ctx, samples) // forwarding error is explicitly discarded
+	}
+
+	extras := map[string]any{
+		"duration":       duration.String(),
+		"cpu_workers":    cpuWorkers,
+		"mem_pct":        memPct,
+		"iperf_parallel": iperfParallel,
+		"fio_on_spare":   d.BurnKnobs.FioOnSpare,
+	}
+	if len(failures) > 0 {
+		msg := "Burn workloads failed: " + strings.Join(failures, ", ")
+		d.Error(msg)
+		return Outcome{
+			Passed:   false,
+			Message:  msg,
+			Summary:  fmt.Sprintf("Burn failed (%d of %d workloads)", len(failures), len(subs)), // len(subs) includes skipped rows
+			Extras:   extras,
+			SubSteps: subs,
+		}
+	}
+	d.Info(fmt.Sprintf("Burn: %s window passed; %d workloads converged", duration, len(subs)))
+	return Outcome{
+		Passed:   true,
+		Summary:  fmt.Sprintf("Burn %s passed (%d workloads)", duration, len(subs)),
+		Extras:   extras,
+		SubSteps: subs,
+	}
+}
+
+// burnSubResult is the per-workload return type used by the fan-out
+// goroutines. Sample slice is merged into the stage's final /sensor
+// batch; SubStep becomes a row on the /result sub-steps list.
+type burnSubResult struct {
+	Name    string // workload label, e.g. "Burn CPU"; prefixes the failure message
+	Passed  bool   // false on a non-skipped result marks the workload as failed
+	Skipped bool   // true → collectBurnResults synthesizes the sub-step row
+	Reason  string // why a workload was skipped
+	Err     string // why a workload failed
+	Samples []Sample
+	SubStep SubStepReport
+}
+
+// collectBurnResults folds the closed results channel into sub-step rows, sensor samples and "name: reason" failures.
+func collectBurnResults(ch <-chan burnSubResult) (subs []SubStepReport, samples []Sample, failures []string) {
+	for r := range ch {
+		stamp := time.Now().UTC()
+		// Skipped slots get a synthesized row here so the /result shape stays stable.
+		if r.Skipped {
+			subs = append(subs, SubStepReport{
+				Name:        r.Name,
+				Skipped:     true,
+				StartedAt:   stamp,
+				CompletedAt: stamp,
+				SummaryJSON: mustJSON(map[string]any{"skipped": true, "reason": r.Reason}),
+			})
+			continue
+		}
+		if r.SubStep.Name == "" { // early-exit failure (e.g. "stress-ng missing"): SubStep was never built
+			r.SubStep.Name, r.SubStep.StartedAt, r.SubStep.CompletedAt = r.Name, stamp, stamp
+		}
+		subs = append(subs, r.SubStep)
+		samples = append(samples, r.Samples...)
+		if !r.Passed {
+			reason := r.Err
+			if reason == "" {
+				reason = "unknown"
+			}
+			failures = append(failures, r.Name+": "+reason)
+		}
+	}
+	return subs, samples, failures
+}
+
+func burnFioSkipReason(d Deps) string { // names the first unmet fio precondition for the skipped row
+	if !d.BurnKnobs.FioOnSpare {
+		return "fio_on_spare knob disabled"
+	}
+	if d.NonDestructive {
+		return "non-destructive run"
+	}
+	if len(d.ExpectedDisks) == 0 {
+		return "no allowlisted disks"
+	}
+	return "disabled" // fallback; not reached from Burn, which only calls this when a check above fails
+}
+
+// runBurnCPU hammers all CPU cores with stress-ng for the window. Same
+// shape as CPUStress pass 1 but with shorter label so the sub-step row
+// doesn't collide with the earlier stage's "CPU pass".
+func runBurnCPU(ctx context.Context, d Deps, duration time.Duration, workers int) burnSubResult {
+	if _, err := exec.LookPath("stress-ng"); err != nil {
+		return burnSubResult{Name: "Burn CPU", Err: "stress-ng missing"} // failure result; SubStep stays zero on this path
+	}
+	args := []string{
+		"--cpu", strconv.Itoa(workers),
+		"--cpu-method", "all",
+		"--timeout", durationSeconds(duration),
+		"--metrics-brief",
+		"--verify",
+	}
+	d.Info(fmt.Sprintf("Burn: stress-ng %s", strings.Join(args, " ")))
+	pass := runStressPass(ctx, d, "Burn CPU", duration, args)
+	return burnSubResult{
+		Name:    "Burn CPU",
+		Passed:  pass.Passed,
+		Err:     pass.Err,
+		SubStep: subStepFromPass("Burn CPU", pass),
+	}
+}
+
+// runBurnMemory drives a single --vm worker sized at memPct of
+// MemAvailable minus memHeadroomBytes, so the kernel + agent + other
+// workloads keep headroom. The size is computed here, against live
+// memory, each run; memPct is used as given (caller clamps — TODO confirm).
+func runBurnMemory(ctx context.Context, d Deps, duration time.Duration, memPct int) burnSubResult {
+	if _, err := exec.LookPath("stress-ng"); err != nil {
+		return burnSubResult{Name: "Burn memory", Err: "stress-ng missing"}
+	}
+	avail, err := memAvailableBytes()
+	if err != nil {
+		return burnSubResult{Name: "Burn memory", Err: "read MemAvailable: " + err.Error()}
+	}
+	// Budget = avail * memPct / 100, then subtract the standard headroom.
+	// If the result is below the memory-pass floor we record a skipped
+	// row instead — the budget is too tight to be meaningful on this box.
+	budget := int64(float64(avail) * float64(memPct) / 100.0)
+	cap := budget - memHeadroomBytes // NOTE: shadows the builtin cap for the rest of this function
+	if cap < memFloorBytes {
+		return burnSubResult{
+			Name:    "Burn memory",
+			Skipped: true,
+			Reason:  fmt.Sprintf("budget %s below floor %s after headroom", humanBytes(budget), humanBytes(memFloorBytes)),
+		}
+	}
+	args := []string{
+		"--vm", "1",
+		"--vm-bytes", strconv.FormatInt(cap, 10),
+		"--vm-keep",
+		"--timeout", durationSeconds(duration),
+		"--metrics-brief",
+		"--verify",
+	}
+	d.Info(fmt.Sprintf("Burn: stress-ng memory cap=%s (%d%% of MemAvailable)", humanBytes(cap), memPct))
+	pass := runStressPass(ctx, d, "Burn memory", duration, args)
+	return burnSubResult{
+		Name:    "Burn memory",
+		Passed:  pass.Passed,
+		Err:     pass.Err,
+		SubStep: subStepFromPass(fmt.Sprintf("Burn memory (cap %s)", humanBytes(cap)), pass),
+	}
+}
+
+// runBurnFio runs fio_sample against the first allow-listed disk for
+// the window. Reuses runFioVerify + parseFioJSON so the samples line
+// up with what Storage emits. Using fio_sample with a fixed 512MiB
+// Size keeps Burn's write volume predictable regardless of profile.
+func runBurnFio(ctx context.Context, d Deps, duration time.Duration) burnSubResult {
+	if _, err := exec.LookPath("fio"); err != nil {
+		return burnSubResult{Name: "Burn fio", Err: "fio missing"}
+	}
+	targets := resolveTargets(d.ExpectedDisks)
+	if len(targets) == 0 {
+		return burnSubResult{Name: "Burn fio", Skipped: true, Reason: "no allow-listed disks present"}
+	}
+	t := targets[0] // only the first resolved target is exercised
+	opts := fioOpts{
+		Mode:    "fio_sample",
+		Size:    "512MiB",
+		Runtime: duration,
+		BS:      "4k",
+		RW:      "randrw",
+		Verify:  "md5",
+	}
+	start := time.Now()
+	d.Info(fmt.Sprintf("Burn: fio %s on %s (%s window)", opts.Mode, t.Device, duration))
+	fr := runFioVerify(ctx, t.Device, opts)
+	end := time.Now()
+
+	sub := SubStepReport{
+		Name:        "Burn fio " + t.Device,
+		Passed:      fr.Error == "",
+		StartedAt:   start,
+		CompletedAt: end,
+		SummaryJSON: mustJSON(fr),
+	}
+	out := burnSubResult{Name: "Burn fio", SubStep: sub, Passed: fr.Error == "", Err: fr.Error}
+	if fr.Error == "" { // samples are only emitted for a clean fio run
+		out.Samples = append(out.Samples,
+			Sample{Kind: "fio", Key: t.Device + "/read_iops", Value: fr.ReadIOPS, Unit: "iops"},
+			Sample{Kind: "fio", Key: t.Device + "/write_iops", Value: fr.WriteIOPS, Unit: "iops"},
+		)
+		if fr.ReadP99Us > 0 { // zero p99 values are not reported
+			out.Samples = append(out.Samples, Sample{Kind: "fio_p99_us", Key: t.Device + "/read", Value: fr.ReadP99Us, Unit: "us"})
+		}
+		if fr.WriteP99Us > 0 {
+			out.Samples = append(out.Samples, Sample{Kind: "fio_p99_us", Key: t.Device + "/write", Value: fr.WriteP99Us, Unit: "us"})
+		}
+	}
+	return out
+}
+
+// runBurnIperf drives iperf3 -P N for the window. Reuses parseIperfJSON
+// so the same (mbps, retrans, bytesSent) extraction the Network stage
+// uses applies here too. Samples use Burn-scoped keys (burn/...) so the
+// dashboard can tell at-a-glance which window they came from.
+func runBurnIperf(ctx context.Context, d Deps, duration time.Duration, orchestratorURL string, port, parallel int) burnSubResult {
+	if _, err := exec.LookPath("iperf3"); err != nil {
+		return burnSubResult{Name: "Burn iperf", Err: "iperf3 missing"}
+	}
+	host, err := deriveHost(orchestratorURL)
+	if err != nil || host == "" {
+		return burnSubResult{Name: "Burn iperf", Skipped: true, Reason: "can't derive orchestrator host"}
+	}
+	if port == 0 {
+		port = 5201 // same default the Network stage documents for IperfPort
+	}
+	if parallel < 1 {
+		parallel = 1
+	}
+	args := []string{
+		"-c", host,
+		"-p", strconv.Itoa(port),
+		"-t", strconv.Itoa(int(duration.Seconds())), // whole seconds; any sub-second remainder is truncated
+		"-P", strconv.Itoa(parallel),
+		"-J",
+	}
+	runCtx, cancel := context.WithTimeout(ctx, duration+30*time.Second) // 30s grace on top of the -t window
+	defer cancel()
+	start := time.Now()
+	out, err := exec.CommandContext(runCtx, "iperf3", args...).Output() // stdout only, so "stderr_tail" below is a stdout tail
+	end := time.Now()
+	if err != nil {
+		return burnSubResult{
+			Name:    "Burn iperf",
+			Err:     "iperf3 client error: " + err.Error(),
+			SubStep: SubStepReport{
+				Name:        "Burn iperf",
+				StartedAt:   start,
+				CompletedAt: end,
+				SummaryJSON: mustJSON(map[string]any{"error": err.Error(), "stderr_tail": tailLines(string(out), 20)}),
+			},
+		}
+	}
+	mbps, retrans, bytesSent, _, perr := parseIperfJSON(out)
+	if perr != nil {
+		return burnSubResult{
+			Name:    "Burn iperf",
+			Err:     "parse iperf3 json: " + perr.Error(),
+			SubStep: SubStepReport{
+				Name:        "Burn iperf",
+				StartedAt:   start,
+				CompletedAt: end,
+				SummaryJSON: mustJSON(map[string]any{"error": perr.Error()}),
+			},
+		}
+	}
+
+	samples := []Sample{{Kind: "iperf", Key: "burn/throughput_mbps", Value: mbps, Unit: "Mbps"}}
+	if bytesSent > 0 {
+		packets := float64(bytesSent) / 1460.0 // packet-count estimate: bytes / 1460, as in the Network stage
+		if packets > 0 {
+			samples = append(samples, Sample{
+				Kind: "nic_retrans", Key: "burn/rate",
+				Value: float64(retrans) / packets, Unit: "rate",
+			})
+		}
+	}
+	passed := mbps > 0
+	errMsg := ""
+	if !passed {
+		errMsg = "zero throughput from iperf3"
+	}
+	return burnSubResult{
+		Name:    "Burn iperf",
+		Passed:  passed,
+		Err:     errMsg,
+		Samples: samples,
+		SubStep: SubStepReport{
+			Name:        fmt.Sprintf("Burn iperf (P=%d)", parallel),
+			Passed:      passed,
+			StartedAt:   start,
+			CompletedAt: end,
+			SummaryJSON: mustJSON(map[string]any{
+				"throughput_mbps": mbps,
+				"retransmits":     retrans,
+				"bytes_sent":      bytesSent,
+				"parallel":        parallel,
+			}),
+		},
+	}
+}
+
+// runPSUSidecar polls the hwmon PSU rails (scanPSURails) every 5s until
+// ctx is cancelled, posting each read through d.Sensor as a batch of
+// psu_volt samples. The threshold evaluator then applies the same
+// within_pct gates used by the PSU stage — a 12V rail sagging to 10.5V
+// under load will fire the critical threshold mid-Burn and the run
+// will flip into FailedHolding without waiting for the post-Burn PSU
+// stage to catch it.
+func runPSUSidecar(ctx context.Context, wg *sync.WaitGroup, d Deps) {
+	defer wg.Done()
+	if d.Sensor == nil { // nowhere to post samples; exit immediately
+		return
+	}
+	t := time.NewTicker(5 * time.Second)
+	defer t.Stop()
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case <-t.C:
+			rails := scanPSURails()
+			if len(rails) == 0 {
+				continue // nothing read this tick; try again on the next one
+			}
+			batch := make([]Sample, 0, len(rails))
+			for _, r := range rails {
+				batch = append(batch, Sample{Kind: "psu_volt", Key: r.Label, Value: r.Volts, Unit: "V"})
+			}
+			sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second) // bound each post to 5s
+			if err := d.Sensor(sendCtx, batch); err != nil {
+				d.Warn("Burn: PSU sample post: " + err.Error()) // a failed post is logged, not fatal
+			}
+			cancel()
+		}
+	}
+}
+
+func resolveCPUWorkers(raw string) int {
+	workers := runtime.NumCPU() // default for "", "all", and unusable input
+	if raw == "" || strings.EqualFold(raw, "all") {
+		return workers
+	}
+	if parsed, err := strconv.Atoi(raw); err == nil && parsed >= 1 {
+		workers = parsed // explicit positive count wins
+	}
+	return workers
+}
+
+// clampMemPct normalises the mem_pct knob. Zero or negative selects the
+// 50% default; above 90 would crowd the kernel + agent + fio + iperf3
+// workers off the page cache. Other values are clamped into [10, 90].
+func clampMemPct(pct int) int {
+	switch {
+	case pct <= 0:
+		return 50
+	case pct < 10:
+		return 10
+	case pct > 90:
+		return 90
+	default:
+		return pct
+	}
+}
+
+func mustJSON(v any) json.RawMessage { // marshals v; on failure yields {"marshal_error": "<error text>"}
+	b, err := json.Marshal(v)
+	if err != nil {
+		b, _ = json.Marshal(map[string]string{"marshal_error": err.Error()}) // encoder escapes quotes/backslashes; a string map cannot fail
+	}
+	return b
+}
+
+// Keep the probes import anchored with a blank reference. The Burn
+// sidecars rely on probes.EDAC and on the PSU rail scanner from psu.go,
+// and the latter would not pull the probes package in on its own.
+var _ = probes.EDAC
diff --git a/agent/tests/burn_test.go b/agent/tests/burn_test.go
new file mode 100644
index 0000000..ebe8c38
--- /dev/null
+++ b/agent/tests/burn_test.go
@@ -0,0 +1,58 @@
+package tests
+
+import (
+	"runtime"
+	"testing"
+)
+
+// TestResolveCPUWorkers covers the parse branches: empty/"all" falls
+// back to NumCPU, a positive integer is used verbatim, and zero,
+// negative, or garbage input also falls back to NumCPU. Zero workers
+// would make stress-ng a no-op and silently defeat Burn's CPU load.
+func TestResolveCPUWorkers(t *testing.T) {
+	np := runtime.NumCPU()
+	cases := []struct {
+		name string
+		in   string
+		want int
+	}{
+		{"empty defaults to NumCPU", "", np},
+		{"all defaults to NumCPU", "all", np},
+		{"ALL is case-insensitive", "ALL", np},
+		{"explicit integer", "3", 3},
+		{"negative falls back", "-1", np},
+		{"zero falls back", "0", np},
+		{"garbage falls back", "lots", np},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := resolveCPUWorkers(tc.in); got != tc.want {
+				t.Errorf("resolveCPUWorkers(%q) = %d, want %d", tc.in, got, tc.want)
+			}
+		})
+	}
+}
+
+// TestClampMemPct ensures the mem_pct knob never drives the memory
+// burner into OOM territory (upper clamp) or into uselessness (lower
+// clamp). Zero or negative means "use default 50" so a missing knob in
+// an older orchestrator's claim response doesn't collapse the workload.
+func TestClampMemPct(t *testing.T) {
+	cases := []struct {
+		in, want int
+	}{
+		{0, 50},   // default
+		{-10, 50}, // negative treated as default
+		{5, 10},   // below lower band → clamp up
+		{10, 10},
+		{50, 50},
+		{90, 90},
+		{95, 90}, // above upper band → clamp down
+		{1000, 90},
+	}
+	for _, tc := range cases {
+		if got := clampMemPct(tc.in); got != tc.want {
+			t.Errorf("clampMemPct(%d) = %d, want %d", tc.in, got, tc.want)
+		}
+	}
+}
diff --git a/agent/tests/cpustress.go b/agent/tests/cpustress.go
index 857d007..dabda54 100644
--- a/agent/tests/cpustress.go
+++ b/agent/tests/cpustress.go
@@ -11,7 +11,10 @@ import (
 	"runtime"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
+
+	"vetting/agent/probes"
 )
 
 // CPUStress runs stress-ng as two serial passes. The previous shape
@@ -55,11 +58,28 @@ func CPUStress(ctx context.Context, d Deps) Outcome {
 	extras := map[string]any{"cores": cores}
 	var subs []SubStepReport
 
+	// EDAC sidecar runs for the lifetime of the stage; cancelled on
+	// return. It polls /sys/devices/system/edac/mc/*/{ce,ue}_count and
+	// posts the current counters so the server-side threshold evaluator
+	// can gate edac_ue > 0 → fail the run. Zero-valued poll falls back
+	// to 10s — the same cadence rasdaemon uses by default.
+	sideCtx, sideCancel := context.WithCancel(ctx)
+	defer sideCancel()
+	var sideWG sync.WaitGroup
+	sideWG.Add(1)
+	go runEDACSidecar(sideCtx, &sideWG, d)
+
+	// Per-profile durations come from Deps; zero values (missing knobs
+	// or legacy orchestrator) fall back to the package default so the
+	// stage always has a defined budget.
+	cpuDur := nonzeroDur(d.CPUStressKnobs.CPUPass, cpuPassDuration)
+	memDur := nonzeroDur(d.CPUStressKnobs.MemPass, memPassDuration)
+
 	// Pass 1: CPU
-	cpu := runStressPass(ctx, d, "CPU", cpuPassDuration, []string{
+	cpu := runStressPass(ctx, d, "CPU", cpuDur, []string{
 		"--cpu", strconv.Itoa(cores),
 		"--cpu-method", "all",
-		"--timeout", durationSeconds(cpuPassDuration),
+		"--timeout", durationSeconds(cpuDur),
 		"--metrics-brief",
 		"--verify",
 	})
@@ -104,11 +124,11 @@ func CPUStress(ctx context.Context, d Deps) Outcome {
 			SubSteps: subs,
 		}
 	}
-	mem := runStressPass(ctx, d, "memory", memPassDuration, []string{
+	mem := runStressPass(ctx, d, "memory", memDur, []string{
 		"--vm", "1",
 		"--vm-bytes", strconv.FormatInt(cap, 10),
 		"--vm-keep",
-		"--timeout", durationSeconds(memPassDuration),
+		"--timeout", durationSeconds(memDur),
 		"--metrics-brief",
 		"--verify",
 	})
@@ -133,6 +153,64 @@ func CPUStress(ctx context.Context, d Deps) Outcome {
 	}
 }
 
+// runEDACSidecar polls /sys EDAC counters on d.CPUStressKnobs.EDACPoll
+// cadence (or 10s fallback) for the lifetime of the stage ctx, emitting
+// one sample per (memory-controller × {ce,ue}) pair on each tick. An
+// empty read or a failed post is tolerated: the next tick tries again.
+//
+// This is where the critical edac_ue threshold becomes a hard-fail: as
+// soon as a UE counter advances past 0, the server-side evaluator trips
+// and flips the run into FailedHolding. The sidecar emits whether or
+// not stress-ng is still running; that keeps the signal live during
+// inter-pass gaps.
+//
+// MCE counts are intentionally not sampled here — they require
+// rasdaemon or mcelog and vary by live-image packaging. The threshold
+// rule for mce stays seeded (so the DB shape is stable) but only fires
+// once a matching kind lands, which is a follow-up.
+func runEDACSidecar(ctx context.Context, wg *sync.WaitGroup, d Deps) {
+	defer wg.Done()
+	if d.Sensor == nil { // nowhere to post samples; exit immediately
+		return
+	}
+	poll := d.CPUStressKnobs.EDACPoll
+	if poll <= 0 {
+		poll = 10 * time.Second
+	}
+	t := time.NewTicker(poll)
+	defer t.Stop()
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case <-t.C:
+			edac := probes.EDAC()
+			if len(edac) == 0 {
+				continue // nothing read this tick; try again on the next one
+			}
+			batch := make([]Sample, 0, len(edac))
+			for _, s := range edac {
+				batch = append(batch, Sample{Kind: s.Kind, Key: s.Key, Value: s.Value, Unit: s.Unit})
+			}
+			sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second) // bound each post to 5s
+			if err := d.Sensor(sendCtx, batch); err != nil {
+				d.Warn("CPUStress: edac sample post: " + err.Error()) // a failed post is logged, not fatal
+			}
+			cancel()
+		}
+	}
+}
+
+// nonzeroDur returns override when it is strictly positive and
+// fallback otherwise, so a zero-value (or negative) duration means
+// "no override; use fallback" without a separate ok return.
+func nonzeroDur(override, fallback time.Duration) time.Duration {
+	if override <= 0 {
+		return fallback
+	}
+	return override
+}
+
 // subStepFromPass projects a stressPass into a SubStepReport — shared by
 // both passes and by the mid-stage early-return paths so the UI always
 // sees exactly one row per pass, even on failure.
diff --git a/agent/tests/fakes/dmidecode/main.go b/agent/tests/fakes/dmidecode/main.go
new file mode 100644
index 0000000..c5545bb
--- /dev/null
+++ b/agent/tests/fakes/dmidecode/main.go
@@ -0,0 +1,24 @@
+// fake_dmidecode simulates `dmidecode -t bios` for unit tests of the
+// firmware probe's BIOS parser. Prints deterministic output modeled on
+// a real Supermicro host; exits 0 regardless of flags.
+package main
+
+import "fmt"
+
+func main() { // no flag parsing: the same BIOS block is printed for any arguments
+	fmt.Println(`# dmidecode 3.3
+Getting SMBIOS data from sysfs.
+SMBIOS 3.2.0 present.
+
+Handle 0x0000, DMI type 0, 26 bytes
+BIOS Information
+	Vendor: American Megatrends Inc.
+	Version: 3.2
+	Release Date: 07/15/2021
+	Address: 0xF0000
+	Runtime Size: 64 kB
+	ROM Size: 32 MB
+	Characteristics:
+		PCI is supported
+		BIOS is upgradeable`)
+}
diff --git a/agent/tests/fakes/doc.go b/agent/tests/fakes/doc.go
new file mode 100644
index 0000000..01541d2
--- /dev/null
+++ b/agent/tests/fakes/doc.go
@@ -0,0 +1,22 @@
+// Package fakes is the umbrella for deterministic stand-ins for
+// external probe binaries that Vetting's stage code normally shells
+// out to (stress-ng, fio, iperf3, dmidecode, ethtool, nvidia-smi,
+// mcelog, nvme). Each real binary gets its own subpackage under
+// fakes/<name>/ with `package main` and a main() that prints golden
+// output — build with `go build -o <tmp>/<name> ./agent/tests/fakes/<name>`
+// and point a test's tests.Deps.LookPath at <tmp>/<name>.
+//
+// The seam in tests is tests.Deps.LookPath: when non-nil the stage
+// code uses it instead of os/exec.LookPath. Outside tests, nil
+// LookPath means "use the real binary on $PATH" — stages continue to
+// work on production hosts without the fakes package around.
+//
+// How to add a new fake:
+//  1. Create agent/tests/fakes/<binaryname>/main.go.
+//  2. Write `package main` with a main() that prints exactly the
+//     bytes the real tool would produce for the input you care to
+//     simulate. Determinism > completeness — tests want a known
+//     sample, not a realistic one.
+//  3. In the unit test, compile the fake with `go build -o` into
+//     t.TempDir() before the test body runs, then use that binary.
+package fakes
diff --git a/agent/tests/fakes/stress_ng/main.go b/agent/tests/fakes/stress_ng/main.go
new file mode 100644
index 0000000..b7f5178
--- /dev/null
+++ b/agent/tests/fakes/stress_ng/main.go
@@ -0,0 +1,18 @@
+// fake_stress_ng simulates stress-ng for unit tests. Accepts (and
+// ignores) any flag, sleeps briefly so callers that measure wall-clock
+// see a non-zero elapsed, and prints the "passed" lines CPUStress
+// expects. Exits 0.
+package main
+
+import (
+	"fmt"
+	"os"
+	"time"
+)
+
+func main() {
+	fmt.Fprintln(os.Stderr, "fake_stress_ng invoked:", os.Args[1:]) // argv echo goes to stderr; stdout carries only the two info lines
+	time.Sleep(50 * time.Millisecond)                               // gives callers a non-zero wall-clock elapsed
+	fmt.Println("stress-ng: info:  [1] dispatching hogs: 1 cpu")
+	fmt.Println("stress-ng: info:  [1] successful run completed in 0.05s")
+}
diff --git a/agent/tests/network.go b/agent/tests/network.go
index 089dc89..e673150 100644
--- a/agent/tests/network.go
+++ b/agent/tests/network.go
@@ -9,19 +9,27 @@ import (
 	"strconv"
 	"strings"
 	"time"
+
+	"vetting/agent/probes"
 )
 
 // NetworkConfig is what the agent passes to Network: the orchestrator's
-// iperf3 server address and port. We derive host from OrchestratorURL.
+// iperf3 server address, port, and the per-profile duration.
 type NetworkConfig struct {
 	OrchestratorURL string
 	IperfPort       int // 0 = 5201
 	Duration        time.Duration
 }
 
-// Network runs iperf3 against the orchestrator's bundled server. Records
-// bandwidth as a measurement; fails if iperf3 is missing, the server
-// isn't reachable, or throughput is zero.
+// Network runs iperf3 against the orchestrator's bundled server for
+// the profile-configured duration. Records throughput as a measurement;
+// records per-interface rx/tx error-rate deltas as nic_retrans samples
+// so the server-side threshold gate (`nic_retrans rate < 0.001`) fires
+// on a flaky PHY or a wire that drops half its packets under load.
+//
+// Failure cases: iperf3 missing, server unreachable, zero throughput.
+// Zero throughput is treated as a hard failure — an iperf that finished
+// cleanly but pushed zero bytes is indistinguishable from a bad run.
 func Network(ctx context.Context, d Deps, cfg NetworkConfig) Outcome {
 	if _, err := exec.LookPath("iperf3"); err != nil {
 		// Live image ships iperf3; absence means packaging regression.
@@ -51,6 +59,11 @@ func Network(ctx context.Context, d Deps, cfg NetworkConfig) Outcome {
 		duration = 10 * time.Second
 	}
 
+	// Snapshot /proc/net/dev before the test so we can attribute any
+	// error-count growth to *this stage's* traffic. The same snapshot
+	// taken after iperf returns is the end of the window.
+	netStart := indexNetDev(probes.NetDev())
+
 	args := []string{
 		"-c", host,
 		"-p", strconv.Itoa(port),
@@ -72,7 +85,7 @@ func Network(ctx context.Context, d Deps, cfg NetworkConfig) Outcome {
 			Extras:  map[string]any{"stderr_tail": tailLines(string(out), 20)},
 		}
 	}
-	mbps, parsed, err := parseIperfJSON(out)
+	mbps, retrans, bytesSent, parsed, err := parseIperfJSON(out)
 	if err != nil {
 		d.Error("Network: parse iperf3 output: " + err.Error())
 		return Outcome{
@@ -82,12 +95,58 @@ func Network(ctx context.Context, d Deps, cfg NetworkConfig) Outcome {
 			Extras:  map[string]any{"raw": string(out)},
 		}
 	}
+
+	netEnd := indexNetDev(probes.NetDev())
+	netDelta := diffNetDev(netStart, netEnd)
+
+	samples := []Sample{{Kind: "iperf", Key: "throughput_mbps", Value: mbps, Unit: "Mbps"}}
+
+	// iperf-derived retrans rate: retrans_count / packet_count_estimate.
+	// TCP typical MTU 1500; payload ~1460. We divide bytes by 1460 to
+	// approximate packets. This keeps the rate bounded in [0, 1].
+	if bytesSent > 0 {
+		packets := float64(bytesSent) / 1460.0
+		if packets > 0 {
+			samples = append(samples, Sample{
+				Kind:  "nic_retrans",
+				Key:   "iperf/rate",
+				Value: float64(retrans) / packets,
+				Unit:  "rate",
+			})
+		}
+	}
+
+	// Per-interface error-rate deltas. A flaky cable typically surfaces
+	// as tx_errs or tx_drop on the originating interface, not inside
+	// iperf's own tally.
+	for iface, delta := range netDelta {
+		if delta.TxBytes > 0 {
+			packets := float64(delta.TxBytes) / 1460.0
+			if packets > 0 {
+				rate := float64(delta.TxErrs+delta.TxDrop) / packets
+				samples = append(samples, Sample{
+					Kind: "nic_retrans", Key: iface + "/rate", Value: rate, Unit: "rate",
+				})
+			}
+		}
+		// Diagnostic raw counts so the report can show which interface
+		// bled. These don't fire a threshold today but are useful for
+		// post-mortem.
+		samples = append(samples,
+			Sample{Kind: "nic_errs", Key: iface + "/rx", Value: float64(delta.RxErrs + delta.RxDrop), Unit: "count"},
+			Sample{Kind: "nic_errs", Key: iface + "/tx", Value: float64(delta.TxErrs + delta.TxDrop), Unit: "count"},
+		)
+	}
+
 	if d.Sensor != nil {
-		_ = d.Sensor(ctx, []Sample{{Kind: "iperf", Key: "throughput_mbps", Value: mbps, Unit: "Mbps"}})
+		_ = d.Sensor(ctx, samples)
 	}
 
 	extras := map[string]any{
 		"throughput_mbps": mbps,
+		"retransmits":     retrans,
+		"bytes_sent":      bytesSent,
+		"net_delta":       netDelta,
 		"iperf_end":       parsed,
 	}
 	if mbps <= 0 {
@@ -98,14 +157,55 @@ func Network(ctx context.Context, d Deps, cfg NetworkConfig) Outcome {
 			Extras:  extras,
 		}
 	}
-	d.Info(fmt.Sprintf("Network: iperf3 PASSED: %.1f Mbps", mbps))
+	d.Info(fmt.Sprintf("Network: iperf3 PASSED: %.1f Mbps (retransmits=%d)", mbps, retrans))
 	return Outcome{
 		Passed:  true,
-		Summary: fmt.Sprintf("%.1f Mbps to %s", mbps, host),
+		Summary: fmt.Sprintf("%.1f Mbps to %s (retransmits=%d)", mbps, host, retrans),
 		Extras:  extras,
 	}
 }
 
+// indexNetDev turns a NetDev slice into a lookup keyed by interface
+// name, letting diffNetDev pair start/end rows without O(n²) scans.
+func indexNetDev(snaps []probes.NetDevSnapshot) map[string]probes.NetDevSnapshot {
+	byIface := make(map[string]probes.NetDevSnapshot, len(snaps))
+	for i := range snaps {
+		byIface[snaps[i].Iface] = snaps[i] // a repeated name keeps the last row
+	}
+	return byIface
+}
+
+// diffNetDev computes end − start for each interface present in both
+// snapshots. An interface that vanished or first appeared mid-run is
+// left out of the result (no pair to diff). Underflow (end < start,
+// e.g. after a counter reset) is clamped to 0 per field by subU64.
+func diffNetDev(start, end map[string]probes.NetDevSnapshot) map[string]probes.NetDevSnapshot {
+	out := map[string]probes.NetDevSnapshot{}
+	for iface, e := range end {
+		s, ok := start[iface]
+		if !ok {
+			continue
+		}
+		out[iface] = probes.NetDevSnapshot{
+			Iface:   iface,
+			RxBytes: subU64(e.RxBytes, s.RxBytes),
+			RxErrs:  subU64(e.RxErrs, s.RxErrs),
+			RxDrop:  subU64(e.RxDrop, s.RxDrop),
+			TxBytes: subU64(e.TxBytes, s.TxBytes),
+			TxErrs:  subU64(e.TxErrs, s.TxErrs),
+			TxDrop:  subU64(e.TxDrop, s.TxDrop),
+		}
+	}
+	return out
+}
+
+func subU64(a, b uint64) uint64 { // saturating a − b for unsigned counters
+	if a >= b {
+		return a - b
+	}
+	return 0 // a − b would wrap around; clamp instead
+}
+
 // deriveHost pulls the hostname out of an https://host:port base URL.
 func deriveHost(raw string) (string, error) {
 	if raw == "" {
@@ -119,18 +219,22 @@ func deriveHost(raw string) (string, error) {
 	return strings.TrimSpace(h), nil
 }
 
-// parseIperfJSON pulls end.sum_sent.bits_per_second out of iperf3 -J.
-// Returns (Mbps, full-json-map, err).
-func parseIperfJSON(b []byte) (float64, map[string]any, error) {
+// parseIperfJSON pulls end.sum_sent.bits_per_second and retransmits out
+// of iperf3 -J. Returns (Mbps, retransmits, bytes_sent, full-end-map, err).
+func parseIperfJSON(b []byte) (float64, int64, int64, map[string]any, error) {
 	var top map[string]any
 	if err := json.Unmarshal(b, &top); err != nil {
-		return 0, nil, err
+		return 0, 0, 0, nil, err
 	}
 	end, ok := top["end"].(map[string]any)
 	if !ok {
-		return 0, top, fmt.Errorf("missing end")
+		return 0, 0, 0, nil, fmt.Errorf("missing end")
 	}
-	// iperf3 reports either sum_sent (when -R not set) or sum_received.
+	// Pull the first sum that carries bits_per_second; retransmits +
+	// bytes live there too for TCP.
+	var mbps float64
+	var retrans int64
+	var bytesSent int64
 	for _, key := range []string{"sum_sent", "sum_received", "sum"} {
 		sum, ok := end[key].(map[string]any)
 		if !ok {
@@ -140,7 +244,17 @@ func parseIperfJSON(b []byte) (float64, map[string]any, error) {
 		if !ok {
 			continue
 		}
-		return bps / 1_000_000, end, nil
+		mbps = bps / 1_000_000
+		if r, ok := sum["retransmits"].(float64); ok {
+			retrans = int64(r)
+		}
+		if bs, ok := sum["bytes"].(float64); ok {
+			bytesSent = int64(bs)
+		}
+		break
 	}
-	return 0, end, fmt.Errorf("no bits_per_second in end.sum_*")
+	if mbps == 0 {
+		return 0, 0, 0, end, fmt.Errorf("no bits_per_second in end.sum_*")
+	}
+	return mbps, retrans, bytesSent, end, nil
 }
diff --git a/agent/tests/network_test.go b/agent/tests/network_test.go
new file mode 100644
index 0000000..7ee5e63
--- /dev/null
+++ b/agent/tests/network_test.go
@@ -0,0 +1,192 @@
+package tests
+
+import (
+	"encoding/json"
+	"testing"
+
+	"vetting/agent/probes"
+)
+
+// TestParseIperfJSON_SumSent confirms we pull throughput, retransmits,
+// and bytes_sent from end.sum_sent. Real iperf3 -J output nests these
+// three under end.sum_sent for TCP streams.
+func TestParseIperfJSON_SumSent(t *testing.T) {
+	raw := `{
+		"end": {
+			"sum_sent": {
+				"bits_per_second": 950000000,
+				"retransmits": 42,
+				"bytes": 1187500000
+			}
+		}
+	}`
+	mbps, retrans, bytesSent, _, err := parseIperfJSON([]byte(raw))
+	if err != nil {
+		t.Fatalf("parseIperfJSON: %v", err)
+	}
+	if mbps != 950 { // 950000000 bits/s expressed in Mbps
+		t.Errorf("mbps = %v, want 950", mbps)
+	}
+	if retrans != 42 {
+		t.Errorf("retransmits = %d, want 42", retrans)
+	}
+	if bytesSent != 1187500000 {
+		t.Errorf("bytesSent = %d, want 1187500000", bytesSent)
+	}
+}
+
+// TestParseIperfJSON_MissingEnd: output without an end block (a
+// partial/aborted run) must produce an error rather than zero values.
+func TestParseIperfJSON_MissingEnd(t *testing.T) {
+	raw := `{"start": {}}` // valid JSON, but no "end" object
+	if _, _, _, _, err := parseIperfJSON([]byte(raw)); err == nil {
+		t.Errorf("expected error on iperf output missing end block")
+	}
+}
+
+// TestParseIperfJSON_ZeroBps: a zero bits_per_second must come back as
+// an error so the stage can fail fast. A successful-exit iperf that pushed
+// zero bits is indistinguishable from a broken run and must not pass.
+func TestParseIperfJSON_ZeroBps(t *testing.T) {
+	raw := `{"end": {"sum_sent": {"bits_per_second": 0}}}`
+	if _, _, _, _, err := parseIperfJSON([]byte(raw)); err == nil {
+		t.Errorf("expected error when bits_per_second is 0")
+	}
+}
+
+// TestParseIperfJSON_FallsBackToSumReceived: UDP tests and some edge
+// cases don't populate sum_sent. The parser walks sum_sent → sum_received
+// → sum and picks the first that has a throughput number.
+func TestParseIperfJSON_FallsBackToSumReceived(t *testing.T) {
+	raw := `{
+		"end": {
+			"sum_received": {"bits_per_second": 500000000}
+		}
+	}`
+	mbps, _, _, _, err := parseIperfJSON([]byte(raw))
+	if err != nil {
+		t.Fatalf("parseIperfJSON: %v", err)
+	}
+	if mbps != 500 { // 500000000 bits/s expressed in Mbps
+		t.Errorf("mbps = %v, want 500", mbps)
+	}
+}
+
+// TestDiffNetDev_HappyPath confirms end − start on a shared interface
+// produces the delta we expect. eth0 received 1000 bytes, pushed 10k
+// bytes, and accumulated 3 tx errors during the window.
+func TestDiffNetDev_HappyPath(t *testing.T) {
+	start := map[string]probes.NetDevSnapshot{
+		"eth0": {Iface: "eth0", RxBytes: 1000, RxErrs: 0, TxBytes: 5000, TxErrs: 1},
+	}
+	end := map[string]probes.NetDevSnapshot{
+		"eth0": {Iface: "eth0", RxBytes: 2000, RxErrs: 0, TxBytes: 15000, TxErrs: 4},
+	}
+	delta := diffNetDev(start, end)
+	got, ok := delta["eth0"]
+	if !ok {
+		t.Fatalf("eth0 missing from diff output")
+	}
+	if got.RxBytes != 1000 {
+		t.Errorf("RxBytes delta=%d, want 1000", got.RxBytes)
+	}
+	if got.TxBytes != 10000 {
+		t.Errorf("TxBytes delta=%d, want 10000", got.TxBytes)
+	}
+	if got.TxErrs != 3 {
+		t.Errorf("TxErrs delta=%d, want 3", got.TxErrs)
+	}
+}
+
+// TestDiffNetDev_InterfaceVanished: an interface present at start but
+// gone at end drops from the diff rather than carrying a negative or
+// stale number. The surviving interface is still diffed normally.
+func TestDiffNetDev_InterfaceVanished(t *testing.T) {
+	start := map[string]probes.NetDevSnapshot{
+		"eth0": {Iface: "eth0", TxBytes: 1000},
+		"eth1": {Iface: "eth1", TxBytes: 500},
+	}
+	end := map[string]probes.NetDevSnapshot{
+		"eth0": {Iface: "eth0", TxBytes: 2000}, // eth1 is deliberately absent
+	}
+	delta := diffNetDev(start, end)
+	if _, ok := delta["eth1"]; ok {
+		t.Errorf("eth1 should have been dropped (gone at end)")
+	}
+	if delta["eth0"].TxBytes != 1000 {
+		t.Errorf("eth0 TxBytes delta=%d, want 1000", delta["eth0"].TxBytes)
+	}
+}
+
+// TestDiffNetDev_CounterReset: if a counter goes backwards between
+// snapshots (counter reset, wrap-around on a 32-bit counter) we clamp
+// to 0 rather than underflow a uint64.
+func TestDiffNetDev_CounterReset(t *testing.T) {
+	start := map[string]probes.NetDevSnapshot{
+		"eth0": {Iface: "eth0", TxBytes: 9999, TxErrs: 5},
+	}
+	end := map[string]probes.NetDevSnapshot{
+		"eth0": {Iface: "eth0", TxBytes: 100, TxErrs: 0},
+	}
+	delta := diffNetDev(start, end)
+	if delta["eth0"].TxBytes != 0 {
+		t.Errorf("reset TxBytes delta=%d, want 0 (clamped)", delta["eth0"].TxBytes)
+	}
+	if delta["eth0"].TxErrs != 0 {
+		t.Errorf("reset TxErrs delta=%d, want 0 (clamped)", delta["eth0"].TxErrs)
+	}
+}
+
+// TestDeriveHost: orchestrator URL → host extraction is how the agent
+// picks the iperf3 server target. Covers scheme://host and
+// scheme://host:port shapes; the expected host never includes the port.
+func TestDeriveHost(t *testing.T) {
+	cases := []struct {
+		raw  string
+		want string
+	}{
+		{"https://orch.local", "orch.local"},
+		{"https://orch.local:8443", "orch.local"},
+		{"http://10.0.0.5:8080", "10.0.0.5"},
+	}
+	for _, c := range cases {
+		got, err := deriveHost(c.raw)
+		if err != nil {
+			t.Errorf("deriveHost(%q) error: %v", c.raw, err)
+			continue
+		}
+		if got != c.want {
+			t.Errorf("deriveHost(%q) = %q, want %q", c.raw, got, c.want)
+		}
+	}
+}
+
+func TestDeriveHost_Empty(t *testing.T) {
+	if _, err := deriveHost(""); err == nil { // an empty URL must be rejected, not mapped to an empty host
+		t.Errorf("deriveHost(\"\") should error")
+	}
+}
+
+// TestParseIperfJSON_ParsesEndMap confirms the full end map is returned
+// so extras can show every field iperf produced, not just the three we
+// extract by hand.
+func TestParseIperfJSON_ParsesEndMap(t *testing.T) {
+	raw := `{
+		"end": {
+			"sum_sent": {"bits_per_second": 1000000, "retransmits": 0, "bytes": 125000},
+			"cpu_utilization_percent": {"host_total": 12.3}
+		}
+	}`
+	_, _, _, endMap, err := parseIperfJSON([]byte(raw))
+	if err != nil {
+		t.Fatalf("parseIperfJSON: %v", err)
+	}
+	// Both top-level keys of the end block must come back (a nil map fails
+	// the lookups too), and the map must still marshal for use in extras.
+	_, hasSum := endMap["sum_sent"]
+	_, hasCPU := endMap["cpu_utilization_percent"]
+	b, err := json.Marshal(endMap)
+	if err != nil || !hasSum || !hasCPU {
+		t.Errorf("endMap = %s (marshal err %v), want sum_sent and cpu_utilization_percent", b, err)
+	}
+}
diff --git a/agent/tests/psu.go b/agent/tests/psu.go
index 8e8991e..7bedecb 100644
--- a/agent/tests/psu.go
+++ b/agent/tests/psu.go
@@ -7,12 +7,20 @@ import (
 	"path/filepath"
 	"strconv"
 	"strings"
+	"time"
 )
 
 // PSU walks /sys/class/hwmon for in*_input (mV) and in*_label to find
-// PSU rails. In home-lab hosts the kernel surfaces a handful of named
-// rails (12V, 5V, 3V3). No rails → auto-skip. Any rail outside a ±10%
-// window of its nominal value → fail.
+// PSU rails, then samples each rail every psuSampleInterval for a
+// window sized by the stage timeout. During Burn a separate sidecar
+// (see burn.go) runs the same probe concurrently with workload — the
+// PSU stage itself catches slow post-load sag that only surfaces once
+// the 12V rail starts recovering from a brownout under concurrent CPU
+// + fio + iperf load.
+//
+// Any rail outside ±10% of its nominal value at any tick fires the
+// critical threshold (server-side) and fails the stage. A host with no
+// PSU rails wired to hwmon auto-skips.
 func PSU(ctx context.Context, d Deps) Outcome {
 	rails := scanPSURails()
 	if len(rails) == 0 {
@@ -24,39 +32,150 @@ func PSU(ctx context.Context, d Deps) Outcome {
 		}
 	}
 
-	var samples []Sample
-	problems := []string{}
-	for _, rail := range rails {
-		samples = append(samples, Sample{Kind: "psu_volt", Key: rail.Label, Value: rail.Volts, Unit: "V"})
-		if ok, why := voltageInRange(rail); !ok {
-			problems = append(problems, fmt.Sprintf("%s=%.2fV (%s)", rail.Label, rail.Volts, why))
+	window := resolvePSUWindow(d.StageTimeout)
+	deadline := time.Now().Add(window)
+	interval := psuSampleInterval
+	if window < interval*2 {
+		// Defensive: resolvePSUWindow floors the window at 30s, so this only
+		// fires if that floor shrinks — aim for at least two ticks per window.
+		interval = window / 2
+		if interval < time.Second {
+			interval = time.Second
 		}
 	}
-	if d.Sensor != nil {
-		_ = d.Sensor(ctx, samples)
+
+	// Per-label tracking: min/max across the window, count of out-of-range
+	// hits, last-observed value (shown in the summary).
+	type railStats struct {
+		label    string
+		minV     float64
+		maxV     float64
+		lastV    float64
+		ticks    int
+		breaches int
+		reason   string
+	}
+	stats := map[string]*railStats{}
+
+	tick := time.NewTicker(interval)
+	defer tick.Stop()
+	// sampleOnce reads every rail, folds it into stats and forwards the
+	// batch to d.Sensor. It runs once before the first tick (see below).
+	sampleOnce := func() {
+		cur := scanPSURails()
+		if len(cur) == 0 {
+			return
+		}
+		batch := make([]Sample, 0, len(cur))
+		for _, r := range cur {
+			s, ok := stats[r.Label]
+			if !ok {
+				s = &railStats{label: r.Label, minV: r.Volts, maxV: r.Volts}
+				stats[r.Label] = s
+			}
+			s.ticks++
+			s.lastV = r.Volts
+			if r.Volts < s.minV {
+				s.minV = r.Volts
+			}
+			if r.Volts > s.maxV {
+				s.maxV = r.Volts
+			}
+			if ok, why := voltageInRange(r); !ok {
+				s.breaches++
+				if s.reason == "" {
+					s.reason = why
+				}
+			}
+			batch = append(batch, Sample{Kind: "psu_volt", Key: r.Label, Value: r.Volts, Unit: "V"})
+		}
+		if d.Sensor != nil && len(batch) > 0 {
+			sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
+			_ = d.Sensor(sendCtx, batch)
+			cancel()
+		}
+	}
+	sampleOnce()
+sampling:
+	for time.Now().Before(deadline) {
+		select {
+		case <-ctx.Done():
+			break sampling
+		case <-tick.C:
+			sampleOnce()
+		}
+	}
+
+	// Build the outcome. Extras carry per-rail rollup so the report can
+	// show "12V min=11.1 max=12.05 (3/120 ticks out of range)".
+	type railRollup struct {
+		Label    string  `json:"label"`
+		MinV     float64 `json:"min_v"`
+		MaxV     float64 `json:"max_v"`
+		LastV    float64 `json:"last_v"`
+		Ticks    int     `json:"ticks"`
+		Breaches int     `json:"breaches"`
+		Reason   string  `json:"reason,omitempty"`
+	}
+	rollups := make([]railRollup, 0, len(stats))
+	problems := []string{}
+	for _, s := range stats {
+		rollups = append(rollups, railRollup{
+			Label: s.label, MinV: s.minV, MaxV: s.maxV, LastV: s.lastV,
+			Ticks: s.ticks, Breaches: s.breaches, Reason: s.reason,
+		})
+		if s.breaches > 0 {
+			problems = append(problems, fmt.Sprintf("%s min=%.2fV max=%.2fV (%s)", s.label, s.minV, s.maxV, s.reason))
+		}
 	}
 
 	extras := map[string]any{
-		"rails":    rails,
-		"problems": problems,
+		"rails":       rollups,
+		"problems":    problems,
+		"window":      window.String(),
+		"interval":    interval.String(),
 	}
 	if len(problems) > 0 {
-		d.Error("PSU: out-of-range rails: " + strings.Join(problems, ", "))
+		d.Error("PSU: out-of-range rails: " + strings.Join(problems, "; "))
 		return Outcome{
 			Passed:  false,
-			Message: "PSU rails out of range: " + strings.Join(problems, ", "),
-			Summary: fmt.Sprintf("%d rails, %d failing", len(rails), len(problems)),
+			Message: "PSU rails out of range: " + strings.Join(problems, "; "),
+			Summary: fmt.Sprintf("%d rails, %d failing", len(rollups), len(problems)),
 			Extras:  extras,
 		}
 	}
-	d.Info(fmt.Sprintf("PSU: %d rails within ±10%% nominal", len(rails)))
+	d.Info(fmt.Sprintf("PSU: %d rails within ±10%% nominal across %s window", len(rollups), window))
 	return Outcome{
 		Passed:  true,
-		Summary: fmt.Sprintf("%d rails nominal", len(rails)),
+		Summary: fmt.Sprintf("%d rails nominal (%s)", len(rollups), window),
 		Extras:  extras,
 	}
 }
 
+// psuSampleInterval is the default tick for post-Burn rail sampling.
+// Five seconds is slow enough to stay under the HTTP budget and fast
+// enough to catch rail recovery transients.
+const psuSampleInterval = 5 * time.Second
+
+// resolvePSUWindow maps the stage timeout to the sampling window.
+// With no timeout (tests / pre-Phase-2 orchestrator), stay snapshot-
+// like at 30 s. Otherwise take stage_timeout - 5 s to leave headroom
+// for sensor flush + result post, floored at 30 s and capped at 10 min
+// so a 24 h soak doesn't spend all day in PSU.
+func resolvePSUWindow(stageTimeout time.Duration) time.Duration {
+	if stageTimeout <= 0 {
+		return 30 * time.Second
+	}
+	w := stageTimeout - 5*time.Second
+	if w < 30*time.Second {
+		w = 30 * time.Second
+	}
+	if w > 10*time.Minute {
+		w = 10 * time.Minute
+	}
+	return w
+}
+
 type psuRail struct {
 	Label string  `json:"label"`
 	Volts float64 `json:"volts"`
diff --git a/agent/tests/psu_test.go b/agent/tests/psu_test.go
new file mode 100644
index 0000000..3bc9e03
--- /dev/null
+++ b/agent/tests/psu_test.go
@@ -0,0 +1,112 @@
+package tests
+
+import (
+	"testing"
+	"time"
+)
+
+// TestIsPSULabel keeps the allowlist narrow enough that CPU VRM rails
+// don't get misclassified as PSU-out-of-range failures but wide enough
+// that common SuperMicro/Intel hwmon labels land in the Yes bucket.
+func TestIsPSULabel(t *testing.T) {
+	cases := []struct {
+		label string
+		want  bool
+	}{
+		{"+12V", true},
+		{"12V", true},
+		{"+5V", true},
+		{"5V", true},
+		{"+3.3V", true},
+		{"3V3", true},
+		{"VCCIN", true},
+		{"vccin", true},
+		{"Vcore", false},
+		{"CPU VCORE", false},
+		{"AVCC", false},
+		{"", false},
+	}
+	for _, tc := range cases {
+		if got := isPSULabel(tc.label); got != tc.want {
+			t.Errorf("isPSULabel(%q) = %v, want %v", tc.label, got, tc.want)
+		}
+	}
+}
+
+// TestNominalFor maps rail labels back to expected nominal voltages.
+// Unknown labels must return 0 so voltageInRange short-circuits — an
+// accidental nominal would invent out-of-range failures.
+func TestNominalFor(t *testing.T) {
+	cases := []struct {
+		label string
+		want  float64
+	}{
+		{"+12V", 12.0},
+		{"12V", 12.0},
+		{"+5V", 5.0},
+		{"+3.3V", 3.3},
+		{"3V3", 3.3},
+		{"VCCIN", 0},
+		{"unknown", 0},
+	}
+	for _, tc := range cases {
+		if got := nominalFor(tc.label); got != tc.want {
+			t.Errorf("nominalFor(%q) = %v, want %v", tc.label, got, tc.want)
+		}
+	}
+}
+
+// TestVoltageInRange verifies the ±10% band: 12V passes in [10.8,
+// 13.2], fails anywhere outside. Unknown labels always pass (since
+// nominalFor returned 0 above).
+func TestVoltageInRange(t *testing.T) {
+	cases := []struct {
+		rail psuRail
+		ok   bool
+	}{
+		{psuRail{Label: "+12V", Volts: 12.0}, true},
+		{psuRail{Label: "+12V", Volts: 10.8}, true},  // exactly at the band
+		{psuRail{Label: "+12V", Volts: 13.2}, true},  // exactly at the band
+		{psuRail{Label: "+12V", Volts: 10.7}, false}, // just below
+		{psuRail{Label: "+12V", Volts: 13.3}, false}, // just above
+		{psuRail{Label: "+12V", Volts: 10.5}, false}, // real sag
+		{psuRail{Label: "+5V", Volts: 4.6}, true},    // 8% low on 5V still in band
+		{psuRail{Label: "+5V", Volts: 4.4}, false},   // 12% low on 5V — out of band
+		{psuRail{Label: "+5V", Volts: 5.0}, true},
+		{psuRail{Label: "VCCIN", Volts: 1.8}, true}, // unknown nominal → pass
+	}
+	for _, tc := range cases {
+		got, _ := voltageInRange(tc.rail)
+		if got != tc.ok {
+			t.Errorf("voltageInRange(%+v) = %v, want %v", tc.rail, got, tc.ok)
+		}
+	}
+}
+
+// TestResolvePSUWindow maps stage timeouts to the sampling window.
+// Quick's 1m stage_timeout → 55s window; deep's 10m → 9m55s; 15m+ →
+// capped at 10m; missing/zero → 30s (test / legacy orchestrator path);
+// sub-35s → at least 30s so aggregates are non-trivial.
+func TestResolvePSUWindow(t *testing.T) {
+	cases := []struct {
+		name string
+		in   time.Duration
+		want time.Duration
+	}{
+		{"zero → snapshot fallback", 0, 30 * time.Second},
+		{"negative → snapshot fallback", -1 * time.Second, 30 * time.Second},
+		{"tiny timeout clamps up to 30s floor", 10 * time.Second, 30 * time.Second},
+		{"35s - 5s = 30s", 35 * time.Second, 30 * time.Second},
+		{"1m quick → 55s", time.Minute, 55 * time.Second},
+		{"10m deep → 9m55s", 10 * time.Minute, 9*time.Minute + 55*time.Second},
+		{"15m soak → capped at 10m", 15 * time.Minute, 10 * time.Minute},
+		{"1h → capped at 10m", time.Hour, 10 * time.Minute},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := resolvePSUWindow(tc.in); got != tc.want {
+				t.Errorf("resolvePSUWindow(%s) = %s, want %s", tc.in, got, tc.want)
+			}
+		})
+	}
+}
diff --git a/agent/tests/stage.go b/agent/tests/stage.go
index 4acffdd..5f4ac09 100644
--- a/agent/tests/stage.go
+++ b/agent/tests/stage.go
@@ -59,6 +59,11 @@ func (o Outcome) MarshalSummary() (json.RawMessage, error) {
 // Deps bundles what stages need without pulling in the whole agent.
 // Logger methods print to stdout + forward to the orchestrator; Sensor
 // drops numeric samples; OverrideFlags carries operator-set bypasses.
+//
+// CPUStressKnobs / StorageKnobs / NetworkKnobs are Phase-2 profile
+// knobs. Zero-valued fields mean "fall back to the compile-time
+// default" — that keeps the stages runnable even when the runner can't
+// materialize a profile (tests, legacy orchestrator, etc).
 type Deps struct {
 	Info           func(string)
 	Warn           func(string)
@@ -68,6 +73,58 @@ type Deps struct {
 	NonDestructive bool           // skip wipe-probe + writes in Storage
 	ExpectedDisks  []ExpectedDisk // serials + sizes from host.expected_spec
 	StageTimeout   time.Duration
+	CPUStressKnobs CPUStressKnobs
+	StorageKnobs   StorageKnobs
+	NetworkKnobs   NetworkKnobs
+	BurnKnobs      BurnKnobs
+	// LookPath is the unit-test seam for swapping a real external
+	// binary (stress-ng, fio, iperf3, dmidecode, …) for a fake. When
+	// nil the stage falls back to os/exec.LookPath — production and
+	// existing tests keep working unchanged. Tests under
+	// agent/tests/fakes/ populate this to redirect lookups to a built
+	// fake binary in a tempdir.
+	LookPath func(name string) (string, error)
+}
+
+// CPUStressKnobs parameterizes the CPUStress stage. Zero durations fall
+// back to the package's compile-time defaults (cpuPassDuration etc).
+type CPUStressKnobs struct {
+	CPUPass  time.Duration
+	MemPass  time.Duration
+	EDACPoll time.Duration
+}
+
+// StorageKnobs parameterizes the Storage stage. Mode picks between
+// "fio_sample" (bounded tempfile inside the device, quick profile) and
+// "full_disk" (whole-device write verify, deep/soak). Empty strings
+// fall back to the stage's safe defaults.
+type StorageKnobs struct {
+	Mode    string
+	FioSize string
+	FioTime time.Duration
+	FioBS   string
+	FioRW   string
+	Verify  string
+}
+
+// NetworkKnobs parameterizes the Network stage.
+type NetworkKnobs struct {
+	Duration time.Duration
+}
+
+// BurnKnobs parameterizes the Burn super-stage. Duration is the total
+// Burn window; sub-workloads run concurrently inside that window.
+// CPUWorkers is "all" (runtime.NumCPU) or a numeric string. MemPct is a
+// percentage of MemAvailable to allocate for the memory burner (clamped
+// 0-90 by the stage). IperfParallel feeds iperf3 -P to generate sustained
+// NIC load. FioOnSpare gates the storage sub-workload: true = fio runs
+// against the allow-listed disks for the same window; false = skip fio.
+type BurnKnobs struct {
+	Duration      time.Duration
+	CPUWorkers    string
+	MemPct        int
+	FioOnSpare    bool
+	IperfParallel int
 }
 
 // Sample mirrors the server's SensorSample but lives in the tests
diff --git a/agent/tests/storage.go b/agent/tests/storage.go
index 0c5e78e..6f29889 100644
--- a/agent/tests/storage.go
+++ b/agent/tests/storage.go
@@ -5,24 +5,36 @@ import (
 	"encoding/json"
 	"fmt"
 	"os/exec"
+	"strconv"
 	"strings"
 	"time"
 )
 
-// Storage is the destructive stage: badblocks (write-mode sample) + fio
-// random IO, persisting IOPS + latency as measurements. Pre-gates:
+// Storage is the destructive stage. Phase 2 replaced the old
+// badblocks + 128 MiB fio combo with a single fio run per disk that
+// writes, verifies md5 of what it wrote, and reports p99 latency.
+// Modes:
+//
+//   - fio_sample (quick): bounded 1 GiB write per disk, ~3 min runtime.
+//   - full_disk (deep/soak): writes the whole device, time-bounded by
+//     the fio_time knob (2 h deep, 6 h soak).
+//
+// Pre-gates kept from Phase 1:
 //
 //  1. Device allowlist: only act on /dev/<X> where the kernel-reported
-//     serial matches one of Deps.ExpectedDisks. This is the operator's
-//     contract for what can be written to. USB sticks and unexpected
+//     serial matches one of Deps.ExpectedDisks. USB sticks and unexpected
 //     drives are excluded.
 //  2. Wipe probe: blkid + wipefs --no-act on each target; any filesystem
-//     signatures, partition tables, or LVM metadata → fail with
+//     signature, partition table, or LVM metadata → fail with
 //     UnexpectedData unless Deps.OverrideWipe is set.
 //
-// Only after those pass does the stage run `badblocks -b 4096 -c 64 -w`
-// and `fio` in write mode. This matches the plan's "destructive disk
-// tests are always-on, gated by layered safety."
+// After fio, the stage captures a SMART diff (start snapshot taken
+// before any writes; end snapshot after all writes finish) and posts
+// deltas on attributes like Reallocated_Sector_Ct and Current_Pending_Sector.
+// The threshold evaluator isn't seeded to gate smart_delta out of the
+// box — those samples are diagnostic for the report. Fio's p99 latency
+// posts as fio_p99_us so the per-stage Storage warning threshold can
+// fire on a latency cliff.
 func Storage(ctx context.Context, d Deps) Outcome {
 	if len(d.ExpectedDisks) == 0 {
 		d.Info("Storage: no expected disks in spec — skipping stage")
@@ -44,10 +56,10 @@ func Storage(ctx context.Context, d Deps) Outcome {
 		}
 	}
 
-	// Non-destructive runs skip wipe-probe (nothing to refuse), badblocks
-	// -w, and write-mode fio. Every expected disk is still asserted
-	// present + readable by listing /sys/block and reading SMART-accessible
-	// identity; the per-disk map flags the shortcut so the report is clear.
+	// Non-destructive runs skip wipe-probe (nothing to refuse), fio
+	// writes, and SMART delta (nothing changed so no delta to report).
+	// Every expected disk is still asserted present so a vanished drive
+	// still fails the stage.
 	if d.NonDestructive {
 		perDisk := map[string]any{}
 		for _, t := range targets {
@@ -79,9 +91,9 @@ func Storage(ctx context.Context, d Deps) Outcome {
 			Message: "UnexpectedData: " + strings.Join(dirty, ", ") + " (operator override required)",
 			Summary: fmt.Sprintf("wipe-probe halt (%d disk(s) have data)", len(dirty)),
 			Extras: map[string]any{
-				"wipe_probe":     probes,
-				"override_hint":  "click 'Override wipe & retry' in the held tile",
-				"dirty_devices":  dirty,
+				"wipe_probe":    probes,
+				"override_hint": "click 'Override wipe & retry' in the held tile",
+				"dirty_devices": dirty,
 			},
 		}
 	}
@@ -89,64 +101,80 @@ func Storage(ctx context.Context, d Deps) Outcome {
 		d.Warn("Storage: operator override engaged — proceeding despite data on " + strings.Join(dirty, ", "))
 	}
 
-	// Per target: short badblocks write sample + fio random-read/write.
+	// Capture start-of-stage SMART attributes before we write anything
+	// so the delta is attributable to *this* stage's writes and not the
+	// host's prior history. Per-disk failures are tolerated (e.g. the
+	// device doesn't expose SMART); we just can't emit a delta for it.
+	startSMART := captureSMARTAttrs(ctx, targets)
+
+	fioOpts := resolveFioOpts(d.StorageKnobs)
+	d.Info(fmt.Sprintf("Storage: fio mode=%s size=%s runtime=%s bs=%s rw=%s verify=%s",
+		fioOpts.Mode, fioOpts.Size, fioOpts.Runtime, fioOpts.BS, fioOpts.RW, fioOpts.Verify))
+
 	var samples []Sample
 	var subs []SubStepReport
 	perDisk := map[string]any{}
+	failed := ""
 	for _, t := range targets {
-		d.Info("Storage: running badblocks write sample on " + t.Device)
-		bbStart := time.Now()
-		bb := runBadblocks(ctx, t.Device)
-		bbEnd := time.Now()
-		bbSummary, _ := json.Marshal(bb)
-		subs = append(subs, SubStepReport{
-			Name:        fmt.Sprintf("badblocks %s", t.Device),
-			Passed:      bb.OK,
-			StartedAt:   bbStart,
-			CompletedAt: bbEnd,
-			SummaryJSON: bbSummary,
-		})
-
-		d.Info(fmt.Sprintf("Storage: running fio random rw on %s", t.Device))
+		d.Info(fmt.Sprintf("Storage: running fio %s on %s", fioOpts.Mode, t.Device))
 		fioStart := time.Now()
-		fr := runFio(ctx, t.Device)
+		fr := runFioVerify(ctx, t.Device, fioOpts)
 		fioEnd := time.Now()
 		fioSummary, _ := json.Marshal(fr)
 		subs = append(subs, SubStepReport{
-			Name:        fmt.Sprintf("fio %s", t.Device),
+			Name:        fmt.Sprintf("fio %s %s", fioOpts.Mode, t.Device),
 			Passed:      fr.Error == "",
 			StartedAt:   fioStart,
 			CompletedAt: fioEnd,
 			SummaryJSON: fioSummary,
 		})
+		perDisk[t.Device] = map[string]any{"fio": fr}
 
-		perDisk[t.Device] = map[string]any{
-			"badblocks": bb,
-			"fio":       fr,
-		}
-		samples = append(samples,
-			Sample{Kind: "fio", Key: t.Device + "/read_iops", Value: fr.ReadIOPS, Unit: "iops"},
-			Sample{Kind: "fio", Key: t.Device + "/write_iops", Value: fr.WriteIOPS, Unit: "iops"},
-		)
-		if !bb.OK {
-			return Outcome{
-				Passed:   false,
-				Message:  "badblocks found errors on " + t.Device,
-				Summary:  "badblocks failed on " + t.Device,
-				Extras:   map[string]any{"per_disk": perDisk, "wipe_probe": probes},
-				SubSteps: subs,
+		if fr.Error == "" {
+			samples = append(samples,
+				Sample{Kind: "fio", Key: t.Device + "/read_iops", Value: fr.ReadIOPS, Unit: "iops"},
+				Sample{Kind: "fio", Key: t.Device + "/write_iops", Value: fr.WriteIOPS, Unit: "iops"},
+			)
+			if fr.ReadP99Us > 0 {
+				samples = append(samples, Sample{Kind: "fio_p99_us", Key: t.Device + "/read", Value: fr.ReadP99Us, Unit: "us"})
 			}
+			if fr.WriteP99Us > 0 {
+				samples = append(samples, Sample{Kind: "fio_p99_us", Key: t.Device + "/write", Value: fr.WriteP99Us, Unit: "us"})
+			}
+		} else if failed == "" {
+			failed = t.Device
 		}
 	}
-	if d.Sensor != nil {
+
+	// End-of-stage SMART snapshot + diff. We capture whether or not fio
+	// succeeded — a mid-run failure still produces attributable deltas,
+	// which is often more interesting than the stage outcome itself.
+	endSMART := captureSMARTAttrs(ctx, targets)
+	deltas := diffSMARTAttrs(startSMART, endSMART)
+	for dev, attrs := range deltas {
+		for attr, delta := range attrs {
+			samples = append(samples, Sample{Kind: "smart_delta", Key: dev + "/" + attr, Value: delta, Unit: "count"})
+		}
+	}
+	if d.Sensor != nil && len(samples) > 0 {
 		_ = d.Sensor(ctx, samples)
 	}
 
-	d.Info(fmt.Sprintf("Storage: %d disk(s) passed badblocks + fio", len(targets)))
+	if failed != "" {
+		return Outcome{
+			Passed:   false,
+			Message:  "fio verify failed on " + failed,
+			Summary:  "fio failed on " + failed,
+			Extras:   map[string]any{"per_disk": perDisk, "wipe_probe": probes, "smart_delta": deltas, "fio_opts": fioOpts},
+			SubSteps: subs,
+		}
+	}
+
+	d.Info(fmt.Sprintf("Storage: %d disk(s) passed fio --verify", len(targets)))
 	return Outcome{
 		Passed:   true,
-		Summary:  fmt.Sprintf("%d disks passed", len(targets)),
-		Extras:   map[string]any{"per_disk": perDisk, "wipe_probe": probes},
+		Summary:  fmt.Sprintf("%d disks passed (%s)", len(targets), fioOpts.Mode),
+		Extras:   map[string]any{"per_disk": perDisk, "wipe_probe": probes, "smart_delta": deltas, "fio_opts": fioOpts},
 		SubSteps: subs,
 	}
 }
@@ -229,8 +257,8 @@ type wipeProbeResult struct {
 
 // probeWipe runs blkid + wipefs -n. Any non-empty output from either is
 // a "has data" signal. This is deliberately conservative: we'd rather
-// halt on a bare ext4 signature than hand badblocks a disk with real
-// bytes on it.
+// halt on a bare ext4 signature than hand fio a disk with real bytes on
+// it.
 func probeWipe(ctx context.Context, device string) wipeProbeResult {
 	out := wipeProbeResult{Device: device}
 
@@ -257,84 +285,269 @@ func probeWipe(ctx context.Context, device string) wipeProbeResult {
 	return out
 }
 
-// ---------- badblocks ----------
+// ---------- fio ----------
 
-type badblocksResult struct {
-	OK        bool   `json:"ok"`
-	Elapsed   string `json:"elapsed"`
-	Error     string `json:"error,omitempty"`
-	OutputTail string `json:"output_tail,omitempty"`
+// fioOpts resolves the probe knobs into the concrete flag values fio
+// needs. Defaults match the quick profile's fio_sample shape so callers
+// with zero knobs still run something bounded.
+type fioOpts struct {
+	Mode    string        `json:"mode"`     // "fio_sample" | "full_disk"
+	Size    string        `json:"size"`     // "1GiB"; only used for fio_sample
+	Runtime time.Duration `json:"runtime"`  // bounding time
+	BS      string        `json:"bs"`       // "4k"
+	RW      string        `json:"rw"`       // "randrw"
+	Verify  string        `json:"verify"`   // "md5" | ""
 }
 
-func runBadblocks(ctx context.Context, device string) badblocksResult {
-	// -c 64 blocks per check, -w destructive write, -b 4096 block size,
-	// -t pattern. We only sample 256MiB (65536 × 4k) so the stage stays
-	// bounded. A real burn-in would run the whole disk; that belongs in
-	// a separate "deep" stage.
-	args := []string{"-b", "4096", "-c", "64", "-w", "-t", "random", device, "65536"}
-	start := time.Now()
-	runCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
+// resolveFioOpts normalizes the knobs into a runnable config. Zero-
+// valued fields fall back to the quick defaults so a stage that's
+// missing its knobs still has coherent behavior (safer than refusing).
+func resolveFioOpts(k StorageKnobs) fioOpts {
+	o := fioOpts{
+		Mode:    firstNonEmpty(k.Mode, "fio_sample"),
+		Size:    firstNonEmpty(k.FioSize, "1GiB"),
+		Runtime: k.FioTime,
+		BS:      firstNonEmpty(k.FioBS, "4k"),
+		RW:      firstNonEmpty(k.FioRW, "randrw"),
+		Verify:  firstNonEmpty(k.Verify, "md5"),
+	}
+	if o.Runtime <= 0 {
+		o.Runtime = 3 * time.Minute
+	}
+	return o
+}
+
+func firstNonEmpty(vs ...string) string {
+	for _, v := range vs {
+		if v != "" {
+			return v
+		}
+	}
+	return ""
+}
+
+type fioResult struct {
+	Mode        string  `json:"mode"`
+	ReadIOPS    float64 `json:"read_iops"`
+	WriteIOPS   float64 `json:"write_iops"`
+	ReadBWKBps  float64 `json:"read_bw_kbps"`
+	WriteBWKBps float64 `json:"write_bw_kbps"`
+	ReadP99Us   float64 `json:"read_p99_us,omitempty"`
+	WriteP99Us  float64 `json:"write_p99_us,omitempty"`
+	Error       string  `json:"error,omitempty"`
+	OutputTail  string  `json:"output_tail,omitempty"`
+}
+
+// runFioVerify invokes fio with md5-verify semantics. fio_sample mode
+// caps the IO at opts.Size; full_disk drives the whole device bounded
+// by runtime. Both use direct IO to bypass the page cache — we want
+// real disk latency, not Linux' cheerful buffer.
+func runFioVerify(ctx context.Context, device string, opts fioOpts) fioResult {
+	// 30s grace over runtime so fio has time to flush + close cleanly.
+	runCtx, cancel := context.WithTimeout(ctx, opts.Runtime+30*time.Second)
 	defer cancel()
-	cmd := exec.CommandContext(runCtx, "badblocks", args...)
-	out, err := cmd.CombinedOutput()
-	r := badblocksResult{Elapsed: time.Since(start).Round(time.Second).String(), OutputTail: tailLines(string(out), 10)}
+
+	args := []string{
+		"--name=verify-" + strings.TrimPrefix(device, "/dev/"),
+		"--filename=" + device,
+		"--rw=" + opts.RW,
+		"--bs=" + opts.BS,
+		"--numjobs=1",
+		"--direct=1",
+		"--group_reporting",
+		"--output-format=json",
+		"--runtime=" + strconv.Itoa(int(opts.Runtime.Seconds())),
+	}
+	if opts.Verify != "" {
+		args = append(args,
+			"--verify="+opts.Verify,
+			"--verify_pattern=random",
+			"--do_verify=1",
+		)
+	}
+	switch opts.Mode {
+	case "full_disk":
+		// Time-bounded across the full device — fio uses the device's
+		// full size when --size is omitted on a block device.
+		args = append(args, "--time_based=1")
+	default:
+		// fio_sample: bounded write. --size= caps the IO volume; the
+		// --runtime= passed above still bounds the wall-clock time.
+		args = append(args, "--size="+opts.Size, "--time_based=0")
+	}
+
+	cmd := exec.CommandContext(runCtx, "fio", args...)
+	out, err := cmd.Output()
+	r := fioResult{Mode: opts.Mode, OutputTail: tailLines(string(out), 20)}
 	if err != nil {
 		r.Error = err.Error()
 		return r
 	}
-	// badblocks prints each bad block to stdout. Empty output = clean.
-	if strings.TrimSpace(string(out)) == "" {
-		r.OK = true
-	} else {
-		r.Error = "bad blocks found"
+	parsed, perr := parseFioJSON(out)
+	if perr != nil {
+		r.Error = "parse fio json: " + perr.Error()
+		return r
 	}
+	r.ReadIOPS = parsed.ReadIOPS
+	r.WriteIOPS = parsed.WriteIOPS
+	r.ReadBWKBps = parsed.ReadBWKBps
+	r.WriteBWKBps = parsed.WriteBWKBps
+	r.ReadP99Us = parsed.ReadP99Us
+	r.WriteP99Us = parsed.WriteP99Us
 	return r
 }
 
-// ---------- fio ----------
-
-type fioResult struct {
-	ReadIOPS   float64 `json:"read_iops"`
-	WriteIOPS  float64 `json:"write_iops"`
-	ReadBWKBps float64 `json:"read_bw_kbps"`
-	WriteBWKBps float64 `json:"write_bw_kbps"`
-	Error      string  `json:"error,omitempty"`
-}
-
-// runFio kicks off a tiny random-rw job: 2 jobs × 64MB × 4k blocks.
-// This is a health bar, not a benchmark — we want to know the disk
-// services IO, not how fast it is at p99.
-func runFio(ctx context.Context, device string) fioResult {
-	runCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
-	defer cancel()
-	args := []string{
-		"--name=health", "--filename=" + device, "--rw=randrw",
-		"--bs=4k", "--size=64M", "--numjobs=2", "--time_based=0",
-		"--group_reporting", "--output-format=json", "--direct=1",
-	}
-	cmd := exec.CommandContext(runCtx, "fio", args...)
-	out, err := cmd.Output()
-	if err != nil {
-		return fioResult{Error: err.Error()}
-	}
+// parseFioJSON extracts the bits we care about from fio's --output-format=json.
+// Latency percentiles live at .jobs[0].read.clat_ns.percentile["99.000000"];
+// we convert nanoseconds to microseconds for the fio_p99_us sample.
+func parseFioJSON(out []byte) (fioResult, error) {
 	var top struct {
 		Jobs []struct {
-			Read  struct {
+			Read struct {
 				IOPS float64 `json:"iops"`
 				BW   float64 `json:"bw"`
+				CLat struct {
+					Percentile map[string]float64 `json:"percentile"`
+				} `json:"clat_ns"`
 			} `json:"read"`
 			Write struct {
 				IOPS float64 `json:"iops"`
 				BW   float64 `json:"bw"`
+				CLat struct {
+					Percentile map[string]float64 `json:"percentile"`
+				} `json:"clat_ns"`
 			} `json:"write"`
 		} `json:"jobs"`
 	}
-	if err := json.Unmarshal(out, &top); err != nil || len(top.Jobs) == 0 {
-		return fioResult{Error: "parse fio json: " + fmt.Sprint(err)}
+	if err := json.Unmarshal(out, &top); err != nil {
+		return fioResult{}, err
+	}
+	if len(top.Jobs) == 0 {
+		return fioResult{}, fmt.Errorf("no jobs in fio output")
 	}
 	j := top.Jobs[0]
-	return fioResult{
+	r := fioResult{
 		ReadIOPS: j.Read.IOPS, WriteIOPS: j.Write.IOPS,
 		ReadBWKBps: j.Read.BW, WriteBWKBps: j.Write.BW,
 	}
+	if p := j.Read.CLat.Percentile["99.000000"]; p > 0 {
+		r.ReadP99Us = p / 1000.0
+	}
+	if p := j.Write.CLat.Percentile["99.000000"]; p > 0 {
+		r.WriteP99Us = p / 1000.0
+	}
+	return r, nil
+}
+
+// ---------- SMART delta ----------
+
+// smartAttrMap: device → attribute → raw counter value. ATA drives
+// populate named attributes (Reallocated_Sector_Ct etc); NVMe drives
+// populate a flatter nvme-specific map. We track a curated whitelist
+// of wear indicators — anything else is diagnostic and drops to the raw
+// report output.
+type smartAttrMap map[string]map[string]float64
+
+// captureSMARTAttrs runs smartctl -aj on each target and pulls the
+// whitelisted attributes. Per-device failures (virtio, permission
+// issues) degrade silently — the delta step just shows no data for
+// that device.
+func captureSMARTAttrs(ctx context.Context, targets []diskTarget) smartAttrMap {
+	out := smartAttrMap{}
+	for _, t := range targets {
+		parsed, err := runSmartctl(ctx, t.Device)
+		if err != nil {
+			continue
+		}
+		attrs := extractSMARTAttrs(parsed)
+		if len(attrs) > 0 {
+			out[t.Device] = attrs
+		}
+	}
+	return out
+}
+
+// smartAttributeWhitelist is the set of attributes we diff across a
+// stage. They're the ones that reflect *this stage's* IO damage, not
+// cumulative drive history. Adding attributes is cheap — ones a drive
+// doesn't report are simply left out of the delta.
+var smartAttributeWhitelist = map[string]bool{
+	// ATA SMART attribute names (smartctl normalizes to these)
+	"Reallocated_Sector_Ct":   true,
+	"Current_Pending_Sector":  true,
+	"Offline_Uncorrectable":   true,
+	"UDMA_CRC_Error_Count":    true,
+	"Reported_Uncorrect":      true,
+	"Raw_Read_Error_Rate":     true,
+	// NVMe log fields (flat keys at top of nvme_smart_health_information_log)
+	"media_errors":            true,
+	"num_err_log_entries":     true,
+	"percentage_used":         true,
+}
+
+// extractSMARTAttrs walks smartctl's JSON for whitelisted attribute
+// values. Handles both the ATA shape (ata_smart_attributes.table[]) and
+// the NVMe shape (nvme_smart_health_information_log). Returns a map
+// keyed by the canonical attribute name.
+func extractSMARTAttrs(raw map[string]any) map[string]float64 {
+	out := map[string]float64{}
+	// ATA attributes are in ata_smart_attributes.table[] — each element
+	// has {"name": "Reallocated_Sector_Ct", "raw": {"value": N}}.
+	if ata, ok := raw["ata_smart_attributes"].(map[string]any); ok {
+		if tbl, ok := ata["table"].([]any); ok {
+			for _, row := range tbl {
+				rm, ok := row.(map[string]any)
+				if !ok {
+					continue
+				}
+				name, _ := rm["name"].(string)
+				if !smartAttributeWhitelist[name] {
+					continue
+				}
+				if r, ok := rm["raw"].(map[string]any); ok {
+					if v, ok := r["value"].(float64); ok {
+						out[name] = v
+					}
+				}
+			}
+		}
+	}
+	// NVMe attributes live flat under nvme_smart_health_information_log.
+	if nvme, ok := raw["nvme_smart_health_information_log"].(map[string]any); ok {
+		for k, v := range nvme {
+			if !smartAttributeWhitelist[k] {
+				continue
+			}
+			if n, ok := v.(float64); ok {
+				out[k] = n
+			}
+		}
+	}
+	return out
+}
+
+// diffSMARTAttrs subtracts start from end per (device, attribute).
+// Only attributes present in both ends produce a delta; missing
+// attributes drop out (can't attribute a zero-to-present delta safely).
+// Negative deltas are kept so a drive that resets a counter is visible.
+func diffSMARTAttrs(start, end smartAttrMap) map[string]map[string]float64 {
+	out := map[string]map[string]float64{}
+	for dev, endAttrs := range end {
+		startAttrs, ok := start[dev]
+		if !ok {
+			continue
+		}
+		devOut := map[string]float64{}
+		for attr, endV := range endAttrs {
+			startV, ok := startAttrs[attr]
+			if !ok {
+				continue
+			}
+			devOut[attr] = endV - startV
+		}
+		if len(devOut) > 0 {
+			out[dev] = devOut
+		}
+	}
+	return out
 }
diff --git a/agent/tests/storage_test.go b/agent/tests/storage_test.go
new file mode 100644
index 0000000..1e52d64
--- /dev/null
+++ b/agent/tests/storage_test.go
@@ -0,0 +1,218 @@
+package tests
+
+import (
+	"encoding/json"
+	"testing"
+	"time"
+)
+
+// TestParseFioJSON_ATAReadWrite feeds parseFioJSON one job with both
+// read and write sections and checks read/write IOPS, read bandwidth,
+// and read/write p99 latency (taken from clat_ns, reported ns → us).
+func TestParseFioJSON_ATAReadWrite(t *testing.T) {
+	const fixture = `{
+		"jobs": [{
+			"read":  {"iops": 1234.5, "bw": 5000, "clat_ns": {"percentile": {"99.000000": 250000}}},
+			"write": {"iops": 432.1,  "bw": 2000, "clat_ns": {"percentile": {"99.000000": 500000}}}
+		}]
+	}`
+	res, err := parseFioJSON([]byte(fixture))
+	if err != nil {
+		t.Fatalf("parseFioJSON: %v", err)
+	}
+	if got := res.ReadIOPS; got != 1234.5 {
+		t.Errorf("ReadIOPS = %v, want 1234.5", got)
+	}
+	if got := res.WriteIOPS; got != 432.1 {
+		t.Errorf("WriteIOPS = %v, want 432.1", got)
+	}
+	if got := res.ReadBWKBps; got != 5000 {
+		t.Errorf("ReadBWKBps = %v, want 5000", got)
+	}
+	// Read side: 250000 ns is expected back as 250 us.
+	if got := res.ReadP99Us; got != 250 {
+		t.Errorf("ReadP99Us = %v, want 250", got)
+	}
+	// Write side: 500000 ns is expected back as 500 us.
+	if got := res.WriteP99Us; got != 500 {
+		t.Errorf("WriteP99Us = %v, want 500", got)
+	}
+}
+
+// TestParseFioJSON_ReadOnlyJob: a job whose write side carries no
+// percentile data (randread-style) must leave WriteP99Us at zero.
+func TestParseFioJSON_ReadOnlyJob(t *testing.T) {
+	const fixture = `{
+		"jobs": [{
+			"read":  {"iops": 1000, "bw": 4000, "clat_ns": {"percentile": {"99.000000": 100000}}},
+			"write": {"iops": 0, "bw": 0}
+		}]
+	}`
+	res, err := parseFioJSON([]byte(fixture))
+	if err != nil {
+		t.Fatalf("parseFioJSON: %v", err)
+	}
+	if got := res.WriteP99Us; got != 0 {
+		t.Errorf("WriteP99Us = %v on read-only job, want 0", got)
+	}
+	if got := res.ReadP99Us; got != 100 {
+		t.Errorf("ReadP99Us = %v, want 100", got)
+	}
+}
+
+// TestParseFioJSON_NoJobs: an empty jobs array (fio ran nothing) must
+// surface as an error instead of a silently zeroed result.
+func TestParseFioJSON_NoJobs(t *testing.T) {
+	_, err := parseFioJSON([]byte(`{"jobs": []}`))
+	if err == nil {
+		t.Errorf("expected error on empty jobs array")
+	}
+}
+
+// TestExtractSMARTAttrs_ATA: whitelisted rows of
+// ata_smart_attributes.table are extracted; other rows are ignored.
+func TestExtractSMARTAttrs_ATA(t *testing.T) {
+	const fixture = `{
+		"ata_smart_attributes": {
+			"table": [
+				{"name": "Reallocated_Sector_Ct",   "raw": {"value": 7}},
+				{"name": "Current_Pending_Sector",  "raw": {"value": 3}},
+				{"name": "Spin_Retry_Count",        "raw": {"value": 99}}
+			]
+		}
+	}`
+	var doc map[string]any
+	if err := json.Unmarshal([]byte(fixture), &doc); err != nil {
+		t.Fatalf("unmarshal fixture: %v", err)
+	}
+	attrs := extractSMARTAttrs(doc)
+	if got := attrs["Reallocated_Sector_Ct"]; got != 7 {
+		t.Errorf("Reallocated_Sector_Ct = %v, want 7", got)
+	}
+	if got := attrs["Current_Pending_Sector"]; got != 3 {
+		t.Errorf("Current_Pending_Sector = %v, want 3", got)
+	}
+	if _, present := attrs["Spin_Retry_Count"]; present {
+		t.Errorf("Spin_Retry_Count should not appear (not in whitelist)")
+	}
+}
+
+// TestExtractSMARTAttrs_NVMe: whitelisted counters are read from the
+// flat nvme_smart_health_information_log map; temperature is ignored.
+func TestExtractSMARTAttrs_NVMe(t *testing.T) {
+	const fixture = `{
+		"nvme_smart_health_information_log": {
+			"media_errors": 2,
+			"num_err_log_entries": 15,
+			"percentage_used": 7,
+			"temperature": 42
+		}
+	}`
+	var doc map[string]any
+	if err := json.Unmarshal([]byte(fixture), &doc); err != nil {
+		t.Fatalf("unmarshal fixture: %v", err)
+	}
+	attrs := extractSMARTAttrs(doc)
+	if got := attrs["media_errors"]; got != 2 {
+		t.Errorf("media_errors = %v, want 2", got)
+	}
+	if got := attrs["num_err_log_entries"]; got != 15 {
+		t.Errorf("num_err_log_entries = %v, want 15", got)
+	}
+	if got := attrs["percentage_used"]; got != 7 {
+		t.Errorf("percentage_used = %v, want 7", got)
+	}
+	if _, present := attrs["temperature"]; present {
+		t.Errorf("temperature should not appear (not in whitelist)")
+	}
+}
+
+// TestDiffSMARTAttrs: deltas are end minus start per (device, attr).
+// An attribute seen in only one snapshot (here: present only at end)
+// is left out rather than reported with a made-up baseline.
+func TestDiffSMARTAttrs(t *testing.T) {
+	before := smartAttrMap{
+		"/dev/sda": {"Reallocated_Sector_Ct": 5, "Current_Pending_Sector": 0},
+	}
+	after := smartAttrMap{
+		"/dev/sda": {"Reallocated_Sector_Ct": 8, "Current_Pending_Sector": 2, "UDMA_CRC_Error_Count": 1},
+	}
+	sda := diffSMARTAttrs(before, after)["/dev/sda"]
+	if got := sda["Reallocated_Sector_Ct"]; got != 3 {
+		t.Errorf("Reallocated_Sector_Ct delta = %v, want 3", got)
+	}
+	if got := sda["Current_Pending_Sector"]; got != 2 {
+		t.Errorf("Current_Pending_Sector delta = %v, want 2", got)
+	}
+	if _, present := sda["UDMA_CRC_Error_Count"]; present {
+		t.Errorf("UDMA_CRC_Error_Count should not appear (missing at start)")
+	}
+}
+
+// TestDiffSMARTAttrs_DeviceNewAtEnd: a device that shows up only in
+// the end snapshot (hot-plugged mid-run, or SMART readable only at
+// end) has no start baseline, so it must be absent from the diff.
+func TestDiffSMARTAttrs_DeviceNewAtEnd(t *testing.T) {
+	onlyAtEnd := smartAttrMap{
+		"/dev/sda": {"Reallocated_Sector_Ct": 10},
+	}
+	deltas := diffSMARTAttrs(smartAttrMap{}, onlyAtEnd)
+	if _, present := deltas["/dev/sda"]; present {
+		t.Errorf("/dev/sda should drop from diff when absent at start")
+	}
+}
+
+// TestResolveFioOpts_Defaults: all-zero StorageKnobs must resolve to
+// the quick profile's fio_sample shape. A stage that arrives without
+// per-profile knobs (legacy claim response, test harness) therefore
+// still gets bounded defaults rather than an unbounded write.
+func TestResolveFioOpts_Defaults(t *testing.T) {
+	opts := resolveFioOpts(StorageKnobs{})
+	if got := opts.Mode; got != "fio_sample" {
+		t.Errorf("Mode = %q, want fio_sample", got)
+	}
+	if got := opts.Size; got != "1GiB" {
+		t.Errorf("Size = %q, want 1GiB", got)
+	}
+	if got := opts.Runtime; got != 3*time.Minute {
+		t.Errorf("Runtime = %v, want 3m", got)
+	}
+	if got := opts.BS; got != "4k" {
+		t.Errorf("BS = %q, want 4k", got)
+	}
+	if got := opts.RW; got != "randrw" {
+		t.Errorf("RW = %q, want randrw", got)
+	}
+	if got := opts.Verify; got != "md5" {
+		t.Errorf("Verify = %q, want md5", got)
+	}
+}
+
+// TestResolveFioOpts_FullDiskOverride: explicitly set knobs (the
+// deep/soak shape) pass through; a 2h FioTime replaces the 3m default.
+func TestResolveFioOpts_FullDiskOverride(t *testing.T) {
+	// Verify is deliberately left unset to exercise the fallback below.
+	opts := resolveFioOpts(StorageKnobs{
+		Mode:    "full_disk",
+		FioTime: 2 * time.Hour,
+		FioBS:   "64k",
+		FioRW:   "write",
+	})
+	if got := opts.Mode; got != "full_disk" {
+		t.Errorf("Mode = %q, want full_disk", got)
+	}
+	if got := opts.Runtime; got != 2*time.Hour {
+		t.Errorf("Runtime = %v, want 2h", got)
+	}
+	if got := opts.BS; got != "64k" {
+		t.Errorf("BS = %q, want 64k", got)
+	}
+	if got := opts.RW; got != "write" {
+		t.Errorf("RW = %q, want write", got)
+	}
+	// The empty Verify knob must resolve to the md5 default.
+	if got := opts.Verify; got != "md5" {
+		t.Errorf("Verify = %q, want md5 (default)", got)
+	}
+}
diff --git a/cmd/vetting/main.go b/cmd/vetting/main.go
index 7a0df9b..e361235 100644
--- a/cmd/vetting/main.go
+++ b/cmd/vetting/main.go
@@ -60,6 +60,8 @@ func main() {
 	artifactStore := &store.Artifacts{DB: conn}
 	specDiffStore := &store.SpecDiffs{DB: conn}
 	measurementStore := &store.Measurements{DB: conn}
+	thresholdStore := &store.Thresholds{DB: conn}
+	firmwareStore := &store.Firmware{DB: conn}
 
 	hub := events.NewHub()
 
@@ -99,17 +101,19 @@ func main() {
 	}
 
 	ui := &api.UI{
-		Hosts:     hostStore,
-		Runs:      runStore,
-		Stages:    stageStore,
-		SubSteps:  subStepStore,
-		SpecDiffs: specDiffStore,
-		Artifacts: artifactStore,
-		EventHub:  hub,
-		Logs:      logHub,
-		Runner:    runner,
-		Tiles:     tiles,
-		PublicURL: cfg.Server.PublicURL,
+		Hosts:      hostStore,
+		Runs:       runStore,
+		Stages:     stageStore,
+		SubSteps:   subStepStore,
+		SpecDiffs:  specDiffStore,
+		Artifacts:  artifactStore,
+		Thresholds: thresholdStore,
+		Profiles:   cfg.Profiles,
+		EventHub:   hub,
+		Logs:       logHub,
+		Runner:     runner,
+		Tiles:      tiles,
+		PublicURL:  cfg.Server.PublicURL,
 	}
 
 	// Inject the host-page + run-page fragment renderers. Each reuses
@@ -157,6 +161,9 @@ func main() {
 		Artifacts:       artifactStore,
 		SpecDiffs:       specDiffStore,
 		Measurements:    measurementStore,
+		Thresholds:      thresholdStore,
+		Firmware:        firmwareStore,
+		Profiles:        cfg.Profiles,
 		Runner:          runner,
 		EventHub:        hub,
 		Logs:            logHub,
diff --git a/deploy/vetting.example.yaml b/deploy/vetting.example.yaml
index 373efd2..b9db53d 100644
--- a/deploy/vetting.example.yaml
+++ b/deploy/vetting.example.yaml
@@ -85,3 +85,54 @@ agent:
 
 notifiers: []
 routes: []
+
+# Vetting pipeline shared defaults. Every profile (quick/deep/soak)
+# walks the same stage list; only per-stage durations differ.
+# Thresholds here apply to every profile — a 92°C CPU fails a
+# 2-minute quick run and a 12-hour soak run alike.
+vetting:
+  stages: [Inventory, SpecValidate, SMART, CPUStress, Storage, Network, GPU, PSU, Reporting]
+  thresholds:
+    - { stage: "*",       kind: temp,        key: "cpu/*",           op: lt,         value: 92,   unit: C, severity: critical }
+    - { stage: PSU,       kind: psu_volt,    key: "+12V",            op: within_pct, value: 5,  nominal: 12.0, severity: critical }
+    - { stage: PSU,       kind: psu_volt,    key: "+5V",             op: within_pct, value: 5,  nominal: 5.0,  severity: critical }
+    - { stage: PSU,       kind: psu_volt,    key: "+3.3V",           op: within_pct, value: 5,  nominal: 3.3,  severity: critical }
+    - { stage: Storage,   kind: fio_p99_us,  key: "*",               op: lt,         value: 50000,                 severity: warning }
+    - { stage: Network,   kind: iperf,       key: throughput_mbps,   op: gte,        value: 900,                   severity: critical }
+    - { stage: Network,   kind: nic_retrans, key: "*/rate",          op: lt,         value: 0.001,                 severity: warning }
+    - { stage: CPUStress, kind: edac_ue,     key: "*",               op: lte,        value: 0,                     severity: critical }
+    - { stage: CPUStress, kind: mce,         key: "*",               op: lte,        value: 0,                     severity: critical }
+
+# Per-profile durations + probe knobs. Only the *durations* scale across
+# profiles — every profile exercises every probe and gate. Quick is a
+# ~10-minute same-day sanity check; deep is the 8–12 h overnight run;
+# soak is the opt-in 36–40 h extreme run.
+profiles:
+  quick:
+    stage_timeouts:
+      CPUStress: 5m
+      Storage:   5m
+      Network:   2m
+    defaults:
+      cpustress: { cpu_pass: 2m, mem_pass: 2m, edac_poll: 10s }
+      storage:   { mode: fio_sample, fio_size: 1GiB, fio_time: 3m, fio_bs: 4k, fio_rw: randrw, verify: md5 }
+      network:   { duration: 60s }
+  deep:
+    stage_timeouts:
+      CPUStress: 2h
+      Storage:   4h
+      Network:   35m
+    defaults:
+      cpustress: { cpu_pass: 60m, mem_pass: 60m, edac_poll: 10s }
+      storage:   { mode: full_disk, fio_time: 2h, fio_bs: 4k, fio_rw: randrw, verify: md5 }
+      network:   { duration: 30m }
+  soak:
+    inherit: deep
+    stage_timeouts:
+      CPUStress: 14h
+      Storage:   8h
+      Network:   2h30m
+    defaults:
+      cpustress: { cpu_pass: 12h }
+      storage:   { mode: full_disk, fio_time: 6h }
+      network:   { duration: 2h }
diff --git a/deploy/vetting.production.yaml b/deploy/vetting.production.yaml
index 2191661..7f16f0d 100644
--- a/deploy/vetting.production.yaml
+++ b/deploy/vetting.production.yaml
@@ -75,3 +75,41 @@ agent:
 
 notifiers: []
 routes: []
+
+# Vetting pipeline shared defaults. Every profile (quick/deep/soak)
+# walks the same stage list; only per-stage durations differ.
+# Thresholds apply to every profile — critical breaches fail a run
+# regardless of which profile the operator picked.
+vetting:
+  stages: [Inventory, SpecValidate, SMART, CPUStress, Storage, Network, GPU, PSU, Reporting]
+  thresholds:
+    - { stage: "*",       kind: temp,        key: "cpu/*",           op: lt,         value: 92,   unit: C, severity: critical }
+    - { stage: PSU,       kind: psu_volt,    key: "+12V",            op: within_pct, value: 5,  nominal: 12.0, severity: critical }
+    - { stage: PSU,       kind: psu_volt,    key: "+5V",             op: within_pct, value: 5,  nominal: 5.0,  severity: critical }
+    - { stage: PSU,       kind: psu_volt,    key: "+3.3V",           op: within_pct, value: 5,  nominal: 3.3,  severity: critical }
+    - { stage: Storage,   kind: fio_p99_us,  key: "*",               op: lt,         value: 50000,                 severity: warning }
+    - { stage: Network,   kind: iperf,       key: throughput_mbps,   op: gte,        value: 900,                   severity: critical }
+    - { stage: Network,   kind: nic_retrans, key: "*/rate",          op: lt,         value: 0.001,                 severity: warning }
+    - { stage: CPUStress, kind: edac_ue,     key: "*",               op: lte,        value: 0,                     severity: critical }
+    - { stage: CPUStress, kind: mce,         key: "*",               op: lte,        value: 0,                     severity: critical }
+
+profiles:
+  quick:
+    stage_timeouts: { CPUStress: 5m, Storage: 5m, Network: 2m }
+    defaults:
+      cpustress: { cpu_pass: 2m, mem_pass: 2m, edac_poll: 10s }
+      storage:   { mode: fio_sample, fio_size: 1GiB, fio_time: 3m, fio_bs: 4k, fio_rw: randrw, verify: md5 }
+      network:   { duration: 60s }
+  deep:
+    stage_timeouts: { CPUStress: 2h, Storage: 4h, Network: 35m }
+    defaults:
+      cpustress: { cpu_pass: 60m, mem_pass: 60m, edac_poll: 10s }
+      storage:   { mode: full_disk, fio_time: 2h, fio_bs: 4k, fio_rw: randrw, verify: md5 }
+      network:   { duration: 30m }
+  soak:
+    inherit: deep
+    stage_timeouts: { CPUStress: 14h, Storage: 8h, Network: 2h30m }
+    defaults:
+      cpustress: { cpu_pass: 12h }
+      storage:   { mode: full_disk, fio_time: 6h }
+      network:   { duration: 2h }
diff --git a/internal/api/agent_handlers.go b/internal/api/agent_handlers.go
index 04215f6..dd164e3 100644
--- a/internal/api/agent_handlers.go
+++ b/internal/api/agent_handlers.go
@@ -19,6 +19,7 @@ import (
 
 	"github.com/go-chi/chi/v5"
 
+	"vetting/internal/config"
 	"vetting/internal/events"
 	"vetting/internal/hold"
 	"vetting/internal/logs"
@@ -41,6 +42,9 @@ type Agent struct {
 	Artifacts       *store.Artifacts
 	SpecDiffs       *store.SpecDiffs
 	Measurements    *store.Measurements
+	Thresholds      *store.Thresholds // Phase 1: seeded per run; consulted on each /sensor batch
+	Firmware        *store.Firmware   // Phase 4: firmware snapshots (unused before then)
+	Profiles        *config.ProfileRegistry // Phase 2: /claim resolves the run's profile → stage knobs
 	Runner          *orchestrator.Runner
 	EventHub        *events.Hub
 	Logs            *logs.Hub
@@ -216,6 +220,21 @@ func (a *Agent) Claim(w http.ResponseWriter, r *http.Request) {
 	if iperfPort == 0 {
 		iperfPort = 5201
 	}
+
+	// Resolve the run's profile → agent-visible stage knobs. The agent
+	// reads these to size CPUStress / Storage / Network work. An empty
+	// profile (legacy runs seeded before Phase 1) falls back to "quick".
+	profileName := run.Profile
+	if profileName == "" {
+		profileName = config.ProfileQuick
+	}
+	var stageCfg config.StageConfig
+	if a.Profiles != nil {
+		stageCfg = a.Profiles.ResolveStageConfig(profileName)
+	} else {
+		stageCfg = config.StageConfig{Profile: profileName}
+	}
+
 	writeJSON(w, http.StatusOK, map[string]any{
 		"ok":              true,
 		"run_id":          runID,
@@ -224,6 +243,7 @@ func (a *Agent) Claim(w http.ResponseWriter, r *http.Request) {
 		"iperf_port":      iperfPort,
 		"non_destructive": run.NonDestructive,
 		"current_state":   string(currentState),
+		"stage_config":    stageCfg,
 	})
 }
 
@@ -398,10 +418,24 @@ type StageResult struct {
 	Passed    bool                `json:"passed"`
 	Summary   json.RawMessage     `json:"summary,omitempty"`
 	Inventory *spec.Inventory     `json:"inventory,omitempty"`
+	Firmware  []FirmwareLine      `json:"firmware,omitempty"`
 	Message   string              `json:"message,omitempty"`
 	SubSteps  []SubStepResultLine `json:"sub_steps,omitempty"`
 }
 
+// FirmwareLine is a single firmware snapshot POSTed alongside the
+// Firmware stage's /result body. Mirrors agent/probes.FirmwareSnapshot.
+// The server converts each line to a store.FirmwareSnapshot and persists
+// it under the run — SpecValidate reads Component, Identifier, and
+// Version back to diff against the host's expected firmware.
+type FirmwareLine struct {
+	Component  string            `json:"component"`
+	Identifier string            `json:"identifier"`
+	Version    string            `json:"version"`
+	Vendor     string            `json:"vendor,omitempty"`
+	Raw        map[string]string `json:"raw,omitempty"` // stored as a JSON string; "{}" when empty
+}
+
 // SubStepResultLine is one entry in StageResult.SubSteps. Ordinal is
 // assigned from slice index server-side; the agent doesn't set it.
 type SubStepResultLine struct {
@@ -476,6 +510,20 @@ func (a *Agent) Result(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
+	// Aggregate threshold gate: flip Passed=false server-side when any
+	// critical breach landed for this stage. The agent's verdict is
+	// advisory — a stage-executor can miss a runaway sample that the
+	// sidecar caught. We check this *before* writing the stage state
+	// so the DB reflects the server-side decision.
+	thresholdDetail := ""
+	if body.Passed {
+		if breached, detail := a.stageHadCriticalBreach(r.Context(), runID, body.Stage); breached {
+			body.Passed = false
+			thresholdDetail = detail
+			a.appendLog(runID, "error", fmt.Sprintf("%s reported passed but %s — flipping to failed", body.Stage, detail))
+		}
+	}
+
 	stageState := model.StagePassed
 	if !body.Passed {
 		stageState = model.StageFailed
@@ -488,6 +536,9 @@ func (a *Agent) Result(w http.ResponseWriter, r *http.Request) {
 		http.Error(w, "complete stage: "+err.Error(), http.StatusInternalServerError)
 		return
 	}
+	if thresholdDetail != "" && body.Message == "" {
+		body.Message = thresholdDetail
+	}
 
 	// Agent-authored sub-steps: persist in slice order (ordinal = index)
 	// and fan out a per-row SSE event each so the detail pane shows them
@@ -502,6 +553,14 @@ func (a *Agent) Result(w http.ResponseWriter, r *http.Request) {
 		}
 	}
 
+	// Firmware-specific: persist each snapshot into firmware_snapshots.
+	// SpecValidate reads them back to diff against expected_firmware.
+	if body.Stage == "Firmware" && len(body.Firmware) > 0 {
+		if err := a.persistFirmware(r.Context(), runID, body.Firmware); err != nil {
+			log.Printf("persist firmware run %d: %v", runID, err)
+		}
+	}
+
 	if !body.Passed {
 		if err := a.Runs.SetFailedStage(r.Context(), runID, body.Stage); err != nil {
 			log.Printf("set failed stage: %v", err)
@@ -615,6 +674,34 @@ func parseResultTime(s string) *time.Time {
 	return nil
 }
 
+// persistFirmware converts each reported line into a
+// store.FirmwareSnapshot for runID and writes them in one batch. A nil
+// Firmware store or an empty slice is a no-op, so tests that don't wire
+// the store still pass; a Raw map that fails to marshal is stored as "{}".
+func (a *Agent) persistFirmware(ctx context.Context, runID int64, lines []FirmwareLine) error {
+	if len(lines) == 0 || a.Firmware == nil {
+		return nil
+	}
+	snapshots := make([]store.FirmwareSnapshot, len(lines))
+	for i, line := range lines {
+		rawJSON := "{}"
+		if len(line.Raw) > 0 {
+			if encoded, err := json.Marshal(line.Raw); err == nil {
+				rawJSON = string(encoded)
+			}
+		}
+		snapshots[i] = store.FirmwareSnapshot{
+			RunID:      runID,
+			Component:  line.Component,
+			Identifier: line.Identifier,
+			Version:    line.Version,
+			Vendor:     line.Vendor,
+			RawJSON:    rawJSON,
+		}
+	}
+	return a.Firmware.CreateBatch(ctx, snapshots)
+}
+
 func (a *Agent) persistInventory(r *http.Request, run *model.Run, inv *spec.Inventory) error {
 	dir := filepath.Join(a.ArtifactsDir, fmt.Sprintf("run-%d", run.ID))
 	if err := os.MkdirAll(dir, 0o755); err != nil {
@@ -667,6 +754,22 @@ func (a *Agent) resolveSpecValidate(r *http.Request, runID int64) {
 		return
 	}
 	diffs := spec.Diff(expected, inv)
+	if a.Firmware != nil && len(expected.Firmware) > 0 {
+		snaps, err := a.Firmware.ListForRun(r.Context(), runID)
+		if err != nil {
+			log.Printf("specvalidate: list firmware: %v", err)
+		} else {
+			observed := make([]spec.FirmwareObserved, 0, len(snaps))
+			for _, s := range snaps {
+				observed = append(observed, spec.FirmwareObserved{
+					Component:  s.Component,
+					Identifier: s.Identifier,
+					Version:    s.Version,
+				})
+			}
+			diffs = append(diffs, spec.DiffFirmware(expected.Firmware, observed)...)
+		}
+	}
 	if err := a.SpecDiffs.ReplaceForRun(r.Context(), runID, diffs); err != nil {
 		log.Printf("specvalidate: write diffs: %v", err)
 	}
@@ -884,13 +987,17 @@ type SensorSample struct {
 }
 
 // Sensor persists a batch of numeric samples. The thermal sidecar hits
-// this on a tick; stage executors (iperf, fio) also drop here.
+// this on a tick; stage executors (iperf, fio) also drop here. Each
+// sample is evaluated against the run's seeded thresholds — critical
+// breaches fail the run immediately (thermal runaway, EDAC UE, voltage
+// sag); warning breaches are recorded for the report only.
 func (a *Agent) Sensor(w http.ResponseWriter, r *http.Request) {
 	runID, ok := runIDFromURL(w, r)
 	if !ok {
 		return
 	}
-	if _, ok := a.authenticate(w, r, runID); !ok {
+	run, ok := a.authenticate(w, r, runID)
+	if !ok {
 		return
 	}
 	if a.Measurements == nil {
@@ -903,8 +1010,12 @@ func (a *Agent) Sensor(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 	rows := make([]model.Measurement, 0, len(body.Samples))
+	sampleStages := make([]string, 0, len(body.Samples))
 	for _, s := range body.Samples {
 		ts, _ := time.Parse(time.RFC3339Nano, s.TS)
+		if ts.IsZero() {
+			ts = time.Now().UTC()
+		}
 		rows = append(rows, model.Measurement{
 			RunID: runID,
 			TS:    ts,
@@ -913,12 +1024,139 @@ func (a *Agent) Sensor(w http.ResponseWriter, r *http.Request) {
 			Value: s.Value,
 			Unit:  s.Unit,
 		})
+		// Stage the sample belongs to drives threshold selector
+		// matching. We use the run's current state — the agent does
+		// not tag samples with a stage.
+		sampleStages = append(sampleStages, orchestrator.StageNameForState(run.State))
 	}
 	if err := a.Measurements.CreateBatch(r.Context(), rows); err != nil {
 		http.Error(w, "write samples: "+err.Error(), http.StatusInternalServerError)
 		return
 	}
-	writeJSON(w, http.StatusOK, map[string]any{"ok": true, "written": len(rows)})
+	critical := a.evaluateSensorBatch(r.Context(), runID, rows, sampleStages)
+	writeJSON(w, http.StatusOK, map[string]any{
+		"ok":          true,
+		"written":     len(rows),
+		"breach":      critical != "",
+		"breach_kind": critical,
+	})
+	if critical != "" {
+		a.failRunOnCriticalBreach(r, run, critical)
+	}
+}
+
+// evaluateSensorBatch checks every sample against the run's threshold
+// rules, records one evaluation per result Evaluate returns, and
+// returns a short label describing the first critical breach — or ""
+// when nothing critical tripped, no rules exist, or rules can't load.
+func (a *Agent) evaluateSensorBatch(ctx context.Context, runID int64, rows []model.Measurement, sampleStages []string) string {
+	if len(rows) == 0 || a.Thresholds == nil {
+		return ""
+	}
+	stored, err := a.Thresholds.ListForRun(ctx, runID)
+	if err != nil {
+		log.Printf("sensor: list thresholds run %d: %v", runID, err)
+		return ""
+	}
+	if len(stored) == 0 {
+		return ""
+	}
+	rules := make([]orchestrator.Threshold, len(stored))
+	for i, row := range stored {
+		rules[i] = orchestrator.Threshold{
+			ID:       row.ID,
+			Stage:    row.Stage,
+			Kind:     row.Kind,
+			Key:      row.Key,
+			Op:       orchestrator.ThresholdOp(row.Op),
+			Value:    row.Threshold,
+			Nominal:  row.Nominal,
+			Severity: orchestrator.ThresholdSeverity(row.Severity),
+		}
+	}
+	results := make([]store.ThresholdEvaluation, 0, len(rows))
+	var firstCritical string
+	for idx, meas := range rows {
+		smp := orchestrator.Sample{
+			Stage: sampleStages[idx],
+			Kind:  meas.Kind,
+			Key:   meas.Key,
+			Value: meas.Value,
+		}
+		for _, outcome := range orchestrator.Evaluate(smp, rules) {
+			results = append(results, store.ThresholdEvaluation{
+				RunID:       runID,
+				ThresholdID: outcome.Threshold.ID,
+				Stage:       smp.Stage,
+				Kind:        smp.Kind,
+				Key:         smp.Key,
+				TS:          meas.TS,
+				Observed:    outcome.Observed,
+				Passed:      outcome.Passed,
+			})
+			if firstCritical == "" && outcome.CriticalBreach() {
+				firstCritical = fmt.Sprintf("%s %s=%g breached %s %g",
+					outcome.Threshold.Kind, smp.Key, outcome.Observed, outcome.Threshold.Op, outcome.Threshold.Value)
+			}
+		}
+	}
+	if err := a.Thresholds.RecordBatch(ctx, results); err != nil {
+		log.Printf("sensor: record evals run %d: %v", runID, err)
+	}
+	return firstCritical
+}
+
+// stageHadCriticalBreach reports whether any critical breach recorded
+// for the run is attributed to stage, or carries an empty or "*"
+// stage, and returns a readable detail for the first match. A nil
+// Thresholds store or a failed lookup (logged) yields false.
+func (a *Agent) stageHadCriticalBreach(ctx context.Context, runID int64, stage string) (bool, string) {
+	if a.Thresholds == nil {
+		return false, ""
+	}
+	breaches, err := a.Thresholds.CriticalBreaches(ctx, runID)
+	if err != nil {
+		log.Printf("result: list breaches run %d: %v", runID, err)
+		return false, ""
+	}
+	for _, breach := range breaches {
+		switch breach.Stage {
+		case stage, "", "*":
+			return true, fmt.Sprintf("critical threshold breach: %s %s=%g", breach.Kind, breach.Key, breach.Observed)
+		}
+	}
+	return false, ""
+}
+
+// failRunOnCriticalBreach flips the run to FailedHolding after a live
+// threshold breach (thermal runaway, EDAC UE, rail sag), then notifies
+// and logs. The agent's pending /result for the stage may still arrive;
+// the silent-skip guard is expected to refuse a double transition.
+func (a *Agent) failRunOnCriticalBreach(r *http.Request, run *model.Run, detail string) {
+	stage := orchestrator.StageNameForState(run.State)
+	if stage == "" {
+		stage = "threshold"
+	}
+	if err := a.Runs.SetFailedStage(r.Context(), run.ID, stage+" (threshold)"); err != nil {
+		log.Printf("sensor: set failed stage run %d: %v", run.ID, err)
+	}
+	if _, err := a.Runner.Transition(r.Context(), run.ID, orchestrator.TriggerStageFailed); err != nil {
+		// Already in FailedHolding → the transition errors; first breach
+		// wins. NOTE(review): failed_stage was already rewritten above.
+		log.Printf("sensor: fail-transition run %d: %v", run.ID, err)
+		return
+	}
+	hostName := a.hostNameFor(r.Context(), run.HostID)
+	a.dispatchEvent(notify.Event{
+		Kind:     notify.KindStageFailed,
+		Severity: notify.SeverityCritical,
+		RunID:    run.ID,
+		HostName: hostName,
+		Title:    fmt.Sprintf("[vetting] %s FAILED: %s (threshold)", hostName, stage),
+		Body:     fmt.Sprintf("Run %d on %s tripped a critical threshold during %s: %s", run.ID, hostName, stage, detail),
+		URL:      a.runLinkURL(run.ID),
+	})
+	a.appendLog(run.ID, "error", fmt.Sprintf("threshold breach during %s: %s — run parked in FailedHolding", stage, detail))
 }
 
 // resolveReporting runs when the pipeline advances into StateReporting.
@@ -956,12 +1194,20 @@ func (a *Agent) resolveReporting(r *http.Request, runID int64) {
 			log.Printf("reporting: list measurements: %v", err)
 		}
 	}
+	var firmware []store.FirmwareSnapshot
+	if a.Firmware != nil {
+		firmware, err = a.Firmware.ListForRun(ctx, runID)
+		if err != nil {
+			log.Printf("reporting: list firmware: %v", err)
+		}
+	}
 	bundle := map[string]any{
 		"run":          run,
 		"host":         host,
 		"stages":       stages,
 		"spec_diffs":   diffs,
 		"measurements": measurements,
+		"firmware":     firmware,
 		"generated_at": time.Now().UTC().Format(time.RFC3339),
 	}
 	buf, err := json.MarshalIndent(bundle, "", "  ")
@@ -993,6 +1239,15 @@ func (a *Agent) resolveReporting(r *http.Request, runID int64) {
 	// Also render the operator-facing HTML summary alongside the JSON.
 	// Failures here are non-fatal — the JSON is the source of truth.
 	if host != nil {
+		fwRows := make([]report.FirmwareSnapshot, 0, len(firmware))
+		for _, f := range firmware {
+			fwRows = append(fwRows, report.FirmwareSnapshot{
+				Component:  f.Component,
+				Identifier: f.Identifier,
+				Version:    f.Version,
+				Vendor:     f.Vendor,
+			})
+		}
 		htmlData := report.Data{
 			GeneratedAt: time.Now().UTC(),
 			Run:         *run,
@@ -1000,6 +1255,7 @@ func (a *Agent) resolveReporting(r *http.Request, runID int64) {
 			Stages:      stages,
 			SpecDiffs:   diffs,
 			Aggregates:  report.AggregateMeasurements(measurements),
+			Firmware:    fwRows,
 		}
 		if htmlBuf, err := report.RenderHTML(htmlData); err != nil {
 			log.Printf("reporting: render html: %v", err)
diff --git a/internal/api/run_page_test.go b/internal/api/run_page_test.go
index 586a509..7cc61dc 100644
--- a/internal/api/run_page_test.go
+++ b/internal/api/run_page_test.go
@@ -108,7 +108,7 @@ func TestRunPage_DefaultStep_Running(t *testing.T) {
 	})
 	runID, _ := runs.Create(ctx, id, "rr", false)
 	_ = ui.Stages.Seed(ctx, runID)
-	for _, name := range []string{"Inventory", "SpecValidate"} {
+	for _, name := range []string{"Inventory", "Firmware", "SpecValidate"} {
 		_ = ui.Stages.StartByName(ctx, runID, name)
 		_ = ui.Stages.CompleteByName(ctx, runID, name, model.StagePassed, "")
 	}
@@ -135,7 +135,7 @@ func TestRunPage_DefaultStep_Failed(t *testing.T) {
 	})
 	runID, _ := runs.Create(ctx, id, "rf", false)
 	_ = ui.Stages.Seed(ctx, runID)
-	for _, name := range []string{"Inventory", "SpecValidate", "SMART"} {
+	for _, name := range []string{"Inventory", "Firmware", "SpecValidate", "SMART"} {
 		_ = ui.Stages.StartByName(ctx, runID, name)
 		_ = ui.Stages.CompleteByName(ctx, runID, name, model.StagePassed, "")
 	}
diff --git a/internal/api/sensor_thresholds_test.go b/internal/api/sensor_thresholds_test.go
new file mode 100644
index 0000000..d49973b
--- /dev/null
+++ b/internal/api/sensor_thresholds_test.go
@@ -0,0 +1,169 @@
+package api_test
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"path/filepath"
+	"strconv"
+	"testing"
+
+	"vetting/internal/api"
+	"vetting/internal/db"
+	"vetting/internal/events"
+	"vetting/internal/model"
+	"vetting/internal/orchestrator"
+	"vetting/internal/store"
+)
+
+// setupAgentWithThresholds assembles an api.Agent over a throwaway DB
+// with the thresholds store and a Runner attached. The run it returns
+// has its stages seeded, is parked in CPUStress, and carries one
+// critical rule (stage "*", temp cpu/* lt 92 C) plus its bearer token.
+func setupAgentWithThresholds(t *testing.T) (*api.Agent, int64, string) {
+	t.Helper()
+	ctx := context.Background()
+	conn, err := db.Open(filepath.Join(t.TempDir(), "vetting.db"))
+	if err != nil {
+		t.Fatalf("open db: %v", err)
+	}
+	t.Cleanup(func() { _ = conn.Close() })
+
+	hostStore := &store.Hosts{DB: conn}
+	runStore := &store.Runs{DB: conn}
+	stageStore := &store.Stages{DB: conn}
+	measurementStore := &store.Measurements{DB: conn}
+	thresholdStore := &store.Thresholds{DB: conn}
+	hub := events.NewHub()
+	runner := &orchestrator.Runner{Runs: runStore, Hosts: hostStore, Stages: stageStore, EventHub: hub}
+
+	hostID, err := hostStore.Create(ctx, model.Host{
+		Name:             "thresh-host",
+		MAC:              "aa:bb:cc:dd:ee:aa",
+		WoLBroadcastIP:   "10.0.0.255",
+		WoLPort:          9,
+		ExpectedSpecYAML: "memory:\n  total_gib: 16\n",
+	})
+	if err != nil {
+		t.Fatalf("create host: %v", err)
+	}
+	token, tokenHash, err := orchestrator.IssueRunToken()
+	if err != nil {
+		t.Fatalf("issue token: %v", err)
+	}
+	runID, err := runStore.Create(ctx, hostID, tokenHash, false)
+	if err != nil {
+		t.Fatalf("create run: %v", err)
+	}
+	if err := stageStore.Seed(ctx, runID); err != nil {
+		t.Fatalf("seed stages: %v", err)
+	}
+	// Park the run where a real thermal sidecar would be posting samples.
+	if err := runStore.SetState(ctx, runID, model.StateCPUStress); err != nil {
+		t.Fatalf("set state: %v", err)
+	}
+	// The only rule seeded for this run: a critical, any-stage threshold
+	// requiring every cpu/* temperature sample to stay below 92 C.
+	rule := store.ThresholdSpec{Stage: "*", Kind: "temp", Key: "cpu/*", Op: "lt", Value: 92, Unit: "C", Severity: "critical", Source: "profile"}
+	if _, err := thresholdStore.SeedForRun(ctx, runID, []store.ThresholdSpec{rule}); err != nil {
+		t.Fatalf("seed thresholds: %v", err)
+	}
+	return &api.Agent{
+		Hosts:        hostStore,
+		Runs:         runStore,
+		Stages:       stageStore,
+		Measurements: measurementStore,
+		Thresholds:   thresholdStore,
+		Runner:       runner,
+	}, runID, token
+}
+
+// TestSensor_ThermalRunawayFailsRun posts one 95.3 C cpu/0 sample
+// against the seeded "<92 C, critical" rule and expects three things:
+// the response reports breach=true, the run moves to FailedHolding
+// with a non-empty failed_stage, and exactly one evaluation row is
+// recorded with Passed=false.
+func TestSensor_ThermalRunawayFailsRun(t *testing.T) {
+	agent, runID, token := setupAgentWithThresholds(t)
+	ctx := context.Background()
+	payload, _ := json.Marshal(api.SensorBatch{Samples: []api.SensorSample{
+		{Kind: "temp", Key: "cpu/0", Value: 95.3, Unit: "C"},
+	}})
+	url := "/api/v1/runs/" + strconv.FormatInt(runID, 10) + "/sensor"
+	req := routedRequest(runID, http.MethodPost, url, payload)
+	req.Header.Set("Authorization", "Bearer "+token)
+	req.Header.Set("Content-Type", "application/json")
+
+	rec := httptest.NewRecorder()
+	agent.Sensor(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status = %d, body = %q", rec.Code, rec.Body.String())
+	}
+	var resp struct {
+		OK     bool   `json:"ok"`
+		Breach bool   `json:"breach"`
+		Kind   string `json:"breach_kind"`
+	}
+	if err := json.Unmarshal(rec.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("decode: %v", err)
+	}
+	if !resp.Breach {
+		t.Fatalf("expected breach=true, got %+v", resp)
+	}
+	run, err := agent.Runs.Get(ctx, runID)
+	if err != nil {
+		t.Fatalf("get run: %v", err)
+	}
+	if run.State != model.StateFailedHolding {
+		t.Fatalf("state = %s, want FailedHolding", run.State)
+	}
+	if run.FailedStage == "" {
+		t.Fatalf("failed_stage empty; want stage-named breach")
+	}
+	evals, err := agent.Thresholds.ListEvaluations(ctx, runID)
+	if err != nil {
+		t.Fatalf("list evaluations: %v", err)
+	}
+	if len(evals) != 1 {
+		t.Fatalf("want 1 evaluation recorded, got %d", len(evals))
+	}
+	if evals[0].Passed {
+		t.Fatalf("evaluation recorded as passed for 95.3C sample against <92C rule")
+	}
+}
+
+// TestSensor_WithinThresholdPasses posts one 55.0 C cpu/0 sample, well
+// below the seeded 92 C limit, and expects the run to remain in
+// CPUStress with exactly one evaluation row recorded as passed.
+func TestSensor_WithinThresholdPasses(t *testing.T) {
+	agent, runID, token := setupAgentWithThresholds(t)
+	ctx := context.Background()
+	payload, _ := json.Marshal(api.SensorBatch{Samples: []api.SensorSample{
+		{Kind: "temp", Key: "cpu/0", Value: 55.0, Unit: "C"},
+	}})
+	url := "/api/v1/runs/" + strconv.FormatInt(runID, 10) + "/sensor"
+	req := routedRequest(runID, http.MethodPost, url, payload)
+	req.Header.Set("Authorization", "Bearer "+token)
+	req.Header.Set("Content-Type", "application/json")
+
+	rec := httptest.NewRecorder()
+	agent.Sensor(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status = %d, body = %q", rec.Code, rec.Body.String())
+	}
+	run, err := agent.Runs.Get(ctx, runID)
+	if err != nil {
+		t.Fatalf("get run: %v", err)
+	}
+	if run.State != model.StateCPUStress {
+		t.Fatalf("state = %s, want CPUStress unchanged", run.State)
+	}
+	evals, err := agent.Thresholds.ListEvaluations(ctx, runID)
+	if err != nil {
+		t.Fatalf("list evaluations: %v", err)
+	}
+	if len(evals) != 1 || !evals[0].Passed {
+		t.Fatalf("want 1 passed evaluation, got %+v", evals)
+	}
+}
diff --git a/internal/api/smoke_test.go b/internal/api/smoke_test.go
index cba8ea4..46f6dec 100644
--- a/internal/api/smoke_test.go
+++ b/internal/api/smoke_test.go
@@ -75,6 +75,12 @@ func newCaptureRegistry(c *captureNotifier) *notify.Registry {
 // (agent, runID, plainTokenForBearer). Caller is responsible for
 // transitioning the run out of Queued.
 func fullAgent(t *testing.T) (*api.Agent, int64, string) {
+	return fullAgentWithSpec(t, "")
+}
+
+// fullAgentWithSpec is the same as fullAgent but seeds the host with
+// an ExpectedSpecYAML so SpecValidate can pick up diffs in the test.
+func fullAgentWithSpec(t *testing.T, expectedSpecYAML string) (*api.Agent, int64, string) {
 	t.Helper()
 	tmp := t.TempDir()
 	conn, err := db.Open(filepath.Join(tmp, "vetting.db"))
@@ -89,6 +95,7 @@ func fullAgent(t *testing.T) (*api.Agent, int64, string) {
 	artifactStore := &store.Artifacts{DB: conn}
 	specDiffStore := &store.SpecDiffs{DB: conn}
 	measurementStore := &store.Measurements{DB: conn}
+	firmwareStore := &store.Firmware{DB: conn}
 
 	hub := events.NewHub()
 	logHub, err := logs.NewHub(filepath.Join(tmp, "logs"), hub)
@@ -109,7 +116,7 @@ func fullAgent(t *testing.T) (*api.Agent, int64, string) {
 		MAC:              "aa:bb:cc:dd:ee:10",
 		WoLBroadcastIP:   "10.0.0.255",
 		WoLPort:          9,
-		ExpectedSpecYAML: "", // empty spec → no diffs
+		ExpectedSpecYAML: expectedSpecYAML,
 	})
 	if err != nil {
 		t.Fatalf("create host: %v", err)
@@ -132,6 +139,7 @@ func fullAgent(t *testing.T) (*api.Agent, int64, string) {
 		Artifacts:    artifactStore,
 		SpecDiffs:    specDiffStore,
 		Measurements: measurementStore,
+		Firmware:     firmwareStore,
 		Runner:       runner,
 		EventHub:     hub,
 		Logs:         logHub,
@@ -195,20 +203,24 @@ func TestFullPipelineToCompleted(t *testing.T) {
 		Memory: spec.MemorySpec{TotalGiB: 16},
 	}
 	next := walkStage(t, a, runID, token, "Inventory", true, map[string]any{"inventory": inv})
-	// After Inventory → SpecValidate resolves inline → SMART
-	if next != "SMART" {
-		t.Fatalf("after Inventory, next_state = %q, want SMART", next)
+	// After Inventory → Firmware
+	if next != "Firmware" {
+		t.Fatalf("after Inventory, next_state = %q, want Firmware", next)
 	}
 
-	// The remaining stages advance one-for-one in order.
+	// The remaining stages advance one-for-one in order. After Firmware
+	// the inline SpecValidate resolver advances through SpecValidate to
+	// SMART without a dedicated /result POST for SpecValidate.
 	walkPlan := []struct {
 		stage    string
 		expected string
 	}{
+		{"Firmware", "SMART"},
 		{"SMART", "CPUStress"},
 		{"CPUStress", "Storage"},
 		{"Storage", "Network"},
-		{"Network", "GPU"},
+		{"Network", "Burn"},
+		{"Burn", "GPU"},
 		{"GPU", "PSU"},
 		{"PSU", "Completed"}, // PSU → Reporting resolves inline → Completed
 	}
@@ -287,8 +299,11 @@ func TestFaultInjectionSMART(t *testing.T) {
 	}
 
 	inv := spec.Inventory{Memory: spec.MemorySpec{TotalGiB: 16}}
-	if next := walkStage(t, a, runID, token, "Inventory", true, map[string]any{"inventory": inv}); next != "SMART" {
-		t.Fatalf("after Inventory, next = %q want SMART", next)
+	if next := walkStage(t, a, runID, token, "Inventory", true, map[string]any{"inventory": inv}); next != "Firmware" {
+		t.Fatalf("after Inventory, next = %q want Firmware", next)
+	}
+	if next := walkStage(t, a, runID, token, "Firmware", true, nil); next != "SMART" {
+		t.Fatalf("after Firmware, next = %q want SMART (inline SpecValidate)", next)
 	}
 
 	// Fake SMART failure → expect FailedHolding.
@@ -316,3 +331,76 @@ func TestFaultInjectionSMART(t *testing.T) {
 		t.Errorf("StageFailed severity = %q, want critical", ev.Severity)
 	}
 }
+
+// TestFirmwarePersistAndSpecMismatch covers the firmware path end to
+// end: the host spec demands BIOS 3.3, the agent POSTs a 3.2 snapshot,
+// the row is persisted, and the inline SpecValidate parks the run in
+// FailedHolding with FailedStage=SpecValidate and a critical firmware diff.
+func TestFirmwarePersistAndSpecMismatch(t *testing.T) {
+	// Host demands BIOS 3.3; agent will POST 3.2 → one critical firmware diff.
+	specYAML := "firmware:\n  - component: bios\n    version: \"3.3\"\n"
+	agent, runID, token := fullAgentWithSpec(t, specYAML)
+	agent.Notify = newCaptureRegistry(&captureNotifier{name: "capture"})
+	ctx := context.Background()
+
+	if err := agent.Runs.SetState(ctx, runID, model.StateInventoryCheck); err != nil {
+		t.Fatalf("set state: %v", err)
+	}
+
+	inv := spec.Inventory{Memory: spec.MemorySpec{TotalGiB: 16}}
+	if next := walkStage(t, agent, runID, token, "Inventory", true, map[string]any{"inventory": inv}); next != "Firmware" {
+		t.Fatalf("after Inventory, next = %q want Firmware", next)
+	}
+
+	// Firmware stage: agent reports actual BIOS 3.2 → one row persisted.
+	reported := []map[string]any{
+		{"component": "bios", "identifier": "system", "version": "3.2", "vendor": "AMI"},
+	}
+	next := walkStage(t, agent, runID, token, "Firmware", true, map[string]any{"firmware": reported})
+	// Inline SpecValidate must fail the run; the agent never posts SpecValidate.
+	if next != "FailedHolding" {
+		t.Fatalf("after Firmware mismatch, next = %q want FailedHolding", next)
+	}
+
+	run, err := agent.Runs.Get(ctx, runID)
+	if err != nil {
+		t.Fatalf("get run: %v", err)
+	}
+	if run.State != model.StateFailedHolding {
+		t.Fatalf("run.State = %q, want FailedHolding", run.State)
+	}
+	if run.FailedStage != "SpecValidate" {
+		t.Fatalf("run.FailedStage = %q, want SpecValidate", run.FailedStage)
+	}
+
+	// Persistence: row landed in firmware_snapshots.
+	snaps, err := agent.Firmware.ListForRun(ctx, runID)
+	if err != nil {
+		t.Fatalf("ListForRun firmware: %v", err)
+	}
+	if len(snaps) != 1 {
+		t.Fatalf("firmware rows = %d, want 1: %+v", len(snaps), snaps)
+	}
+	if snaps[0].Component != "bios" || snaps[0].Version != "3.2" {
+		t.Errorf("persisted snapshot = %+v", snaps[0])
+	}
+
+	// Diff rows: expect a critical entry whose Field starts with "firmware[".
+	diffs, err := agent.SpecDiffs.ListForRun(ctx, runID)
+	if err != nil {
+		t.Fatalf("ListForRun specdiffs: %v", err)
+	}
+	sawFirmwareDiff := false
+	for _, d := range diffs {
+		if !strings.HasPrefix(d.Field, "firmware[") {
+			continue
+		}
+		sawFirmwareDiff = true
+		if d.Severity != "critical" {
+			t.Errorf("firmware diff severity = %q, want critical", d.Severity)
+		}
+	}
+	if !sawFirmwareDiff {
+		t.Fatalf("no firmware[...] entry in spec diffs: %+v", diffs)
+	}
+}
diff --git a/internal/api/ui_handlers.go b/internal/api/ui_handlers.go
index c3f2a9f..745b6db 100644
--- a/internal/api/ui_handlers.go
+++ b/internal/api/ui_handlers.go
@@ -16,6 +16,7 @@ import (
 	"github.com/go-chi/chi/v5"
 	"gopkg.in/yaml.v3"
 
+	"vetting/internal/config"
 	"vetting/internal/events"
 	"vetting/internal/logs"
 	"vetting/internal/model"
@@ -26,17 +27,19 @@ import (
 )
 
 type UI struct {
-	Hosts     *store.Hosts
-	Runs      *store.Runs
-	Stages    *store.Stages
-	SubSteps  *store.SubSteps
-	SpecDiffs *store.SpecDiffs
-	Artifacts *store.Artifacts
-	EventHub  *events.Hub
-	Logs      *logs.Hub
-	Runner    *orchestrator.Runner
-	Tiles     *TileEnricher
-	PublicURL string // user-visible base URL baked into the quick-register one-liner
+	Hosts      *store.Hosts
+	Runs       *store.Runs
+	Stages     *store.Stages
+	SubSteps   *store.SubSteps
+	SpecDiffs  *store.SpecDiffs
+	Artifacts  *store.Artifacts
+	Thresholds *store.Thresholds // Phase 1: seeded at StartRun from Profiles
+	Profiles   *config.ProfileRegistry
+	EventHub   *events.Hub
+	Logs       *logs.Hub
+	Runner     *orchestrator.Runner
+	Tiles      *TileEnricher
+	PublicURL  string // user-visible base URL baked into the quick-register one-liner
 	// PXE, when non-nil, gets Reload()ed after host create/delete so
 	// dnsmasq's dhcp-host= allowlist reflects the current registry.
 	// Without this, a newly-registered host PXE-boots and gets
@@ -316,23 +319,71 @@ func (u *UI) StartRun(w http.ResponseWriter, r *http.Request) {
 	}
 
 	nonDestructive := r.PostFormValue("non_destructive") == "1"
+	profile := strings.TrimSpace(r.PostFormValue("profile"))
+	if profile == "" {
+		profile = config.ProfileQuick
+	}
+	if !config.IsValidProfile(profile) {
+		http.Error(w, "unknown profile: "+profile, http.StatusBadRequest)
+		return
+	}
 
 	_, hash, err := orchestrator.IssueRunToken()
 	if err != nil {
 		http.Error(w, "token: "+err.Error(), http.StatusInternalServerError)
 		return
 	}
-	runID, err := u.Runs.Create(r.Context(), hostID, hash, nonDestructive)
+	runID, err := u.Runs.CreateWithProfile(r.Context(), hostID, hash, nonDestructive, profile)
 	if err != nil {
 		http.Error(w, "create run: "+err.Error(), http.StatusInternalServerError)
 		return
 	}
-	log.Printf("ui: created run %d for host %d (state=Queued)", runID, hostID)
+	if err := u.seedThresholds(r.Context(), runID, host, profile); err != nil {
+		// A threshold-seed failure shouldn't orphan a run row — log
+		// and continue. Samples will just accumulate without a gate
+		// until the operator retries, same as before Phase 1.
+		log.Printf("ui: seed thresholds run %d: %v", runID, err)
+	}
+	log.Printf("ui: created run %d for host %d profile=%s (state=Queued)", runID, hostID, profile)
 	// Send the operator straight to the new run — the button they clicked
 	// was "Start vetting", the thing they want next is to watch it.
 	http.Redirect(w, r, fmt.Sprintf("/runs/%d", runID), http.StatusSeeOther)
 }
 
+// seedThresholds copies the registry's shared vetting.thresholds list
+// into the per-run thresholds table, tagging every row Source="profile".
+// host and profile are accepted but not consulted yet; they are reserved
+// for per-host and per-profile override layers. It is a silent no-op
+// when the Thresholds store or the Profiles registry is not wired (tests
+// do not always build one) or when no defaults are declared.
+func (u *UI) seedThresholds(ctx context.Context, runID int64, host *model.Host, profile string) error {
+	_ = host    // reserved for per-host override layer
+	_ = profile // reserved for per-profile override layer
+	if u.Thresholds == nil || u.Profiles == nil {
+		return nil
+	}
+	shared := u.Profiles.Vetting.Thresholds
+	if len(shared) == 0 {
+		return nil
+	}
+	rows := make([]store.ThresholdSpec, len(shared))
+	for i, d := range shared {
+		rows[i] = store.ThresholdSpec{
+			Stage:    d.Stage,
+			Kind:     d.Kind,
+			Key:      d.Key,
+			Op:       d.Op,
+			Value:    d.Value,
+			Nominal:  d.Nominal,
+			Unit:     d.Unit,
+			Severity: d.Severity,
+			Source:   "profile",
+		}
+	}
+	_, err := u.Thresholds.SeedForRun(ctx, runID, rows)
+	return err
+}
+
 func (u *UI) NewHostForm(w http.ResponseWriter, r *http.Request) {
 	_ = templates.Registration(templates.RegistrationForm{
 		QuickRegisterURL: u.baseURL(r),
diff --git a/internal/config/config.go b/internal/config/config.go
index a064f20..1c0460b 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -20,6 +20,13 @@ type Config struct {
 	Agent      Agent      `yaml:"agent"`
 	Notifiers  []Notifier `yaml:"notifiers"`
 	Routes     []Route    `yaml:"routes"`
+
+	// Profiles holds the Phase-1 quick/deep/soak registry (stage order,
+	// threshold defaults, per-profile stage timeouts + probe knobs).
+	// Populated from the `vetting:` and `profiles:` top-level blocks
+	// during Load. Nil is never returned — Load installs a default
+	// registry when those blocks are absent.
+	Profiles *ProfileRegistry `yaml:"-"`
 }
 
 type Server struct {
@@ -111,6 +118,20 @@ func Load(path string) (*Config, error) {
 	if err := yaml.Unmarshal(b, &c); err != nil {
 		return nil, fmt.Errorf("parse config: %w", err)
 	}
+	// The `vetting:` + `profiles:` blocks live alongside the existing
+	// fields but we decode them into the raw shape because YAML
+	// durations arrive as strings. Reusing the same byte buffer is
+	// safe: yaml.Unmarshal is happy to ignore keys the target doesn't
+	// know about.
+	var rawProfiles rawProfilesBlock
+	if err := yaml.Unmarshal(b, &rawProfiles); err != nil {
+		return nil, fmt.Errorf("parse profiles: %w", err)
+	}
+	reg, err := buildProfileRegistry(rawProfiles)
+	if err != nil {
+		return nil, fmt.Errorf("profiles: %w", err)
+	}
+	c.Profiles = reg
 	if c.Server.Bind == "" {
 		c.Server.Bind = "127.0.0.1:8080"
 	}
diff --git a/internal/config/profiles.go b/internal/config/profiles.go
new file mode 100644
index 0000000..f68a76c
--- /dev/null
+++ b/internal/config/profiles.go
@@ -0,0 +1,442 @@
+package config
+
+import (
+	"fmt"
+	"sort"
+	"strings"
+	"time"
+)
+
+// Profile names: the set of legal values for a Run's profile column.
+// Exposed as constants so callers (UI handler, tests, agent) don't
+// sprinkle literal strings.
+const (
+	ProfileQuick = "quick"
+	ProfileDeep  = "deep"
+	ProfileSoak  = "soak"
+)
+
+// AllProfiles is the canonical picker order: leftmost is the default,
+// rightmost the longest-running. IsValidProfile and Names range over it.
+var AllProfiles = []string{ProfileQuick, ProfileDeep, ProfileSoak}
+
+// IsValidProfile reports whether name appears in AllProfiles. The match
+// is exact: no trimming or case folding is applied, so callers must
+// normalise the value first.
+func IsValidProfile(name string) bool {
+	for i := range AllProfiles {
+		if AllProfiles[i] == name {
+			return true
+		}
+	}
+	return false
+}
+
+// Vetting holds the stage order + threshold defaults that are shared
+// across all profiles. Only the per-stage durations/concurrency differ
+// between quick/deep/soak; gates like "CPU > 92C fails the run" apply
+// to a 2-minute quick run and a 12-hour soak alike.
+type Vetting struct {
+	Stages     []string            `yaml:"stages"`     // empty => DefaultStages() is substituted
+	Thresholds []ThresholdDefaults `yaml:"thresholds"` // copied into per-run rows by UI.seedThresholds
+}
+
+// ThresholdDefaults is the YAML shape of a threshold declaration. One
+// stanza declares either a per-stage rule (stage: Network) or a global
+// rule (stage: "*" — quote it, a bare * starts a YAML alias); the
+// evaluator applies both to samples with matching (stage, kind, key).
+type ThresholdDefaults struct {
+	Stage    string  `yaml:"stage"`
+	Kind     string  `yaml:"kind"`
+	Key      string  `yaml:"key"`
+	Op       string  `yaml:"op"` // lt|lte|gt|gte|within_pct
+	Value    float64 `yaml:"value"`
+	Nominal  float64 `yaml:"nominal"` // only used by within_pct (e.g. 12.0 for +12V rail)
+	Unit     string  `yaml:"unit"`
+	Severity string  `yaml:"severity"` // critical|warning
+}
+
+// ProfileRegistry is the in-memory view of the `vetting:` and `profiles:`
+// blocks in vetting.yaml. The orchestrator queries it at run creation
+// time to seed thresholds and (in Phase 3+) to scale per-stage durations.
+type ProfileRegistry struct {
+	// Shared stage ordering + threshold defaults. Every profile walks
+	// the same list; only durations/concurrency differ.
+	Vetting Vetting
+
+	// Profiles is keyed by name ("quick"/"deep"/"soak"). Inherit is
+	// already resolved at load time — a caller sees a flattened view.
+	Profiles map[string]Profile
+}
+
+// Profile is a loaded profile. StageTimeouts is keyed by stage name.
+// Defaults carries the free-form knobs each probe reads.
+type Profile struct {
+	Name          string                    // registry key, e.g. "quick"
+	Inherit       string                    // parent name as declared; its values are already merged in
+	StageTimeouts map[string]time.Duration  // stage name -> timeout, inherited entries included
+	Defaults      map[string]map[string]any // group ("cpustress", "storage", ...) -> knob -> raw value
+}
+
+// StageConfig is the flat view of a profile's knobs, shipped on the
+// claim response so the agent can size CPUStress/Storage/Network/Burn
+// work without parsing YAML; agent/client.go mirrors it as
+// ClaimStageConfig, so JSON tags must change in both places. Empty
+// values mean "fall back to the agent's compile-time default".
+type StageConfig struct {
+	Profile       string            `json:"profile"`
+	StageTimeouts map[string]string `json:"stage_timeouts,omitempty"`
+	CPUStress     CPUStressKnobs    `json:"cpustress"`
+	Storage       StorageKnobs      `json:"storage"`
+	Network       NetworkKnobs      `json:"network"`
+	Burn          BurnKnobs         `json:"burn"`
+}
+
+// CPUStressKnobs parallels the `cpustress:` block under `profiles.<name>.defaults`.
+// Values are duration strings ("2m", "60m", "12h"), forwarded here unparsed.
+type CPUStressKnobs struct {
+	CPUPass  string `json:"cpu_pass,omitempty"`
+	MemPass  string `json:"mem_pass,omitempty"`
+	EDACPoll string `json:"edac_poll,omitempty"`
+}
+
+// StorageKnobs parallels `storage:` defaults. Mode is "fio_sample" (quick)
+// or "full_disk" (deep/soak). Verify names the integrity mode ("md5" or "").
+type StorageKnobs struct {
+	Mode    string `json:"mode,omitempty"`     // "fio_sample" | "full_disk"
+	FioSize string `json:"fio_size,omitempty"` // e.g. "1GiB"
+	FioTime string `json:"fio_time,omitempty"` // duration string, e.g. "3m"
+	FioBS   string `json:"fio_bs,omitempty"`   // e.g. "4k"
+	FioRW   string `json:"fio_rw,omitempty"`   // e.g. "randrw"
+	Verify  string `json:"verify,omitempty"`   // "md5" or ""
+}
+
+// NetworkKnobs parallels `network:` defaults. Duration is a YAML string.
+type NetworkKnobs struct {
+	Duration string `json:"duration,omitempty"` // e.g. "60s"; forwarded unparsed
+}
+
+// BurnKnobs parallels `burn:` defaults. Duration is the total Burn window.
+// CPUWorkers is "all" (agent picks runtime.NumCPU) or a numeric string.
+// MemPct is a percentage of MemAvailable to stress. FioOnSpare gates
+// whether fio runs inside Burn (set false if operator lacks a spare
+// partition). IperfParallel is the parallel stream count fed to iperf3 -P.
+type BurnKnobs struct {
+	Duration      string `json:"duration,omitempty"`
+	CPUWorkers    string `json:"cpu_workers,omitempty"`
+	MemPct        int    `json:"mem_pct,omitempty"`
+	FioOnSpare    bool   `json:"fio_on_spare,omitempty"` // NOTE(review): omitempty drops false, so the agent cannot tell "off" from unset — confirm its default
+	IperfParallel int    `json:"iperf_parallel,omitempty"`
+}
+
+// ResolveStageConfig flattens the named profile into the wire shape the
+// claim handler ships. A nil registry or an unknown name yields a config
+// carrying only Profile=name; knobs missing from the profile stay zero.
+func (pr *ProfileRegistry) ResolveStageConfig(name string) StageConfig {
+	cfg := StageConfig{Profile: name}
+	if pr == nil {
+		return cfg
+	}
+	prof, err := pr.Lookup(name)
+	if err != nil {
+		return cfg
+	}
+	// Lookup maps "" to the default profile, so report the resolved name.
+	cfg.Profile = prof.Name
+	if n := len(prof.StageTimeouts); n > 0 {
+		cfg.StageTimeouts = make(map[string]string, n)
+		for stage, limit := range prof.StageTimeouts {
+			cfg.StageTimeouts[stage] = limit.String()
+		}
+	}
+	cpu, disk := prof.Defaults["cpustress"], prof.Defaults["storage"]
+	netw, burn := prof.Defaults["network"], prof.Defaults["burn"]
+	cfg.CPUStress.CPUPass = yamlString(cpu, "cpu_pass")
+	cfg.CPUStress.MemPass = yamlString(cpu, "mem_pass")
+	cfg.CPUStress.EDACPoll = yamlString(cpu, "edac_poll")
+	cfg.Storage.Mode = yamlString(disk, "mode")
+	cfg.Storage.FioSize = yamlString(disk, "fio_size")
+	cfg.Storage.FioTime = yamlString(disk, "fio_time")
+	cfg.Storage.FioBS = yamlString(disk, "fio_bs")
+	cfg.Storage.FioRW = yamlString(disk, "fio_rw")
+	cfg.Storage.Verify = yamlString(disk, "verify")
+	cfg.Network.Duration = yamlString(netw, "duration")
+	cfg.Burn.Duration = yamlString(burn, "duration")
+	cfg.Burn.CPUWorkers = yamlString(burn, "cpu_workers")
+	cfg.Burn.MemPct = yamlInt(burn, "mem_pct")
+	cfg.Burn.FioOnSpare = yamlBool(burn, "fio_on_spare")
+	cfg.Burn.IperfParallel = yamlInt(burn, "iperf_parallel")
+	return cfg
+}
+
+// yamlInt coerces a map[string]any entry to int. Native int and int64
+// pass through, float64 is truncated by int(), and a string is scanned
+// best-effort with %d. Anything else, or a missing/nil entry, yields 0.
+func yamlInt(m map[string]any, key string) int {
+	// Indexing a nil or key-less map yields a nil interface, which no
+	// case below matches, so missing and nil entries land on the final
+	// return without a separate presence check.
+	switch val := m[key].(type) {
+	case int:
+		return val
+	case int64:
+		return int(val)
+	case float64:
+		return int(val)
+	case string:
+		// Best-effort string → int: the scan reads a leading integer
+		// and fails only when there is none, so empty and non-numeric
+		// strings fall through to zero.
+		parsed := 0
+		if _, err := fmt.Sscanf(val, "%d", &parsed); err == nil {
+			return parsed
+		}
+	}
+	return 0
+}
+
+// yamlBool reads m[key] as a boolean. A native bool is returned as is
+// and a string counts as true only if it equals "true" ignoring case.
+// Every other input (missing key, nil, numbers, typos) yields false.
+func yamlBool(m map[string]any, key string) bool {
+	raw := m[key]
+	if flag, isBool := raw.(bool); isBool {
+		return flag
+	}
+	if text, isString := raw.(string); isString {
+		return strings.EqualFold(text, "true")
+	}
+	// Defaulting to false is the safer choice for a destructive knob
+	// such as fio_on_spare: input that cannot be read as a boolean
+	// must never switch it on.
+	return false
+}
+
+// yamlString renders m[key] as a string. Strings are returned as is
+// (YAML durations like "2m" arrive that way), a missing or nil entry
+// becomes "", and any other scalar (for example the int 5) is formatted
+// with fmt.Sprint so the agent can still interpret it.
+func yamlString(m map[string]any, key string) string {
+	switch val := m[key].(type) {
+	case nil:
+		return ""
+	case string:
+		return val
+	default:
+		return fmt.Sprint(val)
+	}
+}
+
+// Lookup returns the profile registered under name, treating an empty
+// name as the default profile (quick). A name that is not registered
+// produces an "unknown profile" error for the caller to surface.
+func (pr *ProfileRegistry) Lookup(name string) (Profile, error) {
+	key := name
+	if key == "" {
+		key = ProfileQuick
+	}
+	if found, ok := pr.Profiles[key]; ok {
+		return found, nil
+	}
+	return Profile{}, fmt.Errorf("unknown profile %q", key)
+}
+
+// Names returns the registry's profile names in the canonical picker
+// order (AllProfiles), followed by any other configured profiles in
+// ascending order, so the result never depends on map iteration order.
+func (pr *ProfileRegistry) Names() []string {
+	out := make([]string, 0, len(pr.Profiles))
+	for _, n := range AllProfiles {
+		if _, ok := pr.Profiles[n]; ok {
+			out = append(out, n)
+		}
+	}
+	known := len(out)
+	for n := range pr.Profiles {
+		if !IsValidProfile(n) {
+			out = append(out, n)
+		}
+	}
+	sort.Strings(out[known:])
+	return out
+}
+
+// Stages returns a copy of the configured stage order, so callers may
+// modify the result freely. When the registry declares no stages it
+// returns DefaultStages() instead of an empty slice.
+func (pr *ProfileRegistry) Stages() []string {
+	declared := pr.Vetting.Stages
+	if len(declared) == 0 {
+		return DefaultStages()
+	}
+	// Append onto a fresh slice so the registry's own slice is not aliased.
+	return append(make([]string, 0, len(declared)), declared...)
+}
+
+// DefaultStages returns a freshly allocated copy of the canonical stage
+// order used when no config declares one; keep vetting.yaml in sync.
+func DefaultStages() []string {
+	order := []string{
+		"Inventory",
+		"Firmware",
+		"SpecValidate",
+		"SMART",
+		"CPUStress",
+		"Storage",
+		"Network",
+		"Burn",
+		"GPU",
+		"PSU",
+		"Reporting",
+	}
+	return order
+}
+
+// rawProfile is the YAML shape before inherit resolution. Durations
+// arrive as strings (e.g. "2h") so we can parse them with
+// time.ParseDuration instead of rolling our own.
+type rawProfile struct {
+	Inherit       string                    `yaml:"inherit"`
+	StageTimeouts map[string]string         `yaml:"stage_timeouts"`
+	Defaults      map[string]map[string]any `yaml:"defaults"`
+}
+
+type rawProfilesBlock struct {
+	Vetting  Vetting               `yaml:"vetting"`  // shared stage order + threshold defaults
+	Profiles map[string]rawProfile `yaml:"profiles"` // keyed by profile name; inherit not yet resolved
+}
+
+// buildProfileRegistry turns a rawProfilesBlock into a ProfileRegistry.
+// With no profiles declared it installs defaultRawProfiles(); with no
+// stages declared it installs DefaultStages(). Each profile is flattened
+// by resolveProfile, whose first error (cycle, unknown parent) is returned.
+func buildProfileRegistry(raw rawProfilesBlock) (*ProfileRegistry, error) {
+	declared := raw.Profiles
+	if len(declared) == 0 {
+		declared = defaultRawProfiles()
+	}
+	reg := &ProfileRegistry{Vetting: raw.Vetting}
+	if len(reg.Vetting.Stages) == 0 {
+		reg.Vetting.Stages = DefaultStages()
+	}
+	// Every profile is resolved from scratch with an empty visited list.
+	reg.Profiles = make(map[string]Profile, len(declared))
+	for name := range declared {
+		flat, err := resolveProfile(declared, name, nil)
+		if err != nil {
+			return nil, err
+		}
+		reg.Profiles[name] = flat
+	}
+	return reg, nil
+}
+
+// resolveProfile flattens profile name: the parent (if any) is resolved
+// first and copied in, then the profile's own timeouts and knobs are
+// laid on top so child keys win. visited holds the chain walked so far.
+func resolveProfile(all map[string]rawProfile, name string, visited []string) (Profile, error) {
+	for i := range visited {
+		if visited[i] == name {
+			return Profile{}, fmt.Errorf("profile inherit cycle: %s -> %s", strings.Join(visited, " -> "), name)
+		}
+	}
+	decl, found := all[name]
+	if !found {
+		return Profile{}, fmt.Errorf("unknown profile %q", name)
+	}
+	flat := Profile{
+		Name:          name,
+		Inherit:       decl.Inherit,
+		StageTimeouts: map[string]time.Duration{},
+		Defaults:      map[string]map[string]any{},
+	}
+	if decl.Inherit != "" {
+		parent, err := resolveProfile(all, decl.Inherit, append(visited, name))
+		if err != nil {
+			return Profile{}, err
+		}
+		for stage, limit := range parent.StageTimeouts {
+			flat.StageTimeouts[stage] = limit
+		}
+		for group, knobs := range parent.Defaults {
+			cloned := make(map[string]any, len(knobs))
+			for k, v := range knobs {
+				cloned[k] = v
+			}
+			flat.Defaults[group] = cloned
+		}
+	}
+	for stage, text := range decl.StageTimeouts {
+		limit, err := time.ParseDuration(text)
+		if err != nil {
+			return Profile{}, fmt.Errorf("profile %s stage_timeouts[%s]: %w", name, stage, err)
+		}
+		flat.StageTimeouts[stage] = limit
+	}
+	for group, knobs := range decl.Defaults {
+		dest, present := flat.Defaults[group]
+		if !present {
+			dest = map[string]any{}
+			flat.Defaults[group] = dest
+		}
+		for k, v := range knobs {
+			dest[k] = v
+		}
+	}
+	return flat, nil
+}
+
+// defaultRawProfiles returns the built-in quick/deep/soak profiles that
+// buildProfileRegistry installs when the config has no `profiles:`
+// entries. soak inherits deep and overrides only the keys listed here;
+// a fresh map is built on every call, so callers may mutate the result.
+func defaultRawProfiles() map[string]rawProfile {
+	return map[string]rawProfile{
+		ProfileQuick: {
+			StageTimeouts: map[string]string{
+				"CPUStress": "5m",
+				"Storage":   "5m",
+				"Network":   "2m",
+				"Burn":      "3m",
+				"PSU":       "1m",
+			},
+			Defaults: map[string]map[string]any{
+				"cpustress": {"cpu_pass": "2m", "mem_pass": "2m", "edac_poll": "10s"},
+				"storage":   {"mode": "fio_sample", "fio_size": "1GiB", "fio_time": "3m", "fio_bs": "4k", "fio_rw": "randrw", "verify": "md5"},
+				"network":   {"duration": "60s"},
+				"burn":      {"duration": "2m", "cpu_workers": "all", "mem_pct": 50, "fio_on_spare": true, "iperf_parallel": 2},
+			},
+		},
+		ProfileDeep: {
+			StageTimeouts: map[string]string{
+				"CPUStress": "2h", // NOTE(review): equals cpu_pass+mem_pass (60m+60m); no headroom if the passes run back to back — confirm
+				"Storage":   "4h",
+				"Network":   "35m",
+				"Burn":      "3h",
+				"PSU":       "10m",
+			},
+			Defaults: map[string]map[string]any{
+				"cpustress": {"cpu_pass": "60m", "mem_pass": "60m", "edac_poll": "10s"},
+				"storage":   {"mode": "full_disk", "fio_time": "2h", "fio_bs": "4k", "fio_rw": "randrw", "verify": "md5"},
+				"network":   {"duration": "30m"},
+				"burn":      {"duration": "2h", "cpu_workers": "all", "mem_pct": 70, "fio_on_spare": true, "iperf_parallel": 4},
+			},
+		},
+		ProfileSoak: {
+			Inherit: ProfileDeep,
+			StageTimeouts: map[string]string{
+				"CPUStress": "14h",
+				"Storage":   "8h",
+				"Network":   "2h30m",
+				"Burn":      "20h",
+				"PSU":       "15m",
+			},
+			Defaults: map[string]map[string]any{
+				"cpustress": {"cpu_pass": "12h"},
+				"storage":   {"mode": "full_disk", "fio_time": "6h"},
+				"network":   {"duration": "2h"},
+				"burn":      {"duration": "18h", "iperf_parallel": 8},
+			},
+		},
+	}
+}
diff --git a/internal/db/migrations/0005_profiles_thresholds_firmware.sql b/internal/db/migrations/0005_profiles_thresholds_firmware.sql
new file mode 100644
index 0000000..61bd253
--- /dev/null
+++ b/internal/db/migrations/0005_profiles_thresholds_firmware.sql
@@ -0,0 +1,57 @@
+-- Phase-1 groundwork for profile-aware, threshold-gated vetting.
+--
+-- Adds:
+--   * runs.profile             — which profile the run is executing
+--                                (quick|deep|soak; defaults to quick for
+--                                backfill of older rows + tests).
+--   * thresholds               — seeded per run at creation from the
+--                                ProfileRegistry + per-host overrides;
+--                                immutable for that run so a late config
+--                                edit can't retroactively pass/fail it.
+--   * threshold_evaluations    — one row per observed sample vs threshold;
+--                                drives the report + pipeline badges.
+--   * firmware_snapshots       — per-run BIOS/BMC/NIC/HBA/microcode/NVMe
+--                                version captures used by SpecValidate
+--                                diffing in Phase 4.
+
+ALTER TABLE runs ADD COLUMN profile TEXT NOT NULL DEFAULT 'quick';
+
+CREATE TABLE IF NOT EXISTS thresholds (
+    id         INTEGER PRIMARY KEY AUTOINCREMENT,
+    run_id     INTEGER NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
+    stage_name TEXT    NOT NULL,                  -- "*" matches any stage
+    kind       TEXT    NOT NULL,                  -- temp|psu_volt|iperf|fio_p99_us|nic_retrans|edac_ce|edac_ue|mce|...
+    key        TEXT    NOT NULL,                  -- "*" or glob-ish match (prefix* / *suffix / exact)
+    op         TEXT    NOT NULL,                  -- lt|lte|gt|gte|within_pct
+    threshold  REAL    NOT NULL,
+    nominal    REAL    NOT NULL DEFAULT 0,         -- used by within_pct; 0 elsewhere
+    unit       TEXT    NOT NULL DEFAULT '',
+    severity   TEXT    NOT NULL,                  -- critical|warning
+    source     TEXT    NOT NULL                   -- profile|host_override
+);
+CREATE INDEX IF NOT EXISTS idx_thresholds_run  ON thresholds(run_id);
+CREATE INDEX IF NOT EXISTS idx_thresholds_kind ON thresholds(run_id, stage_name, kind);
+
+CREATE TABLE IF NOT EXISTS threshold_evaluations (
+    id           INTEGER PRIMARY KEY AUTOINCREMENT,
+    run_id       INTEGER   NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
+    threshold_id INTEGER   NOT NULL REFERENCES thresholds(id) ON DELETE CASCADE,
+    stage_name   TEXT      NOT NULL,
+    kind         TEXT      NOT NULL,
+    key          TEXT      NOT NULL,
+    ts           TIMESTAMP NOT NULL,
+    observed     REAL      NOT NULL,
+    passed       INTEGER   NOT NULL                -- 1 = sample within threshold, 0 = breach
+);
+CREATE INDEX IF NOT EXISTS idx_threshold_evals_run ON threshold_evaluations(run_id, passed);
+
+CREATE TABLE IF NOT EXISTS firmware_snapshots (
+    id         INTEGER PRIMARY KEY AUTOINCREMENT,
+    run_id     INTEGER NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
+    component  TEXT    NOT NULL,                   -- bios|bmc|nic|hba|microcode|nvme_fw
+    identifier TEXT    NOT NULL,                   -- slot/serial/device path that distinguishes this component
+    version    TEXT    NOT NULL,
+    vendor     TEXT    NOT NULL DEFAULT '',
+    raw_json   TEXT    NOT NULL DEFAULT '{}'
+);
+CREATE INDEX IF NOT EXISTS idx_firmware_run ON firmware_snapshots(run_id, component);
diff --git a/internal/model/model.go b/internal/model/model.go
index a896d01..85543ba 100644
--- a/internal/model/model.go
+++ b/internal/model/model.go
@@ -26,11 +26,13 @@ const (
 	StateWaitingReboot  RunState = "WaitingReboot"
 	StateBooting        RunState = "Booting"
 	StateInventoryCheck RunState = "InventoryCheck"
+	StateFirmware       RunState = "Firmware"
 	StateSpecValidate   RunState = "SpecValidate"
 	StateSMART          RunState = "SMART"
 	StateCPUStress      RunState = "CPUStress"
 	StateStorage        RunState = "Storage"
 	StateNetwork        RunState = "Network"
+	StateBurn           RunState = "Burn"
 	StateGPU            RunState = "GPU"
 	StatePSU            RunState = "PSU"
 	StateReporting      RunState = "Reporting"
@@ -63,6 +65,7 @@ type Run struct {
 	HoldIP            string
 	OverrideFlagsJSON string
 	NonDestructive    bool
+	Profile           string // quick|deep|soak; empty is treated as "quick"
 }
 
 type StageState string
diff --git a/internal/orchestrator/dispatcher.go b/internal/orchestrator/dispatcher.go
index 85c637f..cc255bb 100644
--- a/internal/orchestrator/dispatcher.go
+++ b/internal/orchestrator/dispatcher.go
@@ -119,9 +119,9 @@ func (d *Dispatcher) pickNext(ctx context.Context) {
 				queued = &runs[i]
 			}
 		case model.StateWaitingWoL, model.StateWaitingReboot, model.StateBooting,
-			model.StateInventoryCheck, model.StateSpecValidate, model.StateSMART,
+			model.StateInventoryCheck, model.StateFirmware, model.StateSpecValidate, model.StateSMART,
 			model.StateCPUStress, model.StateStorage, model.StateNetwork,
-			model.StateGPU, model.StatePSU, model.StateReporting:
+			model.StateBurn, model.StateGPU, model.StatePSU, model.StateReporting:
 			inFlight++
 		}
 	}
diff --git a/internal/orchestrator/statemachine.go b/internal/orchestrator/statemachine.go
index 5e3e57b..c94c96d 100644
--- a/internal/orchestrator/statemachine.go
+++ b/internal/orchestrator/statemachine.go
@@ -30,11 +30,13 @@ const (
 // "InventoryCheck". Later stages share a name with their state.
 var stageStates = map[string]model.RunState{
 	"Inventory":    model.StateInventoryCheck,
+	"Firmware":     model.StateFirmware,
 	"SpecValidate": model.StateSpecValidate,
 	"SMART":        model.StateSMART,
 	"CPUStress":    model.StateCPUStress,
 	"Storage":      model.StateStorage,
 	"Network":      model.StateNetwork,
+	"Burn":         model.StateBurn,
 	"GPU":          model.StateGPU,
 	"PSU":          model.StatePSU,
 	"Reporting":    model.StateReporting,
@@ -44,11 +46,13 @@ var stageStates = map[string]model.RunState{
 // first stage to Completed. Kept in sync with store.DefaultStageOrder.
 var stageOrder = []model.RunState{
 	model.StateInventoryCheck,
+	model.StateFirmware,
 	model.StateSpecValidate,
 	model.StateSMART,
 	model.StateCPUStress,
 	model.StateStorage,
 	model.StateNetwork,
+	model.StateBurn,
 	model.StateGPU,
 	model.StatePSU,
 	model.StateReporting,
@@ -143,9 +147,9 @@ func nextStageState(current model.RunState) (model.RunState, error) {
 func allActiveStates() []model.RunState {
 	return []model.RunState{
 		model.StateQueued, model.StateWaitingWoL, model.StateWaitingReboot, model.StateBooting,
-		model.StateInventoryCheck, model.StateSpecValidate, model.StateSMART,
+		model.StateInventoryCheck, model.StateFirmware, model.StateSpecValidate, model.StateSMART,
 		model.StateCPUStress, model.StateStorage, model.StateNetwork,
-		model.StateGPU, model.StatePSU, model.StateReporting,
+		model.StateBurn, model.StateGPU, model.StatePSU, model.StateReporting,
 	}
 }
 
diff --git a/internal/orchestrator/statemachine_test.go b/internal/orchestrator/statemachine_test.go
index 50ecf0b..32231a9 100644
--- a/internal/orchestrator/statemachine_test.go
+++ b/internal/orchestrator/statemachine_test.go
@@ -80,11 +80,13 @@ func TestTriggerAgentClaimedFromWaitingReboot(t *testing.T) {
 func TestTriggerStageMismatch(t *testing.T) {
 	stageStates := []model.RunState{
 		model.StateInventoryCheck,
+		model.StateFirmware,
 		model.StateSpecValidate,
 		model.StateSMART,
 		model.StateCPUStress,
 		model.StateStorage,
 		model.StateNetwork,
+		model.StateBurn,
 		model.StateGPU,
 		model.StatePSU,
 		model.StateReporting,
@@ -114,11 +116,13 @@ func TestTriggerStageMismatch(t *testing.T) {
 func TestStageNameForState(t *testing.T) {
 	pairs := map[string]model.RunState{
 		"Inventory":    model.StateInventoryCheck,
+		"Firmware":     model.StateFirmware,
 		"SpecValidate": model.StateSpecValidate,
 		"SMART":        model.StateSMART,
 		"CPUStress":    model.StateCPUStress,
 		"Storage":      model.StateStorage,
 		"Network":      model.StateNetwork,
+		"Burn":         model.StateBurn,
 		"GPU":          model.StateGPU,
 		"PSU":          model.StatePSU,
 		"Reporting":    model.StateReporting,
@@ -143,11 +147,13 @@ func TestNextStageWalk(t *testing.T) {
 	// one in the canonical order, and from Reporting onto Completed.
 	chain := []model.RunState{
 		model.StateInventoryCheck,
+		model.StateFirmware,
 		model.StateSpecValidate,
 		model.StateSMART,
 		model.StateCPUStress,
 		model.StateStorage,
 		model.StateNetwork,
+		model.StateBurn,
 		model.StateGPU,
 		model.StatePSU,
 		model.StateReporting,
diff --git a/internal/orchestrator/thresholds.go b/internal/orchestrator/thresholds.go
new file mode 100644
index 0000000..0b3be00
--- /dev/null
+++ b/internal/orchestrator/thresholds.go
@@ -0,0 +1,182 @@
+package orchestrator
+
+import (
+	"fmt"
+	"strings"
+)
+
+// ThresholdOp is one of the comparison operators a threshold supports.
+// within_pct is the only one that cares about a "nominal" value for
+// the key — used for PSU rails ("+12V within 5% of 12.0").
+type ThresholdOp string
+
+const (
+	OpLT        ThresholdOp = "lt"
+	OpLTE       ThresholdOp = "lte"
+	OpGT        ThresholdOp = "gt"
+	OpGTE       ThresholdOp = "gte"
+	OpWithinPct ThresholdOp = "within_pct"
+)
+
+// ThresholdSeverity routes a breach to either "fail the run" or "just
+// surface a warning in the report". The evaluator returns it alongside
+// the Pass flag so the caller can decide whether to transition the run.
+type ThresholdSeverity string
+
+const (
+	SeverityCritical ThresholdSeverity = "critical"
+	SeverityWarning  ThresholdSeverity = "warning"
+)
+
+// Threshold is the evaluator's view of a stored threshold row. It's a
+// flat, already-parsed value-object — the evaluator doesn't look at
+// the DB and the store doesn't look at the evaluator.
+type Threshold struct {
+	ID       int64
+	Stage    string // "*" (or "") matches any stage
+	Kind     string // compared literally against Sample.Kind
+	Key      string // glob-ish: "*" / "prefix*" / "*suffix" / "*infix*" / exact
+	Op       ThresholdOp
+	Value    float64 // comparison bound; a percentage when Op is within_pct
+	Nominal  float64 // for within_pct (nominal voltage/frequency)
+	Severity ThresholdSeverity
+}
+
+// Sample is a single observation the evaluator tests against matching
+// thresholds. Stage may be empty when the agent doesn't know which
+// stage posted it (e.g. the thermal sidecar running across stages) —
+// empty-stage samples only match wildcard thresholds (Stage "*" or "").
+type Sample struct {
+	Stage string
+	Kind  string
+	Key   string
+	Value float64
+}
+
+// EvalResult is the per-sample outcome of a threshold evaluation:
+// the threshold consulted (which carries the severity, so the caller
+// can fast-fail on critical breaches), the verdict, and the value.
+type EvalResult struct {
+	Threshold Threshold
+	Passed    bool
+	Observed  float64
+}
+
+// Breached returns true when the sample violated the threshold.
+func (r EvalResult) Breached() bool { return !r.Passed }
+
+// CriticalBreach returns true only for critical-severity breaches —
+// the "fail the run right now" case.
+func (r EvalResult) CriticalBreach() bool {
+	return r.Breached() && r.Threshold.Severity == SeverityCritical
+}
+
+// Evaluate tests one sample against every applicable threshold and
+// returns one EvalResult per match, in threshold order. Several rules
+// can apply at once (a generic "*" rule plus a stage-specific
+// override); each is reported separately so both get persisted.
+func Evaluate(sample Sample, thresholds []Threshold) []EvalResult {
+	results := make([]EvalResult, 0, 1)
+	for i := range thresholds {
+		rule := thresholds[i]
+		if thresholdMatchesSample(rule, sample) {
+			ok, opErr := evaluateOp(rule.Op, sample.Value, rule.Value, rule.Nominal)
+			if opErr == nil {
+				results = append(results, EvalResult{
+					Threshold: rule,
+					Passed:    ok,
+					Observed:  sample.Value,
+				})
+			}
+			// A non-nil opErr means an unknown operator: the rule is
+			// dropped rather than surfaced, so one bad threshold row
+			// cannot force every Sensor write to 500. Callers may
+			// validate operators when thresholds are inserted.
+		}
+	}
+	return results
+}
+
+// thresholdMatchesSample reports whether rule t applies to sample s.
+// All three filters must agree: kind, stage and key. Kind is compared
+// literally — there is no "any kind" wildcard. Stage and key go
+// through stageMatches / keyMatches, which accept glob-ish selectors.
+func thresholdMatchesSample(t Threshold, s Sample) bool {
+	switch {
+	case t.Kind != s.Kind:
+		return false
+	case !stageMatches(t.Stage, s.Stage):
+		return false
+	case !keyMatches(t.Key, s.Key):
+		return false
+	default:
+		return true
+	}
+}
+
+// stageMatches reports whether a threshold's stage selector covers the
+// sample's stage. "*" and "" are both wildcards, so a rule declared
+// without a stage isn't accidentally inert. Any other selector must
+// equal the sample's stage, so stage-less samples only hit wildcards.
+func stageMatches(selector, sampleStage string) bool {
+	switch selector {
+	case "", "*":
+		return true
+	}
+	return selector == sampleStage
+}
+
+// keyMatches implements the glob-ish key selector: "" and "*" match
+// anything; "prefix*", "*suffix" and "*infix*" anchor on the literal
+// part; anything else must equal key. filepath.Match is avoided so OS
+// separator rules don't leak in (key "eth0/rx_errors" is not a path).
+func keyMatches(pattern, key string) bool {
+	if pattern == "" || pattern == "*" {
+		return true
+	}
+	leading := strings.HasPrefix(pattern, "*")
+	trailing := strings.HasSuffix(pattern, "*")
+	if leading && trailing {
+		return strings.Contains(key, pattern[1:len(pattern)-1])
+	}
+	if trailing {
+		return strings.HasPrefix(key, pattern[:len(pattern)-1])
+	}
+	if leading {
+		return strings.HasSuffix(key, pattern[1:])
+	}
+	return pattern == key
+}
+
+// evaluateOp does the numeric comparison for op. within_pct passes when
+// |observed - nominal| <= |threshold/100 * nominal|. Unknown operators
+// return an error so the caller can log + drop.
+func evaluateOp(op ThresholdOp, observed, threshold, nominal float64) (bool, error) {
+	if op == OpWithinPct {
+		// A zero nominal makes the percentage band meaningless; pass
+		// rather than let a misconfigured rule fail spuriously.
+		if nominal == 0 {
+			return true, nil
+		}
+		band := (threshold / 100.0) * nominal
+		if band < 0 {
+			band = -band
+		}
+		delta := observed - nominal
+		if delta < 0 {
+			delta = -delta
+		}
+		return delta <= band, nil
+	}
+	switch op {
+	case OpLT:
+		return observed < threshold, nil
+	case OpLTE:
+		return observed <= threshold, nil
+	case OpGT:
+		return observed > threshold, nil
+	case OpGTE:
+		return observed >= threshold, nil
+	}
+	return false, fmt.Errorf("unknown op %q", op)
+}
diff --git a/internal/orchestrator/thresholds_test.go b/internal/orchestrator/thresholds_test.go
new file mode 100644
index 0000000..47117ec
--- /dev/null
+++ b/internal/orchestrator/thresholds_test.go
@@ -0,0 +1,152 @@
+package orchestrator
+
+import "testing"
+
+// TestEvaluate_Ops covers every operator against the boundary case
+// (equal to threshold) plus one clearly-inside and one clearly-outside
+// value. Table-driven because the logic is regular.
+func TestEvaluate_Ops(t *testing.T) {
+	cases := []struct {
+		name     string
+		op       ThresholdOp
+		value    float64
+		nominal  float64
+		observed float64
+		want     bool
+	}{
+		{"lt strict below", OpLT, 10, 0, 5, true},
+		{"lt equal fails", OpLT, 10, 0, 10, false},
+		{"lt above fails", OpLT, 10, 0, 15, false},
+
+		{"lte below", OpLTE, 10, 0, 5, true},
+		{"lte equal passes", OpLTE, 10, 0, 10, true},
+		{"lte above fails", OpLTE, 10, 0, 11, false},
+
+		{"gt below fails", OpGT, 900, 0, 800, false},
+		{"gt equal fails", OpGT, 900, 0, 900, false},
+		{"gt above passes", OpGT, 900, 0, 950, true},
+
+		{"gte equal passes", OpGTE, 900, 0, 900, true},
+		{"gte below fails", OpGTE, 900, 0, 800, false},
+
+		{"within_pct exact", OpWithinPct, 5, 12.0, 12.0, true},
+		{"within_pct inside", OpWithinPct, 5, 12.0, 11.7, true},
+		{"within_pct outside low", OpWithinPct, 5, 12.0, 11.0, false},
+		{"within_pct outside high", OpWithinPct, 5, 12.0, 12.7, false},
+		{"within_pct zero nominal passes", OpWithinPct, 5, 0, 99, true},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			rules := []Threshold{{
+				Stage: "*", Kind: "k", Key: "k", Op: tc.op,
+				Value: tc.value, Nominal: tc.nominal, Severity: SeverityCritical,
+			}}
+			res := Evaluate(Sample{Stage: "Any", Kind: "k", Key: "k", Value: tc.observed}, rules)
+			if len(res) != 1 {
+				t.Fatalf("expected 1 match, got %d", len(res))
+			}
+			if res[0].Passed != tc.want {
+				t.Fatalf("op=%s observed=%v want passed=%v got %v", tc.op, tc.observed, tc.want, res[0].Passed)
+			}
+		})
+	}
+}
+
+// TestEvaluate_StageMatching: a Burn-scoped rule ignores samples
+// stamped with other stages. Global "*" catches everything.
+func TestEvaluate_StageMatching(t *testing.T) {
+	rules := []Threshold{
+		{Stage: "*", Kind: "temp", Key: "cpu/*", Op: OpLT, Value: 92, Severity: SeverityCritical},
+		{Stage: "Burn", Kind: "temp", Key: "cpu/*", Op: OpLT, Value: 88, Severity: SeverityCritical},
+	}
+	// Sample from CPUStress — only the global rule applies.
+	res := Evaluate(Sample{Stage: "CPUStress", Kind: "temp", Key: "cpu/0", Value: 89}, rules)
+	if len(res) != 1 {
+		t.Fatalf("cpustress sample: expected 1 match, got %d", len(res))
+	}
+	if res[0].Threshold.Value != 92 {
+		t.Fatalf("cpustress sample matched wrong rule: %+v", res[0].Threshold)
+	}
+
+	// Sample from Burn — both rules match. The stricter one breaches.
+	res = Evaluate(Sample{Stage: "Burn", Kind: "temp", Key: "cpu/0", Value: 89}, rules)
+	if len(res) != 2 {
+		t.Fatalf("burn sample: expected 2 matches, got %d", len(res))
+	}
+	var globalPassed, burnPassed bool
+	for _, r := range res {
+		switch r.Threshold.Value {
+		case 92:
+			globalPassed = r.Passed
+		case 88:
+			burnPassed = r.Passed
+		}
+	}
+	if !globalPassed {
+		t.Fatalf("global 92C rule should pass at 89C")
+	}
+	if burnPassed {
+		t.Fatalf("burn 88C rule should breach at 89C")
+	}
+}
+
+// TestEvaluate_KeyWildcards covers "*" / "prefix*" / "*suffix".
+func TestEvaluate_KeyWildcards(t *testing.T) {
+	cases := []struct {
+		pattern string
+		key     string
+		match   bool
+	}{
+		{"*", "anything", true},
+		{"", "anything", true},
+		{"cpu/*", "cpu/0", true},
+		{"cpu/*", "gpu/0", false},
+		{"*/rate", "eth0/rate", true},
+		{"*/rate", "eth0/count", false},
+		{"exact", "exact", true},
+		{"exact", "exactly", false},
+	}
+	for _, tc := range cases {
+		t.Run(tc.pattern+"_vs_"+tc.key, func(t *testing.T) {
+			got := keyMatches(tc.pattern, tc.key)
+			if got != tc.match {
+				t.Fatalf("keyMatches(%q, %q) = %v, want %v", tc.pattern, tc.key, got, tc.match)
+			}
+		})
+	}
+}
+
+// TestEvaluate_SeverityDispatch: only critical breaches flip
+// CriticalBreach; warning-severity breaches stay advisory.
+func TestEvaluate_SeverityDispatch(t *testing.T) {
+	rules := []Threshold{
+		{Stage: "*", Kind: "temp", Key: "cpu", Op: OpLT, Value: 92, Severity: SeverityCritical},
+		{Stage: "*", Kind: "fio", Key: "p99", Op: OpLT, Value: 50000, Severity: SeverityWarning},
+	}
+	res := Evaluate(Sample{Stage: "CPU", Kind: "temp", Key: "cpu", Value: 95}, rules)
+	if len(res) != 1 || !res[0].CriticalBreach() {
+		t.Fatalf("critical breach not detected: %+v", res)
+	}
+	res = Evaluate(Sample{Stage: "Storage", Kind: "fio", Key: "p99", Value: 80000}, rules)
+	if len(res) != 1 {
+		t.Fatalf("expected 1 match, got %d", len(res))
+	}
+	if res[0].CriticalBreach() {
+		t.Fatalf("warning-severity breach should not be critical")
+	}
+	if !res[0].Breached() {
+		t.Fatalf("warning-severity rule should still show breach=true")
+	}
+}
+
+// TestEvaluate_NoMatchingThreshold: a sample that doesn't hit any rule
+// produces an empty result slice — callers treat that as "advisory".
+func TestEvaluate_NoMatchingThreshold(t *testing.T) {
+	rules := []Threshold{
+		{Stage: "*", Kind: "temp", Key: "cpu/*", Op: OpLT, Value: 92, Severity: SeverityCritical},
+	}
+	res := Evaluate(Sample{Stage: "Network", Kind: "iperf", Key: "throughput", Value: 950}, rules)
+	if len(res) != 0 {
+		t.Fatalf("unmatched sample should yield 0 results, got %d", len(res))
+	}
+}
diff --git a/internal/report/report.go b/internal/report/report.go
index 2370ec2..37f709e 100644
--- a/internal/report/report.go
+++ b/internal/report/report.go
@@ -28,7 +28,17 @@ type Data struct {
 	Host        model.Host
 	Stages      []model.Stage
 	SpecDiffs   []model.SpecDiff
-	Aggregates  []Aggregate // flattened measurement summary; see Aggregate
+	Aggregates  []Aggregate        // flattened measurement summary; see Aggregate
+	Firmware    []FirmwareSnapshot // captured firmware versions, empty if none
+}
+
+// FirmwareSnapshot is the report-facing view of one firmware row.
+// Package-local so the HTML template stays decoupled from store types.
+type FirmwareSnapshot struct {
+	Component  string
+	Identifier string
+	Version    string
+	Vendor     string
 }
 
 // Aggregate is a per (kind, key) summary of a run's measurements. Min/
@@ -196,6 +206,27 @@ const htmlTemplate = `<!doctype html>
 </table>
 </section>
 
+<section>
+<h2>Firmware ({{len .Firmware}})</h2>
+{{if .Firmware}}
+<table>
+  <thead><tr><th>Component</th><th>Identifier</th><th>Version</th><th>Vendor</th></tr></thead>
+  <tbody>
+  {{range .Firmware}}
+    <tr>
+      <td>{{.Component}}</td>
+      <td><code>{{.Identifier}}</code></td>
+      <td><code>{{.Version}}</code></td>
+      <td>{{.Vendor}}</td>
+    </tr>
+  {{end}}
+  </tbody>
+</table>
+{{else}}
+<p>No firmware snapshots captured.</p>
+{{end}}
+</section>
+
 <section>
 <h2>Spec diffs ({{len .SpecDiffs}})</h2>
 {{if .SpecDiffs}}
diff --git a/internal/spec/spec.go b/internal/spec/spec.go
index c433665..108de80 100644
--- a/internal/spec/spec.go
+++ b/internal/spec/spec.go
@@ -21,11 +21,36 @@ import (
 )
 
 type Spec struct {
-	CPU    *CPUSpec    `yaml:"cpu,omitempty"`
-	Memory *MemorySpec `yaml:"memory,omitempty"`
-	Disks  []DiskSpec  `yaml:"disks,omitempty"`
-	NICs   []NICSpec   `yaml:"nics,omitempty"`
-	GPUs   []GPUSpec   `yaml:"gpus,omitempty"`
+	CPU      *CPUSpec       `yaml:"cpu,omitempty"`
+	Memory   *MemorySpec    `yaml:"memory,omitempty"`
+	Disks    []DiskSpec     `yaml:"disks,omitempty"`
+	NICs     []NICSpec      `yaml:"nics,omitempty"`
+	GPUs     []GPUSpec      `yaml:"gpus,omitempty"`
+	Firmware []FirmwareSpec `yaml:"firmware,omitempty"`
+}
+
+// FirmwareSpec is one row in the expected-spec YAML's `firmware:` block.
+// Component is one of bios|bmc|nic|hba|microcode|nvme_fw (matches the
+// on-wire value from agent/probes.FirmwareSnapshot.Component). Identifier
+// is optional — when empty the rule applies to every observed snapshot
+// of that component (use for single-instance things like BIOS/microcode);
+// when set it pins the check to a specific NIC port / NVMe controller /
+// PCI address. Version is the expected string; comparison ignores
+// case and surrounding whitespace.
+type FirmwareSpec struct {
+	Component  string `yaml:"component"`
+	Identifier string `yaml:"identifier,omitempty"`
+	Version    string `yaml:"version"`
+}
+
+// FirmwareObserved is what the agent reported, in a spec-package-local
+// shape so callers don't need to thread store types through the diff.
+// The server converts store.FirmwareSnapshot → FirmwareObserved before
+// calling DiffFirmware.
+type FirmwareObserved struct {
+	Component  string
+	Identifier string
+	Version    string
 }
 
 type CPUSpec struct {
@@ -175,6 +200,73 @@ func diffNICs(expected, actual []NICSpec) []model.SpecDiff {
 	return out
 }
 
+// DiffFirmware returns a SpecDiff per firmware expectation that doesn't
+// find a matching observed snapshot. Matching rules:
+//   - An expected rule with Identifier set matches by (component, id);
+//     a missing observed snapshot yields a "present=false" diff.
+//   - An expected rule with Identifier empty applies to every observed
+//     snapshot of that component — useful for "all NICs must run fw
+//     8.30" without listing each port. Zero observed snapshots of the
+//     component yields a single "present=false" diff, not N.
+//   - Versions are compared case-insensitively after trimming; a
+//     mismatch emits an expected→actual diff. Components fold case too.
+func DiffFirmware(expected []FirmwareSpec, actual []FirmwareObserved) []model.SpecDiff {
+	if len(expected) == 0 {
+		return nil
+	}
+	byCompIdent := map[string]FirmwareObserved{}
+	byComp := map[string][]FirmwareObserved{}
+	for _, o := range actual {
+		byCompIdent[fwKey(o.Component, o.Identifier)] = o
+		// Fold case like fwKey so both lookup paths agree on "NIC" vs "nic".
+		ck := strings.ToLower(o.Component)
+		byComp[ck] = append(byComp[ck], o)
+	}
+	var out []model.SpecDiff
+	for _, exp := range expected {
+		comp := strings.TrimSpace(exp.Component)
+		if comp == "" || strings.TrimSpace(exp.Version) == "" {
+			continue
+		}
+		label := "firmware[" + comp
+		if exp.Identifier != "" {
+			label += "/" + exp.Identifier
+		}
+		label += "]"
+		if exp.Identifier != "" {
+			got, ok := byCompIdent[fwKey(comp, exp.Identifier)]
+			if !ok {
+				out = append(out, diff(label+".present", "true", "false"))
+				continue
+			}
+			if !strings.EqualFold(strings.TrimSpace(got.Version), strings.TrimSpace(exp.Version)) {
+				out = append(out, diff(label+".version", exp.Version, got.Version))
+			}
+			continue
+		}
+		// No identifier: fan out over every observed snapshot of comp.
+		observed := byComp[strings.ToLower(comp)]
+		if len(observed) == 0 {
+			out = append(out, diff(label+".present", "true", "false"))
+			continue
+		}
+		for _, got := range observed {
+			if !strings.EqualFold(strings.TrimSpace(got.Version), strings.TrimSpace(exp.Version)) {
+				slot := got.Identifier
+				if slot == "" {
+					slot = "*"
+				}
+				out = append(out, diff("firmware["+comp+"/"+slot+"].version", exp.Version, got.Version))
+			}
+		}
+	}
+	return out
+}
+
+func fwKey(component, identifier string) string {
+	return strings.ToLower(component) + "|" + strings.ToLower(identifier)
+}
+
 func diffGPUs(expected, actual []GPUSpec) []model.SpecDiff {
 	if len(expected) == 0 {
 		return nil
diff --git a/internal/spec/spec_test.go b/internal/spec/spec_test.go
index 761c83a..c97fb47 100644
--- a/internal/spec/spec_test.go
+++ b/internal/spec/spec_test.go
@@ -119,3 +119,96 @@ func TestDiffSeverityAlwaysCritical(t *testing.T) {
 		}
 	}
 }
+
+func TestDiffFirmwareIdentifierMatch(t *testing.T) {
+	exp := []FirmwareSpec{{Component: "bios", Version: "3.2"}}
+	obs := []FirmwareObserved{{Component: "bios", Identifier: "system", Version: "3.2"}}
+	if d := DiffFirmware(exp, obs); len(d) != 0 {
+		t.Fatalf("matching bios version should produce no diff, got %+v", d)
+	}
+}
+
+func TestDiffFirmwareVersionMismatch(t *testing.T) {
+	exp := []FirmwareSpec{{Component: "bios", Version: "3.3"}}
+	obs := []FirmwareObserved{{Component: "bios", Identifier: "system", Version: "3.2"}}
+	d := DiffFirmware(exp, obs)
+	if len(d) != 1 {
+		t.Fatalf("want 1 diff, got %d: %+v", len(d), d)
+	}
+	if d[0].Expected != "3.3" || d[0].Actual != "3.2" {
+		t.Fatalf("diff expected/actual = %q/%q, want 3.3/3.2", d[0].Expected, d[0].Actual)
+	}
+	if d[0].Severity != "critical" {
+		t.Errorf("severity = %q, want critical", d[0].Severity)
+	}
+}
+
+func TestDiffFirmwareMissingComponentPresent(t *testing.T) {
+	// Expected rule with no identifier + zero observed snapshots →
+	// single "present=false" diff, not N.
+	exp := []FirmwareSpec{{Component: "bmc", Version: "1.74"}}
+	d := DiffFirmware(exp, nil)
+	if len(d) != 1 {
+		t.Fatalf("want 1 diff for missing BMC, got %d: %+v", len(d), d)
+	}
+	if d[0].Field != "firmware[bmc].present" || d[0].Expected != "true" || d[0].Actual != "false" {
+		t.Fatalf("missing-BMC diff = %+v", d[0])
+	}
+}
+
+func TestDiffFirmwareWildcardFanOut(t *testing.T) {
+	// Expected rule with empty identifier fans across every observed
+	// snapshot of the component — one port matches, one doesn't → one diff.
+	exp := []FirmwareSpec{{Component: "nic", Version: "16.32.1010"}}
+	obs := []FirmwareObserved{
+		{Component: "nic", Identifier: "eth0", Version: "16.32.1010"},
+		{Component: "nic", Identifier: "eth1", Version: "14.28.0000"},
+	}
+	d := DiffFirmware(exp, obs)
+	if len(d) != 1 {
+		t.Fatalf("want 1 diff (mismatched eth1 only), got %d: %+v", len(d), d)
+	}
+	if d[0].Field != "firmware[nic/eth1].version" {
+		t.Errorf("field = %q, want firmware[nic/eth1].version", d[0].Field)
+	}
+}
+
+func TestDiffFirmwareIdentifierPin(t *testing.T) {
+	// Identifier set: pins the rule to a specific port. Other ports
+	// with mismatched firmware are not evaluated by this rule.
+	exp := []FirmwareSpec{{Component: "nic", Identifier: "eth0", Version: "1.0"}}
+	obs := []FirmwareObserved{
+		{Component: "nic", Identifier: "eth0", Version: "1.0"},
+		{Component: "nic", Identifier: "eth1", Version: "9.9"},
+	}
+	if d := DiffFirmware(exp, obs); len(d) != 0 {
+		t.Fatalf("pinned rule should ignore other ports, got %+v", d)
+	}
+}
+
+func TestDiffFirmwareIdentifierPinMissing(t *testing.T) {
+	// Pinned rule with no matching observed snapshot → present=false diff.
+	exp := []FirmwareSpec{{Component: "nic", Identifier: "eth0", Version: "1.0"}}
+	if d := DiffFirmware(exp, nil); len(d) != 1 || d[0].Field != "firmware[nic/eth0].present" {
+		t.Fatalf("want present=false for pinned rule, got %+v", d)
+	}
+}
+
+func TestDiffFirmwareEmptyRuleSkipped(t *testing.T) {
+	// Empty component or empty version silently skip rather than panic.
+	exp := []FirmwareSpec{{Component: "", Version: "x"}, {Component: "bios", Version: ""}}
+	obs := []FirmwareObserved{{Component: "bios", Identifier: "system", Version: "3.2"}}
+	if d := DiffFirmware(exp, obs); len(d) != 0 {
+		t.Fatalf("empty rules should skip, got %+v", d)
+	}
+}
+
+func TestDiffFirmwareCaseInsensitive(t *testing.T) {
+	// Version match is case-insensitive after trim; avoids spurious diff
+	// from ethtool's "FW1234" vs expected YAML's "fw1234".
+	exp := []FirmwareSpec{{Component: "nvme_fw", Identifier: "nvme0", Version: "fw1234"}}
+	obs := []FirmwareObserved{{Component: "nvme_fw", Identifier: "nvme0", Version: "FW1234"}}
+	if d := DiffFirmware(exp, obs); len(d) != 0 {
+		t.Fatalf("case-insensitive match expected, got %+v", d)
+	}
+}
diff --git a/internal/store/firmware.go b/internal/store/firmware.go
new file mode 100644
index 0000000..bd431f6
--- /dev/null
+++ b/internal/store/firmware.go
@@ -0,0 +1,97 @@
+package store
+
+import (
+	"context"
+	"database/sql"
+	"fmt"
+)
+
+// FirmwareSnapshot is one row in firmware_snapshots. A run captures
+// many (one per BIOS/BMC/NIC/HBA/microcode/NVMe) so SpecValidate can
+// diff them against the host's expected spec in Phase 4.
+type FirmwareSnapshot struct {
+	ID         int64  // primary key; populated when rows are read back
+	RunID      int64  // run the snapshot belongs to (run_id column)
+	Component  string // bios|bmc|nic|hba|microcode|nvme_fw
+	Identifier string // slot/serial/device path
+	Version    string // version string as captured
+	Vendor     string // vendor string as captured
+	RawJSON    string // stored in raw_json; written as "{}" when empty
+}
+
+// Firmware is the CRUD seam over firmware_snapshots: the agent's Phase-4 probe POSTs
+// rows, the orchestrator stores them here, and SpecValidate reads them back via ListForRun.
+type Firmware struct {
+	DB *sql.DB // handle every method issues its SQL through; must be non-nil
+}
+
+// Create stores one firmware snapshot row and returns its new row ID. An empty
+// RawJSON is written as "{}". The insert is unconditional: no dedup happens here.
+func (f *Firmware) Create(ctx context.Context, s FirmwareSnapshot) (int64, error) {
+	const insertSQL = `
+		INSERT INTO firmware_snapshots(run_id, component, identifier, version, vendor, raw_json)
+		VALUES(?,?,?,?,?,?)
+	`
+	if s.RawJSON == "" {
+		s.RawJSON = "{}"
+	}
+	res, err := f.DB.ExecContext(ctx, insertSQL, s.RunID, s.Component, s.Identifier, s.Version, s.Vendor, s.RawJSON)
+	if err != nil {
+		return 0, fmt.Errorf("insert firmware: %w", err)
+	}
+	return res.LastInsertId()
+}
+
+// CreateBatch inserts all rows inside one transaction, so a failure on any row
+// rolls the whole batch back. An empty slice is a no-op; empty RawJSON becomes "{}".
+func (f *Firmware) CreateBatch(ctx context.Context, rows []FirmwareSnapshot) error {
+	if len(rows) == 0 {
+		return nil
+	}
+	tx, err := f.DB.BeginTx(ctx, nil)
+	if err != nil {
+		return fmt.Errorf("begin firmware batch: %w", err)
+	}
+	defer func() { _ = tx.Rollback() }() // rollback error deliberately ignored
+	stmt, err := tx.PrepareContext(ctx, `
+		INSERT INTO firmware_snapshots(run_id, component, identifier, version, vendor, raw_json)
+		VALUES(?,?,?,?,?,?)
+	`)
+	if err != nil {
+		return fmt.Errorf("prepare firmware insert: %w", err)
+	}
+	defer func() { _ = stmt.Close() }()
+	for _, s := range rows {
+		raw := s.RawJSON
+		if raw == "" {
+			raw = "{}"
+		}
+		if _, err := stmt.ExecContext(ctx, s.RunID, s.Component, s.Identifier, s.Version, s.Vendor, raw); err != nil {
+			return fmt.Errorf("insert firmware %s/%s: %w", s.Component, s.Identifier, err)
+		}
+	}
+	return tx.Commit()
+}
+
+// ListForRun loads all firmware snapshots recorded for runID, ordered by
+// ascending row ID. A run with no snapshots yields a nil slice and nil error.
+func (f *Firmware) ListForRun(ctx context.Context, runID int64) ([]FirmwareSnapshot, error) {
+	const query = `
+		SELECT id, run_id, component, identifier, version, vendor, raw_json
+		FROM firmware_snapshots WHERE run_id = ? ORDER BY id
+	`
+	cur, err := f.DB.QueryContext(ctx, query, runID)
+	if err != nil {
+		return nil, err
+	}
+	defer cur.Close()
+	var snaps []FirmwareSnapshot
+	for cur.Next() {
+		var snap FirmwareSnapshot
+		if err := cur.Scan(&snap.ID, &snap.RunID, &snap.Component, &snap.Identifier, &snap.Version, &snap.Vendor, &snap.RawJSON); err != nil {
+			return nil, err
+		}
+		snaps = append(snaps, snap)
+	}
+	return snaps, cur.Err()
+}
diff --git a/internal/store/runs.go b/internal/store/runs.go
index 59ab104..1d142e7 100644
--- a/internal/store/runs.go
+++ b/internal/store/runs.go
@@ -14,16 +14,30 @@ type Runs struct {
 	DB *sql.DB
 }
 
+// Create inserts a new run with the default "quick" profile by delegating
+// to CreateWithProfile. Older call sites (and most tests) target this form,
+// which keeps the signature free of a profile argument.
 func (r *Runs) Create(ctx context.Context, hostID int64, tokenHash string, nonDestructive bool) (int64, error) {
+	return r.CreateWithProfile(ctx, hostID, tokenHash, nonDestructive, "quick")
+}
+
+// CreateWithProfile inserts a new run with an explicit profile
+// ("quick"|"deep"|"soak"). The UI handler is the authoritative caller;
+// empty profile falls back to "quick" so a misconfigured form doesn't
+// leave a row with a blank profile column.
+func (r *Runs) CreateWithProfile(ctx context.Context, hostID int64, tokenHash string, nonDestructive bool, profile string) (int64, error) {
+	if profile == "" {
+		profile = "quick"
+	}
 	now := time.Now().UTC()
 	nd := 0
 	if nonDestructive {
 		nd = 1
 	}
 	res, err := r.DB.ExecContext(ctx, `
-		INSERT INTO runs(host_id, state, agent_token_hash, next_boot_target, started_at, non_destructive)
-		VALUES(?,?,?,?,?,?)
-	`, hostID, string(model.StateQueued), tokenHash, "linux", now, nd)
+		INSERT INTO runs(host_id, state, agent_token_hash, next_boot_target, started_at, non_destructive, profile)
+		VALUES(?,?,?,?,?,?,?)
+	`, hostID, string(model.StateQueued), tokenHash, "linux", now, nd, profile)
 	if err != nil {
 		return 0, fmt.Errorf("insert run: %w", err)
 	}
@@ -107,14 +121,15 @@ func (r *Runs) Get(ctx context.Context, id int64) (*model.Run, error) {
 		SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
 		       COALESCE(next_boot_target,''), agent_token_hash, started_at,
 		       completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
-		       COALESCE(override_flags_json,''), COALESCE(non_destructive,0)
+		       COALESCE(override_flags_json,''), COALESCE(non_destructive,0),
+		       COALESCE(profile,'quick')
 		FROM runs WHERE id = ?
 	`, id)
 	var run model.Run
 	var completedAt sql.NullTime
 	err := row.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
 		&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
-		&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive)
+		&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive, &run.Profile)
 	if errors.Is(err, sql.ErrNoRows) {
 		return nil, ErrNotFound
 	}
@@ -133,7 +148,8 @@ func (r *Runs) LatestForHost(ctx context.Context, hostID int64) (*model.Run, err
 		SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
 		       COALESCE(next_boot_target,''), agent_token_hash, started_at,
 		       completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
-		       COALESCE(override_flags_json,''), COALESCE(non_destructive,0)
+		       COALESCE(override_flags_json,''), COALESCE(non_destructive,0),
+		       COALESCE(profile,'quick')
 		FROM runs WHERE host_id = ?
 		ORDER BY id DESC LIMIT 1
 	`, hostID)
@@ -141,7 +157,7 @@ func (r *Runs) LatestForHost(ctx context.Context, hostID int64) (*model.Run, err
 	var completedAt sql.NullTime
 	err := row.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
 		&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
-		&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive)
+		&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive, &run.Profile)
 	if errors.Is(err, sql.ErrNoRows) {
 		return nil, nil
 	}
@@ -165,7 +181,8 @@ func (r *Runs) ListForHost(ctx context.Context, hostID int64, limit int) ([]mode
 		SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
 		       COALESCE(next_boot_target,''), agent_token_hash, started_at,
 		       completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
-		       COALESCE(override_flags_json,''), COALESCE(non_destructive,0)
+		       COALESCE(override_flags_json,''), COALESCE(non_destructive,0),
+		       COALESCE(profile,'quick')
 		FROM runs
 		WHERE host_id = ?
 		ORDER BY id DESC
@@ -181,7 +198,7 @@ func (r *Runs) ListForHost(ctx context.Context, hostID int64, limit int) ([]mode
 		var completedAt sql.NullTime
 		if err := rows.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
 			&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
-			&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive); err != nil {
+			&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive, &run.Profile); err != nil {
 			return nil, err
 		}
 		if completedAt.Valid {
@@ -206,7 +223,8 @@ func (r *Runs) Active(ctx context.Context) ([]model.Run, error) {
 		SELECT id, host_id, state, COALESCE(result,''), COALESCE(failed_stage,''),
 		       COALESCE(next_boot_target,''), agent_token_hash, started_at,
 		       completed_at, COALESCE(report_path,''), COALESCE(hold_ip,''),
-		       COALESCE(override_flags_json,''), COALESCE(non_destructive,0)
+		       COALESCE(override_flags_json,''), COALESCE(non_destructive,0),
+		       COALESCE(profile,'quick')
 		FROM runs
 		WHERE state NOT IN ('Completed','Released','Cancelled')
 		ORDER BY id
@@ -221,7 +239,7 @@ func (r *Runs) Active(ctx context.Context) ([]model.Run, error) {
 		var completedAt sql.NullTime
 		if err := rows.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
 			&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
-			&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive); err != nil {
+			&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive, &run.Profile); err != nil {
 			return nil, err
 		}
 		if completedAt.Valid {
@@ -275,7 +293,7 @@ func (r *Runs) FindActiveByMAC(ctx context.Context, mac string) (*model.Run, err
 	var completedAt sql.NullTime
 	err := row.Scan(&run.ID, &run.HostID, &run.State, &run.Result, &run.FailedStage,
 		&run.NextBootTarget, &run.AgentTokenHash, &run.StartedAt,
-		&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive)
+		&completedAt, &run.ReportPath, &run.HoldIP, &run.OverrideFlagsJSON, &run.NonDestructive, &run.Profile)
 	if errors.Is(err, sql.ErrNoRows) {
 		return nil, nil
 	}
diff --git a/internal/store/stages.go b/internal/store/stages.go
index 63189e6..537b620 100644
--- a/internal/store/stages.go
+++ b/internal/store/stages.go
@@ -17,11 +17,13 @@ type Stages struct {
 // reaches Inventory; later phases add more executors but the list is fixed.
 var DefaultStageOrder = []string{
 	"Inventory",
+	"Firmware",
 	"SpecValidate",
 	"SMART",
 	"CPUStress",
 	"Storage",
 	"Network",
+	"Burn",
 	"GPU",
 	"PSU",
 	"Reporting",
diff --git a/internal/store/thresholds.go b/internal/store/thresholds.go
new file mode 100644
index 0000000..22c75d9
--- /dev/null
+++ b/internal/store/thresholds.go
@@ -0,0 +1,280 @@
+package store
+
+import (
+	"context"
+	"database/sql"
+	"fmt"
+	"time"
+)
+
+// Threshold is the DB view of a per-run threshold row. Mirrors the
+// orchestrator.Threshold value-object but keeps Severity/Op as strings
+// so callers higher up don't force this package to import orchestrator.
+type Threshold struct {
+	ID        int64  // row ID; set by CreateBatch and ListForRun
+	RunID     int64  // run this rule was seeded for (run_id column)
+	Stage     string // persisted in the stage_name column
+	Kind      string
+	Key       string
+	Op        string
+	Threshold float64 // limit value; SeedForRun fills it from ThresholdSpec.Value
+	Nominal   float64
+	Unit      string
+	Severity  string // CriticalBreaches matches rows whose value is 'critical'
+	Source    string // profile|host_override
+}
+
+// ThresholdEvaluation is one recorded comparison — the evaluator calls
+// this for every sample that matched a threshold, whether it passed
+// or breached. The report page aggregates these to show the operator
+// why a run failed (or was flagged as warning-only).
+type ThresholdEvaluation struct {
+	ID          int64
+	RunID       int64
+	ThresholdID int64 // references thresholds.id (CriticalBreaches joins on it)
+	Stage       string
+	Kind        string
+	Key         string
+	TS          time.Time // a zero value is replaced with time.Now().UTC() on insert
+	Observed    float64
+	Passed      bool // persisted as 1 when true, 0 when false
+}
+
+// Thresholds is the CRUD seam over the thresholds and threshold_evaluations
+// tables. Kept intentionally narrow: seed at run creation, list for evaluation
+// on each sensor batch, record eval results, aggregate for the report.
+type Thresholds struct {
+	DB *sql.DB // handle every method issues its SQL through; must be non-nil
+}
+
+// ThresholdSpec is the caller-supplied shape for seeding — a flat
+// value-object that carries the threshold rule plus its source so
+// the ProfileRegistry-driven seed and per-host overrides converge
+// on one insert path (SeedForRun). Kept here (not in config) so the
+// store layer doesn't have to import config.
+type ThresholdSpec struct {
+	Stage    string
+	Kind     string
+	Key      string
+	Op       string
+	Value    float64 // copied into Threshold.Threshold by SeedForRun
+	Nominal  float64
+	Unit     string
+	Severity string
+	Source   string
+}
+
+// SeedForRun materializes one Threshold row per caller-supplied spec,
+// stamps each with runID, and hands the set to CreateBatch. The rows
+// come back with their IDs populated, so no re-read is needed.
+func (t *Thresholds) SeedForRun(ctx context.Context, runID int64, specs []ThresholdSpec) ([]Threshold, error) {
+	seeded := make([]Threshold, len(specs))
+	for i, spec := range specs {
+		seeded[i] = Threshold{
+			RunID:     runID,
+			Stage:     spec.Stage,
+			Kind:      spec.Kind,
+			Key:       spec.Key,
+			Op:        spec.Op,
+			Threshold: spec.Value,
+			Nominal:   spec.Nominal,
+			Unit:      spec.Unit,
+			Severity:  spec.Severity,
+			Source:    spec.Source,
+		}
+	}
+	return t.CreateBatch(ctx, seeded)
+}
+
+// Create writes a single threshold row and reports the ID the database
+// assigned to it, which callers use to pin evaluations to the rule.
+func (t *Thresholds) Create(ctx context.Context, th Threshold) (int64, error) {
+	const insertSQL = `
+		INSERT INTO thresholds(run_id, stage_name, kind, key, op, threshold, nominal, unit, severity, source)
+		VALUES(?,?,?,?,?,?,?,?,?,?)
+	`
+	res, err := t.DB.ExecContext(ctx, insertSQL, th.RunID, th.Stage, th.Kind, th.Key, th.Op, th.Threshold, th.Nominal, th.Unit, th.Severity, th.Source)
+	if err != nil {
+		return 0, fmt.Errorf("insert threshold: %w", err)
+	}
+	return res.LastInsertId()
+}
+
+// CreateBatch is the fast path for run seeding — one transaction per
+// run, one row per threshold. Returns copies of the input rows with ID
+// set; an empty input returns (nil, nil). Any failure rolls the whole
+// batch back and is reported with the step that failed.
+func (t *Thresholds) CreateBatch(ctx context.Context, rows []Threshold) ([]Threshold, error) {
+	if len(rows) == 0 {
+		return nil, nil
+	}
+	tx, err := t.DB.BeginTx(ctx, nil)
+	if err != nil {
+		return nil, fmt.Errorf("begin threshold batch: %w", err)
+	}
+	defer func() { _ = tx.Rollback() }() // rollback error deliberately ignored
+	stmt, err := tx.PrepareContext(ctx, `
+		INSERT INTO thresholds(run_id, stage_name, kind, key, op, threshold, nominal, unit, severity, source)
+		VALUES(?,?,?,?,?,?,?,?,?,?)
+	`)
+	if err != nil {
+		return nil, fmt.Errorf("prepare threshold insert: %w", err)
+	}
+	defer func() { _ = stmt.Close() }()
+	out := make([]Threshold, 0, len(rows))
+	for _, th := range rows {
+		res, err := stmt.ExecContext(ctx, th.RunID, th.Stage, th.Kind, th.Key, th.Op,
+			th.Threshold, th.Nominal, th.Unit, th.Severity, th.Source)
+		if err != nil {
+			return nil, fmt.Errorf("insert threshold %s/%s: %w", th.Stage, th.Key, err)
+		}
+		id, err := res.LastInsertId()
+		if err != nil {
+			return nil, fmt.Errorf("threshold %s/%s insert id: %w", th.Stage, th.Key, err)
+		}
+		th.ID = id // th is the loop's copy; the caller's slice is untouched
+		out = append(out, th)
+	}
+	if err := tx.Commit(); err != nil {
+		return nil, fmt.Errorf("commit threshold batch: %w", err)
+	}
+	return out, nil
+}
+
+// ListForRun loads all thresholds seeded for runID, ordered by ascending
+// row ID. The evaluator pulls this on each /sensor batch and expects it
+// to stay cheap (a few tens of rows per run).
+func (t *Thresholds) ListForRun(ctx context.Context, runID int64) ([]Threshold, error) {
+	const query = `
+		SELECT id, run_id, stage_name, kind, key, op, threshold, nominal, unit, severity, source
+		FROM thresholds WHERE run_id = ? ORDER BY id
+	`
+	cur, err := t.DB.QueryContext(ctx, query, runID)
+	if err != nil {
+		return nil, err
+	}
+	defer cur.Close()
+	var rules []Threshold
+	for cur.Next() {
+		var rule Threshold
+		if err := cur.Scan(&rule.ID, &rule.RunID, &rule.Stage, &rule.Kind, &rule.Key, &rule.Op, &rule.Threshold, &rule.Nominal, &rule.Unit, &rule.Severity, &rule.Source); err != nil {
+			return nil, err
+		}
+		rules = append(rules, rule)
+	}
+	return rules, cur.Err()
+}
+
+// RecordEvaluation stores one evaluation outcome so the run's report has
+// a per-sample audit trail. A zero TS is replaced with the current UTC
+// time, and Passed is written as 1 (true) or 0 (false).
+func (t *Thresholds) RecordEvaluation(ctx context.Context, ev ThresholdEvaluation) error {
+	ts, passed := ev.TS, 0
+	if ev.Passed {
+		passed = 1
+	}
+	if ts.IsZero() {
+		ts = time.Now().UTC()
+	}
+	const insertSQL = `
+		INSERT INTO threshold_evaluations(run_id, threshold_id, stage_name, kind, key, ts, observed, passed)
+		VALUES(?,?,?,?,?,?,?,?)
+	`
+	if _, err := t.DB.ExecContext(ctx, insertSQL, ev.RunID, ev.ThresholdID, ev.Stage, ev.Kind, ev.Key, ts, ev.Observed, passed); err != nil {
+		return fmt.Errorf("record evaluation: %w", err)
+	}
+	return nil
+}
+
+// RecordBatch persists a slice of evaluations in one transaction, so a
+// failure on any row rolls the whole batch back. An empty slice is a
+// no-op. Rows with a zero TS are stamped with the current UTC time.
+func (t *Thresholds) RecordBatch(ctx context.Context, evals []ThresholdEvaluation) error {
+	if len(evals) == 0 {
+		return nil
+	}
+	tx, err := t.DB.BeginTx(ctx, nil)
+	if err != nil {
+		return fmt.Errorf("begin eval batch: %w", err)
+	}
+	defer func() { _ = tx.Rollback() }() // rollback error deliberately ignored
+	stmt, err := tx.PrepareContext(ctx, `
+		INSERT INTO threshold_evaluations(run_id, threshold_id, stage_name, kind, key, ts, observed, passed)
+		VALUES(?,?,?,?,?,?,?,?)
+	`)
+	if err != nil {
+		return fmt.Errorf("prepare eval insert: %w", err)
+	}
+	defer func() { _ = stmt.Close() }()
+	for _, ev := range evals {
+		passed := 0
+		if ev.Passed {
+			passed = 1
+		}
+		if ev.TS.IsZero() {
+			ev.TS = time.Now().UTC()
+		}
+		if _, err := stmt.ExecContext(ctx, ev.RunID, ev.ThresholdID, ev.Stage, ev.Kind, ev.Key, ev.TS, ev.Observed, passed); err != nil {
+			return fmt.Errorf("insert eval: %w", err)
+		}
+	}
+	return tx.Commit()
+}
+
+// ListEvaluations returns a run's evaluation history in ascending ID
+// order. The query stops at 5000 rows to protect the report page, so for
+// runs that recorded more than that only the lowest-ID rows are returned.
+func (t *Thresholds) ListEvaluations(ctx context.Context, runID int64) ([]ThresholdEvaluation, error) {
+	rows, err := t.DB.QueryContext(ctx, `
+		SELECT id, run_id, threshold_id, stage_name, kind, key, ts, observed, passed
+		FROM threshold_evaluations WHERE run_id = ?
+		ORDER BY id LIMIT 5000
+	`, runID)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+	var out []ThresholdEvaluation
+	for rows.Next() {
+		var ev ThresholdEvaluation
+		var passed int
+		if err := rows.Scan(&ev.ID, &ev.RunID, &ev.ThresholdID, &ev.Stage, &ev.Kind,
+			&ev.Key, &ev.TS, &ev.Observed, &passed); err != nil {
+			return nil, err
+		}
+		ev.Passed = passed == 1
+		out = append(out, ev)
+	}
+	return out, rows.Err()
+}
+
+// CriticalBreaches lists a run's failed evaluations (passed = 0) whose
+// threshold has severity 'critical', in ascending evaluation ID. The
+// agent-handler calls this at /result close so an aggregate breach
+// (p99 latency > bound) still flips the run to FailedHolding even if
+// no single sample tripped the fast-fail path.
+func (t *Thresholds) CriticalBreaches(ctx context.Context, runID int64) ([]ThresholdEvaluation, error) {
+	const query = `
+		SELECT e.id, e.run_id, e.threshold_id, e.stage_name, e.kind, e.key, e.ts, e.observed, e.passed
+		FROM threshold_evaluations e
+		JOIN thresholds t ON t.id = e.threshold_id
+		WHERE e.run_id = ? AND e.passed = 0 AND t.severity = 'critical'
+		ORDER BY e.id
+	`
+	cur, err := t.DB.QueryContext(ctx, query, runID)
+	if err != nil {
+		return nil, err
+	}
+	defer cur.Close()
+	var breaches []ThresholdEvaluation
+	for cur.Next() {
+		var breach ThresholdEvaluation
+		var flag int
+		if err := cur.Scan(&breach.ID, &breach.RunID, &breach.ThresholdID, &breach.Stage, &breach.Kind, &breach.Key, &breach.TS, &breach.Observed, &flag); err != nil {
+			return nil, err
+		}
+		breach.Passed = flag == 1
+		breaches = append(breaches, breach)
+	}
+	return breaches, cur.Err()
+}
diff --git a/internal/web/static/app.css b/internal/web/static/app.css
index 5d94577..7a19397 100644
--- a/internal/web/static/app.css
+++ b/internal/web/static/app.css
@@ -636,6 +636,21 @@ body.bare main { max-width: none; }
 .run-failed-stage { color: var(--danger); }
 .run-failed-stage strong { font-family: var(--mono); }
 .run-diffs { color: var(--danger); }
+.run-profile-chip {
+  display: inline-block;
+  font-family: var(--mono);
+  font-size: 11px;
+  text-transform: uppercase;
+  letter-spacing: .04em;
+  padding: 2px 8px;
+  border-radius: 999px;
+  border: 1px solid rgba(255,255,255,.15);
+  background: rgba(255,255,255,.05);
+  color: var(--text-dim);
+}
+.run-profile-quick { color: var(--accent); border-color: rgba(60,130,246,.45); background: rgba(60,130,246,.08); }
+.run-profile-deep  { color: #e5b94f;       border-color: rgba(229,185,79,.45); background: rgba(229,185,79,.08); }
+.run-profile-soak  { color: #d97a57;       border-color: rgba(217,122,87,.45); background: rgba(217,122,87,.08); }
 
 .hold-banner {
   background: rgba(229,100,102,.1);
@@ -890,6 +905,17 @@ body.bare main { max-width: none; }
 .host-actions { padding: 0; }
 .host-actions-row { display: flex; gap: 10px; flex-wrap: wrap; align-items: center; }
 .host-nd-toggle { display: inline-flex; gap: 6px; align-items: center; color: var(--text-dim); font-size: 13px; }
+.host-profile-picker {
+  border: 1px solid var(--border);
+  border-radius: var(--radius);
+  padding: 6px 10px;
+  display: inline-flex;
+  gap: 12px;
+  align-items: center;
+  margin: 0 8px 0 0;
+}
+.host-profile-picker legend { font-size: 11px; color: var(--text-dim); text-transform: uppercase; letter-spacing: .05em; padding: 0 4px; }
+.host-profile-picker label { display: inline-flex; gap: 4px; align-items: center; font-family: var(--mono); font-size: 13px; cursor: pointer; }
 
 .in-flight-banner-wrap { display: contents; }
 .in-flight-banner {
diff --git a/internal/web/templates/active_step_templ.go b/internal/web/templates/active_step_templ.go
index 4c7c13b..1e0dea7 100644
--- a/internal/web/templates/active_step_templ.go
+++ b/internal/web/templates/active_step_templ.go
@@ -65,7 +65,7 @@ func ActiveStep(d ActiveStepData) templ.Component {
 		var templ_7745c5c3_Var3 string
 		templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var2).String())
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/active_step.templ`, Line: 1, Col: 0}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `active_step.templ`, Line: 1, Col: 0}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
 		if templ_7745c5c3_Err != nil {
@@ -88,7 +88,7 @@ func ActiveStep(d ActiveStepData) templ.Component {
 		var templ_7745c5c3_Var4 string
 		templ_7745c5c3_Var4, templ_7745c5c3_Err = templ.JoinStringErrs(d.Stage.Name)
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/active_step.templ`, Line: 28, Col: 102}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `active_step.templ`, Line: 28, Col: 102}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var4))
 		if templ_7745c5c3_Err != nil {
@@ -110,7 +110,7 @@ func ActiveStep(d ActiveStepData) templ.Component {
 		var templ_7745c5c3_Var6 string
 		templ_7745c5c3_Var6, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var5).String())
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/active_step.templ`, Line: 1, Col: 0}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `active_step.templ`, Line: 1, Col: 0}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var6))
 		if templ_7745c5c3_Err != nil {
@@ -123,7 +123,7 @@ func ActiveStep(d ActiveStepData) templ.Component {
 		var templ_7745c5c3_Var7 string
 		templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs(stageMarker(string(d.Stage.State)))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/active_step.templ`, Line: 30, Col: 105}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `active_step.templ`, Line: 30, Col: 105}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7))
 		if templ_7745c5c3_Err != nil {
@@ -136,7 +136,7 @@ func ActiveStep(d ActiveStepData) templ.Component {
 		var templ_7745c5c3_Var8 string
 		templ_7745c5c3_Var8, templ_7745c5c3_Err = templ.JoinStringErrs(d.Stage.Name)
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/active_step.templ`, Line: 31, Col: 41}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `active_step.templ`, Line: 31, Col: 41}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var8))
 		if templ_7745c5c3_Err != nil {
@@ -149,7 +149,7 @@ func ActiveStep(d ActiveStepData) templ.Component {
 		var templ_7745c5c3_Var9 string
 		templ_7745c5c3_Var9, templ_7745c5c3_Err = templ.JoinStringErrs(stageDurationFromStage(d.Stage))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/active_step.templ`, Line: 32, Col: 64}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `active_step.templ`, Line: 32, Col: 64}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var9))
 		if templ_7745c5c3_Err != nil {
@@ -182,7 +182,7 @@ func ActiveStep(d ActiveStepData) templ.Component {
 		var templ_7745c5c3_Var10 string
 		templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(d.Stage.Name)
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/active_step.templ`, Line: 43, Col: 99}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `active_step.templ`, Line: 43, Col: 99}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10))
 		if templ_7745c5c3_Err != nil {
@@ -195,7 +195,7 @@ func ActiveStep(d ActiveStepData) templ.Component {
 		var templ_7745c5c3_Var11 string
 		templ_7745c5c3_Var11, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("log-%d-%s", d.RunID, d.Stage.Name))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/active_step.templ`, Line: 47, Col: 56}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `active_step.templ`, Line: 47, Col: 56}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var11))
 		if templ_7745c5c3_Err != nil {
@@ -208,7 +208,7 @@ func ActiveStep(d ActiveStepData) templ.Component {
 		var templ_7745c5c3_Var12 string
 		templ_7745c5c3_Var12, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("log-%d-%s", d.RunID, d.Stage.Name))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/active_step.templ`, Line: 48, Col: 62}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `active_step.templ`, Line: 48, Col: 62}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var12))
 		if templ_7745c5c3_Err != nil {
diff --git a/internal/web/templates/host_page.templ b/internal/web/templates/host_page.templ
index 5e29e50..d0f711b 100644
--- a/internal/web/templates/host_page.templ
+++ b/internal/web/templates/host_page.templ
@@ -102,6 +102,21 @@ templ HostActions(d HostPageData) {
 		<div class="host-actions-row">
 			if hostCanStart(d) {
 				<form method="post" action={ templ.SafeURL(fmt.Sprintf("/hosts/%d/start", d.Host.ID)) } class="inline host-start-form">
+					<fieldset class="host-profile-picker">
+						<legend>Profile</legend>
+						<label title="~10 min — post-repair sanity: all probes + gates, short budgets">
+							<input type="radio" name="profile" value="quick" checked/>
+							quick
+						</label>
+						<label title="~8–12 h — overnight soak: long CPU/RAM, full-disk fio verify, 30 min network">
+							<input type="radio" name="profile" value="deep"/>
+							deep
+						</label>
+						<label title="≥24 h — week-long burn-in; opt-in when you suspect intermittent faults">
+							<input type="radio" name="profile" value="soak"/>
+							soak
+						</label>
+					</fieldset>
 					<label class="host-nd-toggle">
 						<input type="checkbox" name="non_destructive" value="1"/>
 						Non-destructive (skip wipe-probe + disk writes)
@@ -258,6 +273,16 @@ func hostCanStartIfOnline(d HostPageData) bool {
 	return d.ActiveRun == nil
 }
 
+// profileChipValue picks the label shown on the run page's profile chip.
+// A non-empty profile is shown as-is; an empty one (older runs that
+// predate Phase 1) is displayed as "quick", the prior implicit default.
+func profileChipValue(p string) string {
+	if p != "" {
+		return p
+	}
+	return "quick"
+}
+
 // runDuration formats the elapsed time for a run using the same buckets
 // as stageDuration. In-flight runs clock from StartedAt to now so the
 // run-page header + runs-table row keep ticking on each SSE push.
diff --git a/internal/web/templates/host_page_templ.go b/internal/web/templates/host_page_templ.go
index cfc9c85..7dea6d0 100644
--- a/internal/web/templates/host_page_templ.go
+++ b/internal/web/templates/host_page_templ.go
@@ -361,7 +361,7 @@ func HostActions(d HostPageData) templ.Component {
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 21, "\" class=\"inline host-start-form\"><label class=\"host-nd-toggle\"><input type=\"checkbox\" name=\"non_destructive\" value=\"1\"> Non-destructive (skip wipe-probe + disk writes)</label> <button type=\"submit\" class=\"btn-primary\">Start vetting</button></form>")
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 21, "\" class=\"inline host-start-form\"><fieldset class=\"host-profile-picker\"><legend>Profile</legend> <label title=\"~10 min — post-repair sanity: all probes + gates, short budgets\"><input type=\"radio\" name=\"profile\" value=\"quick\" checked> quick</label> <label title=\"~8–12 h — overnight soak: long CPU/RAM, full-disk fio verify, 30 min network\"><input type=\"radio\" name=\"profile\" value=\"deep\"> deep</label> <label title=\"≥24 h — week-long burn-in; opt-in when you suspect intermittent faults\"><input type=\"radio\" name=\"profile\" value=\"soak\"> soak</label></fieldset><label class=\"host-nd-toggle\"><input type=\"checkbox\" name=\"non_destructive\" value=\"1\"> Non-destructive (skip wipe-probe + disk writes)</label> <button type=\"submit\" class=\"btn-primary\">Start vetting</button></form>")
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
@@ -383,7 +383,7 @@ func HostActions(d HostPageData) templ.Component {
 		var templ_7745c5c3_Var19 templ.SafeURL
 		templ_7745c5c3_Var19, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/delete", d.Host.ID)))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 116, Col: 89}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 131, Col: 89}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var19))
 		if templ_7745c5c3_Err != nil {
@@ -428,7 +428,7 @@ func InFlightBanner(d HostPageData) templ.Component {
 		var templ_7745c5c3_Var21 string
 		templ_7745c5c3_Var21, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-inflight-%d", d.Host.ID))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 128, Col: 51}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 143, Col: 51}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var21))
 		if templ_7745c5c3_Err != nil {
@@ -441,7 +441,7 @@ func InFlightBanner(d HostPageData) templ.Component {
 		var templ_7745c5c3_Var22 string
 		templ_7745c5c3_Var22, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-inflight-%d", d.Host.ID))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 130, Col: 57}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 145, Col: 57}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var22))
 		if templ_7745c5c3_Err != nil {
@@ -459,7 +459,7 @@ func InFlightBanner(d HostPageData) templ.Component {
 			var templ_7745c5c3_Var23 templ.SafeURL
 			templ_7745c5c3_Var23, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/runs/%d", d.ActiveRun.ID)))
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 134, Col: 92}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 149, Col: 92}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var23))
 			if templ_7745c5c3_Err != nil {
@@ -472,7 +472,7 @@ func InFlightBanner(d HostPageData) templ.Component {
 			var templ_7745c5c3_Var24 string
 			templ_7745c5c3_Var24, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("%d", d.ActiveRun.ID))
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 135, Col: 74}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 150, Col: 74}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var24))
 			if templ_7745c5c3_Err != nil {
@@ -485,7 +485,7 @@ func InFlightBanner(d HostPageData) templ.Component {
 			var templ_7745c5c3_Var25 string
 			templ_7745c5c3_Var25, templ_7745c5c3_Err = templ.JoinStringErrs(tileStatus(d.ActiveRun))
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 136, Col: 59}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 151, Col: 59}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var25))
 			if templ_7745c5c3_Err != nil {
@@ -541,7 +541,7 @@ func HostEmptyState(d HostPageData) templ.Component {
 			var templ_7745c5c3_Var27 templ.SafeURL
 			templ_7745c5c3_Var27, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/start", d.Host.ID)))
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 152, Col: 88}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 167, Col: 88}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var27))
 			if templ_7745c5c3_Err != nil {
@@ -655,7 +655,7 @@ func RunRow(d RunRowData) templ.Component {
 		var templ_7745c5c3_Var31 string
 		templ_7745c5c3_Var31, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("runrow-%d", d.Run.ID))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 204, Col: 41}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 219, Col: 41}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var31))
 		if templ_7745c5c3_Err != nil {
@@ -681,7 +681,7 @@ func RunRow(d RunRowData) templ.Component {
 		var templ_7745c5c3_Var33 string
 		templ_7745c5c3_Var33, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("runrow-%d", d.Run.ID))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 206, Col: 47}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 221, Col: 47}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var33))
 		if templ_7745c5c3_Err != nil {
@@ -694,7 +694,7 @@ func RunRow(d RunRowData) templ.Component {
 		var templ_7745c5c3_Var34 templ.SafeURL
 		templ_7745c5c3_Var34, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/runs/%d", d.Run.ID)))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 210, Col: 61}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 225, Col: 61}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var34))
 		if templ_7745c5c3_Err != nil {
@@ -707,7 +707,7 @@ func RunRow(d RunRowData) templ.Component {
 		var templ_7745c5c3_Var35 string
 		templ_7745c5c3_Var35, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("#%d", d.Run.ID))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 210, Col: 94}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 225, Col: 94}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var35))
 		if templ_7745c5c3_Err != nil {
@@ -742,7 +742,7 @@ func RunRow(d RunRowData) templ.Component {
 		var templ_7745c5c3_Var38 string
 		templ_7745c5c3_Var38, templ_7745c5c3_Err = templ.JoinStringErrs(tileStatus(&d.Run))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 213, Col: 92}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 228, Col: 92}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var38))
 		if templ_7745c5c3_Err != nil {
@@ -755,7 +755,7 @@ func RunRow(d RunRowData) templ.Component {
 		var templ_7745c5c3_Var39 string
 		templ_7745c5c3_Var39, templ_7745c5c3_Err = templ.JoinStringErrs(relativeTime(d.Run.StartedAt))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 215, Col: 62}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 230, Col: 62}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var39))
 		if templ_7745c5c3_Err != nil {
@@ -768,7 +768,7 @@ func RunRow(d RunRowData) templ.Component {
 		var templ_7745c5c3_Var40 string
 		templ_7745c5c3_Var40, templ_7745c5c3_Err = templ.JoinStringErrs(runDuration(&d.Run))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 216, Col: 53}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 231, Col: 53}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var40))
 		if templ_7745c5c3_Err != nil {
@@ -805,7 +805,7 @@ func RunRow(d RunRowData) templ.Component {
 			var templ_7745c5c3_Var43 string
 			templ_7745c5c3_Var43, templ_7745c5c3_Err = templ.JoinStringErrs(name)
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 221, Col: 94}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 236, Col: 94}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var43))
 			if templ_7745c5c3_Err != nil {
@@ -823,7 +823,7 @@ func RunRow(d RunRowData) templ.Component {
 		var templ_7745c5c3_Var44 templ.SafeURL
 		templ_7745c5c3_Var44, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/runs/%d", d.Run.ID)))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 226, Col: 84}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_page.templ`, Line: 241, Col: 84}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var44))
 		if templ_7745c5c3_Err != nil {
@@ -867,6 +867,16 @@ func hostCanStartIfOnline(d HostPageData) bool {
 	return d.ActiveRun == nil
 }
 
+// profileChipValue normalizes a Run.Profile string for display on the
+// run page chip: an empty value (older runs that predate Phase 1) is
+// shown as "quick", the prior implicit default; others pass through.
+func profileChipValue(p string) string {
+	if p == "" {
+		return "quick"
+	}
+	return p
+}
+
 // runDuration formats the elapsed time for a run using the same buckets
 // as stageDuration. In-flight runs clock from StartedAt to now so the
 // run-page header + runs-table row keep ticking on each SSE push.
diff --git a/internal/web/templates/host_tile_templ.go b/internal/web/templates/host_tile_templ.go
index c3663b0..cc1bfd9 100644
--- a/internal/web/templates/host_tile_templ.go
+++ b/internal/web/templates/host_tile_templ.go
@@ -55,7 +55,7 @@ func HostTile(t TileData) templ.Component {
 		var templ_7745c5c3_Var3 string
 		templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("host-%d", t.Host.ID))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 19, Col: 40}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 19, Col: 40}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
 		if templ_7745c5c3_Err != nil {
@@ -68,7 +68,7 @@ func HostTile(t TileData) templ.Component {
 		var templ_7745c5c3_Var4 string
 		templ_7745c5c3_Var4, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var2).String())
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 1, Col: 0}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 1, Col: 0}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var4))
 		if templ_7745c5c3_Err != nil {
@@ -81,7 +81,7 @@ func HostTile(t TileData) templ.Component {
 		var templ_7745c5c3_Var5 string
 		templ_7745c5c3_Var5, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("tile-%d", t.Host.ID))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 21, Col: 46}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 21, Col: 46}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var5))
 		if templ_7745c5c3_Err != nil {
@@ -94,7 +94,7 @@ func HostTile(t TileData) templ.Component {
 		var templ_7745c5c3_Var6 templ.SafeURL
 		templ_7745c5c3_Var6, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d", t.Host.ID)))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 24, Col: 80}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 24, Col: 80}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var6))
 		if templ_7745c5c3_Err != nil {
@@ -107,7 +107,7 @@ func HostTile(t TileData) templ.Component {
 		var templ_7745c5c3_Var7 string
 		templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs("Open " + t.Host.Name)
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 24, Col: 117}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 24, Col: 117}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7))
 		if templ_7745c5c3_Err != nil {
@@ -120,7 +120,7 @@ func HostTile(t TileData) templ.Component {
 		var templ_7745c5c3_Var8 string
 		templ_7745c5c3_Var8, templ_7745c5c3_Err = templ.JoinStringErrs(t.Host.Name)
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 26, Col: 39}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 26, Col: 39}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var8))
 		if templ_7745c5c3_Err != nil {
@@ -142,7 +142,7 @@ func HostTile(t TileData) templ.Component {
 		var templ_7745c5c3_Var10 string
 		templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var9).String())
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 1, Col: 0}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 1, Col: 0}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10))
 		if templ_7745c5c3_Err != nil {
@@ -155,7 +155,7 @@ func HostTile(t TileData) templ.Component {
 		var templ_7745c5c3_Var11 string
 		templ_7745c5c3_Var11, templ_7745c5c3_Err = templ.JoinStringErrs(lastSeenLabel(t.LastSeenAt))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 28, Col: 95}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 28, Col: 95}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var11))
 		if templ_7745c5c3_Err != nil {
@@ -168,7 +168,7 @@ func HostTile(t TileData) templ.Component {
 		var templ_7745c5c3_Var12 string
 		templ_7745c5c3_Var12, templ_7745c5c3_Err = templ.JoinStringErrs(tileStatus(t.Latest))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 29, Col: 51}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 29, Col: 51}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var12))
 		if templ_7745c5c3_Err != nil {
@@ -186,7 +186,7 @@ func HostTile(t TileData) templ.Component {
 			var templ_7745c5c3_Var13 templ.SafeURL
 			templ_7745c5c3_Var13, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/start", t.Host.ID)))
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 34, Col: 89}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 34, Col: 89}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var13))
 			if templ_7745c5c3_Err != nil {
@@ -209,7 +209,7 @@ func HostTile(t TileData) templ.Component {
 			var templ_7745c5c3_Var14 templ.SafeURL
 			templ_7745c5c3_Var14, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/cancel", t.Host.ID)))
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 44, Col: 90}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 44, Col: 90}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var14))
 			if templ_7745c5c3_Err != nil {
@@ -227,7 +227,7 @@ func HostTile(t TileData) templ.Component {
 			var templ_7745c5c3_Var15 templ.SafeURL
 			templ_7745c5c3_Var15, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/reports/%d", t.Latest.ID)))
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/host_tile.templ`, Line: 48, Col: 88}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `host_tile.templ`, Line: 48, Col: 88}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var15))
 			if templ_7745c5c3_Err != nil {
diff --git a/internal/web/templates/layout_templ.go b/internal/web/templates/layout_templ.go
index 0d5ce70..5b9aec0 100644
--- a/internal/web/templates/layout_templ.go
+++ b/internal/web/templates/layout_templ.go
@@ -36,7 +36,7 @@ func Layout(title string) templ.Component {
 		var templ_7745c5c3_Var2 string
 		templ_7745c5c3_Var2, templ_7745c5c3_Err = templ.JoinStringErrs(title)
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/layout.templ`, Line: 9, Col: 17}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `layout.templ`, Line: 9, Col: 17}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var2))
 		if templ_7745c5c3_Err != nil {
@@ -86,7 +86,7 @@ func BareLayout(title string) templ.Component {
 		var templ_7745c5c3_Var4 string
 		templ_7745c5c3_Var4, templ_7745c5c3_Err = templ.JoinStringErrs(title)
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/layout.templ`, Line: 39, Col: 17}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `layout.templ`, Line: 39, Col: 17}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var4))
 		if templ_7745c5c3_Err != nil {
diff --git a/internal/web/templates/pipeline.templ b/internal/web/templates/pipeline.templ
index 910f52d..9a2419e 100644
--- a/internal/web/templates/pipeline.templ
+++ b/internal/web/templates/pipeline.templ
@@ -40,11 +40,13 @@ func runStateRank(s model.RunState) int {
 		model.StateWaitingReboot,
 		model.StateBooting,
 		model.StateInventoryCheck,
+		model.StateFirmware,
 		model.StateSpecValidate,
 		model.StateSMART,
 		model.StateCPUStress,
 		model.StateStorage,
 		model.StateNetwork,
+		model.StateBurn,
 		model.StateGPU,
 		model.StatePSU,
 		model.StateReporting,
@@ -205,11 +207,13 @@ func firstStageState(run *model.Run) model.RunState {
 func stageStateByName(name string) (model.RunState, bool) {
 	m := map[string]model.RunState{
 		"Inventory":    model.StateInventoryCheck,
+		"Firmware":     model.StateFirmware,
 		"SpecValidate": model.StateSpecValidate,
 		"SMART":        model.StateSMART,
 		"CPUStress":    model.StateCPUStress,
 		"Storage":      model.StateStorage,
 		"Network":      model.StateNetwork,
+		"Burn":         model.StateBurn,
 		"GPU":          model.StateGPU,
 		"PSU":          model.StatePSU,
 		"Reporting":    model.StateReporting,
diff --git a/internal/web/templates/pipeline_templ.go b/internal/web/templates/pipeline_templ.go
index 09930b5..b81db67 100644
--- a/internal/web/templates/pipeline_templ.go
+++ b/internal/web/templates/pipeline_templ.go
@@ -48,11 +48,13 @@ func runStateRank(s model.RunState) int {
 		model.StateWaitingReboot,
 		model.StateBooting,
 		model.StateInventoryCheck,
+		model.StateFirmware,
 		model.StateSpecValidate,
 		model.StateSMART,
 		model.StateCPUStress,
 		model.StateStorage,
 		model.StateNetwork,
+		model.StateBurn,
 		model.StateGPU,
 		model.StatePSU,
 		model.StateReporting,
@@ -213,11 +215,13 @@ func firstStageState(run *model.Run) model.RunState {
 func stageStateByName(name string) (model.RunState, bool) {
 	m := map[string]model.RunState{
 		"Inventory":    model.StateInventoryCheck,
+		"Firmware":     model.StateFirmware,
 		"SpecValidate": model.StateSpecValidate,
 		"SMART":        model.StateSMART,
 		"CPUStress":    model.StateCPUStress,
 		"Storage":      model.StateStorage,
 		"Network":      model.StateNetwork,
+		"Burn":         model.StateBurn,
 		"GPU":          model.StateGPU,
 		"PSU":          model.StatePSU,
 		"Reporting":    model.StateReporting,
@@ -312,7 +316,7 @@ func Pipeline(nodes []PipelineNode) templ.Component {
 				var templ_7745c5c3_Var3 string
 				templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var2).String())
 				if templ_7745c5c3_Err != nil {
-					return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 1, Col: 0}
+					return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 1, Col: 0}
 				}
 				_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
 				if templ_7745c5c3_Err != nil {
@@ -339,7 +343,7 @@ func Pipeline(nodes []PipelineNode) templ.Component {
 			var templ_7745c5c3_Var5 string
 			templ_7745c5c3_Var5, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var4).String())
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 1, Col: 0}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 1, Col: 0}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var5))
 			if templ_7745c5c3_Err != nil {
@@ -361,7 +365,7 @@ func Pipeline(nodes []PipelineNode) templ.Component {
 			var templ_7745c5c3_Var7 string
 			templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var6).String())
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 1, Col: 0}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 1, Col: 0}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7))
 			if templ_7745c5c3_Err != nil {
@@ -374,7 +378,7 @@ func Pipeline(nodes []PipelineNode) templ.Component {
 			var templ_7745c5c3_Var8 string
 			templ_7745c5c3_Var8, templ_7745c5c3_Err = templ.JoinStringErrs(stageMarker(n.State))
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 275, Col: 77}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 279, Col: 77}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var8))
 			if templ_7745c5c3_Err != nil {
@@ -387,7 +391,7 @@ func Pipeline(nodes []PipelineNode) templ.Component {
 			var templ_7745c5c3_Var9 string
 			templ_7745c5c3_Var9, templ_7745c5c3_Err = templ.JoinStringErrs(n.Name)
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 276, Col: 36}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 280, Col: 36}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var9))
 			if templ_7745c5c3_Err != nil {
@@ -400,7 +404,7 @@ func Pipeline(nodes []PipelineNode) templ.Component {
 			var templ_7745c5c3_Var10 string
 			templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(stageDuration(n))
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 277, Col: 50}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 281, Col: 50}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10))
 			if templ_7745c5c3_Err != nil {
@@ -454,7 +458,7 @@ func PipelineSection(run *model.Run, nodes []PipelineNode) templ.Component {
 		var templ_7745c5c3_Var12 string
 		templ_7745c5c3_Var12, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("pipeline-%d", run.ID))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 292, Col: 41}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 296, Col: 41}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var12))
 		if templ_7745c5c3_Err != nil {
@@ -467,7 +471,7 @@ func PipelineSection(run *model.Run, nodes []PipelineNode) templ.Component {
 		var templ_7745c5c3_Var13 string
 		templ_7745c5c3_Var13, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("pipeline-%d", run.ID))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/pipeline.templ`, Line: 294, Col: 47}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `pipeline.templ`, Line: 298, Col: 47}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var13))
 		if templ_7745c5c3_Err != nil {
diff --git a/internal/web/templates/pipeline_test.go b/internal/web/templates/pipeline_test.go
index fde9feb..51fec30 100644
--- a/internal/web/templates/pipeline_test.go
+++ b/internal/web/templates/pipeline_test.go
@@ -8,26 +8,28 @@ import (
 )
 
 // node indexes for the default pipeline layout: pre-stages (3) + stage
-// rows (9) + terminal Completed (1) = 13 nodes.
+// rows (11) + terminal Completed (1) = 15 nodes.
 const (
 	idxQueued        = 0
 	idxWaitingReboot = 1
 	idxBooting       = 2
 	idxInventory     = 3
-	idxSpecValidate  = 4
-	idxSMART         = 5
-	idxCPUStress     = 6
-	idxStorage       = 7
-	idxNetwork       = 8
-	idxGPU           = 9
-	idxPSU           = 10
-	idxReporting     = 11
-	idxCompleted     = 12
+	idxFirmware      = 4
+	idxSpecValidate  = 5
+	idxSMART         = 6
+	idxCPUStress     = 7
+	idxStorage       = 8
+	idxNetwork       = 9
+	idxBurn          = 10
+	idxGPU           = 11
+	idxPSU           = 12
+	idxReporting     = 13
+	idxCompleted     = 14
 )
 
 // seedStages returns a fresh all-pending stage slice in the canonical order.
 func seedStages() []model.Stage {
-	names := []string{"Inventory", "SpecValidate", "SMART", "CPUStress", "Storage", "Network", "GPU", "PSU", "Reporting"}
+	names := []string{"Inventory", "Firmware", "SpecValidate", "SMART", "CPUStress", "Storage", "Network", "Burn", "GPU", "PSU", "Reporting"}
 	out := make([]model.Stage, len(names))
 	for i, n := range names {
 		out[i] = model.Stage{Name: n, Ordinal: i, State: model.StagePending}
@@ -37,10 +39,10 @@ func seedStages() []model.Stage {
 
 func TestBuildPipeline_NoRun(t *testing.T) {
 	nodes := BuildPipeline(nil, nil)
-	// Ghost pipeline: 3 pre-stages + 9 stage ghosts + 1 terminal = 13
+	// Ghost pipeline: 3 pre-stages + 11 stage ghosts + 1 terminal = 15
 	// nodes, all pending.
-	if len(nodes) != 13 {
-		t.Fatalf("len = %d, want 13", len(nodes))
+	if len(nodes) != 15 {
+		t.Fatalf("len = %d, want 15", len(nodes))
 	}
 	for i, n := range nodes {
 		if n.State != "pending" {
@@ -56,8 +58,8 @@ func TestBuildPipeline_NoRun(t *testing.T) {
 func TestBuildPipeline_GhostStagesBeforeClaim(t *testing.T) {
 	run := &model.Run{State: model.StateWaitingReboot}
 	nodes := BuildPipeline(run, nil)
-	if len(nodes) != 13 {
-		t.Fatalf("len = %d, want 13", len(nodes))
+	if len(nodes) != 15 {
+		t.Fatalf("len = %d, want 15", len(nodes))
 	}
 	if nodes[idxQueued].State != "passed" {
 		t.Errorf("Queued = %q, want passed", nodes[idxQueued].State)
@@ -65,7 +67,7 @@ func TestBuildPipeline_GhostStagesBeforeClaim(t *testing.T) {
 	if nodes[idxWaitingReboot].State != "running" {
 		t.Errorf("WaitingReboot = %q, want running", nodes[idxWaitingReboot].State)
 	}
-	// All 9 stage ghosts must be pending — nothing has started yet.
+	// All 11 stage ghosts must be pending — nothing has started yet.
 	for i := idxInventory; i <= idxReporting; i++ {
 		if nodes[i].State != "pending" {
 			t.Errorf("%s (ghost) = %q, want pending", nodes[i].Name, nodes[i].State)
@@ -81,19 +83,20 @@ func TestBuildPipeline_GhostStagesBeforeClaim(t *testing.T) {
 // pending ghosts rather than silently disappearing.
 func TestBuildPipeline_GhostStagesDuringStage(t *testing.T) {
 	run := &model.Run{State: model.StateSMART}
-	// Only Inventory + SpecValidate seeded; SMART onwards are ghosts.
+	// Only Inventory + Firmware + SpecValidate seeded; SMART onwards are ghosts.
 	stages := []model.Stage{
 		{Name: "Inventory", Ordinal: 0, State: model.StagePassed},
-		{Name: "SpecValidate", Ordinal: 1, State: model.StagePassed},
+		{Name: "Firmware", Ordinal: 1, State: model.StagePassed},
+		{Name: "SpecValidate", Ordinal: 2, State: model.StagePassed},
 	}
 	nodes := BuildPipeline(run, stages)
-	if len(nodes) != 13 {
-		t.Fatalf("len = %d, want 13", len(nodes))
+	if len(nodes) != 15 {
+		t.Fatalf("len = %d, want 15", len(nodes))
 	}
 	if nodes[idxSMART].State != "running" {
 		t.Errorf("SMART (ghost) = %q, want running", nodes[idxSMART].State)
 	}
-	for _, i := range []int{idxCPUStress, idxStorage, idxNetwork, idxGPU, idxPSU, idxReporting} {
+	for _, i := range []int{idxCPUStress, idxStorage, idxNetwork, idxBurn, idxGPU, idxPSU, idxReporting} {
 		if nodes[i].State != "pending" {
 			t.Errorf("%s (ghost) = %q, want pending", nodes[i].Name, nodes[i].State)
 		}
@@ -103,12 +106,13 @@ func TestBuildPipeline_GhostStagesDuringStage(t *testing.T) {
 func TestBuildPipeline_Running(t *testing.T) {
 	run := &model.Run{State: model.StateSMART}
 	stages := seedStages()
-	stages[0].State = model.StagePassed
-	stages[1].State = model.StagePassed
-	stages[2].State = model.StageRunning
+	stages[0].State = model.StagePassed // Inventory
+	stages[1].State = model.StagePassed // Firmware
+	stages[2].State = model.StagePassed // SpecValidate
+	stages[3].State = model.StageRunning // SMART
 	nodes := BuildPipeline(run, stages)
-	if len(nodes) != 13 {
-		t.Fatalf("len = %d, want 13", len(nodes))
+	if len(nodes) != 15 {
+		t.Fatalf("len = %d, want 15", len(nodes))
 	}
 	// Pre-stages are all past for a run that has reached SMART.
 	for i := idxQueued; i <= idxBooting; i++ {
@@ -136,10 +140,10 @@ func TestBuildPipeline_Running(t *testing.T) {
 func TestBuildPipeline_Failed(t *testing.T) {
 	run := &model.Run{State: model.StateFailedHolding, FailedStage: "Storage"}
 	stages := seedStages()
-	for i := 0; i <= 3; i++ {
+	for i := 0; i <= 4; i++ {
 		stages[i].State = model.StagePassed
 	}
-	stages[4].State = model.StageFailed // Storage
+	stages[5].State = model.StageFailed // Storage
 	nodes := BuildPipeline(run, stages)
 	// Pre-stages are past a run that reached Storage.
 	for i := idxQueued; i <= idxBooting; i++ {
@@ -150,7 +154,7 @@ func TestBuildPipeline_Failed(t *testing.T) {
 	if nodes[idxStorage].State != "failed" {
 		t.Errorf("Storage = %q, want failed", nodes[idxStorage].State)
 	}
-	for _, i := range []int{idxNetwork, idxGPU, idxPSU, idxReporting} {
+	for _, i := range []int{idxNetwork, idxBurn, idxGPU, idxPSU, idxReporting} {
 		if nodes[i].State != "skipped" {
 			t.Errorf("%s = %q, want skipped", nodes[i].Name, nodes[i].State)
 		}
diff --git a/internal/web/templates/registration_templ.go b/internal/web/templates/registration_templ.go
index ed0cfeb..f294366 100644
--- a/internal/web/templates/registration_templ.go
+++ b/internal/web/templates/registration_templ.go
@@ -64,7 +64,7 @@ func Registration(form RegistrationForm) templ.Component {
 				var templ_7745c5c3_Var3 string
 				templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(form.Error)
 				if templ_7745c5c3_Err != nil {
-					return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/registration.templ`, Line: 22, Col: 35}
+					return templ.Error{Err: templ_7745c5c3_Err, FileName: `registration.templ`, Line: 22, Col: 35}
 				}
 				_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
 				if templ_7745c5c3_Err != nil {
@@ -83,7 +83,7 @@ func Registration(form RegistrationForm) templ.Component {
 				var templ_7745c5c3_Var4 string
 				templ_7745c5c3_Var4, templ_7745c5c3_Err = templ.JoinStringErrs("curl -fsSL " + form.QuickRegisterURL + "/register/quick.sh | sudo bash")
 				if templ_7745c5c3_Err != nil {
-					return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/registration.templ`, Line: 28, Col: 108}
+					return templ.Error{Err: templ_7745c5c3_Err, FileName: `registration.templ`, Line: 28, Col: 108}
 				}
 				_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var4))
 				if templ_7745c5c3_Err != nil {
@@ -101,7 +101,7 @@ func Registration(form RegistrationForm) templ.Component {
 			var templ_7745c5c3_Var5 string
 			templ_7745c5c3_Var5, templ_7745c5c3_Err = templ.JoinStringErrs(form.Name)
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/registration.templ`, Line: 38, Col: 55}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `registration.templ`, Line: 38, Col: 55}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var5))
 			if templ_7745c5c3_Err != nil {
@@ -114,7 +114,7 @@ func Registration(form RegistrationForm) templ.Component {
 			var templ_7745c5c3_Var6 string
 			templ_7745c5c3_Var6, templ_7745c5c3_Err = templ.JoinStringErrs(form.MAC)
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/registration.templ`, Line: 42, Col: 53}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `registration.templ`, Line: 42, Col: 53}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var6))
 			if templ_7745c5c3_Err != nil {
@@ -127,7 +127,7 @@ func Registration(form RegistrationForm) templ.Component {
 			var templ_7745c5c3_Var7 string
 			templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs(form.WoLBroadcastIP)
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/registration.templ`, Line: 47, Col: 78}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `registration.templ`, Line: 47, Col: 78}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7))
 			if templ_7745c5c3_Err != nil {
@@ -140,7 +140,7 @@ func Registration(form RegistrationForm) templ.Component {
 			var templ_7745c5c3_Var8 string
 			templ_7745c5c3_Var8, templ_7745c5c3_Err = templ.JoinStringErrs(defaultPort(form.WoLPort))
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/registration.templ`, Line: 51, Col: 78}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `registration.templ`, Line: 51, Col: 78}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var8))
 			if templ_7745c5c3_Err != nil {
@@ -153,7 +153,7 @@ func Registration(form RegistrationForm) templ.Component {
 			var templ_7745c5c3_Var9 string
 			templ_7745c5c3_Var9, templ_7745c5c3_Err = templ.JoinStringErrs(form.ExpectedSpecYAML)
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/registration.templ`, Line: 56, Col: 127}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `registration.templ`, Line: 56, Col: 127}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var9))
 			if templ_7745c5c3_Err != nil {
@@ -166,7 +166,7 @@ func Registration(form RegistrationForm) templ.Component {
 			var templ_7745c5c3_Var10 string
 			templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(form.Notes)
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/registration.templ`, Line: 60, Col: 51}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `registration.templ`, Line: 60, Col: 51}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10))
 			if templ_7745c5c3_Err != nil {
diff --git a/internal/web/templates/run_detail.templ b/internal/web/templates/run_detail.templ
index 1b2421a..493a0f8 100644
--- a/internal/web/templates/run_detail.templ
+++ b/internal/web/templates/run_detail.templ
@@ -83,6 +83,7 @@ templ RunHeader(d RunPageData) {
 		<div class="run-header-left">
 			<h1 class="run-header-name">{ fmt.Sprintf("Run #%d", d.Run.ID) }</h1>
 			<span class={ "run-status-badge", "run-status-" + tileMood(&d.Run) }>{ tileStatus(&d.Run) }</span>
+			<span class={ "run-profile-chip", "run-profile-" + profileChipValue(d.Run.Profile) }>{ profileChipValue(d.Run.Profile) }</span>
 			<span class="run-duration">{ runDuration(&d.Run) }</span>
 			if d.Run.FailedStage != "" {
 				<span class="run-failed-stage">failed at <strong>{ d.Run.FailedStage }</strong></span>
diff --git a/internal/web/templates/run_detail_templ.go b/internal/web/templates/run_detail_templ.go
index 7ffd9b5..a46a0f2 100644
--- a/internal/web/templates/run_detail_templ.go
+++ b/internal/web/templates/run_detail_templ.go
@@ -286,142 +286,177 @@ func RunHeader(d RunPageData) templ.Component {
 		if templ_7745c5c3_Err != nil {
 			return templ_7745c5c3_Err
 		}
-		templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 15, "</span> <span class=\"run-duration\">")
+		templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 15, "</span> ")
 		if templ_7745c5c3_Err != nil {
 			return templ_7745c5c3_Err
 		}
-		var templ_7745c5c3_Var15 string
-		templ_7745c5c3_Var15, templ_7745c5c3_Err = templ.JoinStringErrs(runDuration(&d.Run))
-		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 86, Col: 51}
-		}
-		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var15))
+		var templ_7745c5c3_Var15 = []any{"run-profile-chip", "run-profile-" + profileChipValue(d.Run.Profile)}
+		templ_7745c5c3_Err = templ.RenderCSSItems(ctx, templ_7745c5c3_Buffer, templ_7745c5c3_Var15...)
 		if templ_7745c5c3_Err != nil {
 			return templ_7745c5c3_Err
 		}
-		templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 16, "</span> ")
+		templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 16, "<span class=\"")
+		if templ_7745c5c3_Err != nil {
+			return templ_7745c5c3_Err
+		}
+		var templ_7745c5c3_Var16 string
+		templ_7745c5c3_Var16, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var15).String())
+		if templ_7745c5c3_Err != nil {
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 1, Col: 0}
+		}
+		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var16))
+		if templ_7745c5c3_Err != nil {
+			return templ_7745c5c3_Err
+		}
+		templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 17, "\">")
+		if templ_7745c5c3_Err != nil {
+			return templ_7745c5c3_Err
+		}
+		var templ_7745c5c3_Var17 string
+		templ_7745c5c3_Var17, templ_7745c5c3_Err = templ.JoinStringErrs(profileChipValue(d.Run.Profile))
+		if templ_7745c5c3_Err != nil {
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 86, Col: 121}
+		}
+		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var17))
+		if templ_7745c5c3_Err != nil {
+			return templ_7745c5c3_Err
+		}
+		templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 18, "</span> <span class=\"run-duration\">")
+		if templ_7745c5c3_Err != nil {
+			return templ_7745c5c3_Err
+		}
+		var templ_7745c5c3_Var18 string
+		templ_7745c5c3_Var18, templ_7745c5c3_Err = templ.JoinStringErrs(runDuration(&d.Run))
+		if templ_7745c5c3_Err != nil {
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 87, Col: 51}
+		}
+		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var18))
+		if templ_7745c5c3_Err != nil {
+			return templ_7745c5c3_Err
+		}
+		templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 19, "</span> ")
 		if templ_7745c5c3_Err != nil {
 			return templ_7745c5c3_Err
 		}
 		if d.Run.FailedStage != "" {
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 17, "<span class=\"run-failed-stage\">failed at <strong>")
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 20, "<span class=\"run-failed-stage\">failed at <strong>")
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
-			var templ_7745c5c3_Var16 string
-			templ_7745c5c3_Var16, templ_7745c5c3_Err = templ.JoinStringErrs(d.Run.FailedStage)
+			var templ_7745c5c3_Var19 string
+			templ_7745c5c3_Var19, templ_7745c5c3_Err = templ.JoinStringErrs(d.Run.FailedStage)
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 88, Col: 72}
-			}
-			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var16))
-			if templ_7745c5c3_Err != nil {
-				return templ_7745c5c3_Err
-			}
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 18, "</strong></span> ")
-			if templ_7745c5c3_Err != nil {
-				return templ_7745c5c3_Err
-			}
-		}
-		if d.SpecDiffCritical > 0 {
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 19, "<span class=\"run-diffs bad\">")
-			if templ_7745c5c3_Err != nil {
-				return templ_7745c5c3_Err
-			}
-			var templ_7745c5c3_Var17 string
-			templ_7745c5c3_Var17, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("%d critical diff", d.SpecDiffCritical))
-			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 91, Col: 85}
-			}
-			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var17))
-			if templ_7745c5c3_Err != nil {
-				return templ_7745c5c3_Err
-			}
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 20, "</span>")
-			if templ_7745c5c3_Err != nil {
-				return templ_7745c5c3_Err
-			}
-		}
-		templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 21, "</div><div class=\"run-header-right\">")
-		if templ_7745c5c3_Err != nil {
-			return templ_7745c5c3_Err
-		}
-		if canCancel(&d.Run) {
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 22, "<form method=\"post\" action=\"")
-			if templ_7745c5c3_Err != nil {
-				return templ_7745c5c3_Err
-			}
-			var templ_7745c5c3_Var18 templ.SafeURL
-			templ_7745c5c3_Var18, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/cancel", d.Host.ID)))
-			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 96, Col: 90}
-			}
-			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var18))
-			if templ_7745c5c3_Err != nil {
-				return templ_7745c5c3_Err
-			}
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 23, "\" class=\"inline\" onsubmit=\"return confirm('Cancel run? Destructive stages may leave the host in an intermediate state requiring manual cleanup.');\"><button type=\"submit\" class=\"btn-danger\">Cancel run</button></form>")
-			if templ_7745c5c3_Err != nil {
-				return templ_7745c5c3_Err
-			}
-		}
-		if canOverrideWipe(&d.Run) {
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 24, "<form method=\"post\" action=\"")
-			if templ_7745c5c3_Err != nil {
-				return templ_7745c5c3_Err
-			}
-			var templ_7745c5c3_Var19 templ.SafeURL
-			templ_7745c5c3_Var19, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/override-wipe", d.Host.ID)))
-			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 101, Col: 97}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 89, Col: 72}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var19))
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 25, "\" class=\"inline\"><button type=\"submit\" class=\"btn-danger\">Override wipe-probe</button></form>")
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 21, "</strong></span> ")
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
 		}
-		if hasReport(&d.Run) {
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 26, "<a class=\"button-like\" href=\"")
+		if d.SpecDiffCritical > 0 {
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 22, "<span class=\"run-diffs bad\">")
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
-			var templ_7745c5c3_Var20 templ.SafeURL
-			templ_7745c5c3_Var20, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/reports/%d", d.Run.ID)))
+			var templ_7745c5c3_Var20 string
+			templ_7745c5c3_Var20, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("%d critical diff", d.SpecDiffCritical))
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 106, Col: 85}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 92, Col: 85}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var20))
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 27, "\" target=\"_blank\" rel=\"noopener\">View report</a> ")
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 23, "</span>")
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
 		}
-		if d.Run.State.IsTerminal() {
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 28, "<form method=\"post\" action=\"")
+		templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 24, "</div><div class=\"run-header-right\">")
+		if templ_7745c5c3_Err != nil {
+			return templ_7745c5c3_Err
+		}
+		if canCancel(&d.Run) {
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 25, "<form method=\"post\" action=\"")
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
 			var templ_7745c5c3_Var21 templ.SafeURL
-			templ_7745c5c3_Var21, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/start", d.Host.ID)))
+			templ_7745c5c3_Var21, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/cancel", d.Host.ID)))
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 109, Col: 89}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 97, Col: 90}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var21))
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 29, "\" class=\"inline\"><button type=\"submit\" class=\"btn-primary\">Start new run</button></form>")
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 26, "\" class=\"inline\" onsubmit=\"return confirm('Cancel run? Destructive stages may leave the host in an intermediate state requiring manual cleanup.');\"><button type=\"submit\" class=\"btn-danger\">Cancel run</button></form>")
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
 		}
-		templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 30, "</div></header>")
+		if canOverrideWipe(&d.Run) {
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 27, "<form method=\"post\" action=\"")
+			if templ_7745c5c3_Err != nil {
+				return templ_7745c5c3_Err
+			}
+			var templ_7745c5c3_Var22 templ.SafeURL
+			templ_7745c5c3_Var22, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/override-wipe", d.Host.ID)))
+			if templ_7745c5c3_Err != nil {
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 102, Col: 97}
+			}
+			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var22))
+			if templ_7745c5c3_Err != nil {
+				return templ_7745c5c3_Err
+			}
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 28, "\" class=\"inline\"><button type=\"submit\" class=\"btn-danger\">Override wipe-probe</button></form>")
+			if templ_7745c5c3_Err != nil {
+				return templ_7745c5c3_Err
+			}
+		}
+		if hasReport(&d.Run) {
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 29, "<a class=\"button-like\" href=\"")
+			if templ_7745c5c3_Err != nil {
+				return templ_7745c5c3_Err
+			}
+			var templ_7745c5c3_Var23 templ.SafeURL
+			templ_7745c5c3_Var23, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/reports/%d", d.Run.ID)))
+			if templ_7745c5c3_Err != nil {
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 107, Col: 85}
+			}
+			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var23))
+			if templ_7745c5c3_Err != nil {
+				return templ_7745c5c3_Err
+			}
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 30, "\" target=\"_blank\" rel=\"noopener\">View report</a> ")
+			if templ_7745c5c3_Err != nil {
+				return templ_7745c5c3_Err
+			}
+		}
+		if d.Run.State.IsTerminal() {
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 31, "<form method=\"post\" action=\"")
+			if templ_7745c5c3_Err != nil {
+				return templ_7745c5c3_Err
+			}
+			var templ_7745c5c3_Var24 templ.SafeURL
+			templ_7745c5c3_Var24, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("/hosts/%d/start", d.Host.ID)))
+			if templ_7745c5c3_Err != nil {
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 110, Col: 89}
+			}
+			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var24))
+			if templ_7745c5c3_Err != nil {
+				return templ_7745c5c3_Err
+			}
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 32, "\" class=\"inline\"><button type=\"submit\" class=\"btn-primary\">Start new run</button></form>")
+			if templ_7745c5c3_Err != nil {
+				return templ_7745c5c3_Err
+			}
+		}
+		templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 33, "</div></header>")
 		if templ_7745c5c3_Err != nil {
 			return templ_7745c5c3_Err
 		}
@@ -449,83 +484,83 @@ func HoldBanner(d RunPageData) templ.Component {
 			}()
 		}
 		ctx = templ.InitializeContext(ctx)
-		templ_7745c5c3_Var22 := templ.GetChildren(ctx)
-		if templ_7745c5c3_Var22 == nil {
-			templ_7745c5c3_Var22 = templ.NopComponent
+		templ_7745c5c3_Var25 := templ.GetChildren(ctx)
+		if templ_7745c5c3_Var25 == nil {
+			templ_7745c5c3_Var25 = templ.NopComponent
 		}
 		ctx = templ.ClearChildren(ctx)
 		if d.Run.State == model.StateFailedHolding && d.Run.HoldIP != "" {
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 31, "<section id=\"")
-			if templ_7745c5c3_Err != nil {
-				return templ_7745c5c3_Err
-			}
-			var templ_7745c5c3_Var23 string
-			templ_7745c5c3_Var23, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-hold-%d", d.Run.ID))
-			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 124, Col: 47}
-			}
-			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var23))
-			if templ_7745c5c3_Err != nil {
-				return templ_7745c5c3_Err
-			}
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 32, "\" class=\"hold-banner\" sse-swap=\"")
-			if templ_7745c5c3_Err != nil {
-				return templ_7745c5c3_Err
-			}
-			var templ_7745c5c3_Var24 string
-			templ_7745c5c3_Var24, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-hold-%d", d.Run.ID))
-			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 126, Col: 53}
-			}
-			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var24))
-			if templ_7745c5c3_Err != nil {
-				return templ_7745c5c3_Err
-			}
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 33, "\" hx-swap=\"outerHTML\"><span class=\"hold-banner-label\">Host is holding — SSH available:</span> <code class=\"hold-ssh\">")
-			if templ_7745c5c3_Err != nil {
-				return templ_7745c5c3_Err
-			}
-			var templ_7745c5c3_Var25 string
-			templ_7745c5c3_Var25, templ_7745c5c3_Err = templ.JoinStringErrs(sshInvocation(d.HoldKeyPath, d.Run.HoldIP))
-			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 130, Col: 70}
-			}
-			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var25))
-			if templ_7745c5c3_Err != nil {
-				return templ_7745c5c3_Err
-			}
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 34, "</code></section>")
-			if templ_7745c5c3_Err != nil {
-				return templ_7745c5c3_Err
-			}
-		} else {
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 35, "<section id=\"")
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 34, "<section id=\"")
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
 			var templ_7745c5c3_Var26 string
 			templ_7745c5c3_Var26, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-hold-%d", d.Run.ID))
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 134, Col: 47}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 125, Col: 47}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var26))
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 36, "\" class=\"detail-hold-placeholder\" sse-swap=\"")
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 35, "\" class=\"hold-banner\" sse-swap=\"")
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
 			var templ_7745c5c3_Var27 string
 			templ_7745c5c3_Var27, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-hold-%d", d.Run.ID))
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 136, Col: 53}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 127, Col: 53}
 			}
 			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var27))
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 37, "\" hx-swap=\"outerHTML\"></section>")
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 36, "\" hx-swap=\"outerHTML\"><span class=\"hold-banner-label\">Host is holding — SSH available:</span> <code class=\"hold-ssh\">")
+			if templ_7745c5c3_Err != nil {
+				return templ_7745c5c3_Err
+			}
+			var templ_7745c5c3_Var28 string
+			templ_7745c5c3_Var28, templ_7745c5c3_Err = templ.JoinStringErrs(sshInvocation(d.HoldKeyPath, d.Run.HoldIP))
+			if templ_7745c5c3_Err != nil {
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 131, Col: 70}
+			}
+			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var28))
+			if templ_7745c5c3_Err != nil {
+				return templ_7745c5c3_Err
+			}
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 37, "</code></section>")
+			if templ_7745c5c3_Err != nil {
+				return templ_7745c5c3_Err
+			}
+		} else {
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 38, "<section id=\"")
+			if templ_7745c5c3_Err != nil {
+				return templ_7745c5c3_Err
+			}
+			var templ_7745c5c3_Var29 string
+			templ_7745c5c3_Var29, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-hold-%d", d.Run.ID))
+			if templ_7745c5c3_Err != nil {
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 135, Col: 47}
+			}
+			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var29))
+			if templ_7745c5c3_Err != nil {
+				return templ_7745c5c3_Err
+			}
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 39, "\" class=\"detail-hold-placeholder\" sse-swap=\"")
+			if templ_7745c5c3_Err != nil {
+				return templ_7745c5c3_Err
+			}
+			var templ_7745c5c3_Var30 string
+			templ_7745c5c3_Var30, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-hold-%d", d.Run.ID))
+			if templ_7745c5c3_Err != nil {
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 137, Col: 53}
+			}
+			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var30))
+			if templ_7745c5c3_Err != nil {
+				return templ_7745c5c3_Err
+			}
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 40, "\" hx-swap=\"outerHTML\"></section>")
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
@@ -553,138 +588,138 @@ func RunSpecDiffs(d RunPageData) templ.Component {
 			}()
 		}
 		ctx = templ.InitializeContext(ctx)
-		templ_7745c5c3_Var28 := templ.GetChildren(ctx)
-		if templ_7745c5c3_Var28 == nil {
-			templ_7745c5c3_Var28 = templ.NopComponent
+		templ_7745c5c3_Var31 := templ.GetChildren(ctx)
+		if templ_7745c5c3_Var31 == nil {
+			templ_7745c5c3_Var31 = templ.NopComponent
 		}
 		ctx = templ.ClearChildren(ctx)
-		templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 38, "<section id=\"")
+		templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 41, "<section id=\"")
 		if templ_7745c5c3_Err != nil {
 			return templ_7745c5c3_Err
 		}
-		var templ_7745c5c3_Var29 string
-		templ_7745c5c3_Var29, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-specdiffs-%d", d.Run.ID))
+		var templ_7745c5c3_Var32 string
+		templ_7745c5c3_Var32, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-specdiffs-%d", d.Run.ID))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 147, Col: 51}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 148, Col: 51}
 		}
-		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var29))
+		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var32))
 		if templ_7745c5c3_Err != nil {
 			return templ_7745c5c3_Err
 		}
-		templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 39, "\" class=\"detail-section detail-diffs\" sse-swap=\"")
+		templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 42, "\" class=\"detail-section detail-diffs\" sse-swap=\"")
 		if templ_7745c5c3_Err != nil {
 			return templ_7745c5c3_Err
 		}
-		var templ_7745c5c3_Var30 string
-		templ_7745c5c3_Var30, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-specdiffs-%d", d.Run.ID))
+		var templ_7745c5c3_Var33 string
+		templ_7745c5c3_Var33, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("detail-specdiffs-%d", d.Run.ID))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 149, Col: 57}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 150, Col: 57}
 		}
-		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var30))
+		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var33))
 		if templ_7745c5c3_Err != nil {
 			return templ_7745c5c3_Err
 		}
-		templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 40, "\" hx-swap=\"outerHTML\">")
+		templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 43, "\" hx-swap=\"outerHTML\">")
 		if templ_7745c5c3_Err != nil {
 			return templ_7745c5c3_Err
 		}
 		if len(d.SpecDiffs) > 0 {
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 41, "<details")
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 44, "<details")
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
 			if hasCriticalDiff(d.SpecDiffs) {
-				templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 42, " open")
+				templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 45, " open")
 				if templ_7745c5c3_Err != nil {
 					return templ_7745c5c3_Err
 				}
 			}
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 43, "><summary><h2>Spec diffs (")
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 46, "><summary><h2>Spec diffs (")
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
-			var templ_7745c5c3_Var31 string
-			templ_7745c5c3_Var31, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("%d", len(d.SpecDiffs)))
+			var templ_7745c5c3_Var34 string
+			templ_7745c5c3_Var34, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("%d", len(d.SpecDiffs)))
 			if templ_7745c5c3_Err != nil {
-				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 154, Col: 66}
+				return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 155, Col: 66}
 			}
-			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var31))
+			_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var34))
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 44, ")</h2></summary><ul class=\"diff-list\">")
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 47, ")</h2></summary><ul class=\"diff-list\">")
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
 			for _, diff := range d.SpecDiffs {
-				var templ_7745c5c3_Var32 = []any{"diff-row", "diff-" + diff.Severity}
-				templ_7745c5c3_Err = templ.RenderCSSItems(ctx, templ_7745c5c3_Buffer, templ_7745c5c3_Var32...)
+				var templ_7745c5c3_Var35 = []any{"diff-row", "diff-" + diff.Severity}
+				templ_7745c5c3_Err = templ.RenderCSSItems(ctx, templ_7745c5c3_Buffer, templ_7745c5c3_Var35...)
 				if templ_7745c5c3_Err != nil {
 					return templ_7745c5c3_Err
 				}
-				templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 45, "<li class=\"")
-				if templ_7745c5c3_Err != nil {
-					return templ_7745c5c3_Err
-				}
-				var templ_7745c5c3_Var33 string
-				templ_7745c5c3_Var33, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var32).String())
-				if templ_7745c5c3_Err != nil {
-					return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 1, Col: 0}
-				}
-				_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var33))
-				if templ_7745c5c3_Err != nil {
-					return templ_7745c5c3_Err
-				}
-				templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 46, "\"><div class=\"diff-field\">")
-				if templ_7745c5c3_Err != nil {
-					return templ_7745c5c3_Err
-				}
-				var templ_7745c5c3_Var34 string
-				templ_7745c5c3_Var34, templ_7745c5c3_Err = templ.JoinStringErrs(diff.Field)
-				if templ_7745c5c3_Err != nil {
-					return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 158, Col: 43}
-				}
-				_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var34))
-				if templ_7745c5c3_Err != nil {
-					return templ_7745c5c3_Err
-				}
-				templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 47, "</div><div class=\"diff-expected\">expected: <code>")
-				if templ_7745c5c3_Err != nil {
-					return templ_7745c5c3_Err
-				}
-				var templ_7745c5c3_Var35 string
-				templ_7745c5c3_Var35, templ_7745c5c3_Err = templ.JoinStringErrs(diff.Expected)
-				if templ_7745c5c3_Err != nil {
-					return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 159, Col: 65}
-				}
-				_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var35))
-				if templ_7745c5c3_Err != nil {
-					return templ_7745c5c3_Err
-				}
-				templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 48, "</code></div><div class=\"diff-actual\">actual: <code>")
+				templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 48, "<li class=\"")
 				if templ_7745c5c3_Err != nil {
 					return templ_7745c5c3_Err
 				}
 				var templ_7745c5c3_Var36 string
-				templ_7745c5c3_Var36, templ_7745c5c3_Err = templ.JoinStringErrs(diff.Actual)
+				templ_7745c5c3_Var36, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var35).String())
 				if templ_7745c5c3_Err != nil {
-					return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 160, Col: 59}
+					return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 1, Col: 0}
 				}
 				_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var36))
 				if templ_7745c5c3_Err != nil {
 					return templ_7745c5c3_Err
 				}
-				templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 49, "</code></div></li>")
+				templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 49, "\"><div class=\"diff-field\">")
+				if templ_7745c5c3_Err != nil {
+					return templ_7745c5c3_Err
+				}
+				var templ_7745c5c3_Var37 string
+				templ_7745c5c3_Var37, templ_7745c5c3_Err = templ.JoinStringErrs(diff.Field)
+				if templ_7745c5c3_Err != nil {
+					return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 159, Col: 43}
+				}
+				_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var37))
+				if templ_7745c5c3_Err != nil {
+					return templ_7745c5c3_Err
+				}
+				templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 50, "</div><div class=\"diff-expected\">expected: <code>")
+				if templ_7745c5c3_Err != nil {
+					return templ_7745c5c3_Err
+				}
+				var templ_7745c5c3_Var38 string
+				templ_7745c5c3_Var38, templ_7745c5c3_Err = templ.JoinStringErrs(diff.Expected)
+				if templ_7745c5c3_Err != nil {
+					return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 160, Col: 65}
+				}
+				_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var38))
+				if templ_7745c5c3_Err != nil {
+					return templ_7745c5c3_Err
+				}
+				templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 51, "</code></div><div class=\"diff-actual\">actual: <code>")
+				if templ_7745c5c3_Err != nil {
+					return templ_7745c5c3_Err
+				}
+				var templ_7745c5c3_Var39 string
+				templ_7745c5c3_Var39, templ_7745c5c3_Err = templ.JoinStringErrs(diff.Actual)
+				if templ_7745c5c3_Err != nil {
+					return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/run_detail.templ`, Line: 161, Col: 59}
+				}
+				_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var39))
+				if templ_7745c5c3_Err != nil {
+					return templ_7745c5c3_Err
+				}
+				templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 52, "</code></div></li>")
 				if templ_7745c5c3_Err != nil {
 					return templ_7745c5c3_Err
 				}
 			}
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 50, "</ul></details>")
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 53, "</ul></details>")
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
 		}
-		templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 51, "</section>")
+		templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 54, "</section>")
 		if templ_7745c5c3_Err != nil {
 			return templ_7745c5c3_Err
 		}
diff --git a/internal/web/templates/substep_row_templ.go b/internal/web/templates/substep_row_templ.go
index 02cae74..d0c9c1a 100644
--- a/internal/web/templates/substep_row_templ.go
+++ b/internal/web/templates/substep_row_templ.go
@@ -99,7 +99,7 @@ func SubStepRow(ss model.SubStep) templ.Component {
 		var templ_7745c5c3_Var3 string
 		templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("substep-%d-%s-%d", ss.RunID, ss.StageName, ss.Ordinal))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/substep_row.templ`, Line: 63, Col: 74}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `substep_row.templ`, Line: 63, Col: 74}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
 		if templ_7745c5c3_Err != nil {
@@ -112,7 +112,7 @@ func SubStepRow(ss model.SubStep) templ.Component {
 		var templ_7745c5c3_Var4 string
 		templ_7745c5c3_Var4, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var2).String())
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/substep_row.templ`, Line: 1, Col: 0}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `substep_row.templ`, Line: 1, Col: 0}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var4))
 		if templ_7745c5c3_Err != nil {
@@ -125,7 +125,7 @@ func SubStepRow(ss model.SubStep) templ.Component {
 		var templ_7745c5c3_Var5 string
 		templ_7745c5c3_Var5, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("substep-%d-%s-%d", ss.RunID, ss.StageName, ss.Ordinal))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/substep_row.templ`, Line: 65, Col: 80}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `substep_row.templ`, Line: 65, Col: 80}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var5))
 		if templ_7745c5c3_Err != nil {
@@ -147,7 +147,7 @@ func SubStepRow(ss model.SubStep) templ.Component {
 		var templ_7745c5c3_Var7 string
 		templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs(templ.CSSClasses(templ_7745c5c3_Var6).String())
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/substep_row.templ`, Line: 1, Col: 0}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `substep_row.templ`, Line: 1, Col: 0}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7))
 		if templ_7745c5c3_Err != nil {
@@ -160,7 +160,7 @@ func SubStepRow(ss model.SubStep) templ.Component {
 		var templ_7745c5c3_Var8 string
 		templ_7745c5c3_Var8, templ_7745c5c3_Err = templ.JoinStringErrs(subStepMarker(ss.State))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/substep_row.templ`, Line: 68, Col: 96}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `substep_row.templ`, Line: 68, Col: 96}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var8))
 		if templ_7745c5c3_Err != nil {
@@ -173,7 +173,7 @@ func SubStepRow(ss model.SubStep) templ.Component {
 		var templ_7745c5c3_Var9 string
 		templ_7745c5c3_Var9, templ_7745c5c3_Err = templ.JoinStringErrs(ss.Name)
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/substep_row.templ`, Line: 69, Col: 38}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `substep_row.templ`, Line: 69, Col: 38}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var9))
 		if templ_7745c5c3_Err != nil {
@@ -186,7 +186,7 @@ func SubStepRow(ss model.SubStep) templ.Component {
 		var templ_7745c5c3_Var10 string
 		templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(subStepDuration(ss))
 		if templ_7745c5c3_Err != nil {
-			return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/web/templates/substep_row.templ`, Line: 70, Col: 54}
+			return templ.Error{Err: templ_7745c5c3_Err, FileName: `substep_row.templ`, Line: 70, Col: 54}
 		}
 		_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10))
 		if templ_7745c5c3_Err != nil {