// Package tests contains the per-stage executors the agent runs on the // host under test. Each stage implements Runner, is called with a // Context that carries the client + forwarder + run params, and returns // an Outcome that the caller POSTs to /result. package tests import ( "context" "encoding/json" "time" ) // Outcome is what a stage returns; it maps directly to the /result body. // - Passed=true and len(Skipped)>0 counts as a pass but surfaces in the // tile summary so operators can see "GPU: skipped (no VGA device)". // - Message is only used on failure; the UI displays it in the log. // - Extras is merged into the posted summary so stages can add // their own shape (e.g. Storage returns per-disk probe results). // - SubSteps carries agent-authored sub-step rows (CPU/Memory passes, // per-disk SMART, per-device GPU, …). Empty for stages with no // natural breakdown; persisted verbatim by the /result handler. type Outcome struct { Passed bool Message string Summary string // short human-readable one-liner Extras map[string]any // merged into posted summary JSON SubSteps []SubStepReport // agent-authored granular rows } // SubStepReport is one entry a stage contributes to its sub-step list. // Ordinal is assigned in the order entries appear in the slice — the // agent shouldn't set it manually. State is derived from Passed/Skipped // the same way Outcome is: Skipped wins if set, else Passed ? passed : // failed. StartedAt/CompletedAt are required so the UI can order rows // and slice the stage log by time window. type SubStepReport struct { Name string Passed bool Skipped bool StartedAt time.Time CompletedAt time.Time SummaryJSON json.RawMessage } // MarshalSummary builds the summary JSON body POSTed to /result. // Stages accumulate fields via Extras; this helper adds "summary" (the // human-readable line) and serializes. func (o Outcome) MarshalSummary() (json.RawMessage, error) { body := map[string]any{} for k, v := range o.Extras { body[k] = v } if o.Summary != "" { body["summary"] = o.Summary } return json.Marshal(body) } // Deps bundles what stages need without pulling in the whole agent. // Logger methods print to stdout + forward to the orchestrator; Sensor // drops numeric samples; OverrideFlags carries operator-set bypasses. // // CPUStressKnobs / StorageKnobs / NetworkKnobs are Phase-2 profile // knobs. Zero-valued fields mean "fall back to the compile-time // default" — that keeps the stages runnable even when the runner can't // materialize a profile (tests, legacy orchestrator, etc). type Deps struct { Info func(string) Warn func(string) Error func(string) Sensor func(ctx context.Context, samples []Sample) error OverrideWipe bool NonDestructive bool // skip wipe-probe + writes in Storage ExpectedDisks []ExpectedDisk // serials + sizes from host.expected_spec StageTimeout time.Duration CPUStressKnobs CPUStressKnobs StorageKnobs StorageKnobs NetworkKnobs NetworkKnobs BurnKnobs BurnKnobs // LookPath is the unit-test seam for swapping a real external // binary (stress-ng, fio, iperf3, dmidecode, …) for a fake. When // nil the stage falls back to os/exec.LookPath — production and // existing tests keep working unchanged. Tests under // agent/tests/fakes/ populate this to redirect lookups to a built // fake binary in a tempdir. LookPath func(name string) (string, error) } // CPUStressKnobs parameterizes the CPUStress stage. Zero durations fall // back to the package's compile-time defaults (cpuPassDuration etc). type CPUStressKnobs struct { CPUPass time.Duration MemPass time.Duration EDACPoll time.Duration } // StorageKnobs parameterizes the Storage stage. Mode picks between // "fio_sample" (bounded tempfile inside the device, quick profile) and // "full_disk" (whole-device write verify, deep/soak). Empty strings // fall back to the stage's safe defaults. type StorageKnobs struct { Mode string FioSize string FioTime time.Duration FioBS string FioRW string Verify string } // NetworkKnobs parameterizes the Network stage. type NetworkKnobs struct { Duration time.Duration } // BurnKnobs parameterizes the Burn super-stage. Duration is the total // Burn window; sub-workloads run concurrently inside that window. // CPUWorkers is "all" (runtime.NumCPU) or a numeric string. MemPct is a // percentage of MemAvailable to allocate for the memory burner (clamped // 0-90 by the stage). IperfParallel feeds iperf3 -P to generate sustained // NIC load. FioOnSpare gates the storage sub-workload: true = fio runs // against the allow-listed disks for the same window; false = skip fio. type BurnKnobs struct { Duration time.Duration CPUWorkers string MemPct int FioOnSpare bool IperfParallel int } // Sample mirrors the server's SensorSample but lives in the tests // package so probe code doesn't import internal/api. type Sample struct { Kind string Key string Value float64 Unit string } // ExpectedDisk is the subset of internal/spec.DiskSpec that Storage // needs: a device allowlist keyed on serial. type ExpectedDisk struct { Serial string SizeGB int }