// Package spec owns the expected-vs-actual hardware diff for Vetting.
//
// The operator writes an expected spec YAML per host when registering.
// The agent submits an Inventory artifact after boot. Diff() compares
// them and emits per-field SpecDiff rows; the orchestrator fails the
// SpecValidate stage if any row is classified critical.
//
// Phase 3 rule (operator decision): every mismatch is critical. Missing
// expected fields skip that check entirely so partial specs stay useful
// instead of exploding.
package spec

import (
	"fmt"
	"sort"
	"strings"

	"gopkg.in/yaml.v3"

	"vetting/internal/model"
)

// Spec is the operator-declared expected hardware for one host, decoded
// from YAML by Parse. Every section is optional (pointer or slice with
// omitempty): Diff skips the CPU and Memory checks when their pointers
// are nil, and the per-device helpers skip a section whose expected
// slice is empty. The Firmware section is not consumed by Diff; it is
// checked separately through DiffFirmware.
type Spec struct {
	CPU      *CPUSpec       `yaml:"cpu,omitempty"`
	Memory   *MemorySpec    `yaml:"memory,omitempty"`
	Disks    []DiskSpec     `yaml:"disks,omitempty"`
	NICs     []NICSpec      `yaml:"nics,omitempty"`
	GPUs     []GPUSpec      `yaml:"gpus,omitempty"`
	Firmware []FirmwareSpec `yaml:"firmware,omitempty"`
}

// FirmwareSpec is one row in the expected-spec YAML's `firmware:` block.
// Component is one of bios|bmc|nic|hba|microcode|nvme_fw (matches the
// on-wire value from agent/probes.FirmwareSnapshot.Component). Identifier
// is optional — when empty the rule applies to every observed snapshot
// of that component (use for single-instance things like BIOS/microcode);
// when set it pins the check to a specific NIC port / NVMe controller /
// PCI address. Version is the literal string expected; comparison is
// exact after trimming whitespace.
type FirmwareSpec struct {
	Component  string `yaml:"component"`
	Identifier string `yaml:"identifier,omitempty"`
	Version    string `yaml:"version"`
}

// FirmwareObserved is what the agent reported, in a spec-package-local
// shape so callers don't need to thread store types through the diff.
// The server converts store.FirmwareSnapshot → FirmwareObserved before
// calling DiffFirmware.
type FirmwareObserved struct { Component string Identifier string Version string } type CPUSpec struct { Model string `json:"model,omitempty" yaml:"model,omitempty"` LogicalCores int `json:"logical_cores,omitempty" yaml:"logical_cores,omitempty"` } type MemorySpec struct { TotalGiB int `json:"total_gib,omitempty" yaml:"total_gib,omitempty"` } type DiskSpec struct { Serial string `json:"serial,omitempty" yaml:"serial,omitempty"` SizeGB int `json:"size_gb,omitempty" yaml:"size_gb,omitempty"` } type NICSpec struct { MAC string `json:"mac,omitempty" yaml:"mac,omitempty"` SpeedGbps int `json:"speed_gbps,omitempty" yaml:"speed_gbps,omitempty"` } type GPUSpec struct { Model string `json:"model,omitempty" yaml:"model,omitempty"` } // Inventory is the actual measured hardware. Field names deliberately // match Spec so the diff reads cleanly. type Inventory struct { CPU CPUSpec `json:"cpu" yaml:"cpu"` Memory MemorySpec `json:"memory" yaml:"memory"` Disks []DiskSpec `json:"disks" yaml:"disks"` NICs []NICSpec `json:"nics" yaml:"nics"` GPUs []GPUSpec `json:"gpus" yaml:"gpus"` } // Parse reads expected-spec YAML. Empty YAML parses to a zero Spec and // yields an empty diff — i.e. "no expectations" is a legal stance. func Parse(src string) (*Spec, error) { var s Spec if err := yaml.Unmarshal([]byte(src), &s); err != nil { return nil, fmt.Errorf("parse spec yaml: %w", err) } return &s, nil } // Diff returns the per-field differences with severity. Phase 3 rule: // every present-expected-field-that-mismatches is critical. Missing // expected fields are skipped (not info-logged) so the diff list stays // focused on real problems. 
func Diff(expected *Spec, actual *Inventory) []model.SpecDiff { if expected == nil { return nil } out := []model.SpecDiff{} if expected.CPU != nil { if expected.CPU.Model != "" { if !cpuModelMatches(expected.CPU.Model, actual.CPU.Model) { out = append(out, diff("cpu.model", expected.CPU.Model, actual.CPU.Model)) } } if expected.CPU.LogicalCores > 0 && expected.CPU.LogicalCores != actual.CPU.LogicalCores { out = append(out, diff("cpu.logical_cores", itoa(expected.CPU.LogicalCores), itoa(actual.CPU.LogicalCores))) } } if expected.Memory != nil && expected.Memory.TotalGiB > 0 { // Allow ±2 GiB tolerance: BIOS-reserved, kernel, reporting // quantization. A dead 16 GiB stick will still surface. if absInt(expected.Memory.TotalGiB-actual.Memory.TotalGiB) > 2 { out = append(out, diff("memory.total_gib", itoa(expected.Memory.TotalGiB), itoa(actual.Memory.TotalGiB))) } } out = append(out, diffDisks(expected.Disks, actual.Disks)...) out = append(out, diffNICs(expected.NICs, actual.NICs)...) out = append(out, diffGPUs(expected.GPUs, actual.GPUs)...) return out } func diffDisks(expected, actual []DiskSpec) []model.SpecDiff { if len(expected) == 0 { return nil } actualBySerial := map[string]DiskSpec{} for _, d := range actual { if d.Serial != "" { actualBySerial[strings.ToLower(d.Serial)] = d } } var out []model.SpecDiff seen := map[string]bool{} for _, exp := range expected { if exp.Serial == "" { continue } key := strings.ToLower(exp.Serial) seen[key] = true got, ok := actualBySerial[key] if !ok { out = append(out, diff("disks["+exp.Serial+"].present", "true", "false")) continue } if exp.SizeGB > 0 && absInt(exp.SizeGB-got.SizeGB) > 1 { out = append(out, diff("disks["+exp.Serial+"].size_gb", itoa(exp.SizeGB), itoa(got.SizeGB))) } } // Extra disks on the host that operator didn't declare are flagged: // a leftover USB stick could be a destructive-test target we'd // rather the operator know about. 
for _, got := range actual { if got.Serial == "" { continue } if !seen[strings.ToLower(got.Serial)] { out = append(out, diff("disks[unexpected "+got.Serial+"]", "", "present")) } } return out } func diffNICs(expected, actual []NICSpec) []model.SpecDiff { if len(expected) == 0 { return nil } actualByMAC := map[string]NICSpec{} for _, n := range actual { if n.MAC != "" { actualByMAC[strings.ToLower(n.MAC)] = n } } var out []model.SpecDiff for _, exp := range expected { if exp.MAC == "" { continue } got, ok := actualByMAC[strings.ToLower(exp.MAC)] if !ok { out = append(out, diff("nics["+exp.MAC+"].present", "true", "false")) continue } if exp.SpeedGbps > 0 && got.SpeedGbps > 0 && exp.SpeedGbps != got.SpeedGbps { out = append(out, diff("nics["+exp.MAC+"].speed_gbps", itoa(exp.SpeedGbps), itoa(got.SpeedGbps))) } } return out } // DiffFirmware returns a SpecDiff per firmware expectation that doesn't // find a matching observed snapshot. Matching rules: // - An expected rule with Identifier set matches by (component, id); // a missing observed snapshot yields a "present=false" diff. // - An expected rule with Identifier empty applies to every observed // snapshot of that component — useful for "all NICs must run fw // 8.30" without listing each port. Zero observed snapshots of the // component yields a single "present=false" diff, not N. // - Version mismatch emits an exact-string expected→actual diff. // Case is preserved (firmware versions are case-sensitive in practice). 
func DiffFirmware(expected []FirmwareSpec, actual []FirmwareObserved) []model.SpecDiff { if len(expected) == 0 { return nil } byCompIdent := map[string]FirmwareObserved{} byComp := map[string][]FirmwareObserved{} for _, o := range actual { byCompIdent[fwKey(o.Component, o.Identifier)] = o byComp[o.Component] = append(byComp[o.Component], o) } var out []model.SpecDiff for _, exp := range expected { comp := strings.TrimSpace(exp.Component) if comp == "" || strings.TrimSpace(exp.Version) == "" { continue } label := "firmware[" + comp if exp.Identifier != "" { label += "/" + exp.Identifier } label += "]" if exp.Identifier != "" { got, ok := byCompIdent[fwKey(comp, exp.Identifier)] if !ok { out = append(out, diff(label+".present", "true", "false")) continue } if !strings.EqualFold(strings.TrimSpace(got.Version), strings.TrimSpace(exp.Version)) { out = append(out, diff(label+".version", exp.Version, got.Version)) } continue } // No identifier: fan out across every observed snapshot of this // component. Missing is one diff; a mismatching port/controller // emits one diff per mismatch. observed := byComp[comp] if len(observed) == 0 { out = append(out, diff(label+".present", "true", "false")) continue } for _, got := range observed { if !strings.EqualFold(strings.TrimSpace(got.Version), strings.TrimSpace(exp.Version)) { slot := got.Identifier if slot == "" { slot = "*" } out = append(out, diff("firmware["+comp+"/"+slot+"].version", exp.Version, got.Version)) } } } return out } func fwKey(component, identifier string) string { return strings.ToLower(component) + "|" + strings.ToLower(identifier) } func diffGPUs(expected, actual []GPUSpec) []model.SpecDiff { if len(expected) == 0 { return nil } // GPU matching is by model string. Multiple identical cards match // by count, not identity, since PCI-slot order isn't meaningful. 
want := map[string]int{} for _, g := range expected { want[strings.ToLower(g.Model)]++ } got := map[string]int{} for _, g := range actual { got[strings.ToLower(g.Model)]++ } var keys []string for k := range want { keys = append(keys, k) } sort.Strings(keys) var out []model.SpecDiff for _, k := range keys { if got[k] < want[k] { out = append(out, diff("gpus["+k+"].count", itoa(want[k]), itoa(got[k]))) } } return out } // cpuModelMatches compares model strings case-insensitively and allows // the operator to declare a substring (e.g. "E5-2680 v4") that matches // the verbose kernel-reported string ("Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz"). func cpuModelMatches(expected, actual string) bool { e := strings.ToLower(strings.TrimSpace(expected)) a := strings.ToLower(strings.TrimSpace(actual)) return e == a || strings.Contains(a, e) } // In Phase 3 all diffs are critical. Later phases may tier them. func diff(field, expected, actual string) model.SpecDiff { return model.SpecDiff{ Field: field, Expected: expected, Actual: actual, Severity: "critical", } } func absInt(n int) int { if n < 0 { return -n } return n } func itoa(n int) string { return fmt.Sprintf("%d", n) }