package probes import ( "os" "path/filepath" "strconv" "strings" ) // EDACSample is one counter reading from /sys/devices/system/edac/mc/. // Kind is "edac_ce" (correctable ECC errors) or "edac_ue" // (uncorrectable — always a critical signal). Key identifies the memory // controller (e.g. "mc0"). Value is the cumulative count since boot; // the threshold evaluator flags it the moment it exceeds 0. type EDACSample struct { Kind string Key string Value float64 Unit string } // EDAC returns one EDACSample per (memory-controller × {ce,ue}) pair // that /sys exposes. Returns an empty slice when EDAC isn't available // (virtualized host, missing kernel driver, mdadm-style boards without // a controller node) — callers treat an empty return as "no data", // not "passed". Errors are swallowed for the same reason: a hot- // swapped DIMM that makes /sys blink briefly shouldn't fail the stage // before the real counter can be read. // // This is intentionally small — the sidecar polls periodically, so one // bad read is recovered on the next tick. The counters are monotonic, // so emitting the current raw value is correct. func EDAC() []EDACSample { root := "/sys/devices/system/edac/mc" entries, err := os.ReadDir(root) if err != nil { return nil } var out []EDACSample for _, e := range entries { name := e.Name() if !strings.HasPrefix(name, "mc") { continue } base := filepath.Join(root, name) if ce, ok := readCount(filepath.Join(base, "ce_count")); ok { out = append(out, EDACSample{Kind: "edac_ce", Key: name, Value: ce, Unit: "count"}) } if ue, ok := readCount(filepath.Join(base, "ue_count")); ok { out = append(out, EDACSample{Kind: "edac_ue", Key: name, Value: ue, Unit: "count"}) } } return out } // readCount reads a single decimal integer from a sysfs file and // returns it as a float. Returns (0, false) on any failure so callers // can skip the sample without a diagnostic. 
func readCount(path string) (float64, bool) {
	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		// Missing or unreadable file: report "no value" rather than an error.
		return 0, false
	}

	// Strip surrounding whitespace (including any trailing newline) before
	// parsing the content as a base-10 integer.
	count, parseErr := strconv.ParseInt(strings.TrimSpace(string(raw)), 10, 64)
	if parseErr != nil {
		return 0, false
	}

	return float64(count), true
}