Initial commit: full Phases 1-6 implementation
CI / Lint + build + test (push) Has been cancelled

Post-repair hardware validation pipeline for Proxmox cluster hosts.
Go orchestrator + in-image agent + mkosi live image + bundled dnsmasq
PXE + SQLite + HTMX/SSE UI + notify registry + janitor + full docs.
This commit is contained in:
2026-04-17 21:32:10 -04:00
commit 9bb4b09a04
98 changed files with 11960 additions and 0 deletions
+134
View File
@@ -0,0 +1,134 @@
// Package logs owns per-run flat-file logs and their live SSE fan-out.
// A single Writer serialises writes for one run; a Hub keeps a cache
// per run so handlers can open/close freely without stepping on each
// other. Lines go to disk for persistence (reload + replay) and onto
// the events.Hub so the UI tile can tail live.
package logs
import (
"fmt"
"html"
"log"
"os"
"path/filepath"
"strings"
"sync"
"time"
"vetting/internal/events"
)
// Line is a single log entry for a run: a timestamp, a severity level
// and the message text.
type Line struct {
// TS is the entry's timestamp. Writer.Append substitutes the current
// UTC time when it is zero.
TS time.Time
// Level is the severity. Writer.Append treats an empty value as "info".
Level string // info|warn|error|debug
// Text is the message body. It is written to disk unmodified and is
// HTML-escaped by renderLogSSE before being published.
Text string
}
// Writer appends log lines for a single run to that run's file and
// publishes each line on the events hub. Append and Close both take mu,
// so calls on one Writer are serialised.
type Writer struct {
runID int64 // run this writer belongs to; used in log messages and the SSE event name
mu sync.Mutex // held for the duration of Append and Close
f *os.File // the run's log file; set to nil by Close
hub *events.Hub // destination for live events; Append skips publishing when nil
}
// Hub owns the per-run Writers. The orchestrator creates one Hub at
// startup and hands it to the api package.
type Hub struct {
dir string // directory that holds the run-<id>.log files
events *events.Hub // handed to every Writer for SSE fan-out
mu sync.Mutex // guards writers
writers map[int64]*Writer // Writer cache keyed by run ID; set to nil by Close
}
// NewHub makes sure dir exists (creating it and any missing parents)
// and returns a Hub with an empty writer cache whose Writers publish
// to ev. It fails only if the directory cannot be created.
func NewHub(dir string, ev *events.Hub) (*Hub, error) {
	err := os.MkdirAll(dir, 0o755)
	if err != nil {
		return nil, fmt.Errorf("mkdir log dir: %w", err)
	}

	hub := &Hub{
		dir:     dir,
		events:  ev,
		writers: make(map[int64]*Writer),
	}
	return hub, nil
}
// WriterFor returns the cached Writer for runID, opening the run's log
// file on first use. The file is opened append-only, so reopening an
// existing run's log (e.g. after a restart) adds to it rather than
// truncating it and nothing is lost.
//
// It returns an error if the file cannot be opened, or if the Hub has
// no writer cache, which is the state Close leaves it in. Without that
// check a late caller would panic on the nil-map insert below.
func (h *Hub) WriterFor(runID int64) (*Writer, error) {
	h.mu.Lock()
	defer h.mu.Unlock()

	// Close sets writers to nil; reading a nil map is fine but writing
	// to one panics, so refuse up front.
	if h.writers == nil {
		return nil, fmt.Errorf("writer for run %d: log hub is closed", runID)
	}
	if w, ok := h.writers[runID]; ok {
		return w, nil
	}

	// Use PathFor so the writer and the replay/report readers can never
	// disagree about the file name.
	path := h.PathFor(runID)
	f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
	if err != nil {
		return nil, fmt.Errorf("open %s: %w", path, err)
	}

	w := &Writer{runID: runID, f: f, hub: h.events}
	h.writers[runID] = w
	return w, nil
}
// Close closes every cached Writer and then drops the cache. A Writer
// that fails to close is logged and skipped so the remaining ones are
// still closed. Called from main on shutdown.
func (h *Hub) Close() {
	h.mu.Lock()
	defer h.mu.Unlock()

	for runID, writer := range h.writers {
		err := writer.Close()
		if err == nil {
			continue
		}
		log.Printf("logs: close run-%d: %v", runID, err)
	}
	h.writers = nil
}
// PathFor reports where the log file for runID lives on disk. It only
// builds the path and does not check that the file exists. Replay
// handlers and the report generator use it to locate the file.
func (h *Hub) PathFor(runID int64) string {
	name := fmt.Sprintf("run-%d.log", runID)
	return filepath.Join(h.dir, name)
}
// Append records one line for the run: it is written to the log file
// and then published as an SSE event named log-<runID>. A zero TS is
// replaced with the current UTC time and an empty Level with "info".
// A failed disk write is only logged, so the operator still sees the
// live tail even when disk IO is degraded.
func (w *Writer) Append(line Line) {
	w.mu.Lock()
	defer w.mu.Unlock()

	// Fill in defaults first so the disk record and the SSE payload agree.
	if line.Level == "" {
		line.Level = "info"
	}
	if line.TS.IsZero() {
		line.TS = time.Now().UTC()
	}

	ts := line.TS.Format(time.RFC3339Nano)
	level := strings.ToUpper(line.Level)
	entry := fmt.Sprintf("%s %5s %s\n", ts, level, line.Text)
	if _, err := w.f.WriteString(entry); err != nil {
		log.Printf("logs: write run-%d: %v", w.runID, err)
	}

	// No events hub configured: disk-only logging.
	if w.hub == nil {
		return
	}
	w.hub.Publish(events.Event{
		Name:    fmt.Sprintf("log-%d", w.runID),
		Payload: renderLogSSE(line),
	})
}
// Close closes the run's log file and returns the error from doing so.
// Once the file has been closed, further calls do nothing and return nil.
func (w *Writer) Close() error {
	w.mu.Lock()
	defer w.mu.Unlock()

	file := w.f
	if file == nil {
		return nil
	}
	// Clear the handle before closing so the writer is marked closed
	// even when Close reports an error.
	w.f = nil
	return file.Close()
}
// renderLogSSE builds the HTML fragment published for one log line: a
// <div class="log-line log-LEVEL"> holding the HH:MM:SS time and the
// text. The level is lower-cased and every interpolated value is
// HTML-escaped. The fragment is HTMX-compatible: the tile contains a
// <div id="log-N" hx-swap-oob="beforeend"> and each event appends one
// such line to it.
func renderLogSSE(l Line) string {
	class := html.EscapeString(strings.ToLower(l.Level))
	clock := html.EscapeString(l.TS.Format("15:04:05"))
	text := html.EscapeString(l.Text)
	return fmt.Sprintf(`<div class="log-line log-%s">%s %s</div>`, class, clock, text)
}
+120
View File
@@ -0,0 +1,120 @@
package logs_test
import (
"os"
"path/filepath"
"strings"
"testing"
"time"
"vetting/internal/events"
"vetting/internal/logs"
)
// TestAppendFansOutToSSE verifies the two guarantees of the log hub:
// (a) every line is persisted to the per-run file, and (b) every line
// is published as an SSE event with name log-<runID>. The UI relies on
// both — the file for reload replay, the event for live tail.
func TestAppendFansOutToSSE(t *testing.T) {
	dir := t.TempDir()
	hub := events.NewHub()
	lh, err := logs.NewHub(dir, hub)
	if err != nil {
		t.Fatalf("NewHub: %v", err)
	}
	defer lh.Close()

	// Subscribe before appending so neither event can be missed.
	_, ch, cancel := hub.Subscribe()
	defer cancel()

	w, err := lh.WriterFor(77)
	if err != nil {
		t.Fatalf("WriterFor: %v", err)
	}
	w.Append(logs.Line{Level: "info", Text: "hello from agent"})
	w.Append(logs.Line{Level: "error", Text: "<script>pwn</script>"})

	// Read one event at a time until both log events have arrived or
	// the overall deadline passes. Counting only log-* events means
	// heartbeats that sneak in cannot use up the budget, and the test
	// returns as soon as it has what it needs instead of always waiting
	// out the timeout for an event that may never come.
	deadline := time.Now().Add(500 * time.Millisecond)
	var got, logEvents []events.Event
	for len(logEvents) < 2 {
		batch := collect(ch, 1, time.Until(deadline))
		if len(batch) == 0 {
			break // timed out or channel closed
		}
		got = append(got, batch...)
		if strings.HasPrefix(batch[0].Name, "log-") {
			logEvents = append(logEvents, batch[0])
		}
	}
	if len(logEvents) < 2 {
		t.Fatalf("expected 2 log events, got %d (all=%+v)", len(logEvents), got)
	}
	for _, ev := range logEvents {
		if ev.Name != "log-77" {
			t.Fatalf("unexpected event name %q", ev.Name)
		}
	}

	// XSS protection: raw <script> must not appear — it's HTML-escaped.
	if strings.Contains(logEvents[1].Payload, "<script>") {
		t.Fatalf("log payload not escaped: %q", logEvents[1].Payload)
	}
	if !strings.Contains(logEvents[1].Payload, "&lt;script&gt;") {
		t.Fatalf("expected escaped <script>, got %q", logEvents[1].Payload)
	}

	// On disk: the file must contain both lines, with the text kept raw.
	path := filepath.Join(dir, "run-77.log")
	body, err := os.ReadFile(path)
	if err != nil {
		t.Fatalf("read log file: %v", err)
	}
	text := string(body)
	if !strings.Contains(text, "hello from agent") {
		t.Fatalf("disk log missing info line: %q", text)
	}
	if !strings.Contains(text, "<script>pwn</script>") {
		t.Fatalf("disk log should keep raw text (unescaped): %q", text)
	}
	if !strings.Contains(text, "INFO") || !strings.Contains(text, "ERROR") {
		t.Fatalf("disk log missing level prefix: %q", text)
	}
}
// TestWriterForIsCached verifies a second call returns the same Writer
// — otherwise parallel /log POSTs would race on file opens and possibly
// stomp on in-flight writes.
func TestWriterForIsCached(t *testing.T) {
hub := events.NewHub()
lh, err := logs.NewHub(t.TempDir(), hub)
if err != nil {
t.Fatalf("NewHub: %v", err)
}
defer lh.Close()
w1, err := lh.WriterFor(1)
if err != nil {
t.Fatalf("WriterFor: %v", err)
}
w2, err := lh.WriterFor(1)
if err != nil {
t.Fatalf("WriterFor: %v", err)
}
if w1 != w2 {
t.Fatalf("Writer not cached: %p vs %p", w1, w2)
}
}
// collect reads from ch until it has max events, the channel is
// closed, or deadline has elapsed — whichever comes first — and returns
// whatever it gathered. The result is never nil.
func collect(ch <-chan events.Event, max int, deadline time.Duration) []events.Event {
	expiry := time.NewTimer(deadline)
	defer expiry.Stop()

	received := make([]events.Event, 0)
	for remaining := max; remaining > 0; remaining-- {
		select {
		case <-expiry.C:
			return received
		case ev, open := <-ch:
			if !open {
				return received
			}
			received = append(received, ev)
		}
	}
	return received
}