From bda568b25c4713c80e696d66b7db4c20f6277c7c Mon Sep 17 00:00:00 2001 From: josh Date: Sun, 3 May 2026 20:55:14 -0400 Subject: [PATCH] Initial implementation: host lifecycle + PXE + admin dashboard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Go service for Proxmox homelab cluster provisioning. Handles PXE boot, Proxmox autoinstall (answer file generation), cluster join via SSH, and Infrastructure API registration. - Host state machine (registered → pxe_ready → installing → ready) - dnsmasq supervisor with MAC-based allowlist - iPXE script and Proxmox answer file generation - First-boot phone-home → cluster join → infra registration - Operation locking with expiry (409 on conflict) - SSE event hub for real-time dashboard updates - Admin dashboard (host grid, detail, registration form) - Config-driven server types with hot-reload - Docker deployment (multi-stage fat image) Co-Authored-By: Claude Opus 4.6 --- .gitignore | 27 +++ Makefile | 16 ++ deploy/Dockerfile | 28 +++ deploy/provisioning.example.yaml | 40 +++++ deploy/server-types.example.yaml | 22 +++ go.mod | 28 +++ go.sum | 63 +++++++ internal/api/boot.go | 165 ++++++++++++++++++ internal/api/hosts.go | 180 ++++++++++++++++++++ internal/api/render.go | 185 ++++++++++++++++++++ internal/api/smoke_test.go | 246 +++++++++++++++++++++++++++ internal/api/ui.go | 159 +++++++++++++++++ internal/config/config.go | 108 ++++++++++++ internal/config/servertypes.go | 118 +++++++++++++ internal/db/db.go | 81 +++++++++ internal/db/migrations/0001_init.sql | 43 +++++ internal/events/hub.go | 160 +++++++++++++++++ internal/httpserver/router.go | 59 +++++++ internal/infra/client.go | 92 ++++++++++ internal/model/model.go | 82 +++++++++ internal/orchestrator/cluster.go | 60 +++++++ internal/orchestrator/host.go | 111 ++++++++++++ internal/orchestrator/runner.go | 51 ++++++ internal/pxe/answer.go | 44 +++++ internal/pxe/firstboot.go | 28 +++ internal/pxe/ipxe.go | 19 +++ internal/pxe/signal_unix.go | 12 ++ internal/pxe/signal_windows.go | 9 + internal/pxe/supervisor.go | 161 ++++++++++++++++++ internal/statemachine/host.go | 82 +++++++++ internal/statemachine/host_test.go | 67 ++++++++ internal/store/hosts.go | 126 ++++++++++++++ internal/store/images.go | 101 +++++++++++ internal/store/locks.go | 46 +++++ internal/store/operations.go | 87 ++++++++++ internal/store/store.go | 5 + internal/web/embed.go | 6 + internal/web/static/app.css | 127 ++++++++++++++ internal/web/static/app.js | 23 +++ 39 files changed, 3067 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 deploy/Dockerfile create mode 100644 deploy/provisioning.example.yaml create mode 100644 deploy/server-types.example.yaml create mode 100644 go.mod create mode 100644 go.sum create mode 100644 internal/api/boot.go create mode 100644 internal/api/hosts.go create mode 100644 internal/api/render.go create mode 100644 internal/api/smoke_test.go create mode 100644 internal/api/ui.go create mode 100644 internal/config/config.go create mode 100644 internal/config/servertypes.go create mode 100644 internal/db/db.go create mode 100644 internal/db/migrations/0001_init.sql create mode 100644 internal/events/hub.go create mode 100644 internal/httpserver/router.go create mode 100644 internal/infra/client.go create mode 100644 internal/model/model.go create mode 100644 internal/orchestrator/cluster.go create mode 100644 internal/orchestrator/host.go create mode 100644 internal/orchestrator/runner.go create mode 100644 internal/pxe/answer.go create mode 100644 internal/pxe/firstboot.go create mode 100644 internal/pxe/ipxe.go create mode 100644 internal/pxe/signal_unix.go create mode 100644 internal/pxe/signal_windows.go create mode 100644 internal/pxe/supervisor.go create mode 100644 internal/statemachine/host.go create mode 100644 internal/statemachine/host_test.go create mode 100644 internal/store/hosts.go create mode 100644 internal/store/images.go create mode 100644 internal/store/locks.go create mode 100644 internal/store/operations.go create mode 100644 internal/store/store.go create mode 100644 internal/web/embed.go create mode 100644 internal/web/static/app.css create mode 100644 internal/web/static/app.js diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1cf3019 --- /dev/null +++ b/.gitignore @@ -0,0 +1,27 @@ +# Binary +provisioning +provisioning.exe + +# Runtime data +data/ +var/ + +# Config with secrets +provisioning.yaml +provisioning.production.yaml +server-types.yaml + +# IDE +.idea/ +.vscode/ +*.swp + +# OS +.DS_Store +Thumbs.db + +# Go +vendor/ + +# Templ generated +*_templ.go diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1347991 --- /dev/null +++ b/Makefile @@ -0,0 +1,16 @@ +.PHONY: build dev test clean + +build: + go build -o provisioning ./cmd/provisioning + +dev: + go run ./cmd/provisioning -config deploy/provisioning.example.yaml + +test: + go test ./... + +clean: + rm -f provisioning + +docker: + docker build -f deploy/Dockerfile -t provisioning . diff --git a/deploy/Dockerfile b/deploy/Dockerfile new file mode 100644 index 0000000..f73e2aa --- /dev/null +++ b/deploy/Dockerfile @@ -0,0 +1,28 @@ +FROM golang:1.23-bookworm AS builder + +WORKDIR /src +COPY go.mod go.sum ./ +RUN go mod download +COPY . . +RUN CGO_ENABLED=0 go build -o /provisioning ./cmd/provisioning + +FROM debian:bookworm-slim + +RUN apt-get update && apt-get install -y --no-install-recommends \ + dnsmasq \ + openssh-client \ + ipxe \ + ca-certificates \ + dmidecode \ + curl \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=builder /provisioning /usr/local/bin/provisioning + +RUN mkdir -p /data /etc/provisioning/keys + +EXPOSE 8080 +VOLUME ["/data", "/etc/provisioning"] + +ENTRYPOINT ["/usr/local/bin/provisioning"] +CMD ["-config", "/etc/provisioning/provisioning.yaml"] diff --git a/deploy/provisioning.example.yaml b/deploy/provisioning.example.yaml new file mode 100644 index 0000000..5d74764 --- /dev/null +++ b/deploy/provisioning.example.yaml @@ -0,0 +1,40 @@ +server: + bind: "0.0.0.0:8080" + public_url: "http://192.168.1.100:8080" + +database: + path: "./data/provisioning.db" + +pxe: + enabled: true + interface: "eth0" + subnet: "192.168.1.0/24" + runtime_dir: "./data/pxe" + tftp_root: "./data/tftp" + dnsmasq_bin: "/usr/sbin/dnsmasq" + +images: + dir: "./data/images" + +proxmox: + existing_node: "192.168.1.10" + cluster_name: "homelab" + join_fingerprint: "AA:BB:CC:DD:EE:FF:00:11:22:33:44:55:66:77:88:99:AA:BB:CC:DD:EE:FF:00:11:22:33:44:55:66:77:88:99" + +credentials: + ssh_private_key_path: "/etc/provisioning/keys/id_ed25519" + ssh_public_key: "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAEXAMPLE provisioning@homelab" + root_password_hash: "$6$rounds=5000$randomsalt$hashedpasswordhere" + +infrastructure: + base_url: "http://192.168.1.50:3000" + room_id: 1 + server_type_map: + minisforum-ms-01: 1 + minisforum-um790: 2 + timeout_seconds: 10 + +locks: + ttl_minutes: 60 + +server_types_path: "./server-types.yaml" diff --git a/deploy/server-types.example.yaml b/deploy/server-types.example.yaml new file mode 100644 index 0000000..5519502 --- /dev/null +++ b/deploy/server-types.example.yaml @@ -0,0 +1,22 @@ +server_types: + minisforum-ms-01: + display_name: "Minisforum MS-01" + boot_disk: "/dev/nvme0n1" + management_nic: "enp2s0" + gpu: false + hostname_prefix: "pve-ms" + expected_nics: + - name: "enp2s0" + speed: "2500" + - name: "enp3s0" + speed: "2500" + + minisforum-um790: + display_name: "Minisforum UM790 Pro" + boot_disk: "/dev/nvme0n1" + management_nic: "enp1s0" + gpu: true + hostname_prefix: "pve-um" + expected_nics: + - name: "enp1s0" + speed: "2500" diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..b3bfa71 --- /dev/null +++ b/go.mod @@ -0,0 +1,28 @@ +module provisioning + +go 1.23.0 + +require ( + github.com/fsnotify/fsnotify v1.8.0 + github.com/go-chi/chi/v5 v5.1.0 + golang.org/x/crypto v0.28.0 + gopkg.in/yaml.v3 v3.0.1 + modernc.org/sqlite v1.33.1 +) + +require ( + github.com/dustin/go-humanize v1.0.1 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/ncruces/go-strftime v0.1.9 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect + golang.org/x/sys v0.34.0 // indirect + golang.org/x/tools v0.35.0 // indirect + modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 // indirect + modernc.org/libc v1.55.3 // indirect + modernc.org/mathutil v1.6.0 // indirect + modernc.org/memory v1.8.0 // indirect + modernc.org/strutil v1.2.0 // indirect + modernc.org/token v1.1.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..a4a6297 --- /dev/null +++ b/go.sum @@ -0,0 +1,63 @@ +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M= +github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/go-chi/chi/v5 v5.1.0 h1:acVI1TYaD+hhedDJ3r54HyA6sExp3HfXq7QWEEY/xMw= +github.com/go-chi/chi/v5 v5.1.0/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8= +github.com/google/pprof v0.0.0-20240409012703-83162a5b38cd h1:gbpYu9NMq8jhDVbvlGkMFWCjLFlqqEZjEmObmhUy6Vo= +github.com/google/pprof v0.0.0-20240409012703-83162a5b38cd/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= +github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4= +github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= +golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= +golang.org/x/mod v0.26.0 h1:EGMPT//Ezu+ylkCijjPc+f4Aih7sZvaAr+O3EHBxvZg= +golang.org/x/mod v0.26.0/go.mod h1:/j6NAhSk8iQ723BGAUyoAcn7SlD7s15Dp9Nd/SfeaFQ= +golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw= +golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= +golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= +golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M= +golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0= +golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +modernc.org/cc/v4 v4.21.4 h1:3Be/Rdo1fpr8GrQ7IVw9OHtplU4gWbb+wNgeoBMmGLQ= +modernc.org/cc/v4 v4.21.4/go.mod h1:HM7VJTZbUCR3rV8EYBi9wxnJ0ZBRiGE5OeGXNA0IsLQ= +modernc.org/ccgo/v4 v4.19.2 h1:lwQZgvboKD0jBwdaeVCTouxhxAyN6iawF3STraAal8Y= +modernc.org/ccgo/v4 v4.19.2/go.mod h1:ysS3mxiMV38XGRTTcgo0DQTeTmAO4oCmJl1nX9VFI3s= +modernc.org/fileutil v1.3.0 h1:gQ5SIzK3H9kdfai/5x41oQiKValumqNTDXMvKo62HvE= +modernc.org/fileutil v1.3.0/go.mod h1:XatxS8fZi3pS8/hKG2GH/ArUogfxjpEKs3Ku3aK4JyQ= +modernc.org/gc/v2 v2.4.1 h1:9cNzOqPyMJBvrUipmynX0ZohMhcxPtMccYgGOJdOiBw= +modernc.org/gc/v2 v2.4.1/go.mod h1:wzN5dK1AzVGoH6XOzc3YZ+ey/jPgYHLuVckd62P0GYU= +modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 h1:5D53IMaUuA5InSeMu9eJtlQXS2NxAhyWQvkKEgXZhHI= +modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6/go.mod h1:Qz0X07sNOR1jWYCrJMEnbW/X55x206Q7Vt4mz6/wHp4= +modernc.org/libc v1.55.3 h1:AzcW1mhlPNrRtjS5sS+eW2ISCgSOLLNyFzRh/V3Qj/U= +modernc.org/libc v1.55.3/go.mod h1:qFXepLhz+JjFThQ4kzwzOjA/y/artDeg+pcYnY+Q83w= +modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4= +modernc.org/mathutil v1.6.0/go.mod h1:Ui5Q9q1TR2gFm0AQRqQUaBWFLAhQpCwNcuhBOSedWPo= +modernc.org/memory v1.8.0 h1:IqGTL6eFMaDZZhEWwcREgeMXYwmW83LYW8cROZYkg+E= +modernc.org/memory v1.8.0/go.mod h1:XPZ936zp5OMKGWPqbD3JShgd/ZoQ7899TUuQqxY+peU= +modernc.org/opt v0.1.3 h1:3XOZf2yznlhC+ibLltsDGzABUGVx8J6pnFMS3E4dcq4= +modernc.org/opt v0.1.3/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0= +modernc.org/sortutil v1.2.0 h1:jQiD3PfS2REGJNzNCMMaLSp/wdMNieTbKX920Cqdgqc= +modernc.org/sortutil v1.2.0/go.mod h1:TKU2s7kJMf1AE84OoiGppNHJwvB753OYfNl2WRb++Ss= +modernc.org/sqlite v1.33.1 h1:trb6Z3YYoeM9eDL1O8do81kP+0ejv+YzgyFo+Gwy0nM= +modernc.org/sqlite v1.33.1/go.mod h1:pXV2xHxhzXZsgT/RtTFAPY6JJDEvOTcTdwADQCCWD4k= +modernc.org/strutil v1.2.0 h1:agBi9dp1I+eOnxXeiZawM8F4LawKv4NzGWSaLfyeNZA= +modernc.org/strutil v1.2.0/go.mod h1:/mdcBmfOibveCTBxUl5B5l6W+TTH1FXPLHZE6bTosX0= +modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= +modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= diff --git a/internal/api/boot.go b/internal/api/boot.go new file mode 100644 index 0000000..8364cbb --- /dev/null +++ b/internal/api/boot.go @@ -0,0 +1,165 @@ +package api + +import ( + "encoding/json" + "errors" + "log" + "net/http" + "strings" + + "provisioning/internal/config" + "provisioning/internal/model" + "provisioning/internal/orchestrator" + "provisioning/internal/pxe" + "provisioning/internal/statemachine" + "provisioning/internal/store" + + "github.com/go-chi/chi/v5" +) + +type BootAPI struct { + Hosts *store.Hosts + Images *store.Images + Runner *orchestrator.Runner + Orchestrator *orchestrator.HostOrchestrator + Config *config.Config + ServerTypes *config.ServerTypeRegistry +} + +func (a *BootAPI) IPXEScript(w http.ResponseWriter, r *http.Request) { + mac := normalizeMAC(chi.URLParam(r, "mac")) + host, err := a.Hosts.GetByMAC(r.Context(), mac) + if err != nil { + if errors.Is(err, store.ErrNotFound) { + http.Error(w, "#!ipxe\nexit", http.StatusNotFound) + return + } + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + + img, err := a.Images.GetDefault(r.Context()) + if err != nil { + http.Error(w, "#!ipxe\necho No default image configured\nshell", http.StatusServiceUnavailable) + return + } + + if host.State == model.StatePXEReady { + a.Runner.Transition(r.Context(), host.ID, statemachine.TriggerPXEScriptServed) + } + + w.Header().Set("Content-Type", "text/plain") + w.Write([]byte(pxe.BuildIPXEScript(a.Config.Server.PublicURL, img, mac))) +} + +func (a *BootAPI) AnswerFile(w http.ResponseWriter, r *http.Request) { + var sysInfo struct { + MAC string `json:"mac"` + } + if err := json.NewDecoder(r.Body).Decode(&sysInfo); err != nil { + http.Error(w, "invalid json", http.StatusBadRequest) + return + } + + mac := normalizeMAC(sysInfo.MAC) + host, err := a.Hosts.GetByMAC(r.Context(), mac) + if err != nil { + if errors.Is(err, store.ErrNotFound) { + http.Error(w, "unknown host", http.StatusForbidden) + return + } + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + + st, ok := a.ServerTypes.Get(host.ServerType) + if !ok { + http.Error(w, "unknown server type", http.StatusInternalServerError) + return + } + + if host.State == model.StatePXEBooted { + a.Runner.Transition(r.Context(), host.ID, statemachine.TriggerAnswerServed) + } + + answer := pxe.GenerateAnswerFile(host, st, a.Config) + w.Header().Set("Content-Type", "application/toml") + w.Write([]byte(answer)) +} + +func (a *BootAPI) InstallComplete(w http.ResponseWriter, r *http.Request) { + id, ok := idFromURL(w, r) + if !ok { + return + } + + host, err := a.Hosts.Get(r.Context(), id) + if err != nil { + writeJSONErr(w, http.StatusNotFound, "host not found") + return + } + + if host.State == model.StateInstalling { + if _, err := a.Runner.Transition(r.Context(), host.ID, statemachine.TriggerInstallWebhook); err != nil { + log.Printf("host %d: install-complete transition failed: %v", host.ID, err) + } + } + + w.WriteHeader(http.StatusOK) +} + +func (a *BootAPI) FirstBootScript(w http.ResponseWriter, r *http.Request) { + id, ok := idFromURL(w, r) + if !ok { + return + } + + host, err := a.Hosts.Get(r.Context(), id) + if err != nil { + http.Error(w, "host not found", http.StatusNotFound) + return + } + + st, ok := a.ServerTypes.Get(host.ServerType) + if !ok { + http.Error(w, "unknown server type", http.StatusInternalServerError) + return + } + + script := pxe.GenerateFirstBootScript(host, st, a.Config) + w.Header().Set("Content-Type", "text/x-shellscript") + w.Write([]byte(script)) +} + +func (a *BootAPI) PhoneHome(w http.ResponseWriter, r *http.Request) { + id, ok := idFromURL(w, r) + if !ok { + return + } + + var req struct { + IP string `json:"ip"` + HardwareID string `json:"hardware_id"` + } + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeJSONErr(w, http.StatusBadRequest, "invalid json") + return + } + + host, err := a.Hosts.Get(r.Context(), id) + if err != nil { + writeJSONErr(w, http.StatusNotFound, "host not found") + return + } + + log.Printf("host %d (%s): phone-home from %s, hwid=%s", host.ID, host.Hostname, req.IP, req.HardwareID) + a.Orchestrator.HandlePhoneHome(r.Context(), host.ID, req.IP, req.HardwareID) + + writeJSON(w, http.StatusOK, map[string]any{"ok": true}) +} + +func normalizeMAC(m string) string { + m = strings.ToLower(strings.TrimSpace(m)) + m = strings.ReplaceAll(m, "-", ":") + return m +} diff --git a/internal/api/hosts.go b/internal/api/hosts.go new file mode 100644 index 0000000..4e0f766 --- /dev/null +++ b/internal/api/hosts.go @@ -0,0 +1,180 @@ +package api + +import ( + "context" + "encoding/json" + "errors" + "net/http" + "strconv" + + "provisioning/internal/config" + "provisioning/internal/model" + "provisioning/internal/orchestrator" + "provisioning/internal/pxe" + "provisioning/internal/statemachine" + "provisioning/internal/store" + + "github.com/go-chi/chi/v5" +) + +type HostAPI struct { + Hosts *store.Hosts + Ops *store.Operations + Locks *store.Locks + Images *store.Images + Runner *orchestrator.Runner + PXE *pxe.Supervisor + Config *config.Config + ServerTypes *config.ServerTypeRegistry +} + +func (a *HostAPI) List(w http.ResponseWriter, r *http.Request) { + hosts, err := a.Hosts.List(r.Context()) + if err != nil { + writeJSONErr(w, http.StatusInternalServerError, "failed to list hosts") + return + } + writeJSON(w, http.StatusOK, hosts) +} + +func (a *HostAPI) Get(w http.ResponseWriter, r *http.Request) { + host, ok := a.hostFromURL(w, r) + if !ok { + return + } + writeJSON(w, http.StatusOK, host) +} + +func (a *HostAPI) Create(w http.ResponseWriter, r *http.Request) { + var req struct { + Hostname string `json:"hostname"` + MAC string `json:"mac"` + ServerType string `json:"server_type"` + Notes string `json:"notes"` + } + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeJSONErr(w, http.StatusBadRequest, "invalid json") + return + } + if req.Hostname == "" || req.MAC == "" || req.ServerType == "" { + writeJSONErr(w, http.StatusBadRequest, "hostname, mac, and server_type are required") + return + } + if _, ok := a.ServerTypes.Get(req.ServerType); !ok { + writeJSONErr(w, http.StatusBadRequest, "unknown server_type") + return + } + + id, err := a.Hosts.Create(r.Context(), model.Host{ + Hostname: req.Hostname, + MAC: req.MAC, + ServerType: req.ServerType, + Notes: req.Notes, + }) + if err != nil { + writeJSONErr(w, http.StatusConflict, "host already exists: "+err.Error()) + return + } + + a.reloadPXE() + + host, _ := a.Hosts.Get(r.Context(), id) + writeJSON(w, http.StatusCreated, host) +} + +func (a *HostAPI) Delete(w http.ResponseWriter, r *http.Request) { + id, ok := idFromURL(w, r) + if !ok { + return + } + if err := a.Hosts.Delete(r.Context(), id); err != nil { + if errors.Is(err, store.ErrNotFound) { + writeJSONErr(w, http.StatusNotFound, "host not found") + return + } + writeJSONErr(w, http.StatusInternalServerError, "failed to delete host") + return + } + a.reloadPXE() + w.WriteHeader(http.StatusNoContent) +} + +func (a *HostAPI) Rebuild(w http.ResponseWriter, r *http.Request) { + host, ok := a.hostFromURL(w, r) + if !ok { + return + } + + locked, _ := a.Locks.IsLocked(r.Context(), host.ID) + if locked { + writeJSONErr(w, http.StatusConflict, "host is locked by another operation") + return + } + + opID, err := a.Ops.Create(r.Context(), model.Operation{ + HostID: host.ID, + Kind: model.OpRebuildProxmox, + }) + if err != nil { + writeJSONErr(w, http.StatusInternalServerError, "failed to create operation") + return + } + if err := a.Locks.Acquire(r.Context(), host.ID, opID); err != nil { + writeJSONErr(w, http.StatusInternalServerError, "failed to acquire lock") + return + } + + if _, err := a.Runner.Transition(r.Context(), host.ID, statemachine.TriggerRebuildRequested); err != nil { + _ = a.Locks.Release(r.Context(), host.ID) + writeJSONErr(w, http.StatusConflict, err.Error()) + return + } + + a.reloadPXE() + writeJSON(w, http.StatusOK, map[string]any{"ok": true, "operation_id": opID}) +} + +func (a *HostAPI) hostFromURL(w http.ResponseWriter, r *http.Request) (*model.Host, bool) { + id, ok := idFromURL(w, r) + if !ok { + return nil, false + } + host, err := a.Hosts.Get(r.Context(), id) + if err != nil { + if errors.Is(err, store.ErrNotFound) { + writeJSONErr(w, http.StatusNotFound, "host not found") + } else { + writeJSONErr(w, http.StatusInternalServerError, "failed to get host") + } + return nil, false + } + return host, true +} + +func (a *HostAPI) reloadPXE() { + if a.PXE == nil { + return + } + hosts, _ := a.Hosts.List(context.Background()) + _ = a.PXE.Reload(hosts) +} + +func idFromURL(w http.ResponseWriter, r *http.Request) (int64, bool) { + s := chi.URLParam(r, "id") + id, err := strconv.ParseInt(s, 10, 64) + if err != nil || id <= 0 { + writeJSONErr(w, http.StatusBadRequest, "invalid id") + return 0, false + } + return id, true +} + +func writeJSON(w http.ResponseWriter, status int, body any) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + _ = json.NewEncoder(w).Encode(body) +} + +func writeJSONErr(w http.ResponseWriter, status int, msg string) { + writeJSON(w, status, map[string]any{"ok": false, "error": msg}) +} diff --git a/internal/api/render.go b/internal/api/render.go new file mode 100644 index 0000000..fdc9326 --- /dev/null +++ b/internal/api/render.go @@ -0,0 +1,185 @@ +package api + +import ( + "fmt" + "html" + "net/http" + "strings" + + "provisioning/internal/model" +) + +func renderHTML(w http.ResponseWriter, body string) { + w.Header().Set("Content-Type", "text/html; charset=utf-8") + w.Write([]byte(body)) +} + +func dashboardPage(hosts []model.Host) string { + var tiles strings.Builder + for _, h := range hosts { + tiles.WriteString(hostTile(h)) + } + if len(hosts) == 0 { + tiles.WriteString(`

No hosts registered. Register one.

`) + } + return layout("Dashboard", fmt.Sprintf(` +
+ Register Host + %d hosts +
+
%s
+ `, len(hosts), tiles.String())) +} + +func hostTile(h model.Host) string { + stateClass := stateColor(h.State) + return fmt.Sprintf(` + +
%s
+
%s
+
%s
+
%s
+
+ `, h.ID, stateClass, h.ID, html.EscapeString(h.Hostname), html.EscapeString(h.ServerType), h.State, h.MAC) +} + +func hostFormPage(types []string, errMsg string, prefill *model.Host) string { + var opts strings.Builder + for _, t := range types { + selected := "" + if prefill != nil && prefill.ServerType == t { + selected = " selected" + } + opts.WriteString(fmt.Sprintf(``, t, selected, t)) + } + errHTML := "" + if errMsg != "" { + errHTML = fmt.Sprintf(`
%s
`, html.EscapeString(errMsg)) + } + hostname, mac, notes := "", "", "" + if prefill != nil { + hostname = html.EscapeString(prefill.Hostname) + mac = html.EscapeString(prefill.MAC) + notes = html.EscapeString(prefill.Notes) + } + return layout("Register Host", fmt.Sprintf(` +

Register Host

+ %s +
+ + + + + +
+ `, errHTML, hostname, mac, opts.String(), notes)) +} + +func hostDetailPage(h *model.Host, ops []model.Operation) string { + stateClass := stateColor(h.State) + canRebuild := h.State == model.StateRegistered || h.State == model.StateReady || h.State == model.StateFailed + + var actions strings.Builder + if canRebuild { + actions.WriteString(fmt.Sprintf(`
`, h.ID)) + } + actions.WriteString(fmt.Sprintf(`
`, h.ID)) + + var opsHTML strings.Builder + for _, op := range ops { + duration := "" + if op.CompletedAt != nil { + duration = op.CompletedAt.Sub(op.StartedAt).Truncate(1e9).String() + } + errCell := "" + if op.ErrorMessage != "" { + errCell = html.EscapeString(op.ErrorMessage) + } + opsHTML.WriteString(fmt.Sprintf(`%s%s%s%s%s`, + op.Kind, op.State, op.StartedAt.Format("2006-01-02 15:04"), duration, errCell)) + } + + ip := h.IPAddress + if ip == "" { + ip = "—" + } + + return layout(h.Hostname, fmt.Sprintf(` +
+

%s

+ %s +
+ + + + + +
MAC%s
Server Type%s
IP Address%s
Notes%s
+
%s
+

Operations

+ + + %s +
KindStateStartedDurationError
+ `, html.EscapeString(h.Hostname), stateClass, h.State, h.MAC, h.ServerType, ip, html.EscapeString(h.Notes), actions.String(), opsHTML.String())) +} + +func imagesPage(images []model.Image) string { + var rows strings.Builder + for _, img := range images { + def := "" + if img.IsDefault { + def = "✓" + } + rows.WriteString(fmt.Sprintf(`%s%s%s%s%s`, + html.EscapeString(img.Name), img.Kind, img.Version, def, img.CreatedAt.Format("2006-01-02"))) + } + return layout("Images", fmt.Sprintf(` +

Boot Images

+ + + %s +
NameKindVersionDefaultAdded
+ `, rows.String())) +} + +func stateColor(s model.HostState) string { + switch s { + case model.StateRegistered: + return "state-grey" + case model.StatePXEReady, model.StatePXEBooted, model.StateInstalling: + return "state-blue" + case model.StateInstalled, model.StateFirstBoot, model.StateJoining: + return "state-amber" + case model.StateReady: + return "state-green" + case model.StateFailed: + return "state-red" + default: + return "state-grey" + } +} + +func layout(title, body string) string { + return fmt.Sprintf(` + + + + + %s — Provisioning + + + + +
%s
+ + +`, html.EscapeString(title), body) +} diff --git a/internal/api/smoke_test.go b/internal/api/smoke_test.go new file mode 100644 index 0000000..5bf6e57 --- /dev/null +++ b/internal/api/smoke_test.go @@ -0,0 +1,246 @@ +package api_test + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "testing" + + "provisioning/internal/api" + "provisioning/internal/config" + "provisioning/internal/db" + "provisioning/internal/events" + "provisioning/internal/httpserver" + "provisioning/internal/model" + "provisioning/internal/orchestrator" + "provisioning/internal/pxe" + "provisioning/internal/store" +) + +func newTestServer(t *testing.T) *httptest.Server { + t.Helper() + tmp := t.TempDir() + + database, err := db.Open(filepath.Join(tmp, "test.db")) + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { database.Close() }) + + hosts := &store.Hosts{DB: database} + ops := &store.Operations{DB: database} + locks := &store.Locks{DB: database, TTLMinutes: 60} + images := &store.Images{DB: database} + hub := events.NewHub() + t.Cleanup(func() { hub.Shutdown(context.Background()) }) + + cfg := &config.Config{ + Server: config.Server{ + Bind: "127.0.0.1:0", + PublicURL: "http://localhost:8080", + }, + Locks: config.Locks{TTLMinutes: 60}, + } + + serverTypes := mustLoadServerTypes(t, tmp) + + runner := &orchestrator.Runner{ + Hosts: hosts, + Ops: ops, + Locks: locks, + Hub: hub, + } + + pxeSupervisor := pxe.NewSupervisor(pxe.SupervisorConfig{Enabled: false}) + + hostAPI := &api.HostAPI{ + Hosts: hosts, + Ops: ops, + Locks: locks, + Images: images, + Runner: runner, + PXE: pxeSupervisor, + Config: cfg, + ServerTypes: serverTypes, + } + + hostOrch := &orchestrator.HostOrchestrator{ + Runner: runner, + Hosts: hosts, + Ops: ops, + Locks: locks, + Cluster: &orchestrator.ClusterJoiner{}, + Config: cfg, + ServerTypes: serverTypes, + } + + bootAPI := &api.BootAPI{ + Hosts: hosts, + Images: images, + Runner: runner, + Orchestrator: hostOrch, + Config: cfg, + ServerTypes: serverTypes, + } + + ui := &api.UI{ + Hosts: hosts, + Ops: ops, + Locks: locks, + Images: images, + Runner: runner, + Hub: hub, + PXE: pxeSupervisor, + Config: cfg, + ServerTypes: serverTypes, + } + + router := httpserver.NewRouter(httpserver.Deps{ + HostAPI: hostAPI, + BootAPI: bootAPI, + UI: ui, + Hub: hub, + }) + + return httptest.NewServer(router) +} + +func mustLoadServerTypes(t *testing.T, dir string) *config.ServerTypeRegistry { + t.Helper() + path := filepath.Join(dir, "server-types.yaml") + content := []byte(`server_types: + test-type: + display_name: "Test Type" + boot_disk: "/dev/sda" + management_nic: "eth0" + gpu: false + hostname_prefix: "pve-test" +`) + if err := writeTestFile(path, content); err != nil { + t.Fatal(err) + } + reg, err := config.LoadServerTypes(path) + if err != nil { + t.Fatal(err) + } + return reg +} + +func writeTestFile(path string, data []byte) error { + return os.WriteFile(path, data, 0o644) +} + +func TestCreateAndListHosts(t *testing.T) { + ts := newTestServer(t) + defer ts.Close() + + // Create host via JSON API + body := `{"hostname":"pve-test-01","mac":"aa:bb:cc:dd:ee:01","server_type":"test-type"}` + resp, err := http.Post(ts.URL+"/api/hosts", "application/json", bytes.NewBufferString(body)) + if err != nil { + t.Fatal(err) + } + if resp.StatusCode != http.StatusCreated { + t.Fatalf("create: got %d, want %d", resp.StatusCode, http.StatusCreated) + } + + var created model.Host + json.NewDecoder(resp.Body).Decode(&created) + resp.Body.Close() + if created.Hostname != "pve-test-01" { + t.Fatalf("hostname = %q, want %q", created.Hostname, "pve-test-01") + } + if created.State != model.StateRegistered { + t.Fatalf("state = %q, want %q", created.State, model.StateRegistered) + } + + // List hosts + resp, err = http.Get(ts.URL + "/api/hosts") + if err != nil { + t.Fatal(err) + } + var hosts []model.Host + json.NewDecoder(resp.Body).Decode(&hosts) + resp.Body.Close() + if len(hosts) != 1 { + t.Fatalf("list: got %d hosts, want 1", len(hosts)) + } +} + +func TestRebuildTransition(t *testing.T) { + ts := newTestServer(t) + defer ts.Close() + + // Create host + body := `{"hostname":"pve-test-02","mac":"aa:bb:cc:dd:ee:02","server_type":"test-type"}` + resp, _ := http.Post(ts.URL+"/api/hosts", "application/json", bytes.NewBufferString(body)) + var created model.Host + json.NewDecoder(resp.Body).Decode(&created) + resp.Body.Close() + + // Trigger rebuild + resp, err := http.Post(ts.URL+"/api/hosts/"+itoa(created.ID)+"/rebuild", "application/json", nil) + if err != nil { + t.Fatal(err) + } + if resp.StatusCode != http.StatusOK { + t.Fatalf("rebuild: got %d, want %d", resp.StatusCode, http.StatusOK) + } + resp.Body.Close() + + // Verify state is pxe_ready + resp, _ = http.Get(ts.URL + "/api/hosts/" + itoa(created.ID)) + var host model.Host + json.NewDecoder(resp.Body).Decode(&host) + resp.Body.Close() + if host.State != model.StatePXEReady { + t.Fatalf("state = %q, want %q", host.State, model.StatePXEReady) + } +} + +func TestDuplicateRebuildConflict(t *testing.T) { + ts := newTestServer(t) + defer ts.Close() + + body := `{"hostname":"pve-test-03","mac":"aa:bb:cc:dd:ee:03","server_type":"test-type"}` + resp, _ := http.Post(ts.URL+"/api/hosts", "application/json", bytes.NewBufferString(body)) + var created model.Host + json.NewDecoder(resp.Body).Decode(&created) + resp.Body.Close() + + // First rebuild + resp, _ = http.Post(ts.URL+"/api/hosts/"+itoa(created.ID)+"/rebuild", "application/json", nil) + resp.Body.Close() + + // Second rebuild should 409 + resp, _ = http.Post(ts.URL+"/api/hosts/"+itoa(created.ID)+"/rebuild", "application/json", nil) + if resp.StatusCode != http.StatusConflict { + t.Fatalf("second rebuild: got %d, want %d", resp.StatusCode, http.StatusConflict) + } + resp.Body.Close() +} + +func TestDashboardHTML(t *testing.T) { + ts := newTestServer(t) + defer ts.Close() + + resp, err := http.Get(ts.URL + "/") + if err != nil { + t.Fatal(err) + } + if resp.StatusCode != http.StatusOK { + t.Fatalf("dashboard: got %d", resp.StatusCode) + } + if ct := resp.Header.Get("Content-Type"); ct != "text/html; charset=utf-8" { + t.Fatalf("content-type = %q", ct) + } +} + +func itoa(i int64) string { + return fmt.Sprintf("%d", i) +} diff --git a/internal/api/ui.go b/internal/api/ui.go new file mode 100644 index 0000000..0b18227 --- /dev/null +++ b/internal/api/ui.go @@ -0,0 +1,159 @@ +package api + +import ( + "errors" + "fmt" + "net/http" + "regexp" + "strings" + + "provisioning/internal/config" + "provisioning/internal/events" + "provisioning/internal/model" + "provisioning/internal/orchestrator" + "provisioning/internal/pxe" + "provisioning/internal/statemachine" + "provisioning/internal/store" + + "github.com/go-chi/chi/v5" +) + +type UI struct { + Hosts *store.Hosts + Ops *store.Operations + Locks *store.Locks + Images *store.Images + Runner *orchestrator.Runner + Hub *events.Hub + PXE *pxe.Supervisor + Config *config.Config + ServerTypes *config.ServerTypeRegistry +} + +func (u *UI) Dashboard(w http.ResponseWriter, r *http.Request) { + hosts, _ := u.Hosts.List(r.Context()) + renderHTML(w, dashboardPage(hosts)) +} + +func (u *UI) NewHostForm(w http.ResponseWriter, r *http.Request) { + types := u.ServerTypes.Keys() + renderHTML(w, hostFormPage(types, "", nil)) +} + +func (u *UI) CreateHost(w http.ResponseWriter, r *http.Request) { + if err := r.ParseForm(); err != nil { + http.Error(w, "bad form", http.StatusBadRequest) + return + } + hostname := strings.TrimSpace(r.FormValue("hostname")) + mac := strings.TrimSpace(r.FormValue("mac")) + serverType := r.FormValue("server_type") + notes := r.FormValue("notes") + + var errs []string + if hostname == "" { + errs = append(errs, "Hostname is required") + } + if !isValidMAC(mac) { + errs = append(errs, "Invalid MAC address format (expected xx:xx:xx:xx:xx:xx)") + } + if _, ok := u.ServerTypes.Get(serverType); !ok { + errs = append(errs, "Invalid server type") + } + if len(errs) > 0 { + types := u.ServerTypes.Keys() + renderHTML(w, hostFormPage(types, strings.Join(errs, "; "), &model.Host{ + Hostname: hostname, + MAC: mac, + ServerType: serverType, + Notes: notes, + })) + return + } + + _, err := u.Hosts.Create(r.Context(), model.Host{ + Hostname: hostname, + MAC: mac, + ServerType: serverType, + Notes: notes, + }) + if err != nil { + types := u.ServerTypes.Keys() + renderHTML(w, hostFormPage(types, "Host already exists: "+err.Error(), nil)) + return + } + + hosts, _ := u.Hosts.List(r.Context()) + _ = u.PXE.Reload(hosts) + http.Redirect(w, r, "/", http.StatusSeeOther) +} + +func (u *UI) HostDetail(w http.ResponseWriter, r *http.Request) { + idStr := chi.URLParam(r, "id") + var id int64 + fmt.Sscanf(idStr, "%d", &id) + + host, err := u.Hosts.Get(r.Context(), id) + if err != nil { + if errors.Is(err, store.ErrNotFound) { + http.Error(w, "Host not found", http.StatusNotFound) + return + } + http.Error(w, "Internal error", http.StatusInternalServerError) + return + } + ops, _ := u.Ops.ListByHost(r.Context(), host.ID) + renderHTML(w, hostDetailPage(host, ops)) +} + +func (u *UI) TriggerRebuild(w http.ResponseWriter, r *http.Request) { + idStr := chi.URLParam(r, "id") + var id int64 + fmt.Sscanf(idStr, "%d", &id) + + host, err := u.Hosts.Get(r.Context(), id) + if err != nil { + http.Error(w, "Host not found", http.StatusNotFound) + return + } + + locked, _ := u.Locks.IsLocked(r.Context(), host.ID) + if locked { + http.Redirect(w, r, fmt.Sprintf("/hosts/%d", id), http.StatusSeeOther) + return + } + + opID, _ := u.Ops.Create(r.Context(), model.Operation{ + HostID: host.ID, + Kind: model.OpRebuildProxmox, + }) + _ = u.Locks.Acquire(r.Context(), host.ID, opID) + u.Runner.Transition(r.Context(), host.ID, statemachine.TriggerRebuildRequested) + + hosts, _ := u.Hosts.List(r.Context()) + _ = u.PXE.Reload(hosts) + + http.Redirect(w, r, fmt.Sprintf("/hosts/%d", id), http.StatusSeeOther) +} + +func (u *UI) DeleteHost(w http.ResponseWriter, r *http.Request) { + idStr := chi.URLParam(r, "id") + var id int64 + fmt.Sscanf(idStr, "%d", &id) + + _ = u.Hosts.Delete(r.Context(), id) + hosts, _ := u.Hosts.List(r.Context()) + _ = u.PXE.Reload(hosts) + http.Redirect(w, r, "/", http.StatusSeeOther) +} + +func (u *UI) ImagesPage(w http.ResponseWriter, r *http.Request) { + images, _ := u.Images.List(r.Context()) + renderHTML(w, imagesPage(images)) +} + +var macRegex = regexp.MustCompile(`^([0-9a-fA-F]{2}[:\-]){5}[0-9a-fA-F]{2}$`) + +func isValidMAC(mac string) bool { + return macRegex.MatchString(strings.TrimSpace(mac)) +} diff --git a/internal/config/config.go b/internal/config/config.go new file mode 100644 index 0000000..d08b14d --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,108 @@ +package config + +import ( + "fmt" + "os" + + "gopkg.in/yaml.v3" +) + +type Config struct { + Server Server `yaml:"server"` + Database Database `yaml:"database"` + PXE PXE `yaml:"pxe"` + Images Images `yaml:"images"` + Proxmox Proxmox `yaml:"proxmox"` + Credentials Credentials `yaml:"credentials"` + Infrastructure Infrastructure `yaml:"infrastructure"` + Locks Locks `yaml:"locks"` + ServerTypePath string `yaml:"server_types_path"` +} + +type Server struct { + Bind string `yaml:"bind"` + PublicURL string `yaml:"public_url"` +} + +type Database struct { + Path string `yaml:"path"` +} + +type PXE struct { + Enabled bool `yaml:"enabled"` + Interface string `yaml:"interface"` + Subnet string `yaml:"subnet"` + RuntimeDir string `yaml:"runtime_dir"` + TFTPRoot string `yaml:"tftp_root"` + DnsmasqBin string `yaml:"dnsmasq_bin"` +} + +type Images struct { + Dir string `yaml:"dir"` +} + +type Proxmox struct { + ExistingNode string `yaml:"existing_node"` + ClusterName string `yaml:"cluster_name"` + JoinFingerprint string `yaml:"join_fingerprint"` +} + +type Credentials struct { + SSHPrivateKeyPath string `yaml:"ssh_private_key_path"` + SSHPublicKey string `yaml:"ssh_public_key"` + RootPasswordHash string `yaml:"root_password_hash"` +} + +type Infrastructure struct { + BaseURL string `yaml:"base_url"` + RoomID int `yaml:"room_id"` + ServerTypeMap map[string]int `yaml:"server_type_map"` + TimeoutSec int `yaml:"timeout_seconds"` +} + +type Locks struct { + TTLMinutes int `yaml:"ttl_minutes"` +} + +func Load(path string) (*Config, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("read config: %w", err) + } + cfg := &Config{} + if err := yaml.Unmarshal(data, cfg); err != nil { + return nil, fmt.Errorf("parse config: %w", err) + } + applyDefaults(cfg) + return cfg, nil +} + +func applyDefaults(cfg *Config) { + if cfg.Server.Bind == "" { + cfg.Server.Bind = "0.0.0.0:8080" + } + if cfg.Database.Path == "" { + cfg.Database.Path = "./data/provisioning.db" + } + if cfg.PXE.RuntimeDir == "" { + cfg.PXE.RuntimeDir = "./data/pxe" + } + if cfg.PXE.TFTPRoot == "" { + cfg.PXE.TFTPRoot = "./data/tftp" + } + if cfg.PXE.DnsmasqBin == "" { + cfg.PXE.DnsmasqBin = "/usr/sbin/dnsmasq" + } + if cfg.Images.Dir == "" { + cfg.Images.Dir = "./data/images" + } + if cfg.Locks.TTLMinutes == 0 { + cfg.Locks.TTLMinutes = 60 + } + if cfg.Infrastructure.TimeoutSec == 0 { + cfg.Infrastructure.TimeoutSec = 10 + } + if cfg.ServerTypePath == "" { + cfg.ServerTypePath = "./server-types.yaml" + } +} diff --git a/internal/config/servertypes.go b/internal/config/servertypes.go new file mode 100644 index 0000000..ffd2b3b --- /dev/null +++ b/internal/config/servertypes.go @@ -0,0 +1,118 @@ +package config + +import ( + "fmt" + "log" + "os" + "sync" + + "provisioning/internal/model" + + "github.com/fsnotify/fsnotify" + "gopkg.in/yaml.v3" +) + +type serverTypesFile struct { + ServerTypes map[string]model.ServerType `yaml:"server_types"` +} + +type ServerTypeRegistry struct { + mu sync.RWMutex + types map[string]model.ServerType + path string +} + +func LoadServerTypes(path string) (*ServerTypeRegistry, error) { + r := &ServerTypeRegistry{path: path} + if err := r.load(); err != nil { + return nil, err + } + return r, nil +} + +func (r *ServerTypeRegistry) Get(key string) (model.ServerType, bool) { + r.mu.RLock() + defer r.mu.RUnlock() + st, ok := r.types[key] + return st, ok +} + +func (r *ServerTypeRegistry) List() []model.ServerType { + r.mu.RLock() + defer r.mu.RUnlock() + out := make([]model.ServerType, 0, len(r.types)) + for _, st := range r.types { + out = append(out, st) + } + return out +} + +func (r *ServerTypeRegistry) Keys() []string { + r.mu.RLock() + defer r.mu.RUnlock() + out := make([]string, 0, len(r.types)) + for k := range r.types { + out = append(out, k) + } + return out +} + +func (r *ServerTypeRegistry) load() error { + data, err := os.ReadFile(r.path) + if err != nil { + return fmt.Errorf("read server types: %w", err) + } + var f serverTypesFile + if err := yaml.Unmarshal(data, &f); err != nil { + return fmt.Errorf("parse server types: %w", err) + } + if len(f.ServerTypes) == 0 { + return fmt.Errorf("server types file contains no types") + } + for k, st := range f.ServerTypes { + st.Key = k + f.ServerTypes[k] = st + } + r.mu.Lock() + r.types = f.ServerTypes + r.mu.Unlock() + return nil +} + +func (r *ServerTypeRegistry) Watch(stop <-chan struct{}) { + watcher, err := fsnotify.NewWatcher() + if err != nil { + log.Printf("server-types: fsnotify unavailable: %v", err) + return + } + if err := watcher.Add(r.path); err != nil { + log.Printf("server-types: watch failed: %v", err) + _ = watcher.Close() + return + } + go func() { + defer watcher.Close() + for { + select { + case <-stop: + return + case ev, ok := <-watcher.Events: + if !ok { + return + } + if ev.Op&(fsnotify.Write|fsnotify.Create) != 0 { + if err := r.load(); err != nil { + log.Printf("server-types: hot-reload failed: %v (keeping previous config)", err) + } else { + log.Printf("server-types: reloaded %d types", len(r.types)) + } + } + case err, ok := <-watcher.Errors: + if !ok { + return + } + log.Printf("server-types: watch error: %v", err) + } + } + }() +} diff --git a/internal/db/db.go b/internal/db/db.go new file mode 100644 index 0000000..f2e25a5 --- /dev/null +++ b/internal/db/db.go @@ -0,0 +1,81 @@ +package db + +import ( + "database/sql" + "embed" + "fmt" + "io/fs" + "path/filepath" + "sort" + "strings" + + _ "modernc.org/sqlite" +) + +//go:embed migrations/*.sql +var migrationsFS embed.FS + +func Open(path string) (*sql.DB, error) { + dsn := fmt.Sprintf("file:%s?_pragma=foreign_keys(1)&_pragma=journal_mode(WAL)&_pragma=busy_timeout(5000)", filepath.ToSlash(path)) + db, err := sql.Open("sqlite", dsn) + if err != nil { + return nil, fmt.Errorf("open sqlite: %w", err) + } + if err := db.Ping(); err != nil { + _ = db.Close() + return nil, fmt.Errorf("ping sqlite: %w", err) + } + if err := migrate(db); err != nil { + _ = db.Close() + return nil, err + } + return db, nil +} + +func migrate(db *sql.DB) error { + entries, err := fs.ReadDir(migrationsFS, "migrations") + if err != nil { + return fmt.Errorf("read migrations: %w", err) + } + names := make([]string, 0, len(entries)) + for _, e := range entries { + if !e.IsDir() && strings.HasSuffix(e.Name(), ".sql") { + names = append(names, e.Name()) + } + } + sort.Strings(names) + + if _, err := db.Exec(`CREATE TABLE IF NOT EXISTS schema_migrations (name TEXT PRIMARY KEY, applied_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP)`); err != nil { + return fmt.Errorf("ensure schema_migrations: %w", err) + } + + for _, name := range names { + var applied int + if err := db.QueryRow(`SELECT COUNT(1) FROM schema_migrations WHERE name = ?`, name).Scan(&applied); err != nil { + return fmt.Errorf("check migration %s: %w", name, err) + } + if applied > 0 { + continue + } + content, err := migrationsFS.ReadFile("migrations/" + name) + if err != nil { + return fmt.Errorf("read migration %s: %w", name, err) + } + tx, err := db.Begin() + if err != nil { + return fmt.Errorf("begin migration %s: %w", name, err) + } + if _, err := tx.Exec(string(content)); err != nil { + _ = tx.Rollback() + return fmt.Errorf("apply migration %s: %w", name, err) + } + if _, err := tx.Exec(`INSERT INTO schema_migrations(name) VALUES(?)`, name); err != nil { + _ = tx.Rollback() + return fmt.Errorf("record migration %s: %w", name, err) + } + if err := tx.Commit(); err != nil { + return fmt.Errorf("commit migration %s: %w", name, err) + } + } + return nil +} diff --git a/internal/db/migrations/0001_init.sql b/internal/db/migrations/0001_init.sql new file mode 100644 index 0000000..4c1725a --- /dev/null +++ b/internal/db/migrations/0001_init.sql @@ -0,0 +1,43 @@ +CREATE TABLE hosts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + hostname TEXT NOT NULL UNIQUE, + mac TEXT NOT NULL UNIQUE, + server_type TEXT NOT NULL, + state TEXT NOT NULL DEFAULT 'registered', + ip_address TEXT, + hardware_id TEXT, + infra_host_id INTEGER, + notes TEXT NOT NULL DEFAULT '', + created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ','now')), + updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ','now')) +); + +CREATE TABLE operations ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + host_id INTEGER NOT NULL REFERENCES hosts(id) ON DELETE CASCADE, + kind TEXT NOT NULL, + state TEXT NOT NULL DEFAULT 'active', + image_id INTEGER REFERENCES images(id), + started_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ','now')), + completed_at TEXT, + error_message TEXT +); +CREATE INDEX idx_operations_host ON operations(host_id); + +CREATE TABLE operation_locks ( + host_id INTEGER PRIMARY KEY REFERENCES hosts(id) ON DELETE CASCADE, + operation_id INTEGER NOT NULL REFERENCES operations(id), + locked_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ','now')), + expires_at TEXT NOT NULL +); + +CREATE TABLE images ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL UNIQUE, + kind TEXT NOT NULL, + version TEXT NOT NULL, + kernel_path TEXT NOT NULL, + initrd_path TEXT NOT NULL, + is_default INTEGER NOT NULL DEFAULT 0, + created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ','now')) +); diff --git a/internal/events/hub.go b/internal/events/hub.go new file mode 100644 index 0000000..f48632c --- /dev/null +++ b/internal/events/hub.go @@ -0,0 +1,160 @@ +package events + +import ( + "context" + "fmt" + "net/http" + "sync" + "sync/atomic" + "time" +) + +type Event struct { + Name string + Payload string +} + +type subscriber struct { + id int64 + ch chan Event +} + +const ( + defaultBuffer = 32 + heartbeatInterval = 15 * time.Second +) + +type Hub struct { + mu sync.RWMutex + nextID int64 + subs map[int64]*subscriber + buffer int + heartbeat time.Duration + done chan struct{} + closeOnce sync.Once +} + +func NewHub() *Hub { + h := &Hub{ + subs: map[int64]*subscriber{}, + buffer: defaultBuffer, + heartbeat: heartbeatInterval, + done: make(chan struct{}), + } + go h.heartbeatLoop() + return h +} + +func (h *Hub) Publish(ev Event) { + h.mu.RLock() + defer h.mu.RUnlock() + for _, s := range h.subs { + select { + case s.ch <- ev: + default: + } + } +} + +func (h *Hub) Subscribe() (id int64, ch <-chan Event, cancel func()) { + id = atomic.AddInt64(&h.nextID, 1) + s := &subscriber{id: id, ch: make(chan Event, h.buffer)} + h.mu.Lock() + h.subs[id] = s + h.mu.Unlock() + return id, s.ch, func() { + h.mu.Lock() + delete(h.subs, id) + h.mu.Unlock() + close(s.ch) + } +} + +func (h *Hub) heartbeatLoop() { + t := time.NewTicker(h.heartbeat) + defer t.Stop() + for { + select { + case <-h.done: + return + case <-t.C: + h.Publish(Event{ + Name: "heartbeat", + Payload: fmt.Sprintf(``, time.Now().Unix()), + }) + } + } +} + +func (h *Hub) ServeSSE(w http.ResponseWriter, r *http.Request) { + flusher, ok := w.(http.Flusher) + if !ok { + http.Error(w, "streaming not supported", http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + w.Header().Set("X-Accel-Buffering", "no") + + _, eventsCh, cancel := h.Subscribe() + defer cancel() + + fmt.Fprintf(w, "event: hello\ndata: ok\n\n") + flusher.Flush() + + ctx := r.Context() + for { + select { + case <-ctx.Done(): + return + case ev, ok := <-eventsCh: + if !ok { + return + } + writeSSE(w, ev) + flusher.Flush() + } + } +} + +func writeSSE(w http.ResponseWriter, ev Event) { + if ev.Name != "" { + fmt.Fprintf(w, "event: %s\n", ev.Name) + } + for _, line := range splitLines(ev.Payload) { + fmt.Fprintf(w, "data: %s\n", line) + } + fmt.Fprint(w, "\n") +} + +func splitLines(s string) []string { + if s == "" { + return []string{""} + } + out := []string{} + start := 0 + for i := 0; i < len(s); i++ { + if s[i] == '\n' { + out = append(out, s[start:i]) + start = i + 1 + } + } + if start <= len(s) { + out = append(out, s[start:]) + } + return out +} + +func (h *Hub) Shutdown(_ context.Context) error { + h.closeOnce.Do(func() { + close(h.done) + h.mu.Lock() + for id, s := range h.subs { + close(s.ch) + delete(h.subs, id) + } + h.mu.Unlock() + }) + return nil +} diff --git a/internal/httpserver/router.go b/internal/httpserver/router.go new file mode 100644 index 0000000..af31fef --- /dev/null +++ b/internal/httpserver/router.go @@ -0,0 +1,59 @@ +package httpserver + +import ( + "net/http" + + "provisioning/internal/api" + "provisioning/internal/events" + "provisioning/internal/web" + + "github.com/go-chi/chi/v5" + "github.com/go-chi/chi/v5/middleware" +) + +type Deps struct { + HostAPI *api.HostAPI + BootAPI *api.BootAPI + UI *api.UI + Hub *events.Hub +} + +func NewRouter(d Deps) http.Handler { + r := chi.NewRouter() + r.Use(middleware.RealIP) + r.Use(middleware.Recoverer) + r.Use(middleware.Logger) + + // Static files + r.Handle("/static/*", http.StripPrefix("/static/", http.FileServer(http.FS(web.Static)))) + + // SSE + r.Get("/events", d.Hub.ServeSSE) + + // Dashboard UI + r.Get("/", d.UI.Dashboard) + r.Get("/hosts/new", d.UI.NewHostForm) + r.Post("/hosts", d.UI.CreateHost) + r.Get("/hosts/{id}", d.UI.HostDetail) + r.Post("/hosts/{id}/rebuild", d.UI.TriggerRebuild) + r.Post("/hosts/{id}/delete", d.UI.DeleteHost) + r.Get("/images", d.UI.ImagesPage) + + // Host JSON API + r.Route("/api/hosts", func(r chi.Router) { + r.Get("/", d.HostAPI.List) + r.Post("/", d.HostAPI.Create) + r.Get("/{id}", d.HostAPI.Get) + r.Delete("/{id}", d.HostAPI.Delete) + r.Post("/{id}/rebuild", d.HostAPI.Rebuild) + }) + + // Boot / PXE endpoints + r.Get("/ipxe/{mac}", d.BootAPI.IPXEScript) + r.Post("/api/boot/answer", d.BootAPI.AnswerFile) + r.Post("/api/hosts/{id}/installed", d.BootAPI.InstallComplete) + r.Get("/api/hosts/{id}/first-boot-script", d.BootAPI.FirstBootScript) + r.Post("/api/hosts/{id}/phone-home", d.BootAPI.PhoneHome) + + return r +} diff --git a/internal/infra/client.go b/internal/infra/client.go new file mode 100644 index 0000000..6f5af08 --- /dev/null +++ b/internal/infra/client.go @@ -0,0 +1,92 @@ +package infra + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + "time" +) + +type Client struct { + BaseURL string + HTTPClient *http.Client +} + +func NewClient(baseURL string, timeout time.Duration) *Client { + return &Client{ + BaseURL: baseURL, + HTTPClient: &http.Client{Timeout: timeout}, + } +} + +type CreateHostRequest struct { + HardwareID string `json:"hardware_id"` + Hostname string `json:"hostname"` + AssetID string `json:"asset_id"` + RoomID int `json:"room_id"` + ServerTypeID int `json:"server_type_id"` +} + +type CreateHostResponse struct { + ID int64 `json:"id"` +} + +func (c *Client) CreateHost(ctx context.Context, req CreateHostRequest) (int64, error) { + body, err := json.Marshal(req) + if err != nil { + return 0, err + } + httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.BaseURL+"/api/hosts", bytes.NewReader(body)) + if err != nil { + return 0, err + } + httpReq.Header.Set("Content-Type", "application/json") + + resp, err := c.HTTPClient.Do(httpReq) + if err != nil { + return 0, fmt.Errorf("infra: create host: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusCreated { + return 0, fmt.Errorf("infra: create host: status %d", resp.StatusCode) + } + + var result CreateHostResponse + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + return 0, fmt.Errorf("infra: decode response: %w", err) + } + return result.ID, nil +} + +type CreateInterfaceRequest struct { + HostID int `json:"host_id"` + Name string `json:"name"` + MACAddress string `json:"mac_address"` + IPAddress string `json:"ip_address"` +} + +func (c *Client) CreateInterface(ctx context.Context, req CreateInterfaceRequest) error { + body, err := json.Marshal(req) + if err != nil { + return err + } + httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.BaseURL+"/api/interfaces", bytes.NewReader(body)) + if err != nil { + return err + } + httpReq.Header.Set("Content-Type", "application/json") + + resp, err := c.HTTPClient.Do(httpReq) + if err != nil { + return fmt.Errorf("infra: create interface: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusCreated { + return fmt.Errorf("infra: create interface: status %d", resp.StatusCode) + } + return nil +} diff --git a/internal/model/model.go b/internal/model/model.go new file mode 100644 index 0000000..f2b4fe9 --- /dev/null +++ b/internal/model/model.go @@ -0,0 +1,82 @@ +package model + +import "time" + +type HostState string + +const ( + StateRegistered HostState = "registered" + StatePXEReady HostState = "pxe_ready" + StatePXEBooted HostState = "pxe_booted" + StateInstalling HostState = "installing" + StateInstalled HostState = "installed" + StateFirstBoot HostState = "first_boot" + StateJoining HostState = "joining" + StateReady HostState = "ready" + StateFailed HostState = "failed" +) + +type Host struct { + ID int64 + Hostname string + MAC string + ServerType string + State HostState + IPAddress string + HardwareID string + InfraHostID int64 + Notes string + CreatedAt time.Time + UpdatedAt time.Time +} + +type OperationKind string + +const ( + OpRebuildProxmox OperationKind = "rebuild_proxmox" +) + +type OperationState string + +const ( + OpActive OperationState = "active" + OpCompleted OperationState = "completed" + OpFailed OperationState = "failed" +) + +type Operation struct { + ID int64 + HostID int64 + Kind OperationKind + State OperationState + ImageID int64 + StartedAt time.Time + CompletedAt *time.Time + ErrorMessage string +} + +type Image struct { + ID int64 + Name string + Kind string + Version string + KernelPath string + InitrdPath string + IsDefault bool + CreatedAt time.Time +} + +type ServerType struct { + Key string + DisplayName string `yaml:"display_name"` + BootDisk string `yaml:"boot_disk"` + ManagementNIC string `yaml:"management_nic"` + GPU bool `yaml:"gpu"` + HostnamePrefix string `yaml:"hostname_prefix"` + ExpectedNICs []NICDef `yaml:"expected_nics"` +} + +type NICDef struct { + Name string `yaml:"name"` + Speed string `yaml:"speed"` +} diff --git a/internal/orchestrator/cluster.go b/internal/orchestrator/cluster.go new file mode 100644 index 0000000..539a639 --- /dev/null +++ b/internal/orchestrator/cluster.go @@ -0,0 +1,60 @@ +package orchestrator + +import ( + "context" + "fmt" + "log" + "os" + "time" + + "golang.org/x/crypto/ssh" +) + +type ClusterJoiner struct { + ExistingNode string + ClusterName string + JoinFingerprint string + SSHKeyPath string +} + +func (c *ClusterJoiner) Join(ctx context.Context, hostIP string) error { + client, err := c.connect(hostIP) + if err != nil { + return fmt.Errorf("ssh connect to %s: %w", hostIP, err) + } + defer client.Close() + + cmd := fmt.Sprintf("pvecm add %s --force", c.ExistingNode) + log.Printf("cluster: running on %s: %s", hostIP, cmd) + + session, err := client.NewSession() + if err != nil { + return fmt.Errorf("ssh session: %w", err) + } + defer session.Close() + + output, err := session.CombinedOutput(cmd) + if err != nil { + return fmt.Errorf("pvecm add failed: %w\noutput: %s", err, string(output)) + } + log.Printf("cluster: %s joined successfully", hostIP) + return nil +} + +func (c *ClusterJoiner) connect(hostIP string) (*ssh.Client, error) { + keyData, err := os.ReadFile(c.SSHKeyPath) + if err != nil { + return nil, fmt.Errorf("read ssh key: %w", err) + } + signer, err := ssh.ParsePrivateKey(keyData) + if err != nil { + return nil, fmt.Errorf("parse ssh key: %w", err) + } + config := &ssh.ClientConfig{ + User: "root", + Auth: []ssh.AuthMethod{ssh.PublicKeys(signer)}, + HostKeyCallback: ssh.InsecureIgnoreHostKey(), + Timeout: 30 * time.Second, + } + return ssh.Dial("tcp", hostIP+":22", config) +} diff --git a/internal/orchestrator/host.go b/internal/orchestrator/host.go new file mode 100644 index 0000000..8f01eb6 --- /dev/null +++ b/internal/orchestrator/host.go @@ -0,0 +1,111 @@ +package orchestrator + +import ( + "context" + "log" + + "provisioning/internal/config" + "provisioning/internal/infra" + "provisioning/internal/model" + "provisioning/internal/statemachine" + "provisioning/internal/store" +) + +type HostOrchestrator struct { + Runner *Runner + Hosts *store.Hosts + Ops *store.Operations + Locks *store.Locks + Cluster *ClusterJoiner + InfraClient *infra.Client + Config *config.Config + ServerTypes *config.ServerTypeRegistry +} + +func (o *HostOrchestrator) HandlePhoneHome(ctx context.Context, hostID int64, ip string, hardwareID string) { + if err := o.Hosts.UpdateIP(ctx, hostID, ip, hardwareID); err != nil { + log.Printf("host %d: failed to update IP: %v", hostID, err) + o.Runner.FailHost(ctx, hostID, "failed to update IP: "+err.Error()) + return + } + + if _, err := o.Runner.Transition(ctx, hostID, statemachine.TriggerPhoneHome); err != nil { + log.Printf("host %d: phone-home transition failed: %v", hostID, err) + return + } + + go o.postPhoneHome(hostID, ip, hardwareID) +} + +func (o *HostOrchestrator) postPhoneHome(hostID int64, ip string, hardwareID string) { + ctx := context.Background() + + host, err := o.Hosts.Get(ctx, hostID) + if err != nil { + log.Printf("host %d: failed to get host for cluster join: %v", hostID, err) + o.Runner.FailHost(ctx, hostID, "get host: "+err.Error()) + return + } + + if _, err := o.Runner.Transition(ctx, hostID, statemachine.TriggerClusterJoinStart); err != nil { + log.Printf("host %d: cluster join start transition failed: %v", hostID, err) + return + } + + if err := o.Cluster.Join(ctx, ip); err != nil { + log.Printf("host %d: cluster join failed: %v", hostID, err) + o.Runner.FailHost(ctx, hostID, "cluster join: "+err.Error()) + return + } + + if err := o.registerInfra(ctx, host, ip, hardwareID); err != nil { + log.Printf("host %d: infra registration failed: %v", hostID, err) + o.Runner.FailHost(ctx, hostID, "infra registration: "+err.Error()) + return + } + + if _, err := o.Runner.Transition(ctx, hostID, statemachine.TriggerJoinComplete); err != nil { + log.Printf("host %d: join complete transition failed: %v", hostID, err) + return + } + + op, err := o.Ops.GetActive(ctx, hostID) + if err == nil { + _ = o.Ops.Complete(ctx, op.ID) + } + _ = o.Locks.Release(ctx, hostID) + log.Printf("host %d (%s): provisioning complete", hostID, host.Hostname) +} + +func (o *HostOrchestrator) registerInfra(ctx context.Context, host *model.Host, ip string, hardwareID string) error { + if o.InfraClient == nil { + return nil + } + + st, _ := o.ServerTypes.Get(host.ServerType) + serverTypeID := o.Config.Infrastructure.ServerTypeMap[host.ServerType] + + infraID, err := o.InfraClient.CreateHost(ctx, infra.CreateHostRequest{ + HardwareID: hardwareID, + Hostname: host.Hostname, + AssetID: host.Hostname, + RoomID: o.Config.Infrastructure.RoomID, + ServerTypeID: serverTypeID, + }) + if err != nil { + return err + } + + if err := o.Hosts.UpdateInfraID(ctx, host.ID, infraID); err != nil { + return err + } + + _ = o.InfraClient.CreateInterface(ctx, infra.CreateInterfaceRequest{ + HostID: int(infraID), + Name: st.ManagementNIC, + MACAddress: host.MAC, + IPAddress: ip, + }) + + return nil +} diff --git a/internal/orchestrator/runner.go b/internal/orchestrator/runner.go new file mode 100644 index 0000000..d3e1783 --- /dev/null +++ b/internal/orchestrator/runner.go @@ -0,0 +1,51 @@ +package orchestrator + +import ( + "context" + "fmt" + "log" + + "provisioning/internal/events" + "provisioning/internal/model" + "provisioning/internal/statemachine" + "provisioning/internal/store" +) + +type Runner struct { + Hosts *store.Hosts + Ops *store.Operations + Locks *store.Locks + Hub *events.Hub +} + +func (r *Runner) Transition(ctx context.Context, hostID int64, trigger statemachine.Trigger) (model.HostState, error) { + host, err := r.Hosts.Get(ctx, hostID) + if err != nil { + return "", fmt.Errorf("transition: get host: %w", err) + } + next, err := statemachine.Next(host.State, trigger) + if err != nil { + return "", fmt.Errorf("transition: %w", err) + } + if err := r.Hosts.UpdateState(ctx, hostID, next); err != nil { + return "", fmt.Errorf("transition: update state: %w", err) + } + log.Printf("host %d (%s): %s -> %s [%s]", hostID, host.Hostname, host.State, next, trigger) + r.Hub.Publish(events.Event{ + Name: "host.state_changed", + Payload: fmt.Sprintf(`{"host_id":%d,"old_state":"%s","new_state":"%s"}`, hostID, host.State, next), + }) + return next, nil +} + +func (r *Runner) FailHost(ctx context.Context, hostID int64, reason string) { + if _, err := r.Transition(ctx, hostID, statemachine.TriggerFailed); err != nil { + log.Printf("host %d: failed to transition to failed state: %v", hostID, err) + return + } + op, err := r.Ops.GetActive(ctx, hostID) + if err == nil { + _ = r.Ops.Fail(ctx, op.ID, reason) + } + _ = r.Locks.Release(ctx, hostID) +} diff --git a/internal/pxe/answer.go b/internal/pxe/answer.go new file mode 100644 index 0000000..b633c83 --- /dev/null +++ b/internal/pxe/answer.go @@ -0,0 +1,44 @@ +package pxe + +import ( + "fmt" + "strings" + + "provisioning/internal/config" + "provisioning/internal/model" +) + +func GenerateAnswerFile(host *model.Host, serverType model.ServerType, cfg *config.Config) string { + var b strings.Builder + + b.WriteString("[global]\n") + b.WriteString(`keyboard = "en-us"` + "\n") + b.WriteString(`country = "us"` + "\n") + b.WriteString(fmt.Sprintf("fqdn = \"%s.thewrightserver.net\"\n", host.Hostname)) + b.WriteString(`mailto = "admin@thewrightserver.net"` + "\n") + b.WriteString(`timezone = "America/Indiana/Indianapolis"` + "\n") + b.WriteString(fmt.Sprintf("root-password-hashed = \"%s\"\n", cfg.Credentials.RootPasswordHash)) + b.WriteString(fmt.Sprintf("root-ssh-keys = [\"%s\"]\n", cfg.Credentials.SSHPublicKey)) + b.WriteString("\n") + + b.WriteString("[network]\n") + b.WriteString(`source = "from-dhcp"` + "\n") + b.WriteString("\n") + + b.WriteString("[disk-setup]\n") + b.WriteString(`filesystem = "zfs"` + "\n") + b.WriteString(`zfs.raid = "raid0"` + "\n") + b.WriteString(fmt.Sprintf("disk-list = [\"%s\"]\n", serverType.BootDisk)) + b.WriteString("\n") + + b.WriteString("[post-installation-webhook]\n") + b.WriteString(fmt.Sprintf("url = \"%s/api/hosts/%d/installed\"\n", cfg.Server.PublicURL, host.ID)) + b.WriteString("\n") + + b.WriteString("[first-boot]\n") + b.WriteString(`source = "from-url"` + "\n") + b.WriteString(fmt.Sprintf("url = \"%s/api/hosts/%d/first-boot-script\"\n", cfg.Server.PublicURL, host.ID)) + b.WriteString(`ordering = "after-network"` + "\n") + + return b.String() +} diff --git a/internal/pxe/firstboot.go b/internal/pxe/firstboot.go new file mode 100644 index 0000000..3bd3b82 --- /dev/null +++ b/internal/pxe/firstboot.go @@ -0,0 +1,28 @@ +package pxe + +import ( + "fmt" + + "provisioning/internal/config" + "provisioning/internal/model" +) + +func GenerateFirstBootScript(host *model.Host, serverType model.ServerType, cfg *config.Config) string { + return fmt.Sprintf(`#!/bin/bash +set -euo pipefail + +PROVISIONING_URL="%s" +HOST_ID="%d" +NIC="%s" + +IP=$(ip -4 addr show dev "$NIC" | grep -oP '(?<=inet\s)\d+(\.\d+){3}') +HWID=$(dmidecode -s system-uuid) + +curl -fsSL -X POST "${PROVISIONING_URL}/api/hosts/${HOST_ID}/phone-home" \ + -H "Content-Type: application/json" \ + -d "{\"ip\": \"${IP}\", \"hardware_id\": \"${HWID}\", \"hostname\": \"$(hostname)\"}" + +systemctl disable provisioning-firstboot.service +rm -f /etc/systemd/system/provisioning-firstboot.service +`, cfg.Server.PublicURL, host.ID, serverType.ManagementNIC) +} diff --git a/internal/pxe/ipxe.go b/internal/pxe/ipxe.go new file mode 100644 index 0000000..57f511f --- /dev/null +++ b/internal/pxe/ipxe.go @@ -0,0 +1,19 @@ +package pxe + +import ( + "fmt" + + "provisioning/internal/model" +) + +func BuildIPXEScript(publicURL string, img *model.Image, mac string) string { + kernelURL := fmt.Sprintf("%s/images/boot/%s/%s", publicURL, img.Name, "linux26") + initrdURL := fmt.Sprintf("%s/images/boot/%s/%s", publicURL, img.Name, "initrd.img") + + return fmt.Sprintf(`#!ipxe +echo Provisioning: booting %s on ${mac} +kernel %s vga=791 video=vesafb:lfb:on +initrd %s +boot +`, img.Name, kernelURL, initrdURL) +} diff --git a/internal/pxe/signal_unix.go b/internal/pxe/signal_unix.go new file mode 100644 index 0000000..b1f8d07 --- /dev/null +++ b/internal/pxe/signal_unix.go @@ -0,0 +1,12 @@ +//go:build !windows + +package pxe + +import ( + "os" + "syscall" +) + +func signalReload(p *os.Process) error { + return p.Signal(syscall.SIGHUP) +} diff --git a/internal/pxe/signal_windows.go b/internal/pxe/signal_windows.go new file mode 100644 index 0000000..688d314 --- /dev/null +++ b/internal/pxe/signal_windows.go @@ -0,0 +1,9 @@ +//go:build windows + +package pxe + +import "os" + +func signalReload(_ *os.Process) error { + return nil +} diff --git a/internal/pxe/supervisor.go b/internal/pxe/supervisor.go new file mode 100644 index 0000000..94d43a5 --- /dev/null +++ b/internal/pxe/supervisor.go @@ -0,0 +1,161 @@ +package pxe + +import ( + "context" + "fmt" + "log" + "os" + "os/exec" + "path/filepath" + "strings" + "text/template" + + "provisioning/internal/model" +) + +type SupervisorConfig struct { + Enabled bool + Interface string + Subnet string + RuntimeDir string + TFTPRoot string + DnsmasqBin string + PublicURL string +} + +type Supervisor struct { + cfg SupervisorConfig + cmd *exec.Cmd + cancel context.CancelFunc +} + +func NewSupervisor(cfg SupervisorConfig) *Supervisor { + return &Supervisor{cfg: cfg} +} + +func (s *Supervisor) Start(ctx context.Context, hosts []model.Host) error { + if !s.cfg.Enabled { + log.Printf("pxe: dnsmasq disabled") + return nil + } + if err := os.MkdirAll(s.cfg.RuntimeDir, 0o755); err != nil { + return fmt.Errorf("pxe: create runtime dir: %w", err) + } + if err := s.writeConfig(hosts); err != nil { + return err + } + return s.startProcess(ctx) +} + +func (s *Supervisor) Reload(hosts []model.Host) error { + if !s.cfg.Enabled { + return nil + } + if err := s.writeConfig(hosts); err != nil { + return err + } + if s.cmd != nil && s.cmd.Process != nil { + return signalReload(s.cmd.Process) + } + return nil +} + +func (s *Supervisor) Shutdown() error { + if s.cancel != nil { + s.cancel() + } + if s.cmd != nil && s.cmd.Process != nil { + return s.cmd.Process.Kill() + } + return nil +} + +func (s *Supervisor) writeConfig(hosts []model.Host) error { + confPath := filepath.Join(s.cfg.RuntimeDir, "dnsmasq.conf") + hostsPath := filepath.Join(s.cfg.RuntimeDir, "dhcp-hostsfile") + + var macs []string + for _, h := range hosts { + macs = append(macs, h.MAC) + } + if err := os.WriteFile(hostsPath, []byte(strings.Join(macs, "\n")+"\n"), 0o644); err != nil { + return fmt.Errorf("pxe: write dhcp-hostsfile: %w", err) + } + + conf := dnsmasqConf{ + Interface: s.cfg.Interface, + TFTPRoot: s.cfg.TFTPRoot, + HostsFile: hostsPath, + PublicURL: s.cfg.PublicURL, + RuntimeDir: s.cfg.RuntimeDir, + } + f, err := os.Create(confPath) + if err != nil { + return fmt.Errorf("pxe: create dnsmasq.conf: %w", err) + } + defer f.Close() + if err := dnsmasqTmpl.Execute(f, conf); err != nil { + return fmt.Errorf("pxe: render dnsmasq.conf: %w", err) + } + return nil +} + +func (s *Supervisor) startProcess(ctx context.Context) error { + confPath := filepath.Join(s.cfg.RuntimeDir, "dnsmasq.conf") + procCtx, cancel := context.WithCancel(ctx) + s.cancel = cancel + s.cmd = exec.CommandContext(procCtx, s.cfg.DnsmasqBin, "--keep-in-foreground", "--conf-file="+confPath) + s.cmd.Stdout = os.Stdout + s.cmd.Stderr = os.Stderr + if err := s.cmd.Start(); err != nil { + cancel() + return fmt.Errorf("pxe: start dnsmasq: %w", err) + } + go func() { + if err := s.cmd.Wait(); err != nil { + select { + case <-procCtx.Done(): + default: + log.Printf("pxe: dnsmasq exited: %v", err) + } + } + }() + log.Printf("pxe: dnsmasq started (pid %d)", s.cmd.Process.Pid) + return nil +} + +type dnsmasqConf struct { + Interface string + TFTPRoot string + HostsFile string + PublicURL string + RuntimeDir string +} + +var dnsmasqTmpl = template.Must(template.New("dnsmasq.conf").Parse(` +port=0 +interface={{.Interface}} +bind-interfaces + +dhcp-range=tag:known,192.168.1.0,proxy +dhcp-hostsfile={{.HostsFile}} +dhcp-ignore=tag:!known + +enable-tftp +tftp-root={{.TFTPRoot}} + +# Legacy BIOS +dhcp-match=set:bios,option:client-arch,0 +dhcp-boot=tag:bios,undionly.kpxe + +# UEFI +dhcp-match=set:efi64,option:client-arch,7 +dhcp-boot=tag:efi64,ipxe.efi + +# iPXE user-class: chain to HTTP script +dhcp-match=set:ipxe,option:user-class,iPXE +dhcp-boot=tag:ipxe,{{.PublicURL}}/ipxe/${mac:hexhyp} + +log-dhcp +log-facility={{.RuntimeDir}}/dnsmasq.log +`)) diff --git a/internal/statemachine/host.go b/internal/statemachine/host.go new file mode 100644 index 0000000..c016832 --- /dev/null +++ b/internal/statemachine/host.go @@ -0,0 +1,82 @@ +package statemachine + +import ( + "fmt" + + "provisioning/internal/model" +) + +type Trigger string + +const ( + TriggerRebuildRequested Trigger = "RebuildRequested" + TriggerPXEScriptServed Trigger = "PXEScriptServed" + TriggerAnswerServed Trigger = "AnswerServed" + TriggerInstallWebhook Trigger = "InstallWebhook" + TriggerPhoneHome Trigger = "PhoneHome" + TriggerClusterJoinStart Trigger = "ClusterJoinStarted" + TriggerJoinComplete Trigger = "JoinComplete" + TriggerFailed Trigger = "Failed" +) + +type transition struct { + from []model.HostState + to model.HostState +} + +var allActiveStates = []model.HostState{ + model.StatePXEReady, + model.StatePXEBooted, + model.StateInstalling, + model.StateInstalled, + model.StateFirstBoot, + model.StateJoining, +} + +var table = map[Trigger]transition{ + TriggerRebuildRequested: { + from: []model.HostState{model.StateRegistered, model.StateReady, model.StateFailed}, + to: model.StatePXEReady, + }, + TriggerPXEScriptServed: { + from: []model.HostState{model.StatePXEReady}, + to: model.StatePXEBooted, + }, + TriggerAnswerServed: { + from: []model.HostState{model.StatePXEBooted}, + to: model.StateInstalling, + }, + TriggerInstallWebhook: { + from: []model.HostState{model.StateInstalling}, + to: model.StateInstalled, + }, + TriggerPhoneHome: { + from: []model.HostState{model.StateInstalled}, + to: model.StateFirstBoot, + }, + TriggerClusterJoinStart: { + from: []model.HostState{model.StateFirstBoot}, + to: model.StateJoining, + }, + TriggerJoinComplete: { + from: []model.HostState{model.StateJoining}, + to: model.StateReady, + }, + TriggerFailed: { + from: allActiveStates, + to: model.StateFailed, + }, +} + +func Next(current model.HostState, t Trigger) (model.HostState, error) { + tr, ok := table[t] + if !ok { + return "", fmt.Errorf("unknown trigger %q", t) + } + for _, s := range tr.from { + if s == current { + return tr.to, nil + } + } + return "", fmt.Errorf("trigger %q not allowed from state %q", t, current) +} diff --git a/internal/statemachine/host_test.go b/internal/statemachine/host_test.go new file mode 100644 index 0000000..bcee27f --- /dev/null +++ b/internal/statemachine/host_test.go @@ -0,0 +1,67 @@ +package statemachine + +import ( + "testing" + + "provisioning/internal/model" +) + +func TestValidTransitions(t *testing.T) { + cases := []struct { + from model.HostState + trigger Trigger + want model.HostState + }{ + {model.StateRegistered, TriggerRebuildRequested, model.StatePXEReady}, + {model.StateReady, TriggerRebuildRequested, model.StatePXEReady}, + {model.StateFailed, TriggerRebuildRequested, model.StatePXEReady}, + {model.StatePXEReady, TriggerPXEScriptServed, model.StatePXEBooted}, + {model.StatePXEBooted, TriggerAnswerServed, model.StateInstalling}, + {model.StateInstalling, TriggerInstallWebhook, model.StateInstalled}, + {model.StateInstalled, TriggerPhoneHome, model.StateFirstBoot}, + {model.StateFirstBoot, TriggerClusterJoinStart, model.StateJoining}, + {model.StateJoining, TriggerJoinComplete, model.StateReady}, + } + for _, tc := range cases { + got, err := Next(tc.from, tc.trigger) + if err != nil { + t.Errorf("Next(%q, %q) error: %v", tc.from, tc.trigger, err) + continue + } + if got != tc.want { + t.Errorf("Next(%q, %q) = %q, want %q", tc.from, tc.trigger, got, tc.want) + } + } +} + +func TestFailedFromAllActive(t *testing.T) { + for _, state := range allActiveStates { + got, err := Next(state, TriggerFailed) + if err != nil { + t.Errorf("Next(%q, Failed) error: %v", state, err) + continue + } + if got != model.StateFailed { + t.Errorf("Next(%q, Failed) = %q, want %q", state, got, model.StateFailed) + } + } +} + +func TestInvalidTransitions(t *testing.T) { + cases := []struct { + from model.HostState + trigger Trigger + }{ + {model.StateRegistered, TriggerPXEScriptServed}, + {model.StateReady, TriggerPhoneHome}, + {model.StatePXEReady, TriggerInstallWebhook}, + {model.StateInstalling, TriggerRebuildRequested}, + {model.StateRegistered, TriggerFailed}, + } + for _, tc := range cases { + _, err := Next(tc.from, tc.trigger) + if err == nil { + t.Errorf("Next(%q, %q) expected error, got nil", tc.from, tc.trigger) + } + } +} diff --git a/internal/store/hosts.go b/internal/store/hosts.go new file mode 100644 index 0000000..1aee526 --- /dev/null +++ b/internal/store/hosts.go @@ -0,0 +1,126 @@ +package store + +import ( + "context" + "database/sql" + "fmt" + "strings" + "time" + + "provisioning/internal/model" +) + +type Hosts struct { + DB *sql.DB +} + +const hostColumns = `id, hostname, mac, server_type, state, ip_address, hardware_id, infra_host_id, notes, created_at, updated_at` + +func scanHost(row interface{ Scan(dest ...any) error }, h *model.Host) error { + var ip, hwID sql.NullString + var infraID sql.NullInt64 + var createdAt, updatedAt string + if err := row.Scan(&h.ID, &h.Hostname, &h.MAC, &h.ServerType, &h.State, + &ip, &hwID, &infraID, &h.Notes, &createdAt, &updatedAt); err != nil { + return err + } + h.IPAddress = ip.String + h.HardwareID = hwID.String + if infraID.Valid { + h.InfraHostID = infraID.Int64 + } + h.CreatedAt, _ = time.Parse(time.RFC3339, createdAt) + h.UpdatedAt, _ = time.Parse(time.RFC3339, updatedAt) + return nil +} + +func (s *Hosts) Create(ctx context.Context, h model.Host) (int64, error) { + h.MAC = normalizeMAC(h.MAC) + res, err := s.DB.ExecContext(ctx, ` + INSERT INTO hosts(hostname, mac, server_type, notes) + VALUES(?,?,?,?) + `, h.Hostname, h.MAC, h.ServerType, h.Notes) + if err != nil { + return 0, fmt.Errorf("insert host: %w", err) + } + return res.LastInsertId() +} + +func (s *Hosts) List(ctx context.Context) ([]model.Host, error) { + rows, err := s.DB.QueryContext(ctx, `SELECT `+hostColumns+` FROM hosts ORDER BY hostname COLLATE NOCASE`) + if err != nil { + return nil, fmt.Errorf("list hosts: %w", err) + } + defer rows.Close() + var out []model.Host + for rows.Next() { + var h model.Host + if err := scanHost(rows, &h); err != nil { + return nil, fmt.Errorf("scan host: %w", err) + } + out = append(out, h) + } + return out, rows.Err() +} + +func (s *Hosts) Get(ctx context.Context, id int64) (*model.Host, error) { + row := s.DB.QueryRowContext(ctx, `SELECT `+hostColumns+` FROM hosts WHERE id = ?`, id) + var h model.Host + if err := scanHost(row, &h); err != nil { + if err == sql.ErrNoRows { + return nil, ErrNotFound + } + return nil, fmt.Errorf("get host: %w", err) + } + return &h, nil +} + +func (s *Hosts) GetByMAC(ctx context.Context, mac string) (*model.Host, error) { + row := s.DB.QueryRowContext(ctx, `SELECT `+hostColumns+` FROM hosts WHERE mac = ?`, normalizeMAC(mac)) + var h model.Host + if err := scanHost(row, &h); err != nil { + if err == sql.ErrNoRows { + return nil, ErrNotFound + } + return nil, fmt.Errorf("get host by mac: %w", err) + } + return &h, nil +} + +func (s *Hosts) UpdateState(ctx context.Context, id int64, state model.HostState) error { + res, err := s.DB.ExecContext(ctx, `UPDATE hosts SET state = ?, updated_at = strftime('%Y-%m-%dT%H:%M:%SZ','now') WHERE id = ?`, state, id) + if err != nil { + return fmt.Errorf("update host state: %w", err) + } + n, _ := res.RowsAffected() + if n == 0 { + return ErrNotFound + } + return nil +} + +func (s *Hosts) UpdateIP(ctx context.Context, id int64, ip string, hardwareID string) error { + _, err := s.DB.ExecContext(ctx, `UPDATE hosts SET ip_address = ?, hardware_id = ?, updated_at = strftime('%Y-%m-%dT%H:%M:%SZ','now') WHERE id = ?`, ip, hardwareID, id) + return err +} + +func (s *Hosts) UpdateInfraID(ctx context.Context, id int64, infraHostID int64) error { + _, err := s.DB.ExecContext(ctx, `UPDATE hosts SET infra_host_id = ?, updated_at = strftime('%Y-%m-%dT%H:%M:%SZ','now') WHERE id = ?`, infraHostID, id) + return err +} + +func (s *Hosts) Delete(ctx context.Context, id int64) error { + res, err := s.DB.ExecContext(ctx, `DELETE FROM hosts WHERE id = ?`, id) + if err != nil { + return fmt.Errorf("delete host: %w", err) + } + n, _ := res.RowsAffected() + if n == 0 { + return ErrNotFound + } + return nil +} + +func normalizeMAC(m string) string { + return strings.ToLower(strings.TrimSpace(m)) +} diff --git a/internal/store/images.go b/internal/store/images.go new file mode 100644 index 0000000..886610b --- /dev/null +++ b/internal/store/images.go @@ -0,0 +1,101 @@ +package store + +import ( + "context" + "database/sql" + "fmt" + "time" + + "provisioning/internal/model" +) + +type Images struct { + DB *sql.DB +} + +func (s *Images) Create(ctx context.Context, img model.Image) (int64, error) { + res, err := s.DB.ExecContext(ctx, ` + INSERT INTO images(name, kind, version, kernel_path, initrd_path, is_default) + VALUES(?,?,?,?,?,?) + `, img.Name, img.Kind, img.Version, img.KernelPath, img.InitrdPath, boolToInt(img.IsDefault)) + if err != nil { + return 0, fmt.Errorf("insert image: %w", err) + } + return res.LastInsertId() +} + +func (s *Images) List(ctx context.Context) ([]model.Image, error) { + rows, err := s.DB.QueryContext(ctx, `SELECT id, name, kind, version, kernel_path, initrd_path, is_default, created_at FROM images ORDER BY name`) + if err != nil { + return nil, fmt.Errorf("list images: %w", err) + } + defer rows.Close() + var out []model.Image + for rows.Next() { + var img model.Image + var isDefault int + var createdAt string + if err := rows.Scan(&img.ID, &img.Name, &img.Kind, &img.Version, &img.KernelPath, &img.InitrdPath, &isDefault, &createdAt); err != nil { + return nil, fmt.Errorf("scan image: %w", err) + } + img.IsDefault = isDefault == 1 + img.CreatedAt, _ = time.Parse(time.RFC3339, createdAt) + out = append(out, img) + } + return out, rows.Err() +} + +func (s *Images) GetDefault(ctx context.Context) (*model.Image, error) { + row := s.DB.QueryRowContext(ctx, `SELECT id, name, kind, version, kernel_path, initrd_path, is_default, created_at FROM images WHERE is_default = 1 LIMIT 1`) + var img model.Image + var isDefault int + var createdAt string + if err := row.Scan(&img.ID, &img.Name, &img.Kind, &img.Version, &img.KernelPath, &img.InitrdPath, &isDefault, &createdAt); err != nil { + if err == sql.ErrNoRows { + return nil, ErrNotFound + } + return nil, fmt.Errorf("get default image: %w", err) + } + img.IsDefault = true + img.CreatedAt, _ = time.Parse(time.RFC3339, createdAt) + return &img, nil +} + +func (s *Images) Get(ctx context.Context, id int64) (*model.Image, error) { + row := s.DB.QueryRowContext(ctx, `SELECT id, name, kind, version, kernel_path, initrd_path, is_default, created_at FROM images WHERE id = ?`, id) + var img model.Image + var isDefault int + var createdAt string + if err := row.Scan(&img.ID, &img.Name, &img.Kind, &img.Version, &img.KernelPath, &img.InitrdPath, &isDefault, &createdAt); err != nil { + if err == sql.ErrNoRows { + return nil, ErrNotFound + } + return nil, fmt.Errorf("get image: %w", err) + } + img.IsDefault = isDefault == 1 + img.CreatedAt, _ = time.Parse(time.RFC3339, createdAt) + return &img, nil +} + +func (s *Images) SetDefault(ctx context.Context, id int64) error { + tx, err := s.DB.BeginTx(ctx, nil) + if err != nil { + return err + } + if _, err := tx.ExecContext(ctx, `UPDATE images SET is_default = 0`); err != nil { + _ = tx.Rollback() + return err + } + if _, err := tx.ExecContext(ctx, `UPDATE images SET is_default = 1 WHERE id = ?`, id); err != nil { + _ = tx.Rollback() + return err + } + return tx.Commit() +} + +func boolToInt(b bool) int { + if b { + return 1 + } + return 0 +} diff --git a/internal/store/locks.go b/internal/store/locks.go new file mode 100644 index 0000000..28641af --- /dev/null +++ b/internal/store/locks.go @@ -0,0 +1,46 @@ +package store + +import ( + "context" + "database/sql" + "fmt" + "time" +) + +type Locks struct { + DB *sql.DB + TTLMinutes int +} + +func (s *Locks) Acquire(ctx context.Context, hostID, operationID int64) error { + s.cleanExpired(ctx) + expiresAt := time.Now().UTC().Add(time.Duration(s.TTLMinutes) * time.Minute).Format(time.RFC3339) + _, err := s.DB.ExecContext(ctx, ` + INSERT INTO operation_locks(host_id, operation_id, expires_at) + VALUES(?,?,?) + `, hostID, operationID, expiresAt) + if err != nil { + return fmt.Errorf("acquire lock: %w", err) + } + return nil +} + +func (s *Locks) Release(ctx context.Context, hostID int64) error { + _, err := s.DB.ExecContext(ctx, `DELETE FROM operation_locks WHERE host_id = ?`, hostID) + return err +} + +func (s *Locks) IsLocked(ctx context.Context, hostID int64) (bool, error) { + s.cleanExpired(ctx) + var count int + err := s.DB.QueryRowContext(ctx, `SELECT COUNT(1) FROM operation_locks WHERE host_id = ?`, hostID).Scan(&count) + if err != nil { + return false, fmt.Errorf("check lock: %w", err) + } + return count > 0, nil +} + +func (s *Locks) cleanExpired(ctx context.Context) { + now := time.Now().UTC().Format(time.RFC3339) + _, _ = s.DB.ExecContext(ctx, `DELETE FROM operation_locks WHERE expires_at < ?`, now) +} diff --git a/internal/store/operations.go b/internal/store/operations.go new file mode 100644 index 0000000..748a4ba --- /dev/null +++ b/internal/store/operations.go @@ -0,0 +1,87 @@ +package store + +import ( + "context" + "database/sql" + "fmt" + "time" + + "provisioning/internal/model" +) + +type Operations struct { + DB *sql.DB +} + +func (s *Operations) Create(ctx context.Context, op model.Operation) (int64, error) { + res, err := s.DB.ExecContext(ctx, ` + INSERT INTO operations(host_id, kind, state, image_id) + VALUES(?,?,?,?) + `, op.HostID, op.Kind, model.OpActive, nullInt64(op.ImageID)) + if err != nil { + return 0, fmt.Errorf("insert operation: %w", err) + } + return res.LastInsertId() +} + +func (s *Operations) Complete(ctx context.Context, id int64) error { + _, err := s.DB.ExecContext(ctx, `UPDATE operations SET state = ?, completed_at = strftime('%Y-%m-%dT%H:%M:%SZ','now') WHERE id = ?`, model.OpCompleted, id) + return err +} + +func (s *Operations) Fail(ctx context.Context, id int64, errMsg string) error { + _, err := s.DB.ExecContext(ctx, `UPDATE operations SET state = ?, completed_at = strftime('%Y-%m-%dT%H:%M:%SZ','now'), error_message = ? WHERE id = ?`, model.OpFailed, errMsg, id) + return err +} + +func (s *Operations) ListByHost(ctx context.Context, hostID int64) ([]model.Operation, error) { + rows, err := s.DB.QueryContext(ctx, ` + SELECT id, host_id, kind, state, COALESCE(image_id, 0), started_at, completed_at, COALESCE(error_message, '') + FROM operations WHERE host_id = ? ORDER BY started_at DESC + `, hostID) + if err != nil { + return nil, fmt.Errorf("list operations: %w", err) + } + defer rows.Close() + var out []model.Operation + for rows.Next() { + var op model.Operation + var startedAt string + var completedAt sql.NullString + if err := rows.Scan(&op.ID, &op.HostID, &op.Kind, &op.State, &op.ImageID, &startedAt, &completedAt, &op.ErrorMessage); err != nil { + return nil, fmt.Errorf("scan operation: %w", err) + } + op.StartedAt, _ = time.Parse(time.RFC3339, startedAt) + if completedAt.Valid { + t, _ := time.Parse(time.RFC3339, completedAt.String) + op.CompletedAt = &t + } + out = append(out, op) + } + return out, rows.Err() +} + +func (s *Operations) GetActive(ctx context.Context, hostID int64) (*model.Operation, error) { + row := s.DB.QueryRowContext(ctx, ` + SELECT id, host_id, kind, state, COALESCE(image_id, 0), started_at, completed_at, COALESCE(error_message, '') + FROM operations WHERE host_id = ? AND state = ? ORDER BY started_at DESC LIMIT 1 + `, hostID, model.OpActive) + var op model.Operation + var startedAt string + var completedAt sql.NullString + if err := row.Scan(&op.ID, &op.HostID, &op.Kind, &op.State, &op.ImageID, &startedAt, &completedAt, &op.ErrorMessage); err != nil { + if err == sql.ErrNoRows { + return nil, ErrNotFound + } + return nil, fmt.Errorf("get active operation: %w", err) + } + op.StartedAt, _ = time.Parse(time.RFC3339, startedAt) + return &op, nil +} + +func nullInt64(v int64) any { + if v == 0 { + return nil + } + return v +} diff --git a/internal/store/store.go b/internal/store/store.go new file mode 100644 index 0000000..7ac4f93 --- /dev/null +++ b/internal/store/store.go @@ -0,0 +1,5 @@ +package store + +import "errors" + +var ErrNotFound = errors.New("not found") diff --git a/internal/web/embed.go b/internal/web/embed.go new file mode 100644 index 0000000..1e83476 --- /dev/null +++ b/internal/web/embed.go @@ -0,0 +1,6 @@ +package web + +import "embed" + +//go:embed static +var Static embed.FS diff --git a/internal/web/static/app.css b/internal/web/static/app.css new file mode 100644 index 0000000..d85039f --- /dev/null +++ b/internal/web/static/app.css @@ -0,0 +1,127 @@ +* { box-sizing: border-box; margin: 0; padding: 0; } + +:root { + --bg: #0f1419; + --surface: #1a2027; + --border: #2d3748; + --text: #e2e8f0; + --text-muted: #8892a4; + --accent: #60a5fa; + --green: #34d399; + --amber: #fbbf24; + --red: #f87171; + --blue: #60a5fa; +} + +body { + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", system-ui, sans-serif; + background: var(--bg); + color: var(--text); + line-height: 1.5; +} + +.topbar { + display: flex; + align-items: center; + gap: 1.5rem; + padding: 0.75rem 1.5rem; + background: var(--surface); + border-bottom: 1px solid var(--border); +} + +.brand { + font-weight: 700; + font-size: 1.1rem; + color: var(--accent); + text-decoration: none; +} + +.nav-links { display: flex; gap: 1rem; } +.nav-links a { color: var(--text-muted); text-decoration: none; font-size: 0.9rem; } +.nav-links a:hover { color: var(--text); } + +.sse-indicator { margin-left: auto; color: var(--green); font-size: 0.8rem; } +.sse-indicator.disconnected { color: var(--red); } + +main { padding: 1.5rem; max-width: 1200px; margin: 0 auto; } + +.actions { display: flex; align-items: center; gap: 1rem; margin-bottom: 1rem; } +.count { color: var(--text-muted); font-size: 0.85rem; } + +.btn { + display: inline-block; + padding: 0.5rem 1rem; + background: var(--accent); + color: #000; + border: none; + border-radius: 4px; + cursor: pointer; + text-decoration: none; + font-size: 0.85rem; + font-weight: 500; +} +.btn:hover { opacity: 0.9; } +.btn-danger { background: var(--red); } + +.host-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(220px, 1fr)); gap: 1rem; } + +.tile { + display: block; + padding: 1rem; + background: var(--surface); + border: 1px solid var(--border); + border-radius: 6px; + text-decoration: none; + color: var(--text); + transition: border-color 0.15s; +} +.tile:hover { border-color: var(--accent); } +.tile-name { font-weight: 600; margin-bottom: 0.25rem; } +.tile-type { font-size: 0.8rem; color: var(--text-muted); } +.tile-state { font-size: 0.8rem; margin-top: 0.5rem; padding: 0.15rem 0.5rem; border-radius: 3px; display: inline-block; } +.tile-mac { font-size: 0.75rem; color: var(--text-muted); margin-top: 0.5rem; font-family: monospace; } + +.state-grey .tile-state { background: #374151; color: #9ca3af; } +.state-blue .tile-state { background: #1e3a5f; color: var(--blue); } +.state-amber .tile-state { background: #422006; color: var(--amber); } +.state-green .tile-state { background: #064e3b; color: var(--green); } +.state-red .tile-state { background: #450a0a; color: var(--red); } + +.host-header { display: flex; align-items: center; gap: 1rem; margin-bottom: 1rem; } +.badge { padding: 0.2rem 0.6rem; border-radius: 3px; font-size: 0.8rem; } +.state-grey .badge, .badge.state-grey { background: #374151; color: #9ca3af; } +.state-blue .badge, .badge.state-blue { background: #1e3a5f; color: var(--blue); } +.state-amber .badge, .badge.state-amber { background: #422006; color: var(--amber); } +.state-green .badge, .badge.state-green { background: #064e3b; color: var(--green); } +.state-red .badge, .badge.state-red { background: #450a0a; color: var(--red); } + +.detail-table { margin-bottom: 1.5rem; } +.detail-table th { text-align: left; padding: 0.4rem 1rem 0.4rem 0; color: var(--text-muted); font-weight: 500; } +.detail-table td { padding: 0.4rem 0; } + +.ops-table { width: 100%; border-collapse: collapse; font-size: 0.85rem; } +.ops-table th { text-align: left; padding: 0.5rem; border-bottom: 1px solid var(--border); color: var(--text-muted); } +.ops-table td { padding: 0.5rem; border-bottom: 1px solid var(--border); } + +.form { max-width: 400px; } +.form label { display: block; margin-bottom: 1rem; color: var(--text-muted); font-size: 0.85rem; } +.form input, .form select, .form textarea { + display: block; + width: 100%; + padding: 0.5rem; + margin-top: 0.25rem; + background: var(--bg); + border: 1px solid var(--border); + border-radius: 4px; + color: var(--text); + font-size: 0.9rem; +} +.form textarea { min-height: 60px; resize: vertical; } +.form .btn { margin-top: 0.5rem; } + +.error { background: #450a0a; color: var(--red); padding: 0.75rem; border-radius: 4px; margin-bottom: 1rem; font-size: 0.85rem; } +.empty { color: var(--text-muted); padding: 2rem; text-align: center; } +.empty a { color: var(--accent); } +.inline { display: inline; } +h2 { margin-bottom: 1rem; } +h3 { margin: 1.5rem 0 0.75rem; color: var(--text-muted); font-size: 0.95rem; } diff --git a/internal/web/static/app.js b/internal/web/static/app.js new file mode 100644 index 0000000..c4d2d48 --- /dev/null +++ b/internal/web/static/app.js @@ -0,0 +1,23 @@ +(function() { + const dot = document.getElementById('sse-dot'); + let es; + + function connect() { + es = new EventSource('/events'); + es.addEventListener('hello', () => { + dot.classList.remove('disconnected'); + }); + es.addEventListener('host.state_changed', (e) => { + // Reload the page to reflect state changes + // Future: HTMX swap individual tiles + window.location.reload(); + }); + es.onerror = () => { + dot.classList.add('disconnected'); + es.close(); + setTimeout(connect, 3000); + }; + } + + connect(); +})();