Use ephemeral SSH keys per rebuild instead of static config keys
build-and-push / test (push) Successful in 9m57s
build-and-push / build-and-push (push) Has been cancelled

Generate a fresh ed25519 key pair at rebuild time, inject the public key
into the Proxmox answer file, use the private key for cluster join over
SSH, then remove the key from both the remote host and the database.
This eliminates the need to manage static SSH keys in config/secrets.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-03 21:09:22 -04:00
parent aec31b9f8b
commit b23ef64ee1
13 changed files with 191 additions and 68 deletions
-1
View File
@@ -53,7 +53,6 @@ Edit `provisioning.yaml`:
- `pxe.subnet` — LAN CIDR for proxy-DHCP - `pxe.subnet` — LAN CIDR for proxy-DHCP
- `proxmox.existing_node` — IP of any current cluster member - `proxmox.existing_node` — IP of any current cluster member
- `proxmox.join_fingerprint` — from `pvecm status` on an existing node - `proxmox.join_fingerprint` — from `pvecm status` on an existing node
- `credentials.ssh_public_key` — public key injected into new hosts
- `credentials.root_password_hash` — `mkpasswd -m sha-512` - `credentials.root_password_hash` — `mkpasswd -m sha-512`
- `infrastructure.base_url` — URL of the Infrastructure service - `infrastructure.base_url` — URL of the Infrastructure service
- `infrastructure.server_type_map` — maps local type keys to Infrastructure IDs - `infrastructure.server_type_map` — maps local type keys to Infrastructure IDs
-2
View File
@@ -22,8 +22,6 @@ proxmox:
join_fingerprint: "AA:BB:CC:DD:EE:FF:00:11:22:33:44:55:66:77:88:99:AA:BB:CC:DD:EE:FF:00:11:22:33:44:55:66:77:88:99" join_fingerprint: "AA:BB:CC:DD:EE:FF:00:11:22:33:44:55:66:77:88:99:AA:BB:CC:DD:EE:FF:00:11:22:33:44:55:66:77:88:99"
credentials: credentials:
ssh_private_key_path: "/etc/provisioning/keys/id_ed25519"
ssh_public_key: "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAEXAMPLE provisioning@homelab"
root_password_hash: "$6$rounds=5000$randomsalt$hashedpasswordhere" root_password_hash: "$6$rounds=5000$randomsalt$hashedpasswordhere"
infrastructure: infrastructure:
+7 -1
View File
@@ -82,7 +82,13 @@ func (a *BootAPI) AnswerFile(w http.ResponseWriter, r *http.Request) {
a.Runner.Transition(r.Context(), host.ID, statemachine.TriggerAnswerServed) a.Runner.Transition(r.Context(), host.ID, statemachine.TriggerAnswerServed)
} }
answer := pxe.GenerateAnswerFile(host, st, a.Config) _, pubKey, _ := a.Hosts.GetEphemeralKey(r.Context(), host.ID)
if pubKey == "" {
http.Error(w, "no ephemeral key for host", http.StatusInternalServerError)
return
}
answer := pxe.GenerateAnswerFile(host, st, a.Config, pubKey)
w.Header().Set("Content-Type", "application/toml") w.Header().Set("Content-Type", "application/toml")
w.Write([]byte(answer)) w.Write([]byte(answer))
} }
+15 -8
View File
@@ -18,14 +18,15 @@ import (
) )
type HostAPI struct { type HostAPI struct {
Hosts *store.Hosts Hosts *store.Hosts
Ops *store.Operations Ops *store.Operations
Locks *store.Locks Locks *store.Locks
Images *store.Images Images *store.Images
Runner *orchestrator.Runner Runner *orchestrator.Runner
PXE *pxe.Supervisor Orchestrator *orchestrator.HostOrchestrator
Config *config.Config PXE *pxe.Supervisor
ServerTypes *config.ServerTypeRegistry Config *config.Config
ServerTypes *config.ServerTypeRegistry
} }
func (a *HostAPI) List(w http.ResponseWriter, r *http.Request) { func (a *HostAPI) List(w http.ResponseWriter, r *http.Request) {
@@ -124,6 +125,12 @@ func (a *HostAPI) Rebuild(w http.ResponseWriter, r *http.Request) {
return return
} }
if err := a.Orchestrator.PrepareRebuild(r.Context(), host.ID); err != nil {
_ = a.Locks.Release(r.Context(), host.ID)
writeJSONErr(w, http.StatusInternalServerError, "failed to generate SSH key: "+err.Error())
return
}
if _, err := a.Runner.Transition(r.Context(), host.ID, statemachine.TriggerRebuildRequested); err != nil { if _, err := a.Runner.Transition(r.Context(), host.ID, statemachine.TriggerRebuildRequested); err != nil {
_ = a.Locks.Release(r.Context(), host.ID) _ = a.Locks.Release(r.Context(), host.ID)
writeJSONErr(w, http.StatusConflict, err.Error()) writeJSONErr(w, http.StatusConflict, err.Error())
+22 -20
View File
@@ -58,17 +58,6 @@ func newTestServer(t *testing.T) *httptest.Server {
pxeSupervisor := pxe.NewSupervisor(pxe.SupervisorConfig{Enabled: false}) pxeSupervisor := pxe.NewSupervisor(pxe.SupervisorConfig{Enabled: false})
hostAPI := &api.HostAPI{
Hosts: hosts,
Ops: ops,
Locks: locks,
Images: images,
Runner: runner,
PXE: pxeSupervisor,
Config: cfg,
ServerTypes: serverTypes,
}
hostOrch := &orchestrator.HostOrchestrator{ hostOrch := &orchestrator.HostOrchestrator{
Runner: runner, Runner: runner,
Hosts: hosts, Hosts: hosts,
@@ -79,6 +68,18 @@ func newTestServer(t *testing.T) *httptest.Server {
ServerTypes: serverTypes, ServerTypes: serverTypes,
} }
hostAPI := &api.HostAPI{
Hosts: hosts,
Ops: ops,
Locks: locks,
Images: images,
Runner: runner,
Orchestrator: hostOrch,
PXE: pxeSupervisor,
Config: cfg,
ServerTypes: serverTypes,
}
bootAPI := &api.BootAPI{ bootAPI := &api.BootAPI{
Hosts: hosts, Hosts: hosts,
Images: images, Images: images,
@@ -89,15 +90,16 @@ func newTestServer(t *testing.T) *httptest.Server {
} }
ui := &api.UI{ ui := &api.UI{
Hosts: hosts, Hosts: hosts,
Ops: ops, Ops: ops,
Locks: locks, Locks: locks,
Images: images, Images: images,
Runner: runner, Runner: runner,
Hub: hub, Orchestrator: hostOrch,
PXE: pxeSupervisor, Hub: hub,
Config: cfg, PXE: pxeSupervisor,
ServerTypes: serverTypes, Config: cfg,
ServerTypes: serverTypes,
} }
router := httpserver.NewRouter(httpserver.Deps{ router := httpserver.NewRouter(httpserver.Deps{
+17 -9
View File
@@ -19,15 +19,16 @@ import (
) )
type UI struct { type UI struct {
Hosts *store.Hosts Hosts *store.Hosts
Ops *store.Operations Ops *store.Operations
Locks *store.Locks Locks *store.Locks
Images *store.Images Images *store.Images
Runner *orchestrator.Runner Runner *orchestrator.Runner
Hub *events.Hub Orchestrator *orchestrator.HostOrchestrator
PXE *pxe.Supervisor Hub *events.Hub
Config *config.Config PXE *pxe.Supervisor
ServerTypes *config.ServerTypeRegistry Config *config.Config
ServerTypes *config.ServerTypeRegistry
} }
func (u *UI) Dashboard(w http.ResponseWriter, r *http.Request) { func (u *UI) Dashboard(w http.ResponseWriter, r *http.Request) {
@@ -128,6 +129,13 @@ func (u *UI) TriggerRebuild(w http.ResponseWriter, r *http.Request) {
Kind: model.OpRebuildProxmox, Kind: model.OpRebuildProxmox,
}) })
_ = u.Locks.Acquire(r.Context(), host.ID, opID) _ = u.Locks.Acquire(r.Context(), host.ID, opID)
if err := u.Orchestrator.PrepareRebuild(r.Context(), host.ID); err != nil {
_ = u.Locks.Release(r.Context(), host.ID)
http.Error(w, "Failed to prepare rebuild: "+err.Error(), http.StatusInternalServerError)
return
}
u.Runner.Transition(r.Context(), host.ID, statemachine.TriggerRebuildRequested) u.Runner.Transition(r.Context(), host.ID, statemachine.TriggerRebuildRequested)
hosts, _ := u.Hosts.List(r.Context()) hosts, _ := u.Hosts.List(r.Context())
+1 -3
View File
@@ -48,9 +48,7 @@ type Proxmox struct {
} }
type Credentials struct { type Credentials struct {
SSHPrivateKeyPath string `yaml:"ssh_private_key_path"` RootPasswordHash string `yaml:"root_password_hash"`
SSHPublicKey string `yaml:"ssh_public_key"`
RootPasswordHash string `yaml:"root_password_hash"`
} }
type Infrastructure struct { type Infrastructure struct {
@@ -0,0 +1,2 @@
-- Add per-host storage for the ephemeral SSH key pair used during a rebuild.
-- Both columns are nullable TEXT: the application writes them together when a
-- key is generated and sets them back to NULL when the key is cleared.
ALTER TABLE hosts ADD COLUMN ssh_private_key TEXT;
ALTER TABLE hosts ADD COLUMN ssh_public_key TEXT;
+46 -21
View File
@@ -4,7 +4,6 @@ import (
"context" "context"
"fmt" "fmt"
"log" "log"
"os"
"time" "time"
"golang.org/x/crypto/ssh" "golang.org/x/crypto/ssh"
@@ -14,41 +13,39 @@ type ClusterJoiner struct {
ExistingNode string ExistingNode string
ClusterName string ClusterName string
JoinFingerprint string JoinFingerprint string
SSHKeyPath string
} }
func (c *ClusterJoiner) Join(ctx context.Context, hostIP string) error { func (c *ClusterJoiner) Join(ctx context.Context, hostIP string, privateKey string, publicKey string) error {
client, err := c.connect(hostIP) client, err := c.connect(hostIP, privateKey)
if err != nil { if err != nil {
return fmt.Errorf("ssh connect to %s: %w", hostIP, err) return fmt.Errorf("ssh connect to %s: %w", hostIP, err)
} }
defer client.Close() defer client.Close()
// Join the cluster
cmd := fmt.Sprintf("pvecm add %s --force", c.ExistingNode) cmd := fmt.Sprintf("pvecm add %s --force", c.ExistingNode)
log.Printf("cluster: running on %s: %s", hostIP, cmd) log.Printf("cluster: running on %s: %s", hostIP, cmd)
if err := c.runCmd(client, cmd); err != nil {
session, err := client.NewSession() return fmt.Errorf("pvecm add failed: %w", err)
if err != nil {
return fmt.Errorf("ssh session: %w", err)
}
defer session.Close()
output, err := session.CombinedOutput(cmd)
if err != nil {
return fmt.Errorf("pvecm add failed: %w\noutput: %s", err, string(output))
} }
log.Printf("cluster: %s joined successfully", hostIP) log.Printf("cluster: %s joined successfully", hostIP)
// Remove the ephemeral key from authorized_keys
escaped := escapeForSed(publicKey)
removeCmd := fmt.Sprintf(`sed -i '\|%s|d' /root/.ssh/authorized_keys`, escaped)
if err := c.runCmd(client, removeCmd); err != nil {
log.Printf("cluster: warning: failed to remove ephemeral key from %s: %v", hostIP, err)
} else {
log.Printf("cluster: ephemeral key removed from %s", hostIP)
}
return nil return nil
} }
func (c *ClusterJoiner) connect(hostIP string) (*ssh.Client, error) { func (c *ClusterJoiner) connect(hostIP string, privateKeyPEM string) (*ssh.Client, error) {
keyData, err := os.ReadFile(c.SSHKeyPath) signer, err := ssh.ParsePrivateKey([]byte(privateKeyPEM))
if err != nil { if err != nil {
return nil, fmt.Errorf("read ssh key: %w", err) return nil, fmt.Errorf("parse ephemeral key: %w", err)
}
signer, err := ssh.ParsePrivateKey(keyData)
if err != nil {
return nil, fmt.Errorf("parse ssh key: %w", err)
} }
config := &ssh.ClientConfig{ config := &ssh.ClientConfig{
User: "root", User: "root",
@@ -58,3 +55,31 @@ func (c *ClusterJoiner) connect(hostIP string) (*ssh.Client, error) {
} }
return ssh.Dial("tcp", hostIP+":22", config) return ssh.Dial("tcp", hostIP+":22", config)
} }
// runCmd opens a new session on client, runs cmd in it and waits for the
// command to finish. The session is closed before returning.
//
// It returns nil when the command succeeds. If the session cannot be opened
// the error is wrapped with "ssh session"; if the command fails, the returned
// error wraps the underlying error and appends the command's combined
// stdout/stderr so the caller can log what the remote side printed.
func (c *ClusterJoiner) runCmd(client *ssh.Client, cmd string) error {
	sess, openErr := client.NewSession()
	if openErr != nil {
		return fmt.Errorf("ssh session: %w", openErr)
	}
	defer sess.Close()

	if out, runErr := sess.CombinedOutput(cmd); runErr != nil {
		return fmt.Errorf("%w\noutput: %s", runErr, string(out))
	}
	return nil
}
// escapeForSed turns s into a literal pattern that can be placed inside a sed
// address using '|' as its delimiter, e.g. `sed -i '\|PATTERN|d' file`.
//
// The transformation is:
//   - every newline is dropped (not only a trailing one), so the pattern
//     stays on a single line;
//   - the '|' delimiter is backslash-escaped;
//   - the backslash itself and the basic-regular-expression metacharacters
//     . * [ ^ $ are backslash-escaped so they match themselves instead of
//     being interpreted by sed.
//
// All other bytes are copied through unchanged. Single quotes are NOT
// escaped: a caller that embeds the result in a single-quoted shell word must
// make sure s contains none.
func escapeForSed(s string) string {
	// Every byte that needs special handling is ASCII, so walking bytes is
	// safe for UTF-8 input and avoids re-encoding each rune. A small amount
	// of headroom covers the common case of a few escaped characters.
	out := make([]byte, 0, len(s)+8)
	for i := 0; i < len(s); i++ {
		switch ch := s[i]; ch {
		case '\n':
			// Dropped: a sed address cannot span lines.
		case '|', '\\', '.', '*', '[', '^', '$':
			out = append(out, '\\', ch)
		default:
			out = append(out, ch)
		}
	}
	return string(out)
}
+21 -1
View File
@@ -22,6 +22,16 @@ type HostOrchestrator struct {
ServerTypes *config.ServerTypeRegistry ServerTypes *config.ServerTypeRegistry
} }
// PrepareRebuild creates a fresh ephemeral SSH key pair via
// GenerateEphemeralKey and persists both halves on the host row identified by
// hostID, replacing whatever key was stored before.
//
// It returns the key-generation error unchanged if generation fails, and
// otherwise whatever error the store reports while saving the key.
func (o *HostOrchestrator) PrepareRebuild(ctx context.Context, hostID int64) error {
	pair, genErr := GenerateEphemeralKey()
	if genErr != nil {
		return genErr
	}

	private, public := pair.PrivateKey, pair.PublicKey
	return o.Hosts.SetEphemeralKey(ctx, hostID, private, public)
}
func (o *HostOrchestrator) HandlePhoneHome(ctx context.Context, hostID int64, ip string, hardwareID string) { func (o *HostOrchestrator) HandlePhoneHome(ctx context.Context, hostID int64, ip string, hardwareID string) {
if err := o.Hosts.UpdateIP(ctx, hostID, ip, hardwareID); err != nil { if err := o.Hosts.UpdateIP(ctx, hostID, ip, hardwareID); err != nil {
log.Printf("host %d: failed to update IP: %v", hostID, err) log.Printf("host %d: failed to update IP: %v", hostID, err)
@@ -47,17 +57,27 @@ func (o *HostOrchestrator) postPhoneHome(hostID int64, ip string, hardwareID str
return return
} }
privateKey, publicKey, err := o.Hosts.GetEphemeralKey(ctx, hostID)
if err != nil || privateKey == "" {
log.Printf("host %d: no ephemeral key available: %v", hostID, err)
o.Runner.FailHost(ctx, hostID, "no ephemeral SSH key")
return
}
if _, err := o.Runner.Transition(ctx, hostID, statemachine.TriggerClusterJoinStart); err != nil { if _, err := o.Runner.Transition(ctx, hostID, statemachine.TriggerClusterJoinStart); err != nil {
log.Printf("host %d: cluster join start transition failed: %v", hostID, err) log.Printf("host %d: cluster join start transition failed: %v", hostID, err)
return return
} }
if err := o.Cluster.Join(ctx, ip); err != nil { if err := o.Cluster.Join(ctx, ip, privateKey, publicKey); err != nil {
log.Printf("host %d: cluster join failed: %v", hostID, err) log.Printf("host %d: cluster join failed: %v", hostID, err)
o.Runner.FailHost(ctx, hostID, "cluster join: "+err.Error()) o.Runner.FailHost(ctx, hostID, "cluster join: "+err.Error())
return return
} }
// Key has been removed from the remote host; clear it from the DB
_ = o.Hosts.ClearEphemeralKey(ctx, hostID)
if err := o.registerInfra(ctx, host, ip, hardwareID); err != nil { if err := o.registerInfra(ctx, host, ip, hardwareID); err != nil {
log.Printf("host %d: infra registration failed: %v", hostID, err) log.Printf("host %d: infra registration failed: %v", hostID, err)
o.Runner.FailHost(ctx, hostID, "infra registration: "+err.Error()) o.Runner.FailHost(ctx, hostID, "infra registration: "+err.Error())
+39
View File
@@ -0,0 +1,39 @@
package orchestrator
import (
"crypto/ed25519"
"crypto/rand"
"encoding/pem"
"fmt"
"golang.org/x/crypto/ssh"
)
// KeyPair holds both halves of an SSH key pair as text, in the forms produced
// by GenerateEphemeralKey.
type KeyPair struct {
	// PrivateKey is the PEM-encoded private key.
	PrivateKey string
	// PublicKey is the public key as marshalled by ssh.MarshalAuthorizedKey
	// (an authorized_keys-style line).
	PublicKey string
}
// GenerateEphemeralKey creates a new ed25519 key pair from crypto/rand and
// returns it in textual form: the private key PEM-encoded (marshalled with an
// empty comment argument) and the public key as marshalled by
// ssh.MarshalAuthorizedKey.
//
// An error is returned, wrapped with the failing step, if key generation,
// public-key conversion or private-key marshalling fails.
func GenerateEphemeralKey() (*KeyPair, error) {
	publicRaw, privateRaw, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		return nil, fmt.Errorf("generate ed25519 key: %w", err)
	}

	authorized, err := ssh.NewPublicKey(publicRaw)
	if err != nil {
		return nil, fmt.Errorf("ssh public key: %w", err)
	}

	block, err := ssh.MarshalPrivateKey(privateRaw, "")
	if err != nil {
		return nil, fmt.Errorf("marshal private key: %w", err)
	}

	pair := &KeyPair{
		PrivateKey: string(pem.EncodeToMemory(block)),
		PublicKey:  string(ssh.MarshalAuthorizedKey(authorized)),
	}
	return pair, nil
}
+2 -2
View File
@@ -8,7 +8,7 @@ import (
"provisioning/internal/model" "provisioning/internal/model"
) )
func GenerateAnswerFile(host *model.Host, serverType model.ServerType, cfg *config.Config) string { func GenerateAnswerFile(host *model.Host, serverType model.ServerType, cfg *config.Config, sshPublicKey string) string {
var b strings.Builder var b strings.Builder
b.WriteString("[global]\n") b.WriteString("[global]\n")
@@ -18,7 +18,7 @@ func GenerateAnswerFile(host *model.Host, serverType model.ServerType, cfg *conf
b.WriteString(`mailto = "admin@thewrightserver.net"` + "\n") b.WriteString(`mailto = "admin@thewrightserver.net"` + "\n")
b.WriteString(`timezone = "America/Indiana/Indianapolis"` + "\n") b.WriteString(`timezone = "America/Indiana/Indianapolis"` + "\n")
b.WriteString(fmt.Sprintf("root-password-hashed = \"%s\"\n", cfg.Credentials.RootPasswordHash)) b.WriteString(fmt.Sprintf("root-password-hashed = \"%s\"\n", cfg.Credentials.RootPasswordHash))
b.WriteString(fmt.Sprintf("root-ssh-keys = [\"%s\"]\n", cfg.Credentials.SSHPublicKey)) b.WriteString(fmt.Sprintf("root-ssh-keys = [\"%s\"]\n", strings.TrimSpace(sshPublicKey)))
b.WriteString("\n") b.WriteString("\n")
b.WriteString("[network]\n") b.WriteString("[network]\n")
+19
View File
@@ -109,6 +109,25 @@ func (s *Hosts) UpdateInfraID(ctx context.Context, id int64, infraHostID int64)
return err return err
} }
// SetEphemeralKey writes privateKey and publicKey into the ssh_private_key and
// ssh_public_key columns of the host row with the given id, overwriting any
// previous values. The statement result is discarded, so a non-existent id is
// not reported as an error; only the database error, if any, is returned.
func (s *Hosts) SetEphemeralKey(ctx context.Context, id int64, privateKey, publicKey string) error {
	const stmt = `UPDATE hosts SET ssh_private_key = ?, ssh_public_key = ? WHERE id = ?`
	if _, err := s.DB.ExecContext(ctx, stmt, privateKey, publicKey, id); err != nil {
		return err
	}
	return nil
}
// GetEphemeralKey reads the ssh_private_key and ssh_public_key columns of the
// host row with the given id.
//
// A NULL column is returned as the empty string, so "no key stored" shows up
// as empty strings with a nil error. If the query or scan fails (including
// when no row matches), both strings are empty and the error is returned.
func (s *Hosts) GetEphemeralKey(ctx context.Context, id int64) (privateKey, publicKey string, err error) {
	const query = `SELECT ssh_private_key, ssh_public_key FROM hosts WHERE id = ?`

	var privCol, pubCol sql.NullString
	row := s.DB.QueryRowContext(ctx, query, id)
	if scanErr := row.Scan(&privCol, &pubCol); scanErr != nil {
		return "", "", scanErr
	}
	return privCol.String, pubCol.String, nil
}
// ClearEphemeralKey sets both the ssh_private_key and ssh_public_key columns
// of the host row with the given id to NULL. The statement result is
// discarded; only the database error, if any, is returned.
func (s *Hosts) ClearEphemeralKey(ctx context.Context, id int64) error {
	const stmt = `UPDATE hosts SET ssh_private_key = NULL, ssh_public_key = NULL WHERE id = ?`
	if _, err := s.DB.ExecContext(ctx, stmt, id); err != nil {
		return err
	}
	return nil
}
func (s *Hosts) Delete(ctx context.Context, id int64) error { func (s *Hosts) Delete(ctx context.Context, id int64) error {
res, err := s.DB.ExecContext(ctx, `DELETE FROM hosts WHERE id = ?`, id) res, err := s.DB.ExecContext(ctx, `DELETE FROM hosts WHERE id = ?`, id)
if err != nil { if err != nil {