Use ephemeral SSH keys per rebuild instead of static config keys
Generate a fresh ed25519 key pair at rebuild time, inject the public key into the Proxmox answer file, use the private key for cluster join over SSH, then remove the key from both the remote host and the database. This eliminates the need to manage static SSH keys in config/secrets. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4,7 +4,6 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"golang.org/x/crypto/ssh"
|
||||
@@ -14,41 +13,39 @@ type ClusterJoiner struct {
|
||||
ExistingNode string
|
||||
ClusterName string
|
||||
JoinFingerprint string
|
||||
SSHKeyPath string
|
||||
}
|
||||
|
||||
func (c *ClusterJoiner) Join(ctx context.Context, hostIP string) error {
|
||||
client, err := c.connect(hostIP)
|
||||
func (c *ClusterJoiner) Join(ctx context.Context, hostIP string, privateKey string, publicKey string) error {
|
||||
client, err := c.connect(hostIP, privateKey)
|
||||
if err != nil {
|
||||
return fmt.Errorf("ssh connect to %s: %w", hostIP, err)
|
||||
}
|
||||
defer client.Close()
|
||||
|
||||
// Join the cluster
|
||||
cmd := fmt.Sprintf("pvecm add %s --force", c.ExistingNode)
|
||||
log.Printf("cluster: running on %s: %s", hostIP, cmd)
|
||||
|
||||
session, err := client.NewSession()
|
||||
if err != nil {
|
||||
return fmt.Errorf("ssh session: %w", err)
|
||||
}
|
||||
defer session.Close()
|
||||
|
||||
output, err := session.CombinedOutput(cmd)
|
||||
if err != nil {
|
||||
return fmt.Errorf("pvecm add failed: %w\noutput: %s", err, string(output))
|
||||
if err := c.runCmd(client, cmd); err != nil {
|
||||
return fmt.Errorf("pvecm add failed: %w", err)
|
||||
}
|
||||
log.Printf("cluster: %s joined successfully", hostIP)
|
||||
|
||||
// Remove the ephemeral key from authorized_keys
|
||||
escaped := escapeForSed(publicKey)
|
||||
removeCmd := fmt.Sprintf(`sed -i '\|%s|d' /root/.ssh/authorized_keys`, escaped)
|
||||
if err := c.runCmd(client, removeCmd); err != nil {
|
||||
log.Printf("cluster: warning: failed to remove ephemeral key from %s: %v", hostIP, err)
|
||||
} else {
|
||||
log.Printf("cluster: ephemeral key removed from %s", hostIP)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *ClusterJoiner) connect(hostIP string) (*ssh.Client, error) {
|
||||
keyData, err := os.ReadFile(c.SSHKeyPath)
|
||||
func (c *ClusterJoiner) connect(hostIP string, privateKeyPEM string) (*ssh.Client, error) {
|
||||
signer, err := ssh.ParsePrivateKey([]byte(privateKeyPEM))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read ssh key: %w", err)
|
||||
}
|
||||
signer, err := ssh.ParsePrivateKey(keyData)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse ssh key: %w", err)
|
||||
return nil, fmt.Errorf("parse ephemeral key: %w", err)
|
||||
}
|
||||
config := &ssh.ClientConfig{
|
||||
User: "root",
|
||||
@@ -58,3 +55,31 @@ func (c *ClusterJoiner) connect(hostIP string) (*ssh.Client, error) {
|
||||
}
|
||||
return ssh.Dial("tcp", hostIP+":22", config)
|
||||
}
|
||||
|
||||
func (c *ClusterJoiner) runCmd(client *ssh.Client, cmd string) error {
|
||||
session, err := client.NewSession()
|
||||
if err != nil {
|
||||
return fmt.Errorf("ssh session: %w", err)
|
||||
}
|
||||
defer session.Close()
|
||||
output, err := session.CombinedOutput(cmd)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%w\noutput: %s", err, string(output))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// escapeForSed prepares s for use as the pattern of a sed address that uses
// '|' as its delimiter (for example `\|pattern|d`).
//
// Every '|' is prefixed with a backslash so it cannot terminate the pattern,
// and every newline in s is dropped (not just a trailing one). All other
// characters are copied through unchanged: regular-expression metacharacters
// and shell quotes are NOT escaped, so the caller is responsible for passing
// input that is safe in those respects.
func escapeForSed(s string) string {
	// Accumulate into a pre-sized rune buffer rather than growing a string
	// with +=, which would re-copy the partial result on every iteration.
	out := make([]rune, 0, len(s))
	for _, r := range s {
		switch r {
		case '\n':
			// Newlines are removed entirely.
		case '|':
			out = append(out, '\\', '|')
		default:
			out = append(out, r)
		}
	}
	return string(out)
}
|
||||
|
||||
@@ -22,6 +22,16 @@ type HostOrchestrator struct {
|
||||
ServerTypes *config.ServerTypeRegistry
|
||||
}
|
||||
|
||||
// PrepareRebuild generates an ephemeral SSH key pair and stores it on the host.
|
||||
// The public key will be injected into the Proxmox answer file.
|
||||
func (o *HostOrchestrator) PrepareRebuild(ctx context.Context, hostID int64) error {
|
||||
kp, err := GenerateEphemeralKey()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return o.Hosts.SetEphemeralKey(ctx, hostID, kp.PrivateKey, kp.PublicKey)
|
||||
}
|
||||
|
||||
func (o *HostOrchestrator) HandlePhoneHome(ctx context.Context, hostID int64, ip string, hardwareID string) {
|
||||
if err := o.Hosts.UpdateIP(ctx, hostID, ip, hardwareID); err != nil {
|
||||
log.Printf("host %d: failed to update IP: %v", hostID, err)
|
||||
@@ -47,17 +57,27 @@ func (o *HostOrchestrator) postPhoneHome(hostID int64, ip string, hardwareID str
|
||||
return
|
||||
}
|
||||
|
||||
privateKey, publicKey, err := o.Hosts.GetEphemeralKey(ctx, hostID)
|
||||
if err != nil || privateKey == "" {
|
||||
log.Printf("host %d: no ephemeral key available: %v", hostID, err)
|
||||
o.Runner.FailHost(ctx, hostID, "no ephemeral SSH key")
|
||||
return
|
||||
}
|
||||
|
||||
if _, err := o.Runner.Transition(ctx, hostID, statemachine.TriggerClusterJoinStart); err != nil {
|
||||
log.Printf("host %d: cluster join start transition failed: %v", hostID, err)
|
||||
return
|
||||
}
|
||||
|
||||
if err := o.Cluster.Join(ctx, ip); err != nil {
|
||||
if err := o.Cluster.Join(ctx, ip, privateKey, publicKey); err != nil {
|
||||
log.Printf("host %d: cluster join failed: %v", hostID, err)
|
||||
o.Runner.FailHost(ctx, hostID, "cluster join: "+err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
// Key has been removed from the remote host; clear it from the DB
|
||||
_ = o.Hosts.ClearEphemeralKey(ctx, hostID)
|
||||
|
||||
if err := o.registerInfra(ctx, host, ip, hardwareID); err != nil {
|
||||
log.Printf("host %d: infra registration failed: %v", hostID, err)
|
||||
o.Runner.FailHost(ctx, hostID, "infra registration: "+err.Error())
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
package orchestrator
|
||||
|
||||
import (
|
||||
"crypto/ed25519"
|
||||
"crypto/rand"
|
||||
"encoding/pem"
|
||||
"fmt"
|
||||
|
||||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
// KeyPair holds the two halves of an SSH key pair as text.
type KeyPair struct {
	// PrivateKey is the PEM-encoded private key.
	PrivateKey string
	// PublicKey is the public key as produced by ssh.MarshalAuthorizedKey.
	PublicKey string
}
|
||||
|
||||
func GenerateEphemeralKey() (*KeyPair, error) {
|
||||
pub, priv, err := ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("generate ed25519 key: %w", err)
|
||||
}
|
||||
|
||||
sshPub, err := ssh.NewPublicKey(pub)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("ssh public key: %w", err)
|
||||
}
|
||||
pubStr := string(ssh.MarshalAuthorizedKey(sshPub))
|
||||
|
||||
privBytes, err := ssh.MarshalPrivateKey(priv, "")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal private key: %w", err)
|
||||
}
|
||||
privStr := string(pem.EncodeToMemory(privBytes))
|
||||
|
||||
return &KeyPair{
|
||||
PrivateKey: privStr,
|
||||
PublicKey: pubStr,
|
||||
}, nil
|
||||
}
|
||||
Reference in New Issue
Block a user