b23ef64ee1
Generate a fresh ed25519 key pair at rebuild time, inject the public key into the Proxmox answer file, use the private key for cluster join over SSH, then remove the key from both the remote host and the database. This eliminates the need to manage static SSH keys in config/secrets. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
132 lines
3.8 KiB
Go
132 lines
3.8 KiB
Go
package orchestrator
|
|
|
|
import (
|
|
"context"
|
|
"log"
|
|
|
|
"provisioning/internal/config"
|
|
"provisioning/internal/infra"
|
|
"provisioning/internal/model"
|
|
"provisioning/internal/statemachine"
|
|
"provisioning/internal/store"
|
|
)
|
|
|
|
type HostOrchestrator struct {
|
|
Runner *Runner
|
|
Hosts *store.Hosts
|
|
Ops *store.Operations
|
|
Locks *store.Locks
|
|
Cluster *ClusterJoiner
|
|
InfraClient *infra.Client
|
|
Config *config.Config
|
|
ServerTypes *config.ServerTypeRegistry
|
|
}
|
|
|
|
// PrepareRebuild generates an ephemeral SSH key pair and stores it on the host.
|
|
// The public key will be injected into the Proxmox answer file.
|
|
func (o *HostOrchestrator) PrepareRebuild(ctx context.Context, hostID int64) error {
|
|
kp, err := GenerateEphemeralKey()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return o.Hosts.SetEphemeralKey(ctx, hostID, kp.PrivateKey, kp.PublicKey)
|
|
}
|
|
|
|
func (o *HostOrchestrator) HandlePhoneHome(ctx context.Context, hostID int64, ip string, hardwareID string) {
|
|
if err := o.Hosts.UpdateIP(ctx, hostID, ip, hardwareID); err != nil {
|
|
log.Printf("host %d: failed to update IP: %v", hostID, err)
|
|
o.Runner.FailHost(ctx, hostID, "failed to update IP: "+err.Error())
|
|
return
|
|
}
|
|
|
|
if _, err := o.Runner.Transition(ctx, hostID, statemachine.TriggerPhoneHome); err != nil {
|
|
log.Printf("host %d: phone-home transition failed: %v", hostID, err)
|
|
return
|
|
}
|
|
|
|
go o.postPhoneHome(hostID, ip, hardwareID)
|
|
}
|
|
|
|
func (o *HostOrchestrator) postPhoneHome(hostID int64, ip string, hardwareID string) {
|
|
ctx := context.Background()
|
|
|
|
host, err := o.Hosts.Get(ctx, hostID)
|
|
if err != nil {
|
|
log.Printf("host %d: failed to get host for cluster join: %v", hostID, err)
|
|
o.Runner.FailHost(ctx, hostID, "get host: "+err.Error())
|
|
return
|
|
}
|
|
|
|
privateKey, publicKey, err := o.Hosts.GetEphemeralKey(ctx, hostID)
|
|
if err != nil || privateKey == "" {
|
|
log.Printf("host %d: no ephemeral key available: %v", hostID, err)
|
|
o.Runner.FailHost(ctx, hostID, "no ephemeral SSH key")
|
|
return
|
|
}
|
|
|
|
if _, err := o.Runner.Transition(ctx, hostID, statemachine.TriggerClusterJoinStart); err != nil {
|
|
log.Printf("host %d: cluster join start transition failed: %v", hostID, err)
|
|
return
|
|
}
|
|
|
|
if err := o.Cluster.Join(ctx, ip, privateKey, publicKey); err != nil {
|
|
log.Printf("host %d: cluster join failed: %v", hostID, err)
|
|
o.Runner.FailHost(ctx, hostID, "cluster join: "+err.Error())
|
|
return
|
|
}
|
|
|
|
// Key has been removed from the remote host; clear it from the DB
|
|
_ = o.Hosts.ClearEphemeralKey(ctx, hostID)
|
|
|
|
if err := o.registerInfra(ctx, host, ip, hardwareID); err != nil {
|
|
log.Printf("host %d: infra registration failed: %v", hostID, err)
|
|
o.Runner.FailHost(ctx, hostID, "infra registration: "+err.Error())
|
|
return
|
|
}
|
|
|
|
if _, err := o.Runner.Transition(ctx, hostID, statemachine.TriggerJoinComplete); err != nil {
|
|
log.Printf("host %d: join complete transition failed: %v", hostID, err)
|
|
return
|
|
}
|
|
|
|
op, err := o.Ops.GetActive(ctx, hostID)
|
|
if err == nil {
|
|
_ = o.Ops.Complete(ctx, op.ID)
|
|
}
|
|
_ = o.Locks.Release(ctx, hostID)
|
|
log.Printf("host %d (%s): provisioning complete", hostID, host.Hostname)
|
|
}
|
|
|
|
func (o *HostOrchestrator) registerInfra(ctx context.Context, host *model.Host, ip string, hardwareID string) error {
|
|
if o.InfraClient == nil {
|
|
return nil
|
|
}
|
|
|
|
st, _ := o.ServerTypes.Get(host.ServerType)
|
|
serverTypeID := o.Config.Infrastructure.ServerTypeMap[host.ServerType]
|
|
|
|
infraID, err := o.InfraClient.CreateHost(ctx, infra.CreateHostRequest{
|
|
HardwareID: hardwareID,
|
|
Hostname: host.Hostname,
|
|
AssetID: host.Hostname,
|
|
RoomID: o.Config.Infrastructure.RoomID,
|
|
ServerTypeID: serverTypeID,
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := o.Hosts.UpdateInfraID(ctx, host.ID, infraID); err != nil {
|
|
return err
|
|
}
|
|
|
|
_ = o.InfraClient.CreateInterface(ctx, infra.CreateInterfaceRequest{
|
|
HostID: int(infraID),
|
|
Name: st.ManagementNIC,
|
|
MACAddress: host.MAC,
|
|
IPAddress: ip,
|
|
})
|
|
|
|
return nil
|
|
}
|