9bb4b09a04
CI / Lint + build + test (push) Has been cancelled
Post-repair hardware validation pipeline for Proxmox cluster hosts. Go orchestrator + in-image agent + mkosi live image + bundled dnsmasq PXE + SQLite + HTMX/SSE UI + notify registry + janitor + full docs.
93 lines
2.3 KiB
Go
93 lines
2.3 KiB
Go
package orchestrator
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"os/exec"
|
|
"strconv"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// IperfSupervisor runs a single `iperf3 -s` process under the
// orchestrator so the Network stage has a stable server to dial. Each
// run's Network test is sequential (stages are always serial), so one
// server process handles every host under test.
//
// A missing iperf3 binary is logged by Start (not returned as an error)
// and the supervisor becomes a no-op — the agent's Network stage will
// then fail to connect and skip cleanly via the stage's own error path.
//
// The subprocess is reaped by exactly one goroutine (see wait); every
// other method observes its exit through the done channel instead of
// calling exec.Cmd.Wait a second time.
type IperfSupervisor struct {
	Port int // port handed to `iperf3 -p`; NewIperfSupervisor defaults it to 5201

	mu      sync.Mutex    // guards every field below
	cmd     *exec.Cmd     // most recently started process; nil until Start succeeds
	done    chan struct{} // closed by wait once cmd has exited and been reaped
	started bool          // true while cmd is believed to be running
	fatal   error         // last startup failure recorded by Start
}

// NewIperfSupervisor returns a supervisor that will serve on port.
// A non-positive port selects 5201.
func NewIperfSupervisor(port int) *IperfSupervisor {
	if port <= 0 {
		port = 5201
	}
	return &IperfSupervisor{Port: port}
}

// Start launches `iperf3 -s -p <Port>` bound to ctx (the process is
// killed when ctx is cancelled, per exec.CommandContext).
//
// It is a no-op returning nil when the server is already running. When
// iperf3 is not in PATH the failure is recorded and logged but nil is
// still returned, so callers do not treat a missing binary as fatal.
// Any other launch failure is recorded and returned.
func (s *IperfSupervisor) Start(ctx context.Context) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.started {
		return nil
	}
	if _, err := exec.LookPath("iperf3"); err != nil {
		s.fatal = fmt.Errorf("iperf3 not in PATH: %w", err)
		log.Printf("iperf supervisor: %v (Network stage will fail to connect)", s.fatal)
		return nil
	}
	cmd := exec.CommandContext(ctx, "iperf3", "-s", "-p", strconv.Itoa(s.Port))
	if err := cmd.Start(); err != nil {
		s.fatal = err
		return err
	}
	s.cmd = cmd
	s.done = make(chan struct{})
	s.started = true
	log.Printf("iperf supervisor: iperf3 -s -p %d (pid=%d)", s.Port, cmd.Process.Pid)
	// The reaper blocks on s.mu until this method returns, then picks up
	// the cmd/done pair published above.
	go s.wait()
	return nil
}

// Shutdown politely stops the iperf3 subprocess. It sends an interrupt,
// waits up to timeout for the process to exit, and kills it if it has
// not. It returns nil when there is nothing to stop or the process
// exited within the grace period, and an error when it had to be killed.
func (s *IperfSupervisor) Shutdown(timeout time.Duration) error {
	s.mu.Lock()
	cmd, done := s.cmd, s.done
	s.mu.Unlock()
	if cmd == nil || cmd.Process == nil || done == nil {
		return nil
	}
	// Already reaped: nothing to signal, and checking first keeps a zero
	// timeout from racing the two ready cases in the select below.
	select {
	case <-done:
		return nil
	default:
	}
	// os.Interrupt is cross-platform; on Linux it maps to SIGINT which
	// iperf3 handles gracefully. On Windows (dev only) it's a no-op and
	// we'll fall through to Kill after the timeout. The error is ignored
	// because the process may legitimately have exited in the meantime.
	_ = cmd.Process.Signal(os.Interrupt)

	grace := time.NewTimer(timeout)
	defer grace.Stop()
	select {
	case <-done:
		return nil
	case <-grace.C:
		// Best effort: a failure here means the process is already gone.
		_ = cmd.Process.Kill()
		return errors.New("iperf3 did not exit in time; killed")
	}
}

// wait is the sole reaper for the process launched by Start: it blocks in
// exec.Cmd.Wait, marks the supervisor as stopped, and then closes done so
// Shutdown can observe the exit. It must only be run by Start, once per
// started process — calling Wait on the same Cmd from a second goroutine
// races on the Cmd's internal state.
func (s *IperfSupervisor) wait() {
	s.mu.Lock()
	cmd, done := s.cmd, s.done
	s.mu.Unlock()
	if cmd == nil {
		return
	}

	err := cmd.Wait()
	if err != nil {
		log.Printf("iperf supervisor: iperf3 (pid=%d) exited: %v", cmd.Process.Pid, err)
	} else {
		log.Printf("iperf supervisor: iperf3 (pid=%d) exited", cmd.Process.Pid)
	}

	s.mu.Lock()
	// Only clear the flag if no newer process has replaced this one.
	if s.cmd == cmd {
		s.started = false
	}
	s.mu.Unlock()

	// Close after started is cleared so anyone woken by done sees a
	// consistent "not running" state.
	if done != nil {
		close(done)
	}
}
|