Files
Vetting/internal/orchestrator/iperf.go
T
josh 9bb4b09a04
CI / Lint + build + test (push) Has been cancelled
Initial commit: full Phases 1-6 implementation
Post-repair hardware validation pipeline for Proxmox cluster hosts.
Go orchestrator + in-image agent + mkosi live image + bundled dnsmasq
PXE + SQLite + HTMX/SSE UI + notify registry + janitor + full docs.
2026-04-17 21:32:10 -04:00

93 lines
2.3 KiB
Go

package orchestrator
import (
"context"
"errors"
"fmt"
"log"
"os"
"os/exec"
"strconv"
"sync"
"time"
)
// IperfSupervisor runs a single `iperf3 -s` process under the
// orchestrator so the Network stage has a stable server to dial. Each
// run's Network test is sequential (stages are always serial), so one
// server process handles every host under test.
//
// A missing iperf3 binary is logged and the supervisor becomes a
// no-op — the agent's Network stage will then fail to connect and skip
// cleanly via the stage's own error path.
//
// The child process is reaped by exactly one goroutine (wait); every
// other method observes its exit through the done channel instead of
// calling exec.Cmd.Wait a second time.
type IperfSupervisor struct {
	Port int // TCP port handed to `iperf3 -s -p`; default 5201

	mu      sync.Mutex    // guards every field below
	cmd     *exec.Cmd     // most recently started iperf3 process, nil before the first successful Start
	done    chan struct{} // closed by wait once cmd has exited and been reaped
	started bool          // true while cmd is believed to be running
	fatal   error         // last reason iperf3 could not be launched
}

// NewIperfSupervisor returns a supervisor that will listen on port.
// A non-positive port selects the iperf3 default, 5201.
func NewIperfSupervisor(port int) *IperfSupervisor {
	if port <= 0 {
		port = 5201
	}
	return &IperfSupervisor{Port: port}
}

// Start launches `iperf3 -s -p <Port>` bound to ctx; cancelling ctx
// kills the process (exec.CommandContext semantics). Calling Start
// while a server is already running is a no-op.
//
// If iperf3 is not in PATH the problem is recorded and logged and Start
// returns nil, leaving the supervisor inert. Any other launch failure
// is returned to the caller.
func (s *IperfSupervisor) Start(ctx context.Context) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	if s.started {
		return nil
	}
	if _, err := exec.LookPath("iperf3"); err != nil {
		s.fatal = fmt.Errorf("iperf3 not in PATH: %w", err)
		log.Printf("iperf supervisor: %v (Network stage will fail to connect)", s.fatal)
		return nil
	}

	cmd := exec.CommandContext(ctx, "iperf3", "-s", "-p", strconv.Itoa(s.Port))
	if err := cmd.Start(); err != nil {
		s.fatal = fmt.Errorf("starting iperf3: %w", err)
		return s.fatal
	}

	s.cmd = cmd
	s.done = make(chan struct{})
	s.started = true
	log.Printf("iperf supervisor: iperf3 -s -p %d (pid=%d)", s.Port, cmd.Process.Pid)

	// The reaper blocks on s.mu until this function returns, so it is
	// guaranteed to observe the cmd/done pair assigned above.
	go s.wait()
	return nil
}

// Shutdown politely stops the iperf3 subprocess. Called from main on
// SIGINT. timeout is the grace period granted after the interrupt
// signal (the original author notes 3s is enough for iperf3 to flush
// logs); once it elapses the process is killed and an error is
// returned. Shutdown returns nil if no process was started or it has
// already exited.
func (s *IperfSupervisor) Shutdown(timeout time.Duration) error {
	s.mu.Lock()
	cmd, done := s.cmd, s.done
	s.mu.Unlock()

	if cmd == nil || cmd.Process == nil || done == nil {
		return nil
	}

	// Already reaped: nothing to signal.
	select {
	case <-done:
		return nil
	default:
	}

	// os.Interrupt is cross-platform; on Linux it maps to SIGINT which
	// iperf3 handles gracefully. On Windows (dev only) it's a no-op and
	// we'll fall through to Kill after the timeout. The error is
	// ignored on purpose: a failed signal is handled by that same
	// fallback.
	_ = cmd.Process.Signal(os.Interrupt)

	grace := time.NewTimer(timeout)
	defer grace.Stop()

	// Wait on the reaper's done channel rather than calling cmd.Wait
	// here: exec.Cmd.Wait may only be called once, and wait() owns
	// that call.
	select {
	case <-done:
		return nil
	case <-grace.C:
		// Best effort; the reaper goroutine collects the exit status.
		_ = cmd.Process.Kill()
		return errors.New("iperf3 did not exit in time; killed")
	}
}

// wait is the sole reaper for the process launched by Start. It blocks
// until iperf3 exits, marks the supervisor as stopped so a later Start
// can relaunch, and then closes done to release any pending Shutdown.
func (s *IperfSupervisor) wait() {
	s.mu.Lock()
	cmd, done := s.cmd, s.done
	s.mu.Unlock()
	if cmd == nil || done == nil {
		return
	}

	if err := cmd.Wait(); err != nil {
		// Surfaces failures such as the port already being bound, and
		// also the expected non-zero exit after a signal.
		log.Printf("iperf supervisor: iperf3 exited: %v", err)
	}

	s.mu.Lock()
	s.started = false
	s.mu.Unlock()
	close(done)
}