Files
Vetting/agent/hostmode/run.go
T
josh a0c0fb114f
CI / Lint + build + test (push) Has been cancelled
Add host-mode heartbeat: vetting-agent host + last-seen badge
vetting-agent gains a `host` subcommand that runs as a systemd service
installed by the quick-register one-liner, POSTing every 30s to
/api/v1/hosts/{mac}/heartbeat so the dashboard tile shows "online" or
"Nm ago" without waiting on WoL. Ships dormant client code for the
Phase 2 reboot_for_vetting command so the server can flip it on later
without a binary redeploy.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-17 23:34:15 -04:00

102 lines
2.7 KiB
Go

// Package hostmode implements the "persistent reporter" mode of
// vetting-agent. It runs as a systemd service on the host (not in
// the live image), heartbeats to the orchestrator every ~30s, and
// in Phase 2 accepts commands — most importantly reboot-for-vetting.
package hostmode
import (
"context"
"errors"
"fmt"
"log"
"os"
"strings"
"time"
"gopkg.in/yaml.v3"
)
// Config mirrors /etc/vetting/host-agent.yaml. All fields are
// optional except OrchestratorURL — the rest have reasonable
// defaults so a single `orchestrator_url:` line works.
type Config struct {
	// OrchestratorURL is the orchestrator's base URL. LoadConfig trims
	// surrounding whitespace and trailing slashes from it and rejects
	// an empty result.
	OrchestratorURL string `yaml:"orchestrator_url"`
	// MAC identifies this host in heartbeats. LoadConfig trims and
	// lowercases it; when it is left empty, Run fills it in from
	// primaryMAC.
	MAC string `yaml:"mac,omitempty"`
	// Interval is the heartbeat period. It is never decoded from YAML
	// (tag "-"); LoadConfig derives it from IntervalRaw and falls back
	// to 30s when IntervalRaw is empty.
	Interval time.Duration `yaml:"-"`
	// IntervalRaw is the textual `interval:` value in
	// time.ParseDuration syntax. LoadConfig rejects values shorter
	// than one second.
	IntervalRaw string `yaml:"interval,omitempty"`
}
// LoadConfig reads the YAML file at path, decodes it into a Config,
// and normalizes the result:
//
//   - OrchestratorURL has surrounding whitespace and trailing slashes
//     removed; an empty value is an error.
//   - MAC is trimmed and lowercased (it may remain empty).
//   - Interval defaults to 30s, or is parsed from IntervalRaw, which
//     must be a valid time.ParseDuration string of at least one second.
//
// On any failure the returned *Config is nil.
func LoadConfig(path string) (*Config, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("read %s: %w", path, err)
	}

	cfg := new(Config)
	if err = yaml.Unmarshal(raw, cfg); err != nil {
		return nil, fmt.Errorf("parse %s: %w", path, err)
	}

	baseURL := strings.TrimSpace(cfg.OrchestratorURL)
	baseURL = strings.TrimRight(baseURL, "/")
	if baseURL == "" {
		return nil, errors.New("orchestrator_url is required")
	}
	cfg.OrchestratorURL = baseURL
	cfg.MAC = strings.ToLower(strings.TrimSpace(cfg.MAC))

	// Start from the default period and override it only when the
	// file supplies an explicit, acceptable interval.
	cfg.Interval = 30 * time.Second
	if cfg.IntervalRaw != "" {
		period, parseErr := time.ParseDuration(cfg.IntervalRaw)
		switch {
		case parseErr != nil:
			return nil, fmt.Errorf("parse interval: %w", parseErr)
		case period < time.Second:
			return nil, fmt.Errorf("interval %s is too aggressive", period)
		}
		cfg.Interval = period
	}

	return cfg, nil
}
// Run loads the configuration at cfgPath and then heartbeats to the
// orchestrator until ctx is cancelled, at which point it returns
// ctx.Err(). It returns early only if the configuration cannot be
// loaded or no MAC can be resolved; failures of individual heartbeats
// are handled inside tick and never stop the loop, because the
// service is `Restart=on-failure` in systemd and a transient HTTP
// failure is not a reason to exit.
func Run(ctx context.Context, cfgPath string) error {
	cfg, err := LoadConfig(cfgPath)
	if err != nil {
		return err
	}

	// Fall back to the host's primary MAC when the config omits one.
	if cfg.MAC == "" {
		detected, macErr := primaryMAC()
		if macErr != nil {
			return fmt.Errorf("resolve primary MAC: %w", macErr)
		}
		cfg.MAC = detected
	}

	log.Printf("hostmode: reporting to %s as %s every %s",
		cfg.OrchestratorURL, cfg.MAC, cfg.Interval)

	client := newHostClient(cfg.OrchestratorURL)

	// Send the first heartbeat right away so the dashboard lights up
	// on service start instead of one full interval later.
	tick(ctx, client, cfg)

	ticker := time.NewTicker(cfg.Interval)
	defer ticker.Stop()

	done := ctx.Done()
	for {
		select {
		case <-ticker.C:
			tick(ctx, client, cfg)
		case <-done:
			return ctx.Err()
		}
	}
}
// tick sends a single heartbeat for cfg.MAC through c. A successful
// response is handed to handleResponse; a failure is only logged, so
// the caller's loop keeps running.
func tick(ctx context.Context, c *hostClient, cfg *Config) {
	reply, hbErr := c.heartbeat(ctx, cfg.MAC)
	if hbErr == nil {
		handleResponse(ctx, reply)
		return
	}
	log.Printf("hostmode: heartbeat: %v", hbErr)
}