diff --git a/agent/agent_worker.go b/agent/agent_worker.go index cb0ccc170e..de7b73f1cb 100644 --- a/agent/agent_worker.go +++ b/agent/agent_worker.go @@ -217,6 +217,16 @@ func NewAgentWorker(l logger.Logger, reg *api.AgentRegisterResponse, m *metrics. } } +// AgentID returns the registered agent ID for this worker. +func (a *AgentWorker) AgentID() string { + return a.agent.UUID +} + +// AgentName returns the registered agent name for this worker. +func (a *AgentWorker) AgentName() string { + return a.agent.Name +} + const workerStatusPart = `{{if le .LastPing.Seconds 2.0}}✅{{else}}❌{{end}} Last ping: {{.LastPing}} ago
{{if le .LastHeartbeat.Seconds 60.0}}✅{{else}}❌{{end}} Last heartbeat: {{.LastHeartbeat}} ago
{{if .LastHeartbeatError}}❌{{else}}✅{{end}} Last heartbeat error: {{printf "%v" .LastHeartbeatError}}` diff --git a/clicommand/agent_start.go b/clicommand/agent_start.go index 7a2fad21c8..a040434911 100644 --- a/clicommand/agent_start.go +++ b/clicommand/agent_start.go @@ -26,6 +26,7 @@ import ( "github.com/buildkite/agent/v3/agent" "github.com/buildkite/agent/v3/api" "github.com/buildkite/agent/v3/core" + "github.com/buildkite/agent/v3/env" "github.com/buildkite/agent/v3/internal/agentapi" "github.com/buildkite/agent/v3/internal/awslib" "github.com/buildkite/agent/v3/internal/concurrently" @@ -1374,10 +1375,10 @@ var AgentStartCommand = cli.Command{ pool := agent.NewAgentPool(workers, &agentConf) // Agent-wide shutdown hook. Once per agent, for all workers on the agent. - defer agentShutdownHook(l, cfg) + defer agentShutdownHook(l, cfg, workers) // Once the shutdown hook has been setup, trigger the startup hook. - if err := agentStartupHook(l, cfg); err != nil { + if err := agentStartupHook(l, cfg, workers); err != nil { return fmt.Errorf("failed to run startup hook: %w", err) } @@ -1543,18 +1544,33 @@ func (ps *poolSignals) handleLoop(ctx context.Context, signals chan os.Signal) { } } -func agentStartupHook(log logger.Logger, cfg AgentStartConfig) error { - return agentLifecycleHook("agent-startup", log, cfg) +func agentStartupHook(log logger.Logger, cfg AgentStartConfig, workers []*agent.AgentWorker) error { + return agentLifecycleHook("agent-startup", log, cfg, agentLifecycleHookEnv(workers)) } -func agentShutdownHook(log logger.Logger, cfg AgentStartConfig) { - _ = agentLifecycleHook("agent-shutdown", log, cfg) +func agentLifecycleHookEnv(workers []*agent.AgentWorker) *env.Environment { + environ := env.New() + agentIDs := make([]string, 0, len(workers)) + agentNames := make([]string, 0, len(workers)) + for _, worker := range workers { + agentIDs = append(agentIDs, worker.AgentID()) + agentNames = append(agentNames, worker.AgentName()) + } + + environ.Set("BUILDKITE_AGENT_IDS", strings.Join(agentIDs, ",")) + environ.Set("BUILDKITE_AGENT_NAMES", strings.Join(agentNames, ",")) + + return environ +} + +func agentShutdownHook(log logger.Logger, cfg AgentStartConfig, workers []*agent.AgentWorker) { + _ = agentLifecycleHook("agent-shutdown", log, cfg, agentLifecycleHookEnv(workers)) } // agentLifecycleHook looks for a hook script in the hooks path // and executes it if found. Output (stdout + stderr) is streamed into the main // agent logger. Exit status failure is logged and returned for the caller to handle -func agentLifecycleHook(hookName string, log logger.Logger, cfg AgentStartConfig) error { +func agentLifecycleHook(hookName string, log logger.Logger, cfg AgentStartConfig, hookEnv *env.Environment) error { // search for hook (including .bat & .ps1 files on Windows) hooks := []string{} p, err := hook.Find(nil, cfg.HooksPath, hookName) @@ -1593,6 +1609,7 @@ func agentLifecycleHook(hookName string, log logger.Logger, cfg AgentStartConfig log.Errorf("creating shell for %q hook: %v", hookName, err) return err } + sh.Env.Merge(hookEnv) var wg sync.WaitGroup wg.Go(func() { diff --git a/clicommand/agent_start_test.go b/clicommand/agent_start_test.go index 42738d292f..9d2b9a1c33 100644 --- a/clicommand/agent_start_test.go +++ b/clicommand/agent_start_test.go @@ -7,6 +7,8 @@ import ( "runtime" "testing" + "github.com/buildkite/agent/v3/agent" + "github.com/buildkite/agent/v3/api" "github.com/buildkite/agent/v3/core" "github.com/buildkite/agent/v3/logger" "github.com/google/go-cmp/cmp" @@ -43,6 +45,32 @@ func writeAgentHook(t *testing.T, dir, hookName, msg string) string { return filepath } +func writeAgentHookScript(t *testing.T, dir, hookName, script string) string { + t.Helper() + + filename := hookName + if runtime.GOOS == "windows" { + filename = hookName + ".bat" + } + + filepath := filepath.Join(dir, filename) + t.Logf("Creating %q", filepath) + if err := os.WriteFile(filepath, []byte(script), 0o755); err != nil { + t.Fatalf("%+v", err) + } + return filepath +} + +func testAgentWorker(id, name string) *agent.AgentWorker { + return agent.NewAgentWorker( + logger.Discard, + &api.AgentRegisterResponse{UUID: id, Name: name}, + nil, + api.NewClient(logger.Discard, api.Config{}), + agent.AgentWorkerConfig{}, + ) +} + func TestAgentStartupHook(t *testing.T) { t.Parallel() @@ -64,7 +92,7 @@ func TestAgentStartupHook(t *testing.T) { defer closer() filepath := writeAgentHook(t, hooksPath, "agent-startup", "hello world") log := logger.NewBuffer() - err := agentStartupHook(log, cfg(hooksPath)) + err := agentStartupHook(log, cfg(hooksPath), nil) if err != nil { t.Fatalf("%+v", log.Messages) } @@ -83,7 +111,7 @@ func TestAgentStartupHook(t *testing.T) { defer closer() log := logger.NewBuffer() - err := agentStartupHook(log, cfg(hooksPath)) + err := agentStartupHook(log, cfg(hooksPath), nil) if err != nil { t.Fatalf("%+v", log.Messages) } @@ -96,7 +124,7 @@ func TestAgentStartupHook(t *testing.T) { t.Parallel() log := logger.NewBuffer() - err := agentStartupHook(log, cfg("zxczxczxc")) + err := agentStartupHook(log, cfg("zxczxczxc"), nil) if err != nil { t.Fatalf("%+v", log.Messages) } @@ -133,7 +161,7 @@ func TestAgentStartupHookWithAdditionalPaths(t *testing.T) { defer additionalCloser() log := logger.NewBuffer() - err := agentStartupHook(log, cfg(hooksPath, additionalHooksPath)) + err := agentStartupHook(log, cfg(hooksPath, additionalHooksPath), nil) if err != nil { t.Fatalf("%+v", log.Messages) } @@ -148,6 +176,101 @@ func TestAgentStartupHookWithAdditionalPaths(t *testing.T) { }) } +func TestAgentStartupHookEnv(t *testing.T) { + t.Parallel() + + for _, tc := range []struct { + desc string + workers []*agent.AgentWorker + wantIDs string + wantNames string + }{ + { + desc: "empty", + }, + { + desc: "single agent", + workers: []*agent.AgentWorker{testAgentWorker("agent-123", "test-agent-1")}, + wantIDs: "agent-123", + wantNames: "test-agent-1", + }, + { + desc: "multiple agents", + workers: []*agent.AgentWorker{ + testAgentWorker("agent-123", "test-agent-1"), + testAgentWorker("agent-456", "test-agent-2"), + }, + wantIDs: "agent-123,agent-456", + wantNames: "test-agent-1,test-agent-2", + }, + } { + t.Run(tc.desc, func(t *testing.T) { + t.Parallel() + + env := agentLifecycleHookEnv(tc.workers) + gotIDs, hasIDs := env.Get("BUILDKITE_AGENT_IDS") + if !hasIDs { + t.Fatal("BUILDKITE_AGENT_IDS is not set") + } + if got := gotIDs; got != tc.wantIDs { + t.Errorf("BUILDKITE_AGENT_IDS = %q, want %q", got, tc.wantIDs) + } + gotNames, hasNames := env.Get("BUILDKITE_AGENT_NAMES") + if !hasNames { + t.Fatal("BUILDKITE_AGENT_NAMES is not set") + } + if got := gotNames; got != tc.wantNames { + t.Errorf("BUILDKITE_AGENT_NAMES = %q, want %q", got, tc.wantNames) + } + }) + } +} + +func TestAgentStartupHookWithRegisteredAgentsEnv(t *testing.T) { + t.Parallel() + + cfg := func(hooksPath string) AgentStartConfig { + return AgentStartConfig{ + HooksPath: hooksPath, + GlobalConfig: GlobalConfig{NoColor: true}, + } + } + prompt := "$" + if runtime.GOOS == "windows" { + prompt = ">" + } + + hooksPath, closer := setupHooksPath(t) + defer closer() + + var script string + if runtime.GOOS == "windows" { + script = `@echo off +echo ids=%BUILDKITE_AGENT_IDS% +echo names=%BUILDKITE_AGENT_NAMES%` + } else { + script = `echo ids=$BUILDKITE_AGENT_IDS +echo names=$BUILDKITE_AGENT_NAMES` + } + filepath := writeAgentHookScript(t, hooksPath, "agent-startup", script) + + log := logger.NewBuffer() + err := agentStartupHook(log, cfg(hooksPath), []*agent.AgentWorker{ + testAgentWorker("agent-123", "test-agent-1"), + testAgentWorker("agent-456", "test-agent-2"), + }) + if err != nil { + t.Fatalf("%+v", log.Messages) + } + if diff := cmp.Diff(log.Messages, []string{ + "[info] " + prompt + " " + filepath, + "[info] ids=agent-123,agent-456", + "[info] names=test-agent-1,test-agent-2", + }); diff != "" { + t.Errorf("log.Messages diff (-got +want):\n%s", diff) + } +} + func TestAgentShutdownHook(t *testing.T) { t.Parallel() @@ -169,7 +292,7 @@ func TestAgentShutdownHook(t *testing.T) { defer closer() filepath := writeAgentHook(t, hooksPath, "agent-shutdown", "hello world") log := logger.NewBuffer() - agentShutdownHook(log, cfg(hooksPath)) + agentShutdownHook(log, cfg(hooksPath), nil) if diff := cmp.Diff(log.Messages, []string{ "[info] " + prompt + " " + filepath, @@ -186,7 +309,7 @@ func TestAgentShutdownHook(t *testing.T) { defer closer() log := logger.NewBuffer() - agentShutdownHook(log, cfg(hooksPath)) + agentShutdownHook(log, cfg(hooksPath), nil) if diff := cmp.Diff(log.Messages, []string{}); diff != "" { t.Errorf("log.Messages diff (-got +want):\n%s", diff) } @@ -196,11 +319,43 @@ func TestAgentShutdownHook(t *testing.T) { t.Parallel() log := logger.NewBuffer() - agentShutdownHook(log, cfg("zxczxczxc")) + agentShutdownHook(log, cfg("zxczxczxc"), nil) if diff := cmp.Diff(log.Messages, []string{}); diff != "" { t.Errorf("log.Messages diff (-got +want):\n%s", diff) } }) + + t.Run("with registered agents env", func(t *testing.T) { + t.Parallel() + + hooksPath, closer := setupHooksPath(t) + defer closer() + + var script string + if runtime.GOOS == "windows" { + script = `@echo off +echo ids=%BUILDKITE_AGENT_IDS% +echo names=%BUILDKITE_AGENT_NAMES%` + } else { + script = `echo ids=$BUILDKITE_AGENT_IDS +echo names=$BUILDKITE_AGENT_NAMES` + } + filepath := writeAgentHookScript(t, hooksPath, "agent-shutdown", script) + + log := logger.NewBuffer() + agentShutdownHook(log, cfg(hooksPath), []*agent.AgentWorker{ + testAgentWorker("agent-123", "test-agent-1"), + testAgentWorker("agent-456", "test-agent-2"), + }) + + if diff := cmp.Diff(log.Messages, []string{ + "[info] " + prompt + " " + filepath, + "[info] ids=agent-123,agent-456", + "[info] names=test-agent-1,test-agent-2", + }); diff != "" { + t.Errorf("log.Messages diff (-got +want):\n%s", diff) + } + }) } func TestAgentStartJobLocked_ExitCode28(t *testing.T) { diff --git a/docs/agent-start.md b/docs/agent-start.md index f87452084a..c8183f31ec 100644 --- a/docs/agent-start.md +++ b/docs/agent-start.md @@ -22,6 +22,12 @@ goroutine which waits for all the workers to finish, then closes a channel. The effect is that `AgentPool` returns either `nil` once all workers have stopped without error, or the first non-nil error. +Once all workers have registered, the once-per-process `agent-startup` hook runs +before the `AgentPool` starts. The `agent-startup` and `agent-shutdown` hooks +receive `BUILDKITE_AGENT_IDS` and `BUILDKITE_AGENT_NAMES` as comma-separated +lists in spawn order, allowing hook scripts to identify the registered spawned +agents without querying the API. + After connecting, `AgentWorker` runs two main goroutines: one periodically calls `Heartbeat`, the other more frequently calls `Ping`. `Ping` is how the worker discovers work from the API. @@ -40,4 +46,3 @@ helper goroutines: * Copying PTY output * Waiting on context cancellation in order to hard-terminate the process -