Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions agent/agent_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,16 @@ func NewAgentWorker(l logger.Logger, reg *api.AgentRegisterResponse, m *metrics.
}
}

// AgentID returns the registered agent ID for this worker.
func (a *AgentWorker) AgentID() string {
return a.agent.UUID
}

// AgentName returns the registered agent name for this worker.
func (a *AgentWorker) AgentName() string {
return a.agent.Name
}

const workerStatusPart = `{{if le .LastPing.Seconds 2.0}}✅{{else}}❌{{end}} Last ping: {{.LastPing}} ago <br/>
{{if le .LastHeartbeat.Seconds 60.0}}✅{{else}}❌{{end}} Last heartbeat: {{.LastHeartbeat}} ago<br/>
{{if .LastHeartbeatError}}❌{{else}}✅{{end}} Last heartbeat error: {{printf "%v" .LastHeartbeatError}}`
Expand Down
31 changes: 24 additions & 7 deletions clicommand/agent_start.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/buildkite/agent/v3/agent"
"github.com/buildkite/agent/v3/api"
"github.com/buildkite/agent/v3/core"
"github.com/buildkite/agent/v3/env"
"github.com/buildkite/agent/v3/internal/agentapi"
"github.com/buildkite/agent/v3/internal/awslib"
"github.com/buildkite/agent/v3/internal/concurrently"
Expand Down Expand Up @@ -1374,10 +1375,10 @@ var AgentStartCommand = cli.Command{
pool := agent.NewAgentPool(workers, &agentConf)

// Agent-wide shutdown hook. Once per agent, for all workers on the agent.
defer agentShutdownHook(l, cfg)
defer agentShutdownHook(l, cfg, workers)

// Once the shutdown hook has been setup, trigger the startup hook.
if err := agentStartupHook(l, cfg); err != nil {
if err := agentStartupHook(l, cfg, workers); err != nil {
return fmt.Errorf("failed to run startup hook: %w", err)
}

Expand Down Expand Up @@ -1543,18 +1544,33 @@ func (ps *poolSignals) handleLoop(ctx context.Context, signals chan os.Signal) {
}
}

func agentStartupHook(log logger.Logger, cfg AgentStartConfig) error {
return agentLifecycleHook("agent-startup", log, cfg)
func agentStartupHook(log logger.Logger, cfg AgentStartConfig, workers []*agent.AgentWorker) error {
return agentLifecycleHook("agent-startup", log, cfg, agentLifecycleHookEnv(workers))
}

func agentShutdownHook(log logger.Logger, cfg AgentStartConfig) {
_ = agentLifecycleHook("agent-shutdown", log, cfg)
func agentLifecycleHookEnv(workers []*agent.AgentWorker) *env.Environment {
environ := env.New()
agentIDs := make([]string, 0, len(workers))
agentNames := make([]string, 0, len(workers))
for _, worker := range workers {
agentIDs = append(agentIDs, worker.AgentID())
agentNames = append(agentNames, worker.AgentName())
}

environ.Set("BUILDKITE_AGENT_IDS", strings.Join(agentIDs, ","))
environ.Set("BUILDKITE_AGENT_NAMES", strings.Join(agentNames, ","))

return environ
}

func agentShutdownHook(log logger.Logger, cfg AgentStartConfig, workers []*agent.AgentWorker) {
_ = agentLifecycleHook("agent-shutdown", log, cfg, agentLifecycleHookEnv(workers))
}

// agentLifecycleHook looks for a hook script in the hooks path
// and executes it if found. Output (stdout + stderr) is streamed into the main
// agent logger. Exit status failure is logged and returned for the caller to handle
func agentLifecycleHook(hookName string, log logger.Logger, cfg AgentStartConfig) error {
func agentLifecycleHook(hookName string, log logger.Logger, cfg AgentStartConfig, hookEnv *env.Environment) error {
// search for hook (including .bat & .ps1 files on Windows)
hooks := []string{}
p, err := hook.Find(nil, cfg.HooksPath, hookName)
Expand Down Expand Up @@ -1593,6 +1609,7 @@ func agentLifecycleHook(hookName string, log logger.Logger, cfg AgentStartConfig
log.Errorf("creating shell for %q hook: %v", hookName, err)
return err
}
sh.Env.Merge(hookEnv)

var wg sync.WaitGroup
wg.Go(func() {
Expand Down
169 changes: 162 additions & 7 deletions clicommand/agent_start_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
"runtime"
"testing"

"github.com/buildkite/agent/v3/agent"
"github.com/buildkite/agent/v3/api"
"github.com/buildkite/agent/v3/core"
"github.com/buildkite/agent/v3/logger"
"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -43,6 +45,32 @@ func writeAgentHook(t *testing.T, dir, hookName, msg string) string {
return filepath
}

func writeAgentHookScript(t *testing.T, dir, hookName, script string) string {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since all the values of script passed to this function are constants, I think they would be better of as fixtures.

t.Helper()

filename := hookName
if runtime.GOOS == "windows" {
filename = hookName + ".bat"
}

filepath := filepath.Join(dir, filename)
t.Logf("Creating %q", filepath)
if err := os.WriteFile(filepath, []byte(script), 0o755); err != nil {
t.Fatalf("%+v", err)
}
return filepath
}

func testAgentWorker(id, name string) *agent.AgentWorker {
return agent.NewAgentWorker(
logger.Discard,
&api.AgentRegisterResponse{UUID: id, Name: name},
nil,
api.NewClient(logger.Discard, api.Config{}),
agent.AgentWorkerConfig{},
)
}

func TestAgentStartupHook(t *testing.T) {
t.Parallel()

Expand All @@ -64,7 +92,7 @@ func TestAgentStartupHook(t *testing.T) {
defer closer()
filepath := writeAgentHook(t, hooksPath, "agent-startup", "hello world")
log := logger.NewBuffer()
err := agentStartupHook(log, cfg(hooksPath))
err := agentStartupHook(log, cfg(hooksPath), nil)
if err != nil {
t.Fatalf("%+v", log.Messages)
}
Expand All @@ -83,7 +111,7 @@ func TestAgentStartupHook(t *testing.T) {
defer closer()

log := logger.NewBuffer()
err := agentStartupHook(log, cfg(hooksPath))
err := agentStartupHook(log, cfg(hooksPath), nil)
if err != nil {
t.Fatalf("%+v", log.Messages)
}
Expand All @@ -96,7 +124,7 @@ func TestAgentStartupHook(t *testing.T) {
t.Parallel()

log := logger.NewBuffer()
err := agentStartupHook(log, cfg("zxczxczxc"))
err := agentStartupHook(log, cfg("zxczxczxc"), nil)
if err != nil {
t.Fatalf("%+v", log.Messages)
}
Expand Down Expand Up @@ -133,7 +161,7 @@ func TestAgentStartupHookWithAdditionalPaths(t *testing.T) {
defer additionalCloser()

log := logger.NewBuffer()
err := agentStartupHook(log, cfg(hooksPath, additionalHooksPath))
err := agentStartupHook(log, cfg(hooksPath, additionalHooksPath), nil)
if err != nil {
t.Fatalf("%+v", log.Messages)
}
Expand All @@ -148,6 +176,101 @@ func TestAgentStartupHookWithAdditionalPaths(t *testing.T) {
})
}

func TestAgentStartupHookEnv(t *testing.T) {
t.Parallel()

for _, tc := range []struct {
desc string
workers []*agent.AgentWorker
wantIDs string
wantNames string
}{
{
desc: "empty",
},
{
desc: "single agent",
workers: []*agent.AgentWorker{testAgentWorker("agent-123", "test-agent-1")},
wantIDs: "agent-123",
wantNames: "test-agent-1",
},
{
desc: "multiple agents",
workers: []*agent.AgentWorker{
testAgentWorker("agent-123", "test-agent-1"),
testAgentWorker("agent-456", "test-agent-2"),
},
wantIDs: "agent-123,agent-456",
wantNames: "test-agent-1,test-agent-2",
},
} {
t.Run(tc.desc, func(t *testing.T) {
t.Parallel()

env := agentLifecycleHookEnv(tc.workers)
gotIDs, hasIDs := env.Get("BUILDKITE_AGENT_IDS")
if !hasIDs {
t.Fatal("BUILDKITE_AGENT_IDS is not set")
}
if got := gotIDs; got != tc.wantIDs {
t.Errorf("BUILDKITE_AGENT_IDS = %q, want %q", got, tc.wantIDs)
}
gotNames, hasNames := env.Get("BUILDKITE_AGENT_NAMES")
if !hasNames {
t.Fatal("BUILDKITE_AGENT_NAMES is not set")
}
if got := gotNames; got != tc.wantNames {
t.Errorf("BUILDKITE_AGENT_NAMES = %q, want %q", got, tc.wantNames)
}
})
}
}

func TestAgentStartupHookWithRegisteredAgentsEnv(t *testing.T) {
t.Parallel()

cfg := func(hooksPath string) AgentStartConfig {
return AgentStartConfig{
HooksPath: hooksPath,
GlobalConfig: GlobalConfig{NoColor: true},
}
}
prompt := "$"
if runtime.GOOS == "windows" {
prompt = ">"
}

hooksPath, closer := setupHooksPath(t)
defer closer()

var script string
if runtime.GOOS == "windows" {
script = `@echo off
echo ids=%BUILDKITE_AGENT_IDS%
echo names=%BUILDKITE_AGENT_NAMES%`
} else {
script = `echo ids=$BUILDKITE_AGENT_IDS
echo names=$BUILDKITE_AGENT_NAMES`
}
filepath := writeAgentHookScript(t, hooksPath, "agent-startup", script)

log := logger.NewBuffer()
err := agentStartupHook(log, cfg(hooksPath), []*agent.AgentWorker{
testAgentWorker("agent-123", "test-agent-1"),
testAgentWorker("agent-456", "test-agent-2"),
})
if err != nil {
t.Fatalf("%+v", log.Messages)
}
if diff := cmp.Diff(log.Messages, []string{
"[info] " + prompt + " " + filepath,
"[info] ids=agent-123,agent-456",
"[info] names=test-agent-1,test-agent-2",
}); diff != "" {
t.Errorf("log.Messages diff (-got +want):\n%s", diff)
}
}

func TestAgentShutdownHook(t *testing.T) {
t.Parallel()

Expand All @@ -169,7 +292,7 @@ func TestAgentShutdownHook(t *testing.T) {
defer closer()
filepath := writeAgentHook(t, hooksPath, "agent-shutdown", "hello world")
log := logger.NewBuffer()
agentShutdownHook(log, cfg(hooksPath))
agentShutdownHook(log, cfg(hooksPath), nil)

if diff := cmp.Diff(log.Messages, []string{
"[info] " + prompt + " " + filepath,
Expand All @@ -186,7 +309,7 @@ func TestAgentShutdownHook(t *testing.T) {
defer closer()

log := logger.NewBuffer()
agentShutdownHook(log, cfg(hooksPath))
agentShutdownHook(log, cfg(hooksPath), nil)
if diff := cmp.Diff(log.Messages, []string{}); diff != "" {
t.Errorf("log.Messages diff (-got +want):\n%s", diff)
}
Expand All @@ -196,11 +319,43 @@ func TestAgentShutdownHook(t *testing.T) {
t.Parallel()

log := logger.NewBuffer()
agentShutdownHook(log, cfg("zxczxczxc"))
agentShutdownHook(log, cfg("zxczxczxc"), nil)
if diff := cmp.Diff(log.Messages, []string{}); diff != "" {
t.Errorf("log.Messages diff (-got +want):\n%s", diff)
}
})

t.Run("with registered agents env", func(t *testing.T) {
t.Parallel()

hooksPath, closer := setupHooksPath(t)
defer closer()

var script string
if runtime.GOOS == "windows" {
script = `@echo off
echo ids=%BUILDKITE_AGENT_IDS%
echo names=%BUILDKITE_AGENT_NAMES%`
} else {
script = `echo ids=$BUILDKITE_AGENT_IDS
echo names=$BUILDKITE_AGENT_NAMES`
}
filepath := writeAgentHookScript(t, hooksPath, "agent-shutdown", script)

log := logger.NewBuffer()
agentShutdownHook(log, cfg(hooksPath), []*agent.AgentWorker{
testAgentWorker("agent-123", "test-agent-1"),
testAgentWorker("agent-456", "test-agent-2"),
})

if diff := cmp.Diff(log.Messages, []string{
"[info] " + prompt + " " + filepath,
"[info] ids=agent-123,agent-456",
"[info] names=test-agent-1,test-agent-2",
}); diff != "" {
t.Errorf("log.Messages diff (-got +want):\n%s", diff)
}
})
}

func TestAgentStartJobLocked_ExitCode28(t *testing.T) {
Expand Down
7 changes: 6 additions & 1 deletion docs/agent-start.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ goroutine which waits for all the workers to finish, then closes a channel.
The effect is that `AgentPool` returns either `nil` once all workers have
stopped without error, or the first non-nil error.

Once all workers have registered, the once-per-process `agent-startup` hook runs
before the `AgentPool` starts. The `agent-startup` and `agent-shutdown` hooks
receive `BUILDKITE_AGENT_IDS` and `BUILDKITE_AGENT_NAMES` as comma-separated
lists in spawn order, allowing hook scripts to identify the registered spawned
agents without querying the API.

After connecting, `AgentWorker` runs two main goroutines: one periodically
calls `Heartbeat`, the other more frequently calls `Ping`. `Ping` is how the
worker discovers work from the API.
Expand All @@ -40,4 +46,3 @@ helper goroutines:

* Copying PTY output
* Waiting on context cancellation in order to hard-terminate the process