Skip to content
7 changes: 7 additions & 0 deletions clicommand/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ type BootstrapConfig struct {
PullRequest string `cli:"pullrequest"`
PullRequestUsingMergeRefspec bool `cli:"pull-request-using-merge-refspec"`
GitSubmodules bool `cli:"git-submodules"`
GitLFSEnabled bool `cli:"git-lfs-enabled"`
SSHKeyscan bool `cli:"ssh-keyscan"`
AgentName string `cli:"agent" validate:"required"`
Queue string `cli:"queue"`
Expand Down Expand Up @@ -311,6 +312,11 @@ var BootstrapCommand = cli.Command{
Usage: "Enable git submodules (default: true)",
EnvVar: "BUILDKITE_GIT_SUBMODULES",
},
cli.BoolFlag{
Name: "git-lfs-enabled",
Usage: "Enable Git LFS object download during checkout (default: false)",
EnvVar: "BUILDKITE_GIT_LFS_ENABLED",
Comment thread
lizrabuya marked this conversation as resolved.
},
cli.BoolTFlag{
Name: "pty",
Usage: "Run jobs within a pseudo terminal (default: true)",
Expand Down Expand Up @@ -461,6 +467,7 @@ var BootstrapCommand = cli.Command{
GitCloneFlags: cfg.GitCloneFlags,
GitCloneMirrorFlags: cfg.GitCloneMirrorFlags,
GitFetchFlags: cfg.GitFetchFlags,
GitLFSEnabled: cfg.GitLFSEnabled,
GitSparseCheckoutPaths: cfg.GitSparseCheckoutPaths,
GitSSHKey: cfg.GitSSHKey,
GitMirrorsLockTimeout: cfg.GitMirrorsLockTimeout,
Expand Down
27 changes: 27 additions & 0 deletions internal/job/checkout.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"slices"
Expand Down Expand Up @@ -262,6 +263,14 @@ func (e *Executor) checkout(ctx context.Context) error {
break
}

// Fail fast before any git work if git-lfs is required but missing.
// This operation only handles default checkout behavior, so it's possible for a custom checkout hook to require git-lfs but not have this check. That's a bit unfortunate, but we can add it to custom hooks later if needed.
if e.GitLFSEnabled {
if _, err := exec.LookPath("git-lfs"); err != nil {

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Blocking: exec.LookPath reads the agent process PATH, but checkout commands are resolved from e.shell.Env via shell.Command / AbsolutePath, and hooks/plugins can mutate that environment before checkout. That can fail a job where git-lfs was added to the job PATH, or pass this check even though the shell PATH used for git lfs ... cannot resolve it. Can this check use the shell environment too (for example e.shell.AbsolutePath("git-lfs")) and cover that with a test that changes sh.Env rather than only t.Setenv?

return fmt.Errorf("BUILDKITE_GIT_LFS_ENABLED=true but git-lfs binary is not found on PATH: %w", err)
}
}

maxAttempts := e.CheckoutAttempts
if maxAttempts <= 0 {
maxAttempts = 6
Expand Down Expand Up @@ -964,6 +973,15 @@ func (e *Executor) defaultCheckoutPhase(ctx context.Context) (retErr error) {
return fmt.Errorf("cleaning git repository: %w", err)
}

// Install LFS filter before fetch so the filter is registered before any
// network operation, following the conventional git-lfs setup order.
if e.GitLFSEnabled {
e.shell.Commentf("Installing Git LFS filter")
if err := e.shell.Command("git", "lfs", "install", "--local").Run(ctx); err != nil {
return fmt.Errorf("installing git lfs filter: %w", err)
}
}

if err := e.fetchSource(ctx); err != nil {
return err
}
Expand Down Expand Up @@ -1081,6 +1099,15 @@ func (e *Executor) defaultCheckoutPhase(ctx context.Context) (retErr error) {
}
}

if e.GitLFSEnabled {

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Blocking: sparseCheckoutActive is ignored here, so BUILDKITE_GIT_LFS_ENABLED=true with sparse checkout still runs an unfiltered git lfs fetch. That is the combination SUP-6529 calls out to avoid: sparse checkout users can still download LFS objects outside the sparse set, defeating the sparse checkout and potentially blowing up disk/network usage. Can this branch be gated on !sparseCheckoutActive, or otherwise constrained to the sparse paths, before merging?

if err := gitLFSFetchCheckout(ctx, gitLFSFetchCheckoutArgs{
Shell: e.shell,
Retry: true,
}); err != nil {
return err
}
}

// Git clean after checkout. We need to do this because submodules could have
// changed in between the last checkout and this one. A double clean is the only
// good solution to this problem that we've found
Expand Down
217 changes: 217 additions & 0 deletions internal/job/checkout_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package job

import (
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"testing"
"time"

Expand Down Expand Up @@ -442,3 +445,217 @@ func TestDefaultCheckoutPhase_DelayedRefCreation(t *testing.T) {
t.Fatalf("tt.executor.defaultCheckoutPhase(ctx) error = %v, want nil", err)
}
}

func TestGitLFSBinaryMissing(t *testing.T) {
// Not parallel: the test manipulates PATH via t.Setenv, which modifies
// process-global state.

ctx := t.Context()

// Restrict PATH so exec.LookPath("git-lfs") fails. An empty temp dir
// suffices — the fail-fast check returns before any git call.
t.Setenv("PATH", t.TempDir())

sh, err := shell.New()
if err != nil {
t.Fatalf("shell.New() error = %v, want nil", err)
}

executor := &Executor{
shell: sh,
ExecutorConfig: ExecutorConfig{
Repository: "https://github.com/buildkite/agent.git",
GitLFSEnabled: true,
},
}

err = executor.checkout(ctx)
if err == nil {
t.Fatalf("executor.checkout(ctx) error = nil, want error containing %q", "git-lfs binary is not found on PATH")
}
if !strings.Contains(err.Error(), "git-lfs binary is not found on PATH") {
t.Errorf("executor.checkout(ctx) error = %q, want it to contain %q", err.Error(), "git-lfs binary is not found on PATH")
}
}

func TestDefaultCheckoutPhase_GitLFS(t *testing.T) {
// Not parallel: subtests manipulate PATH via t.Setenv, which modifies
// process-global state.
ctx := t.Context()

t.Setenv("GIT_AUTHOR_NAME", "Buildkite Agent")
t.Setenv("GIT_AUTHOR_EMAIL", "agent@example.com")
t.Setenv("GIT_COMMITTER_NAME", "Buildkite Agent")
t.Setenv("GIT_COMMITTER_EMAIL", "agent@example.com")

// gitOnlyBinDir returns a temp dir containing git (via a symlink on Unix or
// a .bat wrapper on Windows) but no git-lfs, so exec.LookPath("git-lfs")
// will fail while git commands still work.
gitOnlyBinDir := func(t *testing.T) string {
t.Helper()
gitBin, err := exec.LookPath("git")
if err != nil {
t.Fatalf("exec.LookPath(\"git\") error = %v", err)
}
binDir := t.TempDir()
if runtime.GOOS == "windows" {
// Use a .bat wrapper to avoid copying the multi-MB binary and to
// sidestep the symlink-privilege requirement on Windows.
wrapper := fmt.Sprintf("@echo off\r\n\"%s\" %%*\r\n", gitBin)
if err := os.WriteFile(filepath.Join(binDir, "git.bat"), []byte(wrapper), 0o755); err != nil {
t.Fatalf("os.WriteFile() error = %v", err)
}
} else {
if err := os.Symlink(gitBin, filepath.Join(binDir, "git")); err != nil {
t.Fatalf("os.Symlink() error = %v", err)
}
}
return binDir
}

// fakeLFSBinDir returns a temp dir that has git (via gitOnlyBinDir) plus a
// fake git-lfs whose behaviour is defined by the provided scripts.
// unixScript is a #!/bin/sh script; winBatch is a .bat file body.
fakeLFSBinDir := func(t *testing.T, unixScript, winBatch string) string {
t.Helper()
binDir := gitOnlyBinDir(t)
if runtime.GOOS == "windows" {
if err := os.WriteFile(filepath.Join(binDir, "git-lfs.bat"), []byte(winBatch), 0o755); err != nil {
t.Fatalf("os.WriteFile() error = %v", err)
}
} else {
if err := os.WriteFile(filepath.Join(binDir, "git-lfs"), []byte(unixScript), 0o755); err != nil {
t.Fatalf("os.WriteFile() error = %v", err)
}
}
return binDir
}

tests := []struct {
name string
lfsEnabled bool
setupPath func(t *testing.T)
wantErr string
}{
{
name: "LFS disabled",
lfsEnabled: false,
},
{
name: "LFS enabled binary present",
lfsEnabled: true,
setupPath: func(t *testing.T) {
if _, err := exec.LookPath("git-lfs"); err != nil {
t.Skip("git-lfs not installed")
}
},
},
{
name: "LFS enabled git lfs command fails",
lfsEnabled: true,
setupPath: func(t *testing.T) {
// Git for Windows ships its own git-lfs.exe inside
// GIT_EXEC_PATH, which git resolves before falling back to
// PATH. We can't fool git's subcommand lookup with a PATH
// override the way we can fool Go's exec.LookPath.
if runtime.GOOS == "windows" {
t.Skip("Not runnable on Windows: git for Windows uses bundled git-lfs.exe regardless of PATH")
}
t.Setenv("PATH", fakeLFSBinDir(t,
"#!/bin/sh\nexit 1\n",
"@echo off\r\nexit /b 1\r\n",
))
},
wantErr: "installing git lfs filter",
},
{
name: "LFS enabled git lfs fetch fails",
Comment thread
lizrabuya marked this conversation as resolved.
lfsEnabled: true,
setupPath: func(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("Not runnable on Windows: git for Windows uses bundled git-lfs.exe regardless of PATH")
}
t.Setenv("PATH", fakeLFSBinDir(t,
"#!/bin/sh\ncase \"$1\" in\n install) exit 0 ;;\n *) exit 1 ;;\nesac\n",
"@echo off\r\nif \"%1\"==\"install\" exit /b 0\r\nexit /b 1\r\n",
))
},
wantErr: "git lfs fetch",
},
Comment thread
lizrabuya marked this conversation as resolved.
}

s := githttptest.NewServer()
t.Cleanup(s.Close)

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Set up the remote repository BEFORE restricting PATH so that
// githttptest's git operations use the real git binary.
projectName := "test-" + strings.ReplaceAll(strings.ToLower(tt.name), " ", "-")
if err := s.CreateRepository(projectName); err != nil {
t.Fatalf("s.CreateRepository(%q) error = %v", projectName, err)
}
out, err := s.InitRepository(projectName)
if err != nil {
t.Fatalf("s.InitRepository(%q) error = %v, output: %s", projectName, err, out)
}

// Restrict PATH after the repo is initialised.
if tt.setupPath != nil {
tt.setupPath(t)
}

sh, err := shell.New()
if err != nil {
t.Fatalf("shell.New() error = %v", err)
}

// Use os.MkdirTemp + best-effort cleanup rather than t.TempDir():
// on Windows, git's child processes (credential helpers, git-lfs
// filter-process) can hold file handles open past their parent's
// exit, and t.TempDir()'s strict cleanup fails the test.
checkoutDir, err := os.MkdirTemp("", "checkout-path-")
if err != nil {
t.Fatalf("os.MkdirTemp() error = %v", err)
}
t.Cleanup(func() {
os.RemoveAll(checkoutDir) //nolint:errcheck // Best-effort cleanup.
})
buildDir, err := os.MkdirTemp("", "build-path-")
if err != nil {
t.Fatalf("os.MkdirTemp() error = %v", err)
}
t.Cleanup(func() {
os.RemoveAll(buildDir) //nolint:errcheck // Best-effort cleanup.
})
sh.Env.Set("BUILDKITE_BUILD_CHECKOUT_PATH", checkoutDir)

executor := &Executor{
shell: sh,
ExecutorConfig: ExecutorConfig{
Commit: "HEAD",
Branch: "main",
GitCleanFlags: "-f -d -x",
BuildPath: buildDir,
Repository: s.RepoURL(projectName),
GitLFSEnabled: tt.lfsEnabled,
},
}

err = executor.defaultCheckoutPhase(ctx)
if tt.wantErr == "" {
if err != nil {
t.Errorf("defaultCheckoutPhase() error = %v, want nil", err)
}
return
}
if err == nil {
t.Errorf("defaultCheckoutPhase() error = nil, want error containing %q", tt.wantErr)
return
}
if !strings.Contains(err.Error(), tt.wantErr) {
t.Errorf("defaultCheckoutPhase() error = %q, want it to contain %q", err.Error(), tt.wantErr)
}
})
}
}
3 changes: 3 additions & 0 deletions internal/job/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ type ExecutorConfig struct {
// Should git submodules be checked out
GitSubmodules bool `env:"BUILDKITE_GIT_SUBMODULES"`

// Whether to enable Git LFS operations during checkout
GitLFSEnabled bool `env:"BUILDKITE_GIT_LFS_ENABLED"`
Comment thread
lizrabuya marked this conversation as resolved.

// If the commit was part of a pull request, this will container the PR number
PullRequest string

Expand Down
4 changes: 4 additions & 0 deletions internal/job/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,10 @@ func (e *Executor) setUp(ctx context.Context) (retErr error) {
// Disable any interactive Git/SSH prompting
e.shell.Env.Set("GIT_TERMINAL_PROMPT", "0")

// Force-set GIT_LFS_SKIP_SMUDGE=1 before any git operations so LFS
// objects are not downloaded automatically during checkout.
e.shell.Env.Set("GIT_LFS_SKIP_SMUDGE", "1")

// Fetch and set secrets before environment hook execution
if e.Secrets != "" {
if err := e.fetchAndSetSecrets(ctx); err != nil {
Expand Down
Loading