From 65574443a5478bc8aac8609e1b5bffb8ed03331a Mon Sep 17 00:00:00 2001
From: Yacine Smaoui <1141454+ysmaoui@users.noreply.github.com>
Date: Tue, 23 Jun 2026 14:51:12 +0200
Subject: [PATCH] Add copy fallback when hardlinking hits the filesystem link limit

When bb_worker uses a native build directory, cached files are
hardlinked from the cache into each action's input root. Filesystems cap
the number of hard links per file (NTFS allows 1023). A file that is
shared by a large number of input roots can exceed this limit, after
which hardlinking fails with ERROR_TOO_MANY_LINKS on Windows (or EMLINK
on POSIX) and the action fails.

When the link limit is reached, copy the cached file into the input root
instead of hardlinking it. The copy is an independent file with its own
link count, so materialization succeeds. Only the few heavily shared
files that exceed the limit pay the cost of a copy.
---
Review notes (commentary only; text between the "---" line and the first
"diff --git" line is not part of the commit):

* copyCachedFile always creates the destination with
  filesystem.CreateExcl(0o777), regardless of the mode of the cached
  file it is copying. NOTE(review): the cache key used in the new test
  ends in "-x", which presumably marks a non-executable file -- confirm
  whether the copy should be created with a mode that matches the cached
  file rather than a fixed 0o777.
* When ReadAt or WriteAt fails part-way through, copyCachedFile closes
  the writer and returns the error, but it never removes dstName, so a
  partially written file is left in the destination directory.
  NOTE(review): confirm whether callers clean up the directory on
  failure or whether the partial file should be removed here.
* On those error paths the result of w.Close() is discarded; only the
  Close() on the success path is returned to the caller.
* The Windows isHardlinkLimitReached matches
  windows.ERROR_TOO_MANY_LINKS through errors.Is. NOTE(review): this
  only triggers if Directory.Link reports that Win32 error code (and
  not, for example, an NTSTATUS value) -- verify against the Windows
  implementation of filesystem.Directory.
* The new test covers only the success path, with a single ReadAt call
  that returns the data together with io.EOF. The multi-chunk loop and
  the OpenRead/OpenWrite/ReadAt/WriteAt error paths are not exercised.

 pkg/cas/BUILD.bazel                      | 11 +++++-
 pkg/cas/copy_on_link_limit.go            | 44 ++++++++++++++++++++++++
 pkg/cas/hardlinking_file_fetcher.go      |  8 +++++
 pkg/cas/hardlinking_file_fetcher_test.go | 38 ++++++++++++++++++++
 pkg/cas/link_limit_other.go              | 14 ++++++++
 pkg/cas/link_limit_windows.go            | 15 ++++++++
 6 files changed, 129 insertions(+), 1 deletion(-)
 create mode 100644 pkg/cas/copy_on_link_limit.go
 create mode 100644 pkg/cas/link_limit_other.go
 create mode 100644 pkg/cas/link_limit_windows.go

diff --git a/pkg/cas/BUILD.bazel b/pkg/cas/BUILD.bazel
index cbfe29b8..16ea83f9 100644
--- a/pkg/cas/BUILD.bazel
+++ b/pkg/cas/BUILD.bazel
@@ -7,11 +7,14 @@ go_library(
         "blob_access_file_fetcher.go",
         "caching_directory_fetcher.go",
         "configuration.go",
+        "copy_on_link_limit.go",
         "decomposed_directory_walker.go",
         "directory_fetcher.go",
         "directory_walker.go",
         "file_fetcher.go",
         "hardlinking_file_fetcher.go",
+        "link_limit_other.go",
+        "link_limit_windows.go",
         "suspending_directory_fetcher.go",
     ],
     importpath = "github.com/buildbarn/bb-remote-execution/pkg/cas",
@@ -32,7 +35,12 @@ go_library(
         "@org_golang_google_grpc//status",
         "@org_golang_google_protobuf//encoding/protowire",
         "@org_golang_google_protobuf//proto",
-    ],
+    ] + select({
+        "@rules_go//go/platform:windows": [
+            "@org_golang_x_sys//windows",
+        ],
+        "//conditions:default": [],
+    }),
 )
 
 go_test(
@@ -51,6 +59,7 @@ go_test(
         "@com_github_buildbarn_bb_storage//pkg/blobstore/slicing",
         "@com_github_buildbarn_bb_storage//pkg/digest",
         "@com_github_buildbarn_bb_storage//pkg/eviction",
+        "@com_github_buildbarn_bb_storage//pkg/filesystem",
         "@com_github_buildbarn_bb_storage//pkg/filesystem/path",
         "@com_github_buildbarn_bb_storage//pkg/testutil",
         "@com_github_stretchr_testify//require",
diff --git a/pkg/cas/copy_on_link_limit.go b/pkg/cas/copy_on_link_limit.go
new file mode 100644
index 00000000..617b0df7
--- /dev/null
+++ b/pkg/cas/copy_on_link_limit.go
@@ -0,0 +1,44 @@
+package cas
+
+import (
+	"io"
+
+	"github.com/buildbarn/bb-storage/pkg/filesystem"
+	"github.com/buildbarn/bb-storage/pkg/filesystem/path"
+)
+
+// copyCachedFile copies a file between directories, yielding an independent file
+// with its own hard link count. It is the fallback for hardlinking when the
+// source has reached the filesystem's maximum link count (1023 on NTFS).
+func copyCachedFile(srcDirectory filesystem.Directory, srcName path.Component, dstDirectory filesystem.Directory, dstName path.Component) error {
+	r, err := srcDirectory.OpenRead(srcName)
+	if err != nil {
+		return err
+	}
+	defer r.Close()
+
+	// CreateExcl mirrors hardlink semantics: the destination must not exist.
+	w, err := dstDirectory.OpenWrite(dstName, filesystem.CreateExcl(0o777))
+	if err != nil {
+		return err
+	}
+
+	buf := make([]byte, 1<<16)
+	for offset := int64(0); ; {
+		n, readErr := r.ReadAt(buf, offset)
+		if n > 0 {
+			if _, err := w.WriteAt(buf[:n], offset); err != nil {
+				w.Close()
+				return err
+			}
+			offset += int64(n)
+		}
+		if readErr == io.EOF {
+			return w.Close()
+		}
+		if readErr != nil {
+			w.Close()
+			return readErr
+		}
+	}
+}
diff --git a/pkg/cas/hardlinking_file_fetcher.go b/pkg/cas/hardlinking_file_fetcher.go
index 838f7f02..58f46753 100644
--- a/pkg/cas/hardlinking_file_fetcher.go
+++ b/pkg/cas/hardlinking_file_fetcher.go
@@ -170,6 +170,14 @@ func (ff *hardlinkingFileFetcher) tryLinkFromCache(key string, directory filesys
 		if err := ff.cacheDirectory.Link(path.MustNewComponent(key), directory, name); err == nil {
 			// Successfully hardlinked the file to its destination.
 			return nil
+		} else if isHardlinkLimitReached(err) {
+			// The file reached the filesystem's maximum hard link
+			// count (1023 on NTFS); copy it into place instead of
+			// failing the action.
+			if err := copyCachedFile(ff.cacheDirectory, path.MustNewComponent(key), directory, name); err != nil {
+				return util.StatusWrapfWithCode(err, codes.Internal, "Failed to copy cached file %#v after reaching the hard link limit", key)
+			}
+			return nil
 		} else if !os.IsNotExist(err) {
 			return util.StatusWrapfWithCode(err, codes.Internal, "Failed to create hardlink to cached file %#v", key)
 		}
diff --git a/pkg/cas/hardlinking_file_fetcher_test.go b/pkg/cas/hardlinking_file_fetcher_test.go
index 1796da48..e9867ed2 100644
--- a/pkg/cas/hardlinking_file_fetcher_test.go
+++ b/pkg/cas/hardlinking_file_fetcher_test.go
@@ -2,6 +2,7 @@ package cas_test
 
 import (
 	"context"
+	"io"
 	"os"
 	"syscall"
 	"testing"
@@ -11,6 +12,7 @@ import (
 	"github.com/buildbarn/bb-remote-execution/pkg/cas"
 	"github.com/buildbarn/bb-storage/pkg/digest"
 	"github.com/buildbarn/bb-storage/pkg/eviction"
+	"github.com/buildbarn/bb-storage/pkg/filesystem"
 	"github.com/buildbarn/bb-storage/pkg/filesystem/path"
 	"github.com/buildbarn/bb-storage/pkg/testutil"
 	"github.com/stretchr/testify/require"
@@ -143,3 +145,39 @@ func TestHardlinkingFileFetcher(t *testing.T) {
 		fileFetcher.GetFile(ctx, blobDigest2, buildDirectory, path.MustNewComponent("goodbye.txt"), false),
 	)
 }
+
+func TestHardlinkingFileFetcherCopyFallbackOnLinkLimit(t *testing.T) {
+	ctrl, ctx := gomock.WithContext(context.Background(), t)
+
+	baseFileFetcher := mock.NewMockFileFetcher(ctrl)
+	cacheDirectory := mock.NewMockDirectory(ctrl)
+	fileFetcher := cas.NewHardlinkingFileFetcher(baseFileFetcher, cacheDirectory, 10, 10000, eviction.NewLRUSet[string]())
+
+	blobDigest := digest.MustNewDigest("example", remoteexecution.DigestFunction_MD5, "8b1a9953c4611296a827abf8c47804d7", 5)
+	buildDirectory := mock.NewMockDirectory(ctrl)
+	key := path.MustNewComponent("3-8b1a9953c4611296a827abf8c47804d7-5-x")
+	name := path.MustNewComponent("hello.txt")
+
+	// Prime the cache: download the file and link it into the cache directory.
+	baseFileFetcher.EXPECT().GetFile(ctx, blobDigest, buildDirectory, name, false)
+	buildDirectory.EXPECT().Link(name, cacheDirectory, key)
+	require.NoError(t, fileFetcher.GetFile(ctx, blobDigest, buildDirectory, name, false))
+
+	// The cached file has reached the filesystem's maximum hard link count, so
+	// hardlinking it into the build directory fails with EMLINK. The fetcher
+	// must fall back to copying the cached file's contents into place.
+	cacheDirectory.EXPECT().Link(key, buildDirectory, name).Return(syscall.EMLINK)
+	cachedFile := mock.NewMockFileReader(ctrl)
+	cacheDirectory.EXPECT().OpenRead(key).Return(cachedFile, nil)
+	copiedFile := mock.NewMockFileWriter(ctrl)
+	buildDirectory.EXPECT().OpenWrite(name, filesystem.CreateExcl(0o777)).Return(copiedFile, nil)
+	cachedFile.EXPECT().ReadAt(gomock.Any(), int64(0)).DoAndReturn(
+		func(p []byte, off int64) (int, error) {
+			return copy(p, []byte("Hello")), io.EOF
+		},
+	)
+	copiedFile.EXPECT().WriteAt([]byte("Hello"), int64(0)).Return(5, nil)
+	copiedFile.EXPECT().Close()
+	cachedFile.EXPECT().Close()
+	require.NoError(t, fileFetcher.GetFile(ctx, blobDigest, buildDirectory, name, false))
+}
diff --git a/pkg/cas/link_limit_other.go b/pkg/cas/link_limit_other.go
new file mode 100644
index 00000000..36e74318
--- /dev/null
+++ b/pkg/cas/link_limit_other.go
@@ -0,0 +1,14 @@
+//go:build !windows
+
+package cas
+
+import (
+	"errors"
+	"syscall"
+)
+
+// isHardlinkLimitReached reports whether err is the filesystem's "too many hard
+// links" error (EMLINK on POSIX).
+func isHardlinkLimitReached(err error) bool {
+	return errors.Is(err, syscall.EMLINK)
+}
diff --git a/pkg/cas/link_limit_windows.go b/pkg/cas/link_limit_windows.go
new file mode 100644
index 00000000..0e1147b2
--- /dev/null
+++ b/pkg/cas/link_limit_windows.go
@@ -0,0 +1,15 @@
+//go:build windows
+
+package cas
+
+import (
+	"errors"
+
+	"golang.org/x/sys/windows"
+)
+
+// isHardlinkLimitReached reports whether err is the filesystem's "too many hard
+// links" error. NTFS returns ERROR_TOO_MANY_LINKS after 1023 links to a file.
+func isHardlinkLimitReached(err error) bool {
+	return errors.Is(err, windows.ERROR_TOO_MANY_LINKS)
+}