diff --git a/pkg/cas/BUILD.bazel b/pkg/cas/BUILD.bazel
index cbfe29b8..16ea83f9 100644
--- a/pkg/cas/BUILD.bazel
+++ b/pkg/cas/BUILD.bazel
@@ -7,11 +7,14 @@ go_library(
         "blob_access_file_fetcher.go",
         "caching_directory_fetcher.go",
         "configuration.go",
+        "copy_on_link_limit.go",
         "decomposed_directory_walker.go",
         "directory_fetcher.go",
         "directory_walker.go",
         "file_fetcher.go",
         "hardlinking_file_fetcher.go",
+        "link_limit_other.go",
+        "link_limit_windows.go",
         "suspending_directory_fetcher.go",
     ],
     importpath = "github.com/buildbarn/bb-remote-execution/pkg/cas",
@@ -32,7 +35,12 @@ go_library(
         "@org_golang_google_grpc//status",
         "@org_golang_google_protobuf//encoding/protowire",
         "@org_golang_google_protobuf//proto",
-    ],
+    ] + select({
+        "@rules_go//go/platform:windows": [
+            "@org_golang_x_sys//windows",
+        ],
+        "//conditions:default": [],
+    }),
 )
 
 go_test(
@@ -51,6 +59,7 @@ go_test(
         "@com_github_buildbarn_bb_storage//pkg/blobstore/slicing",
         "@com_github_buildbarn_bb_storage//pkg/digest",
         "@com_github_buildbarn_bb_storage//pkg/eviction",
+        "@com_github_buildbarn_bb_storage//pkg/filesystem",
         "@com_github_buildbarn_bb_storage//pkg/filesystem/path",
         "@com_github_buildbarn_bb_storage//pkg/testutil",
         "@com_github_stretchr_testify//require",
diff --git a/pkg/cas/copy_on_link_limit.go b/pkg/cas/copy_on_link_limit.go
new file mode 100644
index 00000000..617b0df7
--- /dev/null
+++ b/pkg/cas/copy_on_link_limit.go
@@ -0,0 +1,69 @@
+package cas
+
+import (
+	"io"
+
+	"github.com/buildbarn/bb-storage/pkg/filesystem"
+	"github.com/buildbarn/bb-storage/pkg/filesystem/path"
+)
+
+// copyCachedFile copies the file srcName in srcDirectory to a newly created
+// file dstName in dstDirectory, yielding an independent file with its own hard
+// link count. It is the fallback for hardlinking when the source has reached
+// the filesystem's maximum link count (1023 on NTFS).
+//
+// The destination is created exclusively, mirroring hardlink semantics: if
+// dstName already exists, the call fails and the existing file is left alone.
+// If the copy fails after the destination was created, the partially written
+// file is removed again so that a truncated copy is never left behind.
+//
+// NOTE(review): the destination is always created with mode 0o777, whereas a
+// hardlink shares the permissions of the cached file; confirm this is intended.
+func copyCachedFile(srcDirectory filesystem.Directory, srcName path.Component, dstDirectory filesystem.Directory, dstName path.Component) error {
+	r, err := srcDirectory.OpenRead(srcName)
+	if err != nil {
+		return err
+	}
+	defer r.Close()
+
+	// CreateExcl mirrors hardlink semantics: the destination must not exist.
+	w, err := dstDirectory.OpenWrite(dstName, filesystem.CreateExcl(0o777))
+	if err != nil {
+		return err
+	}
+
+	err = copyFileContents(w, r)
+	if closeErr := w.Close(); err == nil {
+		err = closeErr
+	}
+	if err != nil {
+		// Best-effort cleanup, performed after closing the file so that no
+		// open handle is in the way. The removal error is ignored, as the
+		// error that caused the copy to fail is the one worth reporting.
+		dstDirectory.Remove(dstName)
+		return err
+	}
+	return nil
+}
+
+// copyFileContents copies everything that can be read from r, starting at
+// offset zero, to the same offsets in w. It returns nil once r reports io.EOF,
+// or the first read or write error otherwise.
+func copyFileContents(w io.WriterAt, r io.ReaderAt) error {
+	buf := make([]byte, 1<<16)
+	for offset := int64(0); ; {
+		n, readErr := r.ReadAt(buf, offset)
+		if n > 0 {
+			if _, err := w.WriteAt(buf[:n], offset); err != nil {
+				return err
+			}
+			offset += int64(n)
+		}
+		if readErr == io.EOF {
+			return nil
+		}
+		if readErr != nil {
+			return readErr
+		}
+	}
+}
diff --git a/pkg/cas/hardlinking_file_fetcher.go b/pkg/cas/hardlinking_file_fetcher.go
index 838f7f02..58f46753 100644
--- a/pkg/cas/hardlinking_file_fetcher.go
+++ b/pkg/cas/hardlinking_file_fetcher.go
@@ -170,6 +170,14 @@ func (ff *hardlinkingFileFetcher) tryLinkFromCache(key string, directory filesys
 		if err := ff.cacheDirectory.Link(path.MustNewComponent(key), directory, name); err == nil {
 			// Successfully hardlinked the file to its destination.
 			return nil
+		} else if isHardlinkLimitReached(err) {
+			// The file reached the filesystem's maximum hard link
+			// count (1023 on NTFS); copy it into place instead of
+			// failing the action.
+			if err := copyCachedFile(ff.cacheDirectory, path.MustNewComponent(key), directory, name); err != nil {
+				return util.StatusWrapfWithCode(err, codes.Internal, "Failed to copy cached file %#v after reaching the hard link limit", key)
+			}
+			return nil
 		} else if !os.IsNotExist(err) {
 			return util.StatusWrapfWithCode(err, codes.Internal, "Failed to create hardlink to cached file %#v", key)
 		}
diff --git a/pkg/cas/hardlinking_file_fetcher_test.go b/pkg/cas/hardlinking_file_fetcher_test.go
index 1796da48..e9867ed2 100644
--- a/pkg/cas/hardlinking_file_fetcher_test.go
+++ b/pkg/cas/hardlinking_file_fetcher_test.go
@@ -2,6 +2,8 @@ package cas_test
 
 import (
 	"context"
+	"io"
 	"os"
+	"runtime"
 	"syscall"
 	"testing"
@@ -11,6 +13,7 @@ import (
 	"github.com/buildbarn/bb-remote-execution/pkg/cas"
 	"github.com/buildbarn/bb-storage/pkg/digest"
 	"github.com/buildbarn/bb-storage/pkg/eviction"
+	"github.com/buildbarn/bb-storage/pkg/filesystem"
 	"github.com/buildbarn/bb-storage/pkg/filesystem/path"
 	"github.com/buildbarn/bb-storage/pkg/testutil"
 	"github.com/stretchr/testify/require"
@@ -143,3 +146,55 @@ func TestHardlinkingFileFetcher(t *testing.T) {
 		fileFetcher.GetFile(ctx, blobDigest2, buildDirectory, path.MustNewComponent("goodbye.txt"), false),
 	)
 }
+
+// linkLimitError returns the error with which creating a hard link fails on
+// the current platform once a file has reached the maximum link count: EMLINK
+// on POSIX systems and ERROR_TOO_MANY_LINKS on Windows.
+func linkLimitError() error {
+	if runtime.GOOS == "windows" {
+		// ERROR_TOO_MANY_LINKS, spelled numerically because
+		// golang.org/x/sys/windows only builds on Windows.
+		return syscall.Errno(1142)
+	}
+	return syscall.EMLINK
+}
+
+// TestHardlinkingFileFetcherCopyFallbackOnLinkLimit verifies that a cached
+// file is copied into the build directory when hardlinking it fails because
+// the filesystem's maximum hard link count has been reached.
+func TestHardlinkingFileFetcherCopyFallbackOnLinkLimit(t *testing.T) {
+	ctrl, ctx := gomock.WithContext(context.Background(), t)
+
+	baseFileFetcher := mock.NewMockFileFetcher(ctrl)
+	cacheDirectory := mock.NewMockDirectory(ctrl)
+	fileFetcher := cas.NewHardlinkingFileFetcher(baseFileFetcher, cacheDirectory, 10, 10000, eviction.NewLRUSet[string]())
+
+	blobDigest := digest.MustNewDigest("example", remoteexecution.DigestFunction_MD5, "8b1a9953c4611296a827abf8c47804d7", 5)
+	buildDirectory := mock.NewMockDirectory(ctrl)
+	key := path.MustNewComponent("3-8b1a9953c4611296a827abf8c47804d7-5-x")
+	name := path.MustNewComponent("hello.txt")
+
+	// Prime the cache: download the file and link it into the cache directory.
+	baseFileFetcher.EXPECT().GetFile(ctx, blobDigest, buildDirectory, name, false)
+	buildDirectory.EXPECT().Link(name, cacheDirectory, key)
+	require.NoError(t, fileFetcher.GetFile(ctx, blobDigest, buildDirectory, name, false))
+
+	// The cached file has reached the filesystem's maximum hard link count, so
+	// hardlinking it into the build directory fails with the platform's "too
+	// many links" error. The fetcher must fall back to copying the cached
+	// file's contents into place.
+	cacheDirectory.EXPECT().Link(key, buildDirectory, name).Return(linkLimitError())
+	cachedFile := mock.NewMockFileReader(ctrl)
+	cacheDirectory.EXPECT().OpenRead(key).Return(cachedFile, nil)
+	copiedFile := mock.NewMockFileWriter(ctrl)
+	buildDirectory.EXPECT().OpenWrite(name, filesystem.CreateExcl(0o777)).Return(copiedFile, nil)
+	cachedFile.EXPECT().ReadAt(gomock.Any(), int64(0)).DoAndReturn(
+		func(p []byte, off int64) (int, error) {
+			return copy(p, []byte("Hello")), io.EOF
+		},
+	)
+	copiedFile.EXPECT().WriteAt([]byte("Hello"), int64(0)).Return(5, nil)
+	copiedFile.EXPECT().Close()
+	cachedFile.EXPECT().Close()
+	require.NoError(t, fileFetcher.GetFile(ctx, blobDigest, buildDirectory, name, false))
+}
diff --git a/pkg/cas/link_limit_other.go b/pkg/cas/link_limit_other.go
new file mode 100644
index 00000000..36e74318
--- /dev/null
+++ b/pkg/cas/link_limit_other.go
@@ -0,0 +1,14 @@
+//go:build !windows
+
+package cas
+
+import (
+	"errors"
+	"syscall"
+)
+
+// isHardlinkLimitReached reports whether err is the filesystem's "too many hard
+// links" error (EMLINK on POSIX).
+func isHardlinkLimitReached(err error) bool {
+	return errors.Is(err, syscall.EMLINK) // errors.Is also matches EMLINK wrapped inside err.
+}
diff --git a/pkg/cas/link_limit_windows.go b/pkg/cas/link_limit_windows.go
new file mode 100644
index 00000000..0e1147b2
--- /dev/null
+++ b/pkg/cas/link_limit_windows.go
@@ -0,0 +1,15 @@
+//go:build windows
+
+package cas
+
+import (
+	"errors"
+
+	"golang.org/x/sys/windows"
+)
+
+// isHardlinkLimitReached reports whether err is, or wraps, ERROR_TOO_MANY_LINKS,
+// the "too many hard links" error (NTFS returns it after 1023 links to a file).
+func isHardlinkLimitReached(err error) bool {
+	return errors.Is(err, windows.ERROR_TOO_MANY_LINKS)
+}