From c57493a615b4a737dfc97e54c0089696683b4727 Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Tue, 5 Dec 2023 09:21:39 +0100 Subject: [PATCH] Add a helper function for traversing topologically sorted trees --- internal/mock/BUILD.bazel | 1 + internal/mock/aliases/BUILD.bazel | 1 + internal/mock/aliases/aliases.go | 6 + pkg/blobstore/BUILD.bazel | 4 + .../visit_topologically_sorted_tree.go | 105 ++++++ .../visit_topologically_sorted_tree_test.go | 311 ++++++++++++++++++ 6 files changed, 428 insertions(+) create mode 100644 pkg/blobstore/visit_topologically_sorted_tree.go create mode 100644 pkg/blobstore/visit_topologically_sorted_tree_test.go diff --git a/internal/mock/BUILD.bazel b/internal/mock/BUILD.bazel index a1a34b8b..04404ff6 100644 --- a/internal/mock/BUILD.bazel +++ b/internal/mock/BUILD.bazel @@ -6,6 +6,7 @@ gomock( out = "aliases.go", interfaces = [ "AEAD", + "IntTreeDirectoryVisitor", "ReadCloser", "ResponseWriter", "RoundTripper", diff --git a/internal/mock/aliases/BUILD.bazel b/internal/mock/aliases/BUILD.bazel index 4092e4fd..a98182bb 100644 --- a/internal/mock/aliases/BUILD.bazel +++ b/internal/mock/aliases/BUILD.bazel @@ -5,4 +5,5 @@ go_library( srcs = ["aliases.go"], importpath = "github.com/buildbarn/bb-storage/internal/mock/aliases", visibility = ["//:__subpackages__"], + deps = ["//pkg/blobstore"], ) diff --git a/internal/mock/aliases/aliases.go b/internal/mock/aliases/aliases.go index cb78f467..a5083086 100644 --- a/internal/mock/aliases/aliases.go +++ b/internal/mock/aliases/aliases.go @@ -4,6 +4,8 @@ import ( "crypto/cipher" "io" "net/http" + + "github.com/buildbarn/bb-storage/pkg/blobstore" ) // This file contains aliases for some of the interfaces provided by the @@ -15,6 +17,10 @@ import ( // AEAD is an alias of cipher.AEAD. type AEAD = cipher.AEAD +// IntTreeDirectoryVisitor is a TreeDirectoryVisitor that takes integer +// arguments. +type IntTreeDirectoryVisitor = blobstore.TreeDirectoryVisitor[int] + // ReadCloser is an alias of io.ReadCloser. type ReadCloser = io.ReadCloser diff --git a/pkg/blobstore/BUILD.bazel b/pkg/blobstore/BUILD.bazel index 60fef555..915776a1 100644 --- a/pkg/blobstore/BUILD.bazel +++ b/pkg/blobstore/BUILD.bazel @@ -23,6 +23,7 @@ go_library( "read_canarying_blob_access.go", "reference_expanding_blob_access.go", "validation_caching_read_buffer_factory.go", + "visit_topologically_sorted_tree.go", "zip_reading_blob_access.go", "zip_writing_blob_access.go", ], @@ -68,6 +69,7 @@ go_test( "read_canarying_blob_access_test.go", "reference_expanding_blob_access_test.go", "validation_caching_read_buffer_factory_test.go", + "visit_topologically_sorted_tree_test.go", "zip_reading_blob_access_test.go", "zip_writing_blob_access_test.go", ], @@ -87,6 +89,8 @@ go_test( "@com_github_stretchr_testify//require", "@org_golang_google_grpc//codes", "@org_golang_google_grpc//status", + "@org_golang_google_protobuf//encoding/protowire", + "@org_golang_google_protobuf//proto", "@org_golang_google_protobuf//types/known/timestamppb", ], ) diff --git a/pkg/blobstore/visit_topologically_sorted_tree.go b/pkg/blobstore/visit_topologically_sorted_tree.go new file mode 100644 index 00000000..95c15510 --- /dev/null +++ b/pkg/blobstore/visit_topologically_sorted_tree.go @@ -0,0 +1,105 @@ +package blobstore + +import ( + "io" + + remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" + "github.com/buildbarn/bb-storage/pkg/blobstore/buffer" + "github.com/buildbarn/bb-storage/pkg/digest" + "github.com/buildbarn/bb-storage/pkg/util" + + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/encoding/protowire" + "google.golang.org/protobuf/proto" +) + +// TreeDirectoryVisitor is a callback type that is invoked by +// VisitTopologicallySortedTree for each Directory message contained in +// the REv2 Tree object. +type TreeDirectoryVisitor[TArgument any] func(d *remoteexecution.Directory, argument *TArgument, childArguments []*TArgument) error + +// VisitTopologicallySortedTree iterates over all Directory messages +// contained in an REv2 Tree object. For each directory, the visitor is +// capable of tracking state, which it can also build up when parent +// directories are visited. +// +// This function expects the REv2 Tree object to be topologically +// sorted, with parents being stored before children. As a result, +// directories are also visited in topological order. +func VisitTopologicallySortedTree[TArgument any](r io.Reader, digestFunction digest.Function, maximumDirectorySizeBytes int, rootArgument *TArgument, visitDirectory TreeDirectoryVisitor[TArgument]) error { + expectedFieldNumber := TreeRootFieldNumber + expectedDirectories := map[digest.Digest]*TArgument{} + if err := util.VisitProtoBytesFields(r, func(fieldNumber protowire.Number, offsetBytes, sizeBytes int64, fieldReader io.Reader) error { + if fieldNumber != expectedFieldNumber { + return status.Errorf(codes.InvalidArgument, "Expected field number %d", expectedFieldNumber) + } + expectedFieldNumber = TreeChildrenFieldNumber + + var directoryMessage proto.Message + var errUnmarshal error + var argument *TArgument + switch fieldNumber { + case TreeRootFieldNumber: + directoryMessage, errUnmarshal = buffer.NewProtoBufferFromReader( + &remoteexecution.Directory{}, + io.NopCloser(fieldReader), + buffer.UserProvided, + ).ToProto(&remoteexecution.Directory{}, maximumDirectorySizeBytes) + argument = rootArgument + case TreeChildrenFieldNumber: + b1, b2 := buffer.NewProtoBufferFromReader( + &remoteexecution.Directory{}, + io.NopCloser(fieldReader), + buffer.UserProvided, + ).CloneCopy(maximumDirectorySizeBytes) + + digestGenerator := digestFunction.NewGenerator(sizeBytes) + if err := b1.IntoWriter(digestGenerator); err != nil { + b2.Discard() + return err + } + directoryDigest := digestGenerator.Sum() + + var ok bool + argument, ok = expectedDirectories[directoryDigest] + if !ok { + b2.Discard() + return status.Errorf(codes.InvalidArgument, "Directory has digest %#v, which was not expected", directoryDigest.String()) + } + delete(expectedDirectories, directoryDigest) + + directoryMessage, errUnmarshal = b2.ToProto(&remoteexecution.Directory{}, maximumDirectorySizeBytes) + } + if errUnmarshal != nil { + return errUnmarshal + } + + directory := directoryMessage.(*remoteexecution.Directory) + childArguments := make([]*TArgument, 0, len(directory.Directories)) + for _, childDirectory := range directory.Directories { + digest, err := digestFunction.NewDigestFromProto(childDirectory.Digest) + if err != nil { + return util.StatusWrapf(err, "Invalid digest for child directory %#v", childDirectory.Name) + } + childArgument, ok := expectedDirectories[digest] + if !ok { + childArgument = new(TArgument) + expectedDirectories[digest] = childArgument + } + childArguments = append(childArguments, childArgument) + } + + return visitDirectory(directory, argument, childArguments) + }); err != nil { + return err + } + + if expectedFieldNumber == TreeRootFieldNumber { + return status.Error(codes.InvalidArgument, "Tree does not contain any directories") + } + if len(expectedDirectories) > 0 { + return status.Errorf(codes.InvalidArgument, "At least %d more directories were expected", len(expectedDirectories)) + } + return nil +} diff --git a/pkg/blobstore/visit_topologically_sorted_tree_test.go b/pkg/blobstore/visit_topologically_sorted_tree_test.go new file mode 100644 index 00000000..89421c3c --- /dev/null +++ b/pkg/blobstore/visit_topologically_sorted_tree_test.go @@ -0,0 +1,311 @@ +package blobstore_test + +import ( + "bytes" + "testing" + + remoteexecution "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" + "github.com/buildbarn/bb-storage/internal/mock" + "github.com/buildbarn/bb-storage/pkg/blobstore" + "github.com/buildbarn/bb-storage/pkg/digest" + "github.com/buildbarn/bb-storage/pkg/testutil" + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/require" + + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/encoding/protowire" + "google.golang.org/protobuf/proto" +) + +func TestVisitTopologicallySortedTree(t *testing.T) { + ctrl := gomock.NewController(t) + + t.Run("EmptyTree", func(t *testing.T) { + // Tree objects must contain at least one directory. + visitor := mock.NewMockIntTreeDirectoryVisitor(ctrl) + + rootArgument := 123 + testutil.RequireEqualStatus( + t, + status.Error(codes.InvalidArgument, "Tree does not contain any directories"), + blobstore.VisitTopologicallySortedTree( + bytes.NewBuffer(nil), + digest.MustNewFunction("", remoteexecution.DigestFunction_SHA256), + /* maximumDirectorySizeBytes = */ 10000, + &rootArgument, + visitor.Call)) + }) + + t.Run("MissingRootDirectory", func(t *testing.T) { + // The first directory must always be the root directory. + visitor := mock.NewMockIntTreeDirectoryVisitor(ctrl) + + tree, err := proto.Marshal(&remoteexecution.Tree{ + Children: []*remoteexecution.Directory{{ + Files: []*remoteexecution.FileNode{{ + Name: "hello", + Digest: &remoteexecution.Digest{ + Hash: "d8ef1593a6a7947c61baacd6945cbbcda0ca6b90cab290c4fdc628391f9c3a21", + SizeBytes: 12345, + }, + }}, + }}, + }) + require.NoError(t, err) + rootArgument := 123 + testutil.RequireEqualStatus( + t, + status.Error(codes.InvalidArgument, "Field with number 2 at offset 2 size 80: Expected field number 1"), + blobstore.VisitTopologicallySortedTree( + bytes.NewBuffer(tree), + digest.MustNewFunction("", remoteexecution.DigestFunction_SHA256), + /* maximumDirectorySizeBytes = */ 10000, + &rootArgument, + visitor.Call)) + }) + + t.Run("MalformedRootDirectory", func(t *testing.T) { + // Individual entries in the tree must be valid REv2 + // Directory messages. + visitor := mock.NewMockIntTreeDirectoryVisitor(ctrl) + + rootArgument := 123 + testutil.RequirePrefixedStatus( + t, + status.Error(codes.InvalidArgument, "Field with number 1 at offset 2 size 5: Failed to unmarshal message: "), + blobstore.VisitTopologicallySortedTree( + bytes.NewBuffer([]byte{byte(blobstore.TreeRootFieldNumber<<3) | byte(protowire.BytesType), 5, 'H', 'e', 'l', 'l', 'o'}), + digest.MustNewFunction("", remoteexecution.DigestFunction_SHA256), + /* maximumDirectorySizeBytes = */ 10000, + &rootArgument, + visitor.Call)) + }) + + t.Run("InvalidChildDigest", func(t *testing.T) { + // Digests of child directories must be well-formed. + visitor := mock.NewMockIntTreeDirectoryVisitor(ctrl) + + tree, err := proto.Marshal(&remoteexecution.Tree{ + Root: &remoteexecution.Directory{ + Directories: []*remoteexecution.DirectoryNode{{ + Name: "hello", + Digest: &remoteexecution.Digest{ + Hash: "fc20a67a98bfd79b463f00c03e0cd8f5", + SizeBytes: 42, + }, + }}, + }, + Children: []*remoteexecution.Directory{{ + Files: []*remoteexecution.FileNode{{ + Name: "hello", + Digest: &remoteexecution.Digest{ + Hash: "d8ef1593a6a7947c61baacd6945cbbcda0ca6b90cab290c4fdc628391f9c3a21", + SizeBytes: 12345, + }, + }}, + }}, + }) + require.NoError(t, err) + rootArgument := 123 + testutil.RequireEqualStatus( + t, + status.Error(codes.InvalidArgument, "Field with number 1 at offset 2 size 47: Invalid digest for child directory \"hello\": Hash has length 32, while 64 characters were expected"), + blobstore.VisitTopologicallySortedTree( + bytes.NewBuffer(tree), + digest.MustNewFunction("", remoteexecution.DigestFunction_SHA256), + /* maximumDirectorySizeBytes = */ 10000, + &rootArgument, + visitor.Call)) + }) + + t.Run("VisitorFailure", func(t *testing.T) { + // Errors returned by the visitor must be propagated. + visitor := mock.NewMockIntTreeDirectoryVisitor(ctrl) + rootDirectory := &remoteexecution.Directory{ + Files: []*remoteexecution.FileNode{{ + Name: "foo", + Digest: &remoteexecution.Digest{ + Hash: "e732cb0c4b229c11f314a8f5d0091300e8863ad85b6120a30441424ec05ee570", + SizeBytes: 42, + }, + }}, + } + rootArgument := 123 + visitor.EXPECT().Call(testutil.EqProto(t, rootDirectory), &rootArgument, gomock.Len(0)). + Return(status.Error(codes.Internal, "Cannot download \"foo\"")) + + tree, err := proto.Marshal(&remoteexecution.Tree{ + Root: rootDirectory, + }) + require.NoError(t, err) + testutil.RequireEqualStatus( + t, + status.Error(codes.Internal, "Field with number 1 at offset 2 size 77: Cannot download \"foo\""), + blobstore.VisitTopologicallySortedTree( + bytes.NewBuffer(tree), + digest.MustNewFunction("", remoteexecution.DigestFunction_SHA256), + /* maximumDirectorySizeBytes = */ 10000, + &rootArgument, + visitor.Call)) + }) + + t.Run("UnexpectedChildDirectory", func(t *testing.T) { + // Trees can't contain child directories that aren't + // referenced by any parent. + visitor := mock.NewMockIntTreeDirectoryVisitor(ctrl) + rootDirectory := &remoteexecution.Directory{ + Files: []*remoteexecution.FileNode{{ + Name: "foo", + Digest: &remoteexecution.Digest{ + Hash: "8459db5934c6c4c57e2370091f52cab4e7d2c5fd9189fad04e65bcf7da271632", + SizeBytes: 1200, + }, + }}, + } + rootArgument := 123 + visitor.EXPECT().Call(testutil.EqProto(t, rootDirectory), &rootArgument, gomock.Len(0)) + + tree, err := proto.Marshal(&remoteexecution.Tree{ + Root: rootDirectory, + Children: []*remoteexecution.Directory{{ + Files: []*remoteexecution.FileNode{{ + Name: "bar", + Digest: &remoteexecution.Digest{ + Hash: "d78246e76afcbd45f2b9177363be8f0b69562258efd3ce10b01537b1fd29e88a", + SizeBytes: 1300, + }, + }}, + }}, + }) + require.NoError(t, err) + testutil.RequireEqualStatus( + t, + status.Error(codes.InvalidArgument, "Field with number 2 at offset 82 size 78: Directory has digest \"1-0dcd71a395913d28ea219014f2b3c290a5b6b3208f75d4652cc8189633ef1edd-78-hello\", which was not expected"), + blobstore.VisitTopologicallySortedTree( + bytes.NewBuffer(tree), + digest.MustNewFunction("hello", remoteexecution.DigestFunction_SHA256), + /* maximumDirectorySizeBytes = */ 10000, + &rootArgument, + visitor.Call)) + }) + + t.Run("MissingChildDirectory", func(t *testing.T) { + // Directories can't refer to children that aren't + // present in the tree. + visitor := mock.NewMockIntTreeDirectoryVisitor(ctrl) + rootDirectory := &remoteexecution.Directory{ + Directories: []*remoteexecution.DirectoryNode{ + { + Name: "a", + Digest: &remoteexecution.Digest{ + Hash: "6e229505e4229f925c2e0db995bdb423831db29244c6b52afe48eef4df8652c2", + SizeBytes: 42, + }, + }, + { + Name: "b", + Digest: &remoteexecution.Digest{ + Hash: "8210f9ba247ca671f58d112980ddbc2d56bbe34071854bcf9f64b1523c9d9968", + SizeBytes: 43, + }, + }, + }, + } + rootArgument := 123 + defaultArgument := 0 + visitor.EXPECT().Call(testutil.EqProto(t, rootDirectory), &rootArgument, []*int{&defaultArgument, &defaultArgument}) + + tree, err := proto.Marshal(&remoteexecution.Tree{ + Root: rootDirectory, + }) + require.NoError(t, err) + testutil.RequireEqualStatus( + t, + status.Error(codes.InvalidArgument, "At least 2 more directories were expected"), + blobstore.VisitTopologicallySortedTree( + bytes.NewBuffer(tree), + digest.MustNewFunction("hello", remoteexecution.DigestFunction_SHA256), + /* maximumDirectorySizeBytes = */ 10000, + &rootArgument, + visitor.Call)) + }) + + t.Run("Success", func(t *testing.T) { + visitor := mock.NewMockIntTreeDirectoryVisitor(ctrl) + rootArgument := 123 + defaultArgument := 0 + directory1 := &remoteexecution.Directory{ + Directories: []*remoteexecution.DirectoryNode{ + { + Name: "a", + Digest: &remoteexecution.Digest{ + Hash: "a04ac895fa2f73839ffeb85ac1bccfdea48fe533edd53fc7ee29cd4d887cb808", + SizeBytes: 225, + }, + }, + { + Name: "b", + Digest: &remoteexecution.Digest{ + Hash: "a04ac895fa2f73839ffeb85ac1bccfdea48fe533edd53fc7ee29cd4d887cb808", + SizeBytes: 225, + }, + }, + }, + } + visitor.EXPECT().Call(testutil.EqProto(t, directory1), &rootArgument, []*int{&defaultArgument, &defaultArgument}) + + directory2 := &remoteexecution.Directory{ + Directories: []*remoteexecution.DirectoryNode{ + { + Name: "a", + Digest: &remoteexecution.Digest{ + Hash: "040d506676723f18bfa788eb192be1249b0915507fe1cc1bdd2a531e353689c2", + SizeBytes: 79, + }, + }, + { + Name: "b", + Digest: &remoteexecution.Digest{ + Hash: "040d506676723f18bfa788eb192be1249b0915507fe1cc1bdd2a531e353689c2", + SizeBytes: 79, + }, + }, + { + Name: "c", + Digest: &remoteexecution.Digest{ + Hash: "040d506676723f18bfa788eb192be1249b0915507fe1cc1bdd2a531e353689c2", + SizeBytes: 79, + }, + }, + }, + } + visitor.EXPECT().Call(testutil.EqProto(t, directory2), &defaultArgument, []*int{&defaultArgument, &defaultArgument, &defaultArgument}) + + directory3 := &remoteexecution.Directory{ + Files: []*remoteexecution.FileNode{{ + Name: "hello", + Digest: &remoteexecution.Digest{ + Hash: "173341c24df2cda18b84228f409a93ed5d20c1d37ab33dff39ee010027ae93fe", + SizeBytes: 5, + }, + }}, + } + visitor.EXPECT().Call(testutil.EqProto(t, directory3), &defaultArgument, gomock.Len(0)) + + tree, err := proto.Marshal(&remoteexecution.Tree{ + Root: directory1, + Children: []*remoteexecution.Directory{ + directory2, + directory3, + }, + }) + require.NoError(t, err) + require.NoError(t, blobstore.VisitTopologicallySortedTree( + bytes.NewBuffer(tree), + digest.MustNewFunction("hello", remoteexecution.DigestFunction_SHA256), + /* maximumDirectorySizeBytes = */ 10000, + &rootArgument, + visitor.Call)) + }) +}