-
Notifications
You must be signed in to change notification settings - Fork 472
perf(castore): generate metadata from bytes in in-mem cache #614
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,12 +14,14 @@ | |
| package core | ||
|
|
||
| import ( | ||
| "bytes" | ||
| "math/rand" | ||
| "testing" | ||
|
|
||
| "github.com/stretchr/testify/require" | ||
|
|
||
| "github.com/uber/kraken/utils/memsize" | ||
| "github.com/uber/kraken/utils/randutil" | ||
| ) | ||
|
|
||
| func TestMetaInfoGetPieceLength(t *testing.T) { | ||
|
|
@@ -118,3 +120,87 @@ func TestMetaInfoSerializationLimit(t *testing.T) { | |
| }) | ||
| } | ||
| } | ||
|
|
||
| func TestNewMetaInfoFromBytes_MatchesReader(t *testing.T) { | ||
| cases := []struct { | ||
| name string | ||
| size uint64 | ||
| pieceLength uint64 | ||
| }{ | ||
| {"empty", 0, 256}, | ||
| {"smaller_than_piece", 100, 256}, | ||
| {"exact_one_piece", 256, 256}, | ||
| {"two_pieces", 512, 256}, | ||
| {"non_aligned", 700, 256}, | ||
| {"many_pieces", 1 << 20, 256}, | ||
| {"piece_length_one", 100, 1}, | ||
| {"piece_larger_than_blob", 100, 1024}, | ||
| {"invalid_piece_length", 100, 0}, | ||
| } | ||
| for _, tc := range cases { | ||
| t.Run(tc.name, func(t *testing.T) { | ||
| require := require.New(t) | ||
| data := randutil.Text(tc.size) | ||
| d, err := NewDigester().FromBytes(data) | ||
| require.NoError(err) | ||
|
|
||
| miReader, errReader := NewMetaInfo(d, bytes.NewReader(data), int64(tc.pieceLength)) | ||
| miBytes, errBytes := NewMetaInfoFromBytes(d, data, int64(tc.pieceLength)) | ||
|
sambhav-jain-16 marked this conversation as resolved.
|
||
| require.Equal(errReader != nil, errBytes != nil) | ||
| if errReader != nil { | ||
| require.EqualError(errBytes, errReader.Error()) | ||
| return | ||
| } | ||
| require.NoError(errReader) | ||
| require.NoError(errBytes) | ||
| require.Equal(miReader.InfoHash(), miBytes.InfoHash()) | ||
| require.Equal(miReader.Length(), miBytes.Length()) | ||
| require.Equal(miReader.NumPieces(), miBytes.NumPieces()) | ||
| require.Equal(miReader.PieceLength(), miBytes.PieceLength()) | ||
| for i := 0; i < miReader.NumPieces(); i++ { | ||
| require.Equal(miReader.GetPieceSum(i), miBytes.GetPieceSum(i), | ||
| "piece %d sum mismatch", i) | ||
| } | ||
| bReader, err := miReader.Serialize() | ||
| require.NoError(err) | ||
| bBytes, err := miBytes.Serialize() | ||
| require.NoError(err) | ||
| require.Equal(bReader, bBytes) | ||
|
Comment on lines
+164
to
+168
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the reason for these serialisation-based checks when we also manually assert the fields? |
||
| }) | ||
| } | ||
| } | ||
|
|
||
| func BenchmarkNewMetaInfoFromBytes(b *testing.B) { | ||
| cases := []struct { | ||
| name string | ||
| blobSize uint64 | ||
|
sambhav-jain-16 marked this conversation as resolved.
|
||
| pieceLength uint64 | ||
| }{ | ||
| {"1MB_4pc", 1 << 20, 256 << 10}, | ||
| {"16MB_64pc", 16 << 20, 256 << 10}, | ||
| {"64MB_256pc", 64 << 20, 256 << 10}, | ||
| {"256MB_1024pc", 256 << 20, 256 << 10}, | ||
| {"16MB_4pc_4MBpc", 16 << 20, 4 << 20}, | ||
| {"16MB_16pc_1MBpc", 16 << 20, 1 << 20}, | ||
| {"16MB_1024pc_16KBpc", 16 << 20, 16 << 10}, | ||
| } | ||
| for _, tc := range cases { | ||
| b.Run(tc.name, func(b *testing.B) { | ||
| data := randutil.Text(tc.blobSize) | ||
| d, err := NewDigester().FromBytes(data) | ||
| require.NoError(b, err) | ||
| pl := int64(tc.pieceLength) | ||
|
|
||
| b.ResetTimer() | ||
| b.ReportAllocs() | ||
| b.SetBytes(int64(tc.blobSize)) | ||
| for b.Loop() { | ||
| mi, err := NewMetaInfoFromBytes(d, data, pl) | ||
| require.NoError(b, err) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: If we revert to calling
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just curious, what concurrency issues can we face with this?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "Concurrency issues" was a bit of an exaggeration, but |
||
| if mi == nil { | ||
| b.Fatal("nil MetaInfo") | ||
| } | ||
| } | ||
| }) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is a 0.5% performance improvement worth the extra complexity we are introducing from extra code?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think given that the code addition is quite small and it sits in the critical path of downloading and generating the metainfo, it should benefit us.