Skip to content

Commit

Permalink
TabletReader::getStreamSizeSum
Browse files Browse the repository at this point in the history
Summary: We want to be able to know stream sizes without actually loading the stripe.

Differential Revision: D56738694
  • Loading branch information
munozdaniel authored and facebook-github-bot committed Apr 30, 2024
1 parent 3acdb4b commit 13888d6
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 1 deletion.
41 changes: 41 additions & 0 deletions dwio/nimble/tablet/TabletReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,47 @@ std::vector<std::unique_ptr<StreamLoader>> TabletReader::load(
return streams;
}

uint64_t TabletReader::getStreamSizeSum(
uint32_t stripe,
std::span<const uint32_t> streamIdentifiers) const {
NIMBLE_CHECK(stripe < stripeCount_, "Stripe is out of range.");

StripeGroup stripeGroup;
auto footerRoot =
asFlatBuffersRoot<serialization::Footer>(footer_->content());
auto stripesRoot =
asFlatBuffersRoot<serialization::Stripes>(stripes_->content());
auto targetIndex = stripesRoot->group_indices()->Get(stripe);
StripeGroup* stripeGroupPtr = &stripeGroup_;
if (targetIndex != stripeGroup_.index()) {
auto stripeGroupInfo = footerRoot->stripe_groups()->Get(targetIndex);
velox::common::Region stripeGroupRegion{
stripeGroupInfo->offset(), stripeGroupInfo->size(), "StripeGroup"};
folly::IOBuf result;
file_->preadv({&stripeGroupRegion, 1}, {&result, 1});

stripeGroup.reset(
targetIndex,
*stripes_,
stripe,
std::make_unique<MetadataBuffer>(
memoryPool_,
result,
static_cast<CompressionType>(stripeGroupInfo->compression_type())));
stripeGroupPtr = &stripeGroup;
}

uint64_t streamSizeSum = 0;
const auto stripeStreamSizes = stripeGroupPtr->streamSizes(stripe);
for (auto streamId : streamIdentifiers) {
if (streamId >= stripeGroupPtr->streamCount()) {
continue;
}
streamSizeSum += stripeStreamSizes[streamId];
}
return streamSizeSum;
}

std::optional<Section> TabletReader::loadOptionalSection(
const std::string& name,
bool keepCache) const {
Expand Down
4 changes: 4 additions & 0 deletions dwio/nimble/tablet/TabletReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,10 @@ class TabletReader {
return std::string_view{};
}) const;

// Returns the sum of the sizes of the streams listed in `streamIdentifiers`
// for the given stripe, without loading the streams themselves.
//
// `stripe` must be a valid stripe index; the implementation enforces this
// with NIMBLE_CHECK. Stream identifiers that are not below the stripe
// group's stream count are skipped and add nothing to the sum.
uint64_t getStreamSizeSum(
uint32_t stripe,
std::span<const uint32_t> streamIdentifiers) const;

// NOTE(review): only the declaration is visible here. Presumably looks up the
// optional section called `name` and yields std::nullopt when there is no
// such section; confirm against the definition. `keepCache` defaults to false.
std::optional<Section> loadOptionalSection(
const std::string& name,
bool keepCache = false) const;
Expand Down
22 changes: 21 additions & 1 deletion dwio/nimble/tablet/tests/TabletTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,21 @@ void parameterizedTest(
VLOG(1) << "Output Tablet -> StripeCount: " << tablet.stripeCount()
<< ", RowCount: " << tablet.tabletRowCount();

// Now, read all stripes and verify results
uint32_t maxIdentifiers = 0;
for (auto stripe = 0; stripe < stripesData.size(); ++stripe) {
maxIdentifiers = std::max(maxIdentifiers, tablet.streamCount(stripe));
}
std::vector<uint32_t> allIdentifiers(maxIdentifiers);
std::iota(allIdentifiers.begin(), allIdentifiers.end(), 0);
std::span<const uint32_t> allIdentifiersSpan{
allIdentifiers.cbegin(), allIdentifiers.cend()};
size_t extraReads = 0;
std::vector<uint64_t> streamSizeSum;
for (auto stripe = 0; stripe < stripesData.size(); ++stripe) {
streamSizeSum.push_back(
tablet.getStreamSizeSum(stripe, allIdentifiersSpan));
}
// Now, read all stripes and verify results
for (auto stripe = 0; stripe < stripesData.size(); ++stripe) {
EXPECT_EQ(stripesData[stripe].rowCount, tablet.stripeRowCount(stripe));

Expand All @@ -157,6 +170,13 @@ void parameterizedTest(
std::iota(identifiers.begin(), identifiers.end(), 0);
auto serializedStreams =
tablet.load(stripe, {identifiers.cbegin(), identifiers.cend()});
uint64_t streamSizeSumExpected = 0;
for (const auto& stream : serializedStreams) {
if (stream) {
streamSizeSumExpected += stream->getStream().size();
}
}
EXPECT_EQ(streamSizeSum[stripe], streamSizeSumExpected);
auto chunks = readFile.chunks();
auto expectedReads = stripesData[stripe].streams.size();
auto diff = chunks.size() - expectedReads;
Expand Down

0 comments on commit 13888d6

Please sign in to comment.