diff --git a/dwio/nimble/tablet/TabletReader.cpp b/dwio/nimble/tablet/TabletReader.cpp index 33197c5..8620fe2 100644 --- a/dwio/nimble/tablet/TabletReader.cpp +++ b/dwio/nimble/tablet/TabletReader.cpp @@ -605,6 +605,47 @@ std::vector> TabletReader::load( return streams; } +uint64_t TabletReader::getStreamSizeSum( + uint32_t stripe, + std::span streamIdentifiers) const { + NIMBLE_CHECK(stripe < stripeCount_, "Stripe is out of range."); + + StripeGroup stripeGroup; + auto footerRoot = + asFlatBuffersRoot(footer_->content()); + auto stripesRoot = + asFlatBuffersRoot(stripes_->content()); + auto targetIndex = stripesRoot->group_indices()->Get(stripe); + StripeGroup* stripeGroupPtr = &stripeGroup_; + if (targetIndex != stripeGroup_.index()) { + auto stripeGroupInfo = footerRoot->stripe_groups()->Get(targetIndex); + velox::common::Region stripeGroupRegion{ + stripeGroupInfo->offset(), stripeGroupInfo->size(), "StripeGroup"}; + folly::IOBuf result; + file_->preadv({&stripeGroupRegion, 1}, {&result, 1}); + + stripeGroup.reset( + targetIndex, + *stripes_, + stripe, + std::make_unique( + memoryPool_, + result, + static_cast(stripeGroupInfo->compression_type()))); + stripeGroupPtr = &stripeGroup; + } + + uint64_t streamSizeSum = 0; + const auto stripeStreamSizes = stripeGroupPtr->streamSizes(stripe); + for (auto streamId : streamIdentifiers) { + if (streamId >= stripeGroupPtr->streamCount()) { + continue; + } + streamSizeSum += stripeStreamSizes[streamId]; + } + return streamSizeSum; +} + std::optional
TabletReader::loadOptionalSection( const std::string& name, bool keepCache) const { diff --git a/dwio/nimble/tablet/TabletReader.h b/dwio/nimble/tablet/TabletReader.h index 5cdb876..2eb88c0 100644 --- a/dwio/nimble/tablet/TabletReader.h +++ b/dwio/nimble/tablet/TabletReader.h @@ -166,6 +166,10 @@ class TabletReader { return std::string_view{}; }) const; + uint64_t getStreamSizeSum( + uint32_t stripe, + std::span streamIdentifiers) const; + std::optional
loadOptionalSection( const std::string& name, bool keepCache = false) const; diff --git a/dwio/nimble/tablet/tests/TabletTests.cpp b/dwio/nimble/tablet/tests/TabletTests.cpp index 966a531..5c14b93 100644 --- a/dwio/nimble/tablet/tests/TabletTests.cpp +++ b/dwio/nimble/tablet/tests/TabletTests.cpp @@ -147,8 +147,21 @@ void parameterizedTest( VLOG(1) << "Output Tablet -> StripeCount: " << tablet.stripeCount() << ", RowCount: " << tablet.tabletRowCount(); - // Now, read all stripes and verify results + uint32_t maxIdentifiers = 0; + for (auto stripe = 0; stripe < stripesData.size(); ++stripe) { + maxIdentifiers = std::max(maxIdentifiers, tablet.streamCount(stripe)); + } + std::vector allIdentifiers(maxIdentifiers); + std::iota(allIdentifiers.begin(), allIdentifiers.end(), 0); + std::span allIdentifiersSpan{ + allIdentifiers.cbegin(), allIdentifiers.cend()}; size_t extraReads = 0; + std::vector streamSizeSum; + for (auto stripe = 0; stripe < stripesData.size(); ++stripe) { + streamSizeSum.push_back( + tablet.getStreamSizeSum(stripe, allIdentifiersSpan)); + } + // Now, read all stripes and verify results for (auto stripe = 0; stripe < stripesData.size(); ++stripe) { EXPECT_EQ(stripesData[stripe].rowCount, tablet.stripeRowCount(stripe)); @@ -157,6 +170,13 @@ void parameterizedTest( std::iota(identifiers.begin(), identifiers.end(), 0); auto serializedStreams = tablet.load(stripe, {identifiers.cbegin(), identifiers.cend()}); + uint64_t streamSizeSumExpected = 0; + for (const auto& stream : serializedStreams) { + if (stream) { + streamSizeSumExpected += stream->getStream().size(); + } + } + EXPECT_EQ(streamSizeSum[stripe], streamSizeSumExpected); auto chunks = readFile.chunks(); auto expectedReads = stripesData[stripe].streams.size(); auto diff = chunks.size() - expectedReads;