From 1b4422eecf9474e9fa09d2da881c485597b8e9de Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Mon, 6 Jan 2025 22:22:21 +0100 Subject: [PATCH] [ntuple] general rename 'BytesOnStorage' --> 'NBytesOnDisk' --- tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx | 2 +- tree/ntuple/v7/src/RNTupleDescriptor.cxx | 2 +- tree/ntuple/v7/src/RNTupleDescriptorFmt.cxx | 38 +++++++++---------- tree/ntuple/v7/src/RNTupleMerger.cxx | 6 +-- tree/ntuple/v7/src/RNTupleSerialize.cxx | 12 +++--- tree/ntuple/v7/test/ntuple_descriptor.cxx | 4 +- 6 files changed, 32 insertions(+), 32 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx b/tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx index 85b0e5a07f527..4438a22427bf3 100644 --- a/tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx +++ b/tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx @@ -373,7 +373,7 @@ public: { return fColumnRanges.find(physicalId) != fColumnRanges.end(); } - std::uint64_t GetBytesOnStorage() const; + std::uint64_t GetNBytesOnDisk() const; }; class RClusterDescriptor::RColumnRangeIterable { diff --git a/tree/ntuple/v7/src/RNTupleDescriptor.cxx b/tree/ntuple/v7/src/RNTupleDescriptor.cxx index 70af13acbddaa..60683eac02aef 100644 --- a/tree/ntuple/v7/src/RNTupleDescriptor.cxx +++ b/tree/ntuple/v7/src/RNTupleDescriptor.cxx @@ -248,7 +248,7 @@ bool ROOT::Experimental::RClusterDescriptor::operator==(const RClusterDescriptor fNEntries == other.fNEntries && fColumnRanges == other.fColumnRanges && fPageRanges == other.fPageRanges; } -std::uint64_t ROOT::Experimental::RClusterDescriptor::GetBytesOnStorage() const +std::uint64_t ROOT::Experimental::RClusterDescriptor::GetNBytesOnDisk() const { std::uint64_t nbytes = 0; for (const auto &pr : fPageRanges) { diff --git a/tree/ntuple/v7/src/RNTupleDescriptorFmt.cxx b/tree/ntuple/v7/src/RNTupleDescriptorFmt.cxx index 5dd2806a62680..f62984b523897 100644 --- a/tree/ntuple/v7/src/RNTupleDescriptorFmt.cxx +++ b/tree/ntuple/v7/src/RNTupleDescriptorFmt.cxx @@ -29,8 +29,8 @@ struct ClusterInfo { std::uint64_t fFirstEntry = 0; std::uint32_t fNPages = 0; std::uint32_t fNEntries = 0; - std::uint32_t fBytesOnStorage = 0; - std::uint32_t fBytesInMemory = 0; + std::uint32_t fNBytesOnDisk = 0; + std::uint32_t fNBytesInMemory = 0; bool operator==(const ClusterInfo &other) const { return fFirstEntry == other.fFirstEntry; } @@ -43,7 +43,7 @@ struct ColumnInfo { ROOT::Experimental::DescriptorId_t fFieldId = 0; std::uint64_t fNElements = 0; std::uint64_t fNPages = 0; - std::uint64_t fBytesOnStorage = 0; + std::uint64_t fNBytesOnDisk = 0; std::uint32_t fElementSize = 0; std::uint32_t fColumnIndex = 0; std::uint16_t fRepresentationIndex = 0; @@ -94,8 +94,8 @@ void ROOT::Experimental::RNTupleDescriptor::PrintInfo(std::ostream &output) cons clusters.emplace_back(info); } - std::uint64_t bytesOnStorage = 0; - std::uint64_t bytesInMemory = 0; + std::uint64_t nBytesOnDisk = 0; + std::uint64_t nBytesInMemory = 0; std::uint64_t nPages = 0; int compression = -1; for (const auto &column : fColumnDescriptors) { @@ -129,12 +129,12 @@ void ROOT::Experimental::RNTupleDescriptor::PrintInfo(std::ostream &output) cons const auto &pageRange = cluster.second.GetPageRange(column.second.GetPhysicalId()); auto idx = cluster2Idx[cluster.first]; for (const auto &page : pageRange.fPageInfos) { - bytesOnStorage += page.fLocator.GetNBytesOnDisk(); - bytesInMemory += page.fNElements * elementSize; - clusters[idx].fBytesOnStorage += page.fLocator.GetNBytesOnDisk(); - clusters[idx].fBytesInMemory += page.fNElements * elementSize; + nBytesOnDisk += page.fLocator.GetNBytesOnDisk(); + nBytesInMemory += page.fNElements * elementSize; + clusters[idx].fNBytesOnDisk += page.fLocator.GetNBytesOnDisk(); + clusters[idx].fNBytesInMemory += page.fNElements * elementSize; ++clusters[idx].fNPages; - info.fBytesOnStorage += page.fLocator.GetNBytesOnDisk(); + info.fNBytesOnDisk += page.fLocator.GetNBytesOnDisk(); ++info.fNPages; ++nPages; } @@ -153,16 +153,16 @@ void ROOT::Experimental::RNTupleDescriptor::PrintInfo(std::ostream &output) cons output << " # Alias Columns: " << GetNLogicalColumns() - GetNPhysicalColumns() << "\n"; output << " # Pages: " << nPages << "\n"; output << " # Clusters: " << GetNClusters() << "\n"; - output << " Size on storage: " << bytesOnStorage << " B" + output << " Size on storage: " << nBytesOnDisk << " B" + << "\n"; + output << " Compression rate: " << std::fixed << std::setprecision(2) << float(nBytesInMemory) / float(nBytesOnDisk) << "\n"; - output << " Compression rate: " << std::fixed << std::setprecision(2) - << float(bytesInMemory) / float(bytesOnStorage) << "\n"; output << " Header size: " << headerSize << " B" << "\n"; output << " Footer size: " << footerSize << " B" << "\n"; output << " Meta-data / data: " << std::fixed << std::setprecision(3) - << float(headerSize + footerSize) / float(bytesOnStorage) << "\n"; + << float(headerSize + footerSize) / float(nBytesOnDisk) << "\n"; output << "------------------------------------------------------------\n"; output << "CLUSTER DETAILS\n"; output << "------------------------------------------------------------" << std::endl; @@ -174,10 +174,10 @@ void ROOT::Experimental::RNTupleDescriptor::PrintInfo(std::ostream &output) cons output << " " << " # Pages: " << clusters[i].fNPages << "\n"; output << " " - << " Size on storage: " << clusters[i].fBytesOnStorage << " B\n"; + << " Size on storage: " << clusters[i].fNBytesOnDisk << " B\n"; output << " " << " Compression: " << std::fixed << std::setprecision(2) - << float(clusters[i].fBytesInMemory) / float(float(clusters[i].fBytesOnStorage)) << std::endl; + << float(clusters[i].fNBytesInMemory) / float(float(clusters[i].fNBytesOnDisk)) << std::endl; } output << "------------------------------------------------------------\n"; @@ -189,7 +189,7 @@ void ROOT::Experimental::RNTupleDescriptor::PrintInfo(std::ostream &output) cons } std::sort(columns.begin(), columns.end()); for (const auto &col : columns) { - auto avgPageSize = (col.fNPages == 0) ? 0 : (col.fBytesOnStorage / col.fNPages); + auto avgPageSize = (col.fNPages == 0) ? 0 : (col.fNBytesOnDisk / col.fNPages); auto avgElementsPerPage = (col.fNPages == 0) ? 0 : (col.fNElements / col.fNPages); std::string nameAndType = std::string(" ") + col.fFieldName + " [#" + std::to_string(col.fColumnIndex); if (col.fRepresentationIndex > 0) @@ -205,9 +205,9 @@ void ROOT::Experimental::RNTupleDescriptor::PrintInfo(std::ostream &output) cons output << " # Pages: " << col.fNPages << "\n"; output << " Avg elements / page: " << avgElementsPerPage << "\n"; output << " Avg page size: " << avgPageSize << " B\n"; - output << " Size on storage: " << col.fBytesOnStorage << " B\n"; + output << " Size on storage: " << col.fNBytesOnDisk << " B\n"; output << " Compression: " << std::fixed << std::setprecision(2) - << float(col.fElementSize * col.fNElements) / float(col.fBytesOnStorage) << "\n"; + << float(col.fElementSize * col.fNElements) / float(col.fNBytesOnDisk) << "\n"; output << "............................................................" << std::endl; } } diff --git a/tree/ntuple/v7/src/RNTupleMerger.cxx b/tree/ntuple/v7/src/RNTupleMerger.cxx index 236c5ffd6f67a..b54c2de6e06b0 100644 --- a/tree/ntuple/v7/src/RNTupleMerger.cxx +++ b/tree/ntuple/v7/src/RNTupleMerger.cxx @@ -659,12 +659,12 @@ static void GenerateExtraDstColumns(size_t nClusterEntries, std::spanGetPackedSize(nElements); + const auto nBytesOnDisk = colElement->GetPackedSize(nElements); constexpr auto kPageSizeLimit = 256 * 1024; // TODO(gparolini): consider coalescing the last page if its size is less than some threshold - const size_t nPages = bytesOnStorage / kPageSizeLimit + !!(bytesOnStorage % kPageSizeLimit); + const size_t nPages = nBytesOnDisk / kPageSizeLimit + !!(nBytesOnDisk % kPageSizeLimit); for (size_t i = 0; i < nPages; ++i) { - const auto pageSize = (i < nPages - 1) ? kPageSizeLimit : bytesOnStorage - kPageSizeLimit * (nPages - 1); + const auto pageSize = (i < nPages - 1) ? kPageSizeLimit : nBytesOnDisk - kPageSizeLimit * (nPages - 1); const auto checksumSize = RPageStorage::kNBytesPageChecksum; const auto bufSize = pageSize + checksumSize; auto &buffer = sealedPageData.fBuffers.emplace_back(new unsigned char[bufSize]); diff --git a/tree/ntuple/v7/src/RNTupleSerialize.cxx b/tree/ntuple/v7/src/RNTupleSerialize.cxx index 29356bed318f1..bdd79421990b0 100644 --- a/tree/ntuple/v7/src/RNTupleSerialize.cxx +++ b/tree/ntuple/v7/src/RNTupleSerialize.cxx @@ -419,18 +419,18 @@ void DeserializeLocatorPayloadLarge(const unsigned char *buffer, ROOT::Experimen std::uint32_t SerializeLocatorPayloadObject64(const ROOT::Experimental::RNTupleLocator &locator, unsigned char *buffer) { const auto &data = locator.GetPosition(); - const uint32_t sizeofBytesOnStorage = (locator.GetNBytesOnDisk() > std::numeric_limits::max()) - ? sizeof(std::uint64_t) - : sizeof(std::uint32_t); + const uint32_t sizeofNBytesOnDisk = (locator.GetNBytesOnDisk() > std::numeric_limits::max()) + ? sizeof(std::uint64_t) + : sizeof(std::uint32_t); if (buffer) { - if (sizeofBytesOnStorage == sizeof(std::uint32_t)) { + if (sizeofNBytesOnDisk == sizeof(std::uint32_t)) { RNTupleSerializer::SerializeUInt32(locator.GetNBytesOnDisk(), buffer); } else { RNTupleSerializer::SerializeUInt64(locator.GetNBytesOnDisk(), buffer); } - RNTupleSerializer::SerializeUInt64(data.GetLocation(), buffer + sizeofBytesOnStorage); + RNTupleSerializer::SerializeUInt64(data.GetLocation(), buffer + sizeofNBytesOnDisk); } - return sizeofBytesOnStorage + sizeof(std::uint64_t); + return sizeofNBytesOnDisk + sizeof(std::uint64_t); } void DeserializeLocatorPayloadObject64(const unsigned char *buffer, std::uint32_t sizeofLocatorPayload, diff --git a/tree/ntuple/v7/test/ntuple_descriptor.cxx b/tree/ntuple/v7/test/ntuple_descriptor.cxx index 714a9e75243db..9c26ac3a6d8c9 100644 --- a/tree/ntuple/v7/test/ntuple_descriptor.cxx +++ b/tree/ntuple/v7/test/ntuple_descriptor.cxx @@ -504,7 +504,7 @@ TEST(RColumnDescriptorIterable, IterateOverColumns) EXPECT_EQ(desc.GetNLogicalColumns(), counter); } -TEST(RClusterDescriptor, GetBytesOnStorage) +TEST(RClusterDescriptor, GetNBytesOnDisk) { auto model = RNTupleModel::Create(); auto fldJets = model->MakeField>("jets"); @@ -526,7 +526,7 @@ TEST(RClusterDescriptor, GetBytesOnStorage) auto clusterID = desc.FindClusterId(0, 0); ASSERT_NE(ROOT::Experimental::kInvalidDescriptorId, clusterID); - EXPECT_EQ(8 + 8 + 8 + 3, desc.GetClusterDescriptor(clusterID).GetBytesOnStorage()); + EXPECT_EQ(8 + 8 + 8 + 3, desc.GetClusterDescriptor(clusterID).GetNBytesOnDisk()); } TEST(RNTupleDescriptor, Clone)