From ccfdf9e4bde2ac97f090bd786d52d55130ffa149 Mon Sep 17 00:00:00 2001 From: Damian Li Date: Wed, 6 Nov 2024 00:13:52 -0500 Subject: [PATCH] LSM-Tree level storage bug fixed --- LSMTree/LSMTree.cpp | 48 +++++++++----------------- LSMTree/LSMTree.h | 1 + Storage/DiskBTree/DiskBTree.cpp | 60 ++++++++++++++++++++++++++++++--- Storage/DiskBTree/DiskBTree.h | 5 +++ tests/lsm_tree_unittests.cpp | 17 +++++----- 5 files changed, 85 insertions(+), 46 deletions(-) diff --git a/LSMTree/LSMTree.cpp b/LSMTree/LSMTree.cpp index c9544cc..402822d 100644 --- a/LSMTree/LSMTree.cpp +++ b/LSMTree/LSMTree.cpp @@ -410,11 +410,13 @@ void LSMTree::mergeLevels(int levelIndex, const std::shared_ptr& sstT mergeSSTables(existingSSTable, sstToMerge, mergedLeafsPath.string(), leafPageSmallestKeys, numOfPages, totalKvs); // Create a new DiskBTree instance for the merged SSTable using the provided constructor - // cout << "LSMTree::mergeLevels() newSSTablePath.string() is " << newSSTablePath.string() << endl; + cout << "LSMTree::mergeLevels() newSSTablePath.string() is " << newSSTablePath.string() << endl; std::shared_ptr mergedSSTable = std::make_shared( newSSTablePath.string(), mergedLeafsPath.string(), leafPageSmallestKeys, numOfPages, totalKvs); + // cout << "=========================" << endl; + // mergedSSTable->printKVs(); // Delete old SSTable files and merged leaf pages file fs::remove(existingSSTable->getFileName()); @@ -459,8 +461,8 @@ void LSMTree::mergeSSTables(const std::shared_ptr& sst1, PageManager& pm1 = *(sst1->pageManager); PageManager& pm2 = *(sst2->pageManager); - // cout << "num of kv in sst1: " << sst1->getNumberOfKeyValues() << endl; - // cout << "num of kv in sst2: " << sst2->getNumberOfKeyValues() << endl; + cout << "num of kv in sst1: " << sst1->getNumberOfKeyValues() << endl; + cout << "num of kv in sst2: " << sst2->getNumberOfKeyValues() << endl; // Get leaf page offsets @@ -623,6 +625,8 @@ void LSMTree::mergeSSTables(const std::shared_ptr& sst1, // Close the output PageManager outputLeafPageManager.close(); + + // cout << "LSMTree::MergeSSTables(): Number of KVs after merge: " << totalKvs << endl; } // Generate unique SSTable file names @@ -638,35 +642,13 @@ void LSMTree::printTree() const { if (levels[i] == nullptr) { cout << " No SST file in current level" << endl; }else { - uint64_t currentOffset = levels[i]->getLeafBeginOffset(); - bool done = false; - - while (!done) { - // Read the leaf page from disk - Page currentPage = levels[i]->pageManager->readPage(currentOffset); - - // Process current leaf page - const std::vector& kvPairs = currentPage.getLeafEntries(); - - // Iterate over the kvPairs - for (const auto& kv : kvPairs) { - // for testing purpose, we only print int value - cout << "Key = " << kv.kv.int_key() << " Value = " << kv.kv.int_value() << endl; - } - - if (done) { - break; - } - - // Move to the next leaf page - uint64_t nextLeafOffset = currentPage.getNextLeafOffset(); - if (nextLeafOffset == 0) { - // No more leaf pages - break; - } - - currentOffset = nextLeafOffset; - } + levels[i]->printKVs(); } } -} \ No newline at end of file +} + +void LSMTree::printLevelSizes() const { + for (int i = 0; i < levelMaxSizes.size(); i++) { + cout << "Level " << i+1 << " maximum size = " << levelMaxSizes[i] << endl; + } +} diff --git a/LSMTree/LSMTree.h b/LSMTree/LSMTree.h index 19ed779..66d036c 100644 --- a/LSMTree/LSMTree.h +++ b/LSMTree/LSMTree.h @@ -46,6 +46,7 @@ class LSMTree { // print LSM-Tree structure void printTree() const; + void printLevelSizes() const; private: // Level 0 is always the in-memory memtable std::unique_ptr memtable; // Level 0 diff --git a/Storage/DiskBTree/DiskBTree.cpp b/Storage/DiskBTree/DiskBTree.cpp index 4fdcac7..33a777b 100644 --- a/Storage/DiskBTree/DiskBTree.cpp +++ b/Storage/DiskBTree/DiskBTree.cpp @@ -71,10 +71,11 @@ DiskBTree::DiskBTree(const std::string& sstFileName) } DiskBTree::DiskBTree(const std::string& sstFileName, const std::string& leafsFileName, const std::vector& leafPageSmallestKeys, int numOfPages, int totalKvs) - : sstFileName(sstFileName), root(nullptr) + : sstFileName(sstFileName), root(nullptr), leafPageSmallestKeys(leafPageSmallestKeys) { // Constructor for creating a new SST file from existing leaf pages totalKeyValueCount = totalKvs; + int actual_KV_read = 0; pageManager = std::make_shared(sstFileName); // cout << "DiskBTree::DiskBTree() Leaf file name: " << leafsFileName << endl; // Step 1: Write placeholder metadata to offset 0 @@ -88,14 +89,29 @@ DiskBTree::DiskBTree(const std::string& sstFileName, const std::string& leafsFil PageManager leafPageManager(leafsFileName); // cout << "DiskBTree::DiskBTree(): Number of Pages to read: " << numOfPages << std::endl; - for(int i = 0; i < numOfPages; i++) { + + + for(int i = 0; i < leafPageSmallestKeys.size(); i++) { // cout << "DiskBTree::DiskBTree() read page offset: " << currentOffset << endl; + uint64_t offset = currentOffset; Page leafPage = leafPageManager.readPage(currentOffset); + actual_KV_read += leafPage.getLeafEntries().size(); + + // Set the nextLeafOffset of the previous leaf page + + if (i > 0) { + Page PreviousPage = pageManager->readPage(offset-pageSize); + PreviousPage.setNextLeafOffset(offset); + // Re-write the previous leaf page to update the nextLeafOffset + pageManager->writePage(leafPageOffsets[i-1], PreviousPage); + } + // leafPage.printType(); pageManager->writePage(currentOffset, leafPage); leafPageOffsets.push_back(currentOffset); currentOffset += pageSize; } + // cout << "DiskBTree::DiskBTree(): actual_KV_read == " << actual_KV_read << endl; // Set leafBeginOffset and leafEndOffset @@ -373,9 +389,11 @@ void DiskBTree::computeDegreeAndHeightFromLeafKeys(const std::vector& leafP // The root node should already have its offset set // No further action needed } + +void DiskBTree::printKVs() const { + uint64_t currentOffset = getLeafBeginOffset(); + bool done = false; + + while (!done) { + // Read the leaf page from disk + Page currentPage = pageManager->readPage(currentOffset); + + // Process current leaf page + const std::vector& kvPairs = currentPage.getLeafEntries(); + + // Iterate over the kvPairs + for (const auto& kv : kvPairs) { + // for testing purpose, we only print int value + cout << "Key = " << kv.kv.int_key() << " Value = " << kv.kv.int_value() << endl; + } + + if (done) { + break; + } + + // Move to the next leaf page + uint64_t nextLeafOffset = currentPage.getNextLeafOffset(); + if (nextLeafOffset == 0) { + // No more leaf pages + break; + } + + currentOffset = nextLeafOffset; + } +} \ No newline at end of file diff --git a/Storage/DiskBTree/DiskBTree.h b/Storage/DiskBTree/DiskBTree.h index b6d28c5..9c13ba1 100644 --- a/Storage/DiskBTree/DiskBTree.h +++ b/Storage/DiskBTree/DiskBTree.h @@ -59,6 +59,9 @@ class DiskBTree { std::string getSstFilename() const { return sstFileName; }; + // print all key value pair from disk + void printKVs() const; + private: // Offset of the root node uint64_t rootOffset; @@ -126,6 +129,8 @@ class DiskBTree { // New method to write the tree into the SST file using leaf page offsets void writeTreeToSSTWithLeafOffsets(const std::vector& leafPageOffsets); + + }; #endif // DISK_BTREE_H diff --git a/tests/lsm_tree_unittests.cpp b/tests/lsm_tree_unittests.cpp index d0c5cd8..b15c35e 100644 --- a/tests/lsm_tree_unittests.cpp +++ b/tests/lsm_tree_unittests.cpp @@ -275,7 +275,7 @@ TEST(LSMTreeTest, ScanAcrossLevels_II) { std::string dbPath = "test_lsm_scan_across"; cleanUpDir(dbPath); - size_t memtableSize = 501; // Small size to trigger flushes + size_t memtableSize = 500; // Small size to trigger flushes LSMTree lsmTree(memtableSize, dbPath); // Insert 15 keys to populate Level1 and some in memtable @@ -289,16 +289,17 @@ TEST(LSMTreeTest, ScanAcrossLevels_II) { KeyValueWrapper endKey(500, 0); // print all key - lsmTree.printTree(); - // // Perform scan - // std::vector scanResult; - // lsmTree.scan(startKey, endKey, scanResult); - // + // lsmTree.printTree(); + // lsmTree.printLevelSizes(); + // Perform scan + std::vector scanResult; + lsmTree.scan(startKey, endKey, scanResult); + // for (int i = 0; i < scanResult.size(); ++i) { // cout << "Keys scanned: " << scanResult[i].kv.int_key() << endl; // } - // // Expected keys: 100 to 500 inclusive - // EXPECT_EQ(scanResult.size(), 401); + // Expected keys: 100 to 500 inclusive + EXPECT_EQ(scanResult.size(), 401); // Clean up