Skip to content

Commit

Permalink
LSM-Tree level storage bug fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
kkli08 committed Nov 6, 2024
1 parent 1f2bdc6 commit ccfdf9e
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 46 deletions.
48 changes: 15 additions & 33 deletions LSMTree/LSMTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,11 +410,13 @@ void LSMTree::mergeLevels(int levelIndex, const std::shared_ptr<DiskBTree>& sstT
mergeSSTables(existingSSTable, sstToMerge, mergedLeafsPath.string(), leafPageSmallestKeys, numOfPages, totalKvs);

// Create a new DiskBTree instance for the merged SSTable using the provided constructor
// cout << "LSMTree::mergeLevels() newSSTablePath.string() is " << newSSTablePath.string() << endl;
cout << "LSMTree::mergeLevels() newSSTablePath.string() is " << newSSTablePath.string() << endl;
std::shared_ptr<DiskBTree> mergedSSTable = std::make_shared<DiskBTree>(
newSSTablePath.string(), mergedLeafsPath.string(), leafPageSmallestKeys, numOfPages, totalKvs);


// cout << "=========================" << endl;
// mergedSSTable->printKVs();

// Delete old SSTable files and merged leaf pages file
fs::remove(existingSSTable->getFileName());
Expand Down Expand Up @@ -459,8 +461,8 @@ void LSMTree::mergeSSTables(const std::shared_ptr<DiskBTree>& sst1,
PageManager& pm1 = *(sst1->pageManager);
PageManager& pm2 = *(sst2->pageManager);

// cout << "num of kv in sst1: " << sst1->getNumberOfKeyValues() << endl;
// cout << "num of kv in sst2: " << sst2->getNumberOfKeyValues() << endl;
cout << "num of kv in sst1: " << sst1->getNumberOfKeyValues() << endl;
cout << "num of kv in sst2: " << sst2->getNumberOfKeyValues() << endl;


// Get leaf page offsets
Expand Down Expand Up @@ -623,6 +625,8 @@ void LSMTree::mergeSSTables(const std::shared_ptr<DiskBTree>& sst1,

// Close the output PageManager
outputLeafPageManager.close();

// cout << "LSMTree::MergeSSTables(): Number of KVs after merge: " << totalKvs << endl;
}

// Generate unique SSTable file names
Expand All @@ -638,35 +642,13 @@ void LSMTree::printTree() const {
if (levels[i] == nullptr) {
cout << " No SST file in current level" << endl;
}else {
uint64_t currentOffset = levels[i]->getLeafBeginOffset();
bool done = false;

while (!done) {
// Read the leaf page from disk
Page currentPage = levels[i]->pageManager->readPage(currentOffset);

// Process current leaf page
const std::vector<KeyValueWrapper>& kvPairs = currentPage.getLeafEntries();

// Iterate over the kvPairs
for (const auto& kv : kvPairs) {
// for testing purpose, we only print int value
cout << "Key = " << kv.kv.int_key() << " Value = " << kv.kv.int_value() << endl;
}

if (done) {
break;
}

// Move to the next leaf page
uint64_t nextLeafOffset = currentPage.getNextLeafOffset();
if (nextLeafOffset == 0) {
// No more leaf pages
break;
}

currentOffset = nextLeafOffset;
}
levels[i]->printKVs();
}
}
}
}

// Print the configured maximum size of every level, one line per level.
// Levels are reported with 1-based numbering, in the order they are stored
// in levelMaxSizes.
void LSMTree::printLevelSizes() const {
    // A separate display counter avoids comparing a signed index against the
    // container's unsigned size().
    int levelNumber = 1;
    for (const auto& maxSize : levelMaxSizes) {
        cout << "Level " << levelNumber << " maximum size = " << maxSize << endl;
        ++levelNumber;
    }
}
1 change: 1 addition & 0 deletions LSMTree/LSMTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class LSMTree {

// print LSM-Tree structure
void printTree() const;
void printLevelSizes() const;
private:
// Level 0 is always the in-memory memtable
std::unique_ptr<Memtable> memtable; // Level 0
Expand Down
60 changes: 55 additions & 5 deletions Storage/DiskBTree/DiskBTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,11 @@ DiskBTree::DiskBTree(const std::string& sstFileName)
}

DiskBTree::DiskBTree(const std::string& sstFileName, const std::string& leafsFileName, const std::vector<KeyValueWrapper>& leafPageSmallestKeys, int numOfPages, int totalKvs)
: sstFileName(sstFileName), root(nullptr)
: sstFileName(sstFileName), root(nullptr), leafPageSmallestKeys(leafPageSmallestKeys)
{
// Constructor for creating a new SST file from existing leaf pages
totalKeyValueCount = totalKvs;
int actual_KV_read = 0;
pageManager = std::make_shared<PageManager>(sstFileName);
// cout << "DiskBTree::DiskBTree() Leaf file name: " << leafsFileName << endl;
// Step 1: Write placeholder metadata to offset 0
Expand All @@ -88,14 +89,29 @@ DiskBTree::DiskBTree(const std::string& sstFileName, const std::string& leafsFil

PageManager leafPageManager(leafsFileName);
// cout << "DiskBTree::DiskBTree(): Number of Pages to read: " << numOfPages << std::endl;
for(int i = 0; i < numOfPages; i++) {


for(int i = 0; i < leafPageSmallestKeys.size(); i++) {
// cout << "DiskBTree::DiskBTree() read page offset: " << currentOffset << endl;
uint64_t offset = currentOffset;
Page leafPage = leafPageManager.readPage(currentOffset);
actual_KV_read += leafPage.getLeafEntries().size();

// Set the nextLeafOffset of the previous leaf page

if (i > 0) {
Page PreviousPage = pageManager->readPage(offset-pageSize);
PreviousPage.setNextLeafOffset(offset);
// Re-write the previous leaf page to update the nextLeafOffset
pageManager->writePage(leafPageOffsets[i-1], PreviousPage);
}

// leafPage.printType();
pageManager->writePage(currentOffset, leafPage);
leafPageOffsets.push_back(currentOffset);
currentOffset += pageSize;
}
// cout << "DiskBTree::DiskBTree(): actual_KV_read == " << actual_KV_read << endl;


// Set leafBeginOffset and leafEndOffset
Expand Down Expand Up @@ -373,9 +389,11 @@ void DiskBTree::computeDegreeAndHeightFromLeafKeys(const std::vector<KeyValueWra
size_t keySize = 0;
if (!leafPageSmallestKeys.empty()) {
// Serialize the first key to get its size
std::string keyData;
leafPageSmallestKeys[0].kv.SerializeToString(&keyData);
keySize = keyData.size();
// std::string keyData;
// leafPageSmallestKeys[0].kv.SerializeToString(&keyData);
// keySize = keyData.size();
keySize = leafPageSmallestKeys[0].getSerializedSize();

} else {
keySize = sizeof(KeyValueWrapper); // Fallback estimate
}
Expand Down Expand Up @@ -658,3 +676,35 @@ void DiskBTree::writeTreeToSSTWithLeafOffsets(const std::vector<uint64_t>& leafP
// The root node should already have its offset set
// No further action needed
}

// Print every key-value pair stored in this tree's leaf pages.
//
// Starts at the offset returned by getLeafBeginOffset(), reads each leaf page
// through pageManager, prints its entries, and follows the page's
// next-leaf offset until that offset is 0 (end of the leaf chain).
// For testing purposes only the integer key and integer value are printed.
void DiskBTree::printKVs() const {
    uint64_t currentOffset = getLeafBeginOffset();

    // The loop ends only when a page reports no successor (next offset == 0).
    for (;;) {
        // Read the leaf page from disk
        Page currentPage = pageManager->readPage(currentOffset);

        // Print all entries held by the current leaf page
        const std::vector<KeyValueWrapper>& kvPairs = currentPage.getLeafEntries();
        for (const auto& kv : kvPairs) {
            cout << "Key = " << kv.kv.int_key() << " Value = " << kv.kv.int_value() << endl;
        }

        // Move to the next leaf page, or stop when there is none
        const uint64_t nextLeafOffset = currentPage.getNextLeafOffset();
        if (nextLeafOffset == 0) {
            break;
        }
        currentOffset = nextLeafOffset;
    }
}
5 changes: 5 additions & 0 deletions Storage/DiskBTree/DiskBTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ class DiskBTree {

std::string getSstFilename() const { return sstFileName; };

// Print all key-value pairs stored on disk
void printKVs() const;

private:
// Offset of the root node
uint64_t rootOffset;
Expand Down Expand Up @@ -126,6 +129,8 @@ class DiskBTree {

// New method to write the tree into the SST file using leaf page offsets
void writeTreeToSSTWithLeafOffsets(const std::vector<uint64_t>& leafPageOffsets);


};

#endif // DISK_BTREE_H
17 changes: 9 additions & 8 deletions tests/lsm_tree_unittests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ TEST(LSMTreeTest, ScanAcrossLevels_II) {
std::string dbPath = "test_lsm_scan_across";
cleanUpDir(dbPath);

size_t memtableSize = 501; // Small size to trigger flushes
size_t memtableSize = 500; // Small size to trigger flushes
LSMTree lsmTree(memtableSize, dbPath);

// Insert 15 keys to populate Level1 and some in memtable
Expand All @@ -289,16 +289,17 @@ TEST(LSMTreeTest, ScanAcrossLevels_II) {
KeyValueWrapper endKey(500, 0);

// print all key
lsmTree.printTree();
// // Perform scan
// std::vector<KeyValueWrapper> scanResult;
// lsmTree.scan(startKey, endKey, scanResult);
//
// lsmTree.printTree();
// lsmTree.printLevelSizes();
// Perform scan
std::vector<KeyValueWrapper> scanResult;
lsmTree.scan(startKey, endKey, scanResult);

// for (int i = 0; i < scanResult.size(); ++i) {
// cout << "Keys scanned: " << scanResult[i].kv.int_key() << endl;
// }
// // Expected keys: 100 to 500 inclusive
// EXPECT_EQ(scanResult.size(), 401);
// Expected keys: 100 to 500 inclusive
EXPECT_EQ(scanResult.size(), 401);


// Clean up
Expand Down

0 comments on commit ccfdf9e

Please sign in to comment.