From 46efcd54c4e129f4ff009bb8ae4cd12ee86030f6 Mon Sep 17 00:00:00 2001 From: Damian Li Date: Fri, 1 Nov 2024 10:34:39 -0400 Subject: [PATCH] update doc & code cleaning --- README.md | 3 +- Storage/Page/Page.h | 2 +- documentation/docs/api.md | 5 +- documentation/docs/benchmark.md | 3 -- documentation/docs/design.md | 89 ++++++++++++++++++++++++++++++++- documentation/docs/layout.md | 42 ++++++++++++---- 6 files changed, 125 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 1ccfb4d..f44cd61 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,6 @@ Initializes the database system, setting up the necessary files and directories /* * Initialize with default value : * Memtable::size == 1e3 - * SsTFileManager::DiskBTree::Degree == 3 */ auto MyDBDefault = std::make_unique(); auto MyDBDefault = std::make_unique(int memtableSize); @@ -151,7 +150,7 @@ print total number of cache hit in the buffer pool during the database operation #include "VeloxDB/VeloxDB.h" int memtableSize = 1e4; -auto db = std::make_unique(memtableSize, 3); +auto db = std::make_unique(memtableSize); db->Open("test_db"); const int numEntries = 1e6; // Insert 1e6 key-value pairs diff --git a/Storage/Page/Page.h b/Storage/Page/Page.h index 62d73fd..9e9af94 100644 --- a/Storage/Page/Page.h +++ b/Storage/Page/Page.h @@ -3,7 +3,7 @@ #define PAGE_H #include "KeyValue.h" -#include "BloomFilter.h" // Include the BloomFilter header +#include "BloomFilter.h" #include #include #include diff --git a/documentation/docs/api.md b/documentation/docs/api.md index bcffa46..775bd06 100644 --- a/documentation/docs/api.md +++ b/documentation/docs/api.md @@ -9,10 +9,9 @@ Initializes and open the database system, setting up the necessary files and dir /* * Initialize with default value : * Memtable::size == 1e3 - * SsTFileManager::DiskBTree::Degree == 3 */ auto MyDBDefault = std::make_unique(); -auto MyDBDefault = std::make_unique(int memtableSize, int BTreeDegree); +auto MyDBDefault = std::make_unique(int 
memtableSize); MyDBDefault->Open("database_name"); // open ``` @@ -145,7 +144,7 @@ print total number of cache hit in the buffer pool during the database operation #include "VeloxDB/VeloxDB.h" int memtableSize = 1e4; -auto db = std::make_unique(memtableSize, 3); +auto db = std::make_unique(memtableSize); db->Open("test_db"); const int numEntries = 1e6; // Insert 1e6 key-value pairs diff --git a/documentation/docs/benchmark.md b/documentation/docs/benchmark.md index deb81b7..addeb02 100644 --- a/documentation/docs/benchmark.md +++ b/documentation/docs/benchmark.md @@ -3,7 +3,6 @@ #### `VeloxDB::Put` **`Put` throughput with different `Memtable` size** ```text - B Tree Degree = 3 page size = 4 kb ``` ![](image/static_b_tree_benchmark/put_throughput.png) @@ -11,7 +10,6 @@ #### `VeloxDB::Get` **`Get` latency with different `Memtable` size** ```text - B Tree Degree = 3 page size = 4 kb ``` ![](image/static_b_tree_benchmark/get_latency.png) @@ -19,7 +17,6 @@ #### `VeloxDB::Scan` **`Scan` throughput with different `Memtable` size** ```text - B Tree Degree = 3 page size = 4 kb ``` ![](image/static_b_tree_benchmark/scan_throughput.png) diff --git a/documentation/docs/design.md b/documentation/docs/design.md index 304d711..9bbf8bb 100644 --- a/documentation/docs/design.md +++ b/documentation/docs/design.md @@ -1 +1,88 @@ -### TBA \ No newline at end of file +### **Buffer Pool** +```c++ +// PageManager.h +class PageManager { +public: + // ... + + // BufferPool configuration + void setBufferPoolParameters(size_t capacity, EvictionPolicy policy); + long long getCacheHit() const {return bufferPool->getCacheHit();}; + + // ... + +private: + // ... + + // buffer pool shared among all PageManager instances + std::shared_ptr bufferPool; + + // ...
+}; + +``` + +### **Bloom Filter** +TBD + +### **Static B+ Tree as SST file** +#### Writing an SST file +```c++ +DiskBTree::DiskBTree(const std::string& sstFileName, const std::vector& keyValues, size_t pageSize) + : sstFileName(sstFileName), pageManager(sstFileName, pageSize), pageSize(pageSize), root(nullptr) +{ + // Constructor for creating a new SST file + + // Step 1: Write placeholder metadata to offset 0 + Page metadataPage(Page::PageType::SST_METADATA); + pageManager.writePage(0, metadataPage); // Reserve offset 0 + + // Step 2: Split keyValues into leaf pages + splitInputPairs(keyValues); + + // Step 3: Compute degree and height + computeDegreeAndHeight(); + + // Step 4: Build the tree + buildTree(); + + // Step 5: Write the tree into the SST file + writeTreeToSST(); + + // Step 6: Set the root offset + rootOffset = root->offset; + + // Step 7: Update and write the metadata page with the actual root offset + metadataPage.setMetadata(rootOffset, leafBeginOffset, leafEndOffset, sstFileName); + pageManager.writePage(0, metadataPage); + + // After writing, clear the in-memory structures to free memory + // ... +} +``` +#### Reading an SST file +```c++ +DiskBTree::DiskBTree(const std::string& sstFileName) + : sstFileName(sstFileName), pageManager(sstFileName), root(nullptr) +{ + // Constructor for reading an existing SST file + + // Read the metadata page from offset 0 + Page metadataPage = pageManager.readPage(0); + + // Extract metadata + std::string fileName; + metadataPage.getMetadata(rootOffset, leafBeginOffset, leafEndOffset, fileName); + + // sstFileName is already set; ensure it matches the metadata (optional) + if (sstFileName != fileName) { + std::cerr << "Warning: SST file name does not match metadata file name."
<< std::endl; + } + + // Since the tree is static, we don't load any nodes into memory + // We rely on reading pages from disk during search and scan operations +} +``` + +### **LSM-Tree** +TBD \ No newline at end of file diff --git a/documentation/docs/layout.md b/documentation/docs/layout.md index 6f24127..0dc6f24 100644 --- a/documentation/docs/layout.md +++ b/documentation/docs/layout.md @@ -1,27 +1,34 @@ ### SST Files Layout ``` -[Internal Node Page (Root)] -[Internal Node Page 1] -[Internal Node Page 2] -... -[Internal Node Page n] +[SST Metadata Page] [Leaf Node Page 1] [Leaf Node Page 2] [Leaf Node Page 3] ... [Leaf Node Page m] +[Internal Node Page (Root)] +[Internal Node Page 1] +[Internal Node Page 2] +... +[Internal Node Page n] [* Clustered Index Page] [* Bloom Filter Page] -[SST Metadata Page] ``` #### `Page::PageSize` > Page with `PageSize::` **PageSize** (`4KB`, `8KB`) #### `Page::SST_MetaData` ```c++ -LeafNode_Begin_Offset -LeafNode_End_offset -FileName +struct SSTMetadata { + uint64_t rootPageOffset; + uint64_t leafNodeBeginOffset; + uint64_t leafNodeEndOffset; + std::string fileName; + + // SST Bloom filter + BloomFilter bloomFilter; + bool hasBloomFilter = false; + }; ``` #### `Page::LeafNodes` @@ -34,8 +41,19 @@ serialized key-value pair 1 metadata (serialized by protobuf) serialized key-value pair 2 metadata (serialized by protobuf) serialized key-value pair 3 metadata (serialized by protobuf) ... +Bloom Filter for each Leaf Page // with padding ``` +```c++ +struct LeafNodeData { + std::vector keyValues; + uint64_t nextLeafOffset; // Offset to next leaf node + + // Bloom filter for the leaf node + BloomFilter bloomFilter; + bool hasBloomFilter = false; + }; +``` #### `Page::InternalNodes` ```c++ @@ -49,6 +67,12 @@ level#1 key-value pair 2 metadata (serialized by protobuf), jump_offset_L2_K1, j ...
// with padding ``` +```c++ +struct InternalNodeData { + std::vector keys; + std::vector childOffsets; // Offsets to child pages, size = keys.size() + 1 + }; +``` #### `Page::BloomFilter` TBD