From 79d5039a9d9938ceb157f059414aec904627f1d9 Mon Sep 17 00:00:00 2001
From: Damian Ubuntu arm64
Date: Thu, 3 Oct 2024 21:49:00 -0400
Subject: [PATCH] benchmark put_throughput_benchmark.cpp

---
Review notes (free-form text between the three-dash line and the diff):
- Line breaks, indentation, the <...> include names and the template
  arguments were reconstructed from a whitespace-collapsed copy of this
  patch. The VeloxDB type name is taken from the VeloxDB.h include and the
  "Initialize VeloxDB" comment - TODO confirm against VeloxDB.h.
- The `index` hashes of Benchmark/CMakeLists.txt and
  Benchmark/put_throughput_benchmark.cpp predate the review edits below.
- The blank context line of the root CMakeLists.txt hunk is assumed to be the
  first context line - TODO confirm against the target tree.
- put_throughput.csv now stores MemtableSize in MB (it was bytes), matching
  the "Memtable Size (MB)" legend in plot_benchmark.py.

 Benchmark/CMakeLists.txt               |  76 +++++++++++++++++++
 Benchmark/plot_benchmark.py            |  20 +++++
 Benchmark/put_throughput_benchmark.cpp | 102 ++++++++++++++++++++++++++
 CMakeLists.txt                         |   2 +
 4 files changed, 200 insertions(+)
 create mode 100644 Benchmark/CMakeLists.txt
 create mode 100644 Benchmark/plot_benchmark.py
 create mode 100644 Benchmark/put_throughput_benchmark.cpp

diff --git a/Benchmark/CMakeLists.txt b/Benchmark/CMakeLists.txt
new file mode 100644
index 0000000..eedc0ce
--- /dev/null
+++ b/Benchmark/CMakeLists.txt
@@ -0,0 +1,76 @@
+# Minimum required version of CMake
+cmake_minimum_required(VERSION 3.10)
+
+# Project name
+project(Benchmark)
+
+# Set C++ standard
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Find Protobuf package
+find_package(Protobuf REQUIRED)
+
+# Specify the path to the .proto files
+set(PROTO_FILES
+    ${PROJECT_SOURCE_DIR}/../protobuf/KeyValue.proto
+)
+
+# Set the output directory for the generated .pb.cc and .pb.h files
+set(GENERATED_PROTO_DIR ${PROJECT_SOURCE_DIR}/../protobuf)
+
+# Include the generated protobuf files
+set(PROTO_SRCS
+    ${GENERATED_PROTO_DIR}/KeyValue.pb.cc
+)
+set(PROTO_HDRS
+    ${GENERATED_PROTO_DIR}/KeyValue.pb.h
+)
+
+# Include directories
+include_directories(
+    ${PROJECT_SOURCE_DIR}/../protobuf
+    ${PROJECT_SOURCE_DIR}/../kv
+    ${PROJECT_SOURCE_DIR}/../Memory/Memtable
+    ${PROJECT_SOURCE_DIR}/../Memory/BufferPool
+    ${PROJECT_SOURCE_DIR}/../Storage/Page
+    ${PROJECT_SOURCE_DIR}/../Storage/PageManager
+    ${PROJECT_SOURCE_DIR}/../Storage/SstFileManager
+    ${PROJECT_SOURCE_DIR}/../Storage/FileManager
+    ${PROJECT_SOURCE_DIR}/../Storage/DiskBTree
+    ${PROJECT_SOURCE_DIR}/../Tree/BinaryTree
+    ${PROJECT_SOURCE_DIR}/../Tree/BTree
+    ${PROJECT_SOURCE_DIR}/../Tree/RedBlackTree
+    ${PROJECT_SOURCE_DIR}/../Tree/TreeNode
+    ${PROJECT_SOURCE_DIR}/../VeloxDB
+    ${PROJECT_SOURCE_DIR}
+    ${GENERATED_PROTO_DIR} # Include generated protobuf directory
+)
+
+# List of source files for VeloxDB (the benchmark driver is listed once, in BENCHMARK_SRCS)
+set(VELOXDB_SRCS
+    ../Memory/Memtable/Memtable.cpp
+    ../kv/KeyValue.cpp
+    ../Tree/BTree/BTree.cpp
+    ../Tree/RedBlackTree/RedBlackTree.cpp
+    ../Tree/TreeNode/TreeNode.cpp
+    ../Tree/BinaryTree/BinaryTree.cpp
+    ../Storage/Page/Page.cpp
+    ../Storage/PageManager/PageManager.cpp
+    ../Storage/DiskBTree/DiskBTree.cpp
+    ../Storage/SstFileManager/SstFileManager.cpp
+    ../VeloxDB/VeloxDB.cpp
+    ../Memory/BufferPool/BufferPool.cpp
+)
+
+# Specify the source files for the benchmark
+set(BENCHMARK_SRCS
+    put_throughput_benchmark.cpp # Ensure this file contains `main` function
+)
+
+# Add executable for the benchmark, ensuring main function is included
+add_executable(put_benchmark ${BENCHMARK_SRCS} ${PROTO_SRCS} ${VELOXDB_SRCS})
+
+# Link libraries for the benchmark executable
+target_link_libraries(put_benchmark ${Protobuf_LIBRARIES})
+
diff --git a/Benchmark/plot_benchmark.py b/Benchmark/plot_benchmark.py
new file mode 100644
index 0000000..af53aa8
--- /dev/null
+++ b/Benchmark/plot_benchmark.py
@@ -0,0 +1,20 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+
+# Read the CSV file
+data = pd.read_csv("put_throughput.csv")
+
+# Pivot the data to get MemtableSize as columns and DataSizeMB as index
+pivot_data = data.pivot(index='DataSizeMB', columns='MemtableSize', values='Throughput(MB/s)')
+
+# Plot the graph
+pivot_data.plot(kind='line', marker='o')
+
+# Set the title and labels
+plt.title('Put Throughput for Different Memtable Sizes')
+plt.xlabel('Input Data Size (MB)')
+plt.ylabel('Throughput (MB/s)')
+plt.xscale('log', base=2) # Log scale for X-axis (data size)
+plt.grid(True)
+plt.legend(title='Memtable Size (MB)')
+plt.show()
diff --git a/Benchmark/put_throughput_benchmark.cpp b/Benchmark/put_throughput_benchmark.cpp
new file mode 100644
index 0000000..b50cf07
--- /dev/null
+++ b/Benchmark/put_throughput_benchmark.cpp
@@ -0,0 +1,102 @@
+//
+// Created by damian on 10/3/24.
+//
+#include <chrono>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+#include "../VeloxDB/VeloxDB.h"
+
+using namespace std::chrono;
+
+// Constants for benchmark
+constexpr size_t MB = 1024 * 1024; // 1MB in bytes
+constexpr size_t START_DATA_SIZE_MB = 1; // Start with 1 MB
+constexpr size_t END_DATA_SIZE_MB = 512; // End with 512 MB (adjust as needed)
+const std::string DB_NAME = "benchmark_db";
+
+// Function to generate random strings of `length` characters from [a-zA-Z0-9].
+// rand() is never seeded in this file, so the sequence repeats on every run.
+std::string generateRandomString(size_t length) {
+    const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+    std::string result;
+    result.reserve(length);
+    for (size_t i = 0; i < length; ++i) {
+        result += charset[rand() % (sizeof(charset) - 1)];
+    }
+    return result;
+}
+
+// Converts a byte count and an elapsed time in seconds into MB/s.
+// Returns 0 when no time elapsed so the CSV never contains inf or NaN.
+double throughputMBps(size_t bytes, double elapsedSeconds) {
+    if (elapsedSeconds <= 0.0) {
+        return 0.0;
+    }
+    return (static_cast<double>(bytes) / MB) / elapsedSeconds;
+}
+
+// Function to benchmark Put operation: inserts at least dataSizeMB of random
+// key/value bytes into db and appends one result row to csvFile.
+// NOTE(review): the timed region also includes generating the random strings.
+// NOTE(review): every call reopens DB_NAME; whether data from earlier runs is
+// still present depends on VeloxDB::Open/Close - confirm.
+void benchmarkPut(std::unique_ptr<VeloxDB>& db, size_t dataSizeMB, size_t memtableSize, std::ofstream& csvFile) {
+    std::cout << "Benchmarking Put: MemtableSize = " << memtableSize / MB << "MB, DataSize = " << dataSizeMB << "MB" << std::endl;
+
+    // Open the database
+    db->Open(DB_NAME);
+
+    // Start timing
+    auto start = high_resolution_clock::now();
+
+    // Insert data
+    size_t bytesInserted = 0;
+    while (bytesInserted < dataSizeMB * MB) {
+        std::string key = generateRandomString(16); // 16-byte key
+        std::string value = generateRandomString(100); // 100-byte value
+        db->Put(key, value);
+        bytesInserted += key.size() + value.size();
+    }
+
+    // Stop timing; keep fractional seconds so sub-millisecond runs do not collapse to zero
+    auto stop = high_resolution_clock::now();
+    const double elapsedSeconds = duration<double>(stop - start).count();
+
+    // Calculate throughput in MB/s from the bytes actually inserted
+    const double throughput = throughputMBps(bytesInserted, elapsedSeconds);
+
+    // Write result to CSV; the memtable size is written in MB, the unit the plot legend uses
+    csvFile << memtableSize / MB << "," << dataSizeMB << "," << throughput << std::endl;
+
+    // Close the database
+    db->Close();
+}
+
+int main() {
+    std::ofstream csvFile("put_throughput.csv");
+    if (!csvFile) {
+        std::cerr << "Failed to open put_throughput.csv for writing" << std::endl;
+        return 1;
+    }
+    csvFile << "MemtableSize,DataSizeMB,Throughput(MB/s)\n";
+
+    // Benchmark configurations
+    std::vector<size_t> memtableSizes = {1 * MB, 5 * MB, 10 * MB}; // Memtable sizes: 1MB, 5MB, 10MB
+
+    // Run benchmarks for each Memtable size and data size (2^n MB)
+    for (auto memtableSize : memtableSizes) {
+        auto db = std::make_unique<VeloxDB>(memtableSize, 3); // Initialize VeloxDB with custom Memtable size
+        for (size_t dataSizeMB = START_DATA_SIZE_MB; dataSizeMB <= END_DATA_SIZE_MB; dataSizeMB *= 2) {
+            benchmarkPut(db, dataSizeMB, memtableSize, csvFile);
+        }
+    }
+
+    csvFile.close();
+    std::cout << "Benchmark completed. Results saved to put_throughput.csv" << std::endl;
+    return 0;
+}
+
diff --git a/CMakeLists.txt b/CMakeLists.txt
index de96915..265ef58 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -153,3 +153,5 @@ include_directories(
 
 # Add the generated protobuf directory to the include path
 include_directories(${GENERATED_PROTO_DIR})
+# Optionally include the Benchmark directory
+add_subdirectory(Benchmark)