diff --git a/Benchmark/CMakeLists.txt b/Benchmark/CMakeLists.txt index 1e73fb1..367f761 100644 --- a/Benchmark/CMakeLists.txt +++ b/Benchmark/CMakeLists.txt @@ -9,23 +9,38 @@ if(APPLE) set(CMAKE_OSX_ARCHITECTURES arm64) endif() + +# === === === Put throughput === === === # Source files for the benchmark set(BENCHMARK_SRCS put_throughput_benchmark.cpp ) - # Add executable for the benchmark add_executable(put_benchmark ${BENCHMARK_SRCS} ) - # Include directories for the benchmark executable target_include_directories(put_benchmark PRIVATE ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ) - # Link libraries to the benchmark executable target_link_libraries(put_benchmark PRIVATE veloxdb_lib ) + +# === === === Get Latency === === === +set(GET_BENCHMARK_SRCS + get_latency_benchmark.cpp +) +# Add executable for Get latency benchmark +add_executable(get_benchmark ${GET_BENCHMARK_SRCS}) + +target_include_directories(get_benchmark PRIVATE + ${CMAKE_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR} +) +# Link VeloxDB library to the get_benchmark executable +target_link_libraries(get_benchmark PRIVATE + veloxdb_lib +) \ No newline at end of file diff --git a/Benchmark/get_latency/plot_get_latency.py b/Benchmark/get_latency/plot_get_latency.py new file mode 100644 index 0000000..1b50061 --- /dev/null +++ b/Benchmark/get_latency/plot_get_latency.py @@ -0,0 +1,64 @@ +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + +# Read the CSV files for Linux and macOS (latency data) +linux_data = pd.read_csv("linux_get_latency.csv") +macos_data = pd.read_csv("macos_get_latency.csv") + +# Pivot the data to get MemtableSize as columns and DataSizeMB as index +linux_pivot = linux_data.pivot(index='DataSizeMB', columns='MemtableSizeMB', values='AverageLatency(ms)') +macos_pivot = macos_data.pivot(index='DataSizeMB', columns='MemtableSizeMB', values='AverageLatency(ms)') + +# Define the color scheme for Linux and macOS +# Linux: Shades of purple, macOS: Shades of orange 
+linux_colors = ['#9370DB', '#7B68EE', '#4B0082'] # Medium Purple, Medium Slate Blue, Indigo +macos_colors = ['#FFB347', '#FF8C00', '#FF4500'] # Light Orange, Dark Orange, Orange Red + +# Set up the plot +plt.figure(figsize=(10, 6)) + +# Plot Linux data +x_values = linux_pivot.index.to_numpy() +for i, col in enumerate(linux_pivot.columns): + y_values = linux_pivot[col].to_numpy() + plt.plot(x_values, y_values, color=linux_colors[i % len(linux_colors)], marker='o', linestyle='-', + label=f'Linux {col} MB') + +# Plot macOS data with solid lines +x_values = macos_pivot.index.to_numpy() +for i, col in enumerate(macos_pivot.columns): + y_values = macos_pivot[col].to_numpy() + plt.plot(x_values, y_values, color=macos_colors[i % len(macos_colors)], marker='^', linestyle='-', + label=f'macOS {col} MB') + +# Set the title and labels +plt.title('Get Latency for Different Memtable Sizes (Linux vs. macOS)') +plt.xlabel('Input Data Size (MB)') +plt.ylabel('Average Latency (ms)') + +# Set custom x-axis values and labels +x_ticks = [2 ** i for i in range(1, 10)] # Use 2, 4, 8, 16, 32, ..., 512 +ax = plt.gca() # Get current axis +ax.set_xscale('log', base=2) # Set log scale for x-axis to maintain equal spacing + +# Set tick positions and labels manually +ax.set_xticks(x_ticks) # Set the position of the x ticks to match the values +ax.set_xticklabels([str(x) for x in x_ticks]) # Set the labels to display the actual numbers + +# Customize the legend +plt.legend(title='Memtable Size (MB)', loc='upper left', fontsize=9) + +# Remove gridlines (disable gridlines) +plt.grid(False) # Completely remove all gridlines + +# Remove border lines (spines) on the top and right +ax.spines['top'].set_visible(False) +ax.spines['right'].set_visible(False) + +# Optional: Remove left and bottom spines if desired +# ax.spines['left'].set_visible(False) +# ax.spines['bottom'].set_visible(False) + +# Show the plot +plt.show() diff --git a/Benchmark/get_latency_benchmark.cpp 
b/Benchmark/get_latency_benchmark.cpp new file mode 100644 index 0000000..259a2fd --- /dev/null +++ b/Benchmark/get_latency_benchmark.cpp @@ -0,0 +1,117 @@ +// +// Created by Damian Li on 2024-10-04. +// +#include <iostream> +#include <fstream> +#include <string> +#include <vector> +#include <chrono> +#include <memory> +#include <filesystem> // Include filesystem +#include "VeloxDB.h" + +namespace fs = std::filesystem; +using namespace std::chrono; + +// Constants for benchmark +constexpr size_t MB = 1024 * 1024; // 1MB in bytes +constexpr size_t START_DATA_SIZE_MB = 1; // Start with 1 MB +constexpr size_t END_DATA_SIZE_MB = 512; // End with 512 MB (adjust as needed) +const std::string DB_NAME = "benchmark_db"; + +// Function to generate random strings +std::string generateRandomString(size_t length) { + const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; + std::string result; + result.reserve(length); + for (size_t i = 0; i < length; ++i) { + result += charset[rand() % (sizeof(charset) - 1)]; + } + return result; +} + +// Function to benchmark Get operation +void benchmarkGet(size_t dataSizeMB, size_t memtableSize, std::ofstream& csvFile) { + std::cout << "Benchmarking Get: MemtableSize = " << memtableSize / MB + << "MB, DataSize = " << dataSizeMB << "MB" << std::endl; + + // Create the database object with the specified memtable size + auto db = std::make_unique<VeloxDB>(memtableSize, 3); // Adjust other parameters as needed + + // Open the database + db->Open(DB_NAME); + + // Insert data first to populate the database + size_t bytesInserted = 0; + std::vector<std::string> keys; + while (bytesInserted < dataSizeMB * MB) { + std::string key = generateRandomString(16); // 16-byte key + std::string value = generateRandomString(100); // 100-byte value + db->Put(key, value); + keys.push_back(key); // Save the key for later retrieval + bytesInserted += key.size() + value.size(); + } + + // Start timing the Get operations + auto start = high_resolution_clock::now(); + + // Retrieve all inserted keys + for (const auto& key :
keys) { + auto result = db->Get(key); + } + + // Stop timing + auto stop = high_resolution_clock::now(); + auto duration = duration_cast<milliseconds>(stop - start).count(); + + // Calculate average latency in milliseconds + double average_latency = static_cast<double>(duration) / keys.size(); + + // Write result to CSV + csvFile << memtableSize / MB << "," << dataSizeMB << "," << average_latency << std::endl; + + // Close the database + db->Close(); + + // Delete the database files to free up space + try { + if (fs::exists(DB_NAME)) { + fs::remove_all(DB_NAME); + std::cout << "Deleted database directory: " << DB_NAME << std::endl; + } + } catch (const fs::filesystem_error& e) { + std::cerr << "Error deleting database directory: " << e.what() << std::endl; + } +} + +int main() { + // Seed the random number generator + srand(static_cast<unsigned>(time(nullptr))); + + // Define the output directory for the CSV file + std::string outputDir = "./get_latency"; + std::string outputFilePath = outputDir + "/get_latency.csv"; + + // Create the directory if it does not exist + if (!fs::exists(outputDir)) { + fs::create_directories(outputDir); + } + + // Open CSV file for writing + std::ofstream csvFile(outputFilePath); + csvFile << "MemtableSizeMB,DataSizeMB,AverageLatency(ms)\n"; + + // Benchmark configurations + std::vector<size_t> memtableSizes = {1 * MB, 5 * MB, 10 * MB}; // Memtable sizes: 1MB, 5MB, 10MB + + // Run benchmarks for each Memtable size and data size + for (auto memtableSize : memtableSizes) { + for (size_t dataSizeMB = START_DATA_SIZE_MB; dataSizeMB <= END_DATA_SIZE_MB; dataSizeMB *= 2) { + benchmarkGet(dataSizeMB, memtableSize, csvFile); + } + } + + csvFile.close(); + std::cout << "Benchmark completed. 
Results saved to " << outputFilePath << std::endl; + return 0; +} diff --git a/Benchmark/put_throughput_benchmark.cpp b/Benchmark/put_throughput_benchmark.cpp index ca9df8b..c3b0ed6 100644 --- a/Benchmark/put_throughput_benchmark.cpp +++ b/Benchmark/put_throughput_benchmark.cpp @@ -5,7 +5,7 @@ #include <fstream> #include <chrono> #include <filesystem> // Include filesystem -#include "../VeloxDB/VeloxDB.h" +#include "VeloxDB.h" namespace fs = std::filesystem; using namespace std::chrono; @@ -92,7 +92,7 @@ int main() { csvFile << "MemtableSizeMB,DataSizeMB,Throughput(MB/s)\n"; // Benchmark configurations - std::vector<size_t> memtableSizes = {1 * MB, 5 * MB, 10 * MB, 15 * MB}; // Memtable sizes: 1MB, 5MB, 10MB + std::vector<size_t> memtableSizes = {1 * MB, 5 * MB, 10 * MB}; // Memtable sizes: 1MB, 5MB, 10MB // Run benchmarks for each Memtable size and data size for (auto memtableSize : memtableSizes) {