Skip to content

Commit

Permalink
benchmark update Get
Browse files Browse the repository at this point in the history
  • Loading branch information
kkli08 committed Oct 4, 2024
1 parent 7847a74 commit 9af6076
Show file tree
Hide file tree
Showing 4 changed files with 201 additions and 5 deletions.
21 changes: 18 additions & 3 deletions Benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,38 @@ if(APPLE)
set(CMAKE_OSX_ARCHITECTURES arm64)
endif()


# === === === Benchmark helper === === ===
# Declares one benchmark executable: compiles the given sources, adds the
# project root and this directory to its include path, and links it against
# the VeloxDB library. Keeps the per-benchmark stanzas below to one line each.
function(add_veloxdb_benchmark TARGET_NAME)
    add_executable(${TARGET_NAME} ${ARGN})
    target_include_directories(${TARGET_NAME} PRIVATE
            ${CMAKE_SOURCE_DIR}
            ${CMAKE_CURRENT_SOURCE_DIR}
    )
    target_link_libraries(${TARGET_NAME} PRIVATE
            veloxdb_lib
    )
endfunction()

# === === === Put throughput === === ===
add_veloxdb_benchmark(put_benchmark put_throughput_benchmark.cpp)

# === === === Get Latency === === ===
add_veloxdb_benchmark(get_benchmark get_latency_benchmark.cpp)
64 changes: 64 additions & 0 deletions Benchmark/get_latency/plot_get_latency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Per-platform plot configuration: CSV path, line-color palette, and marker.
# Linux uses shades of purple, macOS shades of orange.
PLATFORMS = {
    'Linux': ("linux_get_latency.csv",
              ['#9370DB', '#7B68EE', '#4B0082'],  # Medium Purple, Medium Slate Blue, Indigo
              'o'),
    'macOS': ("macos_get_latency.csv",
              ['#FFB347', '#FF8C00', '#FF4500'],  # Light Orange, Dark Orange, Orange Red
              '^'),
}

plt.figure(figsize=(10, 6))

# For each platform: load the latency CSV, pivot it so memtable sizes become
# columns indexed by input data size, and draw one line per memtable size.
for platform, (csv_path, palette, marker) in PLATFORMS.items():
    pivoted = pd.read_csv(csv_path).pivot(
        index='DataSizeMB', columns='MemtableSizeMB', values='AverageLatency(ms)')
    data_sizes = pivoted.index.to_numpy()
    for position, memtable_mb in enumerate(pivoted.columns):
        plt.plot(data_sizes, pivoted[memtable_mb].to_numpy(),
                 color=palette[position % len(palette)], marker=marker, linestyle='-',
                 label=f'{platform} {memtable_mb} MB')

# Title and axis labels
plt.title('Get Latency for Different Memtable Sizes (Linux vs. macOS)')
plt.xlabel('Input Data Size (MB)')
plt.ylabel('Average Latency (ms)')

# Log-2 x-axis with explicit tick labels at 2, 4, 8, ..., 512 so the doubled
# data sizes are evenly spaced.
tick_positions = [2 ** i for i in range(1, 10)]
axis = plt.gca()
axis.set_xscale('log', base=2)
axis.set_xticks(tick_positions)
axis.set_xticklabels([str(tick) for tick in tick_positions])

# Legend keyed by memtable size
plt.legend(title='Memtable Size (MB)', loc='upper left', fontsize=9)

# No gridlines, and no top/right border spines
plt.grid(False)
axis.spines['top'].set_visible(False)
axis.spines['right'].set_visible(False)

# Optional: Remove left and bottom spines if desired
# axis.spines['left'].set_visible(False)
# axis.spines['bottom'].set_visible(False)

# Display the figure
plt.show()
117 changes: 117 additions & 0 deletions Benchmark/get_latency_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
//
// Created by Damian Li on 2024-10-04.
//
#include <chrono>
#include <cstdlib>
#include <ctime>       // time() used to seed srand()
#include <filesystem>  // Include filesystem
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>      // std::vector<std::string> key list in benchmarkGet
#include "VeloxDB.h"

namespace fs = std::filesystem;
using namespace std::chrono;

// Constants for benchmark
constexpr size_t MB = 1024 * 1024; // 1MB in bytes
constexpr size_t START_DATA_SIZE_MB = 1; // Start with 1 MB
constexpr size_t END_DATA_SIZE_MB = 512; // End with 512 MB (adjust as needed)
// On-disk database directory; created by each benchmark run and deleted afterwards.
const std::string DB_NAME = "benchmark_db";

// Builds a pseudo-random string of `length` characters drawn from the
// alphanumeric alphabet [a-zA-Z0-9] via rand() (seeded in main()).
std::string generateRandomString(size_t length) {
    static const char kAlphabet[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    constexpr size_t kAlphabetSize = sizeof(kAlphabet) - 1; // exclude trailing '\0'
    std::string out;
    out.reserve(length);
    size_t remaining = length;
    while (remaining-- > 0) {
        out.push_back(kAlphabet[rand() % kAlphabetSize]);
    }
    return out;
}

// Function to benchmark Get operation
void benchmarkGet(size_t dataSizeMB, size_t memtableSize, std::ofstream& csvFile) {
std::cout << "Benchmarking Get: MemtableSize = " << memtableSize / MB
<< "MB, DataSize = " << dataSizeMB << "MB" << std::endl;

// Create the database object with the specified memtable size
auto db = std::make_unique<VeloxDB>(memtableSize, 3); // Adjust other parameters as needed

// Open the database
db->Open(DB_NAME);

// Insert data first to populate the database
size_t bytesInserted = 0;
std::vector<std::string> keys;
while (bytesInserted < dataSizeMB * MB) {
std::string key = generateRandomString(16); // 16-byte key
std::string value = generateRandomString(100); // 100-byte value
db->Put(key, value);
keys.push_back(key); // Save the key for later retrieval
bytesInserted += key.size() + value.size();
}

// Start timing the Get operations
auto start = high_resolution_clock::now();

// Retrieve all inserted keys
for (const auto& key : keys) {
auto result = db->Get(key);
}

// Stop timing
auto stop = high_resolution_clock::now();
auto duration = duration_cast<milliseconds>(stop - start).count();

// Calculate average latency in milliseconds
double average_latency = static_cast<double>(duration) / keys.size();

// Write result to CSV
csvFile << memtableSize / MB << "," << dataSizeMB << "," << average_latency << std::endl;

// Close the database
db->Close();

// Delete the database files to free up space
try {
if (fs::exists(DB_NAME)) {
fs::remove_all(DB_NAME);
std::cout << "Deleted database directory: " << DB_NAME << std::endl;
}
} catch (const fs::filesystem_error& e) {
std::cerr << "Error deleting database directory: " << e.what() << std::endl;
}
}

// Entry point: runs the Get-latency benchmark for every (memtable size,
// data size) combination and records results in ./get_latency/get_latency.csv.
// Returns 0 on success, 1 if the output CSV cannot be opened.
int main() {
    // Seed the random number generator
    srand(static_cast<unsigned>(time(nullptr)));

    // Define the output directory for the CSV file
    std::string outputDir = "./get_latency";
    std::string outputFilePath = outputDir + "/get_latency.csv";

    // Create the directory if it does not exist
    if (!fs::exists(outputDir)) {
        fs::create_directories(outputDir);
    }

    // Open CSV file for writing; fail fast rather than running the whole
    // (long) benchmark with nowhere to record the results.
    std::ofstream csvFile(outputFilePath);
    if (!csvFile.is_open()) {
        std::cerr << "Failed to open output file: " << outputFilePath << std::endl;
        return 1;
    }
    csvFile << "MemtableSizeMB,DataSizeMB,AverageLatency(ms)\n";

    // Benchmark configurations
    std::vector<size_t> memtableSizes = {1 * MB, 5 * MB, 10 * MB}; // Memtable sizes: 1MB, 5MB, 10MB

    // Run benchmarks for each Memtable size, doubling the data size from
    // START_DATA_SIZE_MB up to END_DATA_SIZE_MB (1, 2, 4, ..., 512 MB).
    for (auto memtableSize : memtableSizes) {
        for (size_t dataSizeMB = START_DATA_SIZE_MB; dataSizeMB <= END_DATA_SIZE_MB; dataSizeMB *= 2) {
            benchmarkGet(dataSizeMB, memtableSize, csvFile);
        }
    }

    csvFile.close();
    std::cout << "Benchmark completed. Results saved to " << outputFilePath << std::endl;
    return 0;
}
4 changes: 2 additions & 2 deletions Benchmark/put_throughput_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include <fstream>
#include <cstdlib>
#include <filesystem> // Include filesystem
#include "../VeloxDB/VeloxDB.h"
#include "VeloxDB.h"

namespace fs = std::filesystem;
using namespace std::chrono;
Expand Down Expand Up @@ -92,7 +92,7 @@ int main() {
csvFile << "MemtableSizeMB,DataSizeMB,Throughput(MB/s)\n";

// Benchmark configurations
std::vector<size_t> memtableSizes = {1 * MB, 5 * MB, 10 * MB, 15 * MB}; // Memtable sizes: 1MB, 5MB, 10MB
std::vector<size_t> memtableSizes = {1 * MB, 5 * MB, 10 * MB}; // Memtable sizes: 1MB, 5MB, 10MB

// Run benchmarks for each Memtable size and data size
for (auto memtableSize : memtableSizes) {
Expand Down

0 comments on commit 9af6076

Please sign in to comment.