Skip to content

Commit

Permalink
benchmark update Get
Browse files Browse the repository at this point in the history
  • Loading branch information
kkli08 committed Oct 4, 2024
1 parent 7847a74 commit 9af6076
Show file tree
Hide file tree
Showing 4 changed files with 201 additions and 5 deletions.
21 changes: 18 additions & 3 deletions Benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,38 @@ if(APPLE)
set(CMAKE_OSX_ARCHITECTURES arm64)
endif()


# === === === Benchmark helper === === ===
# Declares one benchmark executable: compiles the given sources, adds the
# project root and this directory to its include path, and links it against
# the VeloxDB library. Keeps the per-benchmark stanzas below to one line each.
function(add_veloxdb_benchmark TARGET_NAME)
    add_executable(${TARGET_NAME} ${ARGN})
    target_include_directories(${TARGET_NAME} PRIVATE
            ${CMAKE_SOURCE_DIR}
            ${CMAKE_CURRENT_SOURCE_DIR}
    )
    target_link_libraries(${TARGET_NAME} PRIVATE
            veloxdb_lib
    )
endfunction()

# === === === Put throughput === === ===
add_veloxdb_benchmark(put_benchmark put_throughput_benchmark.cpp)

# === === === Get Latency === === ===
add_veloxdb_benchmark(get_benchmark get_latency_benchmark.cpp)
64 changes: 64 additions & 0 deletions Benchmark/get_latency/plot_get_latency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Per-platform plot configuration: CSV path, line-color palette, and marker.
# Linux uses shades of purple, macOS shades of orange.
PLATFORMS = {
    'Linux': ("linux_get_latency.csv",
              ['#9370DB', '#7B68EE', '#4B0082'],  # Medium Purple, Medium Slate Blue, Indigo
              'o'),
    'macOS': ("macos_get_latency.csv",
              ['#FFB347', '#FF8C00', '#FF4500'],  # Light Orange, Dark Orange, Orange Red
              '^'),
}

plt.figure(figsize=(10, 6))

# For each platform: load the latency CSV, pivot it so memtable sizes become
# columns indexed by input data size, and draw one line per memtable size.
for platform, (csv_path, palette, marker) in PLATFORMS.items():
    pivoted = pd.read_csv(csv_path).pivot(
        index='DataSizeMB', columns='MemtableSizeMB', values='AverageLatency(ms)')
    data_sizes = pivoted.index.to_numpy()
    for position, memtable_mb in enumerate(pivoted.columns):
        plt.plot(data_sizes, pivoted[memtable_mb].to_numpy(),
                 color=palette[position % len(palette)], marker=marker, linestyle='-',
                 label=f'{platform} {memtable_mb} MB')

# Title and axis labels
plt.title('Get Latency for Different Memtable Sizes (Linux vs. macOS)')
plt.xlabel('Input Data Size (MB)')
plt.ylabel('Average Latency (ms)')

# Log-2 x-axis with explicit tick labels at 2, 4, 8, ..., 512 so the doubled
# data sizes are evenly spaced.
tick_positions = [2 ** i for i in range(1, 10)]
axis = plt.gca()
axis.set_xscale('log', base=2)
axis.set_xticks(tick_positions)
axis.set_xticklabels([str(tick) for tick in tick_positions])

# Legend keyed by memtable size
plt.legend(title='Memtable Size (MB)', loc='upper left', fontsize=9)

# No gridlines, and no top/right border spines
plt.grid(False)
axis.spines['top'].set_visible(False)
axis.spines['right'].set_visible(False)

# Optional: Remove left and bottom spines if desired
# axis.spines['left'].set_visible(False)
# axis.spines['bottom'].set_visible(False)

# Display the figure
plt.show()
117 changes: 117 additions & 0 deletions Benchmark/get_latency_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
//
// Created by Damian Li on 2024-10-04.
//
#include <chrono>
#include <cstdlib>
#include <ctime>       // time() used to seed srand()
#include <filesystem>  // Include filesystem
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>      // std::vector<std::string> key list in benchmarkGet
#include "VeloxDB.h"

namespace fs = std::filesystem;
using namespace std::chrono;

// Constants for benchmark
constexpr size_t MB = 1024 * 1024; // 1MB in bytes
constexpr size_t START_DATA_SIZE_MB = 1; // Start with 1 MB
constexpr size_t END_DATA_SIZE_MB = 512; // End with 512 MB (adjust as needed)
// On-disk database directory; created by each benchmark run and deleted afterwards.
const std::string DB_NAME = "benchmark_db";

// Builds a pseudo-random string of `length` characters drawn from the
// alphanumeric alphabet [a-zA-Z0-9] via rand() (seeded in main()).
std::string generateRandomString(size_t length) {
    static const char kAlphabet[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    constexpr size_t kAlphabetSize = sizeof(kAlphabet) - 1; // exclude trailing '\0'
    std::string out;
    out.reserve(length);
    size_t remaining = length;
    while (remaining-- > 0) {
        out.push_back(kAlphabet[rand() % kAlphabetSize]);
    }
    return out;
}

// Function to benchmark Get operation
void benchmarkGet(size_t dataSizeMB, size_t memtableSize, std::ofstream& csvFile) {
std::cout << "Benchmarking Get: MemtableSize = " << memtableSize / MB
<< "MB, DataSize = " << dataSizeMB << "MB" << std::endl;

// Create the database object with the specified memtable size
auto db = std::make_unique<VeloxDB>(memtableSize, 3); // Adjust other parameters as needed

// Open the database
db->Open(DB_NAME);

// Insert data first to populate the database
size_t bytesInserted = 0;
std::vector<std::string> keys;
while (bytesInserted < dataSizeMB * MB) {
std::string key = generateRandomString(16); // 16-byte key
std::string value = generateRandomString(100); // 100-byte value
db->Put(key, value);
keys.push_back(key); // Save the key for later retrieval
bytesInserted += key.size() + value.size();
}

// Start timing the Get operations
auto start = high_resolution_clock::now();

// Retrieve all inserted keys
for (const auto& key : keys) {
auto result = db->Get(key);
}

// Stop timing
auto stop = high_resolution_clock::now();
auto duration = duration_cast<milliseconds>(stop - start).count();

// Calculate average latency in milliseconds
double average_latency = static_cast<double>(duration) / keys.size();

// Write result to CSV
csvFile << memtableSize / MB << "," << dataSizeMB << "," << average_latency << std::endl;

// Close the database
db->Close();

// Delete the database files to free up space
try {
if (fs::exists(DB_NAME)) {
fs::remove_all(DB_NAME);
std::cout << "Deleted database directory: " << DB_NAME << std::endl;
}
} catch (const fs::filesystem_error& e) {
std::cerr << "Error deleting database directory: " << e.what() << std::endl;
}
}

// Entry point: runs the Get-latency benchmark for every (memtable size,
// data size) combination and records results in ./get_latency/get_latency.csv.
// Returns 0 on success, 1 if the output CSV cannot be opened.
int main() {
    // Seed the random number generator
    srand(static_cast<unsigned>(time(nullptr)));

    // Define the output directory for the CSV file
    std::string outputDir = "./get_latency";
    std::string outputFilePath = outputDir + "/get_latency.csv";

    // Create the directory if it does not exist
    if (!fs::exists(outputDir)) {
        fs::create_directories(outputDir);
    }

    // Open CSV file for writing; fail fast rather than running the whole
    // (long) benchmark with nowhere to record the results.
    std::ofstream csvFile(outputFilePath);
    if (!csvFile.is_open()) {
        std::cerr << "Failed to open output file: " << outputFilePath << std::endl;
        return 1;
    }
    csvFile << "MemtableSizeMB,DataSizeMB,AverageLatency(ms)\n";

    // Benchmark configurations
    std::vector<size_t> memtableSizes = {1 * MB, 5 * MB, 10 * MB}; // Memtable sizes: 1MB, 5MB, 10MB

    // Run benchmarks for each Memtable size, doubling the data size from
    // START_DATA_SIZE_MB up to END_DATA_SIZE_MB (1, 2, 4, ..., 512 MB).
    for (auto memtableSize : memtableSizes) {
        for (size_t dataSizeMB = START_DATA_SIZE_MB; dataSizeMB <= END_DATA_SIZE_MB; dataSizeMB *= 2) {
            benchmarkGet(dataSizeMB, memtableSize, csvFile);
        }
    }

    csvFile.close();
    std::cout << "Benchmark completed. Results saved to " << outputFilePath << std::endl;
    return 0;
}
4 changes: 2 additions & 2 deletions Benchmark/put_throughput_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include <fstream>
#include <cstdlib>
#include <filesystem> // Include filesystem
#include "../VeloxDB/VeloxDB.h"
#include "VeloxDB.h"

namespace fs = std::filesystem;
using namespace std::chrono;
Expand Down Expand Up @@ -92,7 +92,7 @@ int main() {
csvFile << "MemtableSizeMB,DataSizeMB,Throughput(MB/s)\n";

// Benchmark configurations
std::vector<size_t> memtableSizes = {1 * MB, 5 * MB, 10 * MB, 15 * MB}; // Memtable sizes: 1MB, 5MB, 10MB
std::vector<size_t> memtableSizes = {1 * MB, 5 * MB, 10 * MB}; // Memtable sizes: 1MB, 5MB, 10MB

// Run benchmarks for each Memtable size and data size
for (auto memtableSize : memtableSizes) {
Expand Down

0 comments on commit 9af6076

Please sign in to comment.