Skip to content
This repository has been archived by the owner on Mar 23, 2023. It is now read-only.

Commit

Permalink
[utils] add benchmark for YCSB
Browse files Browse the repository at this point in the history
This tool allows you to define multiple test suites, run them one by one,
and parse the output into an easy-to-use form as CSV files.
  • Loading branch information
KFilipek committed Mar 9, 2021
1 parent df8221c commit c1fe545
Show file tree
Hide file tree
Showing 3 changed files with 317 additions and 0 deletions.
40 changes: 40 additions & 0 deletions utils/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import os
from os.path import join, getsize

# Tags of the YCSB summary sections that are exported to the CSV.
SUMMARY_TAGS = ('[READ]', '[INSERT]', '[UPDATE]', '[OVERALL]')


def parse_run_lines(lines):
    """Return the summary records found in the lines of one YCSB run log.

    Each line is stripped of commas and split on single spaces.  A record is
    kept when its first field is one of SUMMARY_TAGS and its second field is
    NOT an integer (integer-keyed lines are dropped, presumably because they
    are per-bucket histogram output rather than named metrics).

    :param lines: iterable of raw log lines (e.g. an open file object).
    :returns: list of tuples ``(tag, metric_name, value, ...)``; the value
        still carries its trailing newline.
    """
    records = []
    for line in lines:
        record = tuple(line.replace(',', '').split(' '))
        # A usable record needs tag, metric name and value; shorter lines
        # would only blow up later when the value column is read.
        if len(record) < 3 or record[0] not in SUMMARY_TAGS:
            continue
        try:
            int(record[1])
        except ValueError:
            # Second column is a metric name, not a number: keep it.
            records.append(record)
    return records


def build_csv_rows(parsed_results):
    """Build the ';'-separated CSV rows for one results directory.

    :param parsed_results: non-empty list of ``[thread_count, records]``
        pairs, already sorted by thread count.  Every run is expected to
        contain the same records, in the same order, as the first run; a run
        with more records than the first raises IndexError.
    :returns: list of row strings (without newline).  The first row is the
        ``Threads;#;...`` header; every other row is ``tag;metric;`` followed
        by one value per run, with '.' replaced by ',' as decimal separator.
    """
    rows = [record[0] + ';' + record[1] + ';' for record in parsed_results[0][1]]
    threads = 'Threads;#;'
    for thread_count, records in parsed_results:
        threads += str(thread_count) + ';'
        for i, record in enumerate(records):
            rows[i] += record[2].replace('\n', '').replace('.', ',') + ';'
    rows.insert(0, threads)
    return rows


# Walk the results tree and convert the run logs of every leaf directory
# into a results.csv placed next to them.
for root, dirs, filenames in os.walk('results'):
    if dirs:
        # Only directories without sub-directories are processed.
        continue

    parsed_results = []
    for filename in filenames:
        name_parts = filename.split('_')
        if name_parts[0] != 'run':
            continue
        with open(os.path.join(root, filename)) as file_object:
            file_object.readline()  # the first line of the log is skipped
            trimmed_lines = parse_run_lines(file_object)
        # File names look like run_<threads>.<ext>; order runs by <threads>.
        parsed_results.append([int(name_parts[1].split('.')[0]), trimmed_lines])

    parsed_results.sort(key=lambda result: result[0])
    print(root)
    if not parsed_results:
        continue

    print('------CSV------')
    csv = build_csv_rows(parsed_results)
    # The 'with' block closes the file; no explicit close() is needed.
    with open(os.path.join(root, 'results.csv'), 'w') as csv_file:
        for row in csv:
            csv_file.write(row + '\n')
            print(row)
202 changes: 202 additions & 0 deletions utils/run_suite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
#!/usr/bin/python2
import json
import os
import subprocess

# Example suite definition (format of test_suite.txt):
# SUITE write_workload
# THREADS 1 2 4 8 16 32 48 64 96
# JOURNALING enabled/disabled
# RECORDS 1000
# OPERATIONS 100
# READ_PROPORTION 0.0
# UPDATE_PROPORTION 0.0
# INSERT_PROPORTION 1.0
# YCSB_NUMA 1
# DROP_BEFORE
# ENDSUITE

# Read the YCSB directory from the local configuration file.
# Expected line format: YCSB_PATH=<directory>
PATH_TO_YCSB = ''

# 'with' releases the file handle even when validation below raises.
with open("path_configuration.txt", "r") as path_configuration:
    for line in path_configuration:
        if line.startswith('YCSB_PATH='):
            # Split on the first '=' only, so a path containing '=' survives.
            PATH_TO_YCSB = line.split("=", 1)[1].strip()
            if not PATH_TO_YCSB:
                raise NameError('No path in YCSB_PATH!')

# Also covers the case where no YCSB_PATH line was present at all.
if not os.path.isdir(PATH_TO_YCSB):
    raise NameError('Wrong path to YCSB!')

class Test:
    """Settings of a single benchmark suite.

    Every attribute starts at a default; numeric fields initialised to
    -1 / -1.0 start out negative until a value is assigned.
    """

    def __init__(self):
        # Suite identity and thread counts to run with.
        self.testName = ""
        self.threads = []
        # Workload size and operation mix.
        self.workload_type = "workloada"
        self.records = 0
        self.operations = 0
        self.read_proportion = -1.0
        self.update_proportion = -1.0
        self.insert_proportion = -1.0
        # Execution environment.
        self.ycsb_numa = -1
        self.is_load = -1
        # pmemkv engine settings.
        self.pmemkv_engine = "cmap"
        self.pmemkv_dbsize = 0
        self.pmemkv_dbpath = "/dev/shm/"

    def toJSON(self):
        """Return the attributes as pretty-printed JSON with sorted keys."""
        def attributes_of(obj):
            # Fallback for objects json cannot encode: use their attributes.
            return obj.__dict__

        return json.dumps(self, default=attributes_of, sort_keys=True, indent=4)

def getArgs(str):
    """Return every element of the sequence except the first, as a list.

    Used on a split configuration line: element 0 is the keyword, the rest
    are its arguments.  An empty or one-element sequence yields [].
    """
    return list(str[1:])

# All keywords accepted in a suite definition.
# Add a keyword here if you need to extend the implementation.
KEYWORDS = {
    "SUITE",
    "ENDSUITE",
    "THREADS",
    "JOURNALING",
    "RECORDS",
    "OPERATIONS",
    "LOAD",
    "READ_PROPORTION",
    "UPDATE_PROPORTION",
    "INSERT_PROPORTION",
    "YCSB_NUMA",
    "DROP_BEFORE",
    "CREATE_AFTER_DROP",
    "PMEMKV_ENGINE",
    "PMEMKV_DBSIZE",
    "PMEMKV_DBPATH",
    "WORKLOAD_TYPE",
}

# Parse test_suite.txt into a list of Test objects (one per SUITE line).

# Keywords carrying exactly one value, mapped to the Test attribute they set.
SINGLE_VALUE_KEYWORDS = {
    "RECORDS": "records",
    "OPERATIONS": "operations",
    "READ_PROPORTION": "read_proportion",
    "UPDATE_PROPORTION": "update_proportion",
    "INSERT_PROPORTION": "insert_proportion",
    "YCSB_NUMA": "ycsb_numa",
    "PMEMKV_ENGINE": "pmemkv_engine",
    "PMEMKV_DBSIZE": "pmemkv_dbsize",
    "PMEMKV_DBPATH": "pmemkv_dbpath",
    "WORKLOAD_TYPE": "workload_type",
}
# Keywords that are listed in KEYWORDS but have no attribute on Test; they
# are accepted and skipped instead of aborting the whole run.
IGNORED_KEYWORDS = ("JOURNALING", "DROP_BEFORE", "CREATE_AFTER_DROP")

configurations = []
# The 'with' block closes the file; no explicit close() is needed.
with open("test_suite.txt", "r") as configfile:
    for line in configfile:
        splittedLine = line.split()
        # Skip blank / whitespace-only lines and comments (even indented ones).
        if not splittedLine or splittedLine[0].startswith('#'):
            continue

        found_keywords = set.intersection(KEYWORDS, splittedLine)
        if len(found_keywords) != 1:
            print(splittedLine)
            if not found_keywords:
                raise NameError('No keyword in line!')
            raise NameError('Too many keywords in single line!')

        keyword = splittedLine[0]
        # Everything after the keyword is its argument list.
        args = getArgs(splittedLine)

        if keyword == "ENDSUITE" or keyword in IGNORED_KEYWORDS:
            continue

        if keyword == "SUITE":
            if not args:
                raise NameError('Missing argument for keyword SUITE')
            configurations.append(Test())
            configurations[-1].testName = args[0]
            continue

        # Every remaining keyword modifies the most recently opened suite.
        if not configurations:
            raise NameError('Keyword ' + keyword + ' used before SUITE')
        current = configurations[-1]

        if keyword == "THREADS":
            current.threads = args
        elif keyword == "LOAD":
            current.is_load = 1
        elif keyword in SINGLE_VALUE_KEYWORDS:
            if not args:
                raise NameError('Missing argument for keyword ' + keyword)
            setattr(current, SINGLE_VALUE_KEYWORDS[keyword], args[0])
        else:
            # The line contains a keyword, but not in first position.
            raise NameError('Unrecognized keyword')

# Echo every parsed suite to the console as an aligned label/value table.
print('Script read those tests:')
SUMMARY_ROW = '{:>20} {:<12}'
for i, conf in enumerate(configurations, 1):
    summary = (
        ('Test#: ', str(i)),
        ("Name: ", conf.testName),
        ("Threads: ", str(conf.threads)),
        ("Records: ", conf.records),
        ("Operation: ", conf.operations),
        ("Read proportion: ", str(conf.read_proportion)),
        ("Update proportion: ", str(conf.update_proportion)),
        ("Insert proportion: ", str(conf.insert_proportion)),
        ("Is load: ", str(conf.is_load)),
        ("NUMA for YCSB: ", conf.ycsb_numa),
        ("Workload type: ", conf.workload_type),
        ("Pmemkv engine: ", conf.pmemkv_engine),
        ("Pmemkv size: ", conf.pmemkv_dbsize),
        ("Pmemkv path: ", conf.pmemkv_dbpath),
    )
    for label, value in summary:
        print(SUMMARY_ROW.format(label, value))
    # Blank line between suites.
    print("")

# Store the parsed suites under results/:
#  - configurations.json receives one JSON document per suite,
#  - <suite name>/test_description.txt gets a readable summary appended.
results_directory = "results/"
if not os.path.exists(results_directory):
    os.makedirs(results_directory)

DESCRIPTION_ROW = '{:>20} {:<12}'
with open(results_directory + '/configurations.json', 'w') as jsonconfig:
    for i, conf in enumerate(configurations, 1):
        jsonconfig.write(conf.toJSON() + '\n')

        suite_directory = results_directory + conf.testName + '/'
        if not os.path.exists(suite_directory):
            os.makedirs(suite_directory)

        description = (
            ('Test#: ', str(i)),
            ("Name: ", conf.testName),
            ("Threads: ", str(conf.threads)),
            ("Records: ", conf.records),
            ("Operation: ", conf.operations),
            ("Read proportion: ", str(conf.read_proportion)),
            ("Update proportion: ", str(conf.update_proportion)),
            ("Insert proportion: ", str(conf.insert_proportion)),
            ("NUMA for YCSB: ", conf.ycsb_numa),
            ("Workload type: ", conf.workload_type),
            ("Pmemkv engine: ", conf.pmemkv_engine),
            ("Pmemkv size: ", conf.pmemkv_dbsize),
            ("Pmemkv path: ", conf.pmemkv_dbpath),
        )
        # Append mode: an existing description file is extended, not replaced.
        with open(suite_directory + 'test_description.txt', 'a') as test_description:
            for label, value in description:
                test_description.write(DESCRIPTION_ROW.format(label, value) + '\n')
            test_description.write('\n')

# Build one ./run_workload.sh command line per (suite, thread count) pair.
generated_commands = []
for test in configurations:
    # Phase passed to run_workload.sh: 'load' only when LOAD was given.
    phase = 'load' if test.is_load == 1 else 'run'
    command_prefix = './run_workload.sh ' + test.testName + ' ' + phase + ' '

    # Compare as text: the value is a string when read from the suite file
    # and the integer -1 when the keyword was omitted.
    if str(test.ycsb_numa) == '-1':
        print('NUMA node is not set for test: ' + test.testName + '.')

    # Positional arguments following the thread count, in the order
    # run_workload.sh expects them.
    suffix_fields = [
        PATH_TO_YCSB,            # path to YCSB main directory
        test.records,            # operation numbers
        test.operations,
        test.read_proportion,    # workload ratios
        test.update_proportion,
        test.insert_proportion,
        test.ycsb_numa,          # NUMA node
        test.workload_type,      # workload type
        test.pmemkv_engine,      # engine specific fields
        test.pmemkv_dbsize,
        test.pmemkv_dbpath,
    ]
    # str() is required: fields left at their defaults are int/float, and
    # concatenating those with strings raises TypeError.
    command_suffix = ' '.join(str(field) for field in suffix_fields) + ' '

    for thread_no in test.threads:
        generated_commands.append(command_prefix + str(thread_no) + ' ' + command_suffix)

# Write the generated commands into testplan.sh, one per line after the
# bash shebang, then show them on the console.
script_lines = ['#!/bin/bash\n']
script_lines.extend(command + '\n' for command in generated_commands)
with open('testplan.sh', 'w') as testplan:
    testplan.writelines(script_lines)
print(generated_commands)
75 changes: 75 additions & 0 deletions utils/run_workload.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/bin/bash
# Run one YCSB phase against pmemkv and store its output in
# results/<suite name>/<phase>_<threads>.log (relative to the calling directory).
#
# e.g. ./run_workload.sh run_cmap run 12 PATH_TO_YCSB 1000000 1000000 \
#          -1.0 -1.0 -1.0 1 workloadb csmap 80000000 DBPATH
#
# Positional arguments (all 14 are required):
#  1 - suite name
#  2 - ycsb phase: load/run
#  3 - thread count
#  4 - path to YCSB
#  5 - record count
#  6 - operation count
#  7 - read proportion
#  8 - update proportion
#  9 - insert proportion
# 10 - NUMA node for YCSB
# 11 - workload scenario (workload[a-f])
####### Engine related args
# 12 - pmemkv: engine name
# 13 - pmemkv: pool size
# 14 - pmemkv: path to pool
#
# A proportion of "-1.0" is not passed to YCSB; a NUMA node of "-1" disables
# numactl.

# Use the YCSB directory given by the caller (argument 4).
YCSB_PATH="$4"
echo "$YCSB_PATH"
OLD_PATH=$(pwd)

echo "$@"
echo "Passed $# arguments to script"

if [ "$#" -ne 14 ];
then
    echo "Illegal number of parameters, should be 14. Check script documentation." >&2
    exit 1
fi

mkdir -p "results/$1/" # Create results directory: results/{test_suite_name}/

# Prepare optional arguments for YCSB. Arrays keep every value a single word.
NUMA_CMD=()
RATIO_ARGS=()
if [ "$7" != "-1.0" ];
then
    RATIO_ARGS+=(-p "readproportion=$7")
fi
if [ "$9" != "-1.0" ];
then
    RATIO_ARGS+=(-p "insertproportion=$9")
fi
if [ "$8" != "-1.0" ];
then
    RATIO_ARGS+=(-p "updateproportion=$8")
fi
if [ "${10}" != "-1" ];
then
    NUMA_CMD=(numactl -N "${10}")
fi

# TODO(kfilipek): Implement splitting threads into processes
# Abort if the directory cannot be entered: the rm commands below must never
# run from an unexpected working directory.
cd "$YCSB_PATH" || { echo "Cannot enter YCSB directory: $YCSB_PATH" >&2; exit 1; }
if [ "$2" == "load" ];
then
    # Remove old DB before new load phase
    echo "Remove old DB: ${14}"
    rm -rf "${14}"
fi
# NOTE(review): this removal also happens for the "run" phase, deleting a pool
# file created by a previous "load" - confirm that this is intended.
rm -f "${14}"
"${NUMA_CMD[@]}" bin/ycsb.sh "$2" pmemkv -threads "$3" -P "workloads/${11}" \
    -p hdrhistogram.percentiles=95,99,99.9,99.99 "${RATIO_ARGS[@]}" \
    -p "recordcount=$5" -p "operationcount=$6" \
    -p "pmemkv.engine=${12}" -p "pmemkv.dbsize=${13}" -p "pmemkv.dbpath=${14}" \
    > "$OLD_PATH/results/$1/${2}_${3}.log"
cd "$OLD_PATH"

0 comments on commit c1fe545

Please sign in to comment.