bench.py
#!/usr/bin/python3
import argparse
import collections
import csv
import os
import subprocess
import sys
from build import print_header, print_error, STEP_IMPLEMENTATIONS
INPUT_SIZES = [100, 160, 250, 400, 630, 1000, 1600, 2500, 4000, 6300]
STEP_ITERATIONS = [2, 3, 5, 10, 15, 20, 20, 20]
assert len(STEP_ITERATIONS) == len(STEP_IMPLEMENTATIONS)
# Prefix the benchmark command with taskset, binding the process to 4 physical cores
CPU_BIND_CMD = "taskset --cpu-list 0-3"
class PerfToolException(BaseException):
    pass
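# get_gflops converts the wall-clock time of one n-by-n step computation into GFLOP/s.
# The 2 * n ** 3 operation count presumably assumes two floating-point operations
# (one addition and one min) for each of the n^3 (i, j, k) combinations.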
def get_gflops(n, secs):
    total_float_ops = 2 * n ** 3
    return 1e-9 * total_float_ops / secs

class Reporter:
    supported = ("stdout", "csv")

    def __init__(self, out="stdout", output_path=None, use_perf=False):
        assert out in self.__class__.supported, "unsupported report format: {}".format(out)
        self.out = out
        self.use_perf = use_perf
        if use_perf:
            self.fieldnames = ["N (rows)", "time (us)", "instructions", "cycles", "GFLOP/s"]
        else:
            self.fieldnames = ["N (rows)", "iterations", "time (us)", "GFLOP/s"]
        self.output_path = output_path

    def print_row(self, row):
        if self.out == "stdout":
            if self.use_perf:
                insn_per_cycle = row["instructions"] / row["cycles"]
                print("{:8d}{:12d}{:15d}{:15d}{:10.2f}{:12.2f}".format(*row.values(), insn_per_cycle))
            else:
                print("{:8d}{:12d}{:15d}{:10.2f}".format(*row.values()))
        elif self.out == "csv":
            with open(self.output_path, "a") as f:
                writer = csv.DictWriter(f, fieldnames=self.fieldnames)
                writer.writerow(row)

    def print_header(self, clear_file=False):
        if self.out == "stdout":
            if self.use_perf:
                print("{:>8s}{:>12s}{:>15s}{:>15s}{:>10s}{:>12s}".format(*self.fieldnames, "insn/cycle"))
            else:
                print("{:>8s}{:>12s}{:>15s}{:>10s}".format(*self.fieldnames))
        elif self.out == "csv":
            with open(self.output_path, "w" if clear_file else "a") as f:
                writer = csv.DictWriter(f, fieldnames=self.fieldnames)
                writer.writeheader()

def parse_perf_csv(s):
    if "You may not have permission to collect stats" in s:
        raise PerfToolException

    def get_value(key):
        return s.partition(key)[0].splitlines()[-1].rstrip(',')

    return collections.OrderedDict((
        ("time (us)", int(1e6 * float(s.splitlines()[1]))),
        ("instructions", int(get_value("instructions"))),
        ("cycles", int(get_value("cycles"))),
    ))
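# parse_perf_csv above works on the combined stdout and stderr of a perf-stat run:
# the second line of the output is read as the wall-clock time in seconds (printed by
# the benchmark binary itself), and each counter is taken as the last comma-separated
# value preceding its event name on a perf CSV line, e.g. "<value>,,instructions,..."
# (an illustrative shape only, not verbatim perf output).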
def run(cmd, input_size, no_perf=False, num_threads=None):
    """
    Run the given command string, by default under perf-stat.
    Returns the raw output string when no_perf is set, otherwise a dict of parsed perf results.
    """
    cmd_prefix = ''
    if not no_perf:
        cmd_prefix = "perf stat --detailed --detailed --field-separator=, "
    env = None
    if num_threads:
        env = dict(os.environ.copy(), OMP_NUM_THREADS=str(num_threads), RAYON_NUM_THREADS=str(num_threads))
    cmd_list = (cmd_prefix + cmd).split(' ')
    result = subprocess.run(
        cmd_list,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        env=env
    )
    out = result.stdout.decode("utf-8")
    return out if no_perf else parse_perf_csv(out)
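# Note: do_benchmark below reads impl_filter and input_sizes from module-level names
# that are only defined in the __main__ block at the bottom of this file, so it is
# meant to be used by running this script directly rather than by importing it.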
def do_benchmark(build_dir, iterations_per_step_impl, benchmark_langs, threads, report_dir=None, reporter_out=None, no_perf=False):
    """Run every selected step implementation for each language and input size, reporting results via Reporter."""
    if no_perf:
        print_header("Benchmarking", end="\n\n")
    else:
        print_header("Benchmarking with perf-stat", end="\n\n")
    for step_impl in STEP_IMPLEMENTATIONS:
        if impl_filter and not any(step_impl.startswith(prefix) for prefix in impl_filter):
            continue
        iterations = iterations_per_step_impl[step_impl]
        for lang in benchmark_langs:
            print_header(lang + ' ' + step_impl)
            report_path = None
            if report_dir:
                report_name = step_impl[:2] + '.' + reporter_out
                report_path = os.path.join(report_dir, lang, report_name)
            reporter = Reporter(reporter_out, report_path, use_perf=not no_perf)
            reporter.print_header(clear_file=True)
            bench_cmd = os.path.join(build_dir, step_impl + "_" + lang)
            for input_size in input_sizes:
                if no_perf:
                    bench_args = 'benchmark {} {}'.format(input_size, iterations)
                    cmd = ' '.join((CPU_BIND_CMD, bench_cmd, bench_args))
                    output = run(cmd, input_size, no_perf=no_perf, num_threads=threads)
                    result = collections.OrderedDict()
                    result["N (rows)"] = input_size
                    result["iterations"] = iterations
                    result["time (us)"] = int(1e6 * sum(float(line.strip()) for line in output.splitlines()[1:] if line.strip()))
                    result["GFLOP/s"] = get_gflops(input_size, 1e-6 * result["time (us)"] / iterations)
                    reporter.print_row(result)
                else:
                    bench_args = 'benchmark {} 1'.format(input_size)
                    cmd = ' '.join((CPU_BIND_CMD, bench_cmd, bench_args))
                    for iter_n in range(iterations):
                        result = run(cmd, input_size, no_perf=no_perf, num_threads=threads)
                        result["N (rows)"] = input_size
                        result.move_to_end("N (rows)", last=False)
                        result["GFLOP/s"] = get_gflops(input_size, 1e-6 * result["time (us)"])
                        reporter.print_row(result)
            if report_dir:
                print("Wrote {} report to {}".format(reporter_out, report_path))

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run benchmark binaries for comparing C++ and Rust implementations of the step function")
    parser.add_argument("--build_dir", "-b",
                        type=str,
                        default=os.path.join(os.path.curdir, "build"),
                        help="Path to the build output directory from build.py; defaults to build.py's default.")
    parser.add_argument("--limit_input_size_begin", "-n",
                        type=int,
                        default=0,
                        help="n in sizes[n:], where sizes is {}".format(INPUT_SIZES))
    parser.add_argument("--limit_input_size_end", "-m",
                        type=int,
                        default=len(INPUT_SIZES),
                        help="m in sizes[:m], where sizes is {}".format(INPUT_SIZES))
    parser.add_argument("--input_size",
                        type=int,
                        help="Override the input size with a single value. Takes precedence over --limit_input_size_{begin,end}.")
    parser.add_argument("--threads", "-t",
                        type=int,
                        help="Value for the environment variables controlling the number of threads, defaults to 1")
    parser.add_argument("--implementation", "-i",
                        action='append',
                        type=str,
                        help="Specify implementations to be run, e.g. '-i v0' runs only v0_baseline. Can be specified multiple times.")
    parser.add_argument("--reporter_out",
                        choices=Reporter.supported,
                        default="stdout",
                        help="Reporter output type")
    parser.add_argument("--report_dir",
                        type=str,
                        help="Directory to write reports into")
    parser.add_argument("--iterations", "-c",
                        type=int,
                        help="Number of iterations for each input size")
    parser.add_argument("--no-perf",
                        action='store_true',
                        help="Do not use perf; just run the benchmark, measure the running time, and compute the GFLOP/s estimate")
    parser.add_argument("--no-cpp",
                        action='store_true',
                        help="Skip all C++ benchmarks")
    parser.add_argument("--no-rust",
                        action='store_true',
                        help="Skip all Rust benchmarks")
    args = parser.parse_args()

    build_dir = os.path.abspath(os.path.join(args.build_dir, "bin"))
    if not os.path.exists(build_dir):
        print_error("Build directory {}, which should contain the benchmark executables, does not exist".format(build_dir))
        sys.exit(1)

    impl_filter = tuple(set(args.implementation)) if args.implementation else ()

    if args.input_size:
        input_sizes = [args.input_size]
    else:
        input_sizes = INPUT_SIZES[args.limit_input_size_begin:args.limit_input_size_end]

    benchmark_langs = []
    if not args.no_cpp:
        benchmark_langs.append("cpp")
    if not args.no_rust:
        benchmark_langs.append("rust")

    if args.reporter_out == "csv":
        if args.report_dir:
            if not os.path.exists(args.report_dir):
                os.makedirs(args.report_dir)
            for lang in benchmark_langs:
                os.mkdir(os.path.join(args.report_dir, lang))
        else:
            print_error("Reporter type 'csv' needs an output directory; please specify one with --report_dir")
            sys.exit(1)

    if args.iterations:
        iterations = {k: args.iterations for k in STEP_IMPLEMENTATIONS}
    else:
        iterations = dict(zip(STEP_IMPLEMENTATIONS, STEP_ITERATIONS))

    try:
        do_benchmark(build_dir, iterations, benchmark_langs, args.threads, args.report_dir, args.reporter_out, args.no_perf)
    except PerfToolException:
        print()
        print_error("Failed to run perf due to insufficient privileges. Consider e.g. decreasing the integer value in /proc/sys/kernel/perf_event_paranoid")
        sys.exit(1)
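# Example invocations (assuming build.py has already produced the benchmark binaries
# under ./build/bin; all flags are defined by the argument parser above):
#
#   ./bench.py --no-perf -i v0 -i v1                    # time-only runs for v0_* and v1_* implementations
#   ./bench.py -t 4 --reporter_out csv --report_dir reports
#   ./bench.py -n 2 -m 6 -c 5 --no-rust                 # sizes INPUT_SIZES[2:6], 5 iterations each, C++ only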