-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbench_batched.py
48 lines (40 loc) · 1.67 KB
/
bench_batched.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import time
from infer import gen_text
from itertools import count
from client import BatchInferenceClient
import asyncio
import uvloop
from beautifultable import BeautifulTable
from numpy import percentile
import secrets
class Benchmarker:
    """Measures per-request and aggregate latency against a BatchInferenceClient."""

    def __init__(self):
        # Monotonically increasing id, attached to each request so the
        # server side can distinguish concurrent calls.
        self.request_counter = count()

    async def time_per_request(self, model):
        """Send one request with a random 64-byte payload and return its latency in ms."""
        start = time.perf_counter()
        _ = await model.infer(next(self.request_counter), secrets.token_urlsafe(64))
        return (time.perf_counter() - start) * 1000

    async def benchmark(self, num_req=5):
        """Fire num_req requests concurrently against a fresh client.

        Returns:
            (total wall time in ms, p50, p95, p99) of per-request latencies in ms.
        """
        async with BatchInferenceClient() as model:
            start = time.perf_counter()
            time_taken_per_example = await asyncio.gather(
                *[self.time_per_request(model) for _ in range(num_req)]
            )
            total_time = (time.perf_counter() - start) * 1000
            # BUG FIX: the original returned percentile(..., 95) twice; the
            # fourth value is reported as p99 by the caller, so compute 99.
            return (
                total_time,
                percentile(time_taken_per_example, 50),
                percentile(time_taken_per_example, 95),
                percentile(time_taken_per_example, 99),
            )
async def main():
    """Run the benchmark over increasing batch sizes and print a summary table."""
    bench = Benchmarker()
    results = BeautifulTable()
    results.columns.header = ["Batch_Size", "Total time in ms", "p50", "p95", "p99"]
    # Powers of two from a single request up to 1024 concurrent requests.
    for batch_size in (1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024):
        total_time, p50, p95, p99 = await bench.benchmark(batch_size)
        results.rows.append([batch_size, total_time, p50, p95, p99])
        print(f"Processed {batch_size} req in {total_time}ms")
    print(results)
if __name__ == "__main__":
    # Install uvloop's policy so the loop created below is a uvloop loop.
    uvloop.install()
    # FIX: asyncio.get_event_loop() with no running loop is deprecated since
    # Python 3.10 and raises on 3.12+; create the loop explicitly instead
    # (the installed uvloop policy still supplies the implementation).
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(main())
    finally:
        # Give pending async generators a chance to run cleanup before closing.
        loop.run_until_complete(loop.shutdown_asyncgens())
        loop.close()