Skip to content

Commit

Permalink
Allow benchmark to write json output (#801)
Browse files (browse the repository at this point in the history)
* write json output in benchmark

* fix bugs

* fix
  • Loading branch information
richardsliu authored Sep 7, 2024
1 parent 819dfe9 commit c1633fa
Showing 1 changed file with 66 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import argparse
import asyncio
from datetime import datetime
import json
import random
import time
Expand Down Expand Up @@ -266,6 +267,42 @@ async def benchmark(
await asyncio.gather(*tasks)


def save_json_results(args: argparse.Namespace, benchmark_result):
  """Write the benchmark results and run metadata to a JSON file.

  The output object has two top-level keys: "metrics" (num_prompts,
  request_rate and everything in `benchmark_result`) and "dimensions" (date,
  backend, model_id, tokenizer_id and any user-supplied extra metadata). The
  file is created in the current working directory and is named
  "<backend>-<request_rate>qps-<model basename>-<timestamp>.json".

  Args:
    args: Parsed command-line arguments. Reads `backend`, `model`,
      `tokenizer`, `num_prompts`, `request_rate` and
      `additional_metadata_metrics_to_save`.
    benchmark_result: Mapping of metric name to value; merged into the
      "metrics" section after `num_prompts` and `request_rate`.

  Raises:
    ValueError: If `additional_metadata_metrics_to_save` is not valid JSON
      (json.JSONDecodeError is a ValueError subclass) or decodes to
      something other than a JSON object.
  """
  # One timestamp is shared by the "date" dimension and the file name so the
  # two always agree.
  current_dt = datetime.now().strftime("%Y%m%d-%H%M%S")

  # Dimensions describe the run (values are expected to be strings).
  dimensions_json = {
      "date": current_dt,
      "backend": args.backend,
      "model_id": args.model,
      "tokenizer_id": args.tokenizer,
  }
  extra_metadata_raw = args.additional_metadata_metrics_to_save
  if extra_metadata_raw is not None:
    extra_metadata = json.loads(extra_metadata_raw)
    # Valid JSON such as "[1, 2]" or "3" cannot be merged into a dict; fail
    # with an actionable message instead of an opaque "not a mapping" error.
    if not isinstance(extra_metadata, dict):
      raise ValueError(
          "--additional-metadata-metrics-to-save must be a JSON object, got"
          f" {type(extra_metadata).__name__}"
      )
    # User-supplied entries are merged last, so they override the defaults.
    dimensions_json.update(extra_metadata)

  # Metrics are the numerical measurements; setup and traffic values come
  # first, then the computed benchmark results.
  metrics_json = {
      "num_prompts": args.num_prompts,
      "request_rate": args.request_rate,
      **benchmark_result,
  }

  final_json = {
      "metrics": metrics_json,
      "dimensions": dimensions_json,
  }

  # Only the last path component of the model id is used, so ids such as
  # "org/model" do not introduce a directory separator into the file name.
  base_model_id = args.model.split("/")[-1]
  file_name = (
      f"{args.backend}-{args.request_rate}qps-{base_model_id}-{current_dt}.json"
  )
  with open(file_name, "w", encoding="utf-8") as outfile:
    json.dump(final_json, outfile)


def main(args: argparse.Namespace):
print(args)
random.seed(args.seed)
Expand Down Expand Up @@ -305,24 +342,32 @@ def main(args: argparse.Namespace):
args.model,
)
)
benchmark_result = {}
benchmark_end_time = time.time()
benchmark_time = benchmark_end_time - benchmark_start_time
print(f"Total time: {benchmark_time:.2f} s")
print(f"Requests/min: {60 * args.num_prompts / benchmark_time:.2f}")
benchmark_result['benchmark_time'] = benchmark_time

total_output_tokens = np.sum([output_len for _, output_len, _ in
REQUEST_LATENCY])
output_tokens_per_min = 60 * total_output_tokens / benchmark_time
print(f"Output_tokens/min: {output_tokens_per_min:.2f}")
benchmark_result['total_output_token'] = int(total_output_tokens)
benchmark_result['output_tokens_per_min'] = output_tokens_per_min

total_input_tokens = np.sum([prompt_len for prompt_len, _, _ in
REQUEST_LATENCY])
input_tokens_per_min = 60 * total_input_tokens / benchmark_time
print(f"Input_tokens/min: {input_tokens_per_min:.2f}")
benchmark_result['total_input_tokens'] = int(total_input_tokens)
benchmark_result['input_tokens_per_min'] = input_tokens_per_min

total_tokens = total_input_tokens + total_output_tokens
tokens_per_min = 60 * total_tokens / benchmark_time
print(f"Tokens/min: {tokens_per_min:.2f}")
benchmark_result['total_tokens'] = int(total_tokens)
benchmark_result['tokens_per_min'] = tokens_per_min

if args.machine_cost:
print(
Expand All @@ -336,6 +381,7 @@ def main(args: argparse.Namespace):
"Average seconds/request (includes waiting time on server):"
f" {avg_latency:.2f}"
)
benchmark_result['avg_latency'] = avg_latency

avg_per_token_latency = np.mean([
latency / (prompt_len + output_len)
Expand All @@ -345,6 +391,7 @@ def main(args: argparse.Namespace):
"Average milliseconds/token (includes waiting time on server):"
f" {1000 * avg_per_token_latency:.2f}"
)
benchmark_result['avg_per_token_latency'] = avg_per_token_latency

avg_per_output_token_latency = np.mean(
[latency / output_len for _, output_len, latency in REQUEST_LATENCY]
Expand All @@ -353,6 +400,7 @@ def main(args: argparse.Namespace):
"Average milliseconds/output_token (includes waiting time on server):"
f" {1000 * avg_per_output_token_latency:.2f}"
)
benchmark_result['avg_per_output_token_latency'] = avg_per_output_token_latency

avg_input_len = np.mean(
[prompt_len for prompt_len, _, _ in REQUEST_LATENCY]
Expand All @@ -361,6 +409,7 @@ def main(args: argparse.Namespace):
"Average input length:"
f" {avg_input_len:.2f}"
)
benchmark_result['avg_input_len'] = avg_input_len

avg_output_len = np.mean(
[output_len for _, output_len, _ in REQUEST_LATENCY]
Expand All @@ -369,6 +418,10 @@ def main(args: argparse.Namespace):
"Average output length:"
f" {avg_output_len:.2f}"
)
benchmark_result['avg_output_len'] = avg_output_len

if args.save_json_results:
save_json_results(args, benchmark_result)


if __name__ == "__main__":
Expand Down Expand Up @@ -479,6 +532,18 @@ def main(args: argparse.Namespace):
" and max_output_length."
),
)
parser.add_argument(
"--save-json-results",
action="store_true",
help="Whether to save benchmark results to a json file.",
)
parser.add_argument(
"--additional-metadata-metrics-to-save",
type=str,
help=(
"Additional metadata about the workload. Should be a dictionary in"
" the form of a string."
),
)
cmd_args = parser.parse_args()
main(cmd_args)

0 comments on commit c1633fa

Please sign in to comment.