From c1633fa75ee38a573a78e0066b17e0280248f471 Mon Sep 17 00:00:00 2001
From: Richard Liu <39319471+richardsliu@users.noreply.github.com>
Date: Fri, 6 Sep 2024 18:16:54 -0700
Subject: [PATCH] Allow benchmark to write json output (#801)

* write json output in benchmark

* fix bugs

* fix
---
 .../container/benchmark_serving.py           | 67 ++++++++++++++++++-
 1 file changed, 66 insertions(+), 1 deletion(-)

diff --git a/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py b/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
index 8ae1109ee..5f521058b 100644
--- a/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
+++ b/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
@@ -7,6 +7,7 @@
 
 import argparse
 import asyncio
+from datetime import datetime
 import json
 import random
 import time
@@ -266,6 +267,42 @@ async def benchmark(
   await asyncio.gather(*tasks)
 
 
+def save_json_results(args: argparse.Namespace, benchmark_result):
+  # dimensions values are strings
+  dimensions_json = {}
+  # metrics values are numerical
+  metrics_json = {}
+
+  # Setup
+  current_dt = datetime.now().strftime("%Y%m%d-%H%M%S")
+  dimensions_json["date"] = current_dt
+  dimensions_json["backend"] = args.backend
+  dimensions_json["model_id"] = args.model
+  dimensions_json["tokenizer_id"] = args.tokenizer
+  if args.additional_metadata_metrics_to_save is not None:
+    dimensions_json = {
+        **dimensions_json,
+        **json.loads(args.additional_metadata_metrics_to_save),
+    }
+  metrics_json["num_prompts"] = args.num_prompts
+
+  # Traffic
+  metrics_json["request_rate"] = args.request_rate
+  metrics_json = {**metrics_json, **benchmark_result}
+
+  final_json = {}
+  final_json["metrics"] = metrics_json
+  final_json["dimensions"] = dimensions_json
+
+  # Save to file
+  base_model_id = args.model.split("/")[-1]
+  file_name = (
+      f"{args.backend}-{args.request_rate}qps-{base_model_id}-{current_dt}.json"
+  )
+  with open(file_name, "w", encoding="utf-8") as outfile:
+    json.dump(final_json, outfile)
+
+
 def main(args: argparse.Namespace):
   print(args)
   random.seed(args.seed)
@@ -305,24 +342,32 @@ def main(args: argparse.Namespace):
           args.model,
       )
   )
+  benchmark_result = {}
   benchmark_end_time = time.time()
   benchmark_time = benchmark_end_time - benchmark_start_time
   print(f"Total time: {benchmark_time:.2f} s")
   print(f"Requests/min: {60 * args.num_prompts / benchmark_time:.2f}")
+  benchmark_result['benchmark_time'] = benchmark_time
 
   total_output_tokens = np.sum([output_len for _, output_len, _ in
                                 REQUEST_LATENCY])
   output_tokens_per_min = 60 * total_output_tokens / benchmark_time
   print(f"Output_tokens/min: {output_tokens_per_min:.2f}")
+  benchmark_result['total_output_token'] = int(total_output_tokens)
+  benchmark_result['output_tokens_per_min'] = output_tokens_per_min
 
   total_input_tokens = np.sum([prompt_len for prompt_len, _, _ in
                                REQUEST_LATENCY])
   input_tokens_per_min = 60 * total_input_tokens / benchmark_time
   print(f"Input_tokens/min: {input_tokens_per_min:.2f}")
+  benchmark_result['total_input_tokens'] = int(total_input_tokens)
+  benchmark_result['input_tokens_per_min'] = input_tokens_per_min
 
   total_tokens = total_input_tokens + total_output_tokens
   tokens_per_min = 60 * total_tokens / benchmark_time
   print(f"Tokens/min: {tokens_per_min:.2f}")
+  benchmark_result['total_tokens'] = int(total_tokens)
+  benchmark_result['tokens_per_min'] = tokens_per_min
 
   if args.machine_cost:
     print(
@@ -336,6 +381,7 @@ def main(args: argparse.Namespace):
       "Average seconds/request (includes waiting time on server):"
       f" {avg_latency:.2f}"
   )
+  benchmark_result['avg_latency'] = avg_latency
 
   avg_per_token_latency = np.mean([
       latency / (prompt_len + output_len)
@@ -345,6 +391,7 @@ def main(args: argparse.Namespace):
       "Average milliseconds/token (includes waiting time on server):"
       f" {1000 * avg_per_token_latency:.2f}"
   )
+  benchmark_result['avg_per_token_latency'] = avg_per_token_latency
   avg_per_output_token_latency = np.mean(
       [latency / output_len for _, output_len, latency in REQUEST_LATENCY]
   )
@@ -353,6 +400,7 @@ def main(args: argparse.Namespace):
       "Average milliseconds/output_token (includes waiting time on server):"
       f" {1000 * avg_per_output_token_latency:.2f}"
   )
+  benchmark_result['avg_per_output_token_latency'] = avg_per_output_token_latency
   avg_input_len = np.mean(
       [prompt_len for prompt_len, _, _ in REQUEST_LATENCY]
   )
@@ -361,6 +409,7 @@ def main(args: argparse.Namespace):
       "Average input length:"
       f" {avg_input_len:.2f}"
   )
+  benchmark_result['avg_input_len'] = avg_input_len
   avg_output_len = np.mean(
       [output_len for _, output_len, _ in REQUEST_LATENCY]
   )
@@ -369,6 +418,10 @@ def main(args: argparse.Namespace):
       "Average output length:"
       f" {avg_output_len:.2f}"
   )
+  benchmark_result['avg_output_len'] = avg_output_len
+
+  if args.save_json_results:
+    save_json_results(args, benchmark_result)
 
 
 if __name__ == "__main__":
@@ -479,6 +532,18 @@ def main(args: argparse.Namespace):
           " and max_output_length."
       ),
   )
+  parser.add_argument(
+      "--save-json-results",
+      action="store_true",
+      help="Whether to save benchmark results to a json file.",
+  )
+  parser.add_argument(
+      "--additional-metadata-metrics-to-save",
+      type=str,
+      help=(
+          "Additional metadata about the workload. Should be a dictionary in"
+          " the form of a string."
+      ),
+  )
   cmd_args = parser.parse_args()
   main(cmd_args)
- 
\ No newline at end of file
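
A brief sketch of the artifact this patch produces, for reference: assuming a run with the new --save-json-results flag against a vLLM backend at 5 QPS with 1000 prompts of a Llama-2-7b chat model (the model name and every field value below are illustrative placeholders, not measured results), save_json_results writes a file named {backend}-{request_rate}qps-{base_model_id}-{timestamp}.json, e.g. vllm-5.0qps-Llama-2-7b-chat-hf-20240906-181654.json, with contents shaped like:

  {
    "metrics": {
      "num_prompts": 1000,
      "request_rate": 5.0,
      "benchmark_time": 210.0,
      "total_output_token": 250000,
      "output_tokens_per_min": 71428.6,
      "total_input_tokens": 230000,
      "input_tokens_per_min": 65714.3,
      "total_tokens": 480000,
      "tokens_per_min": 137142.9,
      "avg_latency": 3.2,
      "avg_per_token_latency": 0.0067,
      "avg_per_output_token_latency": 0.0128,
      "avg_input_len": 230.0,
      "avg_output_len": 250.0
    },
    "dimensions": {
      "date": "20240906-181654",
      "backend": "vllm",
      "model_id": "meta-llama/Llama-2-7b-chat-hf",
      "tokenizer_id": "meta-llama/Llama-2-7b-chat-hf"
    }
  }

Any JSON object passed via --additional-metadata-metrics-to-save, e.g. '{"accelerator": "example-gpu"}', is merged into "dimensions" alongside date, backend, model_id, and tokenizer_id; the numeric values collected in benchmark_result land under "metrics".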