From a97842593cf073c8b079ade9fe99cb0895f54fbd Mon Sep 17 00:00:00 2001
From: gs-olive <113141689+gs-olive@users.noreply.github.com>
Date: Mon, 23 Oct 2023 12:21:16 -0700
Subject: [PATCH] [userbenchmark] Add error logs to GHA

- Add error text file to GitHub Actions artifact for improved debugging
  and logging
---
 userbenchmark/torch_trt/run.py | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/userbenchmark/torch_trt/run.py b/userbenchmark/torch_trt/run.py
index a539902352..df3ebf7957 100644
--- a/userbenchmark/torch_trt/run.py
+++ b/userbenchmark/torch_trt/run.py
@@ -53,8 +53,11 @@ def cli(args: List[str]):
     return vars(parsed_args), unknown
 
 
-def save_metrics(metrics):
-    """Save metrics to a JSON file with formatted filename"""
+def save_metrics(metrics, error_logs):
+    """Save metrics to a JSON file with formatted filename.
+
+    Save error logs to a text file.
+    """
     metrics_json = {
         "name": "torch_trt",
         "environ": {
@@ -73,14 +76,26 @@ def save_metrics(metrics):
     os.makedirs(target_dir, exist_ok=True)
 
     # Format filename and path to save metrics
+    curr_time = time.time()
     metrics_file = "metrics-{}.json".format(
-        datetime.fromtimestamp(time.time()).strftime("%Y%m%d%H%M%S")
+        datetime.fromtimestamp(curr_time).strftime("%Y%m%d%H%M%S")
     )
+    error_logs_file = "error-logs-{}.txt".format(
+        datetime.fromtimestamp(curr_time).strftime("%Y%m%d%H%M%S")
+    )
+
     metrics_save_path = os.path.join(target_dir, metrics_file)
+    error_save_path = os.path.join(target_dir, error_logs_file)
 
     with open(metrics_save_path, "w") as f:
         json.dump(metrics_json, f, indent=4)
 
+    with open(error_save_path, "w") as f:
+        for model, log in error_logs.items():
+            f.write(f"Model: {model} failed with error\n\n")
+            f.write(log)
+            f.write("\n++++++++++\n")
+
 
 def run_single_model(
     model: Union[BenchmarkModel, ModelTask],
@@ -204,6 +219,9 @@ def run(args: List[str]):
         unknown_args.append("--ir")
         unknown_args.append(selected_ir)
 
+
+    error_logs = {}
+
     # Parse model string if specified, otherwise run all models
     # Adapted from benchmark/run.py
     if parsed_args["model"]:
@@ -284,7 +302,7 @@ def run(args: List[str]):
             print(
                 f"\nBenchmarking model {model_name} failed with:\n{e}\nSkipping the model.\n"
             )
-            metrics = {model_name: -1.0}
+            error_logs[model_name] = f"Failed to run benchmark: {traceback.format_exc()}"
 
         # Halt further model runs on KeyboardInterrupt
         except KeyboardInterrupt:
@@ -295,8 +313,8 @@ def run(args: List[str]):
             print(
                 f"\nBenchmarking model {model_name} failed.\nSkipping the model.\n"
            )
-            metrics = {model_name: -1.0}
+            error_logs[model_name] = "Failed to run benchmark: Other Python error"
 
         all_metrics = {**all_metrics, **metrics}
 
-    save_metrics(all_metrics)
+    save_metrics(all_metrics, error_logs)
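
Usage sketch (illustrative, not part of the patch): the error-logs artifact written by save_metrics is a plain delimited text file, so a debugging step that downloads the GitHub Actions artifact can recover per-model tracebacks from it. The snippet below assumes exactly the format emitted above (a "Model: <name> failed with error" header, a blank line, the captured log, and a "++++++++++" delimiter per entry); the helper name parse_error_logs and the example filename are hypothetical.

import re
from typing import Dict


def parse_error_logs(path: str) -> Dict[str, str]:
    """Read an error-logs-<timestamp>.txt file back into {model_name: log}."""
    with open(path) as f:
        contents = f.read()

    logs: Dict[str, str] = {}
    # Each failed model's entry is terminated by a "++++++++++" delimiter line.
    for block in contents.split("\n++++++++++\n"):
        block = block.strip()
        if not block:
            continue
        # Header line, blank line, then the captured traceback/log body.
        header, _, body = block.partition("\n\n")
        match = re.match(r"Model: (.+) failed with error", header)
        if match:
            logs[match.group(1)] = body.strip()
    return logs


if __name__ == "__main__":
    # Hypothetical filename; real files are timestamped by save_metrics.
    for model, log in parse_error_logs("error-logs-20231023122116.txt").items():
        print(f"{model}:\n{log}\n")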