Fix the nightly benchmark bugs.
xuzhao9 committed May 10, 2024
1 parent 65aac18 commit ea69e61
Showing 4 changed files with 33 additions and 16 deletions.
3 changes: 1 addition & 2 deletions torchbenchmark/util/experiment/instantiator.py
@@ -24,7 +24,6 @@
 
 @dataclasses.dataclass
 class TorchBenchModelConfig:
-    model_set: str
     name: str
     test: str
     device: str
@@ -176,4 +175,4 @@ def get_model_set_from_model_name(model_name: str) -> str:
         return "timm"
     if model_name in list_models():
         return "torchbench"
-    assert False, f"Model {model_name} is not found in any model set."
+    assert False, f"Model {model_name} is not found in any model set."
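
With the model_set field gone from TorchBenchModelConfig, the suite a model belongs to is derived on demand from its name via get_model_set_from_model_name (second hunk above). A minimal usage sketch, assuming the import path from this diff; "BERT_pytorch" is only an illustrative model name:

    from torchbenchmark.util.experiment.instantiator import get_model_set_from_model_name

    # Derive the suite ("torchbench", "timm", ...) from the model name instead of
    # storing it on the config; unknown names trip the assert shown above.
    model_set = get_model_set_from_model_name("BERT_pytorch")
    print(model_set)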
20 changes: 19 additions & 1 deletion torchbenchmark/util/experiment/metrics.py
@@ -22,6 +22,7 @@
 class TorchBenchModelMetrics:
     latencies: List[float]
     throughputs: List[float]
+    accuracy: Optional[bool]
     cpu_peak_mem: Optional[float]
     gpu_peak_mem: Optional[float]
     ttfb: Optional[float] # time-to-first-batch
@@ -250,4 +251,21 @@ def run_config(config: TorchBenchModelConfig,
                dryrun: bool=False,
                ) -> Union[TorchBenchModelMetrics, Dict[str, Any]]:
     """Run a benchmark config and return the metrics as a Dict"""
-    pass
+    print(f"Running config {config} ...", flush=True, end="")
+    metrics = TorchBenchModelMetrics(
+        latencies=[],
+        throughputs=[],
+        accuracy=None,
+        cpu_peak_mem=None,
+        gpu_peak_mem=None,
+        ttfb=None,
+        pt2_compilation_time=None,
+        pt2_graph_breaks=None,
+        model_flops=None,
+        error_msg=None,
+    )
+    if dryrun:
+        print("[skip_by_dryrun]", flush=True)
+        return dataclasses.asdict(metrics) if as_dict else metrics
+    print("[done]", flush=True)
+    return dataclasses.asdict(metrics) if as_dict else metrics
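
For reference, the dry-run path now returns the dataclasses.asdict form of an all-empty TorchBenchModelMetrics. A standalone sketch of that shape; the class is re-declared here purely for illustration, and the field types after ttfb are assumptions since the diff only shows their names:

    import dataclasses
    from typing import List, Optional

    @dataclasses.dataclass
    class TorchBenchModelMetrics:  # illustrative copy of the class in metrics.py
        latencies: List[float]
        throughputs: List[float]
        accuracy: Optional[bool]
        cpu_peak_mem: Optional[float]
        gpu_peak_mem: Optional[float]
        ttfb: Optional[float]                  # time-to-first-batch
        pt2_compilation_time: Optional[float]  # type assumed
        pt2_graph_breaks: Optional[float]      # type assumed
        model_flops: Optional[float]           # type assumed
        error_msg: Optional[str]               # type assumed

    # What run_config(cfg, as_dict=True, dryrun=True) returns after this commit:
    empty = TorchBenchModelMetrics([], [], None, None, None, None, None, None, None, None)
    print(dataclasses.asdict(empty))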
22 changes: 11 additions & 11 deletions userbenchmark/group_bench/run_config.py
@@ -7,7 +7,7 @@
 def _get_models(models: Optional[List[str]]=None, model_set: Optional[List[str]]=None) -> List[str]:
     result = set(models) if models else set()
     for s in model_set:
-        result.union(set(list_extended_models(s)))
+        result = result.union(set(list_extended_models(s)))
     return list(result)
 
 def config_obj_to_model_configs(config: Dict[str, Any]) -> Dict[str, Dict[str, List[TorchBenchModelConfig]]]:
@@ -20,7 +20,8 @@ def config_obj_to_model_configs(config: Dict[str, Any]) -> Dict[str, Dict[str, L
     test_groups = config["test_groups"]
     result = {}
     for group_name in test_groups.keys():
-        extra_args = test_groups[group_name].copy()
+        extra_args = test_groups[group_name].get("extra_args", [])
+        extra_args = [] if extra_args == None else extra_args.copy()
         cfgs = itertools.product(*[devices, tests, batch_sizes, precisions, models])
         for device, test, batch_size, precision, model_name in cfgs:
             if precision:
@@ -73,15 +74,14 @@ def run_benchmark_group_config(group_config_file: str, dryrun: bool=False) -> Li
     configs: Dict[str, Dict[str, List[TorchBenchModelConfig]]] = config_obj_to_model_configs(config_obj)
     for common_key in configs.keys():
         group_key = _common_key_to_group_key(common_key)
-        group_result = {"group_key": group_key, "group_results": []}
+        group_result = {"group_key": group_key, "group_results": {}}
         for group_name in configs[common_key]:
-            group_result["group_results"] = [
-                _config_result_to_group_result(
-                    group_name=group_name,
-                    model_name=x.name,
-                    metrics=run_config(x, as_dict=True, dryrun=dryrun),
-                    required_metrics=x.metrics)
-                for x in configs[common_key][group_name]
-            ]
+            for x in configs[common_key][group_name]:
+                group_result["group_results"].update(
+                    _config_result_to_group_result(
+                        group_name=group_name,
+                        model_name=x.name,
+                        metrics=run_config(x, as_dict=True, dryrun=dryrun),
+                        required_metrics=x.metrics))
         result.append(group_result)
     return result
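
The _get_models fix addresses a classic pitfall: set.union returns a new set rather than mutating in place, so the old code discarded every model_set expansion. A standalone sketch of the difference; the model names are placeholders:

    # set.union() returns a new set; it does not modify `result` in place.
    result = {"resnet50"}
    result.union({"vit_base_patch16_224"})           # old code: return value discarded
    print(result)                                    # {'resnet50'} -- expansion lost

    result = result.union({"vit_base_patch16_224"})  # fixed: reassign (or use |=)
    print(result)                                    # {'resnet50', 'vit_base_patch16_224'}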
4 changes: 2 additions & 2 deletions userbenchmark/torch-nightly/nightly.yaml
@@ -9,8 +9,8 @@ device:
 - cuda
 metrics:
 - latencies
-- peak_cpu_mem
-- peak_gpu_mem
+- cpu_peak_mem
+- gpu_peak_mem
 test_groups:
   eager:
     extra_args:
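
The renamed YAML entries line up with the field names on TorchBenchModelMetrics (cpu_peak_mem, gpu_peak_mem) shown in the metrics.py diff above, which the old peak_cpu_mem/peak_gpu_mem spellings did not match. A quick sanity-check sketch, assuming the import path from this commit:

    import dataclasses
    from torchbenchmark.util.experiment.metrics import TorchBenchModelMetrics

    # Metric names listed in nightly.yaml should match the dataclass fields.
    field_names = {f.name for f in dataclasses.fields(TorchBenchModelMetrics)}
    assert {"latencies", "cpu_peak_mem", "gpu_peak_mem"} <= field_names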
