Fix the nightly benchmark bugs.
xuzhao9 committed May 10, 2024
1 parent 65aac18 commit ea69e61
Showing 4 changed files with 33 additions and 16 deletions.
3 changes: 1 addition & 2 deletions torchbenchmark/util/experiment/instantiator.py
@@ -24,7 +24,6 @@
 
 @dataclasses.dataclass
 class TorchBenchModelConfig:
-    model_set: str
     name: str
     test: str
     device: str
@@ -176,4 +175,4 @@ def get_model_set_from_model_name(model_name: str) -> str:
         return "timm"
     if model_name in list_models():
         return "torchbench"
-    assert False, f"Model {model_name} is not found in any model set."
+    assert False, f"Model {model_name} is not found in any model set."
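
With the model_set field gone from TorchBenchModelConfig, the suite a model belongs to is derived on demand from its name via get_model_set_from_model_name (second hunk above). A minimal usage sketch, assuming the import path from this diff; "BERT_pytorch" is only an illustrative model name:

    from torchbenchmark.util.experiment.instantiator import get_model_set_from_model_name

    # Derive the suite ("torchbench", "timm", ...) from the model name instead of
    # storing it on the config; unknown names trip the assert shown above.
    model_set = get_model_set_from_model_name("BERT_pytorch")
    print(model_set)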
20 changes: 19 additions & 1 deletion torchbenchmark/util/experiment/metrics.py
@@ -22,6 +22,7 @@
 class TorchBenchModelMetrics:
     latencies: List[float]
     throughputs: List[float]
+    accuracy: Optional[bool]
     cpu_peak_mem: Optional[float]
     gpu_peak_mem: Optional[float]
     ttfb: Optional[float] # time-to-first-batch
@@ -250,4 +251,21 @@ def run_config(config: TorchBenchModelConfig,
                dryrun: bool=False,
                ) -> Union[TorchBenchModelMetrics, Dict[str, Any]]:
     """Run a benchmark config and return the metrics as a Dict"""
-    pass
+    print(f"Running config {config} ...", flush=True, end="")
+    metrics = TorchBenchModelMetrics(
+        latencies=[],
+        throughputs=[],
+        accuracy=None,
+        cpu_peak_mem=None,
+        gpu_peak_mem=None,
+        ttfb=None,
+        pt2_compilation_time=None,
+        pt2_graph_breaks=None,
+        model_flops=None,
+        error_msg=None,
+    )
+    if dryrun:
+        print("[skip_by_dryrun]", flush=True)
+        return dataclasses.asdict(metrics) if as_dict else metrics
+    print("[done]", flush=True)
+    return dataclasses.asdict(metrics) if as_dict else metrics
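
For reference, the dry-run path now returns the dataclasses.asdict form of an all-empty TorchBenchModelMetrics. A standalone sketch of that shape; the class is re-declared here purely for illustration, and the field types after ttfb are assumptions since the diff only shows their names:

    import dataclasses
    from typing import List, Optional

    @dataclasses.dataclass
    class TorchBenchModelMetrics:  # illustrative copy of the class in metrics.py
        latencies: List[float]
        throughputs: List[float]
        accuracy: Optional[bool]
        cpu_peak_mem: Optional[float]
        gpu_peak_mem: Optional[float]
        ttfb: Optional[float]                  # time-to-first-batch
        pt2_compilation_time: Optional[float]  # type assumed
        pt2_graph_breaks: Optional[float]      # type assumed
        model_flops: Optional[float]           # type assumed
        error_msg: Optional[str]               # type assumed

    # What run_config(cfg, as_dict=True, dryrun=True) returns after this commit:
    empty = TorchBenchModelMetrics([], [], None, None, None, None, None, None, None, None)
    print(dataclasses.asdict(empty))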
22 changes: 11 additions & 11 deletions userbenchmark/group_bench/run_config.py
@@ -7,7 +7,7 @@
 def _get_models(models: Optional[List[str]]=None, model_set: Optional[List[str]]=None) -> List[str]:
     result = set(models) if models else set()
     for s in model_set:
-        result.union(set(list_extended_models(s)))
+        result = result.union(set(list_extended_models(s)))
     return list(result)
 
 def config_obj_to_model_configs(config: Dict[str, Any]) -> Dict[str, Dict[str, List[TorchBenchModelConfig]]]:
@@ -20,7 +20,8 @@ def config_obj_to_model_configs(config: Dict[str, Any]) -> Dict[str, Dict[str, L
     test_groups = config["test_groups"]
     result = {}
     for group_name in test_groups.keys():
-        extra_args = test_groups[group_name].copy()
+        extra_args = test_groups[group_name].get("extra_args", [])
+        extra_args = [] if extra_args == None else extra_args.copy()
         cfgs = itertools.product(*[devices, tests, batch_sizes, precisions, models])
         for device, test, batch_size, precision, model_name in cfgs:
             if precision:
@@ -73,15 +74,14 @@ def run_benchmark_group_config(group_config_file: str, dryrun: bool=False) -> Li
     configs: Dict[str, Dict[str, List[TorchBenchModelConfig]]] = config_obj_to_model_configs(config_obj)
     for common_key in configs.keys():
         group_key = _common_key_to_group_key(common_key)
-        group_result = {"group_key": group_key, "group_results": []}
+        group_result = {"group_key": group_key, "group_results": {}}
         for group_name in configs[common_key]:
-            group_result["group_results"] = [
-                _config_result_to_group_result(
-                    group_name=group_name,
-                    model_name=x.name,
-                    metrics=run_config(x, as_dict=True, dryrun=dryrun),
-                    required_metrics=x.metrics)
-                for x in configs[common_key][group_name]
-            ]
+            for x in configs[common_key][group_name]:
+                group_result["group_results"].update(
+                    _config_result_to_group_result(
+                        group_name=group_name,
+                        model_name=x.name,
+                        metrics=run_config(x, as_dict=True, dryrun=dryrun),
+                        required_metrics=x.metrics))
         result.append(group_result)
     return result
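
The _get_models fix addresses a classic pitfall: set.union returns a new set rather than mutating in place, so the old code discarded every model_set expansion. A standalone sketch of the difference; the model names are placeholders:

    # set.union() returns a new set; it does not modify `result` in place.
    result = {"resnet50"}
    result.union({"vit_base_patch16_224"})           # old code: return value discarded
    print(result)                                    # {'resnet50'} -- expansion lost

    result = result.union({"vit_base_patch16_224"})  # fixed: reassign (or use |=)
    print(result)                                    # {'resnet50', 'vit_base_patch16_224'}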
4 changes: 2 additions & 2 deletions userbenchmark/torch-nightly/nightly.yaml
@@ -9,8 +9,8 @@ device:
 - cuda
 metrics:
 - latencies
-- peak_cpu_mem
-- peak_gpu_mem
+- cpu_peak_mem
+- gpu_peak_mem
 test_groups:
   eager:
     extra_args:
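
The renamed YAML entries line up with the field names on TorchBenchModelMetrics (cpu_peak_mem, gpu_peak_mem) shown in the metrics.py diff above, which the old peak_cpu_mem/peak_gpu_mem spellings did not match. A quick sanity-check sketch, assuming the import path from this commit:

    import dataclasses
    from torchbenchmark.util.experiment.metrics import TorchBenchModelMetrics

    # Metric names listed in nightly.yaml should match the dataclass fields.
    field_names = {f.name for f in dataclasses.fields(TorchBenchModelMetrics)}
    assert {"latencies", "cpu_peak_mem", "gpu_peak_mem"} <= field_names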
