Skip to content

Commit

Permalink
Fix PyTorch CI HUD dashboard missing perf numbers: hf_Whisper (#1935)
Browse files Browse the repository at this point in the history
Summary:
A few models were passing accuracy check, but surprisingly failing the perf run, resulting in dashboard entries like:
<img width="1696" alt="image" src="https://github.com/pytorch/benchmark/assets/9547562/eb0be16e-7785-486d-a362-322146a97423">

Reproducing the HUD's commands locally:
```
# pass
python benchmarks/dynamo/torchbench.py --accuracy --no-translation-validation --training --amp --backend inductor --disable-cudagraphs --device cuda --total-partitions 4 --partition-id 1 --output hf_Whisper_accuracy.csv --only hf_Whisper

# fail (on https://github.com/pytorch/benchmark/blob/4ea3bba3b8010f5d4a629bb8f530a92570f34518/torchbenchmark/util/model.py#L195C48-L195C48)
python benchmarks/dynamo/torchbench.py --performance --cold-start-latency --training --amp --backend inductor --disable-cudagraphs --device cuda --total-partitions 4 --partition-id 1 --output hf_Whisper_perf.csv --only hf_Whisper
```

The error suggests that hf_Whisper does not provide a batch size for the training mode perf run.

Summarizing discussion with xuzhao9:
> I think we could:
> 1. set a default train batch size for hf_Whisper, if you still want to test forward/backward pass without a defined train test
> 2. in model.py, make sure self.batch_size is not None (before accuracy check overrides batch size to 4)

I implement 1, we set default batch sizes in the parent class of all benchmark models, with ability to be overwritten by individual models.

Pull Request resolved: #1935

Reviewed By: xuzhao9

Differential Revision: D49641235

Pulled By: xmfan

fbshipit-source-id: 2f93fb742846d7c34936cbbc8e8d3e22c5a76662
  • Loading branch information
xmfan authored and facebook-github-bot committed Sep 26, 2023
1 parent 64409d5 commit 3f1c3eb
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 24 deletions.
1 change: 1 addition & 0 deletions torchbenchmark/models/hf_Whisper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

class Model(HuggingFaceModel):
task = SPEECH.RECOGNITION
DEFAULT_TRAIN_BSIZE = 8
DEFAULT_EVAL_BSIZE = 8
DEFAULT_EVAL_CUDA_PRECISION = "fp16"

Expand Down
1 change: 1 addition & 0 deletions torchbenchmark/models/hf_Whisper/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ eval_deterministic: false
eval_nograd: true
not_implemented:
- device: cpu
- test: train
train_benchmark: false
train_deterministic: false
57 changes: 33 additions & 24 deletions torchbenchmark/util/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,36 +171,45 @@ def _determine_dynamic_num_batches(self, user_specified_num_batches: Optional[in
assert hasattr(self, 'DEFAULT_NUM_BATCH'), f"We expect all models with dynamic shapes specify field `DEFAULT_NUM_BATCHES`."
return self.DEFAULT_NUM_BATCH

def _determine_batch_size(self, batch_size=None):
def _get_batch_size_from_metadata(self) -> Optional[str]:
if self.device != "cuda":
current_device_name = str(self.device)
else:
current_device_name = torch.cuda.get_device_name()
assert current_device_name, f"torch.cuda.get_device_name() returns None when device is set to cuda, please double check."
if current_device_name in SPECIAL_DEVICE_MAPPING:
current_device_name = SPECIAL_DEVICE_MAPPING[current_device_name]

# use the device suggestion on CUDA inference tests, key should be either eval_batch_size or train_batch_size
device_batch_size_key = f"{self.test}_batch_size"
if self.metadata and "devices" in self.metadata and current_device_name in self.metadata["devices"] \
and device_batch_size_key in self.metadata["devices"][current_device_name]:
batch_size = self.metadata["devices"][current_device_name][device_batch_size_key]
return batch_size

def _determine_batch_size(self, user_specified_batch_size=None):
# batch size priority for eval tests: not ALLOW_CUSTOMIZE_BSIZE > user specified > device specified > default
# batch size priority for train tests: not ALLOW_CUSTOMIZE_BSIZE > user specified > default
self.batch_size = batch_size
if not batch_size:
self.batch_size = self.DEFAULT_TRAIN_BSIZE if self.test == "train" else self.DEFAULT_EVAL_BSIZE
if self.device == "cuda":
current_device_name = torch.cuda.get_device_name()
assert current_device_name, f"torch.cuda.get_device_name() returns None when device is set to cuda, please double check."
if current_device_name in SPECIAL_DEVICE_MAPPING:
current_device_name = SPECIAL_DEVICE_MAPPING[current_device_name]
else:
current_device_name = str(self.device)
# use the device suggestion on CUDA inference tests, key should be either eval_batch_size or train_batch_size
device_batch_size_key = f"{self.test}_batch_size"
if self.metadata and "devices" in self.metadata and current_device_name in self.metadata["devices"] \
and device_batch_size_key in self.metadata["devices"][current_device_name]:
self.batch_size = self.metadata["devices"][current_device_name][device_batch_size_key]
# If the model doesn't implement test or eval test
# its DEFAULT_TRAIN_BSIZE or DEFAULT_EVAL_BSIZE will still be None
if not self.batch_size:
raise NotImplementedError(f"Test {self.test} is not implemented.")
else:
self.batch_size = batch_size

self.batch_size = user_specified_batch_size

if not self.batch_size:
device_specified_batch_size = self._get_batch_size_from_metadata()
self.batch_size = device_specified_batch_size

if not self.batch_size:
default_batch_size = self.DEFAULT_TRAIN_BSIZE if self.test == "train" else self.DEFAULT_EVAL_BSIZE
self.batch_size = default_batch_size

if not self.batch_size:
raise NotImplementedError(f"Model's {'DEFAULT_TRAIN_BSIZE' if self.test == 'train' else 'DEFAULT_EVAL_BSIZE'} is not implemented.")

# Check if specified batch size is supported by the model
if hasattr(self, "ALLOW_CUSTOMIZE_BSIZE") and (not getattr(self, "ALLOW_CUSTOMIZE_BSIZE")):
if self.test == "train" and (not self.batch_size == self.DEFAULT_TRAIN_BSIZE):
raise NotImplementedError("Model doesn't support customizing batch size.")
raise NotImplementedError(f"Model doesn't support customizing batch size, but {self.test} test is providing a batch size other than DEFAULT_TRAIN_BSIZE")
elif self.test == "eval" and (not self.batch_size == self.DEFAULT_EVAL_BSIZE):
raise NotImplementedError("Model doesn't support customizing batch size.")
raise NotImplementedError(f"Model doesn't support customizing batch size, but {self.test} test is providing a batch size other than DEFAULT_EVAL_BSIZE")
elif self.dargs.accuracy:
self.batch_size = 4 if self.batch_size > 4 else self.batch_size

Expand Down

0 comments on commit 3f1c3eb

Please sign in to comment.