From 89f44976ca1b873b1323b64f824aa26315573f5a Mon Sep 17 00:00:00 2001
From: MaanavD
Date: Fri, 14 Jul 2023 12:59:08 -0700
Subject: [PATCH 1/8] Added huggingface Whisper.

---
 torchbenchmark/models/hf_Whisper/__init__.py  | 19 +++++++++++++++++++
 torchbenchmark/models/hf_Whisper/install.py   | 13 +++++++++++++
 .../models/hf_Whisper/metadata.yaml           | 10 ++++++++++
 .../models/hf_Whisper/requirements.txt        |  2 ++
 .../framework/huggingface/model_factory.py    |  3 ++-
 .../util/framework/huggingface/patch_hf.py    |  2 +-
 6 files changed, 47 insertions(+), 2 deletions(-)
 create mode 100644 torchbenchmark/models/hf_Whisper/__init__.py
 create mode 100644 torchbenchmark/models/hf_Whisper/install.py
 create mode 100644 torchbenchmark/models/hf_Whisper/metadata.yaml
 create mode 100644 torchbenchmark/models/hf_Whisper/requirements.txt

diff --git a/torchbenchmark/models/hf_Whisper/__init__.py b/torchbenchmark/models/hf_Whisper/__init__.py
new file mode 100644
index 0000000000..1f17863c9b
--- /dev/null
+++ b/torchbenchmark/models/hf_Whisper/__init__.py
@@ -0,0 +1,19 @@
+from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel
+from torchbenchmark.tasks import SPEECH
+import torch
+
+class Model(HuggingFaceModel):
+    task = SPEECH.RECOGNITION
+    # https://cdn.openai.com/papers/whisper.pdf Says for large-v2 they trained on 1024 batch sizes.
+    DEFAULT_TRAIN_BSIZE = 8
+    DEFAULT_EVAL_BSIZE = 8
+
+    def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
+        super().__init__(name="hf_Whisper", test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)
+        self.feature_size = 80
+        self.sequence_length = 3000
+        input_features = torch.randn(size=(self.batch_size, self.feature_size, self.sequence_length),device=self.device)
+        self.example_inputs = {"input_features": input_features.to(self.device)}
+
+    def eval(self):
+        super().eval()
\ No newline at end of file
diff --git a/torchbenchmark/models/hf_Whisper/install.py b/torchbenchmark/models/hf_Whisper/install.py
new file mode 100644
index 0000000000..1a49905932
--- /dev/null
+++ b/torchbenchmark/models/hf_Whisper/install.py
@@ -0,0 +1,13 @@
+import subprocess
+import sys
+import os
+from torchbenchmark.util.framework.huggingface.patch_hf import patch_transformers, cache_model
+
+def pip_install_requirements():
+    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', '-r', 'requirements.txt'])
+
+if __name__ == '__main__':
+    pip_install_requirements()
+    patch_transformers()
+    model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
+    cache_model(model_name)
\ No newline at end of file
diff --git a/torchbenchmark/models/hf_Whisper/metadata.yaml b/torchbenchmark/models/hf_Whisper/metadata.yaml
new file mode 100644
index 0000000000..df5ea16643
--- /dev/null
+++ b/torchbenchmark/models/hf_Whisper/metadata.yaml
@@ -0,0 +1,10 @@
+devices:
+  NVIDIA A100-SXM4-40GB:
+    eval_batch_size: 8
+eval_benchmark: false
+eval_deterministic: false
+eval_nograd: true
+not_implemented:
+- jit: true
+train_benchmark: false
+train_deterministic: false
\ No newline at end of file
diff --git a/torchbenchmark/models/hf_Whisper/requirements.txt b/torchbenchmark/models/hf_Whisper/requirements.txt
new file mode 100644
index 0000000000..20d34b196a
--- /dev/null
+++ b/torchbenchmark/models/hf_Whisper/requirements.txt
@@ -0,0 +1,2 @@
+sentencepiece
+datasets
\ No newline at end of file
diff --git a/torchbenchmark/util/framework/huggingface/model_factory.py b/torchbenchmark/util/framework/huggingface/model_factory.py
index 137f5dcd23..dd39a3a185 100644
--- a/torchbenchmark/util/framework/huggingface/model_factory.py
+++ b/torchbenchmark/util/framework/huggingface/model_factory.py
@@ -8,7 +8,7 @@
 from torchbenchmark.util.model import BenchmarkModel
 from torchbenchmark.tasks import NLP
 import transformers
-from transformers import AutoConfig, ReformerConfig, BertConfig, GenerationConfig
+from transformers import AutoConfig, ReformerConfig, BertConfig, GenerationConfig, WhisperConfig
 from typing import Tuple
 
 class_models = {
@@ -27,6 +27,7 @@
     'hf_Bert': (512, 512, 'BertConfig()', 'AutoModelForMaskedLM'),
     # see https://huggingface.co/bert-large-cased
     'hf_Bert_large': (512, 512, 'BertConfig(hidden_size=1024, num_hidden_layers=24, num_attention_heads=16)', 'AutoModelForMaskedLM'),
+    'hf_Whisper': (1024, 1024, 'WhisperConfig()', 'AutoModelForAudioClassification'),
 }
 
 cpu_input_slice = {
diff --git a/torchbenchmark/util/framework/huggingface/patch_hf.py b/torchbenchmark/util/framework/huggingface/patch_hf.py
index 013e1a8a3c..901288d121 100644
--- a/torchbenchmark/util/framework/huggingface/patch_hf.py
+++ b/torchbenchmark/util/framework/huggingface/patch_hf.py
@@ -5,7 +5,7 @@
 import subprocess
 import sys
 from .model_factory import class_models
-from transformers import AutoConfig, ReformerConfig, BigBirdConfig, BertConfig
+from transformers import AutoConfig, ReformerConfig, BigBirdConfig, BertConfig, WhisperConfig
 
 PATCH_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "patches")
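For context on the synthetic input in patch 1: Whisper consumes log-mel spectrograms with 80 mel bins over 3000 frames, i.e. 30 seconds of 16 kHz audio at a 10 ms hop, which is where the (batch_size, 80, 3000) shape comes from. A minimal sketch, assuming transformers' WhisperFeatureExtractor with its defaults, of how that shape arises from real audio:

```python
import numpy as np
from transformers import WhisperFeatureExtractor

# Defaults: feature_size=80 mel bins, 30 s chunks at 16 kHz.
extractor = WhisperFeatureExtractor()
audio = np.zeros(16000 * 30)  # 30 s of silence as placeholder audio
features = extractor(audio, sampling_rate=16000, return_tensors="pt").input_features
print(features.shape)  # torch.Size([1, 80, 3000])
```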
From ba57c50aece1832c8315e37c0f3aeda8c059ab9f Mon Sep 17 00:00:00 2001
From: MaanavD
Date: Fri, 14 Jul 2023 14:33:24 -0700
Subject: [PATCH 2/8] Updated requirements, batch size.

---
 torchbenchmark/models/hf_Whisper/__init__.py      | 6 +++---
 torchbenchmark/models/hf_Whisper/requirements.txt | 3 +--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/torchbenchmark/models/hf_Whisper/__init__.py b/torchbenchmark/models/hf_Whisper/__init__.py
index 1f17863c9b..8975c6e381 100644
--- a/torchbenchmark/models/hf_Whisper/__init__.py
+++ b/torchbenchmark/models/hf_Whisper/__init__.py
@@ -4,9 +4,9 @@
 
 class Model(HuggingFaceModel):
     task = SPEECH.RECOGNITION
-    # https://cdn.openai.com/papers/whisper.pdf Says for large-v2 they trained on 1024 batch sizes.
-    DEFAULT_TRAIN_BSIZE = 8
-    DEFAULT_EVAL_BSIZE = 8
+    # https://cdn.openai.com/papers/whisper.pdf Says for large-v2 they trained on 1024 batch sizes, with 16 GPUs
+    DEFAULT_TRAIN_BSIZE = 64
+    DEFAULT_EVAL_BSIZE = 64
 
     def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
         super().__init__(name="hf_Whisper", test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)
diff --git a/torchbenchmark/models/hf_Whisper/requirements.txt b/torchbenchmark/models/hf_Whisper/requirements.txt
index 20d34b196a..fd0728f16f 100644
--- a/torchbenchmark/models/hf_Whisper/requirements.txt
+++ b/torchbenchmark/models/hf_Whisper/requirements.txt
@@ -1,2 +1 @@
-sentencepiece
-datasets
\ No newline at end of file
+numba
\ No newline at end of file
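The class_models entry added in patch 1 pairs a config string with an auto-model class name. Assuming the factory resolves these the same way as the existing entries (evaluating the config string and looking up the class in transformers), the hf_Whisper entry is roughly equivalent to this sketch:

```python
from transformers import AutoModelForAudioClassification, WhisperConfig

# Roughly what the 'hf_Whisper' entry resolves to (a sketch, not the factory's exact code).
config = WhisperConfig()  # defaults approximate openai/whisper-tiny
model = AutoModelForAudioClassification.from_config(config)
```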
From bb3f33116fbbf512c783d2b15f8c2f7cb6414843 Mon Sep 17 00:00:00 2001
From: MaanavD
Date: Fri, 14 Jul 2023 14:43:25 -0700
Subject: [PATCH 3/8] Updated to remove training.

---
 torchbenchmark/models/hf_Whisper/__init__.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/torchbenchmark/models/hf_Whisper/__init__.py b/torchbenchmark/models/hf_Whisper/__init__.py
index 8975c6e381..96b581dc40 100644
--- a/torchbenchmark/models/hf_Whisper/__init__.py
+++ b/torchbenchmark/models/hf_Whisper/__init__.py
@@ -5,9 +5,9 @@
 class Model(HuggingFaceModel):
     task = SPEECH.RECOGNITION
     # https://cdn.openai.com/papers/whisper.pdf Says for large-v2 they trained on 1024 batch sizes, with 16 GPUs
-    DEFAULT_TRAIN_BSIZE = 64
     DEFAULT_EVAL_BSIZE = 64
-
+    DEFAULT_Train_BSIZE = 64
+
     def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
         super().__init__(name="hf_Whisper", test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)
         self.feature_size = 80
@@ -16,4 +16,6 @@ def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
         self.example_inputs = {"input_features": input_features.to(self.device)}
 
     def eval(self):
-        super().eval()
\ No newline at end of file
+        super().eval()
+    def train(self):
+        raise NotImplementedError("Training is not implemented.")
\ No newline at end of file

From 116df9cb937b6921d16eba34fc504776bb40a6ee Mon Sep 17 00:00:00 2001
From: MaanavD
Date: Fri, 14 Jul 2023 17:11:55 -0700
Subject: [PATCH 4/8] Removed default train size. No training implemented.

---
 torchbenchmark/models/hf_Whisper/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/torchbenchmark/models/hf_Whisper/__init__.py b/torchbenchmark/models/hf_Whisper/__init__.py
index 96b581dc40..4d6b30772d 100644
--- a/torchbenchmark/models/hf_Whisper/__init__.py
+++ b/torchbenchmark/models/hf_Whisper/__init__.py
@@ -6,7 +6,6 @@ class Model(HuggingFaceModel):
     task = SPEECH.RECOGNITION
     # https://cdn.openai.com/papers/whisper.pdf Says for large-v2 they trained on 1024 batch sizes, with 16 GPUs
     DEFAULT_EVAL_BSIZE = 64
-    DEFAULT_Train_BSIZE = 64
 
     def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
         super().__init__(name="hf_Whisper", test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)
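With patches 3 and 4 applied, train() unconditionally raises, so any caller requesting the train test must be prepared to skip it. A hypothetical calling pattern (the TorchBench harness normally handles this; the driver below is illustrative only):

```python
from torchbenchmark.models.hf_Whisper import Model

m = Model(test="train", device="cuda")
try:
    m.train()
except NotImplementedError as err:
    # Training is intentionally unsupported for this model.
    print(f"hf_Whisper train test skipped: {err}")
```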
From c77ad909fcba4dc83944a9bf7ee289b2b066534f Mon Sep 17 00:00:00 2001
From: Mark Saroufim
Date: Tue, 25 Jul 2023 17:58:14 +0000
Subject: [PATCH 5/8] fix tests

---
 torchbenchmark/models/hf_Whisper/__init__.py   | 8 +++++---
 torchbenchmark/models/hf_Whisper/metadata.yaml | 2 ++
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/torchbenchmark/models/hf_Whisper/__init__.py b/torchbenchmark/models/hf_Whisper/__init__.py
index 4d6b30772d..e5f1bcd5c9 100644
--- a/torchbenchmark/models/hf_Whisper/__init__.py
+++ b/torchbenchmark/models/hf_Whisper/__init__.py
@@ -11,10 +11,12 @@ def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
         super().__init__(name="hf_Whisper", test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)
         self.feature_size = 80
         self.sequence_length = 3000
-        input_features = torch.randn(size=(self.batch_size, self.feature_size, self.sequence_length),device=self.device)
+        input_features = torch.randn(size=(self.batch_size, self.feature_size, self.sequence_length),device=self.device).half()
         self.example_inputs = {"input_features": input_features.to(self.device)}
+        self.model.to(self.device)
+
+    def get_module(self):
+        return self.model, (self.example_inputs)
 
-    def eval(self):
-        super().eval()
     def train(self):
         raise NotImplementedError("Training is not implemented.")
\ No newline at end of file
diff --git a/torchbenchmark/models/hf_Whisper/metadata.yaml b/torchbenchmark/models/hf_Whisper/metadata.yaml
index df5ea16643..04f4fd2918 100644
--- a/torchbenchmark/models/hf_Whisper/metadata.yaml
+++ b/torchbenchmark/models/hf_Whisper/metadata.yaml
@@ -6,5 +6,7 @@ eval_deterministic: false
 eval_nograd: true
 not_implemented:
 - jit: true
+- device: cpu
+  test: eval
 train_benchmark: false
 train_deterministic: false
\ No newline at end of file

From f232aac7a56d948eddaba7fdfed7b107b4d07830 Mon Sep 17 00:00:00 2001
From: Mark Saroufim
Date: Tue, 25 Jul 2023 20:05:15 +0000
Subject: [PATCH 6/8] fix eval test

---
 torchbenchmark/models/hf_Whisper/__init__.py   | 3 +--
 torchbenchmark/models/hf_Whisper/metadata.yaml | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/torchbenchmark/models/hf_Whisper/__init__.py b/torchbenchmark/models/hf_Whisper/__init__.py
index e5f1bcd5c9..6ff94c461c 100644
--- a/torchbenchmark/models/hf_Whisper/__init__.py
+++ b/torchbenchmark/models/hf_Whisper/__init__.py
@@ -4,8 +4,7 @@
 
 class Model(HuggingFaceModel):
     task = SPEECH.RECOGNITION
-    # https://cdn.openai.com/papers/whisper.pdf Says for large-v2 they trained on 1024 batch sizes, with 16 GPUs
-    DEFAULT_EVAL_BSIZE = 64
+    DEFAULT_EVAL_BSIZE = 8
 
     def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
         super().__init__(name="hf_Whisper", test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)
diff --git a/torchbenchmark/models/hf_Whisper/metadata.yaml b/torchbenchmark/models/hf_Whisper/metadata.yaml
index 04f4fd2918..1fadf0eafc 100644
--- a/torchbenchmark/models/hf_Whisper/metadata.yaml
+++ b/torchbenchmark/models/hf_Whisper/metadata.yaml
@@ -7,6 +7,5 @@ eval_nograd: true
 not_implemented:
 - jit: true
 - device: cpu
-  test: eval
 train_benchmark: false
 train_deterministic: false
\ No newline at end of file

From c3d5d100f27939af568acf874d4e50cd081eb96a Mon Sep 17 00:00:00 2001
From: Mark Saroufim
Date: Tue, 25 Jul 2023 21:30:20 +0000
Subject: [PATCH 7/8] push

---
 torchbenchmark/models/hf_Whisper/__init__.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/torchbenchmark/models/hf_Whisper/__init__.py b/torchbenchmark/models/hf_Whisper/__init__.py
index 6ff94c461c..a2d8126cfa 100644
--- a/torchbenchmark/models/hf_Whisper/__init__.py
+++ b/torchbenchmark/models/hf_Whisper/__init__.py
@@ -10,12 +10,10 @@ def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
         super().__init__(name="hf_Whisper", test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)
         self.feature_size = 80
         self.sequence_length = 3000
-        input_features = torch.randn(size=(self.batch_size, self.feature_size, self.sequence_length),device=self.device).half()
-        self.example_inputs = {"input_features": input_features.to(self.device)}
+        self.input_features = torch.randn(size=(self.batch_size, self.feature_size, self.sequence_length),device=self.device).half()
+        self.example_inputs = {"input_features": self.input_features.to(self.device), "input_ids" : self.input_features.to(self.device)}
         self.model.to(self.device)
 
-    def get_module(self):
-        return self.model, (self.example_inputs)
-
     def train(self):
-        raise NotImplementedError("Training is not implemented.")
\ No newline at end of file
+        raise NotImplementedError("Training is not implemented.")
+
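Note that after patch 7 the example inputs are still created with .half() while the model weights remain fp32, so a forward pass would hit a dtype mismatch; patch 8 below resolves this by building fp32 inputs and casting weights and inputs together. A small sketch of that consistent-casting pattern (the helper name is illustrative, not from the patch):

```python
import torch

def cast_to_fp16(model: torch.nn.Module, inputs: dict) -> tuple:
    # Cast weights and inputs together so conv/matmul dtypes agree.
    model = model.half()
    inputs = {k: v.half() for k, v in inputs.items()}
    return model, inputs
```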
From 9bca12c1b686588bcd151ba916c710b4a8401269 Mon Sep 17 00:00:00 2001
From: Mark Saroufim
Date: Wed, 26 Jul 2023 00:30:05 +0000
Subject: [PATCH 8/8] add support for half()

---
 torchbenchmark/models/hf_Whisper/__init__.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/torchbenchmark/models/hf_Whisper/__init__.py b/torchbenchmark/models/hf_Whisper/__init__.py
index a2d8126cfa..347caeb2bd 100644
--- a/torchbenchmark/models/hf_Whisper/__init__.py
+++ b/torchbenchmark/models/hf_Whisper/__init__.py
@@ -5,15 +5,24 @@
 class Model(HuggingFaceModel):
     task = SPEECH.RECOGNITION
     DEFAULT_EVAL_BSIZE = 8
+    DEFAULT_EVAL_CUDA_PRECISION = "fp16"
 
     def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
         super().__init__(name="hf_Whisper", test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)
         self.feature_size = 80
         self.sequence_length = 3000
-        self.input_features = torch.randn(size=(self.batch_size, self.feature_size, self.sequence_length),device=self.device).half()
+        self.input_features = torch.randn(size=(self.batch_size, self.feature_size, self.sequence_length),device=self.device)
         self.example_inputs = {"input_features": self.input_features.to(self.device), "input_ids" : self.input_features.to(self.device)}
         self.model.to(self.device)
 
     def train(self):
         raise NotImplementedError("Training is not implemented.")
-
+
+    def eval(self):
+        self.model.eval()
+        with torch.no_grad():
+            self.model(self.example_inputs["input_ids"])
+
+    def enable_fp16_half(self):
+        self.model.half()
+        self.example_inputs = {"input_features": self.input_features.half().to(self.device), "input_ids" : self.input_features.half().to(self.device)}
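Taken together, the series leaves hf_Whisper as an eval-only benchmark with opt-in fp16 on CUDA. A hypothetical standalone driver using only the methods defined above (the TorchBench harness normally drives this, and CPU stays listed under not_implemented in metadata.yaml):

```python
from torchbenchmark.models.hf_Whisper import Model

model = Model(test="eval", device="cuda")
model.enable_fp16_half()  # casts weights and example inputs to fp16
model.eval()              # one no-grad forward pass over the synthetic features
```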