pytorch · MaanavD · Jul 14, 2023 · Jul 14, 2023 · Jul 14, 2023 · Jul 15, 2023
diff --git a/torchbenchmark/models/hf_Whisper/__init__.py b/torchbenchmark/models/hf_Whisper/__init__.py
@@ -0,0 +1,44 @@
+from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel
+from torchbenchmark.tasks import SPEECH
+import torch
+
+class Model(HuggingFaceModel):
+    """Benchmark wrapper for the ``hf_Whisper`` HuggingFace model, fed with random input features.
+
+    Only the model and its example inputs are set up here; ``train`` is
+    deliberately left unimplemented.
+    """
+
+    task = SPEECH.RECOGNITION
+    # https://cdn.openai.com/papers/whisper.pdf says large-v2 was trained with a batch size of 1024 on 16 GPUs.
+    # NOTE(review): metadata.yaml lists eval_batch_size 8 for the A100 -- confirm which value is intended.
+    DEFAULT_EVAL_BSIZE = 64
+
+    def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
+        """Build the model through ``HuggingFaceModel`` and create random example inputs.
+
+        All arguments are forwarded unchanged to ``HuggingFaceModel.__init__``.
+        ``extra_args`` keeps its list default so the signature stays as callers
+        expect; it is only passed through and never mutated here.
+        """
+        super().__init__(name="hf_Whisper", test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)
+        self.feature_size = 80
+        self.sequence_length = 3000
+        # Half-precision noise of shape (batch_size, feature_size, sequence_length),
+        # allocated directly on the target device so no extra transfer is needed.
+        input_features = torch.randn(
+            size=(self.batch_size, self.feature_size, self.sequence_length),
+            device=self.device,
+        ).half()
+        self.example_inputs = {"input_features": input_features}
+        self.model.to(self.device)
+
+    def get_module(self):
+        """Return ``(model, example_inputs)`` with the inputs as a tuple of positional arguments."""
+        # The trailing comma matters: ``(x)`` without it is just ``x``. A real
+        # one-element tuple is returned so the inputs can be unpacked with ``*``.
+        return self.model, (self.example_inputs["input_features"],)
+
+    def train(self):
+        """Training is not supported for this benchmark."""
+        raise NotImplementedError("Training is not implemented.")
diff --git a/torchbenchmark/models/hf_Whisper/install.py b/torchbenchmark/models/hf_Whisper/install.py
@@ -0,0 +1,21 @@
+import subprocess
+import sys
+import os
+from torchbenchmark.util.framework.huggingface.patch_hf import patch_transformers, cache_model
+
+def pip_install_requirements():
+    """Install the packages listed in the ``requirements.txt`` that sits next to this script.
+
+    Raises:
+        subprocess.CalledProcessError: if pip exits with a non-zero status.
+    """
+    # Resolve the file relative to this script so the install does not depend on the current working directory.
+    requirements = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'requirements.txt')
+    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', '-r', requirements])
+
+if __name__ == '__main__':
+    pip_install_requirements()
+    patch_transformers()
+    # The model name is the name of the directory containing this script.
+    model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
+    cache_model(model_name)
diff --git a/torchbenchmark/models/hf_Whisper/metadata.yaml b/torchbenchmark/models/hf_Whisper/metadata.yaml
@@ -0,0 +1,12 @@
+devices:
+  NVIDIA A100-SXM4-40GB:
+    eval_batch_size: 8  # NOTE(review): Model.DEFAULT_EVAL_BSIZE in __init__.py is 64 -- confirm which is intended
+eval_benchmark: false
+eval_deterministic: false
+eval_nograd: true
+not_implemented:  # NOTE(review): Model.train() raises NotImplementedError but no `test: train` entry is listed -- confirm
+- jit: true
+- device: cpu  # presumably because __init__.py casts the example input to half precision -- TODO confirm
+  test: eval
+train_benchmark: false
+train_deterministic: false
diff --git a/torchbenchmark/models/hf_Whisper/requirements.txt b/torchbenchmark/models/hf_Whisper/requirements.txt
@@ -0,0 +1 @@
+numba
diff --git a/torchbenchmark/util/framework/huggingface/model_factory.py b/torchbenchmark/util/framework/huggingface/model_factory.py
@@ -8,7 +8,7 @@
 from torchbenchmark.util.model import BenchmarkModel
 from torchbenchmark.tasks import NLP
 import transformers
-from transformers import AutoConfig, ReformerConfig, BertConfig, LlamaConfig, GenerationConfig
+from transformers import AutoConfig, ReformerConfig, BertConfig, GenerationConfig, WhisperConfig, LlamaConfig
 from typing import Tuple
 
 class_models = {
@@ -27,6 +27,7 @@
     'hf_Bert': (512, 512, 'BertConfig()', 'AutoModelForMaskedLM'),
     # see https://huggingface.co/bert-large-cased
     'hf_Bert_large': (512, 512, 'BertConfig(hidden_size=1024, num_hidden_layers=24, num_attention_heads=16)', 'AutoModelForMaskedLM'),
+    'hf_Whisper': (1024, 1024, 'WhisperConfig()', 'AutoModelForAudioClassification'),
     # default num_hidden_layers=32 but that OOMs, feel free to change this config to something more real
     'llama_v2_7b_16h' : (512,512, 'LlamaConfig(num_hidden_layers=16)', 'AutoModelForCausalLM'),
 }

diff --git a/torchbenchmark/util/framework/huggingface/patch_hf.py b/torchbenchmark/util/framework/huggingface/patch_hf.py
@@ -5,7 +5,8 @@
 import subprocess
 import sys
 from .model_factory import class_models
-from transformers import AutoConfig, ReformerConfig, BigBirdConfig, BertConfig, LlamaConfig
+from transformers import AutoConfig, ReformerConfig, BigBirdConfig, BertConfig, WhisperConfig, LlamaConfig
+
 
 PATCH_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "patches")