🧪 Add speech_recognition to tests for datasets and trainer

hezarai · Feb 5, 2024 · c95c4f5
1 parent c4dd743
commit c95c4f5
Show file tree

Hide file tree

Showing 2 changed files with 20 additions and 0 deletions.
diff --git a/tests/test_datasets.py b/tests/test_datasets.py
@@ -12,13 +12,15 @@
     "ocr": "hezarai/persian-license-plate-v1",
     "image-captioning": "hezarai/flickr30k-fa",
     "text-summarization": "hezarai/xlsum-fa",
+    "speech-recognition": "hezarai/common-voice-13-fa"
 }
 TASK_TO_TOKENIZER_MAPPING = {
     "text-classification": "hezarai/bert-base-fa",
     "sequence-labeling": "hezarai/bert-base-fa",
     "ocr": "hezarai/crnn-fa-printed-96-long",
     "image-captioning": "hezarai/roberta-base-fa",
     "text-summarization": "hezarai/t5-base-fa",
+    "speech-recognition": "hezarai/whisper-small-fa"
 }
 
 TASK_TO_REQUIRED_FIELDS = {
@@ -27,6 +29,7 @@
     "ocr": ["pixel_values", "labels"],
     "image-captioning": ["pixel_values", "labels"],
     "text-summarization": ["token_ids", "attention_mask", "labels"],
+    "speech-recognition": ["input_features", "labels"]
 }
 
 INVALID_DATASET_TYPE = "Dataset instance must be of type `Dataset`, got `{}`!"

diff --git a/tests/test_trainer.py b/tests/test_trainer.py
@@ -94,6 +94,23 @@
             "metrics": ["wer"]
         }
     },
+    "speech-recognition": {
+        "dataset": {
+            "path": "hezarai/common-voice-13-fa",
+            "config": {
+                "labels_max_length": 64,
+                "tokenizer_path": "hezarai/whisper-small",
+                "feature_extractor_path": "hezarai/whisper-small"
+            }
+        },
+        "model": {
+            "path": "hezarai/whisper-small"
+        },
+        "config": {
+            "task": "speech_recognition",
+            "metrics": ["wer", "cer"]
+        }
+    }
 }
 
 common_train_config = {