Merge pull request #23 from kadirnar/add-new-parameter

Update ASR model and add batch size and return_timestamps
kadirnar · Nov 24, 2023 · 703a17d
2 parents 9ceb18b + 0aab9f8
commit 703a17d
Show file tree

Hide file tree

Showing 2 changed files with 5 additions and 3 deletions.
diff --git a/README.md b/README.md
@@ -79,7 +79,7 @@ pipeline = ASRDiarizationPipeline.from_pretrained(
     device=device,
 )
 
-output_text = pipeline(audio_path)
+output_text = pipeline(audio_path, num_speakers=2, min_speaker=1, max_speaker=2)
 dialogue = format_speech_to_dialogue(output_text)
 print(dialogue)
 ```

diff --git a/whisperplus/pipelines/whisper_diarize.py b/whisperplus/pipelines/whisper_diarize.py
@@ -24,7 +24,7 @@ def __init__(
     @classmethod
     def from_pretrained(
         cls,
-        asr_model: Optional[str] = "openai/whisper-medium",
+        asr_model: Optional[str] = "openai/whisper-large-v3",
         *,
         diarizer_model: Optional[str] = "pyannote/speaker-diarization",
         chunk_length_s: Optional[int] = 30,
@@ -35,7 +35,9 @@ def from_pretrained(
             "automatic-speech-recognition",
             model=asr_model,
             chunk_length_s=chunk_length_s,
-            token=use_auth_token,  # 08/25/2023: Changed argument from use_auth_token to token
+            token=use_auth_token,
+            batch_size=24,
+            return_timestamps=True,
             **kwargs,
         )
         diarization_pipeline = Pipeline.from_pretrained(diarizer_model, use_auth_token=use_auth_token)