Fix missed mp3 files issue.

zh-plus · Dec 9, 2024 · 51dd6e4
1 parent 972282d
commit 51dd6e4
Show file tree

Hide file tree

Showing 4 changed files with 32 additions and 27 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,17 @@
+## 1.6.1
+
+Hotfix for the missed mp3 files issue.
+
+## 1.6.0
+
+Update faster-whisper to the latest version. Add models.py for model info.
+
+### Other Changes:
+
+- Fixed issue #60.
+- Changed default parameters for the new faster-whisper.
+- Updated the installation guide in README.
+
 ## 1.5.2
 
 Code refactoring, documentation updates, and minor bug fixes.

diff --git a/openlrc/chatbot.py b/openlrc/chatbot.py
@@ -223,7 +223,8 @@ async def _create_achat(self, messages: List[Dict], stop_sequences: Optional[Lis
                     temperature=self.temperature,
                     top_p=self.top_p,
                     response_format={'type': 'json_object' if self.json_mode else 'text'},
-                    stop=stop_sequences
+                    stop=stop_sequences,
+                    max_tokens=self.model_info.max_tokens
                 )
                 self.update_fee(response)
                 if response.choices[0].finish_reason == 'length':
@@ -300,13 +301,13 @@ async def _create_achat(self, messages: List[Dict], stop_sequences: Optional[Lis
         for i in range(self.retry):
             try:
                 response = await self.async_client.messages.create(
-                    max_tokens=8192,
                     model=self.model_name,
                     messages=messages,
                     system=system_msg,
                     temperature=self.temperature,
                     top_p=self.top_p,
-                    stop_sequences=stop_sequences
+                    stop_sequences=stop_sequences,
+                    max_tokens=self.model_info.max_tokens,
                 )
                 self.update_fee(response)
 

diff --git a/openlrc/openlrc.py b/openlrc/openlrc.py
@@ -171,7 +171,7 @@ def translation_worker(self, transcription_queue, target_lang, skip_trans, bilin
         def process_translation(base_name, target_lang, transcribed_opt_sub, skip_trans):
             translated_path = extend_filename(transcribed_opt_sub.filename, '_translated')
             final_json_path = translated_path.with_name(f'{base_name}.json')
-            
+
             if final_json_path.exists():
                 return Subtitle.from_json(final_json_path)
 
@@ -440,39 +440,29 @@ def to_json(segments: List[Segment], name, lang):
 
     def pre_process(self, paths, noise_suppress=False):
         """
-        Preprocess the input audio/video files.
+        Preprocess input audio/video files.
 
         Args:
-            paths (List[Path]): List of paths to the input files.
-            noise_suppress (bool): Whether to apply noise suppression.
+            paths (List[Path]): Input file paths
+            noise_suppress (bool): Apply noise suppression if True
 
         Returns:
-            List[Path]: List of paths to the preprocessed audio files.
-
-        This method handles the initial processing of input files, including
-        audio extraction from videos and noise suppression if requested.
+            List[Path]: Preprocessed audio file paths
         """
-        paths = list(set(Path(path) for path in paths))
+        paths = [Path(p) for p in set(paths)]
 
-        # Check if path is audio or video
         for i, path in enumerate(paths):
-            if not path.exists() or not path.is_file():
+            if not path.is_file():
                 raise FileNotFoundError(f'File not found: {path}')
 
             if get_file_type(path) == 'video':
                 self.from_video.add(path.with_suffix(''))
+                audio_path = path.with_suffix('.wav')
+                if not audio_path.exists():
+                    extract_audio(path)
+                paths[i] = audio_path
 
-            extracted_audio_path = path.with_suffix('.wav')
-            if not extracted_audio_path.exists():
-                extract_audio(path)
-
-            paths[i] = extracted_audio_path
-
-        # Audio-based process
-        preprocessor = Preprocessor(paths, options=self.preprocess_options)
-        paths = preprocessor.run(noise_suppress)
-
-        return paths
+        return Preprocessor(paths, options=self.preprocess_options).run(noise_suppress)
 
     @staticmethod
     def post_process(transcribed_sub: Path, output_name: Path = None, remove_files: List[Path] = None,

diff --git a/pyproject.toml b/pyproject.toml
@@ -8,7 +8,7 @@ in-project = true
 
 [tool.poetry]
 name = "openlrc"
-version = "1.6.0"
+version = "1.6.1"
 description = "Transcribe (whisper) and translate (gpt) voice into LRC file."
 license = "MIT"
 authors = [
@@ -56,7 +56,7 @@ filetype = "^1.2.0"
 jaconvV2 = "^0.4"
 spacy = "^3.7.0"
 pysbd = "^0.3.4"
-faster-whisper = "^1.1.0"
+faster-whisper = { url = "https://github.com/SYSTRAN/faster-whisper/archive/8327d8cc647266ed66f6cd878cf97eccface7351.tar.gz" }
 soundfile = "^0.12.1"
 ffmpeg-normalize = "^1.27.5"
 deepfilternet = "^0.5.6"