diff --git a/openlrc/openlrc.py b/openlrc/openlrc.py index 1008193..5fc0cbb 100644 --- a/openlrc/openlrc.py +++ b/openlrc/openlrc.py @@ -424,12 +424,19 @@ def to_json(segments: List[Segment], name, lang): 'segments': [] } - for segment in segments: + if not segments: result['segments'].append({ - 'start': segment.start, - 'end': segment.end, - 'text': segment.text + 'start': 0.0, + 'end': 5.0, + 'text': "no speech found" }) + else: + for segment in segments: + result['segments'].append({ + 'start': segment.start, + 'end': segment.end, + 'text': segment.text + }) with open(name, 'w', encoding='utf-8') as f: json.dump(result, f, ensure_ascii=False, indent=4) diff --git a/openlrc/transcribe.py b/openlrc/transcribe.py index a4a4e67..a2f03d7 100644 --- a/openlrc/transcribe.py +++ b/openlrc/transcribe.py @@ -98,10 +98,12 @@ def transcribe(self, audio_path: Union[str, Path], language: Optional[str] = Non if timestamps < info.duration: # silence at the end of the audio pbar.update(info.duration - timestamps) - assert segments, f'No voice found for {audio_path}' - - with Timer('Sentence Segmentation'): - result = self.sentence_split(segments, info.language) + if not segments: + logger.warning(f'No speech found for {audio_path}') + result = [] + else: + with Timer('Sentence Segmentation'): + result = self.sentence_split(segments, info.language) info = TranscriptionInfo(language=info.language, duration=get_audio_duration(audio_path), duration_after_vad=info.duration_after_vad) diff --git a/tests/data/test_nospeech_video.mp4 b/tests/data/test_nospeech_video.mp4 new file mode 100644 index 0000000..f64f31a Binary files /dev/null and b/tests/data/test_nospeech_video.mp4 differ diff --git a/tests/test_openlrc.py b/tests/test_openlrc.py index e502fd4..b14a4ce 100644 --- a/tests/test_openlrc.py +++ b/tests/test_openlrc.py @@ -31,6 +31,7 @@ class TestLRCer(unittest.TestCase): def setUp(self) -> None: self.audio_path = Path('data/test_audio.wav') self.video_path = Path('data/test_video.mp4') + self.nospeech_video_path = Path('data/test_nospeech_video.mp4') def tearDown(self) -> None: def clear_paths(input_path): @@ -78,6 +79,11 @@ def test_video_file_transcription_translation(self): result = lrcer.run('data/test_video.mp4') self.assertTrue(result) + def test_nospeech_video_file_transcription_translation(self): + lrcer = LRCer(whisper_model='tiny', device='cpu', compute_type='default') + result = lrcer.run('data/test_nospeech_video.mp4') + self.assertTrue(result) + @patch('openlrc.translate.LLMTranslator.translate', MagicMock(side_effect=Exception('test exception'))) def test_translation_error(self): lrcer = LRCer(whisper_model='tiny', device='cpu', compute_type='default')