diff --git a/openlrc/agents.py b/openlrc/agents.py index 8aa0218..c0fdbac 100644 --- a/openlrc/agents.py +++ b/openlrc/agents.py @@ -143,7 +143,9 @@ def build_context(self, texts, title='', glossary: Optional[dict] = None) -> str {'role': 'system', 'content': self.prompter.system()}, {'role': 'user', 'content': self.prompter.user(text_content, title=title, given_glossary=glossary)}, ] - resp = self.chatbot.message(messages_list, output_checker=self.prompter.check_format)[0] + resp = self.chatbot.message( + messages_list, stop_sequences=[self.prompter.stop_sequence], output_checker=self.prompter.check_format + )[0] context = self.chatbot.get_content(resp) context_pool = [context] diff --git a/openlrc/openlrc.py b/openlrc/openlrc.py index f535ee9..44a3473 100644 --- a/openlrc/openlrc.py +++ b/openlrc/openlrc.py @@ -188,7 +188,7 @@ def consumer_worker(self, transcription_queue, target_lang, skip_trans, bilingua subtitle_path = getattr(final_subtitle, f'to_{subtitle_format}')() result_path = subtitle_path.parents[1] / subtitle_path.name.replace(f'_preprocessed.{subtitle_format}', f'.{subtitle_format}') - shutil.copy(subtitle_path, result_path) + shutil.move(subtitle_path, result_path) if not skip_trans and bilingual_sub: bilingual_subtitle = BilingualSubtitle.from_preprocessed( @@ -199,14 +199,14 @@ def consumer_worker(self, transcription_queue, target_lang, skip_trans, bilingua # TODO: consider the edge case (audio file name contains _preprocessed) getattr(bilingual_subtitle, f'to_{subtitle_format}')() bilingual_lrc_path = bilingual_subtitle.filename.with_suffix(bilingual_subtitle.suffix) - shutil.copy(bilingual_lrc_path, result_path.parent / bilingual_lrc_path.name) + shutil.move(bilingual_lrc_path, result_path.parent / bilingual_lrc_path.name) non_translated_subtitle = transcribed_opt_sub optimizer = SubtitleOptimizer(non_translated_subtitle) optimizer.extend_time() # Extend 0.5s like what translated do getattr(non_translated_subtitle, f'to_{subtitle_format}')() non_translated_lrc_path = non_translated_subtitle.filename.with_suffix(non_translated_subtitle.suffix) - shutil.copy( + shutil.move( non_translated_lrc_path, result_path.parent / subtitle_path.name.replace( f'_preprocessed.{subtitle_format}', diff --git a/openlrc/prompter.py b/openlrc/prompter.py index e082b6d..032a6f3 100644 --- a/openlrc/prompter.py +++ b/openlrc/prompter.py @@ -190,6 +190,8 @@ def __init__(self, src_lang, target_lang): self.src_lang_display = Language.get(src_lang).display_name('en') self.target_lang_display = Language.get(target_lang).display_name('en') + self.stop_sequence = '<*--END-OF-CONTEXT--*>' + def system(self): return f'''You are a context reviewer responsible for ensuring the consistency and accuracy of translations between two languages. Your task involves reviewing and providing necessary contextual information for translations. @@ -221,7 +223,6 @@ def system(self): Then, they prepare to start their investigation. Example Output: - ### Glossary: - suspect: 嫌疑人 - uptown: 市中心 @@ -238,6 +239,8 @@ def system(self): ### Target Audience: The target audience is adult viewers with an interest in crime dramas. They are likely to be familiar with police procedurals and enjoy suspenseful storytelling. +{self.stop_sequence} + Note: @@ -247,7 +250,8 @@ def system(self): DO NOT include any translation segment. Sample Translation is NOT required for this task. You should adhere to the same format as the previous response, add or delete section is not allowed. -Remember to include the glossary, characters, summary, tone and style, and target audience sections in your response.''' +Remember to include the glossary, characters, summary, tone and style, and target audience sections in your response. +Remember to add {self.stop_sequence} after the generated contexts.''' def user(self, text, title='', given_glossary: Optional[dict] = None): glossary_text = f'Given glossary: {given_glossary}' if given_glossary else '' diff --git a/tests/test_validators.py b/tests/test_validators.py index beaa840..abb8468 100644 --- a/tests/test_validators.py +++ b/tests/test_validators.py @@ -53,14 +53,14 @@ class TestAtomicTranslateValidator(unittest.TestCase): def test_validate_returns_true_when_generated_content_matches_target_language(self): validator = AtomicTranslateValidator(target_lang='en') - user_input = "Hello" - generated_content = "Hello" + user_input = "你有什么问题?" + generated_content = "What's your problem?" result = validator.validate(user_input, generated_content) self.assertTrue(result) def test_validate_returns_false_when_generated_content_not_matches_target_language(self): - validator = AtomicTranslateValidator(target_lang='en') + validator = AtomicTranslateValidator(target_lang='cn-zh') user_input = "Hello" generated_content = "你好"