From 7778e781b3672db945c3af46b1d3defb9d372c60 Mon Sep 17 00:00:00 2001 From: zh-plus Date: Thu, 2 Nov 2023 17:19:55 +0800 Subject: [PATCH] Improve language detection stability by 3-voting. --- openlrc/prompter.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/openlrc/prompter.py b/openlrc/prompter.py index d7b130b..5723611 100644 --- a/openlrc/prompter.py +++ b/openlrc/prompter.py @@ -169,7 +169,22 @@ def check_format(self, messages, content): return False # Ensure the translated langauge is in the target language - translated_lang = self.lan_detector.detect_language_of(' '.join(translation)).name.lower() + if len(translation) >= 3: + # 3-voting for detection stability + chunk_size = len(translation) // 3 + translation_chunks = [translation[i:i + chunk_size] for i in range(0, len(translation), chunk_size)] + if len(translation_chunks) > 3: + translation_chunks[-2].extend(translation_chunks[-1]) + translation_chunks.pop() + + translated_langs = [self.lan_detector.detect_language_of(' '.join(chunk)).name.lower() + for chunk in translation_chunks] + + # get the most common language + translated_lang = max(set(translated_langs), key=translated_langs.count) + else: + translated_lang = self.lan_detector.detect_language_of(' '.join(translation)).name.lower() + target_lang = Language.get(self.target_lang).language_name().lower() if translated_lang != target_lang: logger.warning(f'Translated language is {translated_lang}, not {target_lang}.')