Skip to content

Commit

Permalink
Improve language detection stability by 3-voting.
Browse files Browse the repository at this point in the history
  • Loading branch information
zh-plus committed Nov 2, 2023
1 parent c1c8cb8 commit 7778e78
Showing 1 changed file with 16 additions and 1 deletion.
17 changes: 16 additions & 1 deletion openlrc/prompter.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,22 @@ def check_format(self, messages, content):
return False

# Ensure the translated language is the target language
translated_lang = self.lan_detector.detect_language_of(' '.join(translation)).name.lower()
if len(translation) >= 3:
# Split into roughly three chunks and take the most common detected language, for detection stability
chunk_size = len(translation) // 3
translation_chunks = [translation[i:i + chunk_size] for i in range(0, len(translation), chunk_size)]
if len(translation_chunks) > 3:
translation_chunks[-2].extend(translation_chunks[-1])
translation_chunks.pop()

translated_langs = [self.lan_detector.detect_language_of(' '.join(chunk)).name.lower()
for chunk in translation_chunks]

# get the most common language
translated_lang = max(set(translated_langs), key=translated_langs.count)
else:
translated_lang = self.lan_detector.detect_language_of(' '.join(translation)).name.lower()

target_lang = Language.get(self.target_lang).language_name().lower()
if translated_lang != target_lang:
logger.warning(f'Translated language is {translated_lang}, not {target_lang}.')
Expand Down

0 comments on commit 7778e78

Please sign in to comment.