Skip to content

Commit

Permalink
Refactored the translation routine to prevent Google Translate from m…
Browse files Browse the repository at this point in the history
…essing with subtitles sequence by sending line by line (slower but better). #2558
  • Loading branch information
morpheus65535 committed Aug 24, 2024
1 parent 00c7eab commit 609349b
Showing 1 changed file with 43 additions and 28 deletions.
71 changes: 43 additions & 28 deletions bazarr/subtitles/tools/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,16 @@
from subliminal_patch.core import get_subtitle_path
from subzero.language import Language
from deep_translator import GoogleTranslator
from deep_translator.exceptions import TooManyRequests, RequestError, TranslationNotFound
from time import sleep
from concurrent.futures import ThreadPoolExecutor

from languages.custom_lang import CustomLanguage
from languages.get_languages import alpha3_from_alpha2, language_from_alpha2, language_from_alpha3
from radarr.history import history_log_movie
from sonarr.history import history_log
from subtitles.processing import ProcessSubtitlesResult
from app.event_handler import show_progress, hide_progress


def translate_subtitles_file(video_path, source_srt_file, from_lang, to_lang, forced, hi, media_type, sonarr_series_id,
Expand All @@ -33,8 +37,6 @@ def translate_subtitles_file(video_path, source_srt_file, from_lang, to_lang, fo

logging.debug(f'BAZARR is translating in {lang_obj} this subtitles {source_srt_file}')

max_characters = 5000

dest_srt_file = get_subtitle_path(video_path,
language=lang_obj if isinstance(lang_obj, Language) else lang_obj.subzero_language(),
extension='.srt',
Expand All @@ -44,40 +46,53 @@ def translate_subtitles_file(video_path, source_srt_file, from_lang, to_lang, fo
subs = pysubs2.load(source_srt_file, encoding='utf-8')
subs.remove_miscellaneous_events()
lines_list = [x.plaintext for x in subs]
joined_lines_str = '\n\n'.join(lines_list)

logging.debug(f'BAZARR splitting subtitles into {max_characters} characters blocks')
lines_block_list = []
translated_lines_list = []
while len(joined_lines_str):
partial_lines_str = joined_lines_str[:max_characters]
lines_list_len = len(lines_list)

if len(joined_lines_str) > max_characters:
new_partial_lines_str = partial_lines_str.rsplit('\n\n', 1)[0]
def translate_line(id, line, attempt):
    """Translate a single subtitle line and record the result.

    The outcome is appended to ``translated_lines`` (from the enclosing
    scope) as ``{'id': id, 'line': text}``. ``text`` is the translation on
    success, or the untouched source line when Google Translate keeps
    rejecting the request or cannot translate it, so every submitted line
    still gets an entry.

    :param id: index of the line in ``lines_list``; used by the caller to
        put the translated text back in its original position.
    :param line: plain text of the subtitle line to translate.
    :param attempt: 1-based number of the current attempt; callers pass 1.
        A ``TooManyRequests`` error is retried (after a one second pause)
        while ``attempt <= 5``.
    """
    # Fall back to the original text unless a translation succeeds.
    result_text = line
    try:
        while True:
            try:
                result_text = GoogleTranslator(
                    source='auto',
                    target=language_code_convert_dict.get(lang_obj.alpha2, lang_obj.alpha2)
                ).translate(text=line)
            except TooManyRequests:
                if attempt <= 5:
                    # Rate limited: back off briefly and retry. A plain loop
                    # is used so the retry cannot raise on its own and
                    # progress is reported only once per line.
                    sleep(1)
                    attempt += 1
                    continue
                logging.debug(f'Too many requests while translating {line}')
            except (RequestError, TranslationNotFound):
                logging.debug(f'Unable to translate line {line}')
            break

        translated_lines.append({'id': id, 'line': result_text})
    finally:
        # Always refresh the progress indicator, even if an unexpected
        # exception is propagating out of this worker.
        show_progress(id=f'translate_progress_{dest_srt_file}',
                      header=f'Translating subtitles lines to {language_from_alpha3(to_lang)}...',
                      name='',
                      value=len(translated_lines),
                      count=lines_list_len)

lines_block_list.append(new_partial_lines_str)
joined_lines_str = joined_lines_str.replace(new_partial_lines_str, '')
logging.debug(f'BAZARR is sending {lines_list_len} blocks to Google Translate')

logging.debug(f'BAZARR is sending {len(lines_block_list)} blocks to Google Translate')
for block_str in lines_block_list:
try:
translated_partial_srt_text = GoogleTranslator(source='auto',
target=language_code_convert_dict.get(lang_obj.alpha2,
lang_obj.alpha2)
).translate(text=block_str)
except Exception:
logging.exception(f'BAZARR Unable to translate subtitles {source_srt_file}')
return False
else:
translated_partial_srt_list = translated_partial_srt_text.split('\n\n')
translated_lines_list += translated_partial_srt_list
pool = ThreadPoolExecutor(max_workers=10)

translated_lines = []

for i, line in enumerate(lines_list):
pool.submit(translate_line, i, line, 1)

pool.shutdown(wait=True)

for i, line in enumerate(translated_lines):
lines_list[line['id']] = line['line']

hide_progress(id=f'translate_progress_{dest_srt_file}')

logging.debug(f'BAZARR saving translated subtitles to {dest_srt_file}')
for i, line in enumerate(subs):
try:
line.plaintext = translated_lines_list[i]
line.plaintext = lines_list[i]
except IndexError:
logging.error(f'BAZARR is unable to translate malformed subtitles: {source_srt_file}')
return False
Expand Down

0 comments on commit 609349b

Please sign in to comment.