From 4053daf71ccc4387781d6426d9d891f7a84fda59 Mon Sep 17 00:00:00 2001 From: chpoit Date: Tue, 5 Jul 2022 12:40:13 -0700 Subject: [PATCH] fixed issues with tesseract loading --- data/translations/eng.json | 6 ++- data/translations/fra.json | 6 ++- data/versions.json | 6 +-- scripts/build_skill_files.py | 1 + scripts/check_missing_skills.py | 2 +- scripts/compare_charms.py | 19 ++++--- scripts/update_ver.py | 4 +- src/__main__.py | 45 +++++++++++----- src/arg_builder.py | 18 +++++-- src/charm_extraction.py | 4 +- src/resources.py | 20 +++++++ src/tesseract/tesseract_utils.py | 91 ++++++++++++++++++++++++++------ src/translator.py | 2 +- src/ui/MainWindow.py | 8 ++- src/updater/VersionChecker.py | 28 ++++++---- src/utils.py | 2 +- 16 files changed, 198 insertions(+), 64 deletions(-) diff --git a/data/translations/eng.json b/data/translations/eng.json index 46be416..76ddcc6 100644 --- a/data/translations/eng.json +++ b/data/translations/eng.json @@ -64,5 +64,9 @@ "new-skill-correction-update": "New automatic skill corrections were submitted (You: {}, New: {}), do you download them?", "wiki-button": "Open MHR Set Searcher", "wiki-clipboard": "Copy URL", - "wiki-attempt": "Attempting to open the website..." + "wiki-attempt": "Attempting to open the website...", + "tess-wait-5-retry": "Waiting 5 seconds before trying to download the language pack again...", + "tess-url-error": "A URL error occurred, this might be a DNS issue... retrying", + "tess-cannot-download": "Unable to download Tesseract language pack.", + "tess-not-found": "Tesseract was not found. Please install it, and if it is installed, pass it as a command line option at least once. If you have done both and it still isn't found, please open an issue on Github." 
} \ No newline at end of file diff --git a/data/translations/fra.json b/data/translations/fra.json index 629da95..01ed334 100644 --- a/data/translations/fra.json +++ b/data/translations/fra.json @@ -64,5 +64,9 @@ "new-skill-correction-update": "Des nouvelles corrections automatiques ont été soumises, (Locale: {}, Nouvelle: {}), voulez-vous les télécharger?", "wiki-button": "Visiter le MHR Set Searcher", "wiki-clipboard": "Copier l'URL", - "wiki-attempt": "Tentative d'ouverture du site web..." + "wiki-attempt": "Tentative d'ouverture du site web...", + "tess-wait-5-retry":"Pause de 5 secondes avant de ré-essayer le téléchargement du language pack...", + "tess-url-error":"Une erreur d'URL s'est produite, potentiellement causée par une erreur DNS...", + "tess-cannot-download":"Impossible de télécharger le language pack Tesseract.", + "tess-not-found": "Tesseract n'a pas été trouvé. Veuillez l'installer, s'il est installé, passez le à travers le command line au moins une fois. Si cette erreur continue d'apparaître, veuillez créer une issue sur Github." 
} \ No newline at end of file diff --git a/data/versions.json b/data/versions.json index c6ff64d..82f9f53 100644 --- a/data/versions.json +++ b/data/versions.json @@ -1,10 +1,10 @@ { - "app": "1.6.0", + "app": "1.6.1", "skills": "3.1.1", "languages": { - "eng": "1.1", + "eng": "1.2", "jpn": 0, - "fra": "1.1", + "fra": "1.2", "ita": 0, "deu": 0, "spa": 0, diff --git a/scripts/build_skill_files.py b/scripts/build_skill_files.py index 069c296..bf5ba88 100644 --- a/scripts/build_skill_files.py +++ b/scripts/build_skill_files.py @@ -91,6 +91,7 @@ def _printem(x): os.path.join(sp, "skill_mappings.en.json"), "w", encoding="utf-8" ) as skill_map_fp: json.dump(skills_reverse, skill_map_fp, ensure_ascii=False) + with open( os.path.join(sp, "skill_mappings.alt.json"), "w", encoding="utf-8" ) as skill_map_fp: diff --git a/scripts/check_missing_skills.py b/scripts/check_missing_skills.py index cd2f70c..3d03553 100644 --- a/scripts/check_missing_skills.py +++ b/scripts/check_missing_skills.py @@ -1,7 +1,7 @@ import os all_skills = set() -with open(os.path.join("data", "skill_list.txt")) as slf: +with open(os.path.join("data", "skill_list.txt"), encoding="utf-8") as slf: for line in slf.readlines(): skill_name = line.strip() all_skills.add(skill_name) diff --git a/scripts/compare_charms.py b/scripts/compare_charms.py index 32ad47a..4f5cd19 100644 --- a/scripts/compare_charms.py +++ b/scripts/compare_charms.py @@ -1,19 +1,21 @@ import os import sys import json -PACKAGE_PARENT = '..' -SCRIPT_DIR = os.path.dirname(os.path.realpath( - os.path.join(os.getcwd(), os.path.expanduser(__file__)))) + +PACKAGE_PARENT = ".." 
+SCRIPT_DIR = os.path.dirname( + os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__))) +) sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) from src.Charm import Charm f1 = "charms tesseract.json" f2 = "charms.json" -with open(f1) as f: +with open(f1, encoding="utf-8") as f: c1 = set(map(Charm.from_dict, json.load(f))) -with open(f2) as f: +with open(f2, encoding="utf-8") as f: c2 = set(map(Charm.from_dict, json.load(f))) c1 = set(c1) @@ -23,13 +25,14 @@ dif = c1.symmetric_difference(c2) -print("Diffs", len(dif)) +print("Diffs", len(dif)) if len(dif) == 0: exit() a = b = 0 -with open("missing_charms.json", "w", encoding="utf-8") as missing,\ - open("made_up_charms.json", "w", encoding="utf-8") as made_up: +with open("missing_charms.json", "w", encoding="utf-8") as missing, open( + "made_up_charms.json", "w", encoding="utf-8" +) as made_up: for i in dif: if i not in c1: a += 1 diff --git a/scripts/update_ver.py b/scripts/update_ver.py index 23a2f77..5656cfa 100644 --- a/scripts/update_ver.py +++ b/scripts/update_ver.py @@ -14,8 +14,8 @@ print("Writing", new_version, "to file") -with open(os.path.join("data", "versions.json"), "r") as f: +with open(os.path.join("data", "versions.json"), "r", encoding="utf-8") as f: versions = json.load(f) versions["app"] = new_version -with open(os.path.join("data", "versions.json"), "w") as f: +with open(os.path.join("data", "versions.json"), "w", encoding="utf-8") as f: json.dump(versions, f, indent=4) diff --git a/src/__main__.py b/src/__main__.py index 234b2f4..d6e04f4 100644 --- a/src/__main__.py +++ b/src/__main__.py @@ -17,12 +17,14 @@ from .ui.MainWindow import MainWindow from .translator import Translator from .resources import ( + default_lang, get_language_code, get_resource_path, get_app_language, save_app_language, get_game_language, save_game_language, + save_tesseract_location, ) from .updater.updater_utils import ( ask_main_update, @@ -36,7 +38,7 @@ import json 
logging.basicConfig( - filename="app.log", filemode="w", format="%(name)s - %(levelname)s - %(message)s" + filename="app.log", filemode="w", format="%(name)s - %(levelname)s - %(message)s",level='INFO', force=True ) logger = logging.getLogger(__name__) @@ -45,7 +47,15 @@ def handle_exception(exception, value, traceback): logger.error(f"An error occured {exception}, {value}, {str(traceback)}") logger.exception(f"An error occured") - print("An error occured", exception) + print("An error occured", exception, str(traceback)) + + +def reset_config(): + config_path = get_resource_path("CONFIG") + if os.path.exists(config_path): + os.remove(config_path) + + init_config(get_language_code(default_lang()), get_language_code(default_lang())) def init_config(app_language_code, skill_language_code): @@ -69,21 +79,30 @@ def init_config(app_language_code, skill_language_code): ) +def read_default_args(args): + if args.tess_dir is not None: + save_tesseract_location(args.tess_dir) + + if args.app_language is not None: + app_language_code = get_language_code(args.app_language) + save_app_language(app_language_code) + + if args.language is not None: + skill_language_code = get_language_code(args.language) + save_game_language(skill_language_code) + + def main(args): if args.license: print_licenses() sys.exit(0) - app_language_code = get_language_code(args.app_language) - skill_language_code = get_language_code(args.language) - - init_config(app_language_code, skill_language_code) if args.reset_config: - save_app_language(skill_language_code) - save_game_language(skill_language_code) + reset_config() app_language_code = get_app_language() skill_language_code = get_game_language() + init_config(app_language_code, skill_language_code) if args.console: run_in_console(args) @@ -92,7 +111,7 @@ def main(args): version_checker = VersionChecker() language_versions = version_checker.get_language_versions() - + for language_version in language_versions: lang, code, local, remote = 
language_version if local < remote: @@ -104,7 +123,10 @@ def main(args): pass main_window, translator = create_main_window( - args, skill_language_code, list(map(lambda x: x[0], language_versions)) + args, + skill_language_code, + app_language_code, + list(map(lambda x: x[0], language_versions)), ) new_app_update = ask_main_update(version_checker, main_window, translator) @@ -117,8 +139,7 @@ def main(args): main_window.mainloop() -def create_main_window(args, skill_language_code, app_langs): - app_language_code = get_language_code(args.app_language) +def create_main_window(args, skill_language_code, app_language_code, app_langs): translator = Translator(app_language_code) new_window = MainWindow(translator, args, skill_language_code, app_langs) new_window.report_callback_exception = handle_exception diff --git a/src/arg_builder.py b/src/arg_builder.py index fc19979..c202d56 100644 --- a/src/arg_builder.py +++ b/src/arg_builder.py @@ -35,6 +35,7 @@ def build_args(): help="Changes the Input directory for videos", default="inputs", ) + parser.add_argument( "-f", "--frames", @@ -44,6 +45,15 @@ def build_args(): default="frames", ) + parser.add_argument( + "-t", + "--tesseract", + dest="tess_dir", + required=False, + help="When you want to pass a custom tesseract location. It is saved to the config", + default=None, + ) + parser.add_argument( "-c", "--charm-json", @@ -66,8 +76,8 @@ def build_args(): "--language", dest="language", required=False, - help="Sets the language of the switch recording. (only works for first launch)", - default="English", + help="Sets the language of the switch recording. It is saved to the config", + default=None, ) parser.add_argument( @@ -75,8 +85,8 @@ def build_args(): "--app-language", dest="app_language", required=False, - help="Sets the language of the app. (only works for first launch)", - default="English", + help="Sets the language of the app. 
It is saved to the config", + default=None, ) parser.add_argument( diff --git a/src/charm_extraction.py b/src/charm_extraction.py index 30901ef..03f9d6a 100644 --- a/src/charm_extraction.py +++ b/src/charm_extraction.py @@ -254,7 +254,7 @@ def keep_existing_and_update(x): if charm.has_skills(): charms.append(charm) else: - logger.warn(_("logger-skill-less").format(frame_loc)) + logger.warning(_("logger-skill-less").format(frame_loc)) except Exception as e: logger.error(_("logger-charm-error").format(frame_loc, e)) logger.exception("Traceback") @@ -286,7 +286,7 @@ def save_duplicates(charms, mode="w"): charm_dupes[charm] = [] charm_dupes[charm].append(frame_loc) - with open(dupe_file_name, mode) as dupe_file: + with open(dupe_file_name, mode, encoding="utf-8") as dupe_file: for charm in filter(lambda x: len(charm_dupes[x]) > 1, charm_dupes): locations = charm_dupes[charm] dupe_file.write(f"{charm.to_dict()}\n") diff --git a/src/resources.py b/src/resources.py index 7ee42d3..b07b339 100644 --- a/src/resources.py +++ b/src/resources.py @@ -147,6 +147,11 @@ def get_english_skill_mapping_url(language="eng"): url = f"https://raw.githubusercontent.com/chpoit/utsushis-charm/master/data/skills/skill_mappings.en.json" return url +def get_wiki_url(skill_language_code): + if skill_language_code == "jpn": + return "https://mhrise.wiki-db.com/sim" + return "https://mhrise.wiki-db.com/sim/?hl=en" + def get_english_skill_mappping_location(): return os.path.join(get_resource_path("LOCAL_SKILLS"), "skill_mappings.en.json") @@ -214,12 +219,27 @@ def save_game_language(app_language_code): _write_config(config) +def get_tesseract_location(): + config = _load_config() + if "tesseract-directory" in config: + return config["tesseract-directory"] + return None + + +def save_tesseract_location(location): + config = _load_config() + config["tesseract-directory"] = location + _write_config(config) + + def reverse(dict_): reversed = {} for key in dict_: reversed[dict_[key]] = key return 
reversed +def default_lang(): + return "English" _local_root = os.getenv("LOCALAPPDATA") or HOME if WINDOWS else HOME _local_dir_name = "utsushis-charm" diff --git a/src/tesseract/tesseract_utils.py b/src/tesseract/tesseract_utils.py index cf0aa83..44c5a51 100644 --- a/src/tesseract/tesseract_utils.py +++ b/src/tesseract/tesseract_utils.py @@ -1,3 +1,5 @@ +from time import sleep +from urllib.error import URLError from .TesseractError import TesseractError import numpy as np import os @@ -10,7 +12,7 @@ import logging from pathlib import Path -from ..resources import get_language_from_code +from ..resources import get_language_from_code, get_tesseract_location logger = logging.getLogger(__name__) HOME = str(Path.home()) @@ -25,23 +27,62 @@ def _is_pyinstaller(): def _get_pyinstaller_tesseract_path(): base_path = sys._MEIPASS + bundled_paths = [] if WINDOWS: - bundled_path = os.path.join(base_path, "Tesseract-OCR", "libtesseract-5.dll") + bundled_paths += [ + os.path.join(base_path, "Tesseract-OCR", "libtesseract-5.dll"), + os.path.join(base_path, "libtesseract-5.dll"), + os.path.join(base_path, "Tesseract-OCR", "libtesseract-4.dll"), + os.path.join(base_path, "libtesseract-4.dll"), + ] else: - bundled_path = os.path.join(base_path, "libtesseract.so.5") - print("Expected Tesseract location", bundled_path) - return bundled_path + bundled_paths += [ + os.path.join(base_path, "libtesseract.so.5"), + os.path.join(base_path, "libtesseract.so.4"), + ] + return bundled_paths + + +def _get_config_tesseract_path(): + locations = [] + configLocation = get_tesseract_location() + + if configLocation is not None: + locations += [configLocation] + if WINDOWS: + locations += [ + os.path.join(configLocation, "libtesseract-5.dll"), + os.path.join(configLocation, "libtesseract-4.dll"), + os.path.join(configLocation, "libtesseract.dll"), + ] + else: + locations += [ + os.path.join(configLocation, "libtesseract.so.5"), + os.path.join(configLocation, "libtesseract.so.4"), + ] + return 
locations + + +def does_tess_exist(): + try: + find_tesseract(silent=False) + return True + except TesseractError: + return False + +def find_tesseract(silent=False): + locations = [] + locations += _get_config_tesseract_path() -def find_tesseract(): if _is_pyinstaller(): - print("Using bundled tesseract") - return _get_pyinstaller_tesseract_path() + print("Checking for bundled tesseract") + locations += _get_pyinstaller_tesseract_path() # TODO: Make this resilient to "change" (tesseract version), probably not necessary - locations = [ - ctypes.util.find_library("libtesseract-4"), # win32 + locations += [ ctypes.util.find_library("libtesseract-5"), # win32 + ctypes.util.find_library("libtesseract-4"), # win32 ctypes.util.find_library("libtesseract302"), # win32 version 3.2 ctypes.util.find_library("libtesseract"), # others ctypes.util.find_library("tesseract"), # others @@ -79,19 +120,24 @@ def find_tesseract(): # add potential environment paths here: # Example: # os.path.join(os.getenv("MACOS_ENV_NAME"), "Tesseract-OCR", "libtesseract-4.dll"), + # The locations should be covered by the ctypes.util.find_library("tesseract") call ] elif LINUX: locations += [ # add potential environment paths here: # Example: # os.path.join(os.getenv("LINUX_ENV_NAME"), "Tesseract-OCR", "libtesseract-4.dll"), + # The locations should be covered by the ctypes.util.find_library("tesseract") call ] for potential in filter(lambda x: x, locations): if os.path.isfile(potential) or potential.startswith("libtesseract.so."): + if not silent: + print(f"Tesseract: {potential}") logger.debug(f"Using tesseract at {potential}") return potential + logger.error("Tesseract library was not found on your system. Please install it") raise TesseractError( "Tesseract library was not found on your system. 
Please install it" ) @@ -114,7 +160,7 @@ def set_tessdata(): if "TESSDATA_PREFIX" in os.environ: return - path = find_tesseract() + path = find_tesseract(silent=True) path = os.path.dirname(path) TESSDATA_PREFIX = os.path.join(path, "tessdata") os.environ["TESSDATA_PREFIX"] = TESSDATA_PREFIX @@ -132,6 +178,9 @@ def get_datapath(): def download_language_data(lang="eng", _=lambda x: x, retry=False): + if retry: + print(_("tess-wait-5-retry")) + sleep(5) target_dir = get_datapath() os.makedirs(target_dir, exist_ok=True) full_name = os.path.join(target_dir, f"{lang}.traineddata") @@ -147,21 +196,29 @@ def download_language_data(lang="eng", _=lambda x: x, retry=False): ) print(_("tess-download-pack").format(pack_name, target_dir)) + e = None try: Path(full_name).touch() # "Simple" way to test if I can write there request.urlretrieve(url, filename=full_name, data=None) print(_("tess-download-done")) except PermissionError as e: - if retry: - raise TesseractError("Unable to download Tesseract language pack.") print(_("tess-permission-denied")) - - override_tessdata() - download_language_data(lang, _, retry=True) + if not retry: + override_tessdata() + download_language_data(lang, _, retry=True) + except URLError as e: + print(_("tess-url-error")) + if not retry: + download_language_data(lang, _, retry=True) + finally: + if e and retry: + raise TesseractError(_("tess-cannot-download")) + if e: + print(e) def process_image_with_tesseract(tesseract, image): - whitelist = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890'/-" + whitelist = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890'/-()" height, width = image.shape[:2] if len(image.shape) == 2: diff --git a/src/translator.py b/src/translator.py index fe24186..3447a89 100644 --- a/src/translator.py +++ b/src/translator.py @@ -18,7 +18,7 @@ def load_language(self, language): if not os.path.isfile(lang_file): raise Exception("Invalid language file") - with open(lang_file) as f: + with 
open(lang_file, encoding="utf-8") as f: self.translations = json.load(f) def get_tl(self, message_key): diff --git a/src/ui/MainWindow.py b/src/ui/MainWindow.py index 29c12f1..cc5d1f0 100644 --- a/src/ui/MainWindow.py +++ b/src/ui/MainWindow.py @@ -3,6 +3,8 @@ import json import tkinter as tk from tkinter import filedialog, END + +from ..tesseract.tesseract_utils import does_tess_exist from ..frame_extraction import extract_unique_frames from ..charm_extraction import extract_charms, save_charms, remove_duplicates from ..charm_encoding import encode_charms @@ -13,6 +15,7 @@ get_resource_path, get_language_code, get_language_list, + get_wiki_url, save_app_language, save_game_language, translate_lang, @@ -27,7 +30,7 @@ class MainWindow(tk.Tk): def __init__(self, _: Translator, args, skill_language_code, app_langs): super().__init__() - self.wiki_url = "https://mhrise.wiki-db.com/sim/?hl=en" + self.wiki_url = get_wiki_url(skill_language_code) self.charms = CharmList() self.args = args self._unchanged_langs = app_langs @@ -59,6 +62,9 @@ def __init__(self, _: Translator, args, skill_language_code, app_langs): self._regen_paths() self._build_ui() + if not does_tess_exist(): + print(_("tess-not-found")) + def refresh(self, _: Translator = None): if not _: _ = self._ diff --git a/src/updater/VersionChecker.py b/src/updater/VersionChecker.py index b9c48f1..3de0639 100644 --- a/src/updater/VersionChecker.py +++ b/src/updater/VersionChecker.py @@ -4,7 +4,14 @@ from urllib import request import logging -from ..resources import get_resource_path, get_update_url, get_versions_location, get_language_list, get_language_code, get_language_from_code +from ..resources import ( + get_resource_path, + get_update_url, + get_versions_location, + get_language_list, + get_language_code, + get_language_from_code, +) from .SimpleSemVer import SimpleSemVer logger = logging.getLogger(__name__) @@ -50,7 +57,7 @@ def is_outdated(self, local, remote): def get_language_versions(self): versions = 
[] language_list = get_language_list() - codes = list(map(get_language_code,language_list)) + codes = list(map(get_language_code, language_list)) for code in codes: local = self._get_version(True, "languages", code) remote = self._get_version(False, "languages", code) @@ -58,18 +65,20 @@ def get_language_versions(self): continue else: versions.append((code, local, remote)) - - return list(map(lambda x: (get_language_from_code(x[0]), x[0],x[1],x[2]), versions)) - + + return list( + map(lambda x: (get_language_from_code(x[0]), x[0], x[1], x[2]), versions) + ) + def _ensure_proper_app_version(self): - with open(get_resource_path("internal_versions")) as internal: + with open(get_resource_path("internal_versions"), encoding="utf-8") as internal: internal_data = json.load(internal) internal_version = internal_data["app"] self._write_new_version(internal_version, "app") def _write_new_version(self, version: SimpleSemVer, main_key, sub_key=None): version = str(version) - with open(get_resource_path("versions")) as local: + with open(get_resource_path("versions"), encoding="utf-8") as local: local_data = json.load(local) if sub_key is not None: @@ -77,10 +86,9 @@ def _write_new_version(self, version: SimpleSemVer, main_key, sub_key=None): else: local_data[main_key] = version - with open(get_resource_path("versions"), "w") as local: + with open(get_resource_path("versions"), "w", encoding="utf-8") as local: json.dump(local_data, local) - def _get_version(self, local: bool, main_key, sub_key=None): if local: versions = self._load_local_versions() @@ -102,7 +110,7 @@ def _load_local_versions(self): self._ensure_proper_app_version() - with open(version_path) as version_file: + with open(version_path, encoding="utf-8") as version_file: versions = json.load(version_file) return versions diff --git a/src/utils.py b/src/utils.py index 9c1f2ec..4ccda33 100644 --- a/src/utils.py +++ b/src/utils.py @@ -194,7 +194,7 @@ def print_licenses(): print("Third party licenses") for f in 
os.scandir(get_resource_path("LICENCES")): print(f"License for {f.name}") - with open(f.path, "r") as l_f: + with open(f.path, "r", encoding="utf-8") as l_f: print(l_f.read()) print("\n\n")