From fc65729cbed7e007e6226b9808838209addf2730 Mon Sep 17 00:00:00 2001 From: Thomas Wood Date: Thu, 13 Jul 2023 09:49:12 +0100 Subject: [PATCH] comment and document --- src/harmony/util/file_helper.py | 11 +++++++++-- src/harmony/util/model_downloader.py | 16 ++++++++++++---- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/harmony/util/file_helper.py b/src/harmony/util/file_helper.py index 433e6ab..d13e6e5 100644 --- a/src/harmony/util/file_helper.py +++ b/src/harmony/util/file_helper.py @@ -8,6 +8,11 @@ def load_instruments_from_local_file(file_name: str) -> List[Instrument]: + """ + Open a local file (PDF, Excel, Word or TXT format) and parse it into a list of Instrument objects. + :param file_name: Local file path, either absolute or relative. + :return: List of Instruments. + """ if file_name.lower().endswith("pdf"): file_type = "pdf" elif file_name.lower().endswith("xlsx"): @@ -25,12 +30,14 @@ def load_instruments_from_local_file(file_name: str) -> List[Instrument]: file_as_base64 = base64.b64encode(file_as_bytes).decode('ascii') - harmony_file = RawFile(file_type=file_type, content="," + file_as_base64, file_id=uuid.uuid4().hex) + harmony_file = RawFile(file_type=file_type, content="," + file_as_base64, file_id=uuid.uuid4().hex, + instrument_name=file_name, file_name=file_name) else: with open( file_name, "r", encoding="utf-8") as f: file_as_string = f.read() - harmony_file = RawFile(file_type="txt", content=file_as_string, file_id=uuid.uuid4().hex) + harmony_file = RawFile(file_type="txt", content=file_as_string, file_id=uuid.uuid4().hex, + instrument_name=file_name, file_name=file_name) return convert_files_to_instruments([harmony_file]) diff --git a/src/harmony/util/model_downloader.py b/src/harmony/util/model_downloader.py index d72b94b..21572f5 100644 --- a/src/harmony/util/model_downloader.py +++ b/src/harmony/util/model_downloader.py @@ -1,17 +1,24 @@ import os import shutil +import sys import tarfile + import wget -import sys + def 
bar_custom(current, total, width=80): + """ + Display a progress bar to track the download. + :param current: Current bytes downloaded. + :param total: Total bytes. + :param width: Width of the bar in chars (currently unused). + """ print("Downloading: %d%% [%d / %d] bytes" % (current / total * 100, current, total), end="\r") -# List of model files that constitute the spaCy models. def download_models(is_force=False): """ - Downloads spaCy models to local. + Downloads spaCy models to local path HARMONY_SPACY_PATH, defaulting to the "harmony" folder in the user's home directory. """ local_path = os.getenv("HARMONY_SPACY_PATH", os.path.expanduser("~") + "/harmony") @@ -52,8 +59,9 @@ def download_models(is_force=False): os.remove(tmpfile) print(f"Deleted {tmpfile}.") + if __name__ == "__main__": - print ("Usage: python model_downloader.py --force [if you want to force overwrite of existing folder]") + print("Usage: python model_downloader.py --force [if you want to force overwrite of existing folder]") is_force = False if len(sys.argv) > 1 and "force" in sys.argv[1]: is_force = True