From 343ac5924382cc0aa9fa2c721880f8063a4f7853 Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Fri, 28 Jun 2024 13:32:11 +0800 Subject: [PATCH 1/4] feat: add check_path(); --- tsdb/utils/file.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tsdb/utils/file.py b/tsdb/utils/file.py index 2f2bd58..c1ad4c4 100644 --- a/tsdb/utils/file.py +++ b/tsdb/utils/file.py @@ -15,6 +15,29 @@ from .logging import logger +def check_path(path: str) -> str: + """Check the given path and return the absolute path. + + Parameters + ---------- + path : + The path to be checked. + + Returns + ------- + checked_path: + The absolute path of the given path. + """ + # expand the home dir if the path starts with "~" + if path.startswith("~"): + checked_path = path.replace("~", os.path.expanduser("~")) + else: + checked_path = path + + checked_path = os.path.abspath(checked_path) + return checked_path + + def pickle_dump(data: object, path: str) -> Optional[str]: """Pickle the given object. From dba978ecdac83dc30bd6dd563af5a40af0682988 Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Fri, 28 Jun 2024 13:43:55 +0800 Subject: [PATCH 2/4] refactor: update functions in tsdb.utils.file with check_path; --- tsdb/utils/file.py | 52 ++++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/tsdb/utils/file.py b/tsdb/utils/file.py index c1ad4c4..eb92e04 100644 --- a/tsdb/utils/file.py +++ b/tsdb/utils/file.py @@ -121,18 +121,19 @@ def purge_path(path: str, ignore_errors: bool = True) -> None: def determine_data_home(): + # default path + default_path = check_path("~/.pypots/tsdb") + # read data_home from the config file + # data_home may be changed by users, hence not necessarily equal to the default path config = read_configs() data_home_path = config.get("path", "data_home") - # replace '~' with the absolute path if existing in the path - data_home_path = data_home_path.replace("~", os.path.expanduser("~")) + data_home_path = check_path(data_home_path) # old cached dataset dir path used in TSDB v0.2 - old_cached_dataset_dir_02 = os.path.join( - os.path.expanduser("~"), ".tsdb_cached_datasets" - ) + old_cached_dataset_dir_02 = check_path("~/.tsdb_cached_datasets") # old cached dataset dir path used in TSDB v0.4 - old_cached_dataset_dir_04 = os.path.join(os.path.expanduser("~"), ".tsdb") + old_cached_dataset_dir_04 = check_path("~/.tsdb") if os.path.exists(old_cached_dataset_dir_02) or os.path.exists( old_cached_dataset_dir_04 @@ -150,15 +151,18 @@ def determine_data_home(): # use the path directly, may be in a portable disk cached_dataset_dir = data_home_path else: - # use the default path for initialization, - # e.g. `data_home_path` in a portable disk but the disk is not connected - default_path = os.path.join(os.path.expanduser("~"), ".pypots", "tsdb") - cached_dataset_dir = default_path - if os.path.abspath(data_home_path) != os.path.abspath(default_path): + # if the preset data_home path does not exist, + # e.g. `data_home_path` is in a portable disk that is not connected + # then use the default path + if check_path(data_home_path) != check_path(default_path): logger.warning( - f"‼️ The preset data_home path '{data_home_path}' doesn't exist. " - f"Using the default path '{default_path}'" + f"❗️ The preset data_home {data_home_path} doesn't exist. " + f"This may be caused by the portable disk not connected." ) + logger.warning(f"‼️ Using the default path {default_path} for now") + + cached_dataset_dir = default_path + return cached_dataset_dir @@ -174,20 +178,24 @@ def migrate(old_path: str, new_path: str) -> None: The new path of the dataset. """ + # check both old_path and new_path + old_path = check_path(old_path) + new_path = check_path(new_path) + + # check if old_path exists if not os.path.exists(old_path): raise FileNotFoundError(f"Given old_path {old_path} does not exist.") + # create new_path if not exists if not os.path.exists(new_path): - # if new_path does not exist, just rename the old_path into it - new_parent_dir = os.path.abspath(os.path.join(new_path, "..")) - if not os.path.exists(new_parent_dir): - os.makedirs(new_parent_dir, exist_ok=True) + os.makedirs(new_path, exist_ok=True) + else: + logger.warning(f"‼️ Note that new_path {new_path} already exists.") - logger.warning(f"‼️ Please note that new_path {new_path} already exists.") - # if new_path exists, we have to move everything from old_path into it all_old_files = os.listdir(old_path) for f in all_old_files: old_f_path = os.path.join(old_path, f) + if os.path.isdir(old_f_path): new_f_path = os.path.join(new_path, f) shutil.copytree(old_f_path, new_f_path) @@ -195,9 +203,8 @@ def migrate(old_path: str, new_path: str) -> None: shutil.move(old_f_path, new_path) shutil.rmtree(old_path, ignore_errors=True) - logger.info( - f"Successfully migrated {old_path} to {new_path}, and deleted {old_path}" - ) + logger.info(f"Successfully migrated {old_path} to {new_path}") + logger.info(f"Purged the old path {old_path}") def migrate_cache(target_path: str) -> None: @@ -209,6 +216,7 @@ def migrate_cache(target_path: str) -> None: The new path for TSDB to store cached datasets. """ + target_path = check_path(target_path) cached_dataset_dir = determine_data_home() migrate(cached_dataset_dir, target_path) config_parser = read_configs() From 7868c48127563783e33549175ced22f6bd8cf929 Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Fri, 28 Jun 2024 13:44:14 +0800 Subject: [PATCH 3/4] refactor: update functions in tsdb.utils.file with check_path; --- tsdb/utils/file.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tsdb/utils/file.py b/tsdb/utils/file.py index eb92e04..76c7170 100644 --- a/tsdb/utils/file.py +++ b/tsdb/utils/file.py @@ -99,6 +99,8 @@ def purge_path(path: str, ignore_errors: bool = True) -> None: Errors are ignored if ignore_errors is set. """ + # check the path + path = check_path(path) assert os.path.exists( path ), f"The given path {path} does not exists. Operation aborted." @@ -216,7 +218,9 @@ def migrate_cache(target_path: str) -> None: The new path for TSDB to store cached datasets. """ + # check the target path target_path = check_path(target_path) + cached_dataset_dir = determine_data_home() migrate(cached_dataset_dir, target_path) config_parser = read_configs() From e6d536e6c128190a2cce3c03643c8ec069da56e1 Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Fri, 28 Jun 2024 14:07:40 +0800 Subject: [PATCH 4/4] feat: add the arg check_exists check_path(); --- tsdb/utils/file.py | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/tsdb/utils/file.py b/tsdb/utils/file.py index 76c7170..818b25b 100644 --- a/tsdb/utils/file.py +++ b/tsdb/utils/file.py @@ -15,7 +15,10 @@ from .logging import logger -def check_path(path: str) -> str: +def check_path( + path: str, + check_exists: bool = False, +) -> str: """Check the given path and return the absolute path. Parameters @@ -23,10 +26,14 @@ def check_path(path: str) -> str: path : The path to be checked. + check_exists : + If True, check if the path exists, and will raise an AssertionError if the path does not exist. + Returns ------- checked_path: The absolute path of the given path. + """ # expand the home dir if the path starts with "~" if path.startswith("~"): @@ -35,6 +42,12 @@ def check_path(path: str) -> str: checked_path = path checked_path = os.path.abspath(checked_path) + + if check_exists: + assert os.path.exists( + checked_path + ), f"The given path {checked_path} does not exists" + return checked_path @@ -54,6 +67,9 @@ def pickle_dump(data: object, path: str) -> Optional[str]: `path` if succeed else None """ + # check the given path + path = check_path(path) + try: with open(path, "wb") as f: pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL) @@ -78,6 +94,8 @@ def pickle_load(path: str) -> object: Pickled object. """ + # check the given path + path = check_path(path, check_exists=True) try: with open(path, "rb") as f: data = pickle.load(f) @@ -99,11 +117,8 @@ def purge_path(path: str, ignore_errors: bool = True) -> None: Errors are ignored if ignore_errors is set. """ - # check the path + # check the given path, no need to check if the path exists because ignore_errors is set path = check_path(path) - assert os.path.exists( - path - ), f"The given path {path} does not exists. Operation aborted." try: if os.path.isdir(path): @@ -181,13 +196,9 @@ def migrate(old_path: str, new_path: str) -> None: """ # check both old_path and new_path - old_path = check_path(old_path) + old_path = check_path(old_path, check_exists=True) new_path = check_path(new_path) - # check if old_path exists - if not os.path.exists(old_path): - raise FileNotFoundError(f"Given old_path {old_path} does not exist.") - # create new_path if not exists if not os.path.exists(new_path): os.makedirs(new_path, exist_ok=True)