diff --git a/.github/workflows/testing_ci.yml b/.github/workflows/testing_ci.yml index 5a7e39b..f9222d2 100644 --- a/.github/workflows/testing_ci.yml +++ b/.github/workflows/testing_ci.yml @@ -18,7 +18,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macOS-latest] - python-version: ['3.7', '3.10'] + python-version: ['3.7', '3.11'] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/testing_daily.yml b/.github/workflows/testing_daily.yml index b3cb88d..3334f71 100644 --- a/.github/workflows/testing_daily.yml +++ b/.github/workflows/testing_daily.yml @@ -18,7 +18,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macOS-latest] - python-version: ["3.7", "3.10"] + python-version: ["3.7", "3.11"] steps: - name: Check out the repo code diff --git a/MANIFEST.in b/MANIFEST.in index 1eeef06..519f3c8 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,2 @@ +include tsdb/config.ini prune tests diff --git a/requirements.txt b/requirements.txt index 5177a78..fe720b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ numpy scikit-learn pandas -scipy \ No newline at end of file +scipy +pyarrow diff --git a/setup.cfg b/setup.cfg index 548bf4e..5d5f94d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,3 +27,4 @@ basic = scikit-learn pandas scipy + pyarrow diff --git a/setup.py b/setup.py index 4fbb59c..3f59c1d 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ "classification", "forecasting", "partially observed", - "irregular sampled", + "irregularly sampled", "partially-observed time series", "incomplete time series", "missing data", @@ -48,6 +48,7 @@ "scikit-learn", "pandas", "scipy", + "pyarrow", ], setup_requires=["setuptools>=38.6.0"], classifiers=[ diff --git a/tests/environment_for_conda_test.yml b/tests/environment_for_conda_test.yml index 66d0202..f3b533a 100644 --- a/tests/environment_for_conda_test.yml +++ b/tests/environment_for_conda_test.yml @@ -12,6 +12,7 @@ dependencies: - 
def test_4_migrate(self):
    """Check that ``tsdb.migrate`` moves a directory and its files.

    The whole scenario lives inside a temporary directory, so the test can
    be run repeatedly and leaves nothing behind in the working directory.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as workspace:
        old_dir = os.path.join(workspace, "dir_for_migration")
        # the parent "new_dir" is deliberately missing: migrate has to create it
        new_dir = os.path.join(workspace, "new_dir", "put_it_here")

        os.makedirs(old_dir)
        with open(os.path.join(old_dir, "test.txt"), "a") as f:
            f.write("hello world")

        tsdb.migrate(old_dir, new_dir)

        # the source directory is gone and its content arrived intact
        assert not os.path.exists(old_dir)
        with open(os.path.join(new_dir, "test.txt")) as f:
            assert f.read() == "hello world"
# 'X.Y.dev0' is the canonical version of 'X.Y.dev' -__version__ = "0.2.1" - +__version__ = "0.3" +from .utils.file import migrate from .data_processing import ( list, load, @@ -53,6 +53,7 @@ "CACHED_DATASET_DIR", "pickle_dump", "pickle_load", + "migrate", # below are deprecated functions, import for now, will be removed in v0.2 "list_database", "list_available_datasets", diff --git a/tsdb/config.ini b/tsdb/config.ini new file mode 100644 index 0000000..ce896fc --- /dev/null +++ b/tsdb/config.ini @@ -0,0 +1,2 @@ +[path] +data_home = .tsdb diff --git a/tsdb/database.py b/tsdb/database.py index bdf8d5f..5af1380 100644 --- a/tsdb/database.py +++ b/tsdb/database.py @@ -6,8 +6,27 @@ # License: BSD-3-Clause import os +from configparser import ConfigParser + +from .utils.logging import logger + +config = ConfigParser() +tsdb_config_path = os.path.join(os.path.dirname(__file__), "config.ini") +config.read(tsdb_config_path) + +old_cached_dataset_dir = os.path.join(os.path.expanduser("~"), ".tsdb_cached_datasets") +CACHED_DATASET_DIR = os.path.join( + os.path.expanduser("~"), config.get("path", "data_home") +) +if os.path.exists(old_cached_dataset_dir): + logger.warning( + "‼️ Detected the home dir of the old version TSDB. " + "Since v0.3, TSDB has changed the default cache dir to '~/.tsdb'. 
def migrate(old_path: str, new_path: str) -> None:
    """Migrate datasets from old_path to new_path.

    A leading ``~`` in either path is expanded to the user's home directory.
    If ``new_path`` does not exist, ``old_path`` is moved there as a whole and
    missing parent directories are created. If ``new_path`` already exists,
    every entry directly under ``old_path`` is moved into it and the emptied
    ``old_path`` is removed.

    When ``old_path`` is the cache directory TSDB currently uses
    (``CACHED_DATASET_DIR``), the ``data_home`` option in the ``[path]``
    section of the package's ``config.ini`` is rewritten to the absolute form
    of ``new_path``.

    Parameters
    ----------
    old_path:
        The old path of the dataset.

    new_path:
        The new path of the dataset.

    Raises
    ------
    FileNotFoundError
        If ``old_path`` does not exist.

    NotADirectoryError
        If ``new_path`` exists but is not a directory.

    FileExistsError
        If ``new_path`` already contains an entry with the same name as an
        entry of ``old_path``. Nothing is moved in that case.

    """
    old_path = os.path.expanduser(old_path)
    new_path = os.path.expanduser(new_path)

    if not os.path.exists(old_path):
        raise FileNotFoundError(f"Given old_path {old_path} does not exist.")

    old_abs = os.path.abspath(old_path)
    new_abs = os.path.abspath(new_path)
    if old_abs == new_abs:
        # moving a directory onto itself would at best fail and at worst
        # (empty directory) delete it, so treat it as a no-op
        logger.warning(
            f"old_path and new_path both point to {new_abs}, nothing to migrate."
        )
        return

    if os.path.exists(new_path):
        if not os.path.isdir(new_path):
            raise NotADirectoryError(
                f"Given new_path {new_path} exists but is not a directory."
            )
        logger.warning(f"Please note that new_path {new_path} already exists.")

        # if new_path exists, we have to move everything from old_path into it;
        # check for name clashes first so a failure cannot leave the data
        # split between the two directories
        entries = os.listdir(old_path)
        conflicts = sorted(
            name for name in entries if os.path.lexists(os.path.join(new_path, name))
        )
        if conflicts:
            raise FileExistsError(
                f"Cannot migrate {old_path} into {new_path}, "
                f"entries with the same name already exist there: {conflicts}"
            )
        for name in entries:
            # with an existing directory as destination, shutil.move places
            # the entry inside it; this works for files and directories alike
            shutil.move(os.path.join(old_path, name), new_path)
        shutil.rmtree(old_path, ignore_errors=True)
    else:
        # if new_path does not exist, just move the old_path onto it;
        # shutil.move falls back to copy-and-delete when a plain rename is
        # impossible, e.g. across filesystems
        os.makedirs(os.path.dirname(new_abs), exist_ok=True)
        shutil.move(old_path, new_path)

    if old_abs == os.path.abspath(CACHED_DATASET_DIR):
        config = ConfigParser()
        parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
        tsdb_config_path = os.path.join(parent_dir, "config.ini")
        config.read(tsdb_config_path)

        if not config.has_section("path"):
            config.add_section("path")
        # Store the absolute path so the value does not depend on the directory
        # it is later resolved against. "%" starts an interpolation in
        # ConfigParser and has to be doubled to be kept literally.
        config.set("path", "data_home", new_abs.replace("%", "%%"))
        with open(tsdb_config_path, "w") as f:
            config.write(f)

        logger.info(
            f"Found the given old_path is the current TSDB dataset cache directory. "
            f"Have already set the new cache directory to {new_abs}."
        )

    logger.info(
        f"Successfully migrated {old_path} to {new_path}, and deleted {old_path}"
    )