Skip to content

Commit

Permalink
Merge pull request #53 from WenjieDu/dev
Browse files Browse the repository at this point in the history
Enable users to migrate tsdb cache data home
  • Loading branch information
WenjieDu authored Dec 20, 2023
2 parents 69d48ac + cf78382 commit 210febf
Show file tree
Hide file tree
Showing 13 changed files with 104 additions and 11 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/testing_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]
python-version: ['3.7', '3.10']
python-version: ['3.7', '3.11']

steps:
- uses: actions/checkout@v3
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/testing_daily.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]
python-version: ["3.7", "3.10"]
python-version: ["3.7", "3.11"]

steps:
- name: Check out the repo code
Expand Down
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
include tsdb/config.ini
prune tests
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
numpy
scikit-learn
pandas
scipy
scipy
pyarrow
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ basic =
scikit-learn
pandas
scipy
pyarrow
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"classification",
"forecasting",
"partially observed",
"irregular sampled",
"irregularly sampled",
"partially-observed time series",
"incomplete time series",
"missing data",
Expand All @@ -48,6 +48,7 @@
"scikit-learn",
"pandas",
"scipy",
"pyarrow",
],
setup_requires=["setuptools>=38.6.0"],
classifiers=[
Expand Down
1 change: 1 addition & 0 deletions tests/environment_for_conda_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ dependencies:
- conda-forge::numpy
- conda-forge::scikit-learn
- conda-forge::pandas
- conda-forge::pyarrow

# test
- conda-forge::pytest-cov
14 changes: 12 additions & 2 deletions tests/test_tsdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,13 @@
from tsdb.utils.logging import Logger

DATASETS_TO_TEST = [
"ucr_uea_Wine",
"physionet_2012",
"physionet_2019",
"beijing_multisite_air_quality",
"electricity_load_diagrams",
"electricity_transformer_temperature",
"vessel_ais",
"ucr_uea_Wine",
]


Expand Down Expand Up @@ -45,7 +49,13 @@ def test_3_dataset_purging(self):
tsdb.delete_cache("physionet_2012") # delete single
tsdb.delete_cache() # delete all

def test_4_logging(self):
def test_4_migrate(self):
    """Check that tsdb.migrate moves a directory's content to a new location.

    A scratch directory containing one file is created, migrated into a
    not-yet-existing nested destination, and the result is verified.
    """
    # Local import: only this test needs shutil for cleaning scratch dirs.
    import shutil

    src_dir = "dir_for_migration"
    dst_root = "new_dir"
    dst_dir = os.path.join(dst_root, "put_it_here")

    # Drop leftovers from a previous (possibly failed) run so the test is
    # repeatable; otherwise makedirs/migrate would fail on existing paths.
    shutil.rmtree(src_dir, ignore_errors=True)
    shutil.rmtree(dst_root, ignore_errors=True)

    os.makedirs(src_dir, exist_ok=True)
    with open(os.path.join(src_dir, "test.txt"), "w") as f:
        f.write("hello world")

    try:
        tsdb.migrate(src_dir, dst_dir)

        # The source must be gone and its content must be at the destination.
        assert not os.path.exists(src_dir)
        migrated_file = os.path.join(dst_dir, "test.txt")
        assert os.path.isfile(migrated_file)
        with open(migrated_file, "r") as f:
            assert f.read() == "hello world"
    finally:
        # Do not leave scratch directories behind in the working directory.
        shutil.rmtree(src_dir, ignore_errors=True)
        shutil.rmtree(dst_root, ignore_errors=True)

def test_5_logging(self):
# different level logging
self.logger.debug("debug")
self.logger.info("info")
Expand Down
5 changes: 3 additions & 2 deletions tsdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
#
# Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
__version__ = "0.2.1"

__version__ = "0.3"

from .utils.file import migrate
from .data_processing import (
list,
load,
Expand Down Expand Up @@ -53,6 +53,7 @@
"CACHED_DATASET_DIR",
"pickle_dump",
"pickle_load",
"migrate",
# below are deprecated functions, import for now, will be removed in v0.2
"list_database",
"list_available_datasets",
Expand Down
2 changes: 2 additions & 0 deletions tsdb/config.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[path]
data_home = .tsdb
21 changes: 20 additions & 1 deletion tsdb/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,27 @@
# License: BSD-3-Clause

import os
from configparser import ConfigParser

from .utils.logging import logger

# Read the packaged config file that stores where TSDB caches datasets.
config = ConfigParser()
tsdb_config_path = os.path.join(os.path.dirname(__file__), "config.ini")
# ConfigParser.read() silently skips a missing file, hence the fallback below.
config.read(tsdb_config_path)

# Cache location used by TSDB releases before v0.3.
old_cached_dataset_dir = os.path.join(os.path.expanduser("~"), ".tsdb_cached_datasets")
# `data_home` is resolved against the user's home dir; os.path.join keeps an
# absolute `data_home` unchanged.
CACHED_DATASET_DIR = os.path.join(
    os.path.expanduser("~"), config.get("path", "data_home", fallback=".tsdb")
)
if os.path.exists(old_cached_dataset_dir):
    # Show a call the user can copy as is: the real keyword names of
    # tsdb.migrate and fully expanded paths (no literal "~").
    logger.warning(
        "‼️ Detected the home dir of the old version TSDB. "
        "Since v0.3, TSDB has changed the default cache dir to '~/.tsdb'. "
        "You can migrate downloaded datasets by invoking the new function "
        f"tsdb.migrate(old_path={old_cached_dataset_dir!r}, new_path={CACHED_DATASET_DIR!r})"
    )
    # Keep using the old cache until the user migrates it.
    CACHED_DATASET_DIR = old_cached_dataset_dir

CACHED_DATASET_DIR = os.path.join(os.path.expanduser("~"), ".tsdb_cached_datasets")

_DATABASE = {
# http://www.physionet.org/challenge/2012
Expand Down
2 changes: 1 addition & 1 deletion tsdb/loading_funcs/electricity_transformer_temperature.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def load_ett(local_path):
file_path = os.path.join(local_path, sub_set)
df = pd.read_csv(file_path, index_col="date")
df.index = pd.to_datetime(df.index)
df_name = sub_set.removesuffix(".csv")
df_name = sub_set.split(".csv")[0]
data[df_name] = df

return data
58 changes: 57 additions & 1 deletion tsdb/utils/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
import pickle
import shutil
from typing import Optional
from configparser import ConfigParser


from ..database import CACHED_DATASET_DIR
from .logging import logger
from ..database import CACHED_DATASET_DIR


def pickle_dump(data: object, path: str) -> Optional[str]:
Expand Down Expand Up @@ -94,3 +96,57 @@ def purge_path(path: str, ignore_errors: bool = True) -> None:
)
except shutil.Error:
raise shutil.Error("Operation failed.")


def migrate(old_path: str, new_path: str) -> None:
    """Migrate datasets from old_path to new_path.

    If ``new_path`` already exists, every entry of ``old_path`` is moved into
    it and ``old_path`` is removed afterwards. Otherwise ``old_path`` itself is
    moved to ``new_path`` (missing parent directories are created).

    When ``old_path`` is the current TSDB cache directory, the new location is
    written to TSDB's ``config.ini``. The ``CACHED_DATASET_DIR`` already
    imported by the running process is not reassigned here.

    Parameters
    ----------
    old_path:
        The old path of the dataset. A leading ``~`` is expanded.

    new_path:
        The new path of the dataset. A leading ``~`` is expanded.

    Raises
    ------
    FileNotFoundError
        If ``old_path`` does not exist.
    ValueError
        If ``new_path`` is the same as ``old_path`` or located inside it.
    """
    # Work on absolute, user-expanded paths: callers may pass "~/..." and a
    # relative new_path must not be re-interpreted relative to the home dir
    # when it is read back from the config file.
    old_path = os.path.abspath(os.path.expanduser(old_path))
    new_path = os.path.abspath(os.path.expanduser(new_path))

    if not os.path.exists(old_path):
        raise FileNotFoundError(f"Given old_path {old_path} does not exist.")

    # Moving a directory onto or into itself cannot work and could lose data.
    if new_path == old_path or new_path.startswith(old_path + os.sep):
        raise ValueError(
            f"Given new_path {new_path} must not be the same as "
            f"or inside old_path {old_path}."
        )

    if os.path.exists(new_path):
        logger.warning(f"Please note that new_path {new_path} already exists.")
        # if new_path exists, we have to move everything from old_path into it
        for entry in os.listdir(old_path):
            old_f_path = os.path.join(old_path, entry)
            if os.path.isdir(old_f_path):
                new_f_path = os.path.join(new_path, entry)
                shutil.copytree(old_f_path, new_f_path)
            else:
                shutil.move(old_f_path, new_path)
        # copied directories are still present in old_path, remove them now
        shutil.rmtree(old_path, ignore_errors=True)
    else:
        # if new_path does not exist, just move old_path to it
        os.makedirs(os.path.dirname(new_path), exist_ok=True)
        # shutil.move falls back to copy + delete when old_path and new_path
        # are on different filesystems, where a plain os.rename would fail.
        shutil.move(old_path, new_path)

    if old_path == os.path.abspath(CACHED_DATASET_DIR):
        config = ConfigParser()
        parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
        tsdb_config_path = os.path.join(parent_dir, "config.ini")
        config.read(tsdb_config_path)

        # config.read() ignores a missing file, so the section may be absent.
        if not config.has_section("path"):
            config.add_section("path")
        # "%" starts an interpolation in ConfigParser values and must be
        # escaped as "%%"; reading the option back yields a single "%".
        config.set("path", "data_home", new_path.replace("%", "%%"))
        with open(tsdb_config_path, "w") as f:
            config.write(f)

        logger.info(
            f"Found the given old_path is the current TSDB dataset cache directory. "
            f"Have already set the new cache directory to {new_path}."
        )

    logger.info(
        f"Successfully migrated {old_path} to {new_path}, and deleted {old_path}"
    )

0 comments on commit 210febf

Please sign in to comment.