Skip to content

Commit

Permalink
feat: enable users to migrate their tsdb cache dataset home;
Browse files Browse the repository at this point in the history
  • Loading branch information
WenjieDu committed Dec 20, 2023
1 parent 6e95d14 commit a89efb6
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 5 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
include tsdb/config.ini
prune tests
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"classification",
"forecasting",
"partially observed",
"irregular sampled",
"irregularly sampled",
"partially-observed time series",
"incomplete time series",
"missing data",
Expand Down
5 changes: 3 additions & 2 deletions tsdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
#
# Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
__version__ = "0.2.1"

__version__ = "0.3"

from .utils.file import migrate
from .data_processing import (
list,
load,
Expand Down Expand Up @@ -53,6 +53,7 @@
"CACHED_DATASET_DIR",
"pickle_dump",
"pickle_load",
"migrate",
# below are deprecated functions, import for now, will be removed in v0.2
"list_database",
"list_available_datasets",
Expand Down
2 changes: 2 additions & 0 deletions tsdb/config.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[path]
data_home = .tsdb
21 changes: 20 additions & 1 deletion tsdb/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,27 @@
# License: BSD-3-Clause

import os
from configparser import ConfigParser

from .utils.logging import logger

# Read TSDB's packaged configuration. ConfigParser.read() silently skips
# files it cannot find, so a fallback is supplied below for that case.
config = ConfigParser()
tsdb_config_path = os.path.join(os.path.dirname(__file__), "config.ini")
config.read(tsdb_config_path)

# Cache directory used by TSDB releases before v0.3.
old_cached_dataset_dir = os.path.join(os.path.expanduser("~"), ".tsdb_cached_datasets")
# `data_home` is resolved against the user's home directory; if it holds an
# absolute path, os.path.join() discards the home prefix and uses it as is.
CACHED_DATASET_DIR = os.path.join(
    os.path.expanduser("~"), config.get("path", "data_home", fallback=".tsdb")
)
if os.path.exists(old_cached_dataset_dir):
    # The suggested call uses the real parameter names of tsdb.migrate() and
    # fully expanded, quoted paths, so it can be copy-pasted and run as is.
    logger.warning(
        "‼️ Detected the home dir of the old version TSDB. "
        "Since v0.3, TSDB has changed the default cache dir to '~/.tsdb'. "
        "You can migrate downloaded datasets by invoking the new function "
        f"tsdb.migrate(old_path={old_cached_dataset_dir!r}, "
        f"new_path={CACHED_DATASET_DIR!r})"
    )
    # Keep using the old directory until the user migrates it explicitly.
    CACHED_DATASET_DIR = old_cached_dataset_dir

CACHED_DATASET_DIR = os.path.join(os.path.expanduser("~"), ".tsdb_cached_datasets")

_DATABASE = {
# http://www.physionet.org/challenge/2012
Expand Down
55 changes: 54 additions & 1 deletion tsdb/utils/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
import pickle
import shutil
from typing import Optional
from configparser import ConfigParser


from ..database import CACHED_DATASET_DIR
from .logging import logger
from ..database import CACHED_DATASET_DIR


def pickle_dump(data: object, path: str) -> Optional[str]:
Expand Down Expand Up @@ -94,3 +96,54 @@ def purge_path(path: str, ignore_errors: bool = True) -> None:
)
except shutil.Error:
raise shutil.Error("Operation failed.")


def migrate(old_path: str, new_path: str) -> None:
    """Migrate datasets from old_path to new_path.

    If ``new_path`` does not exist, ``old_path`` is moved there as a whole.
    If it already exists, the content of ``old_path`` is merged into it and
    ``old_path`` is removed afterwards. When ``old_path`` is the current TSDB
    dataset cache directory, ``data_home`` in TSDB's ``config.ini`` is updated
    to the absolute form of ``new_path``.

    Parameters
    ----------
    old_path:
        The old path of the dataset. A leading ``~`` is expanded.

    new_path:
        The new path of the dataset. A leading ``~`` is expanded.

    Raises
    ------
    FileNotFoundError
        If ``old_path`` does not exist.

    ValueError
        If ``new_path`` is ``old_path`` itself or is located inside it.

    NotADirectoryError
        If ``new_path`` exists but is not a directory.

    shutil.Error
        If an entry of ``old_path`` collides with an existing entry of
        ``new_path`` that it cannot be merged with. Entries moved before the
        collision stay in ``new_path`` and ``old_path`` is not deleted.
    """
    # Work on expanded absolute paths so that "~/..." arguments are accepted
    # and a relative new_path is not re-interpreted once stored in the config.
    old_dir = os.path.abspath(os.path.expanduser(old_path))
    new_dir = os.path.abspath(os.path.expanduser(new_path))

    if not os.path.exists(old_dir):
        raise FileNotFoundError(f"Given old_path {old_path} does not exist.")

    # Moving a directory into itself cannot succeed, and removing old_path
    # afterwards would destroy the freshly migrated data as well.
    old_key = os.path.normcase(old_dir)
    new_key = os.path.normcase(new_dir)
    if new_key == old_key or new_key.startswith(old_key.rstrip(os.sep) + os.sep):
        raise ValueError(
            f"new_path {new_path} must not be old_path {old_path} itself "
            f"or a directory inside it."
        )

    if os.path.exists(new_dir):
        if not os.path.isdir(new_dir):
            raise NotADirectoryError(
                f"Given new_path {new_path} exists but is not a directory."
            )
        logger.warning(f"Please note that new_path {new_path} already exists.")
        # if new_path exists, we have to move everything from old_path into it
        _merge_into(old_dir, new_dir)
        # Only empty directories are left now; removing them is best effort
        # and the outcome is reported at the end.
        shutil.rmtree(old_dir, ignore_errors=True)
    else:
        # if new_path does not exist, just move old_path there; unlike
        # os.rename, shutil.move also works across filesystems
        os.makedirs(os.path.dirname(new_dir), exist_ok=True)
        shutil.move(old_dir, new_dir)

    if old_dir == os.path.abspath(CACHED_DATASET_DIR):
        config = ConfigParser()
        parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
        tsdb_config_path = os.path.join(parent_dir, "config.ini")
        config.read(tsdb_config_path)

        # config.read() ignores a missing file, so the section may not exist.
        if not config.has_section("path"):
            config.add_section("path")
        # "%" starts an interpolation in ConfigParser values and must be
        # escaped as "%%" to be stored literally.
        config.set("path", "data_home", new_dir.replace("%", "%%"))
        with open(tsdb_config_path, "w") as f:
            config.write(f)

        logger.info(
            f"Found the given old_path is the current TSDB dataset cache directory. "
            f"Have already set the new cache directory to {new_path}."
        )

    if os.path.exists(old_dir):
        logger.warning(
            f"Migrated {old_path} to {new_path}, but could not delete {old_path} "
            f"completely. Please remove it manually."
        )
    else:
        logger.info(
            f"Successfully migrated {old_path} to {new_path}, and deleted {old_path}"
        )


def _merge_into(src_dir: str, dst_dir: str) -> None:
    """Move every entry of src_dir into dst_dir, merging directories present in both."""
    for name in os.listdir(src_dir):
        src = os.path.join(src_dir, name)
        dst = os.path.join(dst_dir, name)
        if not os.path.lexists(dst):
            shutil.move(src, dst)
        elif os.path.isdir(src) and os.path.isdir(dst) and not os.path.islink(src):
            # Same directory on both sides: descend instead of failing.
            _merge_into(src, dst)
        else:
            # Never overwrite existing data silently.
            raise shutil.Error(f"Destination path '{dst}' already exists")

0 comments on commit a89efb6

Please sign in to comment.