Skip to content

Commit

Permalink
Add migrate_cache() and remove deprecated funcs (#56)
Browse files Browse the repository at this point in the history
* feat: add migrate_cache();

* refactor: remove deprecated functions;

* fix: update workflow;
  • Loading branch information
WenjieDu authored Jan 16, 2024
1 parent 507ed23 commit 758821c
Show file tree
Hide file tree
Showing 9 changed files with 149 additions and 205 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/testing_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,16 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pytest
pip install pytest pytest-cov
pip install coverage
- name: Test with pytest
run: |
coverage run --source=tsdb -m pytest
python -m pytest -rA tests/test_tsdb.py -s --cov=tsdb
- name: Write the LCOV report
run: |
coverage lcov
python -m coverage lcov
- name: Submit report
uses: coverallsapp/github-action@master
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/testing_daily.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
- name: Test with pytest
run: |
python -m pytest --cov=tsdb
python -m pytest -rA tests/test_tsdb.py -s --cov=tsdb
- name: Generate the LCOV report
run: |
Expand Down
3 changes: 2 additions & 1 deletion tests/test_tsdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,14 @@ def test_3_dataset_purging(self):
cached_datasets = tsdb.list_cache()
assert isinstance(cached_datasets, list)
tsdb.delete_cache("physionet_2012") # delete single
tsdb.delete_cache() # delete all

def test_4_migrate(self):
    """Exercise ``tsdb.migrate`` and ``tsdb.migrate_cache``, then purge the cache.

    A scratch directory containing one small text file is created and passed
    to ``tsdb.migrate`` together with the target ``new_dir/put_it_here``.
    Afterwards ``tsdb.migrate_cache`` is called with ``new_cache_dir`` and
    ``tsdb.delete_cache()`` is invoked without a dataset name.
    """
    # exist_ok=True keeps the test re-runnable: a plain makedirs() raises
    # FileExistsError if an earlier, aborted run left the directory behind.
    os.makedirs("dir_for_migration", exist_ok=True)
    with open("dir_for_migration/test.txt", "a") as f:
        f.write("hello world")
    tsdb.migrate("dir_for_migration", "new_dir/put_it_here")
    tsdb.migrate_cache("new_cache_dir")
    tsdb.delete_cache()  # delete all datasets

def test_5_logging(self):
# different level logging
Expand Down
25 changes: 9 additions & 16 deletions tsdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,25 +21,22 @@
#
# Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
__version__ = "0.3"
__version__ = "0.3.1"

from .utils.file import migrate
from .data_processing import (
CACHED_DATASET_DIR,
list,
load,
download_and_extract,
list_cache,
delete_cache,
)
from .utils.file import (
purge_path,
CACHED_DATASET_DIR,
pickle_dump,
pickle_load,
# below are deprecated functions, import for now, will be removed in v0.2
list_database,
list_available_datasets,
list_cached_data,
load_dataset,
delete_cached_data,
migrate,
migrate_cache,
)

__all__ = [
Expand All @@ -49,15 +46,11 @@
"download_and_extract",
"list_cache",
"delete_cache",
"purge_path",
"CACHED_DATASET_DIR",
# file
"purge_path",
"pickle_dump",
"pickle_load",
"migrate",
# below are deprecated functions, import for now, will be removed in v0.2
"list_database",
"list_available_datasets",
"list_cached_data",
"load_dataset",
"delete_cached_data",
"migrate_cache",
]
2 changes: 1 addition & 1 deletion tsdb/config.ini
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[path]
data_home = .tsdb
data_home = ~/.tsdb
152 changes: 35 additions & 117 deletions tsdb/data_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@

import os
import shutil
import sys
import warnings

from .database import AVAILABLE_DATASETS, CACHED_DATASET_DIR
from .database import AVAILABLE_DATASETS
from .loading_funcs import (
load_physionet2012,
load_physionet2019,
Expand All @@ -21,9 +20,11 @@
load_ais,
)
from .utils.downloading import download_and_extract
from .utils.file import purge_path, pickle_load, pickle_dump
from .utils.file import purge_path, pickle_load, pickle_dump, determine_data_home
from .utils.logging import logger

# Root directory under which cached datasets are stored; delete_cache() joins
# dataset names onto it. Resolved once, at import time, by
# determine_data_home() (imported from .utils.file; its lookup rules are not
# visible in this file).
CACHED_DATASET_DIR = determine_data_home()


def list() -> list:
"""List the database.
Expand Down Expand Up @@ -146,122 +147,39 @@ def list_cache() -> list:
return dir_content


def delete_cache(dataset_name=None) -> None:
    """Delete cached data stored under CACHED_DATASET_DIR.

    Parameters
    ----------
    dataset_name : str, optional
        Name of the dataset whose cache directory should be removed. It must
        be one of AVAILABLE_DATASETS. If left as None, everything under
        CACHED_DATASET_DIR is purged.

    Raises
    ------
    AssertionError
        If ``dataset_name`` is given but is not in AVAILABLE_DATASETS.

    Notes
    -----
    When there is nothing to delete (no cache directory at all, or the
    requested dataset is not cached) the function logs a message and returns
    without doing anything.
    """
    # Nothing cached at all. Return instead of calling sys.exit(): a library
    # function must not terminate the caller's interpreter (or test run).
    if not os.path.exists(CACHED_DATASET_DIR):
        logger.info("No cached data. Operation aborted.")
        return

    if dataset_name is not None:
        # Kept as an assert so callers that expect AssertionError still work.
        assert (
            dataset_name in AVAILABLE_DATASETS
        ), f"{dataset_name} is not available in TSDB, so it has no cache. Please check your dataset name."
        dir_to_delete = os.path.join(CACHED_DATASET_DIR, dataset_name)
        if not os.path.exists(dir_to_delete):
            logger.info(f"Dataset {dataset_name} is not cached. Operation aborted.")
            return
        logger.info(f"Purging cached dataset {dataset_name} under {dir_to_delete}...")
    else:
        # No dataset specified: the whole cache directory is the target.
        dir_to_delete = CACHED_DATASET_DIR
        logger.info(f"Purging all cached data under {CACHED_DATASET_DIR}...")

    purge_path(dir_to_delete)


# deprecated functions below


def list_available_datasets():
    """Deprecated alias of ``list()``.

    Returns
    -------
    list
        Whatever this module's ``list()`` function (not the builtin) returns.

    Warnings
    --------
    The method list_available_datasets is deprecated; a deprecation message
    is logged on every call. Please use ``list()`` instead.
    """
    deprecation_msg = (
        "🚨DeprecationWarning: The method list_available_datasets is deprecated. "
        "Please use `list()` instead."
    )
    logger.warning(deprecation_msg)
    datasets = list()
    return datasets


def list_database():
    """Deprecated alias of ``list()``.

    Returns
    -------
    list
        Whatever this module's ``list()`` function (not the builtin) returns.

    Warnings
    --------
    The method list_database is deprecated; a deprecation message is logged
    on every call. Please use ``list()`` instead.
    """
    # The message names list_database itself; it previously pointed at
    # list_available_datasets (copy-paste slip), which misled users about
    # which call they had to replace.
    logger.warning(
        "🚨DeprecationWarning: The method list_database is deprecated. Please use `list()` instead."
    )
    return list()


def list_cached_data():
    """Deprecated alias of ``list_cache()``.

    Returns
    -------
    list
        Whatever ``list_cache()`` returns.

    Warnings
    --------
    The method list_cached_data is deprecated; a deprecation message is
    logged on every call. Please use ``list_cache()`` instead.
    """
    deprecation_msg = (
        "🚨DeprecationWarning: The method list_cached_data is deprecated. "
        "Please use `list_cache()` instead."
    )
    logger.warning(deprecation_msg)
    cached = list_cache()
    return cached


def load_dataset(dataset_name, use_cache=True):
"""Load dataset with given name.
def delete_cache(dataset_name: str = None) -> None:
"""Delete CACHED_DATASET_DIR if exists.
Parameters
----------
dataset_name : str,
dataset_name : str, optional
The name of the specific dataset in database.DATABASE.
use_cache : bool,
Whether to use cache (including data downloading and processing)
Returns
-------
result:
Loaded dataset in a Python dict.
Warnings
--------
The method load_dataset is deprecated. Please use `load()` instead.
If dataset is not cached, then abort.
Delete all cached datasets if dataset_name is left as None.
"""
logger.warning(
"🚨DeprecationWarning: The method load_dataset is deprecated. Please use `load()` instead."
)
return load(dataset_name, use_cache)


def delete_cached_data(dataset_name=None):
"""Delete CACHED_DATASET_DIR if exists.
Warnings
--------
The method delete_cached_data is deprecated. Please use `delete_cache()` instead.
"""
logger.warning(
"🚨DeprecationWarning: The method delete_cached_data is deprecated. Please use `delete_cache()` instead."
)
delete_cache(dataset_name)
# if CACHED_DATASET_DIR does not exist, abort
if not os.path.exists(CACHED_DATASET_DIR):
logger.error("❌ No cached data. Operation aborted.")
else:
# if CACHED_DATASET_DIR exists, then execute purging procedure
if dataset_name is None: # if dataset_name is not given, then purge all
logger.info(
f"`dataset_name` not given. Purging all cached data under {CACHED_DATASET_DIR}..."
)
purge_path(CACHED_DATASET_DIR)
os.makedirs(CACHED_DATASET_DIR)
else:
assert (
dataset_name in AVAILABLE_DATASETS
), f"{dataset_name} is not available in TSDB, so it has no cache. Please check your dataset name."
dir_to_delete = os.path.join(CACHED_DATASET_DIR, dataset_name)
if not os.path.exists(dir_to_delete):
logger.error(
f"❌ Dataset {dataset_name} is not cached. Operation aborted."
)
return
else:
logger.info(
f"Purging cached dataset {dataset_name} under {dir_to_delete}..."
)
purge_path(dir_to_delete)
33 changes: 0 additions & 33 deletions tsdb/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,39 +5,6 @@
# Created by Wenjie Du <wenjay.du@gmail.com>
# License: BSD-3-Clause

import os
from configparser import ConfigParser

from .utils.logging import logger

# Read the config.ini that sits next to this module.
config = ConfigParser()
tsdb_config_path = os.path.join(os.path.dirname(__file__), "config.ini")
config.read(tsdb_config_path)

# os.path.abspath() does not expand "~": without expanduser() a value such as
# "~/.tsdb" resolves to "<cwd>/~/.tsdb" instead of a directory in the user's
# home, so the existence checks below would look in the wrong place.
data_home_path = os.path.abspath(os.path.expanduser(config.get("path", "data_home")))
old_cached_dataset_dir = os.path.abspath(os.path.expanduser("~/.tsdb_cached_datasets"))

# Pick the cache directory, in order of precedence:
#   1. the old-version directory, if it still exists (with a migration hint);
#   2. the configured data_home, if it exists;
#   3. the default "~/.tsdb" otherwise (with a warning).
if os.path.exists(old_cached_dataset_dir):
    # use the old path and warn the user
    logger.warning(
        "‼️ Detected the home dir of the old version TSDB. "
        "Since v0.3, TSDB has changed the default cache dir to '~/.tsdb'. "
        "You can migrate downloaded datasets by invoking the new function "
        "tsdb.migrate(old='~/.tsdb_cached_datasets', new='~/.tsdb')"
    )
    CACHED_DATASET_DIR = old_cached_dataset_dir
elif os.path.exists(data_home_path):
    # use the path directly, may be in a portable disk
    CACHED_DATASET_DIR = data_home_path
else:
    # use the default path
    default_path = os.path.abspath(os.path.expanduser("~/.tsdb"))
    CACHED_DATASET_DIR = default_path
    logger.warning(
        f"‼️ The preset data_home path '{data_home_path}' doesn't exist. "
        f"Using the default path '{default_path}'."
    )

_DATABASE = {
# http://www.physionet.org/challenge/2012
Expand Down
32 changes: 32 additions & 0 deletions tsdb/utils/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""
Config functions for TSDB.
"""

# Created by Wenjie Du <wenjay.du@gmail.com>
# License: BSD-3-Clause

import os
from configparser import ConfigParser

from .logging import logger

# Absolute path of the directory one level above the one containing this module.
TSDB_BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# The config.ini file located directly inside TSDB_BASE_PATH.
TSDB_CONFIG_FILE = os.path.join(TSDB_BASE_PATH, "config.ini")


def read_configs():
    """Load TSDB_CONFIG_FILE into a fresh ``ConfigParser`` and return it.

    Returns
    -------
    ConfigParser
        A new parser object after ``read(TSDB_CONFIG_FILE)`` has been called
        on it.
    """
    parser = ConfigParser()
    parser.read(TSDB_CONFIG_FILE)
    return parser


def write_configs(config_parser, key_value_set):
    """Apply ``key_value_set`` to ``config_parser`` and write it to TSDB_CONFIG_FILE.

    Parameters
    ----------
    config_parser : ConfigParser
        The parser object to update, e.g. the one returned by ``read_configs()``.
    key_value_set : dict
        Nested mapping ``{section: {key: value}}`` of the options to set.

    Notes
    -----
    Sections named in ``key_value_set`` that the parser does not have yet are
    created first, so new sections can be written as well as existing ones.
    TSDB_CONFIG_FILE is opened in "w" mode, i.e. it is overwritten with the
    parser's full content.
    """
    for section, options in key_value_set.items():
        # ConfigParser.set() raises NoSectionError for an unknown section, so
        # create it on demand. The default section (and an empty name, which
        # set() also maps to the defaults) must not go through add_section().
        if (
            section
            and section != config_parser.default_section
            and not config_parser.has_section(section)
        ):
            config_parser.add_section(section)
        for key, value in options.items():
            config_parser.set(section, key, value)

    with open(TSDB_CONFIG_FILE, "w") as f:
        config_parser.write(f)

    logger.info("Wrote new configs to config.ini successfully.")
Loading

0 comments on commit 758821c

Please sign in to comment.