From a2f0ef2c0add00c53965a1532955a6eba1c0f0fc Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Tue, 23 Jul 2024 16:17:25 +0800 Subject: [PATCH 1/4] feat: config the project with pyproject.toml; --- .github/workflows/publish_to_PyPI.yml | 7 +-- .pre-commit-config.yaml | 2 +- pyproject.toml | 75 +++++++++++++++++++++++++++ setup.cfg | 32 ------------ setup.py | 66 ----------------------- tsdb/__init__.py | 19 +------ tsdb/version.py | 24 +++++++++ 7 files changed, 103 insertions(+), 122 deletions(-) create mode 100644 pyproject.toml delete mode 100644 setup.cfg delete mode 100644 setup.py create mode 100644 tsdb/version.py diff --git a/.github/workflows/publish_to_PyPI.yml b/.github/workflows/publish_to_PyPI.yml index f4da89b..0fe51cd 100644 --- a/.github/workflows/publish_to_PyPI.yml +++ b/.github/workflows/publish_to_PyPI.yml @@ -32,14 +32,11 @@ jobs: with: python-version: '3.10' check-latest: true - cache-dependency-path: | - requirements.txt - name: Install dependencies run: | python -m pip install --upgrade pip - pip install build wheel - pip install -r requirements.txt + pip install build - name: Fetch the test environment details run: | @@ -48,7 +45,7 @@ jobs: - name: Build package run: | - python -m build --no-isolation + python -m build - name: Publish the new package to PyPI uses: pypa/gh-action-pypi-publish@v1.8.7 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 357561e..a8c807b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,6 +18,6 @@ repos: hooks: - id: flake8 args: [ - --max-line-length=120, # refer to setup.cfg + --max-line-length=120, # refer to pyproject.toml --extend-ignore=E203, # why ignore E203? 
Refer to https://github.com/PyCQA/pycodestyle/issues/373 ] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..f8a82fc --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,75 @@ +[build-system] +requires = ["setuptools>=71"] + +[project] +name = "tsdb" +description = "TSDB (Time Series Data Beans): a Python toolbox helping load 172 open-source time-series datasets" +authors = [{ name = "Wenjie Du", email = "wenjay.du@gmail.com" }] +dynamic = ["version", "readme", "dependencies"] +license = { file = "LICENSE" } +requires-python = ">=3.8" +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development :: Libraries :: Application Frameworks", +] +keywords = [ + "data mining", + "time series", + "time-series analysis", + "time-series database", + "time-series datasets", + "database", + "datasets", + "dataset downloading", + "imputation", + "classification", + "forecasting", + "partially observed", + "irregularly sampled", + "partially-observed time series", + "incomplete time series", + "missing data", + "missing values", + "pypots", +] + +[project.urls] +Source = "https://github.com/WenjieDu/TSDB" +Homepage = "https://pypots.com" +Documentation = "https://docs.pypots.com" +"Bug Tracker" = "https://github.com/WenjieDu/TSDB/issues" +Download = "https://github.com/WenjieDu/TSDB/archive/main.zip" + +[tool.setuptools.packages.find] +exclude = [ + "docs*", + "test*", + "dataset_profiles*", +] + +[tool.setuptools.dynamic] +version = { attr = "tsdb.version.__version__" } +readme = { file = "README.md", content-type = "text/markdown" } +dependencies = { file = "requirements.txt" } + +[tool.flake8] +# People may argue that 
coding style is personal. This may be true if the project is personal and one works like a +# hermit, but to PyPOTS and its community, the answer is NO. +# We use Black and Flake8 to lint code style and keep the style consistent across all commits and pull requests. +# Black only reformats the code, and Flake8 is necessary for checking for some other issues not covered by Black. + +# The default line length of Black is 88, while the default of Flake8 is 79. However, considering our monitors are +# much more advanced nowadays, I extend the maximum line length to 120, like other projects, e.g. transformers. People +# who prefer the default setting can keep using 88 or 79 while coding. Please ensure your code lines do not exceed 120 characters. +max-line-length = 120 +# why ignore E203? Refer to https://github.com/PyCQA/pycodestyle/issues/373 +extend-ignore = """ + E203 +""" \ No newline at end of file diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 4ef5c4c..0000000 --- a/setup.cfg +++ /dev/null @@ -1,32 +0,0 @@ -# This file stores some meta configurations for project PyPOTS. - -# Created by Wenjie Du -# License: BSD-3-Clause - -[flake8] -# People may argue that coding style is personal. This may be true if the project is personal and one works like a -# hermit, but to PyPOTS and its community, the answer is NO. -# We use Black and Flake8 to lint code style and keep the style consistent across all commits and pull requests. -# Black only reformats the code, and Flake8 is necessary for checking for some other issues not covered by Black. - -# The Black line length is default as 88, while the default of Flake8 is 79. However, considering our monitors are -# much more advanced nowadays, I extend the maximum line length to 120, like other project e.g. transformers. People -# who prefer the default setting can keep using 88 or 79 while coding. Please ensure your code lines not exceeding 120. -max-line-length = 120 -extend-ignore = - # why ignore E203? 
Refer to https://github.com/PyCQA/pycodestyle/issues/373 - E203, - - -[options.extras_require] -# add dependencies for different usages below - -# basic dependencies -basic = - tqdm - numpy - scipy - pandas - pyarrow - requests - scikit-learn diff --git a/setup.py b/setup.py deleted file mode 100644 index 3fdc7d8..0000000 --- a/setup.py +++ /dev/null @@ -1,66 +0,0 @@ -from setuptools import setup, find_packages - -from tsdb import __version__ - -with open("./README.md", encoding="utf-8") as f: - README = f.read() - -setup( - name="tsdb", - version=__version__, - description="TSDB (Time Series Data Beans): a Python toolbox helping load 172 open-source time-series datasets", - long_description=README, - long_description_content_type="text/markdown", - license="BSD-3-Clause", - author="Wenjie Du", - author_email="wenjay.du@gmail.com", - url="https://github.com/WenjieDu/TSDB", - project_urls={ - "Documentation": "https://tsdb.readthedocs.io/", - "Source": "https://github.com/WenjieDu/TSDB/", - "Tracker": "https://github.com/WenjieDu/TSDB/issues/", - "Download": "https://github.com/WenjieDu/TSDB/archive/main.zip", - }, - keywords=[ - "data mining", - "time series", - "time-series analysis", - "time-series database", - "time-series datasets", - "database", - "datasets", - "dataset downloading", - "imputation", - "classification", - "forecasting", - "partially observed", - "irregularly sampled", - "partially-observed time series", - "incomplete time series", - "missing data", - "missing values", - "pypots", - ], - packages=find_packages(exclude=["tests"]), - include_package_data=True, - install_requires=[ - "tqdm", - "numpy", - "scipy", - "pandas", - "pyarrow", - "requests", - "scikit-learn", - ], - setup_requires=["setuptools>=38.6.0"], - classifiers=[ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "Intended Audience :: Education", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: BSD License", - "Operating 
System :: OS Independent", - "Programming Language :: Python :: 3", - "Topic :: Database", - ], -) diff --git a/tsdb/__init__.py b/tsdb/__init__.py index cf29bbc..394f58e 100644 --- a/tsdb/__init__.py +++ b/tsdb/__init__.py @@ -5,24 +5,6 @@ # Created by Wenjie Du # License: BSD-3-Clause -# TSDB version -# -# PEP0440 compatible formatted version, see: -# https://www.python.org/dev/peps/pep-0440/ -# Generic release markers: -# X.Y -# X.Y.Z # For bugfix releases -# -# Admissible pre-release markers: -# X.YaN # Alpha release -# X.YbN # Beta release -# X.YrcN # Release Candidate -# X.Y # Final release -# -# Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. -# 'X.Y.dev0' is the canonical version of 'X.Y.dev' -__version__ = "0.6" - from .data_processing import ( CACHED_DATASET_DIR, list, @@ -38,6 +20,7 @@ migrate, migrate_cache, ) +from .version import __version__ __all__ = [ "__version__", diff --git a/tsdb/version.py b/tsdb/version.py new file mode 100644 index 0000000..bff1f2e --- /dev/null +++ b/tsdb/version.py @@ -0,0 +1,24 @@ +""" + +""" + +# Created by Wenjie Du +# License: BSD-3-Clause + +# TSDB version +# +# PEP0440 compatible formatted version, see: +# https://www.python.org/dev/peps/pep-0440/ +# Generic release markers: +# X.Y +# X.Y.Z # For bugfix releases +# +# Admissible pre-release markers: +# X.YaN # Alpha release +# X.YbN # Beta release +# X.YrcN # Release Candidate +# X.Y # Final release +# +# Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. 
+# 'X.Y.dev0' is the canonical version of 'X.Y.dev' +__version__ = "0.6.1" \ No newline at end of file From 54dd3c9a98fbf5a5cb716217dc27c0bef984782c Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Tue, 23 Jul 2024 18:20:16 +0800 Subject: [PATCH 2/4] fix: install flake8-pyproject for read config from toml for linting; --- .github/workflows/linting.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 6a1bb6f..341a593 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -23,7 +23,7 @@ jobs: - name: Install Flake8 run: | - pip install flake8 + pip install flake8 flake8-pyproject - name: Run linting run: | From 285d986aeb65eea728b2f7c6479c70bc23161777 Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Tue, 23 Jul 2024 20:09:37 +0800 Subject: [PATCH 3/4] refactor: pickle saving and loading funcs; --- tsdb/utils/file.py | 67 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 14 deletions(-) diff --git a/tsdb/utils/file.py b/tsdb/utils/file.py index 818b25b..79eabde 100644 --- a/tsdb/utils/file.py +++ b/tsdb/utils/file.py @@ -9,7 +9,6 @@ import os import pickle import shutil -from typing import Optional from .config import read_configs, write_configs from .logging import logger @@ -51,7 +50,44 @@ def check_path( return checked_path -def pickle_dump(data: object, path: str) -> Optional[str]: +def extract_parent_dir(path: str) -> str: + """Extract the given path's parent directory. + + Parameters + ---------- + path : + The path for extracting. + + Returns + ------- + parent_dir : + The path to the parent dir of the given path. + + """ + parent_dir = os.path.abspath(os.path.join(path, "..")) + return parent_dir + + +def create_dir_if_not_exist(path: str, is_dir: bool = True) -> None: + """Create the given directory if it doesn't exist. + + Parameters + ---------- + path : + The path for check. 
+ + is_dir : + Whether the given path is to a directory. If `is_dir` is False, the given path is to a file or an object, + then this file's parent directory will be checked. + + """ + path = extract_parent_dir(path) if not is_dir else path + if not os.path.exists(path): + os.makedirs(path, exist_ok=True) + logger.info(f"Successfully created the given path {path}") + + +def pickle_dump(data: object, path: str) -> None: """Pickle the given object. Parameters @@ -67,17 +103,18 @@ def pickle_dump(data: object, path: str) -> Optional[str]: `path` if succeed else None """ - # check the given path - path = check_path(path) - try: + # help create the parent dir if not exist + create_dir_if_not_exist(extract_parent_dir(path)) with open(path, "wb") as f: pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL) - except pickle.PicklingError: - logger.info("Pickling failed. No cache will be saved.") - return None - logger.info(f"Successfully saved to {path}") - return path + logger.info(f"Successfully saved to {path}") + except Exception as e: + logger.error( + f"❌ Pickling failed. No cache data saved. Investigate the error below:\n{e}" + ) + + return None def pickle_load(path: str) -> object: @@ -94,13 +131,15 @@ def pickle_load(path: str) -> object: Pickled object. """ - # check the given path - path = check_path(path, check_exists=True) try: with open(path, "rb") as f: data = pickle.load(f) - except pickle.UnpicklingError as e: - logger.info("Cached data corrupted. Aborting...\n" f"{e}") + except Exception as e: + logger.error( + f"❌ Loading data failed. Operation aborted. 
Investigate the error below:\n{e}" + ) + return None + return data From 0a16b4d0db9433937275f27c77bcab0a50a7cf02 Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Tue, 23 Jul 2024 21:31:06 +0800 Subject: [PATCH 4/4] refactor: fix linting; --- tsdb/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsdb/version.py b/tsdb/version.py index bff1f2e..d0fad9a 100644 --- a/tsdb/version.py +++ b/tsdb/version.py @@ -21,4 +21,4 @@ # # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. # 'X.Y.dev0' is the canonical version of 'X.Y.dev' -__version__ = "0.6.1" \ No newline at end of file +__version__ = "0.6.1"