Skip to content

Commit

Permalink
Merge pull request #72 from WenjieDu/dev
Browse files Browse the repository at this point in the history
Display download progress
  • Loading branch information
WenjieDu authored Jul 9, 2024
2 parents 46910f8 + d8a04f4 commit 3b0c22e
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 17 deletions.
35 changes: 26 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<h3 align="center">Welcome to TSDB</h3>

*<p align='center'>a Python toolbox to ease loading 172 public time-series datasets</p>*
*<p align='center'>load 172 public time-series datasets with a single line of code ;-)</p>*

<p align='center'>
<a href='https://github.com/WenjieDu/TSDB'>
Expand Down Expand Up @@ -67,21 +67,38 @@ if it helps with your research. This really means a lot to our open-source resea


## ❖ Usage Examples
TSDB is now available on <a alt='Anaconda' href='https://anaconda.org/conda-forge/tsdb'><img align='center' src='https://img.shields.io/badge/Anaconda--lightgreen?style=social&logo=anaconda'></a>❗️
> [!IMPORTANT]
> TSDB is available on both <a alt='PyPI' href='https://pypi.python.org/pypi/tsdb'><img align='center' src='https://img.shields.io/badge/PyPI--lightgreen?style=social&logo=pypi'></a>
> and <a alt='Anaconda' href='https://anaconda.org/conda-forge/tsdb'><img align='center' src='https://img.shields.io/badge/Anaconda--lightgreen?style=social&logo=anaconda'></a>❗️
>
> Install via pip:
> > pip install tsdb
>
> or install from source code:
> > pip install `https://github.com/WenjieDu/TSDB/archive/main.zip`
>
> or install via conda:
> > conda install tsdb -c conda-forge
Install it with `conda install tsdb`; you may need to specify the channel with the option `-c conda-forge`.

or install via PyPI:
> pip install tsdb
or install from source code:
> pip install `https://github.com/WenjieDu/TSDB/archive/main.zip`

```python
import tsdb

# list all available datasets in TSDB
tsdb.list()
# ['physionet_2012',
# 'physionet_2019',
# 'electricity_load_diagrams',
# 'beijing_multisite_air_quality',
# 'italy_air_quality',
# 'vessel_ais',
# 'electricity_transformer_temperature',
# 'pems_traffic',
# 'solar_alabama',
# 'ucr_uea_ACSF1',
# 'ucr_uea_Adiac',
# ...

# select the dataset you need and load it; TSDB will download, extract, and process it automatically
data = tsdb.load('physionet_2012')
# if you need the raw data, use download_and_extract()
Expand Down
6 changes: 4 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
tqdm
numpy
scikit-learn
pandas
scipy
pandas
pyarrow
requests
scikit-learn
6 changes: 4 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ extend-ignore =

# basic dependencies
basic =
tqdm
numpy
scikit-learn
pandas
scipy
pandas
pyarrow
requests
scikit-learn
6 changes: 4 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,13 @@
packages=find_packages(exclude=["tests"]),
include_package_data=True,
install_requires=[
"tqdm",
"numpy",
"scikit-learn",
"pandas",
"scipy",
"pandas",
"pyarrow",
"requests",
"scikit-learn",
],
setup_requires=["setuptools>=38.6.0"],
classifiers=[
Expand Down
26 changes: 24 additions & 2 deletions tsdb/utils/downloading.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@
import os
import shutil
import tempfile
import urllib.request
import warnings
from typing import Optional

import requests
from tqdm import tqdm

from .logging import logger
from ..database import DATABASE

Expand Down Expand Up @@ -54,7 +56,27 @@ def _download_and_extract(url: str, saving_path: str) -> Optional[str]:

# download and save the raw dataset
try:
urllib.request.urlretrieve(url, raw_data_saving_path)
with requests.get(url, stream=True) as r:
r.raise_for_status()
chunk_size = 8192
try:
size = int(r.headers["Content-Length"])
except KeyError:
size = None

with tqdm(
unit="B",
unit_scale=True,
unit_divisor=1024,
miniters=1,
desc=f"Downloading {file_name}",
total=size,
) as pbar:
with open(raw_data_saving_path, "wb") as f:
for chunk in r.iter_content(chunk_size=chunk_size):
f.write(chunk)
pbar.update(len(chunk))

except Exception as e:
shutil.rmtree(saving_path, ignore_errors=True)
shutil.rmtree(raw_data_saving_path, ignore_errors=True)
Expand Down

0 comments on commit 3b0c22e

Please sign in to comment.