diff --git a/README.md b/README.md index fdc847d..a2a49d7 100644 --- a/README.md +++ b/README.md @@ -46,13 +46,13 @@

-> 📣 TSDB now supports a total of 1️⃣6️⃣8️⃣ time-series datasets ‼️ +> 📣 TSDB now supports a total of 1️⃣6️⃣9️⃣ time-series datasets ‼️ -TSDB is a part of +TSDB is a part of PyPOTS - + (a Python toolbox for data mining on Partially-Observed Time Series), and was separated from PyPOTS for decoupling datasets from learning algorithms. TSDB is created to help researchers and engineers get rid of data collecting and downloading, and focus back on data processing details. TSDB provides all-in-one-stop convenience for downloading and loading open-source time-series datasets (available datasets listed [below](https://github.com/WenjieDu/TSDB#-list-of-available-datasets)). @@ -99,14 +99,15 @@ That's all. Simple and efficient. Enjoy it! 😃 ## ❖ List of Available Datasets -| Name | Main Tasks | -|----------------------------------------------------------------------------------|-----------------------------------------| -| [PhysioNet Challenge 2012](dataset_profiles/physionet_2012) | Classification, Forecasting, Imputation | -| [PhysioNet Challenge 2019](dataset_profiles/physionet_2019) | Classification, Imputation | -| [Beijing Multi-Site Air-Quality](dataset_profiles/beijing_multisite_air_quality) | Forecasting, Imputation | -| [Electricity Load Diagrams](dataset_profiles/electricity_load_diagrams) | Forecasting, Imputation | -| [UCR & UEA Datasets](dataset_profiles/ucr_uea_datasets) (all 163 datasets) | Classification | -| [Vessel AIS](dataset_profiles/vessel_ais) | Classification, Forecasting, Imputation | +| Name | Main Tasks | +|---------------------------------------------------------------------------------------------------|-----------------------------------------| +| [PhysioNet Challenge 2012](dataset_profiles/physionet_2012) | Forecasting, Imputation, Classification | +| [PhysioNet Challenge 2019](dataset_profiles/physionet_2019) | Forecasting, Imputation, Classification | +| [Beijing Multi-Site Air-Quality](dataset_profiles/beijing_multisite_air_quality) | 
Forecasting, Imputation | +| [Electricity Load Diagrams](dataset_profiles/electricity_load_diagrams) | Forecasting, Imputation | +| [Electricity Transformer Temperature (ETT)](dataset_profiles/electricity_transformer_temperature) | Forecasting, Imputation | +| [Vessel AIS](dataset_profiles/vessel_ais) | Forecasting, Imputation, Classification | +| [UCR & UEA Datasets](dataset_profiles/ucr_uea_datasets) (all 163 datasets) | Classification | ## ❖ Citing TSDB/PyPOTS diff --git a/dataset_profiles/electricity_transformer_temperature/README.md b/dataset_profiles/electricity_transformer_temperature/README.md new file mode 100644 index 0000000..2a0b829 --- /dev/null +++ b/dataset_profiles/electricity_transformer_temperature/README.md @@ -0,0 +1,22 @@ +# Electricity Transformer Temperature + +## Citing this dataset 🤗 + +`Zhou, H., Zhang, S., Peng, J., Zhang, S., Li, J., Xiong, H., & Zhang, W. (2021, May). +Informer: Beyond efficient transformer for long sequence time-series forecasting. +In Proceedings of the AAAI conference on artificial intelligence (Vol. 35, No. 12, pp. 11106-11115).` + +or + +```bibtex +@inproceedings{zhou2021informer, +author = {Haoyi Zhou and Shanghang Zhang and Jieqi Peng and Shuai Zhang and Jianxin Li and Hui Xiong and Wancai Zhang}, +title = {Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting}, +booktitle = {The Thirty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI} 2021, Virtual Conference}, +volume = {35}, +number = {12}, +pages = {11106--11115}, +publisher = {{AAAI} Press}, +year = {2021}, +} +``` diff --git a/docs/index.rst b/docs/index.rst index c75478c..6842eee 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -98,16 +98,17 @@ That's all. Simple and efficient. Enjoy it! 
😃 ❖ List of Available Datasets ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -=================================================================================================================================================================== ========================================== - Name Main Tasks -=================================================================================================================================================================== ========================================== - `PhysioNet Challenge 2012 `_ :cite:`silva2012physionet` Classification, Forecasting, Imputation - `PhysioNet Challenge 2019 `_ :cite:`reyna2019physionet` Classification, Imputation - `Beijing Multi-Site Air-Quality `_ :cite:`zhang2017airquality` Forecasting, Imputation - `Electricity Load Diagrams `_ :cite:`trindade2015electricity` Forecasting, Imputation - `UCR & UEA Datasets `_ (all 163 datasets) :cite:`bagnall2018uea` :cite:`dau2018ucr` Classification - `Vessel AIS data `_ :cite:`grgicevic2023ais` Imputation, Forecasting, Classification -=================================================================================================================================================================== ========================================== +========================================================================================================================================================================== ========================================== + Name Main Tasks +========================================================================================================================================================================== ========================================== + `PhysioNet Challenge 2012 `_ :cite:`silva2012physionet` Forecasting, Imputation, Classification + `PhysioNet Challenge 2019 `_ :cite:`reyna2019physionet` Forecasting, Imputation, Classification + `Beijing Multi-Site Air-Quality `_ :cite:`zhang2017airquality` Forecasting, Imputation + `Electricity 
Load Diagrams `_ :cite:`trindade2015electricity` Forecasting, Imputation + `Electricity Transformer Temperature (ETT) `_ :cite:`zhou2021informer` Forecasting, Imputation + `Vessel AIS data `_ :cite:`grgicevic2023ais` Forecasting, Imputation, Classification + `UCR & UEA Datasets `_ (all 163 datasets) :cite:`bagnall2018uea` :cite:`dau2018ucr` Classification +========================================================================================================================================================================== ========================================== ❖ Citing TSDB/PyPOTS ^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/references.bib b/docs/references.bib index 8b29ace..ffe97df 100644 --- a/docs/references.bib +++ b/docs/references.bib @@ -64,3 +64,14 @@ @misc{grgicevic2023ais doi = {10.5281/zenodo.8064564}, url = {https://doi.org/10.5281/zenodo.8064564} } + +@inproceedings{zhou2021informer, +author = {Haoyi Zhou and Shanghang Zhang and Jieqi Peng and Shuai Zhang and Jianxin Li and Hui Xiong and Wancai Zhang}, +title = {Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting}, +booktitle = {The Thirty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI} 2021, Virtual Conference}, +volume = {35}, +number = {12}, +pages = {11106--11115}, +publisher = {{AAAI} Press}, +year = {2021}, +} diff --git a/tsdb/data_processing.py b/tsdb/data_processing.py index dd53a12..0ab99d4 100644 --- a/tsdb/data_processing.py +++ b/tsdb/data_processing.py @@ -15,6 +15,7 @@ load_physionet2012, load_physionet2019, load_electricity, + load_ett, load_beijing_air_quality, load_ucr_uea_dataset, load_ais, @@ -94,6 +95,8 @@ def load(dataset_name: str, use_cache: bool = True) -> dict: result = load_physionet2019(dataset_saving_path) elif dataset_name == "electricity_load_diagrams": result = load_electricity(dataset_saving_path) + elif dataset_name == "electricity_transformer_temperature": + result = load_ett(dataset_saving_path) elif dataset_name == 
"beijing_multisite_air_quality": result = load_beijing_air_quality(dataset_saving_path) elif dataset_name == "vessel_ais": diff --git a/tsdb/database.py b/tsdb/database.py index fb855de..bdf8d5f 100644 --- a/tsdb/database.py +++ b/tsdb/database.py @@ -36,58 +36,17 @@ # # https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/vessel_ais "vessel_ais": "https://zenodo.org/record/8064564/files/parquets.zip", + # + # https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/electricity_transformer_temperature + "electricity_transformer_temperature": [ + "https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTm1.csv", + "https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTm2.csv", + "https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTh1.csv", + "https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTh2.csv", + ], } -# The list of raw data files to be downloaded -MATR_LINKS = ( - ( - "https://data.matr.io/1/api/v1/file/5c86c0b5fa2ede00015ddf66/download", - "2017-05-12_batchdata_updated_struct_errorcorrect.mat", - ), - ( - "https://data.matr.io/1/api/v1/file/5c86bf13fa2ede00015ddd82/download", - "2017-06-30_batchdata_updated_struct_errorcorrect.mat", - ), - ( - "https://data.matr.io/1/api/v1/file/5c86bd64fa2ede00015ddbb2/download", - "2018-04-12_batchdata_updated_struct_errorcorrect.mat", - ), - ( - "https://data.matr.io/1/api/v1/file/5dcef152110002c7215b2c90/download", - "2019-01-24_batchdata_updated_struct_errorcorrect.mat", - ), -) - -HUST_LINKS = ( - ( - "https://data.mendeley.com/public-files/datasets/nsc7hnsg4s/" - "files/5ca0ac3e-d598-4d07-8dcb-879aa047e98b/file_downloaded", - "hust_data.zip", - ), -) - -CALCE_LINKS = ( - ("https://web.calce.umd.edu/batteries/data/CS2_33.zip", "CS2_33.zip"), - ("https://web.calce.umd.edu/batteries/data/CS2_34.zip", "CS2_34.zip"), - ("https://web.calce.umd.edu/batteries/data/CS2_35.zip", "CS2_35.zip"), - 
("https://web.calce.umd.edu/batteries/data/CS2_36.zip", "CS2_36.zip"), - ("https://web.calce.umd.edu/batteries/data/CS2_37.zip", "CS2_37.zip"), - ("https://web.calce.umd.edu/batteries/data/CS2_38.zip", "CS2_38.zip"), - ("https://web.calce.umd.edu/batteries/data/CX2_16.zip", "CX2_16.zip"), - ("https://web.calce.umd.edu/batteries/data/CX2_33.zip", "CX2_33.zip"), - ("https://web.calce.umd.edu/batteries/data/CX2_35.zip", "CX2_35.zip"), - ("https://web.calce.umd.edu/batteries/data/CX2_34.zip", "CX2_34.zip"), - ("https://web.calce.umd.edu/batteries/data/CX2_36.zip", "CX2_36.zip"), - ("https://web.calce.umd.edu/batteries/data/CX2_37.zip", "CX2_37.zip"), - ("https://web.calce.umd.edu/batteries/data/CX2_38.zip", "CX2_38.zip"), -) - - -RWTH_LINKS = ( - ("https://publications.rwth-aachen.de/record/818642/files/Rawdata.zip", "raw.zip"), -) - # https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/ucr_uea_datasets # 128 UCR + 33 UEA + 2 old removed (NonInvasiveFatalECGThorax1 and 2) = 163 _ucr_uea_datasets = [ diff --git a/tsdb/loading_funcs/__init__.py b/tsdb/loading_funcs/__init__.py index 18d2016..fae0583 100644 --- a/tsdb/loading_funcs/__init__.py +++ b/tsdb/loading_funcs/__init__.py @@ -11,6 +11,7 @@ from .physionet_2019 import load_physionet2019 from .ucr_uea_datasets import load_ucr_uea_dataset from .vessel_ais import load_ais +from .electricity_transformer_temperature import load_ett __all__ = [ "load_beijing_air_quality", @@ -19,4 +20,5 @@ "load_physionet2019", "load_ucr_uea_dataset", "load_ais", + "load_ett", ] diff --git a/tsdb/loading_funcs/electricity_transformer_temperature.py b/tsdb/loading_funcs/electricity_transformer_temperature.py new file mode 100644 index 0000000..39d1bff --- /dev/null +++ b/tsdb/loading_funcs/electricity_transformer_temperature.py @@ -0,0 +1,54 @@ +""" +Scripts related to dataset Electricity Transformer Temperature. 
# For more information please refer to:
# https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/electricity_transformer_temperature

# Created by Wenjie Du
# License: BSD-3-Clause

import os

import pandas as pd


def load_ett(local_path: str) -> dict:
    """Load dataset Electricity Transformer Temperature.

    Reads the four CSV files ``ETTm1.csv``, ``ETTm2.csv``, ``ETTh1.csv`` and
    ``ETTh2.csv`` from ``local_path``. In each file the ``date`` column is used
    as the index and converted to datetime values.

    Parameters
    ----------
    local_path : str,
        The local path of dir saving the raw data of Electricity Transformer Temperature.

    Returns
    -------
    data : dict
        A dictionary containing all four sub-datasets, keyed by the file name
        without its ``.csv`` extension:
            ETTm1 : pandas.DataFrame
                The time-series data of ETTm1
            ETTm2 : pandas.DataFrame
                The time-series data of ETTm2
            ETTh1 : pandas.DataFrame
                The time-series data of ETTh1
            ETTh2 : pandas.DataFrame
                The time-series data of ETTh2

    """
    sub_datasets = [
        "ETTm1.csv",
        "ETTm2.csv",
        "ETTh1.csv",
        "ETTh2.csv",
    ]

    data = {}
    for sub_set in sub_datasets:
        file_path = os.path.join(local_path, sub_set)
        df = pd.read_csv(file_path, index_col="date")
        # The index is read as plain strings; turn it into real timestamps.
        df.index = pd.to_datetime(df.index)
        # os.path.splitext is used instead of str.removesuffix because the
        # latter only exists on Python >= 3.9 and raises AttributeError before.
        df_name = os.path.splitext(sub_set)[0]
        data[df_name] = df

    return data