From 625a0773aa7c24a122ad1fe5e843f7fe21e7fbd5 Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Wed, 20 Dec 2023 13:36:02 +0800 Subject: [PATCH 1/3] feat: add the dataset ETT; --- .../README.md | 22 +++++++ tsdb/data_processing.py | 3 + tsdb/database.py | 57 +++---------------- tsdb/loading_funcs/__init__.py | 2 + .../electricity_transformer_temperature.py | 47 +++++++++++++++ tsdb/utils/downloading.py | 2 +- 6 files changed, 83 insertions(+), 50 deletions(-) create mode 100644 dataset_profiles/electricity_transformer_temperature/README.md create mode 100644 tsdb/loading_funcs/electricity_transformer_temperature.py diff --git a/dataset_profiles/electricity_transformer_temperature/README.md b/dataset_profiles/electricity_transformer_temperature/README.md new file mode 100644 index 0000000..2a0b829 --- /dev/null +++ b/dataset_profiles/electricity_transformer_temperature/README.md @@ -0,0 +1,22 @@ +# Electricity Transformer Temperature + +## Citing this dataset 🤗 + +`Zhou, H., Zhang, S., Peng, J., Zhang, S., Li, J., Xiong, H., & Zhang, W. (2021, May). +Informer: Beyond efficient transformer for long sequence time-series forecasting. +In Proceedings of the AAAI conference on artificial intelligence (Vol. 35, No. 12, pp. 11106-11115).` + +or + +```bibtex +@inproceedings{zhou2021informer, +author = {Haoyi Zhou and Shanghang Zhang and Jieqi Peng and Shuai Zhang and Jianxin Li and Hui Xiong and Wancai Zhang}, +title = {Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting}, +booktitle = {The Thirty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI} 2021, Virtual Conference}, +volume = {35}, +number = {12}, +pages = {11106--11115}, +publisher = {{AAAI} Press}, +year = {2021}, +} +``` diff --git a/tsdb/data_processing.py b/tsdb/data_processing.py index dd53a12..0ab99d4 100644 --- a/tsdb/data_processing.py +++ b/tsdb/data_processing.py @@ -15,6 +15,7 @@ load_physionet2012, load_physionet2019, load_electricity, + load_ett, load_beijing_air_quality, load_ucr_uea_dataset, load_ais, @@ -94,6 +95,8 @@ def load(dataset_name: str, use_cache: bool = True) -> dict: result = load_physionet2019(dataset_saving_path) elif dataset_name == "electricity_load_diagrams": result = load_electricity(dataset_saving_path) + elif dataset_name == "electricity_transformer_temperature": + result = load_ett(dataset_saving_path) elif dataset_name == "beijing_multisite_air_quality": result = load_beijing_air_quality(dataset_saving_path) elif dataset_name == "vessel_ais": diff --git a/tsdb/database.py b/tsdb/database.py index fb855de..bdf8d5f 100644 --- a/tsdb/database.py +++ b/tsdb/database.py @@ -36,58 +36,17 @@ # # https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/vessel_ais "vessel_ais": "https://zenodo.org/record/8064564/files/parquets.zip", + # + # https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/electricity_transformer_temperature + "electricity_transformer_temperature": [ + "https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTm1.csv", + "https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTm2.csv", + "https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTh1.csv", + "https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTh2.csv", + ], } -# The list of raw data files to be downloaded -MATR_LINKS = ( - ( - "https://data.matr.io/1/api/v1/file/5c86c0b5fa2ede00015ddf66/download", - "2017-05-12_batchdata_updated_struct_errorcorrect.mat", - ), - ( - "https://data.matr.io/1/api/v1/file/5c86bf13fa2ede00015ddd82/download", - "2017-06-30_batchdata_updated_struct_errorcorrect.mat", - ), - ( - "https://data.matr.io/1/api/v1/file/5c86bd64fa2ede00015ddbb2/download", - "2018-04-12_batchdata_updated_struct_errorcorrect.mat", - ), - ( - "https://data.matr.io/1/api/v1/file/5dcef152110002c7215b2c90/download", - "2019-01-24_batchdata_updated_struct_errorcorrect.mat", - ), -) - -HUST_LINKS = ( - ( - "https://data.mendeley.com/public-files/datasets/nsc7hnsg4s/" - "files/5ca0ac3e-d598-4d07-8dcb-879aa047e98b/file_downloaded", - "hust_data.zip", - ), -) - -CALCE_LINKS = ( - ("https://web.calce.umd.edu/batteries/data/CS2_33.zip", "CS2_33.zip"), - ("https://web.calce.umd.edu/batteries/data/CS2_34.zip", "CS2_34.zip"), - ("https://web.calce.umd.edu/batteries/data/CS2_35.zip", "CS2_35.zip"), - ("https://web.calce.umd.edu/batteries/data/CS2_36.zip", "CS2_36.zip"), - ("https://web.calce.umd.edu/batteries/data/CS2_37.zip", "CS2_37.zip"), - ("https://web.calce.umd.edu/batteries/data/CS2_38.zip", "CS2_38.zip"), - ("https://web.calce.umd.edu/batteries/data/CX2_16.zip", "CX2_16.zip"), - ("https://web.calce.umd.edu/batteries/data/CX2_33.zip", "CX2_33.zip"), - ("https://web.calce.umd.edu/batteries/data/CX2_35.zip", "CX2_35.zip"), - ("https://web.calce.umd.edu/batteries/data/CX2_34.zip", "CX2_34.zip"), - ("https://web.calce.umd.edu/batteries/data/CX2_36.zip", "CX2_36.zip"), - ("https://web.calce.umd.edu/batteries/data/CX2_37.zip", "CX2_37.zip"), - ("https://web.calce.umd.edu/batteries/data/CX2_38.zip", "CX2_38.zip"), -) - - -RWTH_LINKS = ( - ("https://publications.rwth-aachen.de/record/818642/files/Rawdata.zip", "raw.zip"), -) - # https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/ucr_uea_datasets # 128 UCR + 33 UEA + 2 old removed (NonInvasiveFatalECGThorax1 and 2) = 163 _ucr_uea_datasets = [ diff --git a/tsdb/loading_funcs/__init__.py b/tsdb/loading_funcs/__init__.py index 18d2016..fae0583 100644 --- a/tsdb/loading_funcs/__init__.py +++ b/tsdb/loading_funcs/__init__.py @@ -11,6 +11,7 @@ from .physionet_2019 import load_physionet2019 from .ucr_uea_datasets import load_ucr_uea_dataset from .vessel_ais import load_ais +from .electricity_transformer_temperature import load_ett __all__ = [ "load_beijing_air_quality", @@ -19,4 +20,5 @@ "load_physionet2019", "load_ucr_uea_dataset", "load_ais", + "load_ett", ] diff --git a/tsdb/loading_funcs/electricity_transformer_temperature.py b/tsdb/loading_funcs/electricity_transformer_temperature.py new file mode 100644 index 0000000..509ea90 --- /dev/null +++ b/tsdb/loading_funcs/electricity_transformer_temperature.py @@ -0,0 +1,47 @@ +""" +Scripts related to dataset Electricity Transformer Temperature. + +For more information please refer to: +https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/electricity_transformer_temperature + +""" + +# Created by Wenjie Du +# License: BSD-3-Clause + +import os + +import pandas as pd + + +def load_ett(local_path): + """Load dataset Electricity Transformer Temperature. + + Parameters + ---------- + local_path : str, + The local path of dir saving the raw data of Electricity Transformer Temperature. + + Returns + ------- + data : dict + A dictionary contains X: + X : pandas.DataFrame + The time-series data of Electricity Load Diagrams. + """ + sub_datasets = [ + "ETTm1.csv", + "ETTm2.csv", + "ETTh1.csv", + "ETTh2.csv", + ] + + data = {} + for sub_set in sub_datasets: + file_path = os.path.join(local_path, sub_set) + df = pd.read_csv(file_path, index_col="date") + df.index = pd.to_datetime(df.index) + df_name = sub_set.removesuffix(".csv") + data[df_name] = df + + return data diff --git a/tsdb/utils/downloading.py b/tsdb/utils/downloading.py index 5e629e6..fff06d6 100644 --- a/tsdb/utils/downloading.py +++ b/tsdb/utils/downloading.py @@ -65,7 +65,7 @@ def _download_and_extract(url: str, saving_path: str) -> Optional[str]: logger.info("Download cancelled by the user.") raise - logger.info(f"Successfully downloaded data to {raw_data_saving_path}.") + logger.info(f"Successfully downloaded data to {raw_data_saving_path}") if ( suffix in supported_compression_format From 41dc923c1a9821f02ccded5f186e16e9ce489ba3 Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Wed, 20 Dec 2023 13:47:11 +0800 Subject: [PATCH 2/3] docs: update some text; --- README.md | 6 +++--- .../electricity_transformer_temperature.py | 13 ++++++++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index fdc847d..4c5fca0 100644 --- a/README.md +++ b/README.md @@ -46,13 +46,13 @@

-> 📣 TSDB now supports a total of 1️⃣6️⃣8️⃣ time-series datasets ‼️ +> 📣 TSDB now supports a total of 1️⃣6️⃣9️⃣ time-series datasets ‼️ -TSDB is a part of +TSDB is a part of PyPOTS - + (a Python toolbox for data mining on Partially-Observed Time Series), and was separated from PyPOTS for decoupling datasets from learning algorithms. TSDB is created to help researchers and engineers get rid of data collecting and downloading, and focus back on data processing details. TSDB provides all-in-one-stop convenience for downloading and loading open-source time-series datasets (available datasets listed [below](https://github.com/WenjieDu/TSDB#-list-of-available-datasets)). diff --git a/tsdb/loading_funcs/electricity_transformer_temperature.py b/tsdb/loading_funcs/electricity_transformer_temperature.py index 509ea90..39d1bff 100644 --- a/tsdb/loading_funcs/electricity_transformer_temperature.py +++ b/tsdb/loading_funcs/electricity_transformer_temperature.py @@ -25,9 +25,16 @@ def load_ett(local_path): Returns ------- data : dict - A dictionary contains X: - X : pandas.DataFrame - The time-series data of Electricity Load Diagrams. + A dictionary contains all four sub datasets: + ETTm1 : pandas.DataFrame + The time-series data of ETTm1 + ETTm2 : pandas.DataFrame + The time-series data of ETTm2 + ETTh1 : pandas.DataFrame + The time-series data of ETTh1 + ETTh2 : pandas.DataFrame + The time-series data of ETTh2 + """ sub_datasets = [ "ETTm1.csv", From dc82c5e455325f5d099a3651bccbca955cf8ce2c Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Wed, 20 Dec 2023 13:55:13 +0800 Subject: [PATCH 3/3] docs: update the docs; --- README.md | 17 +++++++++-------- docs/index.rst | 21 +++++++++++---------- docs/references.bib | 11 +++++++++++ 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 4c5fca0..a2a49d7 100644 --- a/README.md +++ b/README.md @@ -99,14 +99,15 @@ That's all. Simple and efficient. Enjoy it! 😃 ## ❖ List of Available Datasets -| Name | Main Tasks | -|----------------------------------------------------------------------------------|-----------------------------------------| -| [PhysioNet Challenge 2012](dataset_profiles/physionet_2012) | Classification, Forecasting, Imputation | -| [PhysioNet Challenge 2019](dataset_profiles/physionet_2019) | Classification, Imputation | -| [Beijing Multi-Site Air-Quality](dataset_profiles/beijing_multisite_air_quality) | Forecasting, Imputation | -| [Electricity Load Diagrams](dataset_profiles/electricity_load_diagrams) | Forecasting, Imputation | -| [UCR & UEA Datasets](dataset_profiles/ucr_uea_datasets) (all 163 datasets) | Classification | -| [Vessel AIS](dataset_profiles/vessel_ais) | Classification, Forecasting, Imputation | +| Name | Main Tasks | +|---------------------------------------------------------------------------------------------------|-----------------------------------------| +| [PhysioNet Challenge 2012](dataset_profiles/physionet_2012) | Forecasting, Imputation, Classification | +| [PhysioNet Challenge 2019](dataset_profiles/physionet_2019) | Forecasting, Imputation, Classification | +| [Beijing Multi-Site Air-Quality](dataset_profiles/beijing_multisite_air_quality) | Forecasting, Imputation | +| [Electricity Load Diagrams](dataset_profiles/electricity_load_diagrams) | Forecasting, Imputation | +| [Electricity Transformer Temperature (ETT)](dataset_profiles/electricity_transformer_temperature) | Forecasting, Imputation | +| [Vessel AIS](dataset_profiles/vessel_ais) | Forecasting, Imputation, Classification | +| [UCR & UEA Datasets](dataset_profiles/ucr_uea_datasets) (all 163 datasets) | Classification | ## ❖ Citing TSDB/PyPOTS diff --git a/docs/index.rst b/docs/index.rst index c75478c..6842eee 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -98,16 +98,17 @@ That's all. Simple and efficient. Enjoy it! 😃 ❖ List of Available Datasets ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -=================================================================================================================================================================== ========================================== - Name Main Tasks -=================================================================================================================================================================== ========================================== - `PhysioNet Challenge 2012 `_ :cite:`silva2012physionet` Classification, Forecasting, Imputation - `PhysioNet Challenge 2019 `_ :cite:`reyna2019physionet` Classification, Imputation - `Beijing Multi-Site Air-Quality `_ :cite:`zhang2017airquality` Forecasting, Imputation - `Electricity Load Diagrams `_ :cite:`trindade2015electricity` Forecasting, Imputation - `UCR & UEA Datasets `_ (all 163 datasets) :cite:`bagnall2018uea` :cite:`dau2018ucr` Classification - `Vessel AIS data `_ :cite:`grgicevic2023ais` Imputation, Forecasting, Classification -=================================================================================================================================================================== ========================================== +========================================================================================================================================================================== ========================================== + Name Main Tasks +========================================================================================================================================================================== ========================================== + `PhysioNet Challenge 2012 `_ :cite:`silva2012physionet` Forecasting, Imputation, Classification + `PhysioNet Challenge 2019 `_ :cite:`reyna2019physionet` Forecasting, Imputation, Classification + `Beijing Multi-Site Air-Quality `_ :cite:`zhang2017airquality` Forecasting, Imputation + `Electricity Load Diagrams `_ :cite:`trindade2015electricity` Forecasting, Imputation + `Electricity Transformer Temperature (ETT) `_ :cite:`zhou2021informer` Forecasting, Imputation + `Vessel AIS data `_ :cite:`grgicevic2023ais` Forecasting, Imputation, Classification + `UCR & UEA Datasets `_ (all 163 datasets) :cite:`bagnall2018uea` :cite:`dau2018ucr` Classification +========================================================================================================================================================================== ========================================== ❖ Citing TSDB/PyPOTS ^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/references.bib b/docs/references.bib index 8b29ace..ffe97df 100644 --- a/docs/references.bib +++ b/docs/references.bib @@ -64,3 +64,14 @@ @misc{grgicevic2023ais doi = {10.5281/zenodo.8064564}, url = {https://doi.org/10.5281/zenodo.8064564} } + +@inproceedings{zhou2021informer, +author = {Haoyi Zhou and Shanghang Zhang and Jieqi Peng and Shuai Zhang and Jianxin Li and Hui Xiong and Wancai Zhang}, +title = {Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting}, +booktitle = {The Thirty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI} 2021, Virtual Conference}, +volume = {35}, +number = {12}, +pages = {11106--11115}, +publisher = {{AAAI} Press}, +year = {2021}, +}