Skip to content

Commit

Permalink
Merge pull request #68 from WenjieDu/dev
Browse files Browse the repository at this point in the history
Add `Solar Alabama` dataset
  • Loading branch information
WenjieDu authored Jun 27, 2024
2 parents b24f303 + 8e9d31a commit bd2fe56
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 5 deletions.
10 changes: 7 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<h3 align="center">Welcome to TSDB</h3>

*<p align='center'>a Python toolbox to ease loading public time-series datasets</p>*
*<p align='center'>a Python toolbox to ease loading 172 public time-series datasets</p>*

<p align='center'>
<a href='https://github.com/WenjieDu/TSDB'>
Expand Down Expand Up @@ -46,7 +46,7 @@
</a>
</p>

> 📣 TSDB now supports a total of 1️⃣7️⃣0️⃣ time-series datasets ‼️
> 📣 TSDB now supports a total of 1️⃣7️⃣2️⃣ time-series datasets ‼️
<a href='https://github.com/WenjieDu/PyPOTS'><img src='https://pypots.com/figs/pypots_logos/PyPOTS/logo_FFBG.svg' width='160' align='left' /></a>
TSDB is a part of
Expand Down Expand Up @@ -88,7 +88,9 @@ data = tsdb.load('physionet_2012')
tsdb.download_and_extract('physionet_2012', './save_it_here')
# datasets you once loaded are cached, and you can check them with list_cache()
tsdb.list_cache()
# you can delete only one specific dataset and preserve others
# you can delete only one specific dataset's pickled cache
tsdb.delete_cache(dataset_name='physionet_2012', only_pickle=True)
# you can delete only one specific dataset's raw files and preserve others
tsdb.delete_cache(dataset_name='physionet_2012')
# or you can delete all cache with delete_cache() to free disk space
tsdb.delete_cache()
Expand All @@ -112,6 +114,8 @@ That's all. Simple and efficient. Enjoy it! 😃
| [Electricity Load Diagrams](dataset_profiles/electricity_load_diagrams) | Forecasting, Imputation |
| [Electricity Transformer Temperature (ETT)](dataset_profiles/electricity_transformer_temperature) | Forecasting, Imputation |
| [Vessel AIS](dataset_profiles/vessel_ais) | Forecasting, Imputation, Classification |
| [PeMS Traffic](dataset_profiles/pems_traffic) | Forecasting, Imputation |
| [Solar Alabama](dataset_profiles/solar_alabama) | Forecasting, Imputation |
| [UCR & UEA Datasets](dataset_profiles/ucr_uea_datasets) (all 163 datasets) | Classification |


Expand Down
5 changes: 5 additions & 0 deletions dataset_profiles/solar_alabama/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Solar Alabama

## Citing this dataset 🤗

`https://www.nrel.gov/grid/solar-power-data.html`
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
setup(
name="tsdb",
version=__version__,
description="TSDB (Time Series Data Beans): a Python toolbox helping load open-source time-series datasets",
description="TSDB (Time Series Data Beans): a Python toolbox helping load 172 open-source time-series datasets",
long_description=README,
long_description_content_type="text/markdown",
license="BSD-3-Clause",
Expand Down
3 changes: 3 additions & 0 deletions tests/test_tsdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@
"physionet_2012",
"physionet_2019",
"beijing_multisite_air_quality",
"italy_air_quality",
"electricity_load_diagrams",
"electricity_transformer_temperature",
"vessel_ais",
"pems_traffic",
"solar_alabama",
"ucr_uea_Wine",
]

Expand Down
3 changes: 3 additions & 0 deletions tsdb/data_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
load_ais,
load_italy_air_quality,
load_pems_traffic,
load_solar_alabama,
)
from .utils.downloading import download_and_extract
from .utils.file import purge_path, pickle_load, pickle_dump, determine_data_home
Expand Down Expand Up @@ -108,6 +109,8 @@ def load(dataset_name: str, use_cache: bool = True) -> dict:
result = load_ais(dataset_saving_path)
elif dataset_name == "pems_traffic":
result = load_pems_traffic(dataset_saving_path)
elif dataset_name == "solar_alabama":
result = load_solar_alabama(dataset_saving_path)
elif "ucr_uea_" in dataset_name:
actual_dataset_name = dataset_name.replace(
"ucr_uea_", ""
Expand Down
5 changes: 4 additions & 1 deletion tsdb/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,12 @@
"https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTh1.csv",
"https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTh2.csv",
],
# https://pems.dot.ca.gov
# https://pems.dot.ca.gov, https://github.com/laiguokun/multivariate-time-series-data
"pems_traffic": "https://raw.githubusercontent.com/laiguokun/multivariate-time-series-data/master/"
"traffic/traffic.txt.gz",
# https://www.nrel.gov/grid/solar-power-data.html, https://github.com/laiguokun/multivariate-time-series-data
"solar_alabama": "https://raw.githubusercontent.com/laiguokun/multivariate-time-series-data/master/"
"solar-energy/solar_AL.txt.gz",
}


Expand Down
2 changes: 2 additions & 0 deletions tsdb/loading_funcs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from .ucr_uea_datasets import load_ucr_uea_dataset
from .vessel_ais import load_ais
from .pems_traffic import load_pems_traffic
from .solar_alabama import load_solar_alabama

__all__ = [
"load_beijing_air_quality",
Expand All @@ -25,4 +26,5 @@
"load_ett",
"load_italy_air_quality",
"load_pems_traffic",
"load_solar_alabama",
]
50 changes: 50 additions & 0 deletions tsdb/loading_funcs/solar_alabama.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""
Scripts related to dataset Solar Alabama. It contains the solar power production records in the year 2006,
which are sampled every 10 minutes from 137 PV plants in Alabama State.
https://www.nrel.gov/grid/solar-power-data.html
For more information please refer to:
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/solar_alabama
"""

# Created by Wenjie Du <wenjay.du@gmail.com>
# License: BSD-3-Clause

import os

import pandas as pd


def load_solar_alabama(local_path):
    """Load dataset Solar Alabama.

    Parameters
    ----------
    local_path : str,
        The local path of dir saving the raw data of Solar Alabama.

    Returns
    -------
    data : dict
        A dictionary contains X:
            X : pandas.DataFrame
                The time-series data of Solar Alabama.
    """
    file_path = os.path.join(local_path, "solar_AL.txt")

    # The raw file has no header row: one column per PV plant (137 plants).
    col_names = [str(i) for i in range(137)]
    df = pd.read_csv(file_path, index_col=None, names=col_names)

    # Records are sampled every 10 minutes starting from 2006-01-01 00:00:00.
    # Using periods=len(df) instead of a hard-coded end date keeps the
    # timestamp column aligned with the data even if the file has a
    # different number of rows than the canonical 52,560 (365 days * 144).
    date = pd.date_range(
        start="2006-01-01 00:00:00",
        periods=len(df),
        freq="10min",
    )
    # Put the timestamp column first.
    df.insert(0, "date", date)

    data = {
        "X": df,
    }
    return data

0 comments on commit bd2fe56

Please sign in to comment.