From 773b67a177d823e719a778229e4eadf82e8d0691 Mon Sep 17 00:00:00 2001 From: Oskar Triebe Date: Tue, 3 Sep 2024 20:03:10 -0700 Subject: [PATCH] [Minor] Structure component configurations to be more consistent (#1645) * remove double features map * remove empty features map init * add n_forecasts to config_model * rename n_forecasts in forecaster to config_model.n_forecasts and remove n_lags/n_forecasts from TimeNet init * remove features_map, add pass of config_ar to TimeDataset * remove features_map, add pass of config_ar to TimeDataset * reset predict_component stacker after seaonal predict * keep self.n_forecasts for now * fixes * remove unused TimeDataset args * fix predict_steps * fix predict_steps * fixes * fix seasonal_componetns * rename autoregression * fix n_forecasts references * ruff * replace model_config.n_forecasts with config_model.n_forecasts * move component configs to new file * separation of config_components * fix imports * fix comp imports * fix config imports * fix * fix typos * fix lagged_reg * fox * fix reg * fut * standardize regressors * create add functions for regressors * d * remove unused import --- docs/source/code/forecaster.rst | 2 + docs/source/code/index.rst | 2 + .../feature-guides/collect_predictions.ipynb | 4 +- .../global_local_modeling_fut_regr.ipynb | 56 ---- .../\350\207\252\345\233\236\345\275\222.md" | 2 +- .../future_regressors/neural_nets.py | 6 +- .../future_regressors/shared_neural_nets.py | 6 +- .../shared_neural_nets_coef.py | 6 +- neuralprophet/components/router.py | 2 +- neuralprophet/configure.py | 306 +----------------- neuralprophet/configure_components.py | 306 ++++++++++++++++++ neuralprophet/data/process.py | 110 ++----- neuralprophet/data/split.py | 19 +- neuralprophet/df_utils.py | 46 +-- neuralprophet/forecaster.py | 203 ++++++------ neuralprophet/plot_utils.py | 6 +- neuralprophet/time_dataset.py | 58 ++-- neuralprophet/time_net.py | 46 +-- neuralprophet/utils.py | 39 ++- tests/debug/debug-energy-price-hourly.ipynb | 2 +- tests/test_integration.py | 24 +- tests/test_plotting.py | 20 +- tests/test_uncertainty.py | 2 +- tests/test_unit.py | 56 ++-- tests/utils/benchmark_time_dataset.py | 37 ++- 25 files changed, 646 insertions(+), 720 deletions(-) create mode 100644 neuralprophet/configure_components.py diff --git a/docs/source/code/forecaster.rst b/docs/source/code/forecaster.rst index d48d700f7..885579f66 100644 --- a/docs/source/code/forecaster.rst +++ b/docs/source/code/forecaster.rst @@ -6,6 +6,7 @@ Core Module Documentation :maxdepth: 1 configure.py + configure_components.py df_utils.py event_utils.py plot_forecast_plotly.py @@ -14,6 +15,7 @@ Core Module Documentation plot_model_parameters_matplotlib.py time_dataset.py time_net.py + utils_time_dataset.py utils.py .. automodule:: neuralprophet.forecaster diff --git a/docs/source/code/index.rst b/docs/source/code/index.rst index 5c4fb19c5..329b649df 100644 --- a/docs/source/code/index.rst +++ b/docs/source/code/index.rst @@ -6,6 +6,7 @@ Code Documentation forecaster.py (NeuralProphet) configure.py + configure_components.py time_dataset.py time_net.py torch_prophet.py @@ -23,6 +24,7 @@ Code Documentation plot_model_parameters_plotly.py plot_model_parameters_matplotlib.py utils.py + utils_time_dataset.py df_utils.py hdays_utils.py plot_utils.py diff --git a/docs/source/how-to-guides/feature-guides/collect_predictions.ipynb b/docs/source/how-to-guides/feature-guides/collect_predictions.ipynb index c2c84cdad..45ff195c5 100644 --- a/docs/source/how-to-guides/feature-guides/collect_predictions.ipynb +++ b/docs/source/how-to-guides/feature-guides/collect_predictions.ipynb @@ -34,7 +34,7 @@ "source": [ "if \"google.colab\" in str(get_ipython()):\n", " # uninstall preinstalled packages from Colab to avoid conflicts\n", - " !pip uninstall -y torch notebook notebook_shim tensorflow tensorflow-datasets prophet torchaudio torchdata torchtext torchvision \n", + " !pip uninstall -y torch notebook notebook_shim tensorflow tensorflow-datasets prophet torchaudio torchdata torchtext torchvision\n", " !pip install git+https://github.com/ourownstory/neural_prophet.git # may take a while\n", " #!pip install neuralprophet # much faster, but may not have the latest upgrades/bugfixes\n", "\n", @@ -787,7 +787,7 @@ "outputs": [], "source": [ "df = pd.read_csv(data_location + \"air_passengers.csv\")\n", - "future = m.make_future_dataframe(df, periods=3) # periods=m.n_forecasts, n_historic_predictions=False" + "future = m.make_future_dataframe(df, periods=3) # periods=m.config_model.n_forecasts, n_historic_predictions=False" ] }, { diff --git a/docs/source/how-to-guides/feature-guides/global_local_modeling_fut_regr.ipynb b/docs/source/how-to-guides/feature-guides/global_local_modeling_fut_regr.ipynb index 02cf34719..985f882ac 100644 --- a/docs/source/how-to-guides/feature-guides/global_local_modeling_fut_regr.ipynb +++ b/docs/source/how-to-guides/feature-guides/global_local_modeling_fut_regr.ipynb @@ -344,62 +344,6 @@ "df_local = df_local[[\"ds\", \"y\"]]" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# import neuralprophet.configure as configure" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# from dataclasses import dataclass, field\n", - "# from typing import Callable, List, Optional\n", - "# from typing import OrderedDict as OrderedDictType\n", - "# from typing import Type, Union\n", - "# from collections import OrderedDict\n", - "\n", - "# @dataclass\n", - "# class Regressor:\n", - "# reg_lambda: Optional[float]\n", - "# normalize: str\n", - "# mode: str\n", - "\n", - "# @dataclass\n", - "# class ConfigFutureRegressors:\n", - "# model: str\n", - "# regressors: OrderedDict = field(init=False) # contains SeasonConfig objects\n", - "\n", - "# def __post_init__(self):\n", - "# self.regressors = OrderedDictType[str, Regressor]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# ConfigFutureRegressors(\n", - "# model='linear',\n", - "# ) # Opti" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# m.config_regressors" - ] - }, { "cell_type": "code", "execution_count": null, diff --git "a/docs/zh/\350\207\252\345\233\236\345\275\222.md" "b/docs/zh/\350\207\252\345\233\236\345\275\222.md" index cd8b0d3df..31d2b7109 100644 --- "a/docs/zh/\350\207\252\345\233\236\345\275\222.md" +++ "b/docs/zh/\350\207\252\345\233\236\345\275\222.md" @@ -65,7 +65,7 @@ m = NeuralProphet( weekly_seasonality=False, daily_seasonality=False ) -m.highlight_nth_step_ahead_of_each_forecast(step_number=m.n_forecasts) +m.highlight_nth_step_ahead_of_each_forecast(step_number=m.config_model.n_forecasts) ``` 您可以指定任何小于或等于`n_forecasts`的值到`step_number`参数。一旦你这样做,指标将看起来像下面。 diff --git a/neuralprophet/components/future_regressors/neural_nets.py b/neuralprophet/components/future_regressors/neural_nets.py index 32110b0d9..4880e33cc 100644 --- a/neuralprophet/components/future_regressors/neural_nets.py +++ b/neuralprophet/components/future_regressors/neural_nets.py @@ -21,14 +21,14 @@ def __init__(self, config, id_list, quantiles, n_forecasts, device, config_trend if self.regressors_dims is not None: # Regresors params self.regressor_nets = nn.ModuleDict({}) - self.regressors_layers = config.regressors_layers + self.layers = config.layers # one net per regressor. to be adapted to combined network for regressor in self.regressors_dims.keys(): # Nets for both additive and multiplicative regressors regressor_net = nn.ModuleList() # This will be later 1 + static covariates d_inputs = 1 - for d_hidden_i in self.regressors_layers: + for d_hidden_i in self.layers: regressor_net.append(nn.Linear(d_inputs, d_hidden_i, bias=True)) d_inputs = d_hidden_i # final layer has input size d_inputs and output size equal to no. of quantiles @@ -77,7 +77,7 @@ def regressor(self, regressor_input, name): Forecast component of dims (batch, n_forecasts, num_quantiles) """ x = regressor_input - for i in range(len(self.regressors_layers) + 1): + for i in range(len(self.layers) + 1): if i > 0: x = nn.functional.relu(x) x = self.regressor_nets[name][i](x) diff --git a/neuralprophet/components/future_regressors/shared_neural_nets.py b/neuralprophet/components/future_regressors/shared_neural_nets.py index 9acd2b6a4..d19c5349b 100644 --- a/neuralprophet/components/future_regressors/shared_neural_nets.py +++ b/neuralprophet/components/future_regressors/shared_neural_nets.py @@ -21,14 +21,14 @@ def __init__(self, config, id_list, quantiles, n_forecasts, device, config_trend if self.regressors_dims is not None: # Regresors params self.regressor_nets = nn.ModuleDict({}) - self.regressors_layers = config.regressors_layers + self.layers = config.layers # Combined network for net_i, size_i in Counter([x["mode"] for x in self.regressors_dims.values()]).items(): # Nets for both additive and multiplicative regressors regressor_net = nn.ModuleList() # This will be later size_i(1 + static covariates) d_inputs = size_i - for d_hidden_i in self.regressors_layers: + for d_hidden_i in self.layers: regressor_net.append(nn.Linear(d_inputs, d_hidden_i, bias=True)) d_inputs = d_hidden_i # final layer has input size d_inputs and output size equal to no. of quantiles @@ -79,7 +79,7 @@ def regressors(self, regressor_inputs, mode): Forecast component of dims (batch, n_forecasts, num_quantiles) """ x = regressor_inputs - for i in range(len(self.regressors_layers) + 1): + for i in range(len(self.layers) + 1): if i > 0: x = nn.functional.relu(x) x = self.regressor_nets[mode][i](x) diff --git a/neuralprophet/components/future_regressors/shared_neural_nets_coef.py b/neuralprophet/components/future_regressors/shared_neural_nets_coef.py index 43fcfcb2b..ec88ac3c1 100644 --- a/neuralprophet/components/future_regressors/shared_neural_nets_coef.py +++ b/neuralprophet/components/future_regressors/shared_neural_nets_coef.py @@ -21,14 +21,14 @@ def __init__(self, config, id_list, quantiles, n_forecasts, device, config_trend if self.regressors_dims is not None: # Regresors params self.regressor_nets = nn.ModuleDict({}) - self.regressors_layers = config.regressors_layers + self.layers = config.layers # Combined network for net_i, size_i in Counter([x["mode"] for x in self.regressors_dims.values()]).items(): # Nets for both additive and multiplicative regressors regressor_net = nn.ModuleList() # This will be later size_i(1 + static covariates) d_inputs = size_i - for d_hidden_i in self.regressors_layers: + for d_hidden_i in self.layers: regressor_net.append(nn.Linear(d_inputs, d_hidden_i, bias=True)) d_inputs = d_hidden_i # final layer has input size d_inputs and output size equal to no. of quantiles @@ -80,7 +80,7 @@ def regressors(self, regressor_inputs, mode): Forecast component of dims (batch, n_forecasts, num_quantiles) """ x = regressor_inputs - for i in range(len(self.regressors_layers) + 1): + for i in range(len(self.layers) + 1): if i > 0: x = nn.functional.relu(x) x = self.regressor_nets[mode][i](x) diff --git a/neuralprophet/components/router.py b/neuralprophet/components/router.py index 09195d5f0..47d05af18 100644 --- a/neuralprophet/components/router.py +++ b/neuralprophet/components/router.py @@ -20,7 +20,7 @@ def get_trend(config, n_forecasts, quantiles, id_list, num_trends_modelled, devi Parameters ---------- - config : configure.Trend + config : configure_components.Trend n_forecasts : int number of steps to forecast. Aka number of model outputs quantiles : list diff --git a/neuralprophet/configure.py b/neuralprophet/configure.py index 1e3287af8..a8e01cce3 100644 --- a/neuralprophet/configure.py +++ b/neuralprophet/configure.py @@ -3,29 +3,23 @@ import logging import math import types -from collections import OrderedDict from dataclasses import dataclass, field -from typing import Callable, Dict, List, Optional -from typing import OrderedDict as OrderedDictType -from typing import Type, Union +from typing import Callable, Dict, List, Optional, Type, Union import numpy as np -import pandas as pd import torch -from neuralprophet import df_utils, np_types, utils_torch +from neuralprophet import configure_components, df_utils from neuralprophet.custom_loss_metrics import PinballLoss -from neuralprophet.event_utils import get_holiday_names log = logging.getLogger("NP.config") @dataclass class Model: - features_map: dict + n_forecasts: int quantiles: Optional[List[float]] = None prediction_frequency: Optional[Dict[str]] = None - features_map: Optional[dict] = field(default_factory=dict) max_lags: Optional[int] = field(init=False) def setup_quantiles(self): @@ -45,14 +39,16 @@ def setup_quantiles(self): # 0 is the median quantile index self.quantiles.insert(0, 0.5) - def set_max_num_lags(self, n_lags: int, config_lagged_regressors: Optional[ConfigLaggedRegressors] = None) -> int: + def set_max_num_lags( + self, n_lags: int, config_lagged_regressors: Optional[configure_components.LaggedRegressors] = None + ) -> int: """Get the greatest number of lags between the autoregression lags and the covariates lags. Parameters ---------- n_lags : int number of autoregressive lagged values of series to include as model inputs - config_lagged_regressors : configure.ConfigLaggedRegressors + config_lagged_regressors : configure_components.LaggedRegressors Configurations for lagged regressors Returns @@ -72,9 +68,6 @@ def set_max_num_lags(self, n_lags: int, config_lagged_regressors: Optional[Confi self.max_lags = n_lags -ConfigModel = Model - - @dataclass class Normalization: normalize: str @@ -87,10 +80,10 @@ class Normalization: def init_data_params( self, df, - config_lagged_regressors: Optional[ConfigLaggedRegressors] = None, + config_lagged_regressors: Optional[configure_components.LaggedRegressors] = None, config_regressors=None, - config_events: Optional[ConfigEvents] = None, - config_seasonality: Optional[ConfigSeasonality] = None, + config_events: Optional[configure_components.Events] = None, + config_seasonality: Optional[configure_components.Seasonalities] = None, ): if len(df["ID"].unique()) == 1 and not self.global_normalization: log.info("Setting normalization to global as only one dataframe provided for training.") @@ -142,7 +135,6 @@ class Train: batch_size: Optional[int] loss_func: Union[str, torch.nn.modules.loss._Loss, Callable] optimizer: Union[str, Type[torch.optim.Optimizer]] - # quantiles: List[float] = field(default_factory=list) optimizer_args: dict = field(default_factory=dict) scheduler: Optional[Union[str, Type[torch.optim.lr_scheduler.LRScheduler]]] = None scheduler_args: dict = field(default_factory=dict) @@ -312,281 +304,3 @@ def get_reg_delay_weight(self, progress, reg_start_pct: float = 0.66, reg_full_p def set_batches_per_epoch(self, batches_per_epoch: int): self.batches_per_epoch = batches_per_epoch - - -@dataclass -class Trend: - growth: np_types.GrowthMode - changepoints: Optional[list] - n_changepoints: int - changepoints_range: float - trend_reg: float - trend_reg_threshold: Optional[Union[bool, float]] - trend_global_local: str - trend_local_reg: Optional[Union[bool, float]] = None - - def __post_init__(self): - if self.growth not in ["off", "linear", "discontinuous"]: - log.error(f"Invalid trend growth '{self.growth}'. Set to 'linear'") - self.growth = "linear" - - if self.growth == "off": - self.changepoints = None - self.n_changepoints = 0 - - if self.changepoints is not None: - self.n_changepoints = len(self.changepoints) - self.changepoints = pd.to_datetime(self.changepoints).sort_values().values - - if self.trend_reg_threshold is None: - pass - elif isinstance(self.trend_reg_threshold, bool): - if self.trend_reg_threshold: - self.trend_reg_threshold = 3.0 / (3.0 + (1.0 + self.trend_reg) * np.sqrt(self.n_changepoints)) - log.debug(f"Trend reg threshold automatically set to: {self.trend_reg_threshold}") - else: - self.trend_reg_threshold = None - elif self.trend_reg_threshold < 0: - log.warning("Negative trend reg threshold set to zero.") - self.trend_reg_threshold = None - elif math.isclose(self.trend_reg_threshold, 0): - self.trend_reg_threshold = None - - if self.trend_reg < 0: - log.warning("Negative trend reg lambda set to zero.") - self.trend_reg = 0 - if self.trend_reg > 0: - if self.n_changepoints > 0: - log.info("Note: Trend changepoint regularization is experimental.") - self.trend_reg = 0.001 * self.trend_reg - else: - log.info("Trend reg lambda ignored due to no changepoints.") - self.trend_reg = 0 - if self.trend_reg_threshold and self.trend_reg_threshold > 0: - log.info("Trend reg threshold ignored due to no changepoints.") - else: - if self.trend_reg_threshold is not None and self.trend_reg_threshold > 0: - log.info("Trend reg threshold ignored due to reg lambda <= 0.") - - # If trend_global_local is not in the expected set, set to "global" - if self.trend_global_local not in ["global", "local"]: - log.error("Invalid global_local mode '{}'. Set to 'global'".format(self.trend_global_local)) - self.trend_global_local = "global" - - # If growth is off we want set to "global" - if (self.growth == "off") and (self.trend_global_local == "local"): - log.error("Invalid growth for global_local mode '{}'. Set to 'global'".format(self.trend_global_local)) - self.trend_global_local = "global" - - if self.trend_local_reg < 0: - log.error("Invalid negative trend_local_reg '{}'. Set to False".format(self.trend_local_reg)) - self.trend_local_reg = False - - if self.trend_local_reg is True: - log.error("trend_local_reg = True. Default trend_local_reg value set to 1") - self.trend_local_reg = 1 - - # If Trend modelling is global but local regularization is set. - if self.trend_global_local == "global" and self.trend_local_reg: - log.error("Trend modeling is '{}'. Setting the trend_local_reg to False".format(self.trend_global_local)) - self.trend_local_reg = False - - -@dataclass -class Season: - resolution: int - period: float - arg: np_types.SeasonalityArgument - condition_name: Optional[str] - global_local: np_types.SeasonGlobalLocalMode = "local" - - -@dataclass -class ConfigSeasonality: - mode: np_types.SeasonalityMode = "additive" - computation: str = "fourier" - reg_lambda: float = 0 - yearly_arg: np_types.SeasonalityArgument = "auto" - weekly_arg: np_types.SeasonalityArgument = "auto" - daily_arg: np_types.SeasonalityArgument = "auto" - periods: OrderedDict = field(init=False) # contains SeasonConfig objects - global_local: np_types.SeasonGlobalLocalMode = "global" - seasonality_local_reg: Optional[Union[bool, float]] = None - yearly_global_local: np_types.SeasonalityArgument = "auto" - weekly_global_local: np_types.SeasonalityArgument = "auto" - daily_global_local: np_types.SeasonalityArgument = "auto" - condition_name: Optional[str] = None - - def __post_init__(self): - if self.reg_lambda > 0 and self.computation == "fourier": - log.info("Note: Fourier-based seasonality regularization is experimental.") - self.reg_lambda = 0.001 * self.reg_lambda - - # If global_local is not in the expected set, set to "global" - if self.global_local not in ["global", "local"]: - log.error("Invalid global_local mode '{}'. Set to 'global'".format(self.global_local)) - self.global_local = "global" - - self.periods = OrderedDict( - { - "yearly": Season( - resolution=6, - period=365.25, - arg=self.yearly_arg, - global_local=( - self.yearly_global_local - if self.yearly_global_local in ["global", "local"] - else self.global_local - ), - condition_name=None, - ), - "weekly": Season( - resolution=3, - period=7, - arg=self.weekly_arg, - global_local=( - self.weekly_global_local - if self.weekly_global_local in ["global", "local"] - else self.global_local - ), - condition_name=None, - ), - "daily": Season( - resolution=6, - period=1, - arg=self.daily_arg, - global_local=( - self.daily_global_local if self.daily_global_local in ["global", "local"] else self.global_local - ), - condition_name=None, - ), - } - ) - - assert self.seasonality_local_reg >= 0, "Invalid seasonality_local_reg '{}'.".format(self.seasonality_local_reg) - - if self.seasonality_local_reg is True: - log.warning("seasonality_local_reg = True. Default seasonality_local_reg value set to 1") - self.seasonality_local_reg = 1 - - # If Season modelling is global but local regularization is set. - if self.global_local == "global" and self.seasonality_local_reg: - log.error( - "Seasonality modeling is '{}'. Setting the seasonality_local_reg to False".format(self.global_local) - ) - self.seasonality_local_reg = False - - def append(self, name, period, resolution, arg, condition_name, global_local="auto"): - self.periods[name] = Season( - resolution=resolution, - period=period, - arg=arg, - global_local=global_local if global_local in ["global", "local"] else self.global_local, - condition_name=condition_name, - ) - - -@dataclass -class AR: - n_lags: int - ar_reg: Optional[float] = None - ar_layers: Optional[List[int]] = None - - def __post_init__(self): - if self.ar_reg is not None and self.n_lags == 0: - raise ValueError("AR regularization is set, but n_lags is 0. Please set n_lags to a positive integer.") - if self.ar_reg is not None and self.ar_reg > 0: - if self.ar_reg < 0: - raise ValueError("regularization must be >= 0") - self.reg_lambda = 0.0001 * self.ar_reg - else: - self.reg_lambda = None - - def regularize(self, weights, original=False): - """Regularization of AR coefficients - - Parameters - ---------- - weights : torch.Tensor - Model weights to be regularized towards zero - original : bool - Do not penalize non-zeros - - Returns - ------- - numeric - Regularization loss - """ - - if original: - reg = torch.div(2.0, 1.0 + torch.exp(-2 * (1e-9 + torch.abs(weights)).pow(1 / 2.0))) - 1.0 - else: - reg = utils_torch.penalize_nonzero(weights, eagerness=3, acceptance=1.0) - return reg - - -@dataclass -class LaggedRegressor: - reg_lambda: Optional[float] - as_scalar: bool - normalize: Union[bool, str] - n_lags: int - - def __post_init__(self): - if self.reg_lambda is not None: - if self.reg_lambda < 0: - raise ValueError("regularization must be >= 0") - - -@dataclass -class ConfigLaggedRegressors: - layers: Optional[List[int]] = field(default_factory=list) - # List of hidden layers for shared NN across LaggedReg. The default value is ``[]``, which initializes no hidden layers. - regressors: OrderedDict[LaggedRegressor] = field(init=False) - - def __post_init__(self): - self.regressors = None - - -@dataclass -class Regressor: - reg_lambda: Optional[float] - normalize: Union[str, bool] - mode: str - - -@dataclass -class ConfigFutureRegressors: - model: str - regressors_layers: Optional[List[int]] - regressors: OrderedDict = field(init=False) # contains Regressor objects - - def __post_init__(self): - self.regressors = None - - -@dataclass -class Event: - lower_window: int - upper_window: int - reg_lambda: Optional[float] - mode: str - - -ConfigEvents = OrderedDictType[str, Event] - - -@dataclass -class Holidays: - country: Union[str, List[str], dict] - lower_window: int - upper_window: int - mode: str = "additive" - reg_lambda: Optional[float] = None - holiday_names: set = field(init=False) - - def init_holidays(self, df=None): - self.holiday_names = get_holiday_names(self.country, df) - - -ConfigCountryHolidays = Holidays diff --git a/neuralprophet/configure_components.py b/neuralprophet/configure_components.py new file mode 100644 index 000000000..d382d2441 --- /dev/null +++ b/neuralprophet/configure_components.py @@ -0,0 +1,306 @@ +from __future__ import annotations + +import logging +import math +from collections import OrderedDict +from dataclasses import dataclass, field +from typing import List, Optional +from typing import OrderedDict as OrderedDictType +from typing import Union + +import numpy as np +import pandas as pd +import torch + +from neuralprophet import np_types, utils_torch +from neuralprophet.event_utils import get_holiday_names + +log = logging.getLogger("NP.config_components") + + +@dataclass +class Trend: + growth: np_types.GrowthMode + changepoints: Optional[list] + n_changepoints: int + changepoints_range: float + trend_reg: float + trend_reg_threshold: Optional[Union[bool, float]] + trend_global_local: str + trend_local_reg: Optional[Union[bool, float]] = None + + def __post_init__(self): + if self.growth not in ["off", "linear", "discontinuous"]: + log.error(f"Invalid trend growth '{self.growth}'. Set to 'linear'") + self.growth = "linear" + + if self.growth == "off": + self.changepoints = None + self.n_changepoints = 0 + + if self.changepoints is not None: + self.n_changepoints = len(self.changepoints) + self.changepoints = pd.to_datetime(self.changepoints).sort_values().values + + if self.trend_reg_threshold is None: + pass + elif isinstance(self.trend_reg_threshold, bool): + if self.trend_reg_threshold: + self.trend_reg_threshold = 3.0 / (3.0 + (1.0 + self.trend_reg) * np.sqrt(self.n_changepoints)) + log.debug(f"Trend reg threshold automatically set to: {self.trend_reg_threshold}") + else: + self.trend_reg_threshold = None + elif self.trend_reg_threshold < 0: + log.warning("Negative trend reg threshold set to zero.") + self.trend_reg_threshold = None + elif math.isclose(self.trend_reg_threshold, 0): + self.trend_reg_threshold = None + + if self.trend_reg < 0: + log.warning("Negative trend reg lambda set to zero.") + self.trend_reg = 0 + if self.trend_reg > 0: + if self.n_changepoints > 0: + log.info("Note: Trend changepoint regularization is experimental.") + self.trend_reg = 0.001 * self.trend_reg + else: + log.info("Trend reg lambda ignored due to no changepoints.") + self.trend_reg = 0 + if self.trend_reg_threshold and self.trend_reg_threshold > 0: + log.info("Trend reg threshold ignored due to no changepoints.") + else: + if self.trend_reg_threshold is not None and self.trend_reg_threshold > 0: + log.info("Trend reg threshold ignored due to reg lambda <= 0.") + + # If trend_global_local is not in the expected set, set to "global" + if self.trend_global_local not in ["global", "local"]: + log.error("Invalid global_local mode '{}'. Set to 'global'".format(self.trend_global_local)) + self.trend_global_local = "global" + + # If growth is off we want set to "global" + if (self.growth == "off") and (self.trend_global_local == "local"): + log.error("Invalid growth for global_local mode '{}'. Set to 'global'".format(self.trend_global_local)) + self.trend_global_local = "global" + + if self.trend_local_reg < 0: + log.error("Invalid negative trend_local_reg '{}'. Set to False".format(self.trend_local_reg)) + self.trend_local_reg = False + + if self.trend_local_reg is True: + log.error("trend_local_reg = True. Default trend_local_reg value set to 1") + self.trend_local_reg = 1 + + # If Trend modelling is global but local regularization is set. + if self.trend_global_local == "global" and self.trend_local_reg: + log.error("Trend modeling is '{}'. Setting the trend_local_reg to False".format(self.trend_global_local)) + self.trend_local_reg = False + + +@dataclass +class SingleSeasonality: + resolution: int + period: float + arg: np_types.SeasonalityArgument + condition_name: Optional[str] + global_local: np_types.SeasonGlobalLocalMode = "local" + + +@dataclass +class Seasonalities: + mode: np_types.SeasonalityMode = "additive" + computation: str = "fourier" + reg_lambda: float = 0 + yearly_arg: np_types.SeasonalityArgument = "auto" + weekly_arg: np_types.SeasonalityArgument = "auto" + daily_arg: np_types.SeasonalityArgument = "auto" + periods: OrderedDict = field(init=False) # contains SeasonConfig objects + global_local: np_types.SeasonGlobalLocalMode = "global" + seasonality_local_reg: Optional[Union[bool, float]] = None + yearly_global_local: np_types.SeasonalityArgument = "auto" + weekly_global_local: np_types.SeasonalityArgument = "auto" + daily_global_local: np_types.SeasonalityArgument = "auto" + condition_name: Optional[str] = None + + def __post_init__(self): + if self.reg_lambda > 0 and self.computation == "fourier": + log.info("Note: Fourier-based seasonality regularization is experimental.") + self.reg_lambda = 0.001 * self.reg_lambda + + # If global_local is not in the expected set, set to "global" + if self.global_local not in ["global", "local"]: + log.error("Invalid global_local mode '{}'. Set to 'global'".format(self.global_local)) + self.global_local = "global" + + self.periods = OrderedDict( + { + "yearly": SingleSeasonality( + resolution=6, + period=365.25, + arg=self.yearly_arg, + global_local=( + self.yearly_global_local + if self.yearly_global_local in ["global", "local"] + else self.global_local + ), + condition_name=None, + ), + "weekly": SingleSeasonality( + resolution=3, + period=7, + arg=self.weekly_arg, + global_local=( + self.weekly_global_local + if self.weekly_global_local in ["global", "local"] + else self.global_local + ), + condition_name=None, + ), + "daily": SingleSeasonality( + resolution=6, + period=1, + arg=self.daily_arg, + global_local=( + self.daily_global_local if self.daily_global_local in ["global", "local"] else self.global_local + ), + condition_name=None, + ), + } + ) + + assert self.seasonality_local_reg >= 0, "Invalid seasonality_local_reg '{}'.".format(self.seasonality_local_reg) + + if self.seasonality_local_reg is True: + log.warning("seasonality_local_reg = True. Default seasonality_local_reg value set to 1") + self.seasonality_local_reg = 1 + + # If Season modelling is global but local regularization is set. + if self.global_local == "global" and self.seasonality_local_reg: + log.error( + "Seasonality modeling is '{}'. Setting the seasonality_local_reg to False".format(self.global_local) + ) + self.seasonality_local_reg = False + + def append(self, name, period, resolution, arg, condition_name, global_local="auto"): + self.periods[name] = SingleSeasonality( + resolution=resolution, + period=period, + arg=arg, + global_local=global_local if global_local in ["global", "local"] else self.global_local, + condition_name=condition_name, + ) + + +@dataclass +class AutoregRession: + n_lags: int + ar_reg: Optional[float] = None + ar_layers: Optional[List[int]] = None + + def __post_init__(self): + if self.ar_reg is not None and self.n_lags == 0: + raise ValueError("AR regularization is set, but n_lags is 0. Please set n_lags to a positive integer.") + if self.ar_reg is not None and self.ar_reg > 0: + if self.ar_reg < 0: + raise ValueError("regularization must be >= 0") + self.reg_lambda = 0.0001 * self.ar_reg + else: + self.reg_lambda = None + + def regularize(self, weights, original=False): + """Regularization of AR coefficients + + Parameters + ---------- + weights : torch.Tensor + Model weights to be regularized towards zero + original : bool + Do not penalize non-zeros + + Returns + ------- + numeric + Regularization loss + """ + + if original: + reg = torch.div(2.0, 1.0 + torch.exp(-2 * (1e-9 + torch.abs(weights)).pow(1 / 2.0))) - 1.0 + else: + reg = utils_torch.penalize_nonzero(weights, eagerness=3, acceptance=1.0) + return reg + + +@dataclass +class SingleLaggedRegressor: + n_lags: int + as_scalar: bool + normalize: Union[bool, str] + reg_lambda: Optional[float] + + def __post_init__(self): + if self.reg_lambda is not None: + if self.reg_lambda < 0: + raise ValueError("regularization must be >= 0") + + +@dataclass +class LaggedRegressors: + layers: Optional[List[int]] = field(default_factory=list) + # List of hidden layers for shared NN across LaggedReg. The default value is ``[]``, which initializes no hidden layers. + regressors: OrderedDict[SingleLaggedRegressor] = field(init=False) + + def __post_init__(self): + self.regressors = None + + def add(self, name, n_lags, as_scalar, normalize, reg_lambda): + if self.regressors is None: + self.regressors = OrderedDict() + self.regressors[name] = SingleLaggedRegressor( + n_lags=n_lags, as_scalar=as_scalar, normalize=normalize, reg_lambda=reg_lambda + ) + + +@dataclass +class SingleFutureRegressor: + mode: str + normalize: Union[str, bool] + reg_lambda: Optional[float] + + +@dataclass +class FutureRegressors: + model: Optional[str] = "linear" + layers: Optional[List[int]] = field(default_factory=list) + regressors: OrderedDict[SingleFutureRegressor] = field(init=False) + + def __post_init__(self): + self.regressors = None + + def add(self, name, mode, normalize, reg_lambda): + if self.regressors is None: + self.regressors = OrderedDict() + self.regressors[name] = SingleFutureRegressor(mode=mode, normalize=normalize, reg_lambda=reg_lambda) + + +@dataclass +class SingleEvent: + lower_window: int + upper_window: int + reg_lambda: Optional[float] + mode: str + + +# TODO: convert to dataclass +Events = OrderedDictType[str, SingleEvent] + + +@dataclass +class Holidays: + country: Union[str, List[str], dict] + lower_window: int + upper_window: int + mode: str = "additive" + reg_lambda: Optional[float] = None + holiday_names: set = field(init=False) + + def init_holidays(self, df=None): + self.holiday_names = get_holiday_names(self.country, df) diff --git a/neuralprophet/data/process.py b/neuralprophet/data/process.py index 8705c641a..549c747cd 100644 --- a/neuralprophet/data/process.py +++ b/neuralprophet/data/process.py @@ -4,14 +4,7 @@ import numpy as np import pandas as pd -from neuralprophet import df_utils, time_dataset -from neuralprophet.configure import ( - ConfigCountryHolidays, - ConfigEvents, - ConfigFutureRegressors, - ConfigLaggedRegressors, - ConfigSeasonality, -) +from neuralprophet import configure_components, df_utils from neuralprophet.np_types import Components log = logging.getLogger("NP.data.processing") @@ -27,7 +20,7 @@ def _reshape_raw_predictions_to_forecst_df( max_lags: int, freq: Optional[str], quantiles: List[float], - config_lagged_regressors: Optional[ConfigLaggedRegressors], + config_lagged_regressors: Optional[configure_components.LaggedRegressors], ) -> pd.DataFrame: """ Turns forecast-origin-wise predictions into forecast-target-wise predictions. @@ -52,7 +45,7 @@ def _reshape_raw_predictions_to_forecst_df( Data step sizes. Frequency of data recording. quantiles : list[float] List of quantiles to include in the forecast - config_lagged_regressors : ConfigLaggedRegressors + config_lagged_regressors : configure_components.LaggedRegressors Configuration for lagged regressors Returns @@ -282,7 +275,7 @@ def _prepare_dataframe_to_predict(model, df: pd.DataFrame, max_lags: int, freq: df=df_i, freq=freq, n_lags=model.config_ar.n_lags, - n_forecasts=model.n_forecasts, + n_forecasts=model.config_model.n_forecasts, config_missing=model.config_missing, config_regressors=model.config_regressors, config_lagged_regressors=model.config_lagged_regressors, @@ -296,11 +289,11 @@ def _prepare_dataframe_to_predict(model, df: pd.DataFrame, max_lags: int, freq: def _validate_column_name( name: str, - config_events: Optional[ConfigEvents], - config_country_holidays: Optional[ConfigCountryHolidays], - config_seasonality: Optional[ConfigSeasonality], - config_lagged_regressors: Optional[ConfigLaggedRegressors], - config_regressors: Optional[ConfigFutureRegressors], + config_events: Optional[configure_components.Events], + config_country_holidays: Optional[configure_components.Holidays], + config_seasonality: Optional[configure_components.Seasonalities], + config_lagged_regressors: Optional[configure_components.LaggedRegressors], + config_regressors: Optional[configure_components.FutureRegressors], events: Optional[bool] = True, seasons: Optional[bool] = True, regressors: Optional[bool] = True, @@ -312,15 +305,15 @@ def _validate_column_name( ---------- name : str name of seasonality, event or regressor - config_events : Optional[ConfigEvents] + config_events : Optional[configure_components.Events] Configuration options for adding events to the model. - config_country_holidays : Optional[ConfigCountryHolidays] + config_country_holidays : Optional[configure_components.Holidays] Configuration options for adding country holidays to the model. - config_seasonality : Optional[ConfigSeasonality] + config_seasonality : Optional[configure_components.Seasonalities] Configuration options for adding seasonal components to the model. - config_lagged_regressors : Optional[ConfigLaggedRegressors] + config_lagged_regressors : Optional[configure_components.LaggedRegressors] Configuration options for adding lagged external regressors to the model. - config_regressors : Optional[ConfigFutureRegressors] + config_regressors : Optional[configure_components.FutureRegressors] Configuration options for adding future regressors to the model. events : bool check if name already used for event @@ -401,7 +394,7 @@ def _check_dataframe( pd.DataFrame checked dataframe """ - if len(df) < (model.n_forecasts + model.config_ar.n_lags) and not future: + if len(df) < (model.config_model.n_forecasts + model.config_ar.n_lags) and not future: raise ValueError( "Dataframe has less than n_forecasts + n_lags rows. " "Forecasting not possible. Please either use a larger dataset, or adjust the model parameters." @@ -438,10 +431,10 @@ def _handle_missing_data( n_lags: int, n_forecasts: int, config_missing, - config_regressors: Optional[ConfigFutureRegressors] = None, - config_lagged_regressors: Optional[ConfigLaggedRegressors] = None, - config_events: Optional[ConfigEvents] = None, - config_seasonality: Optional[ConfigSeasonality] = None, + config_regressors: Optional[configure_components.FutureRegressors] = None, + config_lagged_regressors: Optional[configure_components.LaggedRegressors] = None, + config_events: Optional[configure_components.Events] = None, + config_seasonality: Optional[configure_components.Seasonalities] = None, predicting: bool = False, ) -> pd.DataFrame: """ @@ -464,13 +457,13 @@ def _handle_missing_data( Number of steps ahead of prediction time step to forecast. config_missing : Configuration options for handling missing data. - config_regressors : Optional[ConfigFutureRegressors] + config_regressors : Optional[configure_components.FutureRegressors] Configuration options for adding future regressors to the model. - config_lagged_regressors : Optional[ConfigLaggedRegressors] + config_lagged_regressors : Optional[configure_components.LaggedRegressors] Configuration options for adding lagged external regressors to the model. - config_events : Optional[ConfigEvents] + config_events : Optional[configure_components.Events] Configuration options for adding events to the model. - config_seasonality : Optional[ConfigSeasonality] + config_seasonality : Optional[configure_components.Seasonalities] Configuration options for adding seasonal components to the model. predicting : bool, default False If True, allows missing values in the 'y' column for the forecast period, or missing completely. @@ -573,60 +566,3 @@ def _handle_missing_data( if config_seasonality is not None and len(conditional_cols) > 0: df[conditional_cols] = df[conditional_cols].ffill() # type: ignore return df - - -def _create_dataset(model, df, predict_mode, prediction_frequency=None, components_stacker=None): - """Construct dataset from dataframe. - - (Configured Hyperparameters can be overridden by explicitly supplying them. - Useful to predict a single model component.) - - Parameters - ---------- - df : pd.DataFrame - dataframe containing column ``ds``, ``y``, and optionally``ID`` and - normalized columns normalized columns ``ds``, ``y``, ``t``, ``y_scaled`` - predict_mode : bool - specifies predict mode - - Options - * ``False``: includes target values. - * ``True``: does not include targets but includes entire dataset as input - - prediction_frequency: dict - periodic interval in which forecasts should be made. - Key: str - periodicity of the predictions to be made, e.g. 'daily-hour'. - - Options - * ``'hourly-minute'``: forecast once per hour at a specified minute - * ``'daily-hour'``: forecast once per day at a specified hour - * ``'weekly-day'``: forecast once per week at a specified day - * ``'monthly-day'``: forecast once per month at a specified day - * ``'yearly-month'``: forecast once per year at a specified month - - value: int - forecast origin of the predictions to be made, e.g. 7 for 7am in case of 'daily-hour'. - - Returns - ------- - TimeDataset - """ - df, _, _, _ = df_utils.prep_or_copy_df(df) - return time_dataset.GlobalTimeDataset( - df, - predict_mode=predict_mode, - n_lags=model.config_ar.n_lags, - n_forecasts=model.n_forecasts, - prediction_frequency=prediction_frequency, - predict_steps=model.predict_steps, - config_seasonality=model.config_seasonality, - config_events=model.config_events, - config_country_holidays=model.config_country_holidays, - config_regressors=model.config_regressors, - config_lagged_regressors=model.config_lagged_regressors, - config_missing=model.config_missing, - config_model=model.config_model, - components_stacker=components_stacker, - # config_train=model.config_train, # no longer needed since JIT tabularization. - ) diff --git a/neuralprophet/data/split.py b/neuralprophet/data/split.py index 0663616b0..8a39700f6 100644 --- a/neuralprophet/data/split.py +++ b/neuralprophet/data/split.py @@ -1,12 +1,9 @@ import logging -from typing import Optional -from typing import OrderedDict as OrderedDictType -from typing import Tuple +from typing import Optional, Tuple import pandas as pd -from neuralprophet import df_utils -from neuralprophet.configure import ConfigEvents, Regressor +from neuralprophet import configure_components, df_utils from neuralprophet.data.process import _check_dataframe log = logging.getLogger("NP.data.splitting") @@ -17,8 +14,8 @@ def _maybe_extend_df( n_forecasts: int, max_lags: int, freq: Optional[str], - config_regressors: Optional[OrderedDictType[str, Regressor]], - config_events: Optional[ConfigEvents], + config_regressors: Optional[configure_components.FutureRegressors], + config_events: Optional[configure_components.Events], ) -> Tuple[pd.DataFrame, dict]: """ Extend the input DataFrame based on the number of forecasts, maximum lags, @@ -34,9 +31,9 @@ def _maybe_extend_df( Number of steps ahead of prediction time step to forecast. freq : str Frequency of the time series data. - config_regressors : OrderedDict[str, Regressor] + config_regressors : configure_components.FutureRegressors Configuration of regressors. - config_events : ConfigEvents + config_events : configure_components.Events Configuration of events. Returns @@ -76,7 +73,7 @@ def _get_maybe_extend_periods( df: pd.DataFrame, n_forecasts: int, max_lags: int, - config_regressors: Optional[OrderedDictType[str, Regressor]], + config_regressors: Optional[configure_components.FutureRegressors], ) -> int: """ Determine the number of periods to extend the input DataFrame based on the @@ -91,7 +88,7 @@ def _get_maybe_extend_periods( Number of steps ahead of prediction time step to forecast. max_lags : int Maximum number of lags to consider. - config_regressors : OrderedDictType[str, Regressor] + config_regressors : configure_components.FutureRegressors Configuration of regressors. If None, the function may extend the DataFrame based on `n_forecasts` and `max_lags`. diff --git a/neuralprophet/df_utils.py b/neuralprophet/df_utils.py index 1f390db4f..4bcb558cc 100644 --- a/neuralprophet/df_utils.py +++ b/neuralprophet/df_utils.py @@ -10,7 +10,7 @@ import pandas as pd if TYPE_CHECKING: - from neuralprophet.configure import ConfigEvents, ConfigFutureRegressors, ConfigLaggedRegressors, ConfigSeasonality + from neuralprophet import configure_components log = logging.getLogger("NP.df_utils") @@ -115,10 +115,10 @@ def merge_dataframes(df: pd.DataFrame) -> pd.DataFrame: def data_params_definition( df, normalize, - config_lagged_regressors: Optional[ConfigLaggedRegressors] = None, - config_regressors: Optional[ConfigFutureRegressors] = None, - config_events: Optional[ConfigEvents] = None, - config_seasonality: Optional[ConfigSeasonality] = None, + config_lagged_regressors: Optional[configure_components.LaggedRegressors] = None, + config_regressors: Optional[configure_components.FutureRegressors] = None, + config_events: Optional[configure_components.Events] = None, + config_seasonality: Optional[configure_components.Seasonalities] = None, local_run_despite_global: Optional[bool] = None, ): """ @@ -149,15 +149,15 @@ def data_params_definition( ``soft`` scales the minimum value to 0.0 and the 95th quantile to 1.0 ``soft1`` scales the minimum value to 0.1 and the 90th quantile to 0.9 - config_lagged_regressors : configure.ConfigLaggedRegressors + config_lagged_regressors : configure_components.LaggedRegressors Configurations for lagged regressors normalize : bool data normalization - config_regressors : configure.ConfigFutureRegressors + config_regressors : configure_components.FutureRegressors extra regressors (with known future values) with sub_parameters normalize (bool) - config_events : configure.ConfigEvents + config_events : configure_components.Events user specified events configs - config_seasonality : configure.ConfigSeasonality + config_seasonality : configure_components.Seasonalities user specified seasonality configs Returns @@ -223,10 +223,10 @@ def data_params_definition( def init_data_params( df, normalize="auto", - config_lagged_regressors: Optional[ConfigLaggedRegressors] = None, - config_regressors: Optional[ConfigFutureRegressors] = None, - config_events: Optional[ConfigEvents] = None, - config_seasonality: Optional[ConfigSeasonality] = None, + config_lagged_regressors: Optional[configure_components.LaggedRegressors] = None, + config_regressors: Optional[configure_components.FutureRegressors] = None, + config_events: Optional[configure_components.Events] = None, + config_seasonality: Optional[configure_components.Seasonalities] = None, global_normalization=False, global_time_normalization=False, ): @@ -256,13 +256,13 @@ def init_data_params( ``soft`` scales the minimum value to 0.0 and the 95th quantile to 1.0 ``soft1`` scales the minimum value to 0.1 and the 90th quantile to 0.9 - config_lagged_regressors : configure.ConfigLaggedRegressors + config_lagged_regressors : configure_components.LaggedRegressors Configurations for lagged regressors - config_regressors : configure.ConfigFutureRegressors + config_regressors : configure_components.FutureRegressors extra regressors (with known future values) - config_events : configure.ConfigEvents + config_events : configure_components.Events user specified events configs - config_seasonality : configure.ConfigSeasonality + config_seasonality : configure_components.Seasonalities user specified seasonality configs global_normalization : bool @@ -922,8 +922,8 @@ def make_future_df( last_date, periods, freq, - config_events: ConfigEvents, - config_regressors: ConfigFutureRegressors, + config_events: configure_components.Events, + config_regressors: configure_components.FutureRegressors, events_df=None, regressors_df=None, ): @@ -940,11 +940,11 @@ def make_future_df( freq : str Data step sizes. Frequency of data recording, any valid frequency for pd.date_range, such as ``D`` or ``M`` - config_events : configure.ConfigEvents + config_events : configure_components.Events User specified events configs events_df : pd.DataFrame containing column ``ds`` and ``event`` - config_regressors : configure.ConfigFutureRegressors + config_regressors : configure_components.FutureRegressors configuration for user specified regressors, regressors_df : pd.DataFrame containing column ``ds`` and one column for each of the external regressors @@ -974,7 +974,7 @@ def make_future_df( return future_df -def convert_events_to_features(df, config_events: ConfigEvents, events_df): +def convert_events_to_features(df, config_events: configure_components.Events, events_df): """ Converts events information into binary features of the df @@ -982,7 +982,7 @@ def convert_events_to_features(df, config_events: ConfigEvents, events_df): ---------- df : pd.DataFrame Dataframe with columns ``ds`` datestamps and ``y`` time series values - config_events : configure.ConfigEvents + config_events : configure_components.Events User specified events configs events_df : pd.DataFrame containing column ``ds`` and ``event`` diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py index e2c34f463..22aa9c58f 100644 --- a/neuralprophet/forecaster.py +++ b/neuralprophet/forecaster.py @@ -15,6 +15,7 @@ from neuralprophet import ( configure, + configure_components, df_utils, np_types, time_dataset, @@ -27,7 +28,6 @@ from neuralprophet.data.process import ( _check_dataframe, _convert_raw_predictions_to_raw_df, - _create_dataset, _handle_missing_data, _prepare_dataframe_to_predict, _reshape_raw_predictions_to_forecst_df, @@ -479,15 +479,15 @@ def __init__( # General self.name = "NeuralProphet" - self.n_forecasts = n_forecasts # Model self.config_model = configure.Model( + n_forecasts=n_forecasts, quantiles=quantiles, prediction_frequency=prediction_frequency, - features_map={}, ) self.config_model.setup_quantiles() + # self.n_forecasts = self.config_model.n_forecasts # Data Normalization settings self.config_normalization = configure.Normalization( @@ -515,10 +515,10 @@ def __init__( self.metrics = utils_metrics.get_metrics(collect_metrics) # AR - self.config_ar = configure.AR(n_lags=n_lags, ar_reg=ar_reg, ar_layers=ar_layers) + self.config_ar = configure_components.AutoregRession(n_lags=n_lags, ar_reg=ar_reg, ar_layers=ar_layers) # Trend - self.config_trend = configure.Trend( + self.config_trend = configure_components.Trend( growth=growth, changepoints=changepoints, n_changepoints=n_changepoints, @@ -545,7 +545,7 @@ def __init__( ) # Seasonality - self.config_seasonality = configure.ConfigSeasonality( + self.config_seasonality = configure_components.Seasonalities( mode=seasonality_mode, reg_lambda=seasonality_reg, yearly_arg=yearly_seasonality, @@ -560,11 +560,11 @@ def __init__( ) # Events - self.config_events: Optional[configure.ConfigEvents] = None - self.config_country_holidays: Optional[configure.ConfigCountryHolidays] = None + self.config_events: Optional[configure_components.Events] = None + self.config_country_holidays: Optional[configure_components.Holidays] = None # Lagged Regressors - self.config_lagged_regressors = configure.ConfigLaggedRegressors( + self.config_lagged_regressors = configure_components.LaggedRegressors( layers=lagged_reg_layers, ) # Update max_lags @@ -572,9 +572,9 @@ def __init__( n_lags=self.config_ar.n_lags, config_lagged_regressors=self.config_lagged_regressors ) # Future Regressors - self.config_regressors = configure.ConfigFutureRegressors( + self.config_regressors = configure_components.FutureRegressors( model=future_regressors_model, - regressors_layers=future_regressors_layers, + layers=future_regressors_layers, ) # set during fit() @@ -589,17 +589,54 @@ def __init__( # set during prediction self.future_periods = None - self.predict_steps = self.n_forecasts + self.predict_steps = self.config_model.n_forecasts # later set by user (optional) self.highlight_forecast_step_n = None self.true_ar_weights = None + def _create_dataset(self, df, predict_mode, components_stacker=None): + """Construct dataset from dataframe. + + (Configured Hyperparameters can be overridden by explicitly supplying them. + Useful to predict a single model component.) + + Parameters + ---------- + df : pd.DataFrame + dataframe containing column ``ds``, ``y``, and optionally``ID`` and + normalized columns normalized columns ``ds``, ``y``, ``t``, ``y_scaled`` + predict_mode : bool + specifies predict mode + + Options + * ``False``: includes target values. + * ``True``: does not include targets but includes entire dataset as input + + Returns + ------- + TimeDataset + """ + df, _, _, _ = df_utils.prep_or_copy_df(df) + return time_dataset.GlobalTimeDataset( + df, + predict_mode=predict_mode, + config_ar=self.config_ar, + config_seasonality=self.config_seasonality, + config_events=self.config_events, + config_country_holidays=self.config_country_holidays, + config_regressors=self.config_regressors, + config_lagged_regressors=self.config_lagged_regressors, + config_missing=self.config_missing, + config_model=self.config_model, + components_stacker=components_stacker, + ) + def add_lagged_regressor( self, names: Union[str, List[str]], n_lags: Union[int, np_types.Literal["auto", "scalar"]] = "auto", - regularization: Optional[float] = None, normalize: Union[bool, str] = "auto", + regularization: Optional[float] = None, ): """Add a covariate or list of covariate time series as additional lagged regressors to be used for fitting and predicting. @@ -654,13 +691,12 @@ def add_lagged_regressor( config_lagged_regressors=self.config_lagged_regressors, config_regressors=self.config_regressors, ) - if self.config_lagged_regressors.regressors is None: - self.config_lagged_regressors.regressors = OrderedDict() - self.config_lagged_regressors.regressors[name] = configure.LaggedRegressor( - reg_lambda=regularization, - normalize=normalize, - as_scalar=only_last_value, + self.config_lagged_regressors.add( + name=name, n_lags=n_lags, + as_scalar=only_last_value, + normalize=normalize, + reg_lambda=regularization, ) self.config_model.set_max_num_lags( n_lags=self.config_ar.n_lags, config_lagged_regressors=self.config_lagged_regressors @@ -730,12 +766,9 @@ def add_future_regressor( config_lagged_regressors=self.config_lagged_regressors, config_regressors=self.config_regressors, ) + # add to Config + self.config_regressors.add(name, mode=mode, normalize=normalize, reg_lambda=regularization) - if self.config_regressors.regressors is None: - self.config_regressors.regressors = OrderedDict() - self.config_regressors.regressors[name] = configure.Regressor( - reg_lambda=regularization, normalize=normalize, mode=mode - ) return self def add_events( @@ -788,7 +821,7 @@ def add_events( config_lagged_regressors=self.config_lagged_regressors, config_regressors=self.config_regressors, ) - self.config_events[event_name] = configure.Event( + self.config_events[event_name] = configure_components.SingleEvent( lower_window=lower_window, upper_window=upper_window, reg_lambda=regularization, mode=mode ) return self @@ -835,7 +868,7 @@ def add_country_holidays( raise ValueError("regularization must be >= 0") if regularization == 0: regularization = None - self.config_country_holidays = configure.Holidays( + self.config_country_holidays = configure_components.Holidays( country=country_name, lower_window=lower_window, upper_window=upper_window, @@ -1030,8 +1063,8 @@ def fit( n_lags=self.config_ar.n_lags, config_lagged_regressors=self.config_lagged_regressors ) - if self.config_model.max_lags == 0 and self.n_forecasts > 1: - self.n_forecasts = 1 + if self.config_model.max_lags == 0 and self.config_model.n_forecasts > 1: + self.config_model.n_forecasts = 1 self.predict_steps = 1 log.error( "Changing n_forecasts to 1. Without lags, the forecast can be " @@ -1129,7 +1162,7 @@ def fit( df=df, freq=self.data_freq, n_lags=self.config_ar.n_lags, - n_forecasts=self.n_forecasts, + n_forecasts=self.config_model.n_forecasts, config_missing=self.config_missing, config_regressors=self.config_regressors, config_lagged_regressors=self.config_lagged_regressors, @@ -1173,19 +1206,13 @@ def fit( # Note: _create_dataset() needs to be called after set_auto_seasonalities() train_components_stacker = utils_time_dataset.ComponentStacker( n_lags=self.config_ar.n_lags, - n_forecasts=self.n_forecasts, + n_forecasts=self.config_model.n_forecasts, max_lags=self.config_model.max_lags, config_seasonality=self.config_seasonality, lagged_regressor_config=self.config_lagged_regressors, feature_indices={}, ) - dataset = _create_dataset( - self, - df, - predict_mode=False, - prediction_frequency=self.config_model.prediction_frequency, - components_stacker=train_components_stacker, - ) + dataset = self._create_dataset(df, predict_mode=False, components_stacker=train_components_stacker) # Determine the max_number of epochs self.config_train.set_auto_batch_epoch(n_data=len(dataset)) # Create Train DataLoader @@ -1210,7 +1237,7 @@ def fit( df=df_val, freq=self.data_freq, n_lags=self.config_ar.n_lags, - n_forecasts=self.n_forecasts, + n_forecasts=self.config_model.n_forecasts, config_missing=self.config_missing, config_regressors=self.config_regressors, config_lagged_regressors=self.config_lagged_regressors, @@ -1223,12 +1250,12 @@ def fit( val_components_stacker = utils_time_dataset.ComponentStacker( n_lags=self.config_ar.n_lags, max_lags=self.config_model.max_lags, - n_forecasts=self.n_forecasts, + n_forecasts=self.config_model.n_forecasts, config_seasonality=self.config_seasonality, lagged_regressor_config=self.config_lagged_regressors, feature_indices={}, ) - dataset_val = _create_dataset(self, df_val, predict_mode=False, components_stacker=val_components_stacker) + dataset_val = self._create_dataset(df_val, predict_mode=False, components_stacker=val_components_stacker) loader_val = DataLoader(dataset_val, batch_size=min(1024, len(dataset_val)), shuffle=False, drop_last=False) # Init the Trainer @@ -1379,7 +1406,7 @@ def predict(self, df: pd.DataFrame, decompose: bool = True, raw: bool = False, a # to get all forecasteable values with df given, maybe extend into future: df, periods_added = _maybe_extend_df( df=df, - n_forecasts=self.n_forecasts, + n_forecasts=self.config_model.n_forecasts, max_lags=self.config_model.max_lags, freq=self.data_freq, config_regressors=self.config_regressors, @@ -1390,9 +1417,7 @@ def predict(self, df: pd.DataFrame, decompose: bool = True, raw: bool = False, a df = _normalize(df=df, config_normalization=self.config_normalization) forecast = pd.DataFrame() for df_name, df_i in df.groupby("ID"): - dates, predicted, components = self._predict_raw( - df_i, df_name, include_components=decompose, prediction_frequency=self.config_model.prediction_frequency - ) + dates, predicted, components = self._predict_raw(df_i, df_name, include_components=decompose) df_i = df_utils.drop_missing_from_df( df_i, self.config_missing.drop_missing, self.predict_steps, self.config_ar.n_lags ) @@ -1400,7 +1425,7 @@ def predict(self, df: pd.DataFrame, decompose: bool = True, raw: bool = False, a fcst = _convert_raw_predictions_to_raw_df( dates=dates, predicted=predicted, - n_forecasts=self.n_forecasts, + n_forecasts=self.config_model.n_forecasts, quantiles=self.config_model.quantiles, components=components, ) @@ -1413,7 +1438,7 @@ def predict(self, df: pd.DataFrame, decompose: bool = True, raw: bool = False, a components=components, prediction_frequency=self.config_model.prediction_frequency, dates=dates, - n_forecasts=self.n_forecasts, + n_forecasts=self.config_model.n_forecasts, max_lags=self.config_model.max_lags, freq=self.data_freq, quantiles=self.config_model.quantiles, @@ -1424,7 +1449,7 @@ def predict(self, df: pd.DataFrame, decompose: bool = True, raw: bool = False, a forecast = pd.concat((forecast, fcst), ignore_index=True) df = df_utils.return_df_in_original_format(forecast, received_ID_col, received_single_time_series) - self.predict_steps = self.n_forecasts + self.predict_steps = self.config_model.n_forecasts return df def test(self, df: pd.DataFrame, verbose: bool = True): @@ -1450,7 +1475,7 @@ def test(self, df: pd.DataFrame, verbose: bool = True): df=df, freq=freq, n_lags=self.config_ar.n_lags, - n_forecasts=self.n_forecasts, + n_forecasts=self.config_model.n_forecasts, config_missing=self.config_missing, config_regressors=self.config_regressors, config_lagged_regressors=self.config_lagged_regressors, @@ -1462,13 +1487,13 @@ def test(self, df: pd.DataFrame, verbose: bool = True): df = _normalize(df=df, config_normalization=self.config_normalization) components_stacker = utils_time_dataset.ComponentStacker( n_lags=self.config_ar.n_lags, - n_forecasts=self.n_forecasts, + n_forecasts=self.config_model.n_forecasts, max_lags=self.config_model.max_lags, config_seasonality=self.config_seasonality, lagged_regressor_config=self.config_lagged_regressors, feature_indices={}, ) - dataset = _create_dataset(self, df, predict_mode=False, components_stacker=components_stacker) + dataset = self._create_dataset(df, predict_mode=False, components_stacker=components_stacker) self.model.set_components_stacker(components_stacker, mode="test") test_loader = DataLoader(dataset, batch_size=min(1024, len(dataset)), shuffle=False, drop_last=False) # Use Lightning to calculate metrics @@ -1602,7 +1627,7 @@ def split_df(self, df: pd.DataFrame, freq: str = "auto", valid_p: float = 0.2, l df=df, freq=freq, n_lags=self.config_ar.n_lags, - n_forecasts=self.n_forecasts, + n_forecasts=self.config_model.n_forecasts, config_missing=self.config_missing, config_regressors=self.config_regressors, config_lagged_regressors=self.config_lagged_regressors, @@ -1613,7 +1638,7 @@ def split_df(self, df: pd.DataFrame, freq: str = "auto", valid_p: float = 0.2, l df_train, df_val = df_utils.split_df( df, n_lags=self.config_model.max_lags, - n_forecasts=self.n_forecasts, + n_forecasts=self.config_model.n_forecasts, valid_p=valid_p, inputs_overbleed=True, local_split=local_split, @@ -1791,7 +1816,7 @@ def crossvalidation_split_df( df=df, freq=freq, n_lags=self.config_ar.n_lags, - n_forecasts=self.n_forecasts, + n_forecasts=self.config_model.n_forecasts, config_missing=self.config_missing, config_regressors=self.config_regressors, config_lagged_regressors=self.config_lagged_regressors, @@ -1802,7 +1827,7 @@ def crossvalidation_split_df( folds = df_utils.crossvalidation_split_df( df, n_lags=self.config_model.max_lags, - n_forecasts=self.n_forecasts, + n_forecasts=self.config_model.n_forecasts, k=k, fold_pct=fold_pct, fold_overlap_pct=fold_overlap_pct, @@ -1855,7 +1880,7 @@ def double_crossvalidation_split_df( df=df, freq=freq, n_lags=self.config_ar.n_lags, - n_forecasts=self.n_forecasts, + n_forecasts=self.config_model.n_forecasts, config_missing=self.config_missing, config_regressors=self.config_regressors, config_lagged_regressors=self.config_lagged_regressors, @@ -1866,7 +1891,7 @@ def double_crossvalidation_split_df( folds_val, folds_test = df_utils.double_crossvalidation_split_df( df, n_lags=self.config_model.max_lags, - n_forecasts=self.n_forecasts, + n_forecasts=self.config_model.n_forecasts, k=k, valid_pct=valid_pct, test_pct=test_pct, @@ -1983,7 +2008,7 @@ def make_future_dataframe( regressors_df=regressors_dict[df_name], periods=periods, n_historic_predictions=n_historic_predictions, - n_forecasts=self.n_forecasts, + n_forecasts=self.config_model.n_forecasts, max_lags=self.config_model.max_lags, freq=self.data_freq, ) @@ -2100,12 +2125,14 @@ def predict_seasonal_components(self, df: pd.DataFrame, quantile: float = 0.5): raise ValueError("The quantile needs to have been specified in the model configuration.") df_seasonal = pd.DataFrame() - prev_n_forecasts = self.n_forecasts prev_n_lags = self.config_ar.n_lags prev_max_lags = self.config_model.max_lags - prev_features_map = {key: value for key, value in self.config_model.features_map.items()} + prev_n_forecasts = self.config_model.n_forecasts + prev_predict_components_stacker = self.model.predict_components_stacker self.config_model.max_lags = 0 + self.config_ar.n_lags = 0 + self.config_model.n_forecasts = 1 df, received_ID_col, received_single_time_series, _ = df_utils.prep_or_copy_df(df) df = _check_dataframe(self, df, check_y=False, exogenous=False) @@ -2120,19 +2147,16 @@ def predict_seasonal_components(self, df: pd.DataFrame, quantile: float = 0.5): ) dataset = time_dataset.TimeDataset( df=df_i, + components_stacker=feature_unstackor, predict_mode=True, - n_lags=0, - n_forecasts=1, - prediction_frequency=self.config_model.prediction_frequency, - predict_steps=1, - config_missing=self.config_missing, config_model=self.config_model, + config_missing=self.config_missing, + config_ar=self.config_ar, config_seasonality=self.config_seasonality, config_events=None, config_country_holidays=None, config_regressors=None, config_lagged_regressors=None, - components_stacker=feature_unstackor, ) self.model.set_components_stacker(feature_unstackor, mode="predict") loader = DataLoader(dataset, batch_size=min(4096, len(df)), shuffle=False, drop_last=False) @@ -2166,15 +2190,16 @@ def predict_seasonal_components(self, df: pd.DataFrame, quantile: float = 0.5): if self.config_seasonality.mode == "additive": data_params = self.config_normalization.get_data_params(df_name) predicted[name] = predicted[name] * data_params["y"].scale - df_i = df_i[:: self.config_model.prediction_frequency].reset_index(drop=True) + df_i = df_i[:: self.config_model.prediction_frequency].reset_index(drop=True) # this may cause a bug df_aux = pd.DataFrame({"ds": df_i["ds"], "ID": df_i["ID"], **predicted}) df_seasonal = pd.concat((df_seasonal, df_aux), ignore_index=True) df = df_utils.return_df_in_original_format(df_seasonal, received_ID_col, received_single_time_series) # reset possibly altered values - self.n_forecasts = prev_n_forecasts self.config_ar.n_lags = prev_n_lags self.config_model.max_lags = prev_max_lags - self.config_model.features_map = prev_features_map + self.config_model.n_forecasts = prev_n_forecasts + self.model.predict_components_stacker = prev_predict_components_stacker + return df def set_true_ar_for_eval(self, true_ar_weights: np.ndarray): @@ -2225,7 +2250,7 @@ def highlight_nth_step_ahead_of_each_forecast(self, step_number: Optional[int] = Set to None to reset. """ if step_number is not None: - assert step_number <= self.n_forecasts + assert step_number <= self.config_model.n_forecasts self.highlight_forecast_step_n = step_number return self @@ -2292,7 +2317,7 @@ def plot( forecast_in_focus = self.highlight_forecast_step_n if len(self.config_model.quantiles) > 1: if (self.highlight_forecast_step_n) is None and ( - self.n_forecasts > 1 or self.config_model.max_lags > 0 + self.config_model.n_forecasts > 1 or self.config_model.max_lags > 0 ): # rather query if n_forecasts >1 than n_lags>1 raise ValueError( "Please specify step_number using the highlight_nth_step_ahead_of_each_forecast function" @@ -2301,7 +2326,7 @@ def plot( if (self.highlight_forecast_step_n or forecast_in_focus) is not None and self.config_model.max_lags == 0: log.warning("highlight_forecast_step_n is ignored since auto-regression not enabled.") self.highlight_forecast_step_n = None - if forecast_in_focus is not None and forecast_in_focus > self.n_forecasts: + if forecast_in_focus is not None and forecast_in_focus > self.config_model.n_forecasts: raise ValueError( "Forecast_in_focus is out of range. Specify a number smaller or equal to the steps ahead of " "prediction time step to forecast " @@ -2309,7 +2334,7 @@ def plot( if self.config_model.max_lags > 0: num_forecasts = sum(fcst["yhat1"].notna()) - if num_forecasts < self.n_forecasts: + if num_forecasts < self.config_model.n_forecasts: log.warning( "Too few forecasts to plot a line per forecast step." "Plotting a line per forecast origin instead." ) @@ -2404,9 +2429,9 @@ def get_latest_forecast( fcst = fcst[fcst["ID"] == df_name].copy(deep=True) log.info(f"Getting data from ID {df_name}") if include_history_data is None: - fcst = fcst[-(include_previous_forecasts + self.n_forecasts + self.config_model.max_lags) :] + fcst = fcst[-(include_previous_forecasts + self.config_model.n_forecasts + self.config_model.max_lags) :] elif include_history_data is False: - fcst = fcst[-(include_previous_forecasts + self.n_forecasts) :] + fcst = fcst[-(include_previous_forecasts + self.config_model.n_forecasts) :] elif include_history_data is True: fcst = fcst fcst = utils.fcst_df_to_latest_forecast( @@ -2485,9 +2510,9 @@ def plot_latest_forecast( " plots only the median quantile forecasts." ) if plot_history_data is None: - fcst = fcst[-(include_previous_forecasts + self.n_forecasts + self.config_model.max_lags) :] + fcst = fcst[-(include_previous_forecasts + self.config_model.n_forecasts + self.config_model.max_lags) :] elif plot_history_data is False: - fcst = fcst[-(include_previous_forecasts + self.n_forecasts) :] + fcst = fcst[-(include_previous_forecasts + self.config_model.n_forecasts) :] elif plot_history_data is True: fcst = fcst fcst = utils.fcst_df_to_latest_forecast( @@ -2626,7 +2651,7 @@ def plot_components( log.warning("highlight_forecast_step_n is ignored since autoregression not enabled.") # self.highlight_forecast_step_n = None forecast_in_focus = None - if forecast_in_focus is not None and forecast_in_focus > self.n_forecasts: + if forecast_in_focus is not None and forecast_in_focus > self.config_model.n_forecasts: raise ValueError( "Forecast_in_focus is out of range. Specify a number smaller or equal to the steps ahead of " "prediction time step to forecast " @@ -2779,7 +2804,7 @@ def plot_parameters( if (self.highlight_forecast_step_n or forecast_in_focus) is not None and self.config_ar.n_lags == 0: log.warning("highlight_forecast_step_n is ignored since autoregression not enabled.") forecast_in_focus = None - if forecast_in_focus is not None and forecast_in_focus > self.n_forecasts: + if forecast_in_focus is not None and forecast_in_focus > self.config_model.n_forecasts: raise ValueError( "Forecast_in_focus is out of range. Specify a number smaller or equal to the steps ahead of " "prediction time step to forecast " @@ -2873,7 +2898,7 @@ def _init_model(self): config_events=self.config_events, config_holidays=self.config_country_holidays, config_normalization=self.config_normalization, - n_forecasts=self.n_forecasts, + n_forecasts=self.config_model.n_forecasts, n_lags=self.config_ar.n_lags, ar_layers=self.config_ar.ar_layers, metrics=self.metrics, @@ -2916,7 +2941,7 @@ def _eval_true_ar(self): log.info("AR parameters: ", self.true_ar_weights, "\n", "Model weights: ", weights) return sTPE - def _predict_raw(self, df, df_name, include_components=False, prediction_frequency=None): + def _predict_raw(self, df, df_name, include_components=False): """Runs the model to make predictions. Predictions are returned in raw vector format without decomposition. @@ -2960,23 +2985,17 @@ def _predict_raw(self, df, df_name, include_components=False, prediction_frequen raise ValueError("Received unprepared dataframe to predict. " "Please call predict_dataframe_to_predict.") components_stacker = utils_time_dataset.ComponentStacker( n_lags=self.config_ar.n_lags, - n_forecasts=self.n_forecasts, + n_forecasts=self.config_model.n_forecasts, max_lags=self.config_model.max_lags, config_seasonality=self.config_seasonality, lagged_regressor_config=self.config_lagged_regressors, feature_indices={}, ) - dataset = _create_dataset( - self, - df, - predict_mode=True, - prediction_frequency=prediction_frequency, - components_stacker=components_stacker, - ) + dataset = self._create_dataset(df, predict_mode=True, components_stacker=components_stacker) self.model.set_components_stacker(components_stacker, mode="predict") loader = DataLoader(dataset, batch_size=min(1024, len(df)), shuffle=False, drop_last=False) - if self.n_forecasts > 1: - dates = df["ds"].iloc[self.config_model.max_lags : -self.n_forecasts + 1] + if self.config_model.n_forecasts > 1: + dates = df["ds"].iloc[self.config_model.max_lags : -self.config_model.n_forecasts + 1] else: dates = df["ds"].iloc[self.config_model.max_lags :] @@ -3114,7 +3133,7 @@ def conformal_predict( c = Conformal( alpha=alpha, method=method, - n_forecasts=self.n_forecasts, + n_forecasts=self.config_model.n_forecasts, quantiles=self.config_model.quantiles, ) @@ -3150,7 +3169,7 @@ def conformal_plot( # quantile regression dataframe cols = list(df.columns) qr_cols = [col for col in df.columns if "%" in col and "qhat" not in col] - forecast_cols = cols[: self.n_forecasts + 2] + forecast_cols = cols[: self.config_model.n_forecasts + 2] df_qr = df[forecast_cols + qr_cols] fig = self.highlight_nth_step_ahead_of_each_forecast(n_highlight).plot(df_qr, plotting_backend="plotly") diff --git a/neuralprophet/plot_utils.py b/neuralprophet/plot_utils.py index e5e035902..d80c9fb59 100644 --- a/neuralprophet/plot_utils.py +++ b/neuralprophet/plot_utils.py @@ -385,7 +385,7 @@ def get_valid_configuration( { "plot_name": "Auto-Regression", "comp_name": "ar", - "num_overplot": m.n_forecasts, + "num_overplot": m.config_model.n_forecasts, "bar": True, } ) @@ -418,7 +418,7 @@ def get_valid_configuration( { "plot_name": f'Lagged Regressor "{name}"', "comp_name": f"lagged_regressor_{name}", - "num_overplot": m.n_forecasts, + "num_overplot": m.config_model.n_forecasts, "bar": True, } ) @@ -534,7 +534,7 @@ def get_valid_configuration( and forecast_in_focus is None ): if len(m.config_train.quantiles) > 1 and ( - m.n_forecasts > 1 or m.config_ar.n_lags > 0 + m.config_model.n_forecasts > 1 or m.config_ar.n_lags > 0 ): # rather query if n_forecasts >1 than n_lags>1 raise ValueError( "Please specify step_number using the highlight_nth_step_ahead_of_each_forecast function" diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 303d28d44..84be0dc05 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -9,7 +9,7 @@ from numpy.lib.stride_tricks import sliding_window_view from torch.utils.data.dataset import Dataset -from neuralprophet import configure, utils +from neuralprophet import configure_components, utils from neuralprophet.event_utils import get_all_holidays log = logging.getLogger("NP.time_dataset") @@ -21,19 +21,16 @@ class TimeDataset(Dataset): def __init__( self, df, + components_stacker, predict_mode, - n_lags, - n_forecasts, - prediction_frequency, - predict_steps, + config_missing, + config_model, + config_ar, config_seasonality, config_events, config_country_holidays, config_regressors, config_lagged_regressors, - config_missing, - config_model, - components_stacker, ): """Initialize Timedataset from time-series df. Parameters @@ -66,10 +63,7 @@ def __init__( self.meta["df_name"] = self.df_name self.predict_mode = predict_mode - self.n_lags = n_lags - self.n_forecasts = n_forecasts - self.prediction_frequency = prediction_frequency - self.predict_steps = predict_steps # currently unused + self.config_ar = config_ar self.config_seasonality = config_seasonality self.config_events = config_events self.config_country_holidays = config_country_holidays @@ -79,7 +73,7 @@ def __init__( self.config_model = config_model if self.config_model.max_lags == 0: - assert self.n_forecasts == 1 + assert self.config_model.n_forecasts == 1 self.two_level_inputs = ["seasonalities", "covariates", "events", "regressors"] # Preprocessing of events and holidays features (added to self.df) @@ -133,7 +127,7 @@ def stack_all_features(self): current_idx = self.components_stacker.stack_targets_component(self.df_tensors, feature_list, current_idx) current_idx = self.components_stacker.stack_lags_component( - self.df_tensors, feature_list, current_idx, self.n_lags + self.df_tensors, feature_list, current_idx, self.config_ar.n_lags ) current_idx = self.components_stacker.stack_lagged_regerssors_component( self.df_tensors, feature_list, current_idx, self.config_lagged_regressors @@ -236,7 +230,7 @@ def __getitem__(self, index): # Extract features from dataframe at given target index position if self.config_model.max_lags > 0: min_start_index = df_index - self.config_model.max_lags + 1 - max_end_index = df_index + self.n_forecasts + 1 + max_end_index = df_index + self.config_model.n_forecasts + 1 inputs = self.all_features[min_start_index:max_end_index, :] else: inputs = self.all_features[df_index, :] @@ -260,13 +254,13 @@ def create_sample2index_map(self, df, df_tensors): # Limit target range due to input lags and number of forecasts df_length = len(df_tensors["ds"]) origin_start_end_mask = self.create_origin_start_end_mask( - df_length=df_length, max_lags=self.config_model.max_lags, n_forecasts=self.n_forecasts + df_length=df_length, max_lags=self.config_model.max_lags, n_forecasts=self.config_model.n_forecasts ) # Prediction Frequency # Filter missing samples and prediction frequency (does not actually drop, but creates indexmapping) prediction_frequency_mask = self.create_prediction_frequency_filter_mask( - df_tensors["ds"], self.prediction_frequency + df_tensors["ds"], self.config_model.prediction_frequency ) # Combine prediction origin masks @@ -277,8 +271,8 @@ def create_sample2index_map(self, df, df_tensors): df_tensors=df_tensors, predict_mode=self.predict_mode, max_lags=self.config_model.max_lags, - n_lags=self.n_lags, - n_forecasts=self.n_forecasts, + n_lags=self.config_ar.n_lags, + n_forecasts=self.config_model.n_forecasts, config_lagged_regressors=self.config_lagged_regressors, future_regressor_names=self.additive_regressors_names + self.multiplicative_regressors_names, event_names=self.additive_event_and_holiday_names + self.multiplicative_event_and_holiday_names, @@ -325,7 +319,7 @@ def get_event_offset_features(self, event, config, feature): ---------- event : str Name of the event - config : configure.ConfigEvents + config : configure_components.Events User specified events, holidays, and country specific holidays feature : pd.Series Feature for the event @@ -347,8 +341,8 @@ def get_event_offset_features(self, event, config, feature): def add_event_features_to_df( self, df, - config_events: Optional[configure.ConfigEvents] = None, - config_country_holidays: Optional[configure.ConfigCountryHolidays] = None, + config_events: Optional[configure_components.Events] = None, + config_country_holidays: Optional[configure_components.Holidays] = None, ): """ Construct columns containing the features of each event, added to df. @@ -356,9 +350,9 @@ def add_event_features_to_df( ---------- df : pd.DataFrame Dataframe with all values including the user specified events (provided by user) - config_events : configure.ConfigEvents + config_events : configure_components.Events User specified events, each with their upper, lower windows (int), regularization - config_country_holidays : configure.ConfigCountryHolidays + config_country_holidays : configure_components.CountryHolidays Configurations (holiday_names, upper, lower windows, regularization) for country specific holidays Returns ------- @@ -591,19 +585,16 @@ class GlobalTimeDataset(TimeDataset): def __init__( self, df, + components_stacker, predict_mode, - n_lags, - n_forecasts, - prediction_frequency, - predict_steps, + config_missing, + config_model, + config_ar, config_seasonality, config_events, config_country_holidays, config_regressors, config_lagged_regressors, - config_missing, - config_model, - components_stacker, ): """Initialize Timedataset from time-series df. Parameters @@ -619,10 +610,7 @@ def __init__( self.datasets[df_name] = TimeDataset( df=df[df["ID"] == df_name], predict_mode=predict_mode, - n_lags=n_lags, - n_forecasts=n_forecasts, - prediction_frequency=prediction_frequency, - predict_steps=predict_steps, + config_ar=config_ar, config_seasonality=config_seasonality, config_events=config_events, config_country_holidays=config_country_holidays, diff --git a/neuralprophet/time_net.py b/neuralprophet/time_net.py index a45fb9875..096da0e67 100644 --- a/neuralprophet/time_net.py +++ b/neuralprophet/time_net.py @@ -9,7 +9,7 @@ import torch.nn as nn import torchmetrics -from neuralprophet import configure, np_types +from neuralprophet import configure, configure_components, np_types from neuralprophet.components.router import get_future_regressors, get_seasonality, get_trend from neuralprophet.utils import ( check_for_regularization, @@ -43,15 +43,15 @@ class TimeNet(pl.LightningModule): def __init__( self, config_model: configure.Model, - config_seasonality: configure.ConfigSeasonality, + config_seasonality: configure_components.Seasonalities, config_train: Optional[configure.Train] = None, - config_trend: Optional[configure.Trend] = None, - config_ar: Optional[configure.AR] = None, + config_trend: Optional[configure_components.Trend] = None, + config_ar: Optional[configure_components.AutoregRession] = None, config_normalization: Optional[configure.Normalization] = None, - config_lagged_regressors: Optional[configure.ConfigLaggedRegressors] = None, - config_regressors: Optional[configure.ConfigFutureRegressors] = None, - config_events: Optional[configure.ConfigEvents] = None, - config_holidays: Optional[configure.ConfigCountryHolidays] = None, + config_lagged_regressors: Optional[configure_components.LaggedRegressors] = None, + config_regressors: Optional[configure_components.FutureRegressors] = None, + config_events: Optional[configure_components.Events] = None, + config_holidays: Optional[configure_components.Holidays] = None, n_forecasts: int = 1, n_lags: int = 0, ar_layers: Optional[List[int]] = [], @@ -73,14 +73,14 @@ def __init__( quantiles : list the set of quantiles estimated config_train : configure.Train - config_trend : configure.Trend - config_seasonality : configure.ConfigSeasonality - config_ar : configure.AR - config_lagged_regressors : configure.ConfigLaggedRegressors + config_trend : configure_components.Trend + config_seasonality : configure_components.Seasonalities + config_ar : configure_components.AutoregRession + config_lagged_regressors : configure_components.LaggedRegressors Configurations for lagged regressors - config_regressors : configure.ConfigFutureRegressors + config_regressors : configure_components.FutureRegressors Configs of regressors with mode and index. - config_events : configure.ConfigEvents + config_events : configure_components.Events config_holidays : OrderedDict config_normalization: OrderedDict n_forecasts : int @@ -142,7 +142,7 @@ def __init__( # General self.config_model = config_model - self.n_forecasts = n_forecasts + self.config_model.n_forecasts = n_forecasts self.train_components_stacker = train_components_stacker self.val_components_stacker = val_components_stacker self.test_components_stacker = test_components_stacker @@ -271,7 +271,7 @@ def __init__( ar_net_layers.append(nn.ReLU()) d_inputs = d_hidden_i # final layer has input size d_inputs and output size equal to no. of forecasts * no. of quantiles - ar_net_layers.append(nn.Linear(d_inputs, self.n_forecasts * len(self.quantiles), bias=False)) + ar_net_layers.append(nn.Linear(d_inputs, self.config_model.n_forecasts * len(self.quantiles), bias=False)) self.ar_net = nn.Sequential(*ar_net_layers) for lay in self.ar_net: if isinstance(lay, nn.Linear): @@ -286,7 +286,9 @@ def __init__( covar_net_layers.append(nn.Linear(d_inputs, d_hidden_i, bias=True)) covar_net_layers.append(nn.ReLU()) d_inputs = d_hidden_i - covar_net_layers.append(nn.Linear(d_inputs, self.n_forecasts * len(self.quantiles), bias=False)) + covar_net_layers.append( + nn.Linear(d_inputs, self.config_model.n_forecasts * len(self.quantiles), bias=False) + ) self.covar_net = nn.Sequential(*covar_net_layers) for lay in self.covar_net: if isinstance(lay, nn.Linear): @@ -490,7 +492,7 @@ def auto_regression(self, lags: Union[torch.Tensor, float]) -> torch.Tensor: """ x = self.ar_net(lags) # segment the last dimension to match the quantiles - x = x.view(x.shape[0], self.n_forecasts, len(self.quantiles)) + x = x.view(x.shape[0], self.config_model.n_forecasts, len(self.quantiles)) return x def forward_covar_net(self, covariates): @@ -512,7 +514,7 @@ def forward_covar_net(self, covariates): x = covariates x = self.covar_net(x) # segment the last dimension to match the quantiles - x = x.view(x.shape[0], self.n_forecasts, len(self.quantiles)) + x = x.view(x.shape[0], self.config_model.n_forecasts, len(self.quantiles)) return x def forward( @@ -536,7 +538,7 @@ def forward( # Initialize components and nonstationary tensors components = {} additive_components = torch.zeros( - size=(time_input.shape[0], self.n_forecasts, len(self.quantiles)), + size=(time_input.shape[0], self.config_model.n_forecasts, len(self.quantiles)), device=self.device, ) additive_components_nonstationary = torch.zeros( @@ -700,7 +702,7 @@ def compute_components( torch.divide( torch.sum(covar_attributions[name], axis=1).to(all_covariates.device), covar_attribution_sum_per_forecast, - ).reshape(self.n_forecasts, len(self.quantiles)), + ).reshape(self.config_model.n_forecasts, len(self.quantiles)), ) if self.config_events is not None or self.config_holidays is not None: if additive_events_input is not None: @@ -943,7 +945,7 @@ def _add_batch_regularizations(self, loss, progress): # Add regularization of AR weights - sparsify if self.config_model.max_lags > 0 and self.config_ar.reg_lambda is not None: reg_ar = self.config_ar.regularize(self.ar_weights) - reg_ar = torch.sum(reg_ar).squeeze() / self.n_forecasts + reg_ar = torch.sum(reg_ar).squeeze() / self.config_model.n_forecasts reg_loss += self.config_ar.reg_lambda * reg_ar # Regularize trend to be smoother/sparse diff --git a/neuralprophet/utils.py b/neuralprophet/utils.py index f656f9e64..52c027266 100644 --- a/neuralprophet/utils.py +++ b/neuralprophet/utils.py @@ -15,7 +15,7 @@ from neuralprophet import utils_torch if TYPE_CHECKING: - from neuralprophet.configure import ConfigEvents, ConfigLaggedRegressors, ConfigSeasonality + from neuralprophet import configure_components log = logging.getLogger("NP.utils") @@ -217,15 +217,19 @@ def _regularize_weights(weights, reg_lambda): return reg_loss -def reg_func_events(config_events: Optional[ConfigEvents], config_country_holidays, model): +def reg_func_events( + config_events: Optional[configure_components.Events], + config_country_holidays: Optional[configure_components.Holidays], + model, +): """ Regularization of events coefficients to induce sparcity Parameters ---------- - config_events : configure.ConfigEvents + config_events : configure_components.Events Configurations (upper, lower windows, regularization) for user specified events - config_country_holidays : configure.ConfigCountryHolidays + config_country_holidays : configure_components.Holidays Configurations (holiday_names, upper, lower windows, regularization) for country specific holidays model : TimeNet @@ -249,13 +253,13 @@ def reg_func_events(config_events: Optional[ConfigEvents], config_country_holida return reg_events_loss -def reg_func_covariates(config_lagged_regressors: ConfigLaggedRegressors, model): +def reg_func_covariates(config_lagged_regressors: configure_components.LaggedRegressors, model): """ Regularization of lagged covariates to induce sparsity Parameters ---------- - config_lagged_regressors : configure.ConfigLaggedRegressors + config_lagged_regressors : configure_components.LaggedRegressors Configurations for lagged regressors model : TimeNet TimeNet model object @@ -282,7 +286,7 @@ def reg_func_regressors(config_regressors, model): Parameters ---------- - config_regressors : configure.ConfigFutureRegressors + config_regressors : configure_components.FutureRegressors Configurations for user specified regressors model : TimeNet TimeNet model object @@ -350,12 +354,12 @@ def symmetric_total_percentage_error(values, estimates): return 100 * sum_abs_diff / (10e-9 + sum_abs) -def config_seasonality_to_model_dims(config_seasonality: ConfigSeasonality): +def config_seasonality_to_model_dims(config_seasonality: configure_components.Seasonalities): """Convert the NeuralProphet seasonal model configuration to input dims for TimeNet model. Parameters ---------- - config_seasonality : configure.ConfigSeasonality + config_seasonality : configure_components.Seasonalities NeuralProphet seasonal model configuration Returns @@ -374,16 +378,19 @@ def config_seasonality_to_model_dims(config_seasonality: ConfigSeasonality): return seasonal_dims -def config_events_to_model_dims(config_events: Optional[ConfigEvents], config_country_holidays): +def config_events_to_model_dims( + config_events: Optional[configure_components.Events], + config_country_holidays: Optional[configure_components.Holidays], +): """ Convert user specified events configurations along with country specific holidays to input dims for TimeNet model. Parameters ---------- - config_events : configure.ConfigEvents + config_events : configure_components.Events Configurations (upper, lower windows, regularization) for user specified events - config_country_holidays : configure.ConfigCountryHolidays + config_country_holidays : configure_components.Holidays Configurations (holiday_names, upper, lower windows, regularization) for country specific holidays Returns @@ -496,7 +503,7 @@ def config_regressors_to_model_dims(config_regressors): Parameters ---------- - config_regressors : configure.ConfigFutureRegressors + config_regressors : configure_components.FutureRegressors Configurations for user specified regressors Returns @@ -545,7 +552,7 @@ def config_regressors_to_model_dims(config_regressors): return regressors_dims_dic -def set_auto_seasonalities(df, config_seasonality: ConfigSeasonality): +def set_auto_seasonalities(df, config_seasonality: configure_components.Seasonalities): """Set seasonalities that were left on auto or set by user. Note @@ -562,11 +569,11 @@ def set_auto_seasonalities(df, config_seasonality: ConfigSeasonality): ---------- df : pd.Dataframe Dataframe from which datestamps will be retrieved from - config_seasonality : configure.ConfigSeasonality + config_seasonality : configure_components.Seasonalities NeuralProphet seasonal model configuration, as after __init__ Returns ------- - configure.ConfigSeasonality + configure_components.Seasonalities Processed NeuralProphet seasonal model configuration """ diff --git a/tests/debug/debug-energy-price-hourly.ipynb b/tests/debug/debug-energy-price-hourly.ipynb index d53f1f1fa..cec72350a 100644 --- a/tests/debug/debug-energy-price-hourly.ipynb +++ b/tests/debug/debug-energy-price-hourly.ipynb @@ -2271,7 +2271,7 @@ } ], "source": [ - "m.highlight_nth_step_ahead_of_each_forecast(m.n_forecasts)\n", + "m.highlight_nth_step_ahead_of_each_forecast(m.config_model.n_forecasts)\n", "m.plot(forecast, df_name=\"test\")" ] }, diff --git a/tests/test_integration.py b/tests/test_integration.py index a6d727acc..8185f5d2e 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -60,7 +60,7 @@ def test_train_eval_test(): df=df, freq="D", n_lags=m.config_ar.n_lags, - n_forecasts=m.n_forecasts, + n_forecasts=m.config_model.n_forecasts, config_missing=m.config_missing, config_regressors=m.config_regressors, config_lagged_regressors=m.config_lagged_regressors, @@ -206,7 +206,7 @@ def test_no_trend(): batch_size=BATCH_SIZE, learning_rate=LR, ) - # m.highlight_nth_step_ahead_of_each_forecast(m.n_forecasts) + # m.highlight_nth_step_ahead_of_each_forecast(m.config_model.n_forecasts) m.fit(df, freq="D") future = m.make_future_dataframe(df, periods=60, n_historic_predictions=60) forecast = m.predict(df=future) @@ -303,7 +303,7 @@ def test_ar(): batch_size=BATCH_SIZE, learning_rate=LR, ) - m.highlight_nth_step_ahead_of_each_forecast(m.n_forecasts) + m.highlight_nth_step_ahead_of_each_forecast(m.config_model.n_forecasts) m.fit(df, freq="D") future = m.make_future_dataframe(df, n_historic_predictions=90) forecast = m.predict(df=future) @@ -327,7 +327,7 @@ def test_ar_sparse(): batch_size=BATCH_SIZE, learning_rate=LR, ) - m.highlight_nth_step_ahead_of_each_forecast(m.n_forecasts) + m.highlight_nth_step_ahead_of_each_forecast(m.config_model.n_forecasts) m.fit(df, freq="D") future = m.make_future_dataframe(df, n_historic_predictions=90) forecast = m.predict(df=future) @@ -353,7 +353,7 @@ def test_ar_deep(): batch_size=BATCH_SIZE, learning_rate=LR, ) - m.highlight_nth_step_ahead_of_each_forecast(m.n_forecasts) + m.highlight_nth_step_ahead_of_each_forecast(m.config_model.n_forecasts) m.fit(df, freq="D") future = m.make_future_dataframe(df, n_historic_predictions=90) forecast = m.predict(df=future) @@ -415,7 +415,7 @@ def test_lag_reg_deep(): df["C"] = df["y"].rolling(30, min_periods=1).mean() cols = [col for col in df.columns if col not in ["ds", "y"]] m = m.add_lagged_regressor(names=cols) - m.highlight_nth_step_ahead_of_each_forecast(m.n_forecasts) + m.highlight_nth_step_ahead_of_each_forecast(m.config_model.n_forecasts) m.fit(df, freq="D") m.predict(df) if PLOT: @@ -551,14 +551,16 @@ def check_cv(df, freq, n_lags, n_forecasts, k, fold_pct, fold_overlap_pct): learning_rate=LR, ) folds = m.crossvalidation_split_df(df, freq=freq, k=k, fold_pct=fold_pct, fold_overlap_pct=fold_overlap_pct) - total_samples = len(df) - m.config_ar.n_lags + 2 - (2 * m.n_forecasts) + total_samples = len(df) - m.config_ar.n_lags + 2 - (2 * m.config_model.n_forecasts) per_fold = int(fold_pct * total_samples) not_overlap = per_fold - int(fold_overlap_pct * per_fold) - assert all([per_fold == len(val) - m.config_ar.n_lags + 1 - m.n_forecasts for (train, val) in folds]) + assert all( + [per_fold == len(val) - m.config_ar.n_lags + 1 - m.config_model.n_forecasts for (train, val) in folds] + ) assert all( [ total_samples - per_fold - (k - i - 1) * not_overlap - == len(train) - m.config_ar.n_lags + 1 - m.n_forecasts + == len(train) - m.config_ar.n_lags + 1 - m.config_model.n_forecasts for i, (train, val) in enumerate(folds) ] ) @@ -1310,7 +1312,7 @@ def test_get_latest_forecast(): weekly_seasonality=False, ) m.fit(df_global, freq="D") - future = m.make_future_dataframe(df_global, periods=m.n_forecasts, n_historic_predictions=10) + future = m.make_future_dataframe(df_global, periods=m.config_model.n_forecasts, n_historic_predictions=10) forecast = m.predict(future) log.info("Plot forecast with many IDs - Raise exceptions") forecast = m.predict(df_global) @@ -1552,7 +1554,7 @@ def test_accelerator(): df["A"] = df["y"].rolling(7, min_periods=1).mean() cols = [col for col in df.columns if col not in ["ds", "y"]] m = m.add_lagged_regressor(names=cols) - m.highlight_nth_step_ahead_of_each_forecast(m.n_forecasts) + m.highlight_nth_step_ahead_of_each_forecast(m.config_model.n_forecasts) m.fit(df, freq="D") m.predict(df) diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 7c1b1a256..894a96586 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -204,7 +204,7 @@ def test_plot_global_local_parameters(plotting_backend): learning_rate=LR, ) m.fit(df_global, freq="D") - future = m.make_future_dataframe(df_global, periods=m.n_forecasts, n_historic_predictions=10) + future = m.make_future_dataframe(df_global, periods=m.config_model.n_forecasts, n_historic_predictions=10) forecast = m.predict(future) log.info(f"Plot forecast with many IDs with {plotting_backend} - Raise exceptions") @@ -611,7 +611,7 @@ def test_plot_uncertainty(plotting_backend): ) m.fit(df, freq="D") - m.highlight_nth_step_ahead_of_each_forecast(m.n_forecasts) + m.highlight_nth_step_ahead_of_each_forecast(m.config_model.n_forecasts) future = m.make_future_dataframe(df, periods=30, n_historic_predictions=100) forecast = m.predict(future) fig4 = m.plot(forecast, plotting_backend=plotting_backend) @@ -668,9 +668,9 @@ def test_plot_conformal_prediction(plotting_backend): m.fit(train_df, freq="D") alpha = 0.1 for method in ["naive", "cqr"]: # Naive and CQR SCP methods - future = m.make_future_dataframe(test_df, periods=m.n_forecasts, n_historic_predictions=10) + future = m.make_future_dataframe(test_df, periods=m.config_model.n_forecasts, n_historic_predictions=10) forecast = m.conformal_predict(future, calibration_df=cal_df, alpha=alpha, method=method) - m.highlight_nth_step_ahead_of_each_forecast(m.n_forecasts) + m.highlight_nth_step_ahead_of_each_forecast(m.config_model.n_forecasts) fig0 = m.plot(forecast, plotting_backend=plotting_backend) fig1 = m.plot_components(forecast, plotting_backend=plotting_backend) fig2 = m.plot_parameters(plotting_backend=plotting_backend) @@ -692,9 +692,9 @@ def test_plot_conformal_prediction(plotting_backend): m.fit(train_df, freq="D") alpha = 0.1 for method in ["naive", "cqr"]: # Naive and CQR SCP methods - future = m.make_future_dataframe(df, periods=m.n_forecasts, n_historic_predictions=10) + future = m.make_future_dataframe(df, periods=m.config_model.n_forecasts, n_historic_predictions=10) forecast = m.conformal_predict(future, calibration_df=cal_df, alpha=alpha, method=method) - m.highlight_nth_step_ahead_of_each_forecast(m.n_forecasts) + m.highlight_nth_step_ahead_of_each_forecast(m.config_model.n_forecasts) fig0 = m.plot(forecast) fig1 = m.plot_latest_forecast(forecast, include_previous_forecasts=10, plotting_backend=plotting_backend) fig2 = m.plot_latest_forecast( @@ -730,7 +730,7 @@ def test_advanced_conformal_prediction_plots(): m.fit(train_df, freq="D") alpha = 0.1 for method in ["naive", "cqr"]: # Naive and CQR SCP methods - future = m.make_future_dataframe(test_df, periods=m.n_forecasts, n_historic_predictions=10) + future = m.make_future_dataframe(test_df, periods=m.config_model.n_forecasts, n_historic_predictions=10) forecast = m.conformal_predict( future, calibration_df=cal_df, @@ -761,9 +761,9 @@ def test_plot_conformal_prediction_asymmetric(plotting_backend): m.fit(train_df, freq="D") alpha = (0.03, 0.07) method = "cqr" - future = m.make_future_dataframe(test_df, periods=m.n_forecasts, n_historic_predictions=10) + future = m.make_future_dataframe(test_df, periods=m.config_model.n_forecasts, n_historic_predictions=10) forecast = m.conformal_predict(future, calibration_df=cal_df, alpha=alpha, method=method) - m.highlight_nth_step_ahead_of_each_forecast(m.n_forecasts) + m.highlight_nth_step_ahead_of_each_forecast(m.config_model.n_forecasts) fig0 = m.plot(forecast, plotting_backend=plotting_backend) fig1 = m.plot_components(forecast, plotting_backend=plotting_backend) fig2 = m.plot_parameters(plotting_backend=plotting_backend) @@ -787,7 +787,7 @@ def test_plot_latest_forecast(plotting_backend): forecast = m.predict(future) fig1 = m.plot_latest_forecast(forecast, include_previous_forecasts=10, plotting_backend=plotting_backend) fig2 = m.plot_latest_forecast(forecast, plotting_backend=plotting_backend) - m.highlight_nth_step_ahead_of_each_forecast(m.n_forecasts) + m.highlight_nth_step_ahead_of_each_forecast(m.config_model.n_forecasts) fig3 = m.plot_latest_forecast(forecast, include_previous_forecasts=10, plotting_backend=plotting_backend) fig2 = m.plot_latest_forecast( forecast, include_previous_forecasts=10, plot_history_data=True, plotting_backend=plotting_backend diff --git a/tests/test_uncertainty.py b/tests/test_uncertainty.py index 3423edf8d..c5f8dfe0a 100644 --- a/tests/test_uncertainty.py +++ b/tests/test_uncertainty.py @@ -116,7 +116,7 @@ def test_uncertainty_estimation_yosemite_temps(): m.fit(df, freq="5min") future = m.make_future_dataframe(df, periods=6, n_historic_predictions=3 * 24 * 12) m.predict(future) - m.highlight_nth_step_ahead_of_each_forecast(m.n_forecasts) + m.highlight_nth_step_ahead_of_each_forecast(m.config_model.n_forecasts) def test_uncertainty_estimation_air_travel(): diff --git a/tests/test_unit.py b/tests/test_unit.py index 182e2ad90..a273f0acd 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -10,8 +10,8 @@ import pytest from torch.utils.data import DataLoader -from neuralprophet import NeuralProphet, configure, df_utils, time_dataset, utils_time_dataset -from neuralprophet.data.process import _create_dataset, _handle_missing_data +from neuralprophet import NeuralProphet, configure, configure_components, df_utils, time_dataset, utils_time_dataset +from neuralprophet.data.process import _handle_missing_data from neuralprophet.data.transform import _normalize log = logging.getLogger("NP.test") @@ -74,7 +74,8 @@ def test_timedataset_minimal(): log.debug(f"Infile shape: {df_in.shape}") valid_p = 0.2 for n_forecasts, n_lags in [(1, 0), (1, 5), (3, 5)]: - config_model = configure.Model() + config_ar = configure_components.AutoregRession(n_lags=n_lags) + config_model = configure.Model(n_forecasts=n_forecasts) config_model.set_max_num_lags(n_lags) config_missing = configure.MissingDataHandling() # config_train = configure.Train() @@ -88,10 +89,10 @@ def test_timedataset_minimal(): n_lags=n_lags, n_forecasts=n_forecasts, config_missing=config_missing, - # config_regressors: Optional[ConfigFutureRegressors], - # config_lagged_regressors: Optional[ConfigLaggedRegressors], - # config_events: Optional[ConfigEvents], - # config_seasonality: Optional[ConfigSeasonality], + # config_regressors: Optional[configure_components.FutureRegressors], + # config_lagged_regressors: Optional[configure_components.LaggedRegressors], + # config_events: Optional[configure_components.Events], + # config_seasonality: Optional[configure_components.Seasonalities], predicting=False, ) local_data_params, global_data_params = df_utils.init_data_params(df=df, normalize="minmax") @@ -109,19 +110,16 @@ def test_timedataset_minimal(): dataset = time_dataset.TimeDataset( df=df, + components_stacker=components_stacker, predict_mode=False, - n_lags=n_lags, - n_forecasts=n_forecasts, - prediction_frequency=None, - predict_steps=1, + config_model=config_model, + config_missing=config_missing, + config_ar=config_ar, config_seasonality=None, config_events=None, config_country_holidays=None, config_regressors=None, config_lagged_regressors=None, - config_missing=config_missing, - config_model=config_model, - components_stacker=components_stacker, ) input, meta = dataset.__getitem__(0) # # inputs50, targets50, meta50 = dataset.__getitem__(50) @@ -697,13 +695,13 @@ def test_globaltimedataset(): df_global = _normalize(df=df_global, config_normalization=m.config_normalization) components_stacker = utils_time_dataset.ComponentStacker( n_lags=m.config_ar.n_lags, - n_forecasts=m.n_forecasts, + n_forecasts=m.config_model.n_forecasts, max_lags=m.config_model.max_lags, config_seasonality=m.config_seasonality, lagged_regressor_config=m.config_lagged_regressors, ) - _create_dataset(m, df_global, predict_mode=False, components_stacker=components_stacker) - _create_dataset(m, df_global, predict_mode=True, components_stacker=components_stacker) + m._create_dataset(df_global, predict_mode=False, components_stacker=components_stacker) + m._create_dataset(df_global, predict_mode=True, components_stacker=components_stacker) # lagged_regressors, future_regressors df4 = df.copy() @@ -727,13 +725,13 @@ def test_globaltimedataset(): df4 = _normalize(df=df4, config_normalization=m.config_normalization) components_stacker = utils_time_dataset.ComponentStacker( n_lags=m.config_ar.n_lags, - n_forecasts=m.n_forecasts, + n_forecasts=m.config_model.n_forecasts, max_lags=m.config_model.max_lags, config_seasonality=m.config_seasonality, lagged_regressor_config=m.config_lagged_regressors, ) - _create_dataset(m, df4, predict_mode=False, components_stacker=components_stacker) - _create_dataset(m, df4, predict_mode=True, components_stacker=components_stacker) + m._create_dataset(df4, predict_mode=False, components_stacker=components_stacker) + m._create_dataset(df4, predict_mode=True, components_stacker=components_stacker) def test_dataloader(): @@ -769,7 +767,7 @@ def test_dataloader(): config_seasonality=None, lagged_regressor_config=None, ) - dataset = _create_dataset(m, df_global, predict_mode=False, components_stacker=components_stacker) + dataset = m._create_dataset(df_global, predict_mode=False, components_stacker=components_stacker) loader = DataLoader(dataset, batch_size=min(1024, len(df)), shuffle=True, drop_last=False) for _, meta in loader: assert set(meta["df_name"]) == set(df_global["ID"].unique()) @@ -872,7 +870,8 @@ def test_make_future(): def test_too_many_NaN(): n_lags = 12 n_forecasts = 1 - config_model = configure.Model() + config_ar = configure_components.AutoregRession(n_lags=n_lags) + config_model = configure.Model(n_forecasts=n_forecasts) config_model.set_max_num_lags(n_lags) config_missing = configure.MissingDataHandling( impute_missing=True, @@ -902,25 +901,22 @@ def test_too_many_NaN(): components_stacker = utils_time_dataset.ComponentStacker( n_lags=n_lags, n_forecasts=n_forecasts, - max_lags=n_lags, + max_lags=config_model.max_lags, config_seasonality=None, lagged_regressor_config=None, ) time_dataset.TimeDataset( df=df, + components_stacker=components_stacker, predict_mode=False, - n_lags=n_lags, - n_forecasts=n_forecasts, - prediction_frequency=None, - predict_steps=1, + config_model=config_model, + config_missing=config_missing, + config_ar=config_ar, config_seasonality=None, config_events=None, config_country_holidays=None, config_regressors=None, config_lagged_regressors=None, - config_missing=config_missing, - config_model=config_model, - components_stacker=components_stacker, ) diff --git a/tests/utils/benchmark_time_dataset.py b/tests/utils/benchmark_time_dataset.py index e2984b10a..af2dda090 100644 --- a/tests/utils/benchmark_time_dataset.py +++ b/tests/utils/benchmark_time_dataset.py @@ -8,8 +8,8 @@ import torch.utils.benchmark as benchmark from torch.utils.data import DataLoader -from neuralprophet import NeuralProphet, df_utils, utils -from neuralprophet.data.process import _check_dataframe, _create_dataset, _handle_missing_data +from neuralprophet import NeuralProphet, df_utils, utils, utils_time_dataset +from neuralprophet.data.process import _check_dataframe, _handle_missing_data from neuralprophet.data.transform import _normalize # from neuralprophet.forecaster import @@ -55,9 +55,12 @@ def load(nrows=NROWS, epochs=EPOCHS, batch=BATCH_SIZE, season=True, iterations=1 freq = "5min" num_workers = 0 + n_lags = (12,) + n_forecasts = (6,) + m = NeuralProphet( - n_lags=12, - n_forecasts=6, + n_lags=n_lags, + n_forecasts=n_forecasts, epochs=epochs, batch_size=batch, learning_rate=LR, @@ -75,7 +78,7 @@ def load(nrows=NROWS, epochs=EPOCHS, batch=BATCH_SIZE, season=True, iterations=1 df=df, freq=m.data_freq, n_lags=m.config_ar.n_lags, - n_forecasts=m.n_forecasts, + n_forecasts=m.config_model.n_forecasts, config_missing=m.config_missing, config_regressors=m.config_regressors, config_lagged_regressors=m.config_lagged_regressors, @@ -98,8 +101,16 @@ def load(nrows=NROWS, epochs=EPOCHS, batch=BATCH_SIZE, season=True, iterations=1 if m.config_country_holidays is not None: m.config_country_holidays.init_holidays(df_merged) - dataset = _create_dataset( - m, df, predict_mode=False, prediction_frequency=m.model_config.prediction_frequency + components_stacker = utils_time_dataset.ComponentStacker( + n_lags=m.config_ar.n_lags, + n_forecasts=m.config_model.n_forecasts, + max_lags=m.config_model.max_lags, + config_seasonality=m.config_seasonality, + lagged_regressor_config=m.config_lagged_regressors, + ) + + dataset = m._create_dataset( + df, predict_mode=False, components_stacker=components_stacker ) # needs to be called after set_auto_seasonalities # Determine the max_number of epochs @@ -120,11 +131,11 @@ def load(nrows=NROWS, epochs=EPOCHS, batch=BATCH_SIZE, season=True, iterations=1 tic = time.perf_counter() for i in range(iterations): data, target, meta = next(dataloader_iterator) - # try: - # data, target, meta = next(dataloader_iterator) - # except StopIteration: - # dataloader_iterator = iter(loader) - # data, target, meta = next(dataloader_iterator) + try: + data, target, meta = next(dataloader_iterator) + except StopIteration: + dataloader_iterator = iter(loader) + data, target, meta = next(dataloader_iterator) # do_something() toc = time.perf_counter() # print_input_shapes(data) @@ -164,7 +175,7 @@ def yosemite(nrows=NROWS, epochs=EPOCHS, batch=BATCH_SIZE, season=True): # toc = time.perf_counter() # print(f"######## Time: {toc - tic:0.4f} for predict") - m.highlight_nth_step_ahead_of_each_forecast(m.n_forecasts) + m.highlight_nth_step_ahead_of_each_forecast(m.config_model.n_forecasts) def peyton(nrows=NROWS, epochs=EPOCHS, batch=BATCH_SIZE, season=True):