[Docs] Document configure.py #1649

Open · wants to merge 2 commits into base: main
223 changes: 210 additions & 13 deletions neuralprophet/configure.py
@@ -17,9 +17,29 @@

@dataclass
class Model:
"""
General configuration settings of the forecasting model.

Attributes
----------
n_forecasts : int
Number of forecasts to be made.
quantiles : Optional[List[float]]
List of quantiles for prediction intervals. Default is None.
prediction_frequency : Optional[Dict[str]]
Frequency of predictions. Default is None.
max_lags : Optional[int]
Maximum number of lags used in the model. This is set during model configuration.

Methods
-------
setup_quantiles()
Configures the quantiles for prediction intervals.
set_max_num_lags(n_lags, config_lagged_regressors)
Determines the maximum number of lags between autoregression lags and covariate lags.
"""

n_forecasts: int
quantiles: Optional[List[float]] = None
prediction_frequency: Optional[Dict[str]] = None

Check failure on line 42 in neuralprophet/configure.py (GitHub Actions / pyright): Too few type arguments provided for "Dict"; expected 2 but received 1 (reportInvalidTypeArguments)
max_lags: Optional[int] = field(init=False)

def setup_quantiles(self):
@@ -41,7 +61,7 @@

def set_max_num_lags(
self, n_lags: int, config_lagged_regressors: Optional[configure_components.LaggedRegressors] = None
) -> int:

Check failure on line 64 in neuralprophet/configure.py (GitHub Actions / pyright): Function with declared return type "int" must return value on all code paths; "None" is not assignable to "int" (reportReturnType)
"""Get the greatest number of lags between the autoregression lags and the covariates lags.

Parameters
@@ -70,6 +90,33 @@
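
As a rough illustration of the two helpers documented in the `Model` docstring above, here is a minimal standalone sketch; the function names, the median-insertion behaviour, and the covariate-lag handling are assumptions for illustration, not the verified implementation of this class:

```python
from typing import List, Optional

def setup_quantiles_sketch(quantiles: Optional[List[float]]) -> List[float]:
    # Illustrative only: keep user-supplied quantiles strictly inside (0, 1)
    # and make sure the median is always predicted.
    quantiles = quantiles or []
    assert all(0 < q < 1 for q in quantiles), "quantiles must lie in (0, 1)"
    return [0.5] + [q for q in sorted(quantiles) if q != 0.5]

def set_max_num_lags_sketch(n_lags: int, covariate_lags: List[int]) -> int:
    # "Greatest number of lags between the autoregression lags and the covariates lags."
    return max([n_lags, *covariate_lags])

print(setup_quantiles_sketch([0.9, 0.1]))   # [0.5, 0.1, 0.9]
print(set_max_num_lags_sketch(5, [3, 12]))  # 12
```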

@dataclass
class Normalization:
"""
Configuration settings for normalization of data.

Attributes
----------
normalize : str
The type of normalization to apply.
global_normalization : bool
Flag indicating whether to apply global normalization.
global_time_normalization : bool
Flag indicating whether to apply global time normalization.
unknown_data_normalization : bool
Flag indicating whether to apply normalization to unknown data.
local_data_params : dict
Dictionary containing local data parameters, where the key is the name of the dataset and the value is another dictionary with variable names.
global_data_params : dict
Dictionary containing global data parameters, where the key is the name of the variable.

Methods
-------
init_data_params(df, config_lagged_regressors=None, config_regressors=None, config_events=None, config_seasonality=None)
Initializes the data parameters for normalization based on the provided dataframe and configuration components.

get_data_params(df_name)
Retrieves the data parameters for a given dataset name, handling both local and global normalization scenarios.
"""

normalize: str
global_normalization: bool
global_time_normalization: bool
@@ -85,6 +132,23 @@
config_events: Optional[configure_components.Events] = None,
config_seasonality: Optional[configure_components.Seasonalities] = None,
):
"""
Compute parameters for data normalization.

This method sets up the local and global data parameters required for normalization,
based on the provided dataframe and configuration options. If only one dataframe
is provided and global normalization is not set, global normalization is enabled.

Parameters
----------
df : pd.DataFrame
The input dataframe containing the data.
config_lagged_regressors : Optional[configure_components.LaggedRegressors]
Configuration for lagged regressors.
config_regressors : Optional
Configuration for additional regressors.
config_events : Optional[configure_components.Events]
Configuration for events.
config_seasonality : Optional[configure_components.Seasonalities]
Configuration for seasonalities.
"""
if len(df["ID"].unique()) == 1 and not self.global_normalization:
log.info("Setting normalization to global as only one dataframe provided for training.")
self.global_normalization = True
@@ -100,17 +164,37 @@
)
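
To illustrate the local/global split that `init_data_params` produces, a simplified standalone sketch follows; the per-column min/max pairs are a stand-in for whatever normalization parameters the library actually computes:

```python
import pandas as pd

def data_params_sketch(df: pd.DataFrame):
    # "Local" params: one dict of per-column (min, max) for each dataset ID.
    local_params = {
        name: {col: (float(part[col].min()), float(part[col].max())) for col in ("y",)}
        for name, part in df.groupby("ID")
    }
    # "Global" params: computed over all IDs at once.
    global_params = {col: (float(df[col].min()), float(df[col].max())) for col in ("y",)}
    return local_params, global_params

df = pd.DataFrame({"ID": ["a", "a", "b"], "y": [1.0, 3.0, 10.0]})
local_p, global_p = data_params_sketch(df)
print(local_p["a"], global_p)  # {'y': (1.0, 3.0)} {'y': (1.0, 10.0)}
```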

def get_data_params(self, df_name):
"""
Retrieve the data normalization parameters for a given dataset name.

Parameters
----------
df_name : str
The name of the dataset for which to retrieve the data parameters.

Returns
-------
dict
The data parameters associated with the given dataset name.

Raises
------
ValueError
If the dataset name is not found in the local data parameters and
`unknown_data_normalization` is False.

"""
if self.global_normalization:
data_params = self.global_data_params
else:
if df_name in self.local_data_params.keys() and df_name != "__df__":
log.debug(f"Dataset name {df_name!r} found in training data_params")
# log.debug(f"Dataset name {df_name!r} found in training data_params")
data_params = self.local_data_params[df_name]
elif self.unknown_data_normalization:
log.debug(
f"Dataset name {df_name!r} is not present in valid data_params but unknown_data_normalization is \
True. Using global_data_params"
)
# log.debug(
# f"Dataset name {df_name!r} is not present in valid data_params but unknown_data_normalization is \
# True. Using global_data_params"
# )
data_params = self.global_data_params
else:
raise ValueError(
@@ -122,6 +206,16 @@
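
The lookup documented above reduces to the following decision path, restated here as a standalone sketch of the branching visible in the diff:

```python
def get_data_params_sketch(df_name, local_data_params, global_data_params,
                           global_normalization=False, unknown_data_normalization=False):
    # Prefer global params when global normalization is on,
    # otherwise look up the dataset by name and fall back if allowed.
    if global_normalization:
        return global_data_params
    if df_name in local_data_params and df_name != "__df__":
        return local_data_params[df_name]
    if unknown_data_normalization:
        return global_data_params
    raise ValueError(f"Dataset name {df_name!r} not found in data params")
```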

@dataclass
class MissingDataHandling:
"""
Configuration for handling missing data in the dataset.

Attributes
----------
impute_missing : bool
Flag to indicate if missing data should be imputed. Default is True.
impute_linear : int
Number of missing data points to impute using linear interpolation. Default is 10.
impute_rolling : int
Number of missing data points to impute using rolling average. Default is 10.
drop_missing : bool
Flag to indicate if rows with missing data should be dropped. Default is False.
"""

impute_missing: bool = True
impute_linear: int = 10
impute_rolling: int = 10
@@ -130,6 +224,48 @@
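
For context, a pandas sketch of what the two imputation limits refer to; this is an illustration of the general idea, not NeuralProphet's internal imputation routine:

```python
import numpy as np
import pandas as pd

def impute_sketch(y: pd.Series, impute_linear: int = 10, impute_rolling: int = 10) -> pd.Series:
    # First fill short gaps by linear interpolation (up to `impute_linear` points),
    # then fill remaining gaps with a rolling average (up to `impute_rolling` points).
    y = y.interpolate(method="linear", limit=impute_linear)
    rolling_mean = y.rolling(window=impute_rolling, min_periods=1).mean()
    return y.fillna(rolling_mean, limit=impute_rolling)

y = pd.Series([1.0, np.nan, 3.0, np.nan, np.nan, 6.0])
print(impute_sketch(y).tolist())  # [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
```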

@dataclass
class Train:
"""
Settings for model training.

This class encapsulates the configuration parameters and methods for training the model, including PyTorch Lightning arguments.

Attributes
----------
learning_rate : Optional[float]
Learning rate for the optimizer.
epochs : Optional[int]
Number of epochs for training.
batch_size : Optional[int]
Batch size for training.
loss_func : Union[str, torch.nn.modules.loss._Loss, Callable]
Loss function for training.
optimizer : Union[str, Type[torch.optim.Optimizer]]
Optimizer for training.
optimizer_args : dict
Arguments for the optimizer.
scheduler : Optional[Union[str, Type[torch.optim.lr_scheduler.LRScheduler]]]
Learning rate scheduler.
scheduler_args : dict
Arguments for the scheduler.
early_stopping : Optional[bool]
Whether to use early stopping.
newer_samples_weight : float
Weight for newer samples.
newer_samples_start : float
Start point for newer samples.
reg_delay_pct : float
Regularization delay percentage.
reg_lambda_trend : Optional[float]
Regularization lambda trend.
trend_reg_threshold : Optional[Union[bool, float]]
Trend regularization threshold.
n_data : int
Number of data points in the dataset.
loss_func_name : str
Name of the loss function.
pl_trainer_config : dict
Configuration for PyTorch Lightning trainer.
"""

learning_rate: Optional[float]
epochs: Optional[int]
batch_size: Optional[int]
@@ -159,6 +295,15 @@
# self.set_scheduler()

def set_loss_func(self, quantiles: List[float]):
"""
Set the loss function based on the provided quantiles.
If quantiles are provided, the loss function is wrapped in a PinballLoss.

Parameters
----------
quantiles : List[float]
List of quantiles for the loss function.
"""
if isinstance(self.loss_func, str):
if self.loss_func.lower() in ["smoothl1", "smoothl1loss", "huber"]:
# keeping 'huber' for backwards compatibility, though not identical
@@ -189,6 +334,28 @@
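
A compact sketch of the behaviour this docstring describes: resolving a loss name to a torch loss and, when quantiles are configured, switching to a pinball-style loss. The mapping and the `pinball_loss` helper below are illustrative stand-ins, not the module's actual `PinballLoss`:

```python
import torch

# Illustrative name -> loss mapping (mirrors the string handling shown above).
_LOSSES = {
    "smoothl1": torch.nn.SmoothL1Loss,
    "huber": torch.nn.SmoothL1Loss,  # kept for backwards compatibility, not identical to HuberLoss
    "mse": torch.nn.MSELoss,
    "mae": torch.nn.L1Loss,
}

def pinball_loss(y: torch.Tensor, y_hat: torch.Tensor, q: float) -> torch.Tensor:
    # Standard quantile (pinball) loss for a single quantile q; with quantiles
    # configured, the point loss is replaced by a loss of this shape.
    diff = y - y_hat
    return torch.maximum(q * diff, (q - 1) * diff).mean()

loss_cls = _LOSSES["smoothl1"]
print(loss_cls()(torch.tensor([1.0, 2.0]), torch.tensor([1.5, 1.5])))        # tensor(0.1250)
print(pinball_loss(torch.tensor([1.0, 2.0]), torch.tensor([1.5, 1.5]), 0.9))  # tensor(0.2500)
```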
min_epoch: int = 20,
max_epoch: int = 500,
):
"""
Automatically sets the batch size and number of epochs based on the size of the dataset.

Parameters
----------
n_data : int
The number of data points in the dataset. Must be greater than or equal to 1.
min_batch : int, optional
The minimum batch size. Default is 8.
max_batch : int, optional
The maximum batch size. Default is 2048.
min_epoch : int, optional
The minimum number of epochs. Default is 20.
max_epoch : int, optional
The maximum number of epochs. Default is 500.

Notes
-----
- If `self.batch_size` is not set, it will be automatically determined based on the size of the dataset.
- If `self.epochs` is not set, it will be automatically determined to ensure a minimum of 1000 steps and a maximum of 100,000 steps.
- The `lambda_delay` attribute is also set based on the regularization delay percentage and the number of epochs.
"""
assert n_data >= 1
self.n_data = n_data
if self.batch_size is None:
@@ -206,16 +373,17 @@
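
The sizing formula itself is collapsed in this diff; the sketch below is only a plausible log-scaled heuristic that respects the documented clamps and the roughly 1,000-step minimum, not the exact rule NeuralProphet applies:

```python
import math

def auto_batch_epoch_sketch(n_data: int, min_batch: int = 8, max_batch: int = 2048,
                            min_epoch: int = 20, max_epoch: int = 500):
    assert n_data >= 1
    # Batch size grows roughly with the dataset size, clamped to [min_batch, max_batch].
    batch_size = min(max_batch, max(min_batch, 2 ** int(math.log2(n_data) / 2)))
    # Choose epochs so that training runs for at least ~1,000 optimizer steps,
    # clamped to [min_epoch, max_epoch].
    steps_per_epoch = max(1, n_data // batch_size)
    epochs = int(min(max_epoch, max(min_epoch, 1000 / steps_per_epoch)))
    return batch_size, epochs

print(auto_batch_epoch_sketch(100))      # small dataset: small batches, many epochs
print(auto_batch_epoch_sketch(100_000))  # large dataset: large batches, fewer epochs
```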

def set_optimizer(self):
"""
Set the optimizer and optimizer args. If optimizer is a string, then it will be converted to the corresponding
torch optimizer. The optimizer is not initialized yet as this is done in configure_optimizers in TimeNet.
Set the optimizer and optimizer args from stored values in self.

Parameters
----------
optimizer_name : int
Object provided to NeuralProphet as optimizer.
optimizer_args : dict
Arguments for the optimizer.
If optimizer is a string, then it will be converted to the corresponding torch optimizer class.
The optimizer is not initialized yet as this is done in configure_optimizers in TimeNet.

Notes
-----
- `self.optimizer_name` : int
Object provided to NeuralProphet as optimizer.
- `self.optimizer_args` : dict
Arguments for the optimizer.
"""
if isinstance(self.optimizer, str):
if self.optimizer.lower() == "adamw":
@@ -238,6 +406,10 @@
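
For reference, the string-to-class resolution described above amounts to something like the following sketch; only the "adamw" branch is visible in the diff, and the "sgd" branch is an assumed example:

```python
import torch

def resolve_optimizer_sketch(optimizer):
    # Accept either an optimizer class or a well-known name; the class is only
    # resolved here and instantiated later (in configure_optimizers).
    if isinstance(optimizer, str):
        name = optimizer.lower()
        if name == "adamw":
            return torch.optim.AdamW
        if name == "sgd":
            return torch.optim.SGD
        raise ValueError(f"Unknown optimizer name: {optimizer!r}")
    assert issubclass(optimizer, torch.optim.Optimizer)
    return optimizer

print(resolve_optimizer_sketch("AdamW"))  # resolves to the torch.optim.AdamW class
```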
"""
Set the scheduler and scheduler args depending on the user selection.
The scheduler is not initialized yet as this is done in configure_optimizers in TimeNet.

Notes
-----
- If no scheduler is specified, falls back to ExponentialLR scheduler.
"""

if self.scheduler is None:
@@ -289,6 +461,23 @@
), "Scheduler must be a subclass of torch.optim.lr_scheduler.LRScheduler"
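
A sketch of the fallback described in the notes, with an assumed default `gamma` purely for illustration:

```python
import torch

def resolve_scheduler_sketch(scheduler=None, scheduler_args=None):
    # Fall back to ExponentialLR when the user does not specify a scheduler.
    if scheduler is None:
        return torch.optim.lr_scheduler.ExponentialLR, {"gamma": 0.95, **(scheduler_args or {})}
    assert issubclass(scheduler, torch.optim.lr_scheduler.LRScheduler)
    return scheduler, dict(scheduler_args or {})

scheduler_cls, args = resolve_scheduler_sketch()
print(scheduler_cls.__name__, args)  # ExponentialLR {'gamma': 0.95}
```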

def get_reg_delay_weight(self, progress, reg_start_pct: float = 0.66, reg_full_pct: float = 1.0):
"""
Get the regularization delay weight based on current position in training progress.

Parameters
----------
progress : float
Current progress of the training.
reg_start_pct : float, optional
Percentage of progress to start regularization. Default is 0.66.
reg_full_pct : float, optional
Percentage of progress to fully apply regularization. Default is 1.0.

Returns
-------
float
Regularization delay weight.
"""
# Ignore type warning of epochs possibly being None (does not work with dataclasses)
if reg_start_pct == reg_full_pct:
reg_progress = float(progress > reg_start_pct)
@@ -303,4 +492,12 @@
return delay_weight
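
Most of the ramp is hidden by the collapsed hunk; as a standalone illustration, a clamped linear ramp between `reg_start_pct` and `reg_full_pct` looks like this (the library's exact curve may differ):

```python
def reg_delay_weight_sketch(progress: float, reg_start_pct: float = 0.66,
                            reg_full_pct: float = 1.0) -> float:
    # 0.0 before reg_start_pct, 1.0 after reg_full_pct, linear ramp in between.
    if reg_start_pct == reg_full_pct:
        return float(progress > reg_start_pct)
    reg_progress = (progress - reg_start_pct) / (reg_full_pct - reg_start_pct)
    return min(1.0, max(0.0, reg_progress))

print(reg_delay_weight_sketch(0.5))   # 0.0  (before regularization starts)
print(reg_delay_weight_sketch(0.83))  # 0.5  (halfway through the ramp)
print(reg_delay_weight_sketch(1.0))   # 1.0  (fully applied)
```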

def set_batches_per_epoch(self, batches_per_epoch: int):
"""
Set the number of batches per epoch.

Parameters
----------
batches_per_epoch : int
Number of batches per epoch.
"""
self.batches_per_epoch = batches_per_epoch