diff --git a/mambular/base_models/tabularnn.py b/mambular/base_models/tabularnn.py
new file mode 100644
index 0000000..a3e31bc
--- /dev/null
+++ b/mambular/base_models/tabularnn.py
@@ -0,0 +1,153 @@
+import torch
+import torch.nn as nn
+from ..arch_utils.mlp_utils import MLP
+from ..configs.tabularnn_config import DefaultTabulaRNNConfig
+from .basemodel import BaseModel
+from ..arch_utils.embedding_layer import EmbeddingLayer
+from ..arch_utils.normalization_layers import (
+    RMSNorm,
+    LayerNorm,
+    LearnableLayerScaling,
+    BatchNorm,
+    InstanceNorm,
+    GroupNorm,
+)
+
+
+class TabulaRNN(BaseModel):
+    def __init__(
+        self,
+        cat_feature_info,
+        num_feature_info,
+        num_classes=1,
+        config: DefaultTabulaRNNConfig = DefaultTabulaRNNConfig(),
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.save_hyperparameters(ignore=["cat_feature_info", "num_feature_info"])
+
+        self.lr = self.hparams.get("lr", config.lr)
+        self.lr_patience = self.hparams.get("lr_patience", config.lr_patience)
+        self.weight_decay = self.hparams.get("weight_decay", config.weight_decay)
+        self.lr_factor = self.hparams.get("lr_factor", config.lr_factor)
+        self.pooling_method = self.hparams.get("pooling_method", config.pooling_method)
+        self.cat_feature_info = cat_feature_info
+        self.num_feature_info = num_feature_info
+
+        norm_layer = self.hparams.get("norm", config.norm)
+        if norm_layer == "RMSNorm":
+            self.norm_f = RMSNorm(
+                self.hparams.get("dim_feedforward", config.dim_feedforward)
+            )
+        elif norm_layer == "LayerNorm":
+            self.norm_f = LayerNorm(
+                self.hparams.get("dim_feedforward", config.dim_feedforward)
+            )
+        elif norm_layer == "BatchNorm":
+            self.norm_f = BatchNorm(
+                self.hparams.get("dim_feedforward", config.dim_feedforward)
+            )
+        elif norm_layer == "InstanceNorm":
+            self.norm_f = InstanceNorm(
+                self.hparams.get("dim_feedforward", config.dim_feedforward)
+            )
+        elif norm_layer == "GroupNorm":
+            self.norm_f = GroupNorm(
+                1, self.hparams.get("dim_feedforward", config.dim_feedforward)
+            )
+        elif norm_layer == "LearnableLayerScaling":
+            self.norm_f = LearnableLayerScaling(
+                self.hparams.get("dim_feedforward", config.dim_feedforward)
+            )
+        else:
+            self.norm_f = None
+
+        rnn_layer = {"RNN": nn.RNN, "LSTM": nn.LSTM, "GRU": nn.GRU}[config.model_type]
+        rnn_kwargs = dict(
+            input_size=self.hparams.get("d_model", config.d_model),
+            hidden_size=self.hparams.get("dim_feedforward", config.dim_feedforward),
+            num_layers=self.hparams.get("n_layers", config.n_layers),
+            bidirectional=self.hparams.get("bidirectional", config.bidirectional),
+            batch_first=True,
+            dropout=self.hparams.get("rnn_dropout", config.rnn_dropout),
+            bias=self.hparams.get("bias", config.bias),
+        )
+        # Only the plain nn.RNN accepts a nonlinearity argument; passing it to
+        # nn.LSTM or nn.GRU would raise a TypeError.
+        if config.model_type == "RNN":
+            rnn_kwargs["nonlinearity"] = self.hparams.get(
+                "rnn_activation", config.rnn_activation
+            )
+        self.rnn = rnn_layer(**rnn_kwargs)
+
+        self.embedding_layer = EmbeddingLayer(
+            num_feature_info=num_feature_info,
+            cat_feature_info=cat_feature_info,
+            d_model=self.hparams.get("d_model", config.d_model),
+            embedding_activation=self.hparams.get(
+                "embedding_activation", config.embedding_activation
+            ),
+            layer_norm_after_embedding=self.hparams.get(
+                "layer_norm_after_embedding", config.layer_norm_after_embedding
+            ),
+            use_cls=False,
+            cls_position=-1,
+            cat_encoding=self.hparams.get("cat_encoding", config.cat_encoding),
+        )
+
+        head_activation = self.hparams.get("head_activation", config.head_activation)
+
+        self.tabular_head = MLP(
+            self.hparams.get("dim_feedforward", config.dim_feedforward),
+            hidden_units_list=self.hparams.get(
+                "head_layer_sizes", config.head_layer_sizes
+            ),
+            dropout_rate=self.hparams.get("head_dropout", config.head_dropout),
+            use_skip_layers=self.hparams.get(
+                "head_skip_layers", config.head_skip_layers
+            ),
+            activation_fn=head_activation,
+            use_batch_norm=self.hparams.get(
+                "head_use_batch_norm", config.head_use_batch_norm
+            ),
+            n_output_units=num_classes,
+        )
+
+        self.linear = nn.Linear(config.d_model, config.dim_feedforward)
+
+    def forward(self, num_features, cat_features):
+        """
+        Defines the forward pass of the model.
+
+        Parameters
+        ----------
+        num_features : Tensor
+            Tensor containing the numerical features.
+        cat_features : Tensor
+            Tensor containing the categorical features.
+
+        Returns
+        -------
+        Tensor
+            The output predictions of the model.
+        """
+        x = self.embedding_layer(num_features, cat_features)
+        # RNN forward pass over the embedded feature sequence
+        out, _ = self.rnn(x)
+        # Skip connection: mean of the raw embeddings, projected to the RNN hidden size
+        z = self.linear(torch.mean(x, dim=1))
+
+        if self.pooling_method == "avg":
+            x = torch.mean(out, dim=1)
+        elif self.pooling_method == "max":
+            x, _ = torch.max(out, dim=1)
+        elif self.pooling_method == "sum":
+            x = torch.sum(out, dim=1)
+        elif self.pooling_method == "last":
+            x = out[:, -1, :]
+        else:
+            raise ValueError(f"Invalid pooling method: {self.pooling_method}")
+        x = x + z
+        if self.norm_f is not None:
+            x = self.norm_f(x)
+        preds = self.tabular_head(x)
+
+        return preds
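For reference, the forward pass above pools the RNN outputs over the feature axis and adds a skip connection formed by projecting the mean of the raw embeddings to the RNN hidden size. A minimal plain-PyTorch sketch of that fusion with toy shapes (illustrative only, no mambular internals):

import torch
import torch.nn as nn

d_model, dim_feedforward = 128, 256
x = torch.randn(8, 5, d_model)                  # 8 rows, 5 embedded features
rnn = nn.GRU(d_model, dim_feedforward, num_layers=2, batch_first=True)
proj = nn.Linear(d_model, dim_feedforward)

out, _ = rnn(x)                                 # (8, 5, dim_feedforward)
pooled = out.mean(dim=1)                        # "avg" pooling over the feature axis
skip = proj(x.mean(dim=1))                      # projected mean of the embeddings
fused = pooled + skip                           # (8, dim_feedforward), input to the head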
diff --git a/mambular/configs/tabularnn_config.py b/mambular/configs/tabularnn_config.py
new file mode 100644
index 0000000..700181c
--- /dev/null
+++ b/mambular/configs/tabularnn_config.py
@@ -0,0 +1,83 @@
+from dataclasses import dataclass
+import torch.nn as nn
+
+
+@dataclass
+class DefaultTabulaRNNConfig:
+    """
+    Configuration class for the default TabulaRNN model with predefined hyperparameters.
+
+    Parameters
+    ----------
+    lr : float, default=1e-04
+        Learning rate for the optimizer.
+    model_type : str, default="RNN"
+        Type of model, one of "RNN", "LSTM", "GRU".
+    lr_patience : int, default=10
+        Number of epochs with no improvement after which the learning rate will be reduced.
+    weight_decay : float, default=1e-06
+        Weight decay (L2 penalty) for the optimizer.
+    lr_factor : float, default=0.1
+        Factor by which the learning rate will be reduced.
+    d_model : int, default=128
+        Dimensionality of the model.
+    n_layers : int, default=4
+        Number of layers in the RNN.
+    rnn_dropout : float, default=0.2
+        Dropout rate between the stacked RNN layers.
+    norm : str, default="RMSNorm"
+        Normalization method to be used.
+    activation : callable, default=nn.SELU()
+        Activation function for the model.
+    embedding_activation : callable, default=nn.Identity()
+        Activation function for numerical embeddings.
+    head_layer_sizes : list, default=()
+        Sizes of the layers in the head of the model.
+    head_dropout : float, default=0.5
+        Dropout rate for the head layers.
+    head_skip_layers : bool, default=False
+        Whether to use skip connections in the head layers.
+    head_activation : callable, default=nn.SELU()
+        Activation function for the head layers.
+    head_use_batch_norm : bool, default=False
+        Whether to use batch normalization in the head layers.
+    layer_norm_after_embedding : bool, default=False
+        Whether to apply layer normalization after embedding.
+    pooling_method : str, default="avg"
+        Pooling method applied to the RNN outputs ('avg', 'max', 'sum', or 'last').
+    norm_first : bool, default=False
+        Whether to apply normalization before other operations in each block.
+    bias : bool, default=True
+        Whether to use bias in the linear layers.
+    rnn_activation : str, default="relu"
+        Activation function for the RNN layers ('tanh' or 'relu'); used only when model_type is "RNN".
+    layer_norm_eps : float, default=1e-05
+        Epsilon value used for numerical stability in the normalization layers.
+    dim_feedforward : int, default=256
+        Hidden size of the recurrent layers.
+    numerical_embedding : str, default="ple"
+        Embedding method for numerical features.
+    bidirectional : bool, default=False
+        Whether to process the feature sequence bidirectionally.
+    cat_encoding : str, default="int"
+        Encoding method for categorical features.
+    """
+
+    lr: float = 1e-04
+    model_type: str = "RNN"
+    lr_patience: int = 10
+    weight_decay: float = 1e-06
+    lr_factor: float = 0.1
+    d_model: int = 128
+    n_layers: int = 4
+    rnn_dropout: float = 0.2
+    norm: str = "RMSNorm"
+    activation: callable = nn.SELU()
+    embedding_activation: callable = nn.Identity()
+    head_layer_sizes: list = ()
+    head_dropout: float = 0.5
+    head_skip_layers: bool = False
+    head_activation: callable = nn.SELU()
+    head_use_batch_norm: bool = False
+    layer_norm_after_embedding: bool = False
+    pooling_method: str = "avg"
+    norm_first: bool = False
+    bias: bool = True
+    rnn_activation: str = "relu"
+    layer_norm_eps: float = 1e-05
+    dim_feedforward: int = 256
+    numerical_embedding: str = "ple"
+    bidirectional: bool = False
+    cat_encoding: str = "int"
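Since the configuration is a plain dataclass, overriding defaults is simple keyword construction; the same field names can also be passed directly as keyword arguments to the TabulaRNN* estimator classes below, which forward them to this config. A small sketch:

from mambular.configs.tabularnn_config import DefaultTabulaRNNConfig

# Swap the recurrent cell for a GRU and shrink the model; every field not
# listed keeps the dataclass default shown above. Note that rnn_activation
# only takes effect when model_type == "RNN".
cfg = DefaultTabulaRNNConfig(
    model_type="GRU",
    d_model=64,
    dim_feedforward=128,
    n_layers=2,
    pooling_method="last",
)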
diff --git a/mambular/models/__init__.py b/mambular/models/__init__.py
index 28a0ee4..6b9f40c 100644
--- a/mambular/models/__init__.py
+++ b/mambular/models/__init__.py
@@ -16,6 +16,7 @@
 )
 from .mambatab import MambaTabClassifier, MambaTabRegressor, MambaTabLSS
+from .tabularnn import TabulaRNNClassifier, TabulaRNNRegressor, TabulaRNNLSS
 
 
 __all__ = [
@@ -40,4 +41,7 @@
     "MambaTabRegressor",
     "MambaTabClassifier",
     "MambaTabLSS",
+    "TabulaRNNClassifier",
+    "TabulaRNNRegressor",
+    "TabulaRNNLSS",
 ]
diff --git a/mambular/models/tabularnn.py b/mambular/models/tabularnn.py
new file mode 100644
index 0000000..60daf2a
--- /dev/null
+++ b/mambular/models/tabularnn.py
@@ -0,0 +1,255 @@
+from .sklearn_base_regressor import SklearnBaseRegressor
+from .sklearn_base_classifier import SklearnBaseClassifier
+from .sklearn_base_lss import SklearnBaseLSS
+
+from ..base_models.tabularnn import TabulaRNN
+from ..configs.tabularnn_config import DefaultTabulaRNNConfig
+
+
+class TabulaRNNRegressor(SklearnBaseRegressor):
+    """
+    RNN regressor. This class extends the SklearnBaseRegressor class and uses the TabulaRNN model
+    with the default TabulaRNN configuration.
+
+    The accepted arguments to the TabulaRNNRegressor class include both the attributes in the
+    DefaultTabulaRNNConfig dataclass and the parameters for the Preprocessor class.
+
+    Parameters
+    ----------
+    lr : float, default=1e-04
+        Learning rate for the optimizer.
+    model_type : str, default="RNN"
+        Type of model, one of "RNN", "LSTM", "GRU".
+    lr_patience : int, default=10
+        Number of epochs with no improvement after which the learning rate will be reduced.
+    weight_decay : float, default=1e-06
+        Weight decay (L2 penalty) for the optimizer.
+    lr_factor : float, default=0.1
+        Factor by which the learning rate will be reduced.
+    d_model : int, default=128
+        Dimensionality of the model.
+    n_layers : int, default=4
+        Number of layers in the RNN.
+    norm : str, default="RMSNorm"
+        Normalization method to be used.
+    activation : callable, default=nn.SELU()
+        Activation function for the model.
+    embedding_activation : callable, default=nn.Identity()
+        Activation function for numerical embeddings.
+    head_layer_sizes : list, default=()
+        Sizes of the layers in the head of the model.
+    head_dropout : float, default=0.5
+        Dropout rate for the head layers.
+    head_skip_layers : bool, default=False
+        Whether to use skip connections in the head layers.
+    head_activation : callable, default=nn.SELU()
+        Activation function for the head layers.
+    head_use_batch_norm : bool, default=False
+        Whether to use batch normalization in the head layers.
+    layer_norm_after_embedding : bool, default=False
+        Whether to apply layer normalization after embedding.
+    pooling_method : str, default="avg"
+        Pooling method applied to the RNN outputs ('avg', 'max', 'sum', or 'last').
+    norm_first : bool, default=False
+        Whether to apply normalization before other operations in each block.
+    bias : bool, default=True
+        Whether to use bias in the linear layers.
+    rnn_activation : str, default="relu"
+        Activation function for the RNN layers ('tanh' or 'relu'); used only when model_type is "RNN".
+    bidirectional : bool, default=False
+        Whether to process the feature sequence bidirectionally.
+    cat_encoding : str, default="int"
+        Encoding method for categorical features.
+    n_bins : int, default=50
+        The number of bins to use for numerical feature binning. This parameter is relevant
+        only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
+    numerical_preprocessing : str, default="ple"
+        The preprocessing strategy for numerical features. Valid options are
+        'ple', 'binning', 'one_hot', 'standardization', and 'normalization'.
+    use_decision_tree_bins : bool, default=False
+        If True, uses decision tree regression/classification to determine
+        optimal bin edges for numerical feature binning. This parameter is
+        relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
+    binning_strategy : str, default="uniform"
+        Defines the strategy for binning numerical features. Options include 'uniform',
+        'quantile', or other sklearn-compatible strategies.
+    cat_cutoff : float or int, default=0.03
+        Indicates the cutoff after which integer values are treated as categorical.
+        If float, it's treated as a percentage. If int, it's the maximum number of
+        unique values for a column to be considered categorical.
+    treat_all_integers_as_numerical : bool, default=False
+        If True, all integer columns will be treated as numerical, regardless
+        of their unique value count or proportion.
+    degree : int, default=3
+        The degree of the polynomial features to be used in preprocessing.
+    knots : int, default=12
+        The number of knots to be used in spline transformations.
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(model=TabulaRNN, config=DefaultTabulaRNNConfig, **kwargs)
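A quick usage sketch for the regressor, assuming the standard fit/predict interface provided by SklearnBaseRegressor; the synthetic data and column names below are purely illustrative:

import numpy as np
import pandas as pd
from mambular.models import TabulaRNNRegressor

rng = np.random.default_rng(0)
X = pd.DataFrame(
    {
        "num_a": rng.normal(size=256),
        "num_b": rng.normal(size=256),
        "cat_a": rng.integers(0, 3, size=256),   # small-cardinality integer column
    }
)
y = rng.normal(size=256)

# Config fields (model_type, n_layers, ...) are accepted as keyword arguments.
reg = TabulaRNNRegressor(model_type="LSTM", n_layers=2)
reg.fit(X, y)            # assumed sklearn-style API from SklearnBaseRegressor
preds = reg.predict(X)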
+
+
+class TabulaRNNClassifier(SklearnBaseClassifier):
+    """
+    RNN classifier. This class extends the SklearnBaseClassifier class and uses the TabulaRNN model
+    with the default TabulaRNN configuration.
+
+    The accepted arguments to the TabulaRNNClassifier class include both the attributes in the
+    DefaultTabulaRNNConfig dataclass and the parameters for the Preprocessor class.
+
+    Parameters
+    ----------
+    lr : float, default=1e-04
+        Learning rate for the optimizer.
+    model_type : str, default="RNN"
+        Type of model, one of "RNN", "LSTM", "GRU".
+    lr_patience : int, default=10
+        Number of epochs with no improvement after which the learning rate will be reduced.
+    weight_decay : float, default=1e-06
+        Weight decay (L2 penalty) for the optimizer.
+    lr_factor : float, default=0.1
+        Factor by which the learning rate will be reduced.
+    d_model : int, default=128
+        Dimensionality of the model.
+    n_layers : int, default=4
+        Number of layers in the RNN.
+    norm : str, default="RMSNorm"
+        Normalization method to be used.
+    activation : callable, default=nn.SELU()
+        Activation function for the model.
+    embedding_activation : callable, default=nn.Identity()
+        Activation function for numerical embeddings.
+    head_layer_sizes : list, default=()
+        Sizes of the layers in the head of the model.
+    head_dropout : float, default=0.5
+        Dropout rate for the head layers.
+    head_skip_layers : bool, default=False
+        Whether to use skip connections in the head layers.
+    head_activation : callable, default=nn.SELU()
+        Activation function for the head layers.
+    head_use_batch_norm : bool, default=False
+        Whether to use batch normalization in the head layers.
+    layer_norm_after_embedding : bool, default=False
+        Whether to apply layer normalization after embedding.
+    pooling_method : str, default="avg"
+        Pooling method applied to the RNN outputs ('avg', 'max', 'sum', or 'last').
+    norm_first : bool, default=False
+        Whether to apply normalization before other operations in each block.
+    bias : bool, default=True
+        Whether to use bias in the linear layers.
+    rnn_activation : str, default="relu"
+        Activation function for the RNN layers ('tanh' or 'relu'); used only when model_type is "RNN".
+    bidirectional : bool, default=False
+        Whether to process the feature sequence bidirectionally.
+    cat_encoding : str, default="int"
+        Encoding method for categorical features.
+    n_bins : int, default=50
+        The number of bins to use for numerical feature binning. This parameter is relevant
+        only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
+    numerical_preprocessing : str, default="ple"
+        The preprocessing strategy for numerical features. Valid options are
+        'ple', 'binning', 'one_hot', 'standardization', and 'normalization'.
+    use_decision_tree_bins : bool, default=False
+        If True, uses decision tree regression/classification to determine
+        optimal bin edges for numerical feature binning. This parameter is
+        relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
+    binning_strategy : str, default="uniform"
+        Defines the strategy for binning numerical features. Options include 'uniform',
+        'quantile', or other sklearn-compatible strategies.
+    cat_cutoff : float or int, default=0.03
+        Indicates the cutoff after which integer values are treated as categorical.
+        If float, it's treated as a percentage. If int, it's the maximum number of
+        unique values for a column to be considered categorical.
+    treat_all_integers_as_numerical : bool, default=False
+        If True, all integer columns will be treated as numerical, regardless
+        of their unique value count or proportion.
+    degree : int, default=3
+        The degree of the polynomial features to be used in preprocessing.
+    knots : int, default=12
+        The number of knots to be used in spline transformations.
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(model=TabulaRNN, config=DefaultTabulaRNNConfig, **kwargs)
+
+
+class TabulaRNNLSS(SklearnBaseLSS):
+    """
+    RNN LSS model. This class extends the SklearnBaseLSS class and uses the TabulaRNN model
+    with the default TabulaRNN configuration.
+
+    The accepted arguments to the TabulaRNNLSS class include both the attributes in the
+    DefaultTabulaRNNConfig dataclass and the parameters for the Preprocessor class.
+
+    Parameters
+    ----------
+    lr : float, default=1e-04
+        Learning rate for the optimizer.
+    model_type : str, default="RNN"
+        Type of model, one of "RNN", "LSTM", "GRU".
+    lr_patience : int, default=10
+        Number of epochs with no improvement after which the learning rate will be reduced.
+    weight_decay : float, default=1e-06
+        Weight decay (L2 penalty) for the optimizer.
+    lr_factor : float, default=0.1
+        Factor by which the learning rate will be reduced.
+    d_model : int, default=128
+        Dimensionality of the model.
+    n_layers : int, default=4
+        Number of layers in the RNN.
+    norm : str, default="RMSNorm"
+        Normalization method to be used.
+    activation : callable, default=nn.SELU()
+        Activation function for the model.
+    embedding_activation : callable, default=nn.Identity()
+        Activation function for numerical embeddings.
+    head_layer_sizes : list, default=()
+        Sizes of the layers in the head of the model.
+    head_dropout : float, default=0.5
+        Dropout rate for the head layers.
+    head_skip_layers : bool, default=False
+        Whether to use skip connections in the head layers.
+    head_activation : callable, default=nn.SELU()
+        Activation function for the head layers.
+    head_use_batch_norm : bool, default=False
+        Whether to use batch normalization in the head layers.
+    layer_norm_after_embedding : bool, default=False
+        Whether to apply layer normalization after embedding.
+    pooling_method : str, default="avg"
+        Pooling method applied to the RNN outputs ('avg', 'max', 'sum', or 'last').
+    norm_first : bool, default=False
+        Whether to apply normalization before other operations in each block.
+    bias : bool, default=True
+        Whether to use bias in the linear layers.
+    rnn_activation : str, default="relu"
+        Activation function for the RNN layers ('tanh' or 'relu'); used only when model_type is "RNN".
+    bidirectional : bool, default=False
+        Whether to process the feature sequence bidirectionally.
+    cat_encoding : str, default="int"
+        Encoding method for categorical features.
+    n_bins : int, default=50
+        The number of bins to use for numerical feature binning. This parameter is relevant
+        only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
+    numerical_preprocessing : str, default="ple"
+        The preprocessing strategy for numerical features. Valid options are
+        'ple', 'binning', 'one_hot', 'standardization', and 'normalization'.
+    use_decision_tree_bins : bool, default=False
+        If True, uses decision tree regression/classification to determine
+        optimal bin edges for numerical feature binning. This parameter is
+        relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
+    binning_strategy : str, default="uniform"
+        Defines the strategy for binning numerical features. Options include 'uniform',
+        'quantile', or other sklearn-compatible strategies.
+    cat_cutoff : float or int, default=0.03
+        Indicates the cutoff after which integer values are treated as categorical.
+        If float, it's treated as a percentage. If int, it's the maximum number of
+        unique values for a column to be considered categorical.
+    treat_all_integers_as_numerical : bool, default=False
+        If True, all integer columns will be treated as numerical, regardless
+        of their unique value count or proportion.
+    degree : int, default=3
+        The degree of the polynomial features to be used in preprocessing.
+    knots : int, default=12
+        The number of knots to be used in spline transformations.
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(model=TabulaRNN, config=DefaultTabulaRNNConfig, **kwargs)
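A corresponding classifier sketch, assuming the sklearn-style fit / predict / predict_proba interface exposed by SklearnBaseClassifier (again with purely illustrative data); TabulaRNNLSS follows the same construction pattern for distributional regression via SklearnBaseLSS:

import numpy as np
import pandas as pd
from mambular.models import TabulaRNNClassifier

rng = np.random.default_rng(1)
X = pd.DataFrame({"f1": rng.normal(size=200), "f2": rng.integers(0, 4, size=200)})
y = rng.integers(0, 2, size=200)   # binary target, illustrative only

clf = TabulaRNNClassifier(model_type="GRU", pooling_method="last")
clf.fit(X, y)
proba = clf.predict_proba(X)       # assumed sklearn-style API from SklearnBaseClassifier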