diff --git a/docs/api/models/Models.rst b/docs/api/models/Models.rst index fff2798..bb5aaf8 100644 --- a/docs/api/models/Models.rst +++ b/docs/api/models/Models.rst @@ -1,74 +1,98 @@ -mambular.models -=============== - -.. autoclass:: mambular.models.MambularClassifier - :members: - :inherited-members: - -.. autoclass:: mambular.models.MambularRegressor - :members: - :inherited-members: - -.. autoclass:: mambular.models.MambularLSS - :members: - :undoc-members: - -.. autoclass:: mambular.models.FTTransformerClassifier - :members: - :undoc-members: - -.. autoclass:: mambular.models.FTTransformerRegressor - :members: - :undoc-members: - -.. autoclass:: mambular.models.FTTransformerLSS - :members: - :undoc-members: - -.. autoclass:: mambular.models.MLPClassifier - :members: - :undoc-members: - -.. autoclass:: mambular.models.MLPRegressor - :members: - :undoc-members: - -.. autoclass:: mambular.models.MLPLSS - :members: - :undoc-members: - -.. autoclass:: mambular.models.TabTransformerClassifier - :members: - :undoc-members: - -.. autoclass:: mambular.models.TabTransformerRegressor - :members: - :undoc-members: - -.. autoclass:: mambular.models.TabTransformerLSS - :members: - :undoc-members: - -.. autoclass:: mambular.models.ResNetClassifier - :members: - :undoc-members: - -.. autoclass:: mambular.models.ResNetRegressor - :members: - :undoc-members: - -.. autoclass:: mambular.models.ResNetLSS - :members: - :undoc-members: - -.. autoclass:: mambular.models.SklearnBaseClassifier - :members: - :undoc-members: - -.. autoclass:: mambular.models.SklearnBaseLSS - :members: - :undoc-members: - -.. autoclass:: mambular.models.SklearnBaseRegressor - :members: - :undoc-members: +mambular.models +=============== + +.. autoclass:: mambular.models.MambularClassifier + :members: + :inherited-members: + +.. autoclass:: mambular.models.MambularRegressor + :members: + :inherited-members: + +.. autoclass:: mambular.models.MambularLSS + :members: + :undoc-members: + +.. 
autoclass:: mambular.models.FTTransformerClassifier + :members: + :undoc-members: + +.. autoclass:: mambular.models.FTTransformerRegressor + :members: + :undoc-members: + +.. autoclass:: mambular.models.FTTransformerLSS + :members: + :undoc-members: + +.. autoclass:: mambular.models.MLPClassifier + :members: + :undoc-members: + +.. autoclass:: mambular.models.MLPRegressor + :members: + :undoc-members: + +.. autoclass:: mambular.models.MLPLSS + :members: + :undoc-members: + +.. autoclass:: mambular.models.TabTransformerClassifier + :members: + :undoc-members: + +.. autoclass:: mambular.models.TabTransformerRegressor + :members: + :undoc-members: + +.. autoclass:: mambular.models.TabTransformerLSS + :members: + :undoc-members: + +.. autoclass:: mambular.models.ResNetClassifier + :members: + :undoc-members: + +.. autoclass:: mambular.models.ResNetRegressor + :members: + :undoc-members: + +.. autoclass:: mambular.models.ResNetLSS + :members: + :undoc-members: + +.. autoclass:: mambular.models.MambaTabClassifier + :members: + :undoc-members: + +.. autoclass:: mambular.models.MambaTabRegressor + :members: + :undoc-members: + +.. autoclass:: mambular.models.MambaTabLSS + :members: + :undoc-members: + +.. autoclass:: mambular.models.TabulaRNNClassifier + :members: + :undoc-members: + +.. autoclass:: mambular.models.TabulaRNNRegressor + :members: + :undoc-members: + +.. autoclass:: mambular.models.TabulaRNNLSS + :members: + :undoc-members: + +.. autoclass:: mambular.models.SklearnBaseClassifier + :members: + :undoc-members: + +.. autoclass:: mambular.models.SklearnBaseLSS + :members: + :undoc-members: + +.. autoclass:: mambular.models.SklearnBaseRegressor + :members: + :undoc-members: diff --git a/docs/api/models/index.rst b/docs/api/models/index.rst index 9446da6..5bb5980 100644 --- a/docs/api/models/index.rst +++ b/docs/api/models/index.rst @@ -1,37 +1,43 @@ -.. -*- mode: rst -*- - -.. 
currentmodule:: mambular.models - -Models -====== - -This module provides classes for the Mambular models that adhere to scikit-learn's `BaseEstimator` interface. - -======================================= ======================================================================================================= -Modules Description -======================================= ======================================================================================================= -:class:`MambularClassifier` Multi-class and binary classification tasks. -:class:`MambularRegressor` Regression tasks. -:class:`MambularLSS` Various statistical distribution families for different types of regression and classification tasks. -:class:`FTTransformerClassifier` FT transformer for classification tasks. -:class:`FTTransformerRegressor` FT transformer for regression tasks. -:class:`FTTransformerLSS` Various statistical distribution families for different types of regression and classification tasks. -:class:`MLPClassifier` Multi-class and binary classification tasks. -:class:`MLPRegressor` MLP for regression tasks. -:class:`MLPLSS` Various statistical distribution families for different types of regression and classification tasks. -:class:`TabTransformerClassifier` TabTransformer for classification tasks. -:class:`TabTransformerRegressor` TabTransformer for regression tasks. -:class:`TabTransformerLSS` TabTransformer for distributional tasks. -:class:`ResNetClassifier` Multi-class and binary classification tasks using ResNet. -:class:`ResNetRegressor` Regression tasks using ResNet. -:class:`ResNetLSS` Distributional tasks using ResNet. -:class:`SklearnBaseClassifier` Base class for classification tasks. -:class:`SklearnBaseLSS` Base class for distributional tasks. -:class:`SklearnBaseRegressor` Base class for regression tasks. -======================================= ======================================================================================================= - -.. 
toctree:: - :maxdepth: 1 - - Models - +.. -*- mode: rst -*- + +.. currentmodule:: mambular.models + +Models +====== + +This module provides classes for the Mambular models that adhere to scikit-learn's `BaseEstimator` interface. + +======================================= ======================================================================================================= +Modules Description +======================================= ======================================================================================================= +:class:`MambularClassifier` Multi-class and binary classification tasks with a sequential Mambular Model. +:class:`MambularRegressor` Regression tasks with a sequential Mambular Model. +:class:`MambularLSS` Various statistical distribution families for different types of regression and classification tasks. +:class:`FTTransformerClassifier` FT transformer for classification tasks. +:class:`FTTransformerRegressor` FT transformer for regression tasks. +:class:`FTTransformerLSS` Various statistical distribution families for different types of regression and classification tasks. +:class:`MLPClassifier` Multi-class and binary classification tasks. +:class:`MLPRegressor` MLP for regression tasks. +:class:`MLPLSS` Various statistical distribution families for different types of regression and classification tasks. +:class:`TabTransformerClassifier` TabTransformer for classification tasks. +:class:`TabTransformerRegressor` TabTransformer for regression tasks. +:class:`TabTransformerLSS` TabTransformer for distributional tasks. +:class:`ResNetClassifier` Multi-class and binary classification tasks using ResNet. +:class:`ResNetRegressor` Regression tasks using ResNet. +:class:`ResNetLSS` Distributional tasks using ResNet. +:class:`MambaTabClassifier` Multi-class and binary classification tasks using MambaTab. +:class:`MambaTabRegressor` Regression tasks using MambaTab. +:class:`MambaTabLSS` Distributional tasks using MambaTab. 
+:class:`TabulaRNNClassifier` Multi-class and binary classification tasks using a RNN. +:class:`TabulaRNNRegressor` Regression tasks using a RNN. +:class:`TabulaRNNLSS` Distributional tasks using a RNN. +:class:`SklearnBaseClassifier` Base class for classification tasks. +:class:`SklearnBaseLSS` Base class for distributional tasks. +:class:`SklearnBaseRegressor` Base class for regression tasks. +======================================= ======================================================================================================= + +.. toctree:: + :maxdepth: 1 + + Models + diff --git a/docs/homepage.md b/docs/homepage.md index abf5925..443e2d7 100644 --- a/docs/homepage.md +++ b/docs/homepage.md @@ -1,245 +1,247 @@ -# Mambular: Tabular Deep Learning with Mamba Architectures - -Mambular is a Python package that brings the power of advanced deep learning architectures to tabular data, offering a suite of models for regression, classification, and distributional regression tasks. Designed with ease of use in mind, Mambular models adhere to scikit-learn's `BaseEstimator` interface, making them highly compatible with the familiar scikit-learn ecosystem. This means you can fit, predict, and evaluate using Mambular models just as you would with any traditional scikit-learn model, but with the added performance and flexibility of deep learning. - -## Features - -- **Comprehensive Model Suite**: Includes modules for regression, classification, and distributional regression, catering to a wide range of tabular data tasks. -- **State-of-the-Art Architectures**: Leverages various advanced architectures known for their effectiveness in handling tabular data. Mambular models include powerful Mamba blocks [Gu and Dao](https://arxiv.org/pdf/2312.00752) and can include bidirectional processing as well as feature interaction layers. 
-- **Seamless Integration**: Designed to work effortlessly with scikit-learn, allowing for easy inclusion in existing machine learning pipelines, cross-validation, and hyperparameter tuning workflows. -- **Extensive Preprocessing**: Comes with a powerful preprocessing module that supports a broad array of data transformation techniques, ensuring that your data is optimally prepared for model training. -- **Sklearn-like API**: The familiar scikit-learn `fit`, `predict`, and `predict_proba` methods mean minimal learning curve for those already accustomed to scikit-learn. -- **PyTorch Lightning Under the Hood**: Built on top of PyTorch Lightning, Mambular models benefit from streamlined training processes, easy customization, and advanced features like distributed training and 16-bit precision. - - -## Models - -| Model | Description | -|---------------------|--------------------------------------------------------------------------------------------------| -| `Mambular` | An advanced model using Mamba blocks [Gu and Dao](https://arxiv.org/pdf/2312.00752) specifically designed for various tabular data tasks. | -| `FTTransformer` | A model leveraging transformer encoders, as introduced by [Gorishniy et al.](https://arxiv.org/abs/2106.11959), for tabular data. | -| `MLP` | A classical Multi-Layer Perceptron (MLP) model for handling tabular data tasks. | -| `ResNet` | An adaptation of the ResNet architecture for tabular data applications. | -| `TabTransformer` | A transformer-based model for tabular data introduced by [Huang et al.](https://arxiv.org/abs/2012.06678), enhancing feature learning capabilities. | - -All models are available for `regression`, `classification` and distributional regression, denoted by `LSS`. -Hence, they are available as e.g. `MambularRegressor`, `MambularClassifier` or `MambularLSS` - - - -## Documentation - -You can find the Mamba-Tabular API documentation [here](https://mamba-tabular.readthedocs.io/en/latest/index.html). 
- -## Installation - -Install Mambular using pip: -```sh -pip install mambular -``` - -## Preprocessing - -Mambular simplifies the preprocessing stage of model development with a comprehensive set of techniques to prepare your data for Mamba architectures. Our preprocessing module is designed to be both powerful and easy to use, offering a variety of options to efficiently transform your tabular data. - -### Data Type Detection and Transformation - -Mambular automatically identifies the type of each feature in your dataset and applies the most appropriate transformations for numerical and categorical variables. This includes: -- **Ordinal Encoding**: Categorical features are seamlessly transformed into numerical values, preserving their inherent order and making them model-ready. -- **One-Hot Encoding**: For nominal data, Mambular employs one-hot encoding to capture the presence or absence of categories without imposing ordinality. -- **Binning**: Numerical features can be discretized into bins, a useful technique for handling continuous variables in certain modeling contexts. -- **Decision Tree Binning**: Optionally, Mambular can use decision trees to find the optimal binning strategy for numerical features, enhancing model interpretability and performance. -- **Normalization**: Mambular can easily handle numerical features without specifically turning them into categorical features. Standard preprocessing steps such as normalization per feature are possible. -- **Standardization**: Similarly, standardization instead of normalization can be used to scale features based on the mean and standard deviation. -- **PLE (Periodic Linear Encoding)**: This technique can be applied to numerical features to enhance the performance of tabular deep learning methods by encoding periodicity. -- **Quantile Transformation**: Numerical features can be transformed to follow a uniform or normal distribution, improving model robustness to outliers. 
-- **Spline Transformation**: Applies piecewise polynomial functions to numerical features, capturing nonlinear relationships more effectively. -- **Polynomial Features**: Generates polynomial and interaction features, increasing the feature space to capture more complex relationships within the data. - - -### Handling Missing Values - -Our preprocessing pipeline effectively handles missing data by using mean imputation for numerical features and mode imputation for categorical features. This ensures that your models receive complete data inputs without needing manual intervention. -Additionally, Mambular can manage unknown categorical values during inference by incorporating classical tokens in categorical preprocessing. - - -## Fit a Model -Fitting a model in mambular is as simple as it gets. All models in mambular are sklearn BaseEstimators. Thus the `.fit` method is implemented for all of them. Additionally, this allows for using all other sklearn inherent methods such as their built in hyperparameter optimization tools. - -```python -from mambular.models import MambularClassifier -# Initialize and fit your model -model = MambularClassifier( - d_model=64, - n_layers=8, - numerical_preprocessing="ple", - n_bins=50 -) - -# X can be a dataframe or something that can be easily transformed into a pd.DataFrame as a np.array -model.fit(X, y, max_epochs=150, lr=1e-04) -``` - -Predictions are also easily obtained: -```python -# simple predictions -preds = model.predict(X) - -# Predict probabilities -preds = model.predict_proba(X) -``` - - -## Distributional Regression with MambularLSS - -Mambular introduces an approach to distributional regression through its `MambularLSS` module, allowing users to model the full distribution of a response variable, not just its mean. This method is particularly valuable in scenarios where understanding the variability, skewness, or kurtosis of the response distribution is as crucial as predicting its central tendency. 
All available moedls in mambular are also available as distributional models. - -### Key Features of MambularLSS: - -- **Full Distribution Modeling**: Unlike traditional regression models that predict a single value (e.g., the mean), `MambularLSS` models the entire distribution of the response variable. This allows for more informative predictions, including quantiles, variance, and higher moments. -- **Customizable Distribution Types**: `MambularLSS` supports a variety of distribution families (e.g., Gaussian, Poisson, Binomial), making it adaptable to different types of response variables, from continuous to count data. -- **Location, Scale, Shape Parameters**: The model predicts parameters corresponding to the location, scale, and shape of the distribution, offering a nuanced understanding of the data's underlying distributional characteristics. -- **Enhanced Predictive Uncertainty**: By modeling the full distribution, `MambularLSS` provides richer information on predictive uncertainty, enabling more robust decision-making processes in uncertain environments. - - - -### Available Distribution Classes: - -`MambularLSS` offers a wide range of distribution classes to cater to various statistical modeling needs. The available distribution classes include: - -- `normal`: Normal Distribution for modeling continuous data with a symmetric distribution around the mean. -- `poisson`: Poisson Distribution for modeling count data that for instance represent the number of events occurring within a fixed interval. -- `gamma`: Gamma Distribution for modeling continuous data that is skewed and bounded at zero, often used for waiting times. -- `beta`: Beta Distribution for modeling data that is bounded between 0 and 1, useful for proportions and percentages. -- `dirichlet`: Dirichlet Distribution for modeling multivariate data where individual components are correlated, and the sum is constrained to 1. 
-- `studentt`: Student's T-Distribution for modeling data with heavier tails than the normal distribution, useful when the sample size is small. -- `negativebinom`: Negative Binomial Distribution for modeling count data with over-dispersion relative to the Poisson distribution. -- `inversegamma`: Inverse Gamma Distribution, often used as a prior distribution in Bayesian inference for scale parameters. -- `categorical`: Categorical Distribution for modeling categorical data with more than two categories. - -These distribution classes allow `MambularLSS` to flexibly model a wide variety of data types and distributions, providing users with the tools needed to capture the full complexity of their data. - - -### Getting Started with MambularLSS: - -To integrate distributional regression into your workflow with `MambularLSS`, start by initializing the model with your desired configuration, similar to other Mambular models: - -```python -from mambular.models import MambularLSS - -# Initialize the MambularLSS model -model = MambularLSS( - dropout=0.2, - d_model=64, - n_layers=8, - -) - -# Fit the model to your data -model.fit( - X, - y, - max_epochs=150, - lr=1e-04, - patience=10, - family="normal" # define your distribution - ) - -``` - - -### Implement Your Own Model - -Mambular allows users to easily integrate their custom models into the existing logic. This process is designed to be straightforward, making it simple to create a PyTorch model and define its forward pass. Instead of inheriting from `nn.Module`, you inherit from Mambular's `BaseModel`. Each Mambular model takes three main arguments: the number of classes (e.g., 1 for regression or 2 for binary classification), `cat_feature_info`, and `num_feature_info` for categorical and numerical feature information, respectively. Additionally, you can provide a config argument, which can either be a custom configuration or one of the provided default configs. 
- -One of the key advantages of using Mambular is that the inputs to the forward passes are lists of tensors. While this might be unconventional, it is highly beneficial for models that treat different data types differently. For example, the TabTransformer model leverages this feature to handle categorical and numerical data separately, applying different transformations and processing steps to each type of data. - -Here's how you can implement a custom model with Mambular: - - -1. First, define your config: -The configuration class allows you to specify hyperparameters and other settings for your model. This can be done using a simple dataclass. - -```python -from dataclasses import dataclass - -@dataclass -class MyConfig: - lr: float = 1e-04 - lr_patience: int = 10 - weight_decay: float = 1e-06 - lr_factor: float = 0.1 -``` - -2. Second, define your model: -Define your custom model just as you would for an `nn.Module`. The main difference is that you will inherit from `BaseModel` and use the provided feature information to construct your layers. To integrate your model into the existing API, you only need to define the architecture and the forward pass. - -```python -from mambular.base_models import BaseModel -import torch -import torch.nn - -class MyCustomModel(BaseModel): - def __init__( - self, - cat_feature_info, - num_feature_info, - num_classes: int = 1, - config=None, - **kwargs, - ): - super().__init__(**kwargs) - self.save_hyperparameters(ignore=["cat_feature_info", "num_feature_info"]) - - input_dim = 0 - for feature_name, input_shape in num_feature_info.items(): - input_dim += input_shape - for feature_name, input_shape in cat_feature_info.items(): - input_dim += 1 - - self.linear = nn.Linear(input_dim, num_classes) - - def forward(self, num_features, cat_features): - x = num_features + cat_features - x = torch.cat(x, dim=1) - - # Pass through linear layer - output = self.linear(x) - return output -``` - -3. 
Leverage the Mambular API: -You can build a regression, classification or distributional regression model that can leverage all of mambulars built-in methods, by using the following: - -```python -from mambular.models import SklearnBaseRegressor - -class MyRegressor(SklearnBaseRegressor): - def __init__(self, **kwargs): - super().__init__(model=MyCustomModel, config=MyConfig, **kwargs) -``` - -4. Train and evaluate your model: -You can now fit, evaluate, and predict with your custom model just like with any other Mambular model. For classification or distributional regression, inherit from `SklearnBaseClassifier` or `SklearnBaseLSS` respectively. - -```python -regressor = MyRegressor(numerical_preprocessing="ple") -regressor.fit(X_train, y_train, max_epochs=50) -``` - - -## Citation - -If you find this project useful in your research, please consider cite: -```BibTeX -@misc{2024, - title={Mambular: Tabular Deep Learning with Mamba Architectures}, - author={Anton Frederik Thielmann, Manish Kumar, Christoph Weisser, Benjamin Saefken, Soheila Samiee}, - howpublished = {\url{https://github.com/basf/mamba-tabular}}, - year={2024} -} -``` - -## License - +# Mambular: Tabular Deep Learning with Mamba Architectures + +Mambular is a Python package that brings the power of advanced deep learning architectures to tabular data, offering a suite of models for regression, classification, and distributional regression tasks. Designed with ease of use in mind, Mambular models adhere to scikit-learn's `BaseEstimator` interface, making them highly compatible with the familiar scikit-learn ecosystem. This means you can fit, predict, and evaluate using Mambular models just as you would with any traditional scikit-learn model, but with the added performance and flexibility of deep learning. + +## Features + +- **Comprehensive Model Suite**: Includes modules for regression, classification, and distributional regression, catering to a wide range of tabular data tasks. 
+- **State-of-the-Art Architectures**: Leverages various advanced architectures known for their effectiveness in handling tabular data. Mambular models include powerful Mamba blocks [Gu and Dao](https://arxiv.org/pdf/2312.00752) and can include bidirectional processing as well as feature interaction layers. +- **Seamless Integration**: Designed to work effortlessly with scikit-learn, allowing for easy inclusion in existing machine learning pipelines, cross-validation, and hyperparameter tuning workflows. +- **Extensive Preprocessing**: Comes with a powerful preprocessing module that supports a broad array of data transformation techniques, ensuring that your data is optimally prepared for model training. +- **Sklearn-like API**: The familiar scikit-learn `fit`, `predict`, and `predict_proba` methods mean minimal learning curve for those already accustomed to scikit-learn. +- **PyTorch Lightning Under the Hood**: Built on top of PyTorch Lightning, Mambular models benefit from streamlined training processes, easy customization, and advanced features like distributed training and 16-bit precision. + + +## Models + +| Model | Description | +| ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------- | +| `Mambular` | A sequential model using Mamba blocks [Gu and Dao](https://arxiv.org/pdf/2312.00752) specifically designed for various tabular data tasks. | +| `FTTransformer` | A model leveraging transformer encoders, as introduced by [Gorishniy et al.](https://arxiv.org/abs/2106.11959), for tabular data. | +| `MLP` | A classical Multi-Layer Perceptron (MLP) model for handling tabular data tasks. | +| `ResNet` | An adaptation of the ResNet architecture for tabular data applications. | +| `TabTransformer` | A transformer-based model for tabular data introduced by [Huang et al.](https://arxiv.org/abs/2012.06678), enhancing feature learning capabilities. 
| +| `MambaTab` | A tabular model using a Mamba-Block on a joint input representation described [here](https://arxiv.org/abs/2401.08867). Not a sequential model. | +| `TabulaRNN` | A Recurrent Neural Network for tabular data. Not yet included in the benchmarks. | + + + +## Documentation + +You can find the Mamba-Tabular API documentation [here](https://mamba-tabular.readthedocs.io/en/latest/index.html). + +## Installation + +Install Mambular using pip: +```sh +pip install mambular +``` + +## Preprocessing + +Mambular simplifies the preprocessing stage of model development with a comprehensive set of techniques to prepare your data for Mamba architectures. Our preprocessing module is designed to be both powerful and easy to use, offering a variety of options to efficiently transform your tabular data. + +### Data Type Detection and Transformation + +Mambular automatically identifies the type of each feature in your dataset and applies the most appropriate transformations for numerical and categorical variables. This includes: +- **Ordinal Encoding**: Categorical features are seamlessly transformed into numerical values, preserving their inherent order and making them model-ready. +- **One-Hot Encoding**: For nominal data, Mambular employs one-hot encoding to capture the presence or absence of categories without imposing ordinality. +- **Binning**: Numerical features can be discretized into bins, a useful technique for handling continuous variables in certain modeling contexts. +- **Decision Tree Binning**: Optionally, Mambular can use decision trees to find the optimal binning strategy for numerical features, enhancing model interpretability and performance. +- **Normalization**: Mambular can easily handle numerical features without specifically turning them into categorical features. Standard preprocessing steps such as normalization per feature are possible.
+- **Standardization**: Similarly, standardization instead of normalization can be used to scale features based on the mean and standard deviation. +- **PLE (Piecewise Linear Encoding)**: This technique can be applied to numerical features to enhance the performance of tabular deep learning methods by encoding each value piecewise-linearly over a set of bins. +- **Quantile Transformation**: Numerical features can be transformed to follow a uniform or normal distribution, improving model robustness to outliers. +- **Spline Transformation**: Applies piecewise polynomial functions to numerical features, capturing nonlinear relationships more effectively. +- **Polynomial Features**: Generates polynomial and interaction features, increasing the feature space to capture more complex relationships within the data. + + +### Handling Missing Values + +Our preprocessing pipeline effectively handles missing data by using mean imputation for numerical features and mode imputation for categorical features. This ensures that your models receive complete data inputs without needing manual intervention. +Additionally, Mambular can manage unknown categorical values during inference by incorporating classical tokens in categorical preprocessing. + + +## Fit a Model +Fitting a model in mambular is as simple as it gets. All models in mambular are sklearn BaseEstimators. Thus the `.fit` method is implemented for all of them. Additionally, this allows for using all other sklearn inherent methods such as their built-in hyperparameter optimization tools.
+ +```python +from mambular.models import MambularClassifier +# Initialize and fit your model +model = MambularClassifier( + d_model=64, + n_layers=8, + numerical_preprocessing="ple", + n_bins=50 +) + +# X can be a dataframe or something that can be easily transformed into a pd.DataFrame, such as a np.array +model.fit(X, y, max_epochs=150, lr=1e-04) +``` + +Predictions are also easily obtained: +```python +# simple predictions +preds = model.predict(X) + +# Predict probabilities +preds = model.predict_proba(X) +``` + + +## Distributional Regression with MambularLSS + +Mambular introduces an approach to distributional regression through its `MambularLSS` module, allowing users to model the full distribution of a response variable, not just its mean. This method is particularly valuable in scenarios where understanding the variability, skewness, or kurtosis of the response distribution is as crucial as predicting its central tendency. All available models in mambular are also available as distributional models. + +### Key Features of MambularLSS: + +- **Full Distribution Modeling**: Unlike traditional regression models that predict a single value (e.g., the mean), `MambularLSS` models the entire distribution of the response variable. This allows for more informative predictions, including quantiles, variance, and higher moments. +- **Customizable Distribution Types**: `MambularLSS` supports a variety of distribution families (e.g., Gaussian, Poisson, Binomial), making it adaptable to different types of response variables, from continuous to count data. +- **Location, Scale, Shape Parameters**: The model predicts parameters corresponding to the location, scale, and shape of the distribution, offering a nuanced understanding of the data's underlying distributional characteristics.
+- **Enhanced Predictive Uncertainty**: By modeling the full distribution, `MambularLSS` provides richer information on predictive uncertainty, enabling more robust decision-making processes in uncertain environments. + + + +### Available Distribution Classes: + +`MambularLSS` offers a wide range of distribution classes to cater to various statistical modeling needs. The available distribution classes include: + +- `normal`: Normal Distribution for modeling continuous data with a symmetric distribution around the mean. +- `poisson`: Poisson Distribution for modeling count data that for instance represent the number of events occurring within a fixed interval. +- `gamma`: Gamma Distribution for modeling continuous data that is skewed and bounded at zero, often used for waiting times. +- `beta`: Beta Distribution for modeling data that is bounded between 0 and 1, useful for proportions and percentages. +- `dirichlet`: Dirichlet Distribution for modeling multivariate data where individual components are correlated, and the sum is constrained to 1. +- `studentt`: Student's T-Distribution for modeling data with heavier tails than the normal distribution, useful when the sample size is small. +- `negativebinom`: Negative Binomial Distribution for modeling count data with over-dispersion relative to the Poisson distribution. +- `inversegamma`: Inverse Gamma Distribution, often used as a prior distribution in Bayesian inference for scale parameters. +- `categorical`: Categorical Distribution for modeling categorical data with more than two categories. + +These distribution classes allow `MambularLSS` to flexibly model a wide variety of data types and distributions, providing users with the tools needed to capture the full complexity of their data. 
+ + +### Getting Started with MambularLSS: + +To integrate distributional regression into your workflow with `MambularLSS`, start by initializing the model with your desired configuration, similar to other Mambular models: + +```python +from mambular.models import MambularLSS + +# Initialize the MambularLSS model +model = MambularLSS( + dropout=0.2, + d_model=64, + n_layers=8, + +) + +# Fit the model to your data +model.fit( + X, + y, + max_epochs=150, + lr=1e-04, + patience=10, + family="normal" # define your distribution + ) + +``` + + +### Implement Your Own Model + +Mambular allows users to easily integrate their custom models into the existing logic. This process is designed to be straightforward, making it simple to create a PyTorch model and define its forward pass. Instead of inheriting from `nn.Module`, you inherit from Mambular's `BaseModel`. Each Mambular model takes three main arguments: the number of classes (e.g., 1 for regression or 2 for binary classification), `cat_feature_info`, and `num_feature_info` for categorical and numerical feature information, respectively. Additionally, you can provide a config argument, which can either be a custom configuration or one of the provided default configs. + +One of the key advantages of using Mambular is that the inputs to the forward passes are lists of tensors. While this might be unconventional, it is highly beneficial for models that treat different data types differently. For example, the TabTransformer model leverages this feature to handle categorical and numerical data separately, applying different transformations and processing steps to each type of data. + +Here's how you can implement a custom model with Mambular: + + +1. First, define your config: +The configuration class allows you to specify hyperparameters and other settings for your model. This can be done using a simple dataclass. 
+ +```python +from dataclasses import dataclass + +@dataclass +class MyConfig: + lr: float = 1e-04 + lr_patience: int = 10 + weight_decay: float = 1e-06 + lr_factor: float = 0.1 +``` + +2. Second, define your model: +Define your custom model just as you would for an `nn.Module`. The main difference is that you will inherit from `BaseModel` and use the provided feature information to construct your layers. To integrate your model into the existing API, you only need to define the architecture and the forward pass. + +```python +from mambular.base_models import BaseModel +import torch +import torch.nn as nn + +class MyCustomModel(BaseModel): + def __init__( + self, + cat_feature_info, + num_feature_info, + num_classes: int = 1, + config=None, + **kwargs, + ): + super().__init__(**kwargs) + self.save_hyperparameters(ignore=["cat_feature_info", "num_feature_info"]) + + input_dim = 0 + for feature_name, input_shape in num_feature_info.items(): + input_dim += input_shape + for feature_name, input_shape in cat_feature_info.items(): + input_dim += 1 + + self.linear = nn.Linear(input_dim, num_classes) + + def forward(self, num_features, cat_features): + x = num_features + cat_features + x = torch.cat(x, dim=1) + + # Pass through linear layer + output = self.linear(x) + return output +``` + +3. Leverage the Mambular API: +You can build a regression, classification or distributional regression model that can leverage all of Mambular's built-in methods by using the following: + +```python +from mambular.models import SklearnBaseRegressor + +class MyRegressor(SklearnBaseRegressor): + def __init__(self, **kwargs): + super().__init__(model=MyCustomModel, config=MyConfig, **kwargs) +``` + +4. Train and evaluate your model: +You can now fit, evaluate, and predict with your custom model just like with any other Mambular model. For classification or distributional regression, inherit from `SklearnBaseClassifier` or `SklearnBaseLSS` respectively. 
+ +```python +regressor = MyRegressor(numerical_preprocessing="ple") +regressor.fit(X_train, y_train, max_epochs=50) +``` + + +## Citation + +If you find this project useful in your research, please consider citing it: +```BibTeX +@misc{thielmann2024mambularsequentialmodeltabular, + title={Mambular: A Sequential Model for Tabular Deep Learning}, + author={Anton Frederik Thielmann and Manish Kumar and Christoph Weisser and Arik Reuter and Benjamin Säfken and Soheila Samiee}, + year={2024}, + eprint={2408.06291}, + archivePrefix={arXiv}, + primaryClass={cs.LG}, + url={https://arxiv.org/abs/2408.06291}, +} +``` + +## License + The entire codebase is under MIT license. \ No newline at end of file diff --git a/mambular/__version__.py b/mambular/__version__.py index 36509b4..43adb84 100644 --- a/mambular/__version__.py +++ b/mambular/__version__.py @@ -1,4 +1,4 @@ """Version information.""" # The following line *must* be the last in the module, exactly as formatted: -__version__ = "0.2.1" +__version__ = "0.2.2" diff --git a/mambular/models/fttransformer.py b/mambular/models/fttransformer.py index efd346e..a84e448 100644 --- a/mambular/models/fttransformer.py +++ b/mambular/models/fttransformer.py @@ -20,6 +20,8 @@ class FTTransformerRegressor(SklearnBaseRegressor): Learning rate for the optimizer. lr_patience : int, default=10 Number of epochs with no improvement after which learning rate will be reduced. + family : str, default=None + Distributional family to be used for the model. weight_decay : float, default=1e-06 Weight decay (L2 penalty) for the optimizer. lr_factor : float, default=0.1 diff --git a/mambular/models/mambatab.py b/mambular/models/mambatab.py index d8e6fad..baccad2 100644 --- a/mambular/models/mambatab.py +++ b/mambular/models/mambatab.py @@ -6,15 +6,290 @@ class MambaTabRegressor(SklearnBaseRegressor): + """ + MambaTab regressor. This class extends the SklearnBaseRegressor class and uses the MambaTab model + with the default MambaTab configuration. 
+ + The accepted arguments to the MambaTabRegressor class include both the attributes in the DefaultMambaTabConfig dataclass + and the parameters for the Preprocessor class. + + Parameters + ---------- + lr : float, default=1e-04 + Learning rate for the optimizer. + lr_patience : int, default=10 + Number of epochs with no improvement after which learning rate will be reduced. + weight_decay : float, default=1e-06 + Weight decay (L2 penalty) for the optimizer. + lr_factor : float, default=0.1 + Factor by which the learning rate will be reduced. + d_model : int, default=64 + Dimensionality of the model. + n_layers : int, default=8 + Number of layers in the model. + expand_factor : int, default=2 + Expansion factor for the feed-forward layers. + bias : bool, default=False + Whether to use bias in the linear layers. + d_conv : int, default=16 + Dimensionality of the convolutional layers. + conv_bias : bool, default=True + Whether to use bias in the convolutional layers. + dropout : float, default=0.05 + Dropout rate for regularization. + dt_rank : str, default="auto" + Rank of the time-step (dt) projection. + d_state : int, default=32 + Dimensionality of the state in recurrent layers. + dt_scale : float, default=1.0 + Scaling factor for the time-step (dt) projection. + dt_init : str, default="random" + Initialization method for the time-step (dt) projection. + dt_max : float, default=0.1 + Maximum value for the time-step (dt) initialization. + dt_min : float, default=1e-04 + Minimum value for the time-step (dt) initialization. + dt_init_floor : float, default=1e-04 + Floor value for the time-step (dt) initialization. + norm : str, default="RMSNorm" + Normalization method to be used. + activation : callable, default=nn.SELU() + Activation function for the model. + num_embedding_activation : callable, default=nn.Identity() + Activation function for numerical embeddings. + head_layer_sizes : list, default=(128, 64, 32) + Sizes of the layers in the head of the model. 
+ head_dropout : float, default=0.5 + Dropout rate for the head layers. + head_skip_layers : bool, default=False + Whether to skip layers in the head. + head_activation : callable, default=nn.SELU() + Activation function for the head layers. + head_use_batch_norm : bool, default=False + Whether to use batch normalization in the head layers. + norm : str, default="LayerNorm" + Normalization method to be used. + axis : int, default=1 + Axis over which Mamba iterates. If 1, it iterates over the rows; if 0, it iterates over the columns. + n_bins : int, default=50 + The number of bins to use for numerical feature binning. This parameter is relevant + only if `numerical_preprocessing` is set to 'binning' or 'one_hot'. + numerical_preprocessing : str, default="ple" + The preprocessing strategy for numerical features. Valid options are + 'binning', 'one_hot', 'standardization', and 'normalization'. + use_decision_tree_bins : bool, default=False + If True, uses decision tree regression/classification to determine + optimal bin edges for numerical feature binning. This parameter is + relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'. + binning_strategy : str, default="uniform" + Defines the strategy for binning numerical features. Options include 'uniform', + 'quantile', or other sklearn-compatible strategies. + cat_cutoff : float or int, default=0.03 + Indicates the cutoff after which integer values are treated as categorical. + If float, it's treated as a percentage. If int, it's the maximum number of + unique values for a column to be considered categorical. + treat_all_integers_as_numerical : bool, default=False + If True, all integer columns will be treated as numerical, regardless + of their unique value count or proportion. + degree : int, default=3 + The degree of the polynomial features to be used in preprocessing. + knots : int, default=12 + The number of knots to be used in spline transformations. 
+ """ + def __init__(self, **kwargs): super().__init__(model=MambaTab, config=DefaultMambaTabConfig, **kwargs) class MambaTabClassifier(SklearnBaseClassifier): + """ + MambaTab Classifier. This class extends the SklearnBaseClassifier class and uses the MambaTab model + with the default MambaTab configuration. + + The accepted arguments to the MambaTabClassifier class include both the attributes in the DefaultMambaTabConfig dataclass + and the parameters for the Preprocessor class. + + Parameters + ---------- + lr : float, default=1e-04 + Learning rate for the optimizer. + lr_patience : int, default=10 + Number of epochs with no improvement after which learning rate will be reduced. + weight_decay : float, default=1e-06 + Weight decay (L2 penalty) for the optimizer. + lr_factor : float, default=0.1 + Factor by which the learning rate will be reduced. + d_model : int, default=64 + Dimensionality of the model. + n_layers : int, default=8 + Number of layers in the model. + expand_factor : int, default=2 + Expansion factor for the feed-forward layers. + bias : bool, default=False + Whether to use bias in the linear layers. + d_conv : int, default=16 + Dimensionality of the convolutional layers. + conv_bias : bool, default=True + Whether to use bias in the convolutional layers. + dropout : float, default=0.05 + Dropout rate for regularization. + dt_rank : str, default="auto" + Rank of the time-step (dt) projection. + d_state : int, default=32 + Dimensionality of the state in recurrent layers. + dt_scale : float, default=1.0 + Scaling factor for the time-step (dt) projection. + dt_init : str, default="random" + Initialization method for the time-step (dt) projection. + dt_max : float, default=0.1 + Maximum value for the time-step (dt) initialization. + dt_min : float, default=1e-04 + Minimum value for the time-step (dt) initialization. + dt_init_floor : float, default=1e-04 + Floor value for the time-step (dt) initialization. + norm : str, default="RMSNorm" + Normalization method to be used. 
+ activation : callable, default=nn.SELU() + Activation function for the model. + num_embedding_activation : callable, default=nn.Identity() + Activation function for numerical embeddings. + head_layer_sizes : list, default=(128, 64, 32) + Sizes of the layers in the head of the model. + head_dropout : float, default=0.5 + Dropout rate for the head layers. + head_skip_layers : bool, default=False + Whether to skip layers in the head. + head_activation : callable, default=nn.SELU() + Activation function for the head layers. + head_use_batch_norm : bool, default=False + Whether to use batch normalization in the head layers. + norm : str, default="LayerNorm" + Normalization method to be used. + axis : int, default=1 + Axis over which Mamba iterates. If 1, it iterates over the rows; if 0, it iterates over the columns. + n_bins : int, default=50 + The number of bins to use for numerical feature binning. This parameter is relevant + only if `numerical_preprocessing` is set to 'binning' or 'one_hot'. + numerical_preprocessing : str, default="ple" + The preprocessing strategy for numerical features. Valid options are + 'binning', 'one_hot', 'standardization', and 'normalization'. + use_decision_tree_bins : bool, default=False + If True, uses decision tree regression/classification to determine + optimal bin edges for numerical feature binning. This parameter is + relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'. + binning_strategy : str, default="uniform" + Defines the strategy for binning numerical features. Options include 'uniform', + 'quantile', or other sklearn-compatible strategies. + cat_cutoff : float or int, default=0.03 + Indicates the cutoff after which integer values are treated as categorical. + If float, it's treated as a percentage. If int, it's the maximum number of + unique values for a column to be considered categorical. 
+ treat_all_integers_as_numerical : bool, default=False + If True, all integer columns will be treated as numerical, regardless + of their unique value count or proportion. + degree : int, default=3 + The degree of the polynomial features to be used in preprocessing. + knots : int, default=12 + The number of knots to be used in spline transformations. + """ + def __init__(self, **kwargs): super().__init__(model=MambaTab, config=DefaultMambaTabConfig, **kwargs) class MambaTabLSS(SklearnBaseLSS): + """ + MambaTab for distributional regression. This class extends the SklearnBaseLSS class and uses the MambaTab model + with the default MambaTab configuration. + + The accepted arguments to the MambaTabLSS class include both the attributes in the DefaultMambaTabConfig dataclass + and the parameters for the Preprocessor class. + + Parameters + ---------- + lr : float, default=1e-04 + Learning rate for the optimizer. + lr_patience : int, default=10 + Number of epochs with no improvement after which learning rate will be reduced. + family : str, default=None + Distributional family to be used for the model. + weight_decay : float, default=1e-06 + Weight decay (L2 penalty) for the optimizer. + lr_factor : float, default=0.1 + Factor by which the learning rate will be reduced. + d_model : int, default=64 + Dimensionality of the model. + n_layers : int, default=8 + Number of layers in the model. + expand_factor : int, default=2 + Expansion factor for the feed-forward layers. + bias : bool, default=False + Whether to use bias in the linear layers. + d_conv : int, default=16 + Dimensionality of the convolutional layers. + conv_bias : bool, default=True + Whether to use bias in the convolutional layers. + dropout : float, default=0.05 + Dropout rate for regularization. + dt_rank : str, default="auto" + Rank of the time-step (dt) projection. + d_state : int, default=32 + Dimensionality of the state in recurrent layers. + dt_scale : float, default=1.0 + Scaling factor for the time-step (dt) projection. 
+ dt_init : str, default="random" + Initialization method for the time-step (dt) projection. + dt_max : float, default=0.1 + Maximum value for the time-step (dt) initialization. + dt_min : float, default=1e-04 + Minimum value for the time-step (dt) initialization. + dt_init_floor : float, default=1e-04 + Floor value for the time-step (dt) initialization. + norm : str, default="RMSNorm" + Normalization method to be used. + activation : callable, default=nn.SELU() + Activation function for the model. + num_embedding_activation : callable, default=nn.Identity() + Activation function for numerical embeddings. + head_layer_sizes : list, default=(128, 64, 32) + Sizes of the layers in the head of the model. + head_dropout : float, default=0.5 + Dropout rate for the head layers. + head_skip_layers : bool, default=False + Whether to skip layers in the head. + head_activation : callable, default=nn.SELU() + Activation function for the head layers. + head_use_batch_norm : bool, default=False + Whether to use batch normalization in the head layers. + norm : str, default="LayerNorm" + Normalization method to be used. + axis : int, default=1 + Axis over which Mamba iterates. If 1, it iterates over the rows; if 0, it iterates over the columns. + n_bins : int, default=50 + The number of bins to use for numerical feature binning. This parameter is relevant + only if `numerical_preprocessing` is set to 'binning' or 'one_hot'. + numerical_preprocessing : str, default="ple" + The preprocessing strategy for numerical features. Valid options are + 'binning', 'one_hot', 'standardization', and 'normalization'. + use_decision_tree_bins : bool, default=False + If True, uses decision tree regression/classification to determine + optimal bin edges for numerical feature binning. This parameter is + relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'. + binning_strategy : str, default="uniform" + Defines the strategy for binning numerical features. 
Options include 'uniform', + 'quantile', or other sklearn-compatible strategies. + cat_cutoff : float or int, default=0.03 + Indicates the cutoff after which integer values are treated as categorical. + If float, it's treated as a percentage. If int, it's the maximum number of + unique values for a column to be considered categorical. + treat_all_integers_as_numerical : bool, default=False + If True, all integer columns will be treated as numerical, regardless + of their unique value count or proportion. + degree : int, default=3 + The degree of the polynomial features to be used in preprocessing. + knots : int, default=12 + The number of knots to be used in spline transformations. + """ + def __init__(self, **kwargs): super().__init__(model=MambaTab, config=DefaultMambaTabConfig, **kwargs) diff --git a/mambular/models/mambular.py b/mambular/models/mambular.py index 104448a..ef65ceb 100644 --- a/mambular/models/mambular.py +++ b/mambular/models/mambular.py @@ -275,6 +275,8 @@ class MambularLSS(SklearnBaseLSS): Learning rate for the optimizer. lr_patience : int, default=10 Number of epochs with no improvement after which learning rate will be reduced. + family : str, default=None + Distributional family to be used for the model. weight_decay : float, default=1e-06 Weight decay (L2 penalty) for the optimizer. lr_factor : float, default=0.1 diff --git a/mambular/models/mlp.py b/mambular/models/mlp.py index fb6baa9..60d77e3 100644 --- a/mambular/models/mlp.py +++ b/mambular/models/mlp.py @@ -201,6 +201,8 @@ class MLPLSS(SklearnBaseLSS): Learning rate for the optimizer. lr_patience : int, default=10 Number of epochs with no improvement after which learning rate will be reduced. + family : str, default=None + Distributional family to be used for the model. weight_decay : float, default=1e-06 Weight decay (L2 penalty) for the optimizer. 
lr_factor : float, default=0.1 diff --git a/mambular/models/resnet.py b/mambular/models/resnet.py index 1f6bc5f..e6cceb6 100644 --- a/mambular/models/resnet.py +++ b/mambular/models/resnet.py @@ -219,6 +219,8 @@ class ResNetLSS(SklearnBaseLSS): Dropout rate for regularization. norm : str, default=None Normalization method to be used, if any. + family : str, default=None + Distributional family to be used for the model. use_glu : bool, default=False Whether to use Gated Linear Units (GLU) in the ResNet. skip_connections : bool, default=True diff --git a/mambular/models/sklearn_base_classifier.py b/mambular/models/sklearn_base_classifier.py index f442688..0c7e30f 100644 --- a/mambular/models/sklearn_base_classifier.py +++ b/mambular/models/sklearn_base_classifier.py @@ -316,7 +316,7 @@ def fit( self : object The fitted classifier. """ - if not self.built and not rebuild: + if rebuild: if not isinstance(X, pd.DataFrame): X = pd.DataFrame(X) if isinstance(y, pd.Series): diff --git a/mambular/models/tabtransformer.py b/mambular/models/tabtransformer.py index 901369e..c7305d7 100644 --- a/mambular/models/tabtransformer.py +++ b/mambular/models/tabtransformer.py @@ -19,6 +19,8 @@ class TabTransformerRegressor(SklearnBaseRegressor): Learning rate for the optimizer. lr_patience : int, default=10 Number of epochs with no improvement after which learning rate will be reduced. + family : str, default=None + Distributional family to be used for the model. weight_decay : float, default=1e-06 Weight decay (L2 penalty) for the optimizer. lr_factor : float, default=0.1 diff --git a/mambular/models/tabularnn.py b/mambular/models/tabularnn.py index 60daf2a..dd96690 100644 --- a/mambular/models/tabularnn.py +++ b/mambular/models/tabularnn.py @@ -86,6 +86,9 @@ class TabulaRNNRegressor(SklearnBaseRegressor): The number of knots to be used in spline transformations. 
""" + def __init__(self, **kwargs): + super().__init__(model=TabulaRNN, config=DefaultTabulaRNNConfig, **kwargs) + class TabulaRNNClassifier(SklearnBaseClassifier): """ @@ -185,6 +188,8 @@ class TabulaRNNLSS(SklearnBaseLSS): Learning rate for the optimizer. model_type : str, default="RNN" type of model, one of "RNN", "LSTM", "GRU" + family : str, default=None + Distributional family to be used for the model. lr_patience : int, default=10 Number of epochs with no improvement after which learning rate will be reduced. weight_decay : float, default=1e-06