Add support for mixture distributions

StatMixedML · Aug 25, 2023 · c45f0fd · c45f0fd
1 parent 3f5b5e1
commit c45f0fd
Show file tree

Hide file tree

Showing 23 changed files with 2,699 additions and 67 deletions.
diff --git a/docs/dgbm.md b/docs/dgbm.md
diff --git a/docs/distributions.md b/docs/distributions.md
diff --git a/docs/examples/GaussianMixture_Regression_CaliforniaHousing.ipynb b/docs/examples/GaussianMixture_Regression_CaliforniaHousing.ipynb
diff --git a/docs/mixture.png b/docs/mixture.png
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -35,7 +35,8 @@ nav:
     - Basic Walkthrough - Gaussian Regression: examples/Gaussian_Regression.ipynb
     - Dirichlet Regression: examples/Dirichlet_Regression.ipynb
     - Expectile Regression: examples/Expectile_Regression.ipynb
-    - Gamma Regression (Boston Housing Data): examples/Gamma_Regression_BostonHousing.ipynb
+    - Gaussian-Mixture Regression: examples/GaussianMixture_Regression_CaliforniaHousing.ipynb
+    - Gamma Regression (California Housing Data): examples/Gamma_Regression_CaliforniaHousing.ipynb
     - How to Select a Univariate Distribution: examples/How_To_Select_A_Univariate_Distribution.ipynb
     - How to Select a Multivariate Distribution: examples/How_To_Select_A_Multivariate_Distribution.ipynb
     - Multivariate Gaussian Regression (Cholesky Decomposition): examples/MVN_Cholesky.ipynb

diff --git a/tests/test_distribution_utils/test_calculate_start_values.py b/tests/test_distribution_utils/test_calculate_start_values.py
@@ -22,3 +22,23 @@ def test_calculate_start_values(self, dist_class, loss_fn):
         assert start_values.shape[0] == dist_class.dist.n_dist_param
         assert not np.isnan(start_values).any()
         assert not np.isinf(start_values).any()
+
+    def test_calculate_start_values_mixture(self, mixture_class, loss_fn):
+        # Create data for testing
+        _, target, _ = gen_test_data(mixture_class)
+
+        # Set the loss function for testing
+        mixture_class.dist.loss_fn = loss_fn
+
+        # Call the calculate_start_values method
+        loss, start_values = mixture_class.dist.calculate_start_values(target)
+
+        # Assertions
+        assert isinstance(loss, np.ndarray)
+        assert not np.isnan(loss).any()
+        assert not np.isinf(loss).any()
+
+        assert isinstance(start_values, np.ndarray)
+        assert start_values.shape[0] == mixture_class.dist.n_dist_param
+        assert not np.isnan(start_values).any()
+        assert not np.isinf(start_values).any()
diff --git a/tests/test_distribution_utils/test_compute_gradients_and_hessians.py b/tests/test_distribution_utils/test_compute_gradients_and_hessians.py
@@ -1,5 +1,5 @@
 from ..utils import BaseTestClass, gen_test_data
-from typing import List
+from typing import List, Tuple
 import numpy as np
 import torch
 
@@ -41,6 +41,42 @@ def test_compute_gradients_and_hessians(self, dist_class, loss_fn, stabilization
         assert not np.isnan(grad).any()
         assert not np.isnan(hess).any()
 
+    def test_compute_gradients_and_hessians_mixture(self, mixture_class, loss_fn, stabilization):
+        # Create data for testing
+        params, target, weights, _ = gen_test_data(mixture_class, weights=True)
+        if mixture_class.dist.univariate:
+            target = torch.tensor(target)
+        else:
+            target = torch.tensor(target)[:, :mixture_class.dist.n_targets]
+        start_values = np.array([0.5 for _ in range(mixture_class.dist.n_dist_param)])
+
+        # Set the loss function for testing
+        mixture_class.dist.loss_fn = loss_fn
+
+        # Set the stabilization for testing
+        mixture_class.dist.stabilization = stabilization
+
+        # Call the function
+        predt, loss = mixture_class.dist.get_params_loss(params, target, start_values, requires_grad=True)
+        grad, hess = mixture_class.dist.compute_gradients_and_hessians(loss, predt, weights)
+
+        # Assertions
+        assert isinstance(predt, Tuple)
+        for i in range(len(predt)):
+            assert isinstance(predt[i], torch.Tensor)
+            assert not torch.isnan(predt[i]).any()
+            assert not torch.isinf(predt[i]).any()
+        assert isinstance(loss, torch.Tensor)
+        assert not torch.isnan(loss).any()
+        assert not torch.isinf(loss).any()
+
+        assert isinstance(grad, np.ndarray)
+        assert isinstance(hess, np.ndarray)
+        assert grad.shape == params.flatten().shape
+        assert hess.shape == params.flatten().shape
+        assert not np.isnan(grad).any()
+        assert not np.isnan(hess).any()
+
     def test_compute_gradients_and_hessians_crps(self, dist_class_crps, stabilization):
         # Create data for testing
         params, target, weights, _ = gen_test_data(dist_class_crps, weights=True)
@@ -113,3 +149,40 @@ def test_compute_gradients_and_hessians_nans(self, dist_class, loss_fn, stabiliz
         assert hess.shape == params.flatten().shape
         assert not np.isnan(grad).any()
         assert not np.isnan(hess).any()
+
+    def test_compute_gradients_and_hessians_mixture_nans(self, mixture_class, loss_fn, stabilization):
+        # Create data for testing
+        params, target, weights, _ = gen_test_data(mixture_class, weights=True)
+        params[0, 0] = np.nan
+        if mixture_class.dist.univariate:
+            target = torch.tensor(target)
+        else:
+            target = torch.tensor(target)[:, :mixture_class.dist.n_targets]
+        start_values = np.array([0.5 for _ in range(mixture_class.dist.n_dist_param)])
+
+        # Set the loss function for testing
+        mixture_class.dist.loss_fn = loss_fn
+
+        # Set the stabilization for testing
+        mixture_class.dist.stabilization = stabilization
+
+        # Call the function
+        predt, loss = mixture_class.dist.get_params_loss(params, target, start_values, requires_grad=True)
+        grad, hess = mixture_class.dist.compute_gradients_and_hessians(loss, predt, weights)
+
+        # Assertions
+        assert isinstance(predt, Tuple)
+        for i in range(len(predt)):
+            assert isinstance(predt[i], torch.Tensor)
+            assert not torch.isnan(predt[i]).any()
+            assert not torch.isinf(predt[i]).any()
+        assert isinstance(loss, torch.Tensor)
+        assert not torch.isnan(loss).any()
+        assert not torch.isinf(loss).any()
+
+        assert isinstance(grad, np.ndarray)
+        assert isinstance(hess, np.ndarray)
+        assert grad.shape == params.flatten().shape
+        assert hess.shape == params.flatten().shape
+        assert not np.isnan(grad).any()
+        assert not np.isnan(hess).any()
diff --git a/tests/test_distribution_utils/test_dist_select.py b/tests/test_distribution_utils/test_dist_select.py
@@ -1,13 +1,24 @@
 from ..utils import BaseTestClass
 
-from xgboostlss.distributions import Beta, Gaussian, StudentT, Gamma, Cauchy, LogNormal, Weibull, Gumbel, Laplace
+from xgboostlss.distributions import (
+    Beta,
+    Gaussian,
+    StudentT,
+    Gamma,
+    Cauchy,
+    LogNormal,
+    Weibull,
+    Gumbel,
+    Laplace)
+from xgboostlss.distributions.Mixture import *
 from xgboostlss.distributions.SplineFlow import *
 from xgboostlss.distributions.MVN import *
 from xgboostlss.distributions.MVT import *
 from xgboostlss.distributions.MVN_LoRa import *
 from xgboostlss.distributions.distribution_utils import DistributionClass as univariate_dist_class
 from xgboostlss.distributions.multivariate_distribution_utils import Multivariate_DistributionClass as multivariate_dist_class
 from xgboostlss.distributions.flow_utils import NormalizingFlowClass as flow_dist_class
+from xgboostlss.distributions.mixture_distribution_utils import MixtureDistributionClass as mixture_dist_class
 
 
 class TestClass(BaseTestClass):
@@ -21,7 +32,7 @@ def test_univar_dist_select(self):
 
         # Call the function
         dist_df = univariate_dist_class().dist_select(
-            target, candidate_distributions, n_samples=10, plot=False
+            target, candidate_distributions, plot=False, max_iter=2
         ).reset_index(drop=True)
 
         # Assertions
@@ -39,7 +50,7 @@ def test_univar_dist_select_plot(self):
 
         # Call the function
         dist_df = univariate_dist_class().dist_select(
-            target, candidate_distributions, n_samples=10, plot=True
+            target, candidate_distributions, plot=True, max_iter=2
         ).reset_index(drop=True)
 
         # Assertions
@@ -66,7 +77,7 @@ def test_flow_select(self):
 
         # Call the function
         dist_df = flow_dist_class().flow_select(
-            target, candidate_flows, n_samples=10, plot=False
+            target, candidate_flows, plot=False, max_iter=2
         ).reset_index(drop=True)
 
         # Assertions
@@ -90,7 +101,7 @@ def test_flow_select_plot(self):
 
         # Call the function
         dist_df = flow_dist_class().flow_select(
-            target, candidate_flows, n_samples=10, plot=True
+            target, candidate_flows, plot=True, max_iter=2
         ).reset_index(drop=True)
 
         # Assertions
@@ -101,26 +112,61 @@ def test_flow_select_plot(self):
         assert not np.isnan(dist_df["nll"].values).any()
         assert not np.isinf(dist_df["nll"].values).any()
 
-    def test_flow_select_plot(self):
+    ####################################################################################################################
+    # Mixture Distributions
+    ####################################################################################################################
+    def test_mixture_dist_select(self):
         # Create data for testing
         target = np.array([0.2, 0.4, 0.6, 0.8]).reshape(-1, 1)
-        bound = np.max([np.abs(target.min()), target.max()])
-        target_support = "real"
+        candidate_distributions = [
+            Mixture(Beta.Beta()),
+            Mixture(Gaussian.Gaussian()),
+            Mixture(StudentT.StudentT()),
+            Mixture(Gamma.Gamma()),
+            Mixture(Cauchy.Cauchy()),
+            Mixture(LogNormal.LogNormal()),
+            Mixture(Weibull.Weibull()),
+            Mixture(Gumbel.Gumbel()),
+            Mixture(Laplace.Laplace())
+        ]
 
-        candidate_flows = [
-            SplineFlow(target_support=target_support, count_bins=2, bound=bound, order="linear"),
-            SplineFlow(target_support=target_support, count_bins=2, bound=bound, order="quadratic")
+        # Call the function
+        dist_df = mixture_dist_class().dist_select(
+            target, candidate_distributions, plot=False, max_iter=2
+        ).reset_index(drop=True)
+
+        # Assertions
+        assert isinstance(dist_df, pd.DataFrame)
+        assert not dist_df.isna().any().any()
+        assert isinstance(dist_df["distribution"].values[0], str)
+        assert np.issubdtype(dist_df["nll"].dtype, np.float64)
+        assert not np.isnan(dist_df["nll"].values).any()
+        assert not np.isinf(dist_df["nll"].values).any()
+
+    def test_mixture_dist_select_plot(self):
+        # Create data for testing
+        target = np.array([0.2, 0.4, 0.6, 0.8]).reshape(-1, 1)
+        candidate_distributions = [
+            Mixture(Beta.Beta()),
+            Mixture(Gaussian.Gaussian()),
+            Mixture(StudentT.StudentT()),
+            Mixture(Gamma.Gamma()),
+            Mixture(Cauchy.Cauchy()),
+            Mixture(LogNormal.LogNormal()),
+            Mixture(Weibull.Weibull()),
+            Mixture(Gumbel.Gumbel()),
+            Mixture(Laplace.Laplace())
         ]
 
         # Call the function
-        dist_df = flow_dist_class().flow_select(
-            target, candidate_flows, n_samples=10, plot=True
+        dist_df = mixture_dist_class().dist_select(
+            target, candidate_distributions, plot=True, max_iter=2
         ).reset_index(drop=True)
 
         # Assertions
         assert isinstance(dist_df, pd.DataFrame)
         assert not dist_df.isna().any().any()
-        assert isinstance(dist_df["NormFlow"].values[0], str)
+        assert isinstance(dist_df["distribution"].values[0], str)
         assert np.issubdtype(dist_df["nll"].dtype, np.float64)
         assert not np.isnan(dist_df["nll"].values).any()
         assert not np.isinf(dist_df["nll"].values).any()
@@ -141,7 +187,7 @@ def test_multivar_dist_select(self):
 
         # Call the function
         dist_df = multivariate_dist_class().dist_select(
-            target, candidate_distributions, n_samples=10, plot=False
+            target, candidate_distributions, plot=False, max_iter=2
         ).reset_index(drop=True)
 
         # Assertions
@@ -165,7 +211,7 @@ def test_multivar_dist_select_plot(self):
 
         # Call the function
         dist_df = multivariate_dist_class().dist_select(
-            target, candidate_distributions, n_samples=10, plot=True, ncol=1
+            target, candidate_distributions, plot=True, ncol=1, max_iter=2
         ).reset_index(drop=True)
 
         # Assertions

diff --git a/tests/test_distribution_utils/test_draw_samples.py b/tests/test_distribution_utils/test_draw_samples.py
@@ -44,3 +44,17 @@ def test_draw_samples(self, dist_class):
             assert isinstance(dist_samples, (pd.DataFrame, type(None)))
             assert not dist_samples.isna().any().any()
             assert not np.isinf(dist_samples.iloc[:, 1:]).any().any()
+
+    def test_draw_samples_mixture(self, mixture_class):
+        # Create data for testing
+        predt_params = pd.DataFrame(np.array([0.5 for _ in range(mixture_class.dist.n_dist_param)], dtype="float32")).T
+
+        # Call the function
+        dist_samples = mixture_class.dist.draw_samples(predt_params)
+
+        # Assertions
+        assert isinstance(dist_samples, (pd.DataFrame, type(None)))
+        assert not dist_samples.isna().any().any()
+        assert not np.isinf(dist_samples).any().any()
+
+
diff --git a/tests/test_distribution_utils/test_get_params_loss.py b/tests/test_distribution_utils/test_get_params_loss.py
@@ -1,5 +1,5 @@
 from ..utils import BaseTestClass, gen_test_data
-from typing import List
+from typing import List, Tuple
 import numpy as np
 import torch
 
@@ -80,3 +80,60 @@ def test_get_params_loss_crps(self, dist_class_crps, requires_grad):
         assert isinstance(loss, torch.Tensor)
         assert not torch.isnan(loss).any()
         assert not torch.isinf(loss).any()
+
+    def test_get_params_loss_mixture(self, mixture_class, loss_fn, requires_grad, hessian_mode):
+        # Create data for testing
+        predt, target, _ = gen_test_data(mixture_class)
+        if mixture_class.dist.univariate:
+            target = torch.tensor(target)
+        else:
+            target = torch.tensor(target)[:, :mixture_class.dist.n_targets]
+        start_values = np.array([0.5 for _ in range(mixture_class.dist.n_dist_param)])
+
+        # Set the loss function for testing
+        mixture_class.dist.loss_fn = loss_fn
+
+        # Set the hessian mode for testing
+        mixture_class.dist.hessian_mode = hessian_mode
+
+        # Call the function
+        predt, loss = mixture_class.dist.get_params_loss(predt, target, start_values, requires_grad)
+
+        # Assertions
+        assert isinstance(predt, Tuple)
+        for i in range(len(predt)):
+            assert isinstance(predt[i], torch.Tensor)
+            assert not torch.isnan(predt[i]).any()
+            assert not torch.isinf(predt[i]).any()
+        assert isinstance(loss, torch.Tensor)
+        assert not torch.isnan(loss).any()
+        assert not torch.isinf(loss).any()
+
+    def test_get_params_loss_mixture_nans(self, mixture_class, loss_fn, requires_grad, hessian_mode):
+        # Create data for testing
+        predt, target, _ = gen_test_data(mixture_class)
+        predt[0, 0] = np.nan
+        if mixture_class.dist.univariate:
+            target = torch.tensor(target)
+        else:
+            target = torch.tensor(target)[:, :mixture_class.dist.n_targets]
+        start_values = np.array([0.5 for _ in range(mixture_class.dist.n_dist_param)])
+
+        # Set the loss function for testing
+        mixture_class.dist.loss_fn = loss_fn
+
+        # Set the hessian mode for testing
+        mixture_class.dist.hessian_mode = hessian_mode
+
+        # Call the function
+        predt, loss = mixture_class.dist.get_params_loss(predt, target, start_values, requires_grad)
+
+        # Assertions
+        assert isinstance(predt, Tuple)
+        for i in range(len(predt)):
+            assert isinstance(predt[i], torch.Tensor)
+            assert not torch.isnan(predt[i]).any()
+            assert not torch.isinf(predt[i]).any()
+        assert isinstance(loss, torch.Tensor)
+        assert not torch.isnan(loss).any()
+        assert not torch.isinf(loss).any()
diff --git a/tests/test_distribution_utils/test_loss_fn_start_values.py b/tests/test_distribution_utils/test_loss_fn_start_values.py
@@ -27,3 +27,26 @@ def test_loss_fn_start_values(self, dist_class, loss_fn):
             assert isinstance(loss, torch.Tensor)
             assert not torch.isnan(loss).any()
             assert not torch.isinf(loss).any()
+
+    def test_loss_fn_start_values_mixture(self, mixture_class, loss_fn):
+        # Create data for testing
+        _, target, _ = gen_test_data(mixture_class)
+        predt = [
+            torch.tensor(0.5, dtype=torch.float64).reshape(-1, 1).requires_grad_(True) for _ in
+            range(mixture_class.dist.n_dist_param)
+        ]
+        if mixture_class.dist.univariate:
+            target = torch.tensor(target)
+        else:
+            target = torch.tensor(target)[:, :mixture_class.dist.n_targets]
+
+        # Set the loss function for testing
+        mixture_class.dist.loss_fn = loss_fn
+
+        # Call the function
+        loss = mixture_class.dist.loss_fn_start_values(predt, target)
+
+        # Assertions
+        assert isinstance(loss, torch.Tensor)
+        assert not torch.isnan(loss).any()
+        assert not torch.isinf(loss).any()