diff --git a/peak_performance/models.py b/peak_performance/models.py index ea674e0..a781afd 100644 --- a/peak_performance/models.py +++ b/peak_performance/models.py @@ -193,7 +193,7 @@ def define_model_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Model: def double_model_mean_prior(time): """ - Function creating prior probability distributions for double peaks using a ZeroSumNormal distribution. + Function creating prior probability distributions for multi-peaks using a ZeroSumNormal distribution. Parameters ---------- @@ -203,23 +203,31 @@ def double_model_mean_prior(time): Returns ------- mean - Normally distributed prior for the ordered means of the double peak model. + Normally distributed prior for the ordered means of the multi-peak model. diff - Difference between meanmean and mean. + Difference between the group mean and peak-wise mean. meanmean - Normally distributed prior for the mean of the double peak means. + Normally distributed prior for the group mean of the peak means. """ + pmodel = pm.modelcontext(None) meanmean = pm.Normal("meanmean", mu=np.min(time) + np.ptp(time) / 2, sigma=np.ptp(time) / 6) - diff = pm.ZeroSumNormal( - "diff", - sigma=1, - shape=(2,), # currently no dims due to bug with ordered transformation + diff_unsorted = pm.ZeroSumNormal( + "diff_unsorted", + sigma=2, + # Support arbitrary number of subpeaks + shape=len(pmodel.coords["subpeak"]), + # NOTE: As of PyMC v5.14, the OrderedTransform and ZeroSumTransform are incompatible. + # See https://github.com/pymc-devs/pymc/issues/6975. + # As a workaround we'll call pt.sort a few lines below. ) + diff = pm.Deterministic("diff", pt.sort(diff_unsorted), dims="subpeak") mean = pm.Normal( "mean", - mu=meanmean + diff, - sigma=1, - transform=pm.distributions.transforms.ordered, + meanmean + diff, + # Introduce a small jitter to the subpeak means to decouple them + # from the strictly asymmetric ZeroSumNormal entries. + # This reduces the chances of unwanted bimodality. + sigma=0.01, dims=("subpeak",), ) return mean, diff, meanmean diff --git a/peak_performance/pipeline.py b/peak_performance/pipeline.py index 66caa1e..df43023 100644 --- a/peak_performance/pipeline.py +++ b/peak_performance/pipeline.py @@ -489,6 +489,7 @@ def sampling(pmodel, **sample_kwargs): idata Inference data object. """ + sample_kwargs.setdefault("chains", 4) sample_kwargs.setdefault("tune", 2000) sample_kwargs.setdefault("draws", 2000) # check if nutpie is available; if so, use it to enhance performance diff --git a/peak_performance/test_models.py b/peak_performance/test_models.py index ffbe2ea..7a814ab 100644 --- a/peak_performance/test_models.py +++ b/peak_performance/test_models.py @@ -3,6 +3,7 @@ import arviz as az import numpy as np import pymc as pm +import pytensor.tensor as pt import pytest import scipy.integrate import scipy.stats as st @@ -26,6 +27,43 @@ def test_initial_guesses(): pass +def test_zsn_sorting(): + """This tests a workaround that we rely on for multi-peak models.""" + coords = { + "thing": ["left", "right"], + } + with pm.Model(coords=coords) as pmodel: + hyper = pm.Normal("hyper", mu=0, sigma=3) + diff = pm.ZeroSumNormal( + "diff", + sigma=1, + shape=2, + ) + # Create a sorted deterministic without using transforms + diff_sorted = pm.Deterministic("diff_sorted", pt.sort(diff), dims="thing") + pos = pm.Deterministic( + "pos", + hyper + diff_sorted, + dims="thing", + ) + # Observe the two things in incorrect order to provoke the model 😈 + dat = pm.Data("dat", [0.2, -0.3], dims="thing") + pm.Normal("L", pos, observed=dat, dims="thing") + + # Check draws from the prior + drawn = pm.draw(diff_sorted, draws=69) + np.testing.assert_array_less(drawn[:, 0], drawn[:, 1]) + + # And check MCMC draws too + with pmodel: + idata = pm.sample( + chains=1, tune=10, draws=69, step=pm.Metropolis(), compute_convergence_checks=False + ) + sampled = idata.posterior["diff_sorted"].stack(sample=("chain", "draw")).values.T + np.testing.assert_array_less(sampled[:, 0], sampled[:, 1]) + pass + + class TestDistributions: def test_normal_posterior(self): x = np.linspace(-5, 10, 10000) @@ -158,21 +196,20 @@ def test_double_skew_normal_posterior(self): @pytest.mark.parametrize( - "model_type", ["normal", "skew_normal", "double_normal", "double_skew_normal"] + "model_type,define_func", + [ + ("normal", models.define_model_normal), + ("skew_normal", models.define_model_skew), + ("double_normal", models.define_model_double_normal), + ("double_skew_normal", models.define_model_double_skew_normal), + ], ) -def test_pymc_sampling(model_type): +def test_pymc_sampling(model_type, define_func): timeseries = np.load( Path(__file__).absolute().parent.parent / "example" / "A2t2R1Part1_132_85.9_86.1.npy" ) - if model_type == models.ModelType.Normal: - pmodel = models.define_model_normal(timeseries[0], timeseries[1]) - elif model_type == models.ModelType.SkewNormal: - pmodel = models.define_model_skew(timeseries[0], timeseries[1]) - elif model_type == models.ModelType.DoubleNormal: - pmodel = models.define_model_double_normal(timeseries[0], timeseries[1]) - elif model_type == models.ModelType.DoubleSkewNormal: - pmodel = models.define_model_double_skew_normal(timeseries[0], timeseries[1]) + pmodel = define_func(timeseries[0], timeseries[1]) with pmodel: idata = pm.sample(cores=2, chains=2, tune=3, draws=5) if model_type in [models.ModelType.DoubleNormal, models.ModelType.DoubleSkewNormal]: diff --git a/pyproject.toml b/pyproject.toml index fa050b2..63b1393 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "peak_performance" -version = "0.6.5" +version = "0.7.0" authors = [ {name = "Jochen Nießer", email = "j.niesser@fz-juelich.de"}, {name = "Michael Osthege", email = "m.osthege@fz-juelich.de"},