From a5125fbbdf226c8d64163063da12f17b1b8977ba Mon Sep 17 00:00:00 2001
From: Michael Osthege
Date: Fri, 3 May 2024 18:55:46 +0200
Subject: [PATCH 1/4] Refactor test to avoid lots of `if/elif`

---
 peak_performance/test_models.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/peak_performance/test_models.py b/peak_performance/test_models.py
index ffbe2ea..a7e400a 100644
--- a/peak_performance/test_models.py
+++ b/peak_performance/test_models.py
@@ -158,21 +158,20 @@ def test_double_skew_normal_posterior(self):
 
 
 @pytest.mark.parametrize(
-    "model_type", ["normal", "skew_normal", "double_normal", "double_skew_normal"]
+    "model_type,define_func",
+    [
+        ("normal", models.define_model_normal),
+        ("skew_normal", models.define_model_skew),
+        ("double_normal", models.define_model_double_normal),
+        ("double_skew_normal", models.define_model_double_skew_normal),
+    ],
 )
-def test_pymc_sampling(model_type):
+def test_pymc_sampling(model_type, define_func):
     timeseries = np.load(
         Path(__file__).absolute().parent.parent / "example" / "A2t2R1Part1_132_85.9_86.1.npy"
     )
-    if model_type == models.ModelType.Normal:
-        pmodel = models.define_model_normal(timeseries[0], timeseries[1])
-    elif model_type == models.ModelType.SkewNormal:
-        pmodel = models.define_model_skew(timeseries[0], timeseries[1])
-    elif model_type == models.ModelType.DoubleNormal:
-        pmodel = models.define_model_double_normal(timeseries[0], timeseries[1])
-    elif model_type == models.ModelType.DoubleSkewNormal:
-        pmodel = models.define_model_double_skew_normal(timeseries[0], timeseries[1])
+    pmodel = define_func(timeseries[0], timeseries[1])
     with pmodel:
         idata = pm.sample(cores=2, chains=2, tune=3, draws=5)
     if model_type in [models.ModelType.DoubleNormal, models.ModelType.DoubleSkewNormal]:
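
The refactor above swaps an if/elif chain for pytest parameters that carry the
model-factory callable directly. A minimal, hypothetical sketch of the same pattern
(the `make_circle`/`make_square` factories below are illustrative stand-ins, not part
of peak_performance):

    import pytest

    def make_circle():
        return {"kind": "circle", "corners": 0}

    def make_square():
        return {"kind": "square", "corners": 4}

    @pytest.mark.parametrize(
        "kind,factory",
        [
            ("circle", make_circle),
            ("square", make_square),
        ],
    )
    def test_factory(kind, factory):
        # Each case supplies its callable directly, so no if/elif dispatch is needed.
        obj = factory()
        assert obj["kind"] == kind

Adding a new case is then a one-line change to the parameter list.
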
""" + pmodel = pm.modelcontext(None) meanmean = pm.Normal("meanmean", mu=np.min(time) + np.ptp(time) / 2, sigma=np.ptp(time) / 6) diff = pm.ZeroSumNormal( "diff", sigma=1, - shape=(2,), # currently no dims due to bug with ordered transformation + # Support arbitrary number of subpeaks + shape=len(pmodel.coords["subpeak"]), + # NOTE: As of PyMC v5.13, the OrderedTransform and ZeroSumTransform are incompatible. + # See https://github.com/pymc-devs/pymc/issues/6975. + # As a workaround we'll call pt.sort a few lines below. ) - mean = pm.Normal( + mean = pm.Deterministic( "mean", - mu=meanmean + diff, - sigma=1, - transform=pm.distributions.transforms.ordered, + meanmean + pt.sort(diff), dims=("subpeak",), ) return mean, diff, meanmean diff --git a/peak_performance/test_models.py b/peak_performance/test_models.py index a7e400a..7a814ab 100644 --- a/peak_performance/test_models.py +++ b/peak_performance/test_models.py @@ -3,6 +3,7 @@ import arviz as az import numpy as np import pymc as pm +import pytensor.tensor as pt import pytest import scipy.integrate import scipy.stats as st @@ -26,6 +27,43 @@ def test_initial_guesses(): pass +def test_zsn_sorting(): + """This tests a workaround that we rely on for multi-peak models.""" + coords = { + "thing": ["left", "right"], + } + with pm.Model(coords=coords) as pmodel: + hyper = pm.Normal("hyper", mu=0, sigma=3) + diff = pm.ZeroSumNormal( + "diff", + sigma=1, + shape=2, + ) + # Create a sorted deterministic without using transforms + diff_sorted = pm.Deterministic("diff_sorted", pt.sort(diff), dims="thing") + pos = pm.Deterministic( + "pos", + hyper + diff_sorted, + dims="thing", + ) + # Observe the two things in incorrect order to provoke the model 😈 + dat = pm.Data("dat", [0.2, -0.3], dims="thing") + pm.Normal("L", pos, observed=dat, dims="thing") + + # Check draws from the prior + drawn = pm.draw(diff_sorted, draws=69) + np.testing.assert_array_less(drawn[:, 0], drawn[:, 1]) + + # And check MCMC draws too + with pmodel: + idata = pm.sample( + chains=1, tune=10, draws=69, step=pm.Metropolis(), compute_convergence_checks=False + ) + sampled = idata.posterior["diff_sorted"].stack(sample=("chain", "draw")).values.T + np.testing.assert_array_less(sampled[:, 0], sampled[:, 1]) + pass + + class TestDistributions: def test_normal_posterior(self): x = np.linspace(-5, 10, 10000) diff --git a/pyproject.toml b/pyproject.toml index fa050b2..63b1393 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "peak_performance" -version = "0.6.5" +version = "0.7.0" authors = [ {name = "Jochen Nießer", email = "j.niesser@fz-juelich.de"}, {name = "Michael Osthege", email = "m.osthege@fz-juelich.de"}, From 4c73517a230b7d611b1b3d76fdcbb44da78fe0e6 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Fri, 10 May 2024 14:19:26 +0200 Subject: [PATCH 3/4] Fix multi-peak model to avoid bimodalities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jochen Nießer --- peak_performance/models.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/peak_performance/models.py b/peak_performance/models.py index 00dd613..a781afd 100644 --- a/peak_performance/models.py +++ b/peak_performance/models.py @@ -211,18 +211,23 @@ def double_model_mean_prior(time): """ pmodel = pm.modelcontext(None) meanmean = pm.Normal("meanmean", mu=np.min(time) + np.ptp(time) / 2, sigma=np.ptp(time) / 6) - diff = pm.ZeroSumNormal( - "diff", - sigma=1, + 
From 4c73517a230b7d611b1b3d76fdcbb44da78fe0e6 Mon Sep 17 00:00:00 2001
From: Michael Osthege
Date: Fri, 10 May 2024 14:19:26 +0200
Subject: [PATCH 3/4] Fix multi-peak model to avoid bimodalities
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Jochen Nießer
---
 peak_performance/models.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/peak_performance/models.py b/peak_performance/models.py
index 00dd613..a781afd 100644
--- a/peak_performance/models.py
+++ b/peak_performance/models.py
@@ -211,18 +211,23 @@ def double_model_mean_prior(time):
     """
     pmodel = pm.modelcontext(None)
     meanmean = pm.Normal("meanmean", mu=np.min(time) + np.ptp(time) / 2, sigma=np.ptp(time) / 6)
-    diff = pm.ZeroSumNormal(
-        "diff",
-        sigma=1,
+    diff_unsorted = pm.ZeroSumNormal(
+        "diff_unsorted",
+        sigma=2,
         # Support arbitrary number of subpeaks
         shape=len(pmodel.coords["subpeak"]),
-        # NOTE: As of PyMC v5.13, the OrderedTransform and ZeroSumTransform are incompatible.
+        # NOTE: As of PyMC v5.14, the OrderedTransform and ZeroSumTransform are incompatible.
         # See https://github.com/pymc-devs/pymc/issues/6975.
         # As a workaround we'll call pt.sort a few lines below.
     )
-    mean = pm.Deterministic(
+    diff = pm.Deterministic("diff", pt.sort(diff_unsorted), dims="subpeak")
+    mean = pm.Normal(
         "mean",
-        meanmean + pt.sort(diff),
+        meanmean + diff,
+        # Introduce a small jitter to the subpeak means to decouple them
+        # from the strictly asymmetric ZeroSumNormal entries.
+        # This reduces the chances of unwanted bimodality.
+        sigma=0.01,
         dims=("subpeak",),
     )
     return mean, diff, meanmean

From abc5677f38a4ddd8cc793bcc1f4b325020855116 Mon Sep 17 00:00:00 2001
From: Michael Osthege
Date: Fri, 10 May 2024 14:19:38 +0200
Subject: [PATCH 4/4] Default to 4 chains

---
 peak_performance/pipeline.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/peak_performance/pipeline.py b/peak_performance/pipeline.py
index 66caa1e..df43023 100644
--- a/peak_performance/pipeline.py
+++ b/peak_performance/pipeline.py
@@ -489,6 +489,7 @@ def sampling(pmodel, **sample_kwargs):
     idata
         Inference data object.
     """
+    sample_kwargs.setdefault("chains", 4)
     sample_kwargs.setdefault("tune", 2000)
     sample_kwargs.setdefault("draws", 2000)
     # check if nutpie is available; if so, use it to enhance performance
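
Because the pipeline uses `dict.setdefault`, the new chains default only applies when
the caller did not pass `chains` explicitly; user-supplied sample_kwargs still win. A
tiny stand-in (the function name `sampling_defaults` is hypothetical and mirrors only
the first lines of `pipeline.sampling`) illustrates the behaviour:

    def sampling_defaults(**sample_kwargs):
        # Defaults are only filled in for keys the caller did not provide.
        sample_kwargs.setdefault("chains", 4)
        sample_kwargs.setdefault("tune", 2000)
        sample_kwargs.setdefault("draws", 2000)
        return sample_kwargs

    print(sampling_defaults())          # {'chains': 4, 'tune': 2000, 'draws': 2000}
    print(sampling_defaults(chains=2))  # {'chains': 2, 'tune': 2000, 'draws': 2000}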