diff --git a/R/families.R b/R/families.R index 2851803..7628492 100644 --- a/R/families.R +++ b/R/families.R @@ -544,14 +544,15 @@ tfd_mvr <- function(loc, scale, } # Implementation of a distribution-like layer for (Quasi-)Tweedie -tfd_tweedie <- function(loc, p = 1.5, quasi = FALSE, +tfd_tweedie <- function(loc, phi, p = 1.5, quasi = FALSE, validate_args = FALSE, allow_nan_stats = TRUE, - name = "QuasiTweedie") + name = "Tweedie") { args <- list( loc = loc, + scale = phi, var_power = p, quasi = quasi, validate_args = validate_args, @@ -567,12 +568,22 @@ tfd_tweedie <- function(loc, p = 1.5, quasi = FALSE, } # tfd_distfun for (Quasi-)Tweedie to allow for flexible p -tweedie <- function(p, quasi = FALSE) +tweedie <- function(p, quasi = FALSE, output_dim = 1L) { - tfd_dist <- function(l) tfd_tweedie(loc = l, p = p, quasi = quasi) - ret_fun <- function(x) tfd_dist(tf$add(1e-8, tfe(x))) - attr(ret_fun, "nrparams_dist") <- 1L + tfd_dist <- function(l, s) tfd_tweedie(loc = l, phi = s, p = p, quasi = quasi) + trafo_list <- list(function(x) tf$add(1e-8, tfe(x)), + function(x) tf$add(1e-8, tfe(x))) + dist_dim <- 2L + ret_fun <- function(x) + do.call(tfd_dist, + lapply(1:(x$shape[[2]]/output_dim), + function(i) + trafo_list[[i]]( + tf_stride_cols(x,(i-1L)*output_dim+1L, + (i-1L)*output_dim+output_dim))) + ) + attr(ret_fun, "nrparams_dist") <- 2L return(ret_fun) diff --git a/inst/python/distributions/tweedie.py b/inst/python/distributions/tweedie.py index cd670d2..f0b4bfe 100644 --- a/inst/python/distributions/tweedie.py +++ b/inst/python/distributions/tweedie.py @@ -11,13 +11,16 @@ from tensorflow.experimental import numpy as tnp class Tweedie(distribution.AutoCompositeTensorDistribution): - """Mean-Variance Regression (https://arxiv.org/pdf/1804.01631.pdf) + """Tweedie """ def __init__(self, loc, + scale, var_power=1., quasi=False, + a=1.01, + b=1.99, validate_args=False, allow_nan_stats=True, name='Tweedie'): @@ -26,6 +29,8 @@ def __init__(self, broadcasting. Args: loc: Floating point tensor; the means of the distribution(s). + scale: Floating point tensor; the scale of the distribution for Quasi, + phi for non-Quasi var_power: The variance power, also referred to as "p". The default is 1. quasi: Python `bool`, default `False`. When `True` quasi log-liklihood is used. validate_args: Python `bool`, default `False`. When `True` distribution @@ -40,11 +45,15 @@ def __init__(self, """ parameters = dict(locals()) with tf.name_scope(name) as name: - dtype = dtype_util.common_dtype([loc], dtype_hint=tf.float32) + dtype = dtype_util.common_dtype([loc, scale], dtype_hint=tf.float32) self._loc = tensor_util.convert_nonref_to_tensor( loc, dtype=dtype, name='loc') + self._scale = tensor_util.convert_nonref_to_tensor( + scale, dtype=dtype, name='scale') self._p = var_power self.quasi = quasi + self.a = a + self.b = b super(Tweedie, self).__init__( dtype=dtype, reparameterization_type=reparameterization.FULLY_REPARAMETERIZED, @@ -67,10 +76,15 @@ def _parameter_properties(cls, dtype, num_classes=None): def loc(self): """Parameter for the mean.""" return self._loc + + @property + def scale(self): + """Parameter for standard deviation.""" + return self._scale @property def p(self): - """Parameter for standard deviation.""" + """Parameter for power.""" return self._p def _event_shape_tensor(self): @@ -81,7 +95,8 @@ def _event_shape(self): def _sample_n(self, n, seed=None): loc = tf.convert_to_tensor(self.loc) - shape = ps.concat([[n], self._batch_shape_tensor(loc=loc, scale=1)], + scale = tf.convert_to_tensor(self.scale) + shape = ps.concat([[n], self._batch_shape_tensor(loc=loc, scale=scale)], axis=0) sampled = samplers.normal( shape=shape, mean=0., stddev=1., dtype=self.dtype, seed=seed) @@ -91,25 +106,36 @@ def _sample_n(self, n, seed=None): def _log_prob(self, x): """Used for the loss of the model -- not an actual log prob""" if self.quasi: # from https://www.statsmodels.org/stable/_modules/statsmodels/genmod/families/family.html#Tweedie - llf = log(2 * tnp.pi) + self.p * log(x) + llf = log(2 * tnp.pi * self.scale) + self.p * log(x) llf /= -2 u = (x ** (2 - self.p) - (2 - self.p) * x * self.loc ** (1 - self.p) + (1 - self.p) * self.loc ** (2 - self.p)) - u *= 1 / ((1 - self.p) * (2 - self.p)) + u *= 1 / (self.scale * (1 - self.p) * (2 - self.p)) return llf - u - else: # from https://github.com/cran/statmod/blob/master/R/tweedie.R negative deviance residuals - x1 = x + 0.1 * tf.cast(tf.equal(x, 0), tf.float32) - theta = (tf.pow(x1, 1 - self.p) - tf.pow(self.loc, 1 - self.p)) / (1 - self.p) - kappa = (tf.pow(x, 2 - self.p) - tf.pow(self.loc, 2 - self.p)) / (2 - self.p) - - return - 2 * (x * theta - kappa) + else: + # from https://github.com/cran/statmod/blob/master/R/tweedie.R negative deviance residuals + # x1 = x + 0.1 * tf.cast(tf.equal(x, 0), tf.float32) + # theta = (tf.pow(x1, 1 - self.p) - tf.pow(self.loc, 1 - self.p)) / (1 - self.p) + # kappa = (tf.pow(x, 2 - self.p) - tf.pow(self.loc, 2 - self.p)) / (2 - self.p) + # return - 2 * (x * theta - kappa) + # from https://github.com/cran/mgcv/blob/aff4560d187dfd7d98c7bd367f5a0076faf129b7/R/gamlss.r#L2474 + ethi = tf.exp(-self.p) # assuming p > 0 + p = (self.b + self.a * ethi)/(1+ethi) + x1 = x + tf.cast(x == 0, tf.float32) + theta = (tf.pow(x1, 1 - p) - tf.pow(self.loc, 1 - p)) / (1 - p) + kappa = (tf.pow(x, 2 - p) - tf.pow(self.loc, 2 - p)) / (2 - p) + return tf.sign(x - self.loc) * tf.sqrt(tf.nn.relu(2 * (x * theta - kappa) * 1 / self.scale)) + def _mean(self): return self.loc * tf.ones_like(self.scale) def _stddev(self): - return self.scale * tf.ones_like(self.loc) + if self.quasi: + return self._scale + else: + return tf.sqrt(self._scale * tf.pow(self.loc, self.p)) def _default_event_space_bijector(self): return identity_bijector.Identity(validate_args=self.validate_args)