fixes for tf+keras+tfp version upgrade (<3)

neural-structured-additive-learning · Dec 1, 2024 · f93e778 · f93e778
1 parent bf9c6bb
commit f93e778
Show file tree

Hide file tree

Showing 39 changed files with 1,127 additions and 595 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -17,9 +17,9 @@ Config/reticulate:
   list(
     packages = list(
       list(package = "six", pip = TRUE),
-      list(package = "tensorflow", version = "2.10.0", pip = TRUE),
-      list(package = "tensorflow_probability", version = "0.16", pip = TRUE),
-      list(package = "keras", version = "2.10.0", pip = TRUE))
+      list(package = "tensorflow", version = "2.15", pip = TRUE),
+      list(package = "tensorflow_probability", version = "0.23", pip = TRUE),
+      list(package = "keras", version = "2.15", pip = TRUE))
   )
 Depends: 
     R (>= 4.0.0),
@@ -46,4 +46,4 @@ Imports:
 License: GPL-3
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.2
diff --git a/NAMESPACE b/NAMESPACE
@@ -33,6 +33,7 @@ export(extract_S)
 export(extract_pure_gam_part)
 export(extractlen)
 export(extractval)
+export(extractvals)
 export(extractvar)
 export(family_to_tfd)
 export(family_to_trafo)
@@ -129,7 +130,6 @@ export(tib_layer)
 export(tibgroup_layer)
 export(tibgroup_layer_torch)
 export(tiblinlasso_layer_torch)
-export(tweedie)
 export(weight_control)
 import(Matrix)
 import(R6)

diff --git a/R/families.R b/R/families.R
@@ -25,47 +25,47 @@ tfmult <- function(x,y) tf$math$multiply(x,y)
 #' with parameters (and corresponding inverse link function in brackets):
 #'
 #' \itemize{
-#'  \item{"normal": }{normal distribution with location (identity), scale (exp)}
-#'  \item{"bernoulli": }{bernoulli distribution with logits (identity)}
-#'  \item{"bernoulli_prob": }{bernoulli distribution with probabilities (sigmoid)}
-#'  \item{"beta": }{beta with concentration 1 = alpha (exp) and concentration
+#'  \item{\code{"normal": }}{normal distribution with location (identity), scale (exp)}
+#'  \item{\code{"bernoulli": }}{bernoulli distribution with logits (identity)}
+#'  \item{\code{"bernoulli_prob": }}{bernoulli distribution with probabilities (sigmoid)}
+#'  \item{\code{"beta": }}{beta with concentration 1 = alpha (exp) and concentration
 #'  0 = beta (exp)}
-#'  \item{"betar": }{beta with mean (sigmoid) and scale (sigmoid)}
-#'  \item{"cauchy": }{location (identity), scale (exp)}
-#'  \item{"chi2": }{cauchy with df (exp)}
-#'  \item{"chi": }{cauchy with df (exp)}
-#'  \item{"exponential": }{exponential with lambda (exp)}
-#'  \item{"gamma": }{gamma with concentration (exp) and rate (exp)}
-#'  \item{"gammar": }{gamma with location (exp) and scale (exp), following
+#'  \item{\code{"betar": }}{beta with mean (sigmoid) and scale (sigmoid)}
+#'  \item{\code{"cauchy": }}{location (identity), scale (exp)}
+#'  \item{\code{"chi2": }}{chi-squared with df (exp)}
+#'  \item{\code{"chi": }}{chi with df (exp)}
+#'  \item{\code{"exponential": }}{exponential with lambda (exp)}
+#'  \item{\code{"gamma": }}{gamma with concentration (exp) and rate (exp)}
+#'  \item{\code{"gammar": }}{gamma with location (exp) and scale (exp), following
 #'  \code{gamlss.dist::GA}, which implies that the expectation is the location, 
 #'  and the variance of the distribution is the \code{location^2 scale^2}}
-#'  \item{"gumbel": }{gumbel with location (identity), scale (exp)}
-#'  \item{"half_cauchy": }{half cauchy with location (identity), scale (exp)}
-#'  \item{"half_normal": }{half normal with scale (exp)}
-#'  \item{"horseshoe": }{horseshoe with scale (exp)}
-#'  \item{"inverse_gamma": }{inverse gamma with concentation (exp) and rate (exp)}
-#'  \item{"inverse_gamma_ls": }{inverse gamma with location (exp) and variance (1/exp)}
-#'  \item{"inverse_gaussian": }{inverse Gaussian with location (exp) and concentation
+#'  \item{\code{"gumbel": }}{gumbel with location (identity), scale (exp)}
+#'  \item{\code{"half_cauchy": }}{half cauchy with location (identity), scale (exp)}
+#'  \item{\code{"half_normal": }}{half normal with scale (exp)}
+#'  \item{\code{"horseshoe": }}{horseshoe with scale (exp)}
+#'  \item{\code{"inverse_gamma": }}{inverse gamma with concentration (exp) and rate (exp)}
+#'  \item{\code{"inverse_gamma_ls": }}{inverse gamma with location (exp) and variance (1/exp)}
+#'  \item{\code{"inverse_gaussian": }}{inverse Gaussian with location (exp) and concentration
 #'  (exp)}
-#'  \item{"laplace": }{Laplace with location (identity) and scale (exp)}
-#'  \item{"log_normal": }{Log-normal with location (identity) and scale (exp) of
+#'  \item{\code{"laplace": }}{Laplace with location (identity) and scale (exp)}
+#'  \item{\code{"log_normal": }}{Log-normal with location (identity) and scale (exp) of
 #'  underlying normal distribution}
-#'  \item{"logistic": }{logistic with location (identity) and scale (exp)}
-#'  \item{"negbinom": }{neg. binomial with count (exp) and prob (sigmoid)}
-#'  \item{"negbinom_ls": }{neg. binomail with mean (exp) and clutter factor (exp)}
-#'  \item{"pareto": }{Pareto with concentration (exp) and scale (1/exp)} 
-#'  \item{"pareto_ls": }{Pareto location scale version with mean (exp) 
+#'  \item{\code{"logistic": }}{logistic with location (identity) and scale (exp)}
+#'  \item{\code{"negbinom": }}{neg. binomial with count (exp) and prob (sigmoid)}
+#'  \item{\code{"negbinom_ls": }}{neg. binomial with mean (exp) and clutter factor (exp)}
+#'  \item{\code{"pareto": }}{Pareto with concentration (exp) and scale (1/exp)} 
+#'  \item{\code{"pareto_ls": }}{Pareto location scale version with mean (exp) 
 #'  and scale (exp), which corresponds to a Pareto distribution with parameters scale = mean
 #'  and concentration = 1/sigma, where sigma is the scale in the pareto_ls version}
-#'  \item{"poisson": }{poisson with rate (exp)}
-#'  \item{"poisson_lograte": }{poisson with lograte (identity))}
-#'  \item{"student_t": }{Student's t with df (exp)}
-#'  \item{"student_t_ls": }{Student's t with df (exp), location (identity) and
+#'  \item{\code{"poisson": }}{poisson with rate (exp)}
+#'  \item{\code{"poisson_lograte": }}{poisson with lograte (identity)}
+#'  \item{\code{"student_t": }}{Student's t with df (exp)}
+#'  \item{\code{"student_t_ls": }}{Student's t with df (exp), location (identity) and
 #'  scale (exp)}
-#'  \item{"uniform": }{uniform with upper and lower (both identity)}
-#'  \item{"zinb": }{Zero-inflated negative binomial with mean (exp), 
+#'  \item{\code{"uniform": }}{uniform with upper and lower (both identity)}
+#'  \item{\code{"zinb": }}{Zero-inflated negative binomial with mean (exp), 
 #'  variance (exp) and prob (sigmoid)}
-#'  \item{"zip":  }{Zero-inflated poisson distribution with mean (exp) and prob (sigmoid)}
+#'  \item{\code{"zip":  }}{Zero-inflated poisson distribution with mean (exp) and prob (sigmoid)}
 #' }
 #' @param add_const small positive constant to stabilize calculations
 #' @param trafo_list list of transformations for each distribution parameter.
@@ -281,9 +281,9 @@ family_to_tfd <- function(family)
                      negbinom_ls = tfd_negative_binomial_ls,
                      pareto = tfd_pareto,
                      pareto_ls = tfd_pareto,
-                     poisson = tfd_poisson,
+                     poisson = tfd_poisson_fixed,
                      poisson_lograte = function(log_rate)
-                       tfd_poisson(log_rate = log_rate),
+                       tfd_poisson_fixed(log_rate = log_rate),
                      student_t = function(x)
                        tfd_student_t(df=x,loc=0,scale=1),
                      student_t_ls = tfd_student_t,
@@ -472,6 +472,15 @@ family_trafo_funs_special <- function(family, add_const = 1e-8)
 
 }
 
+tfd_poisson_fixed <- function (rate = NULL, log_rate = NULL, interpolate_nondiscrete = TRUE, 
+          validate_args = FALSE, allow_nan_stats = TRUE, name = "Poisson") 
+{
+  args <- list(rate = rate, log_rate = log_rate, # NOTE(review): interpolate_nondiscrete is accepted but never forwarded -- presumably dropped for the newer TFP; confirm
+               validate_args = validate_args, allow_nan_stats = allow_nan_stats, 
+               name = name)
+  do.call(tfp$distributions$Poisson, args) # calls tfp$distributions$Poisson with the collected arguments
+}
+
 #' Implementation of a zero-inflated poisson distribution for TFP
 #'
 #' @param lambda scalar value for rate of poisson distribution
@@ -483,7 +492,7 @@ tfd_zip <- function(lambda, probs)
   return(
     tfd_mixture(cat = tfd_categorical(probs = probs),
                 components =
-                  list(tfd_poisson(rate = lambda),
+                  list(tfd_poisson_fixed(rate = lambda),
                        tfd_deterministic(loc = lambda * 0L)
                   ),
                 name="zip")
@@ -543,56 +552,60 @@ tfd_mvr <- function(loc, scale,
 
 }
 
-# Implementation of a distribution-like layer for (Quasi-)Tweedie
-tfd_tweedie <- function(loc, phi, p = 1.5, quasi = FALSE,
-                         validate_args = FALSE,
-                         allow_nan_stats = TRUE,
-                         name = "Tweedie")
-{
-
-  args <- list(
-    loc = loc,
-    scale = phi,
-    var_power = p,
-    quasi = quasi,
-    validate_args = validate_args,
-    allow_nan_stats = allow_nan_stats,
-    name = name
-  )
-
-  python_path <- system.file("python", package = "deepregression")
-  distributions <- reticulate::import_from_path("distributions", path = python_path)
-
-  return(do.call(distributions$Tweedie, args))
-
-}
-
-#' tfd_distfun for (Quasi-)Tweedie to allow for flexible p
-#' @param p integer; defines distribution
-#' @param quasi logical; whether to use quasi-likelihood or deviance resids
-#' @param output_dim integer; currently only univariate responses supported
-#' @export
 #' 
-tweedie <- function(p, quasi = FALSE, output_dim = 1L)
-{
-
-  tfd_dist <- function(l, s) tfd_tweedie(loc = l, phi = s, p = p, quasi = quasi)
-  trafo_list <- list(function(x) tf$add(1e-8, tfe(x)), 
-                     function(x) tf$add(1e-8, tfe(x))) 
-  dist_dim <- 2L
-  ret_fun <- function(x) 
-    do.call(tfd_dist,
-            lapply(1:(x$shape[[2]]/output_dim),
-                   function(i)
-                     trafo_list[[i]](
-                       tf_stride_cols(x,(i-1L)*output_dim+1L,
-                                      (i-1L)*output_dim+output_dim)))
-    )
-  attr(ret_fun, "nrparams_dist") <- 2L
-
-  return(ret_fun)
-
-}
+# # Implementation of a distribution-like layer for (Quasi-)Tweedie
+# tfd_tweedie <- function(loc, phi, p = 1.5, quasi = FALSE,
+#                          validate_args = FALSE,
+#                          allow_nan_stats = TRUE,
+#                          name = "Tweedie")
+# {
+#
+#   args <- list(
+#     loc = loc,
+#     scale = phi,
+#     var_power = p,
+#     quasi = quasi,
+#     validate_args = validate_args,
+#     allow_nan_stats = allow_nan_stats,
+#     name = name
+#   )
+#
+#   python_path <- system.file("python", package = "deepregression")
+#   distributions <- reticulate::import_from_path("distributions", path = python_path)
+#
+#   return(do.call(distributions$Tweedie, args))
+#
+# }
+#
+# #' tfd_distfun for (Quasi-)Tweedie to allow for flexible p
+# #' @param p integer; defines distribution
+# #' @param quasi logical; whether to use quasi-likelihood or deviance resids
+# #' @param output_dim integer; currently only univariate responses supported
+# #' @export
+# #'
+# tweedie <- function(p, quasi = FALSE, output_dim = 1L,
+#                     linkfun_mean = function(x) tf$add(1e-8, tf$math$exp(x)),
+#                     linkfun_phi = function(x) tf$add(1e-8, tf$math$exp(x)))
+# {
+#
+#   tfd_dist <- function(l, s) tfd_tweedie(loc = l, phi = s, p = p, quasi = quasi)
+#   trafo_list <- list(linkfun_mean, linkfun_phi)
+#   dist_dim <- 2L
+#   ret_fun <- function(x)
+#     do.call(tfd_dist,
+#             lapply(1:(x$shape[[2]]/output_dim),
+#                    function(i)
+#                      trafo_list[[i]](
+#                        tf_stride_cols(x,(i-1L)*output_dim+1L,
+#                                       (i-1L)*output_dim+output_dim)))
+#     )
+#   attr(ret_fun, "nrparams_dist") <- 2L
+#
+#   return(ret_fun)
+#
+# }
+#
+
 
 #' For using mean squared error via TFP
 #' 

diff --git a/R/formula_helpers.R b/R/formula_helpers.R
@@ -112,7 +112,12 @@ extractval <- function(term, name, default_for_missing = FALSE, default = NULL)
 
 }
 
-# multiple value option of extractval
+#' Extractval with multiple options
+#' @param names character vector of names
+#' @export
+#' @rdname formulaHelpers
+#'
+#'
 extractvals <- function(term, names){
   if(is.character(term)) term <- as.formula(paste0("~", term))
   inputs <- as.list(as.list(term)[[2]])[-1]

diff --git a/R/layers.R b/R/layers.R
@@ -15,6 +15,7 @@ re_layer = function(units, ...) {
 #' 
 #' @param units integer; number of units
 #' @param ... arguments passed to TensorFlow layer
+#' @param P penalty matrix
 #' @return layer object
 #' @export
 #' @rdname re_layers
@@ -139,29 +140,6 @@ layer_sparse_conv_2d <- function(filters,
 #' @param ... arguments passed to TensorFlow layer
 #' @return layer object
 #' @export
-#' @examples
-#' n <- 1000
-#' y <- rnorm(n)
-#' data <- data.frame(x1=rnorm(n), x2=rnorm(n), x3=rnorm(n))
-#' 
-#' library(deepregression)
-#' 
-#' mod <- keras_model_sequential()
-#' mod %>% layer_dense(1000) %>% 
-#'     layer_sparse_batch_normalization(lam = 100)() %>% 
-#'     layer_dense(1)
-#'     
-#' mod %>% compile(optimizer = optimizer_adam(),
-#'                 loss = "mse")
-#' 
-#' mod %>% fit(x = as.matrix(data), y = y, epochs = 1000,
-#'             validation_split = 0.2, 
-#'             callbacks = list(callback_early_stopping(patience = 30, 
-#'                              restore_best_weights = TRUE)),
-#'             verbose = FALSE)
-#' 
-#' lapply(mod$weights[3:4], function(x) 
-#'        summary(c(as.matrix(x))))
 #' 
 #' 
 layer_sparse_batch_normalization <- function(lam=NULL, ...) {

diff --git a/R/zzz.R b/R/zzz.R
@@ -1,9 +1,9 @@
 #' @importFrom stats na.omit
 
 VERSIONPY = "3.10"
-VERSIONTF = "2.10"
-VERSIONKERAS = "2.10"
-VERSIONTFP = "0.16"
+VERSIONTF = "2.15"
+VERSIONKERAS = "2.15"
+VERSIONTFP = "0.23"
 
 globalVariables("self")
 

diff --git a/inst/python/distributions/__pycache__/__init__.cpython-310.pyc b/inst/python/distributions/__pycache__/__init__.cpython-310.pyc
diff --git a/inst/python/distributions/__pycache__/mvr.cpython-310.pyc b/inst/python/distributions/__pycache__/mvr.cpython-310.pyc
diff --git a/inst/python/distributions/tweedie.py b/inst/python/distributions/tweedie.py
@@ -9,6 +9,21 @@
 from tensorflow_probability.python.internal import tensor_util
 from tensorflow.math import exp, log
 from tensorflow.experimental import numpy as tnp
+import numpy as np
+from scipy.special import wright_bessel
+
+
+# TensorFlow wrapper around scipy.special.wright_bessel
+def tensorflow_wright_bessel(a, b, x):
+    # Inner function handed to tf.py_function below (NOTE(review): it receives eager tensors, not NumPy arrays -- confirm scipy accepts them)
+    def wright_bessel_inner(a_np, b_np, x_np):
+        # Evaluate scipy's wright_bessel and return the result as a float64 NumPy array
+        result = wright_bessel(a_np, b_np, x_np)
+        return np.array(result, dtype=np.float64)
+
+    # Wrap the Python function with tf.py_function
+    # Arguments: the inner function, the list of tensor inputs, and the output dtype (tf.float64)
+    return tf.py_function(wright_bessel_inner, [a, b, x], tf.float64)
 
 class Tweedie(distribution.AutoCompositeTensorDistribution):
   """Tweedie
@@ -113,19 +128,24 @@ def _log_prob(self, x):
       return llf - u
 
     else: 
-      # from https://github.com/cran/statmod/blob/master/R/tweedie.R negative deviance residuals
-      # x1 = x + 0.1 * tf.cast(tf.equal(x, 0), tf.float32)
-      # theta = (tf.pow(x1, 1 - self.p) - tf.pow(self.loc, 1 - self.p)) / (1 - self.p)
-      # kappa = (tf.pow(x, 2 - self.p) - tf.pow(self.loc, 2 - self.p)) / (2 - self.p)
-      # return - 2 * (x * theta - kappa)
-      # from https://github.com/cran/mgcv/blob/aff4560d187dfd7d98c7bd367f5a0076faf129b7/R/gamlss.r#L2474
-      ethi = tf.exp(-self.p) # assuming p > 0
-      p = (self.b + self.a * ethi)/(1+ethi)
-      x1 = x + tf.cast(x == 0, tf.float32)
-      theta = (tf.pow(x1, 1 - p) - tf.pow(self.loc, 1 - p)) / (1 - p)
-      kappa = (tf.pow(x, 2 - p) - tf.pow(self.loc, 2 - p)) / (2 - p)
-      return tf.sign(x - self.loc) * tf.sqrt(tf.nn.relu(2 * (x * theta - kappa) * 1 / self.scale))
+      p = self.p
+      mu = self.loc
+      theta = mu ** (1 - p) / (1 - p)
+      kappa = mu ** (2 - p) / (2 - p)
+      alpha = (2 - p) / (1 - p)
 
+      ll_obs = (endog * theta - kappa) / scale
+      idx = endog > 0
+            if np.any(idx):
+                if not np.isscalar(endog):
+                    endog = endog[idx]
+                if not np.isscalar(scale):
+                    scale = scale[idx]
+                x = ((p - 1) * scale / endog) ** alpha
+                x /= (2 - p) * scale
+                wb = special.wright_bessel(-alpha, 0, x)
+                ll_obs[idx] += np.log(1/endog * wb)
+            return ll_obs
 
 
   def _mean(self):

diff --git a/inst/python/generators/__pycache__/__init__.cpython-310.pyc b/inst/python/generators/__pycache__/__init__.cpython-310.pyc
diff --git a/inst/python/generators/__pycache__/keras_generators.cpython-310.pyc b/inst/python/generators/__pycache__/keras_generators.cpython-310.pyc
diff --git a/inst/python/generators/keras_generators.py b/inst/python/generators/keras_generators.py
@@ -3,7 +3,7 @@
 import numpy as np
 from itertools import groupby
 from tensorflow.keras.preprocessing.image import Iterator, ImageDataGenerator
-from keras.utils.data_utils import Sequence
+from keras.utils import Sequence
 
 def all_equal(iterable):
     g = groupby(iterable)