dls and dllaplace distributions in alm(). Issue #13

config-i1 · Mar 26, 2020 · 153b966 · 153b966
1 parent 74ac53d
commit 153b966
Show file tree

Hide file tree

Showing 13 changed files with 177 additions and 103 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: greybox
 Type: Package
 Title: Toolbox for Model Building and Forecasting
-Version: 0.5.9.41015
+Version: 0.5.9.41016
 Date: 2020-03-26
 Authors@R: c(person("Ivan", "Svetunkov", email = "ivan@svetunkov.ru", role = c("aut", "cre"),
                   comment="Lecturer at Centre for Marketing Analytics and Forecasting, Lancaster University, UK"),

diff --git a/NEWS b/NEWS
@@ -12,6 +12,7 @@ Changes:
 * plot.rmcb() now has a "select" parameter.
 * alm() now returns class "occurrence" if occurrence was used. This is needed in order to connect alm() with es() / mes() from smooth.
 * Introduced a proper is.occurrence() method.
+* Log Laplace and Log S distributions in alm.
 
 Bugfixes:
 * A fix of a bug in case the data is not provided explicitly.

diff --git a/R/alm.R b/R/alm.R
@@ -8,9 +8,11 @@
 #' \item \link[stats]{dnorm} - Normal distribution,
 #' \item \link[stats]{dlogis} - Logistic Distribution,
 #' \item \link[greybox]{dlaplace} - Laplace distribution,
+#' \item dllaplace - Log Laplace distribution,
 #' \item \link[greybox]{dalaplace} - Asymmetric Laplace distribution,
 #' \item \link[stats]{dt} - T-distribution,
 #' \item \link[greybox]{ds} - S-distribution,
+#' \item dls - Log S-distribution,
 #' \item \link[greybox]{dfnorm} - Folded normal distribution,
 #' \item \link[stats]{dlnorm} - Log normal distribution,
 #' \item \link[greybox]{dbcnorm} - Box-Cox normal distribution,
@@ -197,7 +199,7 @@
 #' @export alm
 alm <- function(formula, data, subset, na.action,
                 distribution=c("dnorm","dlogis","dlaplace","dalaplace","ds","dt",
-                               "dfnorm","dlnorm","dbcnorm","dinvgauss",
+                               "dfnorm","dlnorm","dllaplace","dls","dbcnorm","dinvgauss",
                                "dpois","dnbinom",
                                "dbeta",
                                "plogis","pnorm"),
@@ -336,10 +338,12 @@ alm <- function(formula, data, subset, na.action,
                        "dlnorm" =,
                        "dbcnorm"=,
                        "dlaplace" =,
+                       "dllaplace" =,
                        "dalaplace" =,
                        "dlogis" =,
                        "dt" =,
                        "ds" =,
+                       "dls" =,
                        "pnorm" =,
                        "plogis" = matrixXreg %*% B
         );
@@ -352,9 +356,11 @@ alm <- function(formula, data, subset, na.action,
                         "dbcnorm" = sqrt(sum((bcTransform(y[otU],other)-mu[otU])^2)/obsInsample),
                         "dinvgauss" = sum((y[otU]/mu[otU]-1)^2 / (y[otU]/mu[otU]))/obsInsample,
                         "dlaplace" = sum(abs(y[otU]-mu[otU]))/obsInsample,
+                        "dllaplace" = sum(abs(log(y[otU])-mu[otU]))/obsInsample,
                         "dalaplace" = sum((y[otU]-mu[otU]) * (other - (y[otU]<=mu[otU])*1))/obsInsample,
                         "dlogis" = sqrt(sum((y[otU]-mu[otU])^2)/obsInsample * 3 / pi^2),
                         "ds" = sum(sqrt(abs(y[otU]-mu[otU]))) / (obsInsample*2),
+                        "dls" = sum(sqrt(abs(log(y[otU])-mu[otU]))) / (obsInsample*2),
                         "dt" = ,
                         "dchisq" =,
                         "dnbinom" = abs(other),
@@ -405,11 +411,13 @@ alm <- function(formula, data, subset, na.action,
                                 "dinvgauss" = dinvgauss(y[otU], mean=fitterReturn$mu[otU],
                                                         dispersion=fitterReturn$scale/fitterReturn$mu[otU], log=TRUE),
                                 "dlaplace" = dlaplace(y[otU], mu=fitterReturn$mu[otU], scale=fitterReturn$scale, log=TRUE),
+                                "dllaplace" = dlaplace(log(y[otU]), mu=fitterReturn$mu[otU], scale=fitterReturn$scale, log=TRUE)-log(y[otU]),
                                 "dalaplace" = dalaplace(y[otU], mu=fitterReturn$mu[otU], scale=fitterReturn$scale,
                                                         alpha=fitterReturn$other, log=TRUE),
                                 "dlogis" = dlogis(y[otU], location=fitterReturn$mu[otU], scale=fitterReturn$scale, log=TRUE),
                                 "dt" = dt(y[otU]-fitterReturn$mu[otU], df=fitterReturn$scale, log=TRUE),
                                 "ds" = ds(y[otU], mu=fitterReturn$mu[otU], scale=fitterReturn$scale, log=TRUE),
+                                "dls" = ds(log(y[otU]), mu=fitterReturn$mu[otU], scale=fitterReturn$scale, log=TRUE)-log(y[otU]),
                                 "dchisq" = dchisq(y[otU], df=fitterReturn$scale, ncp=fitterReturn$mu[otU], log=TRUE),
                                 "dpois" = dpois(y[otU], lambda=fitterReturn$mu[otU], log=TRUE),
                                 "dnbinom" = dnbinom(y[otU], mu=fitterReturn$mu[otU], size=fitterReturn$scale, log=TRUE),
@@ -423,34 +431,36 @@ alm <- function(formula, data, subset, na.action,
         # The differential entropy for the models with the missing data
         if(occurrenceModel){
             CFValue[] <- CFValue + switch(distribution,
-                                            "dnorm" =,
-                                            "dfnorm" =,
-                                            "dbcnorm" =,
-                                            "dlnorm" = obsZero*(log(sqrt(2*pi)*fitterReturn$scale)+0.5),
-                                            # "dinvgauss" = 0.5*(obsZero*(log(pi/2)+1+suppressWarnings(log(fitterReturn$scale)))-
-                                            #                                 sum(log(fitterReturn$mu[!otU]))),
-                                            "dinvgauss" = obsZero*(0.5*(log(pi/2)+1+suppressWarnings(log(fitterReturn$scale)))),
-                                            "dlaplace" =,
-                                            "dalaplace" = obsZero*(1 + log(2*fitterReturn$scale)),
-                                            "dlogis" = obsZero*2,
-                                            "dt" = obsZero*((fitterReturn$scale+1)/2 *
-                                                                (digamma((fitterReturn$scale+1)/2)-digamma(fitterReturn$scale/2)) +
-                                                                log(sqrt(fitterReturn$scale) * beta(fitterReturn$scale/2,0.5))),
-                                            "ds" = obsZero*(2 + 2*log(2*fitterReturn$scale)),
-                                            "dchisq" = obsZero*(log(2)*gamma(fitterReturn$scale/2)-
-                                                                    (1-fitterReturn$scale/2)*digamma(fitterReturn$scale/2)+
-                                                                    fitterReturn$scale/2),
-                                            "dbeta" = sum(log(beta(fitterReturn$mu[otU],fitterReturn$scale[otU]))-
-                                                              (fitterReturn$mu[otU]-1)*
-                                                              (digamma(fitterReturn$mu[otU])-
-                                                                   digamma(fitterReturn$mu[otU]+fitterReturn$scale[otU]))-
-                                                              (fitterReturn$scale[otU]-1)*
-                                                              (digamma(fitterReturn$scale[otU])-
-                                                                   digamma(fitterReturn$mu[otU]+fitterReturn$scale[otU]))),
-                                            # This is a normal approximation of the real entropy
-                                            # "dpois" = sum(0.5*log(2*pi*fitterReturn$scale)+0.5),
-                                            # "dnbinom" = obsZero*(log(sqrt(2*pi)*fitterReturn$scale)+0.5),
-                                            0
+                                          "dnorm" =,
+                                          "dfnorm" =,
+                                          "dbcnorm" =,
+                                          "dlnorm" = obsZero*(log(sqrt(2*pi)*fitterReturn$scale)+0.5),
+                                          # "dinvgauss" = 0.5*(obsZero*(log(pi/2)+1+suppressWarnings(log(fitterReturn$scale)))-
+                                          #                                 sum(log(fitterReturn$mu[!otU]))),
+                                          "dinvgauss" = obsZero*(0.5*(log(pi/2)+1+suppressWarnings(log(fitterReturn$scale)))),
+                                          "dlaplace" =,
+                                          "dllaplace" =,
+                                          "dalaplace" = obsZero*(1 + log(2*fitterReturn$scale)),
+                                          "dlogis" = obsZero*2,
+                                          "dt" = obsZero*((fitterReturn$scale+1)/2 *
+                                                              (digamma((fitterReturn$scale+1)/2)-digamma(fitterReturn$scale/2)) +
+                                                              log(sqrt(fitterReturn$scale) * beta(fitterReturn$scale/2,0.5))),
+                                          "ds" =,
+                                          "dls" = obsZero*(2 + 2*log(2*fitterReturn$scale)),
+                                          "dchisq" = obsZero*(log(2)*gamma(fitterReturn$scale/2)-
+                                                                  (1-fitterReturn$scale/2)*digamma(fitterReturn$scale/2)+
+                                                                  fitterReturn$scale/2),
+                                          "dbeta" = sum(log(beta(fitterReturn$mu[otU],fitterReturn$scale[otU]))-
+                                                            (fitterReturn$mu[otU]-1)*
+                                                            (digamma(fitterReturn$mu[otU])-
+                                                                 digamma(fitterReturn$mu[otU]+fitterReturn$scale[otU]))-
+                                                            (fitterReturn$scale[otU]-1)*
+                                                            (digamma(fitterReturn$scale[otU])-
+                                                                 digamma(fitterReturn$mu[otU]+fitterReturn$scale[otU]))),
+                                          # This is a normal approximation of the real entropy
+                                          # "dpois" = sum(0.5*log(2*pi*fitterReturn$scale)+0.5),
+                                          # "dnbinom" = obsZero*(log(sqrt(2*pi)*fitterReturn$scale)+0.5),
+                                          0
             );
         }
 
@@ -680,7 +690,7 @@ alm <- function(formula, data, subset, na.action,
     errors <- vector("numeric", obsInsample);
     ot <- vector("logical", obsInsample);
 
-    if(any(y<0) & any(distribution==c("dfnorm","dlnorm","dbcnorm","dinvgauss","dchisq","dpois","dnbinom"))){
+    if(any(y<0) & any(distribution==c("dfnorm","dlnorm","dllaplace","dls","dbcnorm","dinvgauss","dchisq","dpois","dnbinom"))){
         stop(paste0("Negative values are not allowed in the response variable for the distribution '",distribution,"'"),
              call.=FALSE);
     }
@@ -880,7 +890,7 @@ alm <- function(formula, data, subset, na.action,
             }
             nVariables <- nVariables + arOrder;
             # Write down the values for the matrixXreg in the necessary transformations
-            if(any(distribution==c("dlnorm","dpois","dnbinom"))){
+            if(any(distribution==c("dlnorm","dllaplace","dls","dpois","dnbinom"))){
                 if(any(y[otU]==0)){
                     # Use Box-Cox if there are zeroes
                     ariElements[] <- bcTransform(ariElements,0.01);
@@ -916,7 +926,7 @@ alm <- function(formula, data, subset, na.action,
 
         #### I(0) initialisation ####
         if(iOrder==0){
-            if(any(distribution==c("dlnorm","dpois","dnbinom","dinvgauss"))){
+            if(any(distribution==c("dlnorm","dllaplace","dls","dpois","dnbinom","dinvgauss"))){
                 if(any(y[otU]==0)){
                     # Use Box-Cox if there are zeroes
                     B <- .lm.fit(matrixXreg[otU,,drop=FALSE],bcTransform(y[otU],0.01))$coefficients;
@@ -991,7 +1001,7 @@ alm <- function(formula, data, subset, na.action,
                 matrixXregForDiffs <- matrixXregForDiffs[-c(1:iOrder),,drop=FALSE];
             }
 
-            if(any(distribution==c("dlnorm","dpois","dnbinom","dinvgauss"))){
+            if(any(distribution==c("dlnorm","dllaplace","dls","dpois","dnbinom","dinvgauss"))){
                 B <- .lm.fit(matrixXregForDiffs,diff(log(y[otU]),differences=iOrder))$coefficients;
             }
             else if(any(distribution==c("plogis","pnorm"))){
@@ -1263,7 +1273,9 @@ alm <- function(formula, data, subset, na.action,
                        "dpois" =,
                        "dnbinom" = mu,
                        "dchisq" = mu + df,
-                       "dlnorm" = exp(mu),
+                       "dlnorm" =,
+                       "dllaplace" =,
+                       "dls" = exp(mu),
                        "dbcnorm" = bcTransformInv(mu,lambda),
                        "dbeta" = mu / (mu + scale),
                        "pnorm" = pnorm(mu, mean=0, sd=1),
@@ -1284,8 +1296,10 @@ alm <- function(formula, data, subset, na.action,
                        "dpois" = y - mu,
                        "dinvgauss" = y / mu,
                        "dchisq" = sqrt(y) - sqrt(mu),
-                       "dlnorm"= log(y) - mu,
-                       "dbcnorm"= bcTransform(y,lambda) - mu,
+                       "dlnorm" =,
+                       "dllaplace" =,
+                       "dls" = log(y) - mu,
+                       "dbcnorm" = bcTransform(y,lambda) - mu,
                        "pnorm" = qnorm((y - pnorm(mu, 0, 1) + 1) / 2, 0, 1),
                        "plogis" = log((1 + y * (1 + exp(mu))) / (1 + exp(mu) * (2 - y) - y))
                        # Here we use the proxy from Svetunkov & Boylan (2019)
@@ -1438,49 +1452,18 @@ alm <- function(formula, data, subset, na.action,
 
         # New data and new response variable
         dataNew <- mf$data[mf$subset,,drop=FALSE];
-        # if(ncol(dataNew)>1){
-        #     y <- as.matrix(dataNew[,all.vars(formula)[1],drop=FALSE]);
-        # }
-        # else{
-        #     y <- dataNew[,1,drop=FALSE]
-        # }
-        # If there are NaN values, substitute them by zeroes
-        # if(any(is.nan(y))){
-        #     y[is.nan(y)] <- 0;
-        # }
-        # ot <- y!=0;
         dataNew[,all.vars(formula)[1]] <- (ot)*1;
 
         if(!occurrenceProvided){
             occurrence <- do.call("alm", list(formula=formula, data=dataNew, distribution=occurrence, ar=arOrder, i=iOrder));
         }
 
-        # Corrected fitted (with zeroes, when y=0)
-        # yFittedNew <- yFitted;
-        # yFitted <- vector("numeric",length(y));
-        # yFitted[] <- 0;
+        # Corrected fitted (with probabilities)
         yFitted[] <- yFitted * fitted(occurrence);
 
-        # Corrected errors (with zeroes, when y=0)
-        # errorsNew <- errors;
-        # errors <- vector("numeric",length(y));
-        # errors[] <- 0;
-        # errors[ot] <- errorsNew;
-
         # Correction of the likelihood
         CFValue <- CFValue - occurrence$logLik;
 
-        # Form the final dataWork in order to return it in the data.
-        # dataWork <- eval(mf, parent.frame());
-        # dataWork <- model.matrix(dataWork,data=dataWork);
-
-        # Add AR elements if needed
-        # if(ariModel){
-        #     ariMatrix <- matrix(0, nrow(dataWork), ariOrder, dimnames=list(NULL, ariTransformedNames));
-        #     ariMatrix[] <- ariElements;
-        #     dataWork <- cbind(dataWork,ariMatrix);
-        # }
-
         if(interceptIsNeeded){
             # This shit is needed, because R has habit of converting everything into vectors...
             dataWork <- cbind(y,matrixXreg[,-1,drop=FALSE]);

diff --git a/R/lmCombine.R b/R/lmCombine.R
@@ -23,7 +23,8 @@
 #' one are produced and then combined.
 #' @param silent If \code{FALSE}, then nothing is silent, everything is printed
 #' out. \code{TRUE} means that nothing is produced.
-#' @param distribution Distribution to pass to \code{alm()}.
+#' @param distribution Distribution to pass to \code{alm()}. See \link[greybox]{alm}
+#' for details.
 #' @param parallel If \code{TRUE}, then the model fitting is done in parallel.
 #' WARNING! Packages \code{foreach} and either \code{doMC} (Linux and Mac only)
 #' or \code{doParallel} are needed in order to run the function in parallel.
@@ -84,7 +85,9 @@
 #'
 #' @export lmCombine
 lmCombine <- function(data, ic=c("AICc","AIC","BIC","BICc"), bruteforce=FALSE, silent=TRUE,
-                      distribution=c("dnorm","dfnorm","dlnorm","dlaplace","ds","dchisq","dlogis",
+                      distribution=c("dnorm","dlogis","dlaplace","dalaplace","ds","dt",
+                                     "dfnorm","dlnorm","dllaplace","dls","dbcnorm","dinvgauss",
+                                     "dpois","dnbinom",
                                      "plogis","pnorm"),
                       parallel=FALSE, ...){
     # Function combines linear regression models and produces the combined lm object.

diff --git a/R/lmDynamic.R b/R/lmDynamic.R
@@ -22,7 +22,8 @@
 #' one are produced and then combined.
 #' @param silent If \code{FALSE}, then nothing is silent, everything is printed
 #' out. \code{TRUE} means that nothing is produced.
-#' @param distribution Distribution to pass to \code{alm()}.
+#' @param distribution Distribution to pass to \code{alm()}. See \link[greybox]{alm}
+#' for details.
 #' @param parallel If \code{TRUE}, then the model fitting is done in parallel.
 #' WARNING! Packages \code{foreach} and either \code{doMC} (Linux and Mac only)
 #' or \code{doParallel} are needed in order to run the function in parallel.
@@ -73,8 +74,10 @@
 #'
 #' @export lmDynamic
 lmDynamic <- function(data, ic=c("AICc","AIC","BIC","BICc"), bruteforce=FALSE, silent=TRUE,
-                      distribution=c("dnorm","dfnorm","dlnorm","dlaplace","ds","dchisq","dlogis",
-                                    "plogis","pnorm"),
+                      distribution=c("dnorm","dlogis","dlaplace","dalaplace","ds","dt",
+                                     "dfnorm","dlnorm","dllaplace","dls","dbcnorm","dinvgauss",
+                                     "dpois","dnbinom",
+                                     "plogis","pnorm"),
                       parallel=FALSE, ...){
     # Function combines linear regression models and produces the combined lm object.
     cl <- match.call();