Complete fix for the R issue and IG, #13

config-i1 · Dec 5, 2019 · 0118d8e · 0118d8e
1 parent b847e6f
commit 0118d8e
Show file tree

Hide file tree

Showing 4 changed files with 69 additions and 16 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: greybox
 Type: Package
 Title: Toolbox for Model Building and Forecasting
-Version: 0.5.7.41005
-Date: 2019-12-04
+Version: 0.5.7.41006
+Date: 2019-12-05
 Authors@R: person("Ivan", "Svetunkov", email = "ivan@svetunkov.ru", role = c("aut", "cre"),
                   comment="Lecturer at Centre for Marketing Analytics and Forecasting, Lancaster University, UK")
 URL: https://github.com/config-i1/greybox

diff --git a/NEWS b/NEWS
@@ -4,7 +4,7 @@ greybox v0.5.7 (Release data: 2019-12-04)
 Changes:
 * plot.greybox() now also produces LOWESS lines on the scatterplots. There is a parameters that regulates this.
 * A bit of tuning of plot.greybox() graphs.
-* Started work on Inverse Gaussian distribution for alm().
+* Inverse Gaussian distribution for alm() is now available. Not all the methods work okay yet. Predictions might be off.
 
 Bugfixes:
 * alm() would be stuck in some cases, when determination returns NaNs.

diff --git a/R/alm.R b/R/alm.R
@@ -15,7 +15,7 @@
 #' \item \link[stats]{dlnorm} - Log normal distribution,
 #' \item \link[greybox]{dbcnorm} - Box-Cox normal distribution,
 #' \item \link[stats]{dchisq} - Chi-Squared Distribution,
-# \item \link[statmod]{dinvgauss} - Inverse Gaussian distribution,
+#' \item \link[statmod]{dinvgauss} - Inverse Gaussian distribution,
 #' \item \link[stats]{dbeta} - Beta distribution,
 #' \item \link[stats]{dpois} - Poisson Distribution,
 #' \item \link[stats]{dnbinom} - Negative Binomial Distribution,
@@ -192,6 +192,7 @@
 #' @importFrom stats model.frame sd terms model.matrix
 #' @importFrom stats dchisq dlnorm dnorm dlogis dpois dnbinom dt dbeta
 #' @importFrom stats plogis
+#' @importFrom statmod dinvgauss
 #' @importFrom forecast Arima
 #' @export alm
 alm <- function(formula, data, subset, na.action,
@@ -347,10 +348,11 @@ alm <- function(formula, data, subset, na.action,
                        "dnbinom" = exp(matrixXreg %*% B),
                        "dchisq" = ifelseFast(any(matrixXreg %*% B <0),1E+100,(matrixXreg %*% B)^2),
                        "dbeta" = exp(matrixXreg %*% B[1:(length(B)/2)]),
-                       "dbcnorm"=,
                        "dnorm" =,
                        "dfnorm" =,
                        "dlnorm" =,
+                       "dbcnorm"=,
+                       "dinvgauss" =,
                        "dlaplace" =,
                        "dalaplace" =,
                        "dlogis" =,
@@ -366,6 +368,7 @@ alm <- function(formula, data, subset, na.action,
                         "dfnorm" = abs(other),
                         "dlnorm" = sqrt(sum((log(y[otU])-mu[otU])^2)/obsInsample),
                         "dbcnorm" = sqrt(sum((bcTransform(y[otU],other)-mu[otU])^2)/obsInsample),
+                        "dinvgauss" = sum((y[otU]-mu[otU])^2 / (mu[otU]^2*y[otU]))/obsInsample,
                         "dlaplace" = sum(abs(y[otU]-mu[otU]))/obsInsample,
                         "dalaplace" = sum((y[otU]-mu[otU]) * (other - (y[otU]<=mu[otU])*1))/obsInsample,
                         "dlogis" = sqrt(sum((y[otU]-mu[otU])^2)/obsInsample * 3 / pi^2),
@@ -417,6 +420,7 @@ alm <- function(formula, data, subset, na.action,
                                 "dlnorm" = dlnorm(y[otU], meanlog=fitterReturn$mu[otU], sdlog=fitterReturn$scale, log=TRUE),
                                 "dbcnorm" = dbcnorm(y[otU], mu=fitterReturn$mu[otU], sigma=fitterReturn$scale,
                                                     lambda=fitterReturn$other, log=TRUE),
+                                "dinvgauss" = dinvgauss(y[otU], mean=fitterReturn$mu[otU], dispersion=fitterReturn$scale, log=TRUE),
                                 "dlaplace" = dlaplace(y[otU], mu=fitterReturn$mu[otU], scale=fitterReturn$scale, log=TRUE),
                                 "dalaplace" = dalaplace(y[otU], mu=fitterReturn$mu[otU], scale=fitterReturn$scale,
                                                         alpha=fitterReturn$other, log=TRUE),
@@ -440,6 +444,7 @@ alm <- function(formula, data, subset, na.action,
                                             "dfnorm" =,
                                             "dbcnorm" =,
                                             "dlnorm" = obsZero*(log(sqrt(2*pi)*fitterReturn$scale)+0.5),
+                                            "dinvgauss" = obsZero*0.5*(log(pi)+1-log(2/fitterReturn$scale)),
                                             "dlaplace" =,
                                             "dalaplace" = obsZero*(1 + log(2*fitterReturn$scale)),
                                             "dlogis" = obsZero*2,
@@ -680,11 +685,16 @@ alm <- function(formula, data, subset, na.action,
     errors <- vector("numeric", obsInsample);
     ot <- vector("logical", obsInsample);
 
-    if(any(y<0) & any(distribution==c("dfnorm","dlnorm","dbcnorm","dchisq","dpois","dnbinom"))){
+    if(any(y<0) & any(distribution==c("dfnorm","dlnorm","dbcnorm","dinvgauss","dchisq","dpois","dnbinom"))){
         stop(paste0("Negative values are not allowed in the response variable for the distribution '",distribution,"'"),
              call.=FALSE);
     }
 
+    if(any(y==0) & any(distribution==c("dinvgauss"))){
+        stop(paste0("Zero values are not allowed in the response variable for the distribution '",distribution,"'"),
+             call.=FALSE);
+    }
+
     if(any(distribution==c("dpois","dnbinom"))){
         if(any(y!=trunc(y))){
             stop(paste0("Count data is needed for the distribution '",distribution,"', but you have fractional numbers. ",
@@ -962,6 +972,11 @@ alm <- function(formula, data, subset, na.action,
                     BUpper <- rep(Inf,length(B));
                 }
             }
+            else if(distribution==c("dinvgauss")){
+                B <- solve(t(matrixXreg[otU,,drop=FALSE]) %*% diag(y[otU]) %*% matrixXreg[otU,,drop=FALSE]) %*% t(matrixXreg[otU,,drop=FALSE]) %*% rep(1,sum(otU));
+                BLower <- -Inf;
+                BUpper <- Inf;
+            }
             else{
                 B <- .lm.fit(matrixXreg[otU,,drop=FALSE],y[otU])$coefficients;
                 BLower <- -Inf;
@@ -1100,7 +1115,7 @@ alm <- function(formula, data, subset, na.action,
         }
         if(is.null(ellipsis$algorithm)){
             # if(recursiveModel){
-                algorithm <- "NLOPT_LN_BOBYQA";
+                # algorithm <- "NLOPT_LN_BOBYQA";
             # }
             # else{
                 algorithm <- "NLOPT_LN_SBPLX";
@@ -1244,6 +1259,7 @@ alm <- function(formula, data, subset, na.action,
     yFitted[] <- switch(distribution,
                        "dfnorm" = sqrt(2/pi)*scale*exp(-mu^2/(2*scale^2))+mu*(1-2*pnorm(-mu/scale)),
                        "dnorm" =,
+                       "dinvgauss" =,
                        "dlaplace" =,
                        "dalaplace" =,
                        "dlogis" =,
@@ -1271,6 +1287,7 @@ alm <- function(formula, data, subset, na.action,
                        "dnorm" =,
                        "dnbinom" =,
                        "dpois" = y - mu,
+                       "dinvgauss" = y / mu,
                        "dchisq" = sqrt(y) - sqrt(mu),
                        "dlnorm"= log(y) - mu,
                        "dbcnorm"= bcTransform(y,lambda) - mu,
@@ -1330,14 +1347,15 @@ alm <- function(formula, data, subset, na.action,
     if(vcovProduce){
         # Only vcov is needed, no point in redoing the occurrenceModel
         occurrenceModel <- FALSE;
-        method.args <- list(eps=1e-4, d=0.1, r=4)
+        method.args <- list(eps=1e-4, d=0.1, r=4);
         # if(CDF){
         #     method.args <- list(d=1e-6, r=6);
         # }
         # else{
         #     if(any(distribution==c("dnbinom","dlaplace","dalaplace","dbcnorm"))){
-        #         method.args <- list(d=1e-6, r=6);
-        #     }
+        if(distribution==c("dinvgauss")){
+            method.args <- list(d=1e-6, r=6);
+        }
         #     else{
         #         method.args <- list(d=1e-4, r=4);
         #     }

diff --git a/R/methods.R b/R/methods.R
@@ -227,6 +227,7 @@ pointLik.alm <- function(object, ...){
                             "dfnorm" = dfnorm(y, mu=mu, sigma=scale, log=TRUE),
                             "dlnorm" = dlnorm(y, meanlog=mu, sdlog=scale, log=TRUE),
                             "dbcnorm" = dbcnorm(y, mu=mu, sigma=scale, lambda=object$other$lambda, log=TRUE),
+                            "dinvgauss" = dinvgauss(y, mean=mu, dispersion=scale, log=TRUE),
                             "dlaplace" = dlaplace(y, mu=mu, scale=scale, log=TRUE),
                             "dalaplace" = dalaplace(y, mu=mu, scale=scale, alpha=object$other$alpha, log=TRUE),
                             "dlogis" = dlogis(y, location=mu, scale=scale, log=TRUE),
@@ -249,6 +250,7 @@ pointLik.alm <- function(object, ...){
                                    "dfnorm" =,
                                    "dbcnorm" =,
                                    "dlnorm" = log(sqrt(2*pi)*scale)+0.5,
+                                   "dinvgauss" = 0.5*(log(pi)+1-log(2*scale)),
                                    "dlaplace" =,
                                    "dalaplace" = (1 + log(2*scale)),
                                    "dlogis" = 2,
@@ -538,6 +540,7 @@ confint.lmGreybox <- function(object, parm, level=0.95, ...){
 
 #' @rdname predict.greybox
 #' @importFrom stats predict qchisq qlnorm qlogis qpois qnbinom qbeta
+#' @importFrom statmode qinvgauss
 #' @export
 predict.alm <- function(object, newdata=NULL, interval=c("none", "confidence", "prediction"),
                             level=0.95, side=c("both","upper","lower"), ...){
@@ -688,6 +691,13 @@ predict.alm <- function(object, newdata=NULL, interval=c("none", "confidence", "
         }
         greyboxForecast$scale <- sigma;
     }
+    else if(object$distribution=="dinvgauss"){
+        greyboxForecast$scale <- greyboxForecast$variance / greyboxForecast$mean^3;
+        if(interval!="n"){
+            greyboxForecast$lower[] <- qinvgauss(levelLow,mean=greyboxForecast$mean,dispersion=greyboxForecast$scale);
+            greyboxForecast$upper[] <- qinvgauss(levelUp,mean=greyboxForecast$mean,dispersion=greyboxForecast$scale);
+        }
+    }
     else if(object$distribution=="dlogis"){
         # Use the connection between the variance and scale in logistic distribution
         scale <- sqrt(greyboxForecast$variances * 3 / pi^2);
@@ -1636,6 +1646,8 @@ plot.greybox <- function(x, which=c(1,2,4,6), level=0.95, legend=FALSE,
                           "dlogis"=qlogis(c((1-level)/2, (1+level)/2), 0, 1),
                           "dt"=qt(c((1-level)/2, (1+level)/2), nobs(x)-nparam(x)),
                           "ds"=qs(c((1-level)/2, (1+level)/2), 0, 1),
+                          # The next one is not correct...
+                          "dinvgauss"=qinvgauss(c((1-level)/2, (1+level)/2), mean=1, shape=1),
                           qnorm(c((1-level)/2, (1+level)/2), 0, 1));
         outliers <- which(abs(ellipsis$y)>zValues[2]);
         # cat(paste0(round(length(outliers)/length(ellipsis$y),3)*100,"% of values are outside the bounds\n"));
@@ -1798,6 +1810,17 @@ plot.greybox <- function(x, which=c(1,2,4,6), level=0.95, legend=FALSE,
             do.call(qqplot, ellipsis);
             qqline(ellipsis$y, distribution=function(p) qt(p, df=x$scale));
         }
+        else if(x$distribution=="dinvgauss"){
+            # Transform residuals for something meaningful
+            # This is not 100% accurate, because the dispersion should change as well as mean...
+            if(!any(names(ellipsis)=="main")){
+                ellipsis$main <- "QQ-plot of Inverse Gaussian distribution";
+            }
+            ellipsis$x <- qinvgauss(ppoints(500), mean=1, dispersion=x$scale);
+
+            do.call(qqplot, ellipsis);
+            qqline(ellipsis$y, distribution=function(p) qinvgauss(p, mean=1, dispersion=x$scale));
+        }
         else if(x$distribution=="dchisq"){
             message("Sorry, but we don't produce QQ plots for the Chi-Squared distribution");
         }
@@ -2093,6 +2116,7 @@ print.summary.alm <- function(x, ...){
                       "dfnorm" = "Folded Normal",
                       "dlnorm" = "Log Normal",
                       "dbcnorm" = paste0("Box-Cox Normal with lambda=",round(x$other$lambda,2)),
+                      "dinvgauss" = "Inverse Gaussian",
                       "dchisq" = paste0("Chi-Squared with df=",round(x$other$df,2)),
                       "dpois" = "Poisson",
                       "dnbinom" = paste0("Negative Binomial with size=",round(x$other$size,2)),
@@ -2143,6 +2167,7 @@ print.summary.greybox <- function(x, ...){
                       "dfnorm" = "Folded Normal",
                       "dlnorm" = "Log Normal",
                       "dbcnorm" = "Box-Cox Normal",
+                      "dinvgauss" = "Inverse Gaussian",
                       "dchisq" = "Chi-Squared",
                       "dpois" = "Poisson",
                       "dnbinom" = "Negative Binomial",
@@ -2186,6 +2211,7 @@ print.summary.greyboxC <- function(x, ...){
                       "dfnorm" = "Folded Normal",
                       "dlnorm" = "Log Normal",
                       "dbcnorm" = "Box-Cox Normal",
+                      "dinvgauss" = "Inverse Gaussian",
                       "dchisq" = "Chi-Squared",
                       "dpois" = "Poisson",
                       "dnbinom" = "Negative Binomial",
@@ -2258,6 +2284,9 @@ rstandard.greybox <- function(model, ...){
     else if(model$distribution=="ds"){
             return((errors - mean(errors)) / (model$scale * obs / df)^2);
     }
+    else if(model$distribution=="dinvgauss"){
+            return(errors / mean(errors));
+    }
     else{
         return((errors - mean(errors)) / model$scale * obs / df);
     }
@@ -2295,6 +2324,12 @@ rstudent.greybox <- function(model, ...){
             rstudentised[i] <- errors[i] / (sqrt(sum(errors[-i]^2) / df) * sqrt(3) / pi);
         }
     }
+    else if(model$distribution=="dinvgauss"){
+        errors[] <- residuals(model);
+        for(i in 1:obs){
+            rstudentised[i] <- errors[i] / mean(errors[-i]);
+        }
+    }
     else{
         for(i in 1:obs){
             rstudentised[i] <- errors[i] / sqrt(sum(errors[-i]^2) / df)
@@ -2489,12 +2524,12 @@ vcov.alm <- function(object, ...){
         nVariables <- ncol(matrixXreg);
         matrixXreg <- crossprod(matrixXreg);
         vcovMatrixTry <- try(chol2inv(chol(matrixXreg)), silent=TRUE);
-        if(class(vcovMatrixTry)=="try-error"){
+        if(any(class(vcovMatrixTry)=="try-error")){
             warning(paste0("Choleski decomposition of covariance matrix failed, so we had to revert to the simple inversion.\n",
                            "The estimate of the covariance matrix of parameters might be inaccurate."),
                     call.=FALSE);
             vcovMatrix <- try(solve(matrixXreg, diag(nVariables), tol=1e-20), silent=TRUE);
-            if(class(vcovMatrix)=="try-error"){
+            if(any(class(vcovMatrix)=="try-error")){
                 warning(paste0("Sorry, but the covariance matrix is singular, so we could not invert it.\n",
                                "We failed to produce the covariance matrix of parameters."),
                         call.=FALSE);
@@ -2518,12 +2553,12 @@ vcov.alm <- function(object, ...){
         # colnames(matrixXreg) <- names(coef(object));
         matrixXreg <- crossprod(matrixXreg);
         vcovMatrixTry <- try(chol2inv(chol(matrixXreg)), silent=TRUE);
-        if(class(vcovMatrixTry)=="try-error"){
+        if(any(class(vcovMatrixTry)=="try-error")){
             warning(paste0("Choleski decomposition of covariance matrix failed, so we had to revert to the simple inversion.\n",
                            "The estimate of the covariance matrix of parameters might be inaccurate."),
                     call.=FALSE);
             vcovMatrix <- try(solve(matrixXreg, diag(nVariables), tol=1e-20), silent=TRUE);
-            if(class(vcovMatrix)=="try-error"){
+            if(any(class(vcovMatrix)=="try-error")){
                 warning(paste0("Sorry, but the covariance matrix is singular, so we could not invert it.\n",
                                "We failed to produce the covariance matrix of parameters."),
                         call.=FALSE);
@@ -2540,10 +2575,10 @@ vcov.alm <- function(object, ...){
         # Form the call for alm
         newCall <- object$call;
         if(interceptIsNeeded){
-            newCall$formula <- as.formula(paste0(all.vars(newCall$formula)[1],"~."));
+            newCall$formula <- as.formula(paste0("`",all.vars(newCall$formula)[1],"`~."));
         }
         else{
-            newCall$formula <- as.formula(paste0(all.vars(newCall$formula)[1],"~.-1"));
+            newCall$formula <- as.formula(paste0("`",all.vars(newCall$formula)[1],"`~.-1"));
         }
         newCall$data <- object$data;
         newCall$subset <- object$subset;