Skip to content

Commit

Permalink
Complete fix for the R issue and IG, #13
Browse files Browse the repository at this point in the history
  • Loading branch information
Ivan Svetunkov committed Dec 5, 2019
1 parent b847e6f commit 0118d8e
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 16 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: greybox
Type: Package
Title: Toolbox for Model Building and Forecasting
Version: 0.5.7.41005
Date: 2019-12-04
Version: 0.5.7.41006
Date: 2019-12-05
Authors@R: person("Ivan", "Svetunkov", email = "ivan@svetunkov.ru", role = c("aut", "cre"),
comment="Lecturer at Centre for Marketing Analytics and Forecasting, Lancaster University, UK")
URL: https://github.com/config-i1/greybox
Expand Down
2 changes: 1 addition & 1 deletion NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ greybox v0.5.7 (Release data: 2019-12-04)
Changes:
* plot.greybox() now also produces LOWESS lines on the scatterplots. There is a parameters that regulates this.
* A bit of tuning of plot.greybox() graphs.
* Started work on Inverse Gaussian distribution for alm().
* Inverse Gaussian distribution for alm() is now available. Not all the methods work okay yet. Predictions might be off.

Bugfixes:
* alm() would be stuck in some cases, when determination returns NaNs.
Expand Down
32 changes: 25 additions & 7 deletions R/alm.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#' \item \link[stats]{dlnorm} - Log normal distribution,
#' \item \link[greybox]{dbcnorm} - Box-Cox normal distribution,
#' \item \link[stats]{dchisq} - Chi-Squared Distribution,
# \item \link[statmod]{dinvgauss} - Inverse Gaussian distribution,
#' \item \link[statmod]{dinvgauss} - Inverse Gaussian distribution,
#' \item \link[stats]{dbeta} - Beta distribution,
#' \item \link[stats]{dpois} - Poisson Distribution,
#' \item \link[stats]{dnbinom} - Negative Binomial Distribution,
Expand Down Expand Up @@ -192,6 +192,7 @@
#' @importFrom stats model.frame sd terms model.matrix
#' @importFrom stats dchisq dlnorm dnorm dlogis dpois dnbinom dt dbeta
#' @importFrom stats plogis
#' @importFrom statmod dinvgauss
#' @importFrom forecast Arima
#' @export alm
alm <- function(formula, data, subset, na.action,
Expand Down Expand Up @@ -347,10 +348,11 @@ alm <- function(formula, data, subset, na.action,
"dnbinom" = exp(matrixXreg %*% B),
"dchisq" = ifelseFast(any(matrixXreg %*% B <0),1E+100,(matrixXreg %*% B)^2),
"dbeta" = exp(matrixXreg %*% B[1:(length(B)/2)]),
"dbcnorm"=,
"dnorm" =,
"dfnorm" =,
"dlnorm" =,
"dbcnorm"=,
"dinvgauss" =,
"dlaplace" =,
"dalaplace" =,
"dlogis" =,
Expand All @@ -366,6 +368,7 @@ alm <- function(formula, data, subset, na.action,
"dfnorm" = abs(other),
"dlnorm" = sqrt(sum((log(y[otU])-mu[otU])^2)/obsInsample),
"dbcnorm" = sqrt(sum((bcTransform(y[otU],other)-mu[otU])^2)/obsInsample),
"dinvgauss" = sum((y[otU]-mu[otU])^2 / (mu[otU]^2*y[otU]))/obsInsample,
"dlaplace" = sum(abs(y[otU]-mu[otU]))/obsInsample,
"dalaplace" = sum((y[otU]-mu[otU]) * (other - (y[otU]<=mu[otU])*1))/obsInsample,
"dlogis" = sqrt(sum((y[otU]-mu[otU])^2)/obsInsample * 3 / pi^2),
Expand Down Expand Up @@ -417,6 +420,7 @@ alm <- function(formula, data, subset, na.action,
"dlnorm" = dlnorm(y[otU], meanlog=fitterReturn$mu[otU], sdlog=fitterReturn$scale, log=TRUE),
"dbcnorm" = dbcnorm(y[otU], mu=fitterReturn$mu[otU], sigma=fitterReturn$scale,
lambda=fitterReturn$other, log=TRUE),
"dinvgauss" = dinvgauss(y[otU], mean=fitterReturn$mu[otU], dispersion=fitterReturn$scale, log=TRUE),
"dlaplace" = dlaplace(y[otU], mu=fitterReturn$mu[otU], scale=fitterReturn$scale, log=TRUE),
"dalaplace" = dalaplace(y[otU], mu=fitterReturn$mu[otU], scale=fitterReturn$scale,
alpha=fitterReturn$other, log=TRUE),
Expand All @@ -440,6 +444,7 @@ alm <- function(formula, data, subset, na.action,
"dfnorm" =,
"dbcnorm" =,
"dlnorm" = obsZero*(log(sqrt(2*pi)*fitterReturn$scale)+0.5),
"dinvgauss" = obsZero*0.5*(log(pi)+1-log(2/fitterReturn$scale)),
"dlaplace" =,
"dalaplace" = obsZero*(1 + log(2*fitterReturn$scale)),
"dlogis" = obsZero*2,
Expand Down Expand Up @@ -680,11 +685,16 @@ alm <- function(formula, data, subset, na.action,
errors <- vector("numeric", obsInsample);
ot <- vector("logical", obsInsample);

if(any(y<0) & any(distribution==c("dfnorm","dlnorm","dbcnorm","dchisq","dpois","dnbinom"))){
if(any(y<0) & any(distribution==c("dfnorm","dlnorm","dbcnorm","dinvgauss","dchisq","dpois","dnbinom"))){
stop(paste0("Negative values are not allowed in the response variable for the distribution '",distribution,"'"),
call.=FALSE);
}

if(any(y==0) & any(distribution==c("dinvgauss"))){
stop(paste0("Zero values are not allowed in the response variable for the distribution '",distribution,"'"),
call.=FALSE);
}

if(any(distribution==c("dpois","dnbinom"))){
if(any(y!=trunc(y))){
stop(paste0("Count data is needed for the distribution '",distribution,"', but you have fractional numbers. ",
Expand Down Expand Up @@ -962,6 +972,11 @@ alm <- function(formula, data, subset, na.action,
BUpper <- rep(Inf,length(B));
}
}
else if(distribution==c("dinvgauss")){
B <- solve(t(matrixXreg[otU,,drop=FALSE]) %*% diag(y[otU]) %*% matrixXreg[otU,,drop=FALSE]) %*% t(matrixXreg[otU,,drop=FALSE]) %*% rep(1,sum(otU));
BLower <- -Inf;
BUpper <- Inf;
}
else{
B <- .lm.fit(matrixXreg[otU,,drop=FALSE],y[otU])$coefficients;
BLower <- -Inf;
Expand Down Expand Up @@ -1100,7 +1115,7 @@ alm <- function(formula, data, subset, na.action,
}
if(is.null(ellipsis$algorithm)){
# if(recursiveModel){
algorithm <- "NLOPT_LN_BOBYQA";
# algorithm <- "NLOPT_LN_BOBYQA";
# }
# else{
algorithm <- "NLOPT_LN_SBPLX";
Expand Down Expand Up @@ -1244,6 +1259,7 @@ alm <- function(formula, data, subset, na.action,
yFitted[] <- switch(distribution,
"dfnorm" = sqrt(2/pi)*scale*exp(-mu^2/(2*scale^2))+mu*(1-2*pnorm(-mu/scale)),
"dnorm" =,
"dinvgauss" =,
"dlaplace" =,
"dalaplace" =,
"dlogis" =,
Expand Down Expand Up @@ -1271,6 +1287,7 @@ alm <- function(formula, data, subset, na.action,
"dnorm" =,
"dnbinom" =,
"dpois" = y - mu,
"dinvgauss" = y / mu,
"dchisq" = sqrt(y) - sqrt(mu),
"dlnorm"= log(y) - mu,
"dbcnorm"= bcTransform(y,lambda) - mu,
Expand Down Expand Up @@ -1330,14 +1347,15 @@ alm <- function(formula, data, subset, na.action,
if(vcovProduce){
# Only vcov is needed, no point in redoing the occurrenceModel
occurrenceModel <- FALSE;
method.args <- list(eps=1e-4, d=0.1, r=4)
method.args <- list(eps=1e-4, d=0.1, r=4);
# if(CDF){
# method.args <- list(d=1e-6, r=6);
# }
# else{
# if(any(distribution==c("dnbinom","dlaplace","dalaplace","dbcnorm"))){
# method.args <- list(d=1e-6, r=6);
# }
if(distribution==c("dinvgauss")){
method.args <- list(d=1e-6, r=6);
}
# else{
# method.args <- list(d=1e-4, r=4);
# }
Expand Down
47 changes: 41 additions & 6 deletions R/methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ pointLik.alm <- function(object, ...){
"dfnorm" = dfnorm(y, mu=mu, sigma=scale, log=TRUE),
"dlnorm" = dlnorm(y, meanlog=mu, sdlog=scale, log=TRUE),
"dbcnorm" = dbcnorm(y, mu=mu, sigma=scale, lambda=object$other$lambda, log=TRUE),
"dinvgauss" = dinvgauss(y, mean=mu, dispersion=scale, log=TRUE),
"dlaplace" = dlaplace(y, mu=mu, scale=scale, log=TRUE),
"dalaplace" = dalaplace(y, mu=mu, scale=scale, alpha=object$other$alpha, log=TRUE),
"dlogis" = dlogis(y, location=mu, scale=scale, log=TRUE),
Expand All @@ -249,6 +250,7 @@ pointLik.alm <- function(object, ...){
"dfnorm" =,
"dbcnorm" =,
"dlnorm" = log(sqrt(2*pi)*scale)+0.5,
"dinvgauss" = 0.5*(log(pi)+1-log(2*scale)),
"dlaplace" =,
"dalaplace" = (1 + log(2*scale)),
"dlogis" = 2,
Expand Down Expand Up @@ -538,6 +540,7 @@ confint.lmGreybox <- function(object, parm, level=0.95, ...){

#' @rdname predict.greybox
#' @importFrom stats predict qchisq qlnorm qlogis qpois qnbinom qbeta
#' @importFrom statmode qinvgauss
#' @export
predict.alm <- function(object, newdata=NULL, interval=c("none", "confidence", "prediction"),
level=0.95, side=c("both","upper","lower"), ...){
Expand Down Expand Up @@ -688,6 +691,13 @@ predict.alm <- function(object, newdata=NULL, interval=c("none", "confidence", "
}
greyboxForecast$scale <- sigma;
}
else if(object$distribution=="dinvgauss"){
greyboxForecast$scale <- greyboxForecast$variance / greyboxForecast$mean^3;
if(interval!="n"){
greyboxForecast$lower[] <- qinvgauss(levelLow,mean=greyboxForecast$mean,dispersion=greyboxForecast$scale);
greyboxForecast$upper[] <- qinvgauss(levelUp,mean=greyboxForecast$mean,dispersion=greyboxForecast$scale);
}
}
else if(object$distribution=="dlogis"){
# Use the connection between the variance and scale in logistic distribution
scale <- sqrt(greyboxForecast$variances * 3 / pi^2);
Expand Down Expand Up @@ -1636,6 +1646,8 @@ plot.greybox <- function(x, which=c(1,2,4,6), level=0.95, legend=FALSE,
"dlogis"=qlogis(c((1-level)/2, (1+level)/2), 0, 1),
"dt"=qt(c((1-level)/2, (1+level)/2), nobs(x)-nparam(x)),
"ds"=qs(c((1-level)/2, (1+level)/2), 0, 1),
# The next one is not correct...
"dinvgauss"=qinvgauss(c((1-level)/2, (1+level)/2), mean=1, shape=1),
qnorm(c((1-level)/2, (1+level)/2), 0, 1));
outliers <- which(abs(ellipsis$y)>zValues[2]);
# cat(paste0(round(length(outliers)/length(ellipsis$y),3)*100,"% of values are outside the bounds\n"));
Expand Down Expand Up @@ -1798,6 +1810,17 @@ plot.greybox <- function(x, which=c(1,2,4,6), level=0.95, legend=FALSE,
do.call(qqplot, ellipsis);
qqline(ellipsis$y, distribution=function(p) qt(p, df=x$scale));
}
else if(x$distribution=="dinvgauss"){
# Transform residuals for something meaningful
# This is not 100% accurate, because the dispersion should change as well as mean...
if(!any(names(ellipsis)=="main")){
ellipsis$main <- "QQ-plot of Inverse Gaussian distribution";
}
ellipsis$x <- qinvgauss(ppoints(500), mean=1, dispersion=x$scale);

do.call(qqplot, ellipsis);
qqline(ellipsis$y, distribution=function(p) qinvgauss(p, mean=1, dispersion=x$scale));
}
else if(x$distribution=="dchisq"){
message("Sorry, but we don't produce QQ plots for the Chi-Squared distribution");
}
Expand Down Expand Up @@ -2093,6 +2116,7 @@ print.summary.alm <- function(x, ...){
"dfnorm" = "Folded Normal",
"dlnorm" = "Log Normal",
"dbcnorm" = paste0("Box-Cox Normal with lambda=",round(x$other$lambda,2)),
"dinvgauss" = "Inverse Gaussian",
"dchisq" = paste0("Chi-Squared with df=",round(x$other$df,2)),
"dpois" = "Poisson",
"dnbinom" = paste0("Negative Binomial with size=",round(x$other$size,2)),
Expand Down Expand Up @@ -2143,6 +2167,7 @@ print.summary.greybox <- function(x, ...){
"dfnorm" = "Folded Normal",
"dlnorm" = "Log Normal",
"dbcnorm" = "Box-Cox Normal",
"dinvgauss" = "Inverse Gaussian",
"dchisq" = "Chi-Squared",
"dpois" = "Poisson",
"dnbinom" = "Negative Binomial",
Expand Down Expand Up @@ -2186,6 +2211,7 @@ print.summary.greyboxC <- function(x, ...){
"dfnorm" = "Folded Normal",
"dlnorm" = "Log Normal",
"dbcnorm" = "Box-Cox Normal",
"dinvgauss" = "Inverse Gaussian",
"dchisq" = "Chi-Squared",
"dpois" = "Poisson",
"dnbinom" = "Negative Binomial",
Expand Down Expand Up @@ -2258,6 +2284,9 @@ rstandard.greybox <- function(model, ...){
else if(model$distribution=="ds"){
return((errors - mean(errors)) / (model$scale * obs / df)^2);
}
else if(model$distribution=="dinvgauss"){
return(errors / mean(errors));
}
else{
return((errors - mean(errors)) / model$scale * obs / df);
}
Expand Down Expand Up @@ -2295,6 +2324,12 @@ rstudent.greybox <- function(model, ...){
rstudentised[i] <- errors[i] / (sqrt(sum(errors[-i]^2) / df) * sqrt(3) / pi);
}
}
else if(model$distribution=="dinvgauss"){
errors[] <- residuals(model);
for(i in 1:obs){
rstudentised[i] <- errors[i] / mean(errors[-i]);
}
}
else{
for(i in 1:obs){
rstudentised[i] <- errors[i] / sqrt(sum(errors[-i]^2) / df)
Expand Down Expand Up @@ -2489,12 +2524,12 @@ vcov.alm <- function(object, ...){
nVariables <- ncol(matrixXreg);
matrixXreg <- crossprod(matrixXreg);
vcovMatrixTry <- try(chol2inv(chol(matrixXreg)), silent=TRUE);
if(class(vcovMatrixTry)=="try-error"){
if(any(class(vcovMatrixTry)=="try-error")){
warning(paste0("Choleski decomposition of covariance matrix failed, so we had to revert to the simple inversion.\n",
"The estimate of the covariance matrix of parameters might be inaccurate."),
call.=FALSE);
vcovMatrix <- try(solve(matrixXreg, diag(nVariables), tol=1e-20), silent=TRUE);
if(class(vcovMatrix)=="try-error"){
if(any(class(vcovMatrix)=="try-error")){
warning(paste0("Sorry, but the covariance matrix is singular, so we could not invert it.\n",
"We failed to produce the covariance matrix of parameters."),
call.=FALSE);
Expand All @@ -2518,12 +2553,12 @@ vcov.alm <- function(object, ...){
# colnames(matrixXreg) <- names(coef(object));
matrixXreg <- crossprod(matrixXreg);
vcovMatrixTry <- try(chol2inv(chol(matrixXreg)), silent=TRUE);
if(class(vcovMatrixTry)=="try-error"){
if(any(class(vcovMatrixTry)=="try-error")){
warning(paste0("Choleski decomposition of covariance matrix failed, so we had to revert to the simple inversion.\n",
"The estimate of the covariance matrix of parameters might be inaccurate."),
call.=FALSE);
vcovMatrix <- try(solve(matrixXreg, diag(nVariables), tol=1e-20), silent=TRUE);
if(class(vcovMatrix)=="try-error"){
if(any(class(vcovMatrix)=="try-error")){
warning(paste0("Sorry, but the covariance matrix is singular, so we could not invert it.\n",
"We failed to produce the covariance matrix of parameters."),
call.=FALSE);
Expand All @@ -2540,10 +2575,10 @@ vcov.alm <- function(object, ...){
# Form the call for alm
newCall <- object$call;
if(interceptIsNeeded){
newCall$formula <- as.formula(paste0(all.vars(newCall$formula)[1],"~."));
newCall$formula <- as.formula(paste0("`",all.vars(newCall$formula)[1],"`~."));
}
else{
newCall$formula <- as.formula(paste0(all.vars(newCall$formula)[1],"~.-1"));
newCall$formula <- as.formula(paste0("`",all.vars(newCall$formula)[1],"`~.-1"));
}
newCall$data <- object$data;
newCall$subset <- object$subset;
Expand Down

0 comments on commit 0118d8e

Please sign in to comment.