Skip to content

Commit

Permalink
dls and dllaplace distributions in alm(). Issue #13
Browse files Browse the repository at this point in the history
  • Loading branch information
Ivan Svetunkov committed Mar 26, 2020
1 parent 74ac53d commit 153b966
Show file tree
Hide file tree
Showing 13 changed files with 177 additions and 103 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: greybox
Type: Package
Title: Toolbox for Model Building and Forecasting
Version: 0.5.9.41015
Version: 0.5.9.41016
Date: 2020-03-26
Authors@R: c(person("Ivan", "Svetunkov", email = "ivan@svetunkov.ru", role = c("aut", "cre"),
comment="Lecturer at Centre for Marketing Analytics and Forecasting, Lancaster University, UK"),
Expand Down
1 change: 1 addition & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Changes:
* plot.rmcb() now has a "select" parameter.
* alm() now returns class "occurrence" if occurrence was used. This is needed in order to connect alm() with es() / mes() from smooth.
* Introduced a proper is.occurrence() method.
* Log Laplace and Log S distributions in alm.

Bugfixes:
* A fix of a bug in case the data is not provided explicitly.
Expand Down
119 changes: 51 additions & 68 deletions R/alm.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@
#' \item \link[stats]{dnorm} - Normal distribution,
#' \item \link[stats]{dlogis} - Logistic Distribution,
#' \item \link[greybox]{dlaplace} - Laplace distribution,
#' \item dllaplace - Log Laplace distribution,
#' \item \link[greybox]{dalaplace} - Asymmetric Laplace distribution,
#' \item \link[stats]{dt} - T-distribution,
#' \item \link[greybox]{ds} - S-distribution,
#' \item dls - Log S-distribution,
#' \item \link[greybox]{dfnorm} - Folded normal distribution,
#' \item \link[stats]{dlnorm} - Log normal distribution,
#' \item \link[greybox]{dbcnorm} - Box-Cox normal distribution,
Expand Down Expand Up @@ -197,7 +199,7 @@
#' @export alm
alm <- function(formula, data, subset, na.action,
distribution=c("dnorm","dlogis","dlaplace","dalaplace","ds","dt",
"dfnorm","dlnorm","dbcnorm","dinvgauss",
"dfnorm","dlnorm","dllaplace","dls","dbcnorm","dinvgauss",
"dpois","dnbinom",
"dbeta",
"plogis","pnorm"),
Expand Down Expand Up @@ -336,10 +338,12 @@ alm <- function(formula, data, subset, na.action,
"dlnorm" =,
"dbcnorm"=,
"dlaplace" =,
"dllaplace" =,
"dalaplace" =,
"dlogis" =,
"dt" =,
"ds" =,
"dls" =,
"pnorm" =,
"plogis" = matrixXreg %*% B
);
Expand All @@ -352,9 +356,11 @@ alm <- function(formula, data, subset, na.action,
"dbcnorm" = sqrt(sum((bcTransform(y[otU],other)-mu[otU])^2)/obsInsample),
"dinvgauss" = sum((y[otU]/mu[otU]-1)^2 / (y[otU]/mu[otU]))/obsInsample,
"dlaplace" = sum(abs(y[otU]-mu[otU]))/obsInsample,
"dllaplace" = sum(abs(log(y[otU])-mu[otU]))/obsInsample,
"dalaplace" = sum((y[otU]-mu[otU]) * (other - (y[otU]<=mu[otU])*1))/obsInsample,
"dlogis" = sqrt(sum((y[otU]-mu[otU])^2)/obsInsample * 3 / pi^2),
"ds" = sum(sqrt(abs(y[otU]-mu[otU]))) / (obsInsample*2),
"dls" = sum(sqrt(abs(log(y[otU])-mu[otU]))) / (obsInsample*2),
"dt" = ,
"dchisq" =,
"dnbinom" = abs(other),
Expand Down Expand Up @@ -405,11 +411,13 @@ alm <- function(formula, data, subset, na.action,
"dinvgauss" = dinvgauss(y[otU], mean=fitterReturn$mu[otU],
dispersion=fitterReturn$scale/fitterReturn$mu[otU], log=TRUE),
"dlaplace" = dlaplace(y[otU], mu=fitterReturn$mu[otU], scale=fitterReturn$scale, log=TRUE),
"dllaplace" = dlaplace(log(y[otU]), mu=fitterReturn$mu[otU], scale=fitterReturn$scale, log=TRUE)-log(y[otU]),
"dalaplace" = dalaplace(y[otU], mu=fitterReturn$mu[otU], scale=fitterReturn$scale,
alpha=fitterReturn$other, log=TRUE),
"dlogis" = dlogis(y[otU], location=fitterReturn$mu[otU], scale=fitterReturn$scale, log=TRUE),
"dt" = dt(y[otU]-fitterReturn$mu[otU], df=fitterReturn$scale, log=TRUE),
"ds" = ds(y[otU], mu=fitterReturn$mu[otU], scale=fitterReturn$scale, log=TRUE),
"dls" = ds(log(y[otU]), mu=fitterReturn$mu[otU], scale=fitterReturn$scale, log=TRUE)-log(y[otU]),
"dchisq" = dchisq(y[otU], df=fitterReturn$scale, ncp=fitterReturn$mu[otU], log=TRUE),
"dpois" = dpois(y[otU], lambda=fitterReturn$mu[otU], log=TRUE),
"dnbinom" = dnbinom(y[otU], mu=fitterReturn$mu[otU], size=fitterReturn$scale, log=TRUE),
Expand All @@ -423,34 +431,36 @@ alm <- function(formula, data, subset, na.action,
# The differential entropy for the models with the missing data
if(occurrenceModel){
CFValue[] <- CFValue + switch(distribution,
"dnorm" =,
"dfnorm" =,
"dbcnorm" =,
"dlnorm" = obsZero*(log(sqrt(2*pi)*fitterReturn$scale)+0.5),
# "dinvgauss" = 0.5*(obsZero*(log(pi/2)+1+suppressWarnings(log(fitterReturn$scale)))-
# sum(log(fitterReturn$mu[!otU]))),
"dinvgauss" = obsZero*(0.5*(log(pi/2)+1+suppressWarnings(log(fitterReturn$scale)))),
"dlaplace" =,
"dalaplace" = obsZero*(1 + log(2*fitterReturn$scale)),
"dlogis" = obsZero*2,
"dt" = obsZero*((fitterReturn$scale+1)/2 *
(digamma((fitterReturn$scale+1)/2)-digamma(fitterReturn$scale/2)) +
log(sqrt(fitterReturn$scale) * beta(fitterReturn$scale/2,0.5))),
"ds" = obsZero*(2 + 2*log(2*fitterReturn$scale)),
"dchisq" = obsZero*(log(2)*gamma(fitterReturn$scale/2)-
(1-fitterReturn$scale/2)*digamma(fitterReturn$scale/2)+
fitterReturn$scale/2),
"dbeta" = sum(log(beta(fitterReturn$mu[otU],fitterReturn$scale[otU]))-
(fitterReturn$mu[otU]-1)*
(digamma(fitterReturn$mu[otU])-
digamma(fitterReturn$mu[otU]+fitterReturn$scale[otU]))-
(fitterReturn$scale[otU]-1)*
(digamma(fitterReturn$scale[otU])-
digamma(fitterReturn$mu[otU]+fitterReturn$scale[otU]))),
# This is a normal approximation of the real entropy
# "dpois" = sum(0.5*log(2*pi*fitterReturn$scale)+0.5),
# "dnbinom" = obsZero*(log(sqrt(2*pi)*fitterReturn$scale)+0.5),
0
"dnorm" =,
"dfnorm" =,
"dbcnorm" =,
"dlnorm" = obsZero*(log(sqrt(2*pi)*fitterReturn$scale)+0.5),
# "dinvgauss" = 0.5*(obsZero*(log(pi/2)+1+suppressWarnings(log(fitterReturn$scale)))-
# sum(log(fitterReturn$mu[!otU]))),
"dinvgauss" = obsZero*(0.5*(log(pi/2)+1+suppressWarnings(log(fitterReturn$scale)))),
"dlaplace" =,
"dllaplace" =,
"dalaplace" = obsZero*(1 + log(2*fitterReturn$scale)),
"dlogis" = obsZero*2,
"dt" = obsZero*((fitterReturn$scale+1)/2 *
(digamma((fitterReturn$scale+1)/2)-digamma(fitterReturn$scale/2)) +
log(sqrt(fitterReturn$scale) * beta(fitterReturn$scale/2,0.5))),
"ds" =,
"dls" = obsZero*(2 + 2*log(2*fitterReturn$scale)),
"dchisq" = obsZero*(log(2)*gamma(fitterReturn$scale/2)-
(1-fitterReturn$scale/2)*digamma(fitterReturn$scale/2)+
fitterReturn$scale/2),
"dbeta" = sum(log(beta(fitterReturn$mu[otU],fitterReturn$scale[otU]))-
(fitterReturn$mu[otU]-1)*
(digamma(fitterReturn$mu[otU])-
digamma(fitterReturn$mu[otU]+fitterReturn$scale[otU]))-
(fitterReturn$scale[otU]-1)*
(digamma(fitterReturn$scale[otU])-
digamma(fitterReturn$mu[otU]+fitterReturn$scale[otU]))),
# This is a normal approximation of the real entropy
# "dpois" = sum(0.5*log(2*pi*fitterReturn$scale)+0.5),
# "dnbinom" = obsZero*(log(sqrt(2*pi)*fitterReturn$scale)+0.5),
0
);
}

Expand Down Expand Up @@ -680,7 +690,7 @@ alm <- function(formula, data, subset, na.action,
errors <- vector("numeric", obsInsample);
ot <- vector("logical", obsInsample);

if(any(y<0) & any(distribution==c("dfnorm","dlnorm","dbcnorm","dinvgauss","dchisq","dpois","dnbinom"))){
if(any(y<0) & any(distribution==c("dfnorm","dlnorm","dllaplace","dls","dbcnorm","dinvgauss","dchisq","dpois","dnbinom"))){
stop(paste0("Negative values are not allowed in the response variable for the distribution '",distribution,"'"),
call.=FALSE);
}
Expand Down Expand Up @@ -880,7 +890,7 @@ alm <- function(formula, data, subset, na.action,
}
nVariables <- nVariables + arOrder;
# Write down the values for the matrixXreg in the necessary transformations
if(any(distribution==c("dlnorm","dpois","dnbinom"))){
if(any(distribution==c("dlnorm","dllaplace","dls","dpois","dnbinom"))){
if(any(y[otU]==0)){
# Use Box-Cox if there are zeroes
ariElements[] <- bcTransform(ariElements,0.01);
Expand Down Expand Up @@ -916,7 +926,7 @@ alm <- function(formula, data, subset, na.action,

#### I(0) initialisation ####
if(iOrder==0){
if(any(distribution==c("dlnorm","dpois","dnbinom","dinvgauss"))){
if(any(distribution==c("dlnorm","dllaplace","dls","dpois","dnbinom","dinvgauss"))){
if(any(y[otU]==0)){
# Use Box-Cox if there are zeroes
B <- .lm.fit(matrixXreg[otU,,drop=FALSE],bcTransform(y[otU],0.01))$coefficients;
Expand Down Expand Up @@ -991,7 +1001,7 @@ alm <- function(formula, data, subset, na.action,
matrixXregForDiffs <- matrixXregForDiffs[-c(1:iOrder),,drop=FALSE];
}

if(any(distribution==c("dlnorm","dpois","dnbinom","dinvgauss"))){
if(any(distribution==c("dlnorm","dllaplace","dls","dpois","dnbinom","dinvgauss"))){
B <- .lm.fit(matrixXregForDiffs,diff(log(y[otU]),differences=iOrder))$coefficients;
}
else if(any(distribution==c("plogis","pnorm"))){
Expand Down Expand Up @@ -1263,7 +1273,9 @@ alm <- function(formula, data, subset, na.action,
"dpois" =,
"dnbinom" = mu,
"dchisq" = mu + df,
"dlnorm" = exp(mu),
"dlnorm" =,
"dllaplace" =,
"dls" = exp(mu),
"dbcnorm" = bcTransformInv(mu,lambda),
"dbeta" = mu / (mu + scale),
"pnorm" = pnorm(mu, mean=0, sd=1),
Expand All @@ -1284,8 +1296,10 @@ alm <- function(formula, data, subset, na.action,
"dpois" = y - mu,
"dinvgauss" = y / mu,
"dchisq" = sqrt(y) - sqrt(mu),
"dlnorm"= log(y) - mu,
"dbcnorm"= bcTransform(y,lambda) - mu,
"dlnorm" =,
"dllaplace" =,
"dls" = log(y) - mu,
"dbcnorm" = bcTransform(y,lambda) - mu,
"pnorm" = qnorm((y - pnorm(mu, 0, 1) + 1) / 2, 0, 1),
"plogis" = log((1 + y * (1 + exp(mu))) / (1 + exp(mu) * (2 - y) - y))
# Here we use the proxy from Svetunkov & Boylan (2019)
Expand Down Expand Up @@ -1438,49 +1452,18 @@ alm <- function(formula, data, subset, na.action,

# New data and new response variable
dataNew <- mf$data[mf$subset,,drop=FALSE];
# if(ncol(dataNew)>1){
# y <- as.matrix(dataNew[,all.vars(formula)[1],drop=FALSE]);
# }
# else{
# y <- dataNew[,1,drop=FALSE]
# }
# If there are NaN values, substitute them by zeroes
# if(any(is.nan(y))){
# y[is.nan(y)] <- 0;
# }
# ot <- y!=0;
dataNew[,all.vars(formula)[1]] <- (ot)*1;

if(!occurrenceProvided){
occurrence <- do.call("alm", list(formula=formula, data=dataNew, distribution=occurrence, ar=arOrder, i=iOrder));
}

# Corrected fitted (with zeroes, when y=0)
# yFittedNew <- yFitted;
# yFitted <- vector("numeric",length(y));
# yFitted[] <- 0;
# Corrected fitted (with probabilities)
yFitted[] <- yFitted * fitted(occurrence);

# Corrected errors (with zeroes, when y=0)
# errorsNew <- errors;
# errors <- vector("numeric",length(y));
# errors[] <- 0;
# errors[ot] <- errorsNew;

# Correction of the likelihood
CFValue <- CFValue - occurrence$logLik;

# Form the final dataWork in order to return it in the data.
# dataWork <- eval(mf, parent.frame());
# dataWork <- model.matrix(dataWork,data=dataWork);

# Add AR elements if needed
# if(ariModel){
# ariMatrix <- matrix(0, nrow(dataWork), ariOrder, dimnames=list(NULL, ariTransformedNames));
# ariMatrix[] <- ariElements;
# dataWork <- cbind(dataWork,ariMatrix);
# }

if(interceptIsNeeded){
# This shit is needed, because R has habit of converting everything into vectors...
dataWork <- cbind(y,matrixXreg[,-1,drop=FALSE]);
Expand Down
7 changes: 5 additions & 2 deletions R/lmCombine.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
#' one are produced and then combined.
#' @param silent If \code{FALSE}, then nothing is silent, everything is printed
#' out. \code{TRUE} means that nothing is produced.
#' @param distribution Distribution to pass to \code{alm()}.
#' @param distribution Distribution to pass to \code{alm()}. See \link[greybox]{alm}
#' for details.
#' @param parallel If \code{TRUE}, then the model fitting is done in parallel.
#' WARNING! Packages \code{foreach} and either \code{doMC} (Linux and Mac only)
#' or \code{doParallel} are needed in order to run the function in parallel.
Expand Down Expand Up @@ -84,7 +85,9 @@
#'
#' @export lmCombine
lmCombine <- function(data, ic=c("AICc","AIC","BIC","BICc"), bruteforce=FALSE, silent=TRUE,
distribution=c("dnorm","dfnorm","dlnorm","dlaplace","ds","dchisq","dlogis",
distribution=c("dnorm","dlogis","dlaplace","dalaplace","ds","dt",
"dfnorm","dlnorm","dllaplace","dls","dbcnorm","dinvgauss",
"dpois","dnbinom",
"plogis","pnorm"),
parallel=FALSE, ...){
# Function combines linear regression models and produces the combined lm object.
Expand Down
9 changes: 6 additions & 3 deletions R/lmDynamic.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
#' one are produced and then combined.
#' @param silent If \code{FALSE}, then nothing is silent, everything is printed
#' out. \code{TRUE} means that nothing is produced.
#' @param distribution Distribution to pass to \code{alm()}.
#' @param distribution Distribution to pass to \code{alm()}. See \link[greybox]{alm}
#' for details.
#' @param parallel If \code{TRUE}, then the model fitting is done in parallel.
#' WARNING! Packages \code{foreach} and either \code{doMC} (Linux and Mac only)
#' or \code{doParallel} are needed in order to run the function in parallel.
Expand Down Expand Up @@ -73,8 +74,10 @@
#'
#' @export lmDynamic
lmDynamic <- function(data, ic=c("AICc","AIC","BIC","BICc"), bruteforce=FALSE, silent=TRUE,
distribution=c("dnorm","dfnorm","dlnorm","dlaplace","ds","dchisq","dlogis",
"plogis","pnorm"),
distribution=c("dnorm","dlogis","dlaplace","dalaplace","ds","dt",
"dfnorm","dlnorm","dllaplace","dls","dbcnorm","dinvgauss",
"dpois","dnbinom",
"plogis","pnorm"),
parallel=FALSE, ...){
# Function combines linear regression models and produces the combined lm object.
cl <- match.call();
Expand Down
Loading

0 comments on commit 153b966

Please sign in to comment.