Skip to content

Commit

Permalink
Logit-normal distribution in alm(), proposed in #13
Browse files Browse the repository at this point in the history
  • Loading branch information
config-i1 committed Nov 11, 2020
1 parent c9fe95b commit cedbd5b
Show file tree
Hide file tree
Showing 12 changed files with 495 additions and 30 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: greybox
Type: Package
Title: Toolbox for Model Building and Forecasting
Version: 0.6.4.41004
Date: 2020-11-10
Version: 0.6.4.41005
Date: 2020-11-11
Authors@R: c(person("Ivan", "Svetunkov", email = "ivan@svetunkov.ru", role = c("aut", "cre"),
comment="Lecturer at Centre for Marketing Analytics and Forecasting, Lancaster University, UK"),
person("Yves R.", "Sagaert", role = c("ctb"),
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ export(determ)
export(determination)
export(dfnorm)
export(dlaplace)
export(dlogitnorm)
export(ds)
export(dtplnorm)
export(errorType)
Expand Down Expand Up @@ -150,6 +151,7 @@ export(pcor)
export(pfnorm)
export(pinball)
export(plaplace)
export(plogitnorm)
export(pointLik)
export(polyprod)
export(ps)
Expand All @@ -158,6 +160,7 @@ export(qalaplace)
export(qbcnorm)
export(qfnorm)
export(qlaplace)
export(qlogitnorm)
export(qs)
export(qtplnorm)
export(rAME)
Expand All @@ -168,6 +171,7 @@ export(ralaplace)
export(rbcnorm)
export(rfnorm)
export(rlaplace)
export(rlogitnorm)
export(rmcb)
export(ro)
export(rs)
Expand Down
3 changes: 2 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
greybox v0.6.4 (Release data: 2020-11-10)
greybox v0.6.4 (Release data: 2020-11-11)
==============

Changes:
* Updated the description of alm().
* plot.greybox(x, 1) now does not have the symmetric axes. This helps in reading the data in cases of fat tails.
* Updated vignette of ro() to reflect the new defaults.
* Reverting the hessian calculation without normalisation.
* New set of distribution functions: logitnorm

Bugfixes:
* coefbootstrap() would not work in case of poorly prepared names of variables (e.g. with spaces and quotes).
Expand Down
35 changes: 27 additions & 8 deletions R/alm.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#' \item \link[greybox]{dbcnorm} - Box-Cox normal distribution,
# \item \link[stats]{dchisq} - Chi-Squared Distribution,
#' \item \link[statmod]{dinvgauss} - Inverse Gaussian distribution,
#' \item \link[greybox]{dlogitnorm} - Logit-normal distribution,
#' \item \link[stats]{dbeta} - Beta distribution,
#' \item \link[stats]{dpois} - Poisson Distribution,
#' \item \link[stats]{dnbinom} - Negative Binomial Distribution,
Expand Down Expand Up @@ -251,7 +252,7 @@ alm <- function(formula, data, subset, na.action,
distribution=c("dnorm","dlaplace","ds","dgnorm","dlogis","dt","dalaplace",
"dlnorm","dllaplace","dls","dlgnorm","dbcnorm","dfnorm","dinvgauss",
"dpois","dnbinom",
"dbeta",
"dbeta","dlogitnorm",
"plogis","pnorm"),
loss=c("likelihood","MSE","MAE","HAM","LASSO","RIDGE"),
occurrence=c("none","plogis","pnorm"),
Expand Down Expand Up @@ -413,6 +414,7 @@ alm <- function(formula, data, subset, na.action,
"dnbinom" = exp(matrixXreg %*% B),
"dchisq" = ifelseFast(any(matrixXreg %*% B <0),1E+100,(matrixXreg %*% B)^2),
"dbeta" = exp(matrixXreg %*% B[1:(length(B)/2)]),
"dlogitnorm"=,
"dnorm" =,
"dlaplace" =,
"ds" =,
Expand Down Expand Up @@ -444,6 +446,7 @@ alm <- function(formula, data, subset, na.action,
"dlgnorm" = (other*sum(abs(log(y[otU])-mu[otU])^other)/obsInsample)^{1/other},
"dbcnorm" = sqrt(sum((bcTransform(y[otU],other)-mu[otU])^2)/obsInsample),
"dinvgauss" = sum((y[otU]/mu[otU]-1)^2 / (y[otU]/mu[otU]))/obsInsample,
"dlogitnorm" = sqrt(sum((log(y[otU]/(1-y[otU]))-mu[otU])^2)/obsInsample),
"dfnorm" = abs(other),
"dt" = ,
"dchisq" =,
Expand Down Expand Up @@ -514,6 +517,7 @@ alm <- function(formula, data, subset, na.action,
"dchisq" = dchisq(y[otU], df=fitterReturn$scale, ncp=fitterReturn$mu[otU], log=TRUE),
"dpois" = dpois(y[otU], lambda=fitterReturn$mu[otU], log=TRUE),
"dnbinom" = dnbinom(y[otU], mu=fitterReturn$mu[otU], size=fitterReturn$scale, log=TRUE),
"dlogitnorm" = dlogitnorm(y[otU], mu=fitterReturn$mu[otU], sigma=fitterReturn$scale, log=TRUE),
"dbeta" = dbeta(y[otU], shape1=fitterReturn$mu[otU], shape2=fitterReturn$scale[otU], log=TRUE),
"pnorm" = c(pnorm(fitterReturn$mu[ot], mean=0, sd=1, log.p=TRUE),
pnorm(fitterReturn$mu[!ot], mean=0, sd=1, lower.tail=FALSE, log.p=TRUE)),
Expand All @@ -527,6 +531,7 @@ alm <- function(formula, data, subset, na.action,
"dnorm" =,
"dfnorm" =,
"dbcnorm" =,
"dlogitnorm" =,
"dlnorm" = obsZero*(log(sqrt(2*pi)*fitterReturn$scale)+0.5),
"dgnorm" =,
"dlgnorm" =obsZero*(1/fitterReturn$other-
Expand Down Expand Up @@ -581,6 +586,7 @@ alm <- function(formula, data, subset, na.action,
"dllaplace" =,
"dls" =,
"dlgnorm" = exp(fitterReturn$mu),
"dlogitnorm" = exp(fitterReturn$mu)/(1+exp(fitterReturn$mu)),
"dbcnorm" = bcTransformInv(fitterReturn$mu,lambdaBC),
"dbeta" = fitterReturn$mu / (fitterReturn$mu + scale),
"pnorm" = pnorm(fitterReturn$mu, mean=0, sd=1),
Expand Down Expand Up @@ -909,8 +915,8 @@ alm <- function(formula, data, subset, na.action,
maxeval <- 500;
}
# The following ones don't really need the estimation. This is for consistency only
else if(any(distribution==c("dnorm","dlnorm")) & !recursiveModel && any(loss==c("likelihood","MSE"))){
maxeval <- 2;
else if(any(distribution==c("dnorm","dlnorm","dlogitnorm")) & !recursiveModel && any(loss==c("likelihood","MSE"))){
maxeval <- 1;
}
else{
maxeval <- 200;
Expand Down Expand Up @@ -988,13 +994,14 @@ alm <- function(formula, data, subset, na.action,
}
}

if(distribution=="dbeta"){
if(any(distribution==c("dbeta","dlogitnorm"))){
if(any((y>1) | (y<0))){
stop("The response variable should lie between 0 and 1 in the Beta distribution", call.=FALSE);
stop(paste0("The response variable should lie between 0 and 1 in distribution=\"",
distribution,"\""), call.=FALSE);
}
else if(any(y==c(0,1))){
warning(paste0("The response variable contains boundary values (either zero or one). ",
"Beta distribution is not estimable in this case. ",
"distribution=\"",distribution,"\" is not estimable in this case. ",
"So we used a minor correction for it, in order to overcome this limitation."), call.=FALSE);
y <- y*(1-2*1e-10);
}
Expand Down Expand Up @@ -1245,6 +1252,9 @@ alm <- function(formula, data, subset, na.action,
B <- .lm.fit(matrixXreg[otU,,drop=FALSE],log(y[otU]/(1-y[otU])))$coefficients;
B <- c(B, -B);
}
else if(distribution=="dlogitnorm"){
B <- .lm.fit(matrixXreg[otU,,drop=FALSE],log(y[otU]/(1-y[otU])))$coefficients;
}
else if(distribution=="dchisq"){
B <- .lm.fit(matrixXreg[otU,,drop=FALSE],sqrt(y[otU]))$coefficients;
if(aParameterProvided){
Expand Down Expand Up @@ -1311,9 +1321,14 @@ alm <- function(formula, data, subset, na.action,
else if(distribution=="dbeta"){
# In Beta we set B to be twice longer, using first half of parameters for shape1, and the second for shape2
# Transform y, just in case, to make sure that it does not hit boundary values
B <- .lm.fit(matrixXregForDiffs,diff(log(y/(1-y)),differences=iOrder)[c(1:nrow(matrixXregForDiffs))])$coefficients;
B <- .lm.fit(matrixXregForDiffs,
diff(log(y/(1-y)),differences=iOrder)[c(1:nrow(matrixXregForDiffs))])$coefficients;
B <- c(B, -B);
}
else if(distribution=="dlogitnorm"){
B <- .lm.fit(matrixXregForDiffs,
diff(log(y/(1-y)),differences=iOrder)[c(1:nrow(matrixXregForDiffs))])$coefficients;
}
else if(distribution=="dchisq"){
B <- .lm.fit(matrixXregForDiffs,diff(sqrt(y[otU]),differences=iOrder))$coefficients;
if(aParameterProvided){
Expand Down Expand Up @@ -1512,6 +1527,7 @@ alm <- function(formula, data, subset, na.action,
mu[] <- fitterReturn$mu;
scale <- fitterReturn$scale;

# Give names to additional parameters
if(is.null(parameters)){
parameters <- B;
if(distribution=="dnbinom"){
Expand Down Expand Up @@ -1624,6 +1640,7 @@ alm <- function(formula, data, subset, na.action,
"dllaplace" =,
"dls" =,
"dlgnorm" = exp(mu),
"dlogitnorm" = exp(mu)/(1+exp(mu)),
"dbcnorm" = bcTransformInv(mu,lambdaBC),
"dbeta" = mu / (mu + scale),
"pnorm" = pnorm(mu, mean=0, sd=1),
Expand All @@ -1650,6 +1667,7 @@ alm <- function(formula, data, subset, na.action,
"dls" =,
"dlgnorm" = log(y) - mu,
"dbcnorm" = bcTransform(y,lambdaBC) - mu,
"dlogitnorm" = log(y/(1-y)) - mu,
"pnorm" = qnorm((y - pnorm(mu, 0, 1) + 1) / 2, 0, 1),
"plogis" = log((1 + y * (1 + exp(mu))) / (1 + exp(mu) * (2 - y) - y))
# Here we use the proxy from Svetunkov & Boylan (2019)
Expand All @@ -1668,6 +1686,7 @@ alm <- function(formula, data, subset, na.action,
# Parameters of the model + scale
nParam <- nVariables + (loss=="likelihood")*1;

# Amend the number of parameters, depending on the type of distribution
if(any(distribution==c("dnbinom","dchisq","dt","dfnorm","dbcnorm","dgnorm","dlgnorm","dalaplace"))){
if(!aParameterProvided){
nParam <- nParam + 1;
Expand Down Expand Up @@ -1804,7 +1823,7 @@ alm <- function(formula, data, subset, na.action,
}

if(loss=="likelihood" ||
(loss=="MSE" && any(distribution==c("dnorm","dlnorm","dbcnorm"))) ||
(loss=="MSE" && any(distribution==c("dnorm","dlnorm","dbcnorm","dlogitnorm"))) ||
(loss=="MAE" && any(distribution==c("dlaplace","dllaplace"))) ||
(loss=="HAM" && any(distribution==c("ds","dls")))){
logLik <- -CFValue;
Expand Down
105 changes: 105 additions & 0 deletions R/logitnorm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#' Logit Normal Distribution
#'
#' Density, cumulative distribution, quantile functions and random number
#' generation for the distribution that becomes normal after the Logit
#' transformation.
#'
#' The distribution has the following density function:
#'
#' f(y) = 1/(sqrt(2 pi) y (1-y)) exp(-(logit(y) -mu)^2 / (2 sigma^2))
#'
#' where y is in (0, 1) and logit(y) =log(y/(1-y)).
#'
#' Both \code{plogitnorm} and \code{qlogitnorm} are returned for the lower
#' tail of the distribution.
#'
#' All the functions are defined for the values between 0 and 1.
#'
#' @template author
#' @keywords distribution
#'
#' @param q vector of quantiles.
#' @param p vector of probabilities.
#' @param n number of observations. Should be a single number.
#' @param mu vector of location parameters (means).
#' @param sigma vector of scale parameters.
#' @param log if \code{TRUE}, then probabilities are returned in
#' logarithms.
#'
#' @return Depending on the function, various things are returned
#' (usually either vector or scalar):
#' \itemize{
#' \item \code{dlogitnorm} returns the density function value for the
#' provided parameters.
#' \item \code{plogitnorm} returns the value of the cumulative function
#' for the provided parameters.
#' \item \code{qlogitnorm} returns quantiles of the distribution. Depending
#' on what was provided in \code{p}, \code{mu} and \code{sigma}, this
#' can be either a vector or a matrix, or an array.
#' \item \code{rlogitnorm} returns a vector of random variables
#' generated from the logitnorm distribution. Depending on what was
#' provided in \code{mu} and \code{sigma}, this can be either a vector
#' or a matrix or an array.
#' }
#'
#' @examples
#' x <- dlogitnorm(c(-1000:1000)/200, 0, 1)
#' plot(c(-1000:1000)/200, x, type="l")
#'
#' x <- plogitnorm(c(-1000:1000)/200, 0, 1)
#' plot(c(-1000:1000)/200, x, type="l")
#'
#' qlogitnorm(c(0.025,0.975), 0, c(1,2))
#'
#' x <- rlogitnorm(1000, 0, 1)
#' hist(x)
#'
#' @references \itemize{
#' \item Mead, R. (1965). A Generalised Logit-Normal Distribution.
#' Biometrics, 21 (3), 721–732. doi: 10.2307/2528553
#' }
#'
#' @rdname logitnorm-distribution
#' @importFrom stats dnorm pnorm qnorm rnorm
#' @export dlogitnorm
dlogitnorm <- function(q, mu=0, sigma=1, log=FALSE){
logitnormReturn <- 1 / (sigma*sqrt(2*pi)*q*(1-q))*exp(-(log(q/(1-q))-mu)^2/(2*sigma^2));
# logitnormReturn[q<=0] <- -Inf;
# logitnormReturn[q>=1] <- Inf;

# Return logs if needed
if(log){
logitnormReturn[] <- log(logitnormReturn);
}

return(logitnormReturn);
}

#' @rdname logitnorm-distribution
#' @export plogitnorm
#' @aliases plogitnorm
plogitnorm <- function(q, mu=0, sigma=1){
logitnormReturn <- vector("numeric", length(q));
logitnormReturn[q>=0] <- pnorm(q=log(q/(1-q)), mean=mu, sd=sigma);
logitnormReturn[q<0] <- 0;
logitnormReturn[q>=1] <- 1;
return(logitnormReturn);
}

#' @rdname logitnorm-distribution
#' @export qlogitnorm
#' @aliases qlogitnorm
qlogitnorm <- function(p, mu=0, sigma=1){
logitnormReturn <- qnorm(p=p, mean=mu, sd=sigma);
logitnormReturn[] <- exp(logitnormReturn)/(1+exp(logitnormReturn));
logitnormReturn[is.nan(logitnormReturn)] <- 0;
return(logitnormReturn);
}

#' @rdname logitnorm-distribution
#' @export rlogitnorm
#' @aliases rlogitnorm
rlogitnorm <- function(n=1, mu=0, sigma=1){
logitnormReturn <- qlogitnorm(runif(n=n, 0, 1), mu=mu, sigma=sigma);
return(logitnormReturn);
}
Loading

0 comments on commit cedbd5b

Please sign in to comment.