Skip to content

Commit

Permalink
Beta distribution in alm. Issue #13
Browse files Browse the repository at this point in the history
  • Loading branch information
Ivan Svetunkov committed Oct 31, 2018
1 parent caf0020 commit d37dafd
Show file tree
Hide file tree
Showing 7 changed files with 161 additions and 31 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: greybox
Type: Package
Title: Toolbox for Model Building and Forecasting
Version: 0.3.3.41001
Date: 2018-10-30
Version: 0.3.3.41002
Date: 2018-10-31
Authors@R: person("Ivan", "Svetunkov", email = "ivan@svetunkov.ru", role = c("aut", "cre"),
comment="Lecturer at Centre for Marketing Analytics and Forecasting, Lancaster University, UK")
URL: https://github.com/config-i1/greybox
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ importFrom(stats,coef)
importFrom(stats,confint)
importFrom(stats,cor)
importFrom(stats,cov)
importFrom(stats,dbeta)
importFrom(stats,dchisq)
importFrom(stats,deltat)
importFrom(stats,dlnorm)
Expand All @@ -137,6 +138,7 @@ importFrom(stats,pchisq)
importFrom(stats,plogis)
importFrom(stats,pnorm)
importFrom(stats,predict)
importFrom(stats,qbeta)
importFrom(stats,qchisq)
importFrom(stats,qlnorm)
importFrom(stats,qlogis)
Expand Down
3 changes: 2 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
greybox v0.3.3 (Release data: 2018-10-30)
greybox v0.3.3 (Release data: 2018-10-31)
==============

Changes:
* Student t distribution in alm.
* Beta distribution in alm. When will I stop? I guess, I'll do that when I stop procrastinating...

Bugfixes:
* Corrected a typo of "plogos" in alm.
Expand Down
60 changes: 57 additions & 3 deletions R/alm.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#' \item Folded normal distribution, \link[greybox]{dfnorm},
#' \item Log normal distribution, \link[stats]{dlnorm},
#' \item Chi-Squared Distribution, \link[stats]{dchisq},
#' \item Beta distribution, \link[stats]{dbeta},
#' \item Poisson Distribution, \link[stats]{dpois},
#' \item Negative Binomial Distribution, \link[stats]{dnbinom},
#' \item Cumulative Logistic Distribution, \link[stats]{plogis},
Expand Down Expand Up @@ -122,13 +123,14 @@
#' @importFrom numDeriv hessian
#' @importFrom nloptr nloptr
#' @importFrom stats model.frame sd terms
#' @importFrom stats dchisq dlnorm dnorm dlogis dpois dnbinom dt
#' @importFrom stats dchisq dlnorm dnorm dlogis dpois dnbinom dt dbeta
#' @importFrom stats plogis
#' @export alm
alm <- function(formula, data, subset, na.action,
distribution=c("dnorm","dlogis","dlaplace","dalaplace","ds","dt",
"dfnorm","dlnorm","dchisq",
"dpois","dnbinom",
"dbeta",
"plogis","pnorm"),
occurrence=c("none","plogis","pnorm"),
B=NULL, vcovProduce=FALSE, ...){
Expand All @@ -138,7 +140,7 @@ alm <- function(formula, data, subset, na.action,

distribution <- distribution[1];
if(all(distribution!=c("dnorm","dlogis","dlaplace","dalaplace","ds","dt","dfnorm","dlnorm","dchisq",
"dpois","dnbinom","plogis","pnorm"))){
"dpois","dnbinom","dbeta","plogis","pnorm"))){
if(any(distribution==c("norm","fnorm","lnorm","laplace","s","chisq","logis"))){
warning(paste0("You are using the old value of the distribution parameter.\n",
"Use distribution='d",distribution,"' instead."),
Expand Down Expand Up @@ -243,6 +245,18 @@ alm <- function(formula, data, subset, na.action,
}
}

if(distribution=="dbeta"){
if(any((y>1) | (y<0))){
stop("The response variable should lie between 0 and 1 in the Beta distribution", call.=FALSE);
}
else if(any(y==c(0,1))){
warning(paste0("The response variable contains boundary values (either zero or one). ",
"Beta distribution is not estimable in this case. ",
"So we used a minor correction for it, in order to overcome this limitation."), call.=FALSE);
y <- y*(1-2*1e-10);
}
}

if(any(distribution==c("plogis","pnorm"))){
CDF <- TRUE;
}
Expand Down Expand Up @@ -364,6 +378,7 @@ alm <- function(formula, data, subset, na.action,
"dpois" = exp(matrixXreg %*% B),
"dchisq" = ifelseFast(any(matrixXreg %*% B[-1] <0),1E+100,(matrixXreg %*% B[-1])^2),
"dnbinom" = exp(matrixXreg %*% B[-1]),
"dbeta" = exp(matrixXreg %*% B[1:(length(B)/2)]),
"dnorm" =,
"dfnorm" =,
"dlnorm" =,
Expand All @@ -377,6 +392,7 @@ alm <- function(formula, data, subset, na.action,
);

scale <- switch(distribution,
"dbeta" = exp(matrixXreg %*% B[-c(1:(length(B)/2))]),
"dnorm" =,
"dfnorm" = sqrt(meanFast((y-mu)^2)),
"dlnorm" = sqrt(meanFast((log(y)-mu)^2)),
Expand Down Expand Up @@ -410,6 +426,7 @@ alm <- function(formula, data, subset, na.action,
"dchisq" = dchisq(y, df=fitterReturn$scale, ncp=fitterReturn$mu, log=TRUE),
"dpois" = dpois(y, lambda=fitterReturn$mu, log=TRUE),
"dnbinom" = dnbinom(y, mu=fitterReturn$mu, size=fitterReturn$scale, log=TRUE),
"dbeta" = dbeta(y, shape1=fitterReturn$mu, shape2=fitterReturn$scale, log=TRUE),
"pnorm" = c(pnorm(fitterReturn$mu[ot], mean=0, sd=1, log.p=TRUE),
pnorm(fitterReturn$mu[!ot], mean=0, sd=1, lower.tail=FALSE, log.p=TRUE)),
"plogis" = c(plogis(fitterReturn$mu[ot], location=0, scale=1, log.p=TRUE),
Expand Down Expand Up @@ -440,6 +457,12 @@ alm <- function(formula, data, subset, na.action,
# Box-Cox transform in order to get meaningful initials
B <- .lm.fit(matrixXreg,(y^0.01-1)/0.01)$coefficients;
}
else if(distribution=="dbeta"){
# In Beta we set B to be twice longer, using first half of parameters for shape1, and the second for shape2
# Transform y, just in case, to make sure that it does not hit boundary values
B <- .lm.fit(matrixXreg,log(y/(1-y)))$coefficients;
B <- c(B, -B);
}
else{
B <- .lm.fit(matrixXreg,y)$coefficients;
BLower <- -Inf;
Expand Down Expand Up @@ -514,6 +537,11 @@ alm <- function(formula, data, subset, na.action,
names(B) <- variablesNames;
}
}
else if(distribution=="dbeta"){
if(!vcovProduce){
names(B) <- c(paste0("shape1_",variablesNames),paste0("shape2_",variablesNames));
}
}
else{
names(B) <- variablesNames;
}
Expand All @@ -527,6 +555,12 @@ alm <- function(formula, data, subset, na.action,
nVariables <- nVariables + 1;
}
}
else if(distribution=="dbeta"){
df <- nVariables*2;
if(!vcovProduce){
nVariables <- nVariables*2;
}
}

### Fitted values in the scale of the original variable
yFitted[] <- switch(distribution,
Expand All @@ -541,12 +575,14 @@ alm <- function(formula, data, subset, na.action,
"dnbinom" = mu,
"dchisq" = mu + scale,
"dlnorm" = exp(mu),
"dbeta" = mu / (mu + scale),
"pnorm" = pnorm(mu, mean=0, sd=1),
"plogis" = plogis(mu, location=0, scale=1)
);

### Error term in the transformed scale
errors[] <- switch(distribution,
"dbeta" = y - yFitted,
"dfnorm" =,
"dlaplace" =,
"dalaplace" =,
Expand Down Expand Up @@ -624,7 +660,13 @@ alm <- function(formula, data, subset, na.action,
}

if(nVariables>1){
dimnames(vcovMatrix) <- list(variablesNames,variablesNames);
if(distribution=="dbeta"){
dimnames(vcovMatrix) <- list(c(paste0("shape1_",variablesNames),paste0("shape2_",variablesNames)),
c(paste0("shape1_",variablesNames),paste0("shape2_",variablesNames)));
}
else{
dimnames(vcovMatrix) <- list(variablesNames,variablesNames);
}
}
else{
names(vcovMatrix) <- variablesNames;
Expand Down Expand Up @@ -674,12 +716,24 @@ alm <- function(formula, data, subset, na.action,
nVariables <- nVariables - 1;
}
}
else if(distribution=="dbeta"){
variablesNames <- variablesNames[1:(nVariables/2)];
nVariables <- nVariables/2;

# Write down shape parameters
ellipsis$shape1 <- mu;
ellipsis$shape2 <- scale;
# mu and scale are set top be equal to mu and variance.
scale <- mu * scale / ((mu+scale)^2 * (mu + scale + 1));
mu <- yFitted;
}

finalModel <- list(coefficients=B, vcov=vcovMatrix, fitted.values=yFitted, residuals=as.vector(errors),
mu=mu, scale=scale, distribution=distribution, logLik=-CFValue,
df.residual=obsInsample-df, df=df, call=cl, rank=df,
data=matrix(as.matrix(dataWork[,c(responseName,variablesNames[-1])]), ncol=nVariables,
dimnames=list(NULL, c(responseName,variablesNames[-1]))),
occurrence=occurrence, subset=subset, other=ellipsis);

return(structure(finalModel,class=c("alm","greybox")));
}
68 changes: 51 additions & 17 deletions R/methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ pointLik.alm <- function(object, ...){
"dpois" = dpois(y, lambda=mu, log=TRUE),
"dnbinom" = dnbinom(y, mu=mu, size=scale, log=TRUE),
"dchisq" = dchisq(y, df=scale, ncp=mu, log=TRUE),
"dbeta" = dbeta(y, shape1=mu, shape2=scale, log=TRUE),
"plogis" = c(plogis(mu[ot], location=0, scale=1, log.p=TRUE),
plogis(mu[!ot], location=0, scale=1, lower.tail=FALSE, log.p=TRUE)),
"pnorm" = c(pnorm(mu[ot], mean=0, sd=1, log.p=TRUE),
Expand Down Expand Up @@ -406,7 +407,7 @@ confint.greyboxD <- function(object, parm, level=0.95, ...){
}

#' @rdname predict.greybox
#' @importFrom stats predict qchisq qlnorm qlogis qpois qnbinom
#' @importFrom stats predict qchisq qlnorm qlogis qpois qnbinom qbeta
#' @export
predict.alm <- function(object, newdata=NULL, interval=c("none", "confidence", "prediction"),
level=0.95, side=c("both","upper","lower"), ...){
Expand Down Expand Up @@ -572,6 +573,24 @@ predict.alm <- function(object, newdata=NULL, interval=c("none", "confidence", "
greyboxForecast$upper[] <- exp(greyboxForecast$upper);
}
}
else if(object$distribution=="dbeta"){
greyboxForecast$shape1 <- greyboxForecast$mean;
greyboxForecast$shape2 <- greyboxForecast$variances;
greyboxForecast$mean <- greyboxForecast$shape1 / (greyboxForecast$shape1 + greyboxForecast$shape2);
greyboxForecast$variances <- (greyboxForecast$shape1 * greyboxForecast$shape2 /
((greyboxForecast$shape1+greyboxForecast$shape2)^2 *
(greyboxForecast$shape1 + greyboxForecast$shape2 + 1)));
if(interval=="p"){
greyboxForecast$lower <- qbeta(levelLow,greyboxForecast$shape1,greyboxForecast$shape2);
greyboxForecast$upper <- qbeta(levelUp,greyboxForecast$shape1,greyboxForecast$shape2);
}
else if(interval=="c"){
greyboxForecast$lower <- (greyboxForecast$mean + qt(levelLow,df=object$df.residual)*
sqrt(greyboxForecast$variances/(nobs(object)-nParam(object))));
greyboxForecast$upper <- (greyboxForecast$mean + qt(levelUp,df=object$df.residual)*
sqrt(greyboxForecast$variances/(nobs(object)-nParam(object))));
}
}
else if(object$distribution=="plogis"){
# The intervals are based on the assumption that a~N(0, sigma^2), and p=exp(a) / (1 + exp(a))
greyboxForecast$scale <- object$scale;
Expand Down Expand Up @@ -713,6 +732,7 @@ predict.greybox <- function(object, newdata=NULL, interval=c("none", "confidence
}
nRows <- nrow(newdata);

paramQuantiles <- qt(c(levelLow, levelUp),df=object$df.residual);
parameters <- coef.greybox(object);
parametersNames <- names(parameters);
ourVcov <- vcov(object);
Expand All @@ -724,6 +744,9 @@ predict.greybox <- function(object, newdata=NULL, interval=c("none", "confidence
ourVcov <- ourVcov[-1,-1];
}
}
else if(object$distribution=="dbeta"){
parametersNames <- substr(parametersNames[1:(length(parametersNames)/2)],8,nchar(parametersNames));
}

if(any(parametersNames=="(Intercept)")){
matrixOfxreg <- as.matrix(cbind(rep(1,nrow(newdata)),newdata[,-1]));
Expand All @@ -741,26 +764,34 @@ predict.greybox <- function(object, newdata=NULL, interval=c("none", "confidence
matrixOfxreg <- matrix(matrixOfxreg, nrow=1);
}

ourForecast <- as.vector(matrixOfxreg %*% parameters);

paramQuantiles <- qt(c(levelLow, levelUp),df=object$df.residual);
if(object$distribution=="dbeta"){
# We predict values for shape1 and shape2 and write them down in mean and variance.
ourForecast <- as.vector(exp(matrixOfxreg %*% parameters[1:(length(parameters)/2)]));
vectorOfVariances <- as.vector(exp(matrixOfxreg %*% parameters[-c(1:(length(parameters)/2))]));
# ourForecast <- ourForecast / (ourForecast + as.vector(exp(matrixOfxreg %*% parameters[-c(1:(length(parameters)/2))])));

# abs is needed for some cases, when the likelihood was not fully optimised
vectorOfVariances <- abs(diag(matrixOfxreg %*% ourVcov %*% t(matrixOfxreg)));

if(interval=="c"){
lower <- ourForecast + paramQuantiles[1] * sqrt(vectorOfVariances);
upper <- ourForecast + paramQuantiles[2] * sqrt(vectorOfVariances);
}
else if(interval=="p"){
vectorOfVariances <- vectorOfVariances + sigma(object)^2;
lower <- ourForecast + paramQuantiles[1] * sqrt(vectorOfVariances);
upper <- ourForecast + paramQuantiles[2] * sqrt(vectorOfVariances);
}
else{
lower <- NULL;
upper <- NULL;
}
else{
ourForecast <- as.vector(matrixOfxreg %*% parameters);
# abs is needed for some cases, when the likelihood was not fully optimised
vectorOfVariances <- abs(diag(matrixOfxreg %*% ourVcov %*% t(matrixOfxreg)));

if(interval=="c"){
lower <- ourForecast + paramQuantiles[1] * sqrt(vectorOfVariances);
upper <- ourForecast + paramQuantiles[2] * sqrt(vectorOfVariances);
}
else if(interval=="p"){
vectorOfVariances <- vectorOfVariances + sigma(object)^2;
lower <- ourForecast + paramQuantiles[1] * sqrt(vectorOfVariances);
upper <- ourForecast + paramQuantiles[2] * sqrt(vectorOfVariances);
}
else{
lower <- NULL;
upper <- NULL;
}
}

ourModel <- list(model=object, mean=ourForecast, lower=lower, upper=upper, level=c(levelLow, levelUp), newdata=newdata,
variances=vectorOfVariances, newdataProvided=newdataProvided);
Expand Down Expand Up @@ -1060,6 +1091,7 @@ print.summary.alm <- function(x, ...){
"dchisq" = "Chi-Squared",
"dpois" = "Poisson",
"dnbinom" = "Negative Binomial",
"dbeta" = "Beta",
"plogis" = "Cumulative logistic",
"pnorm" = "Cumulative normal"
);
Expand Down Expand Up @@ -1100,6 +1132,7 @@ print.summary.greybox <- function(x, ...){
"dchisq" = "Chi-Squared",
"dpois" = "Poisson",
"dnbinom" = "Negative Binomial",
"dbeta" = "Beta",
"plogis" = "Cumulative logistic",
"pnorm" = "Cumulative normal"
);
Expand Down Expand Up @@ -1136,6 +1169,7 @@ print.summary.greyboxC <- function(x, ...){
"dchisq" = "Chi-Squared",
"dpois" = "Poisson",
"dnbinom" = "Negative Binomial",
"dbeta" = "Beta",
"plogis" = "Cumulative logistic",
"pnorm" = "Cumulative normal"
);
Expand Down
3 changes: 2 additions & 1 deletion man/alm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit d37dafd

Please sign in to comment.