Skip to content

Commit

Permalink
Speeds up in alm() and stepwise(). Relevant to issue #14
Browse files Browse the repository at this point in the history
  • Loading branch information
Ivan Svetunkov committed Aug 15, 2018
1 parent 37e3f1a commit f71613b
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 19 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: greybox
Type: Package
Title: Toolbox for Model Building and Forecasting
Version: 0.3.1.41012
Version: 0.3.1.41013
Date: 2018-08-15
Authors@R: person("Ivan", "Svetunkov", email = "ivan@svetunkov.ru", role = c("aut", "cre"),
comment="Lecturer at Centre for Marketing Analytics and Forecasting, Lancaster University, UK")
Expand Down
2 changes: 2 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ Changes:
* alm() with dlnorm now also returns analytical covariance matrix instead of hessian based one.
* stepwise(), lmCombine() and lmDynamic() now rely on .lm.fit() function, when distribution="dnorm", so the speed of calculation should be substantially higher.
* New functions for class checks: is.greybox(), is.alm(), is.greyboxC(), is.greyboxD(), is.rmc() and is.rollingOrigin().
* stepwise() now calculates only the necessary correlations. This allows further inceasing the speed of computation.
* alm() uses its own mean function, so this should also increas its speed.

Bugfixes:
* Fixed a bug with the style="line" in rmc(), where the grouping would be wrong in cases, when one method significantly differs from the others.
Expand Down
18 changes: 11 additions & 7 deletions R/alm.R
Original file line number Diff line number Diff line change
Expand Up @@ -304,19 +304,23 @@ alm <- function(formula, data, subset, na.action,
}
}

meanFast <- function(x){
return(sum(x) / length(x));
}

fitter <- function(A, distribution, y, matrixXreg){
mu[] <- matrixXreg %*% A;

scale <- switch(distribution,
"dnorm"=,
"dfnorm" = sqrt(mean((y-mu)^2)),
"dlnorm"= sqrt(mean((log(y)-mu)^2)),
"dlaplace" = mean(abs(y-mu)),
"ds" = mean(sqrt(abs(y-mu))) / 2,
"dfnorm" = sqrt(meanFast((y-mu)^2)),
"dlnorm"= sqrt(meanFast((log(y)-mu)^2)),
"dlaplace" = meanFast(abs(y-mu)),
"ds" = meanFast(sqrt(abs(y-mu))) / 2,
"dchisq" = 2*mu,
"dlogis" = sqrt(mean((y-mu)^2) * 3 / pi^2),
"pnorm" = sqrt(mean(qnorm((y - pnorm(mu, 0, 1) + 1) / 2, 0, 1)^2)),
"plogis" = sqrt(mean(log((1 + y * (1 + exp(mu))) / (1 + exp(mu) * (2 - y) - y))^2)) # Here we use the proxy from Svetunkov et al. (2018)
"dlogis" = sqrt(meanFast((y-mu)^2) * 3 / pi^2),
"pnorm" = sqrt(meanFast(qnorm((y - pnorm(mu, 0, 1) + 1) / 2, 0, 1)^2)),
"plogis" = sqrt(meanFast(log((1 + y * (1 + exp(mu))) / (1 + exp(mu) * (2 - y) - y))^2)) # Here we use the proxy from Svetunkov et al. (2018)
);

return(list(mu=mu,scale=scale));
Expand Down
20 changes: 9 additions & 11 deletions R/stepwise.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ stepwise <- function(data, ic=c("AICc","AIC","BIC","BICc"), silent=TRUE, df=NULL

method <- method[1];

ourncols <- ncol(ourData) - 1;
bestICNotFound <- TRUE;
allICs <- list(NA);
# Run the simplest model y = const
Expand All @@ -112,6 +111,8 @@ stepwise <- function(data, ic=c("AICc","AIC","BIC","BICc"), silent=TRUE, df=NULL
# Add residuals to the ourData
ourData <- cbind(ourData,residuals(testModel));
colnames(ourData)[ncol(ourData)] <- "resid";
nCols <- ncol(ourData);

bestFormula <- testFormula;
if(!silent){
cat(testFormula); cat(", "); cat(currentIC); cat("\n\n");
Expand All @@ -120,14 +121,8 @@ stepwise <- function(data, ic=c("AICc","AIC","BIC","BICc"), silent=TRUE, df=NULL
m <- 2;
# Start the loop
while(bestICNotFound){
ourCorrelation <- cor(ourData,use="complete.obs",method=method);
# Extract the last row of the correlation matrix
ourCorrelation <- ourCorrelation[-1,-1];
ourCorrelation <- ourCorrelation[nrow(ourCorrelation),];
ourCorrelation <- ourCorrelation[1:ourncols];
# Find the highest correlation coefficient
newElement <- which(abs(ourCorrelation)==max(abs(ourCorrelation)))[1];
newElement <- names(ourCorrelation)[newElement];
ourCorrelation <- cor(ourData[,nCols],ourData,use="complete.obs",method=method)[-c(1,nCols)];
newElement <- names(ourData)[which(abs(ourCorrelation)==max(abs(ourCorrelation)))[1] + 1];
# If the newElement is the same as before, stop
if(any(newElement==all.vars(as.formula(bestFormula)))){
bestICNotFound <- FALSE;
Expand Down Expand Up @@ -175,6 +170,10 @@ stepwise <- function(data, ic=c("AICc","AIC","BIC","BICc"), silent=TRUE, df=NULL
# Remove "1+" from the best formula
bestFormula <- sub(" 1+", "", bestFormula,fixed=T);

# listToCall$formula <- as.formula(bestFormula);
# listToCall$data <- substitute(data);
# bestModel <- do.call(lmCall,listToCall);

if(distribution=="dnorm"){
bestModel <- do.call("lm", list(formula=as.formula(bestFormula),
data=substitute(data)));
Expand All @@ -189,6 +188,5 @@ stepwise <- function(data, ic=c("AICc","AIC","BIC","BICc"), silent=TRUE, df=NULL
}

bestModel$ICs <- unlist(allICs);
class(bestModel) <- c("alm","greybox");
return(model=bestModel);
return(structure(bestModel,class=c("alm","greybox"));
}

0 comments on commit f71613b

Please sign in to comment.