Speed-ups in alm() and stepwise(). Relevant to issue #14

config-i1 · Aug 15, 2018 · f71613b · f71613b
1 parent 37e3f1a
commit f71613b
Show file tree

Hide file tree

Showing 4 changed files with 23 additions and 19 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: greybox
 Type: Package
 Title: Toolbox for Model Building and Forecasting
-Version: 0.3.1.41012
+Version: 0.3.1.41013
 Date: 2018-08-15
 Authors@R: person("Ivan", "Svetunkov", email = "ivan@svetunkov.ru", role = c("aut", "cre"),
                   comment="Lecturer at Centre for Marketing Analytics and Forecasting, Lancaster University, UK")

diff --git a/NEWS b/NEWS
@@ -18,6 +18,8 @@ Changes:
 * alm() with dlnorm now also returns analytical covariance matrix instead of hessian based one.
 * stepwise(), lmCombine() and lmDynamic() now rely on .lm.fit() function, when distribution="dnorm", so the speed of calculation should be substantially higher.
 * New functions for class checks: is.greybox(), is.alm(), is.greyboxC(), is.greyboxD(), is.rmc() and is.rollingOrigin().
+* stepwise() now calculates only the necessary correlations. This allows further increasing the speed of computation.
+* alm() uses its own mean function, so this should also increase its speed.
 
 Bugfixes:
 * Fixed a bug with the style="line" in rmc(), where the grouping would be wrong in cases, when one method significantly differs from the others.

diff --git a/R/alm.R b/R/alm.R
@@ -304,19 +304,23 @@ alm <- function(formula, data, subset, na.action,
         }
     }
 
+    meanFast <- function(x){
+        return(sum(x) / length(x));
+    }
+
     fitter <- function(A, distribution, y, matrixXreg){
         mu[] <- matrixXreg %*% A;
 
         scale <- switch(distribution,
                         "dnorm"=,
-                        "dfnorm" = sqrt(mean((y-mu)^2)),
-                        "dlnorm"= sqrt(mean((log(y)-mu)^2)),
-                        "dlaplace" = mean(abs(y-mu)),
-                        "ds" = mean(sqrt(abs(y-mu))) / 2,
+                        "dfnorm" = sqrt(meanFast((y-mu)^2)),
+                        "dlnorm"= sqrt(meanFast((log(y)-mu)^2)),
+                        "dlaplace" = meanFast(abs(y-mu)),
+                        "ds" = meanFast(sqrt(abs(y-mu))) / 2,
                         "dchisq" = 2*mu,
-                        "dlogis" = sqrt(mean((y-mu)^2) * 3 / pi^2),
-                        "pnorm" = sqrt(mean(qnorm((y - pnorm(mu, 0, 1) + 1) / 2, 0, 1)^2)),
-                        "plogis" = sqrt(mean(log((1 + y * (1 + exp(mu))) / (1 + exp(mu) * (2 - y) - y))^2)) # Here we use the proxy from Svetunkov et al. (2018)
+                        "dlogis" = sqrt(meanFast((y-mu)^2) * 3 / pi^2),
+                        "pnorm" = sqrt(meanFast(qnorm((y - pnorm(mu, 0, 1) + 1) / 2, 0, 1)^2)),
+                        "plogis" = sqrt(meanFast(log((1 + y * (1 + exp(mu))) / (1 + exp(mu) * (2 - y) - y))^2)) # Here we use the proxy from Svetunkov et al. (2018)
         );
 
         return(list(mu=mu,scale=scale));

diff --git a/R/stepwise.R b/R/stepwise.R
@@ -93,7 +93,6 @@ stepwise <- function(data, ic=c("AICc","AIC","BIC","BICc"), silent=TRUE, df=NULL
 
     method <- method[1];
 
-    ourncols <- ncol(ourData) - 1;
     bestICNotFound <- TRUE;
     allICs <- list(NA);
     # Run the simplest model y = const
@@ -112,6 +111,8 @@ stepwise <- function(data, ic=c("AICc","AIC","BIC","BICc"), silent=TRUE, df=NULL
     # Add residuals to the ourData
     ourData <- cbind(ourData,residuals(testModel));
     colnames(ourData)[ncol(ourData)] <- "resid";
+    nCols <- ncol(ourData);
+
     bestFormula <- testFormula;
     if(!silent){
         cat(testFormula); cat(", "); cat(currentIC); cat("\n\n");
@@ -120,14 +121,8 @@ stepwise <- function(data, ic=c("AICc","AIC","BIC","BICc"), silent=TRUE, df=NULL
     m <- 2;
     # Start the loop
     while(bestICNotFound){
-        ourCorrelation <- cor(ourData,use="complete.obs",method=method);
-        # Extract the last row of the correlation matrix
-        ourCorrelation <- ourCorrelation[-1,-1];
-        ourCorrelation <- ourCorrelation[nrow(ourCorrelation),];
-        ourCorrelation <- ourCorrelation[1:ourncols];
-        # Find the highest correlation coefficient
-        newElement <- which(abs(ourCorrelation)==max(abs(ourCorrelation)))[1];
-        newElement <- names(ourCorrelation)[newElement];
+        ourCorrelation <- cor(ourData[,nCols],ourData,use="complete.obs",method=method)[-c(1,nCols)];
+        newElement <- names(ourData)[which(abs(ourCorrelation)==max(abs(ourCorrelation)))[1] + 1];
         # If the newElement is the same as before, stop
         if(any(newElement==all.vars(as.formula(bestFormula)))){
             bestICNotFound <- FALSE;
@@ -175,6 +170,10 @@ stepwise <- function(data, ic=c("AICc","AIC","BIC","BICc"), silent=TRUE, df=NULL
     # Remove "1+" from the best formula
     bestFormula <- sub(" 1+", "", bestFormula,fixed=T);
 
+    # listToCall$formula <- as.formula(bestFormula);
+    # listToCall$data <- substitute(data);
+    # bestModel <- do.call(lmCall,listToCall);
+
     if(distribution=="dnorm"){
         bestModel <- do.call("lm", list(formula=as.formula(bestFormula),
                                          data=substitute(data)));
@@ -189,6 +188,5 @@ stepwise <- function(data, ic=c("AICc","AIC","BIC","BICc"), silent=TRUE, df=NULL
     }
 
     bestModel$ICs <- unlist(allICs);
-    class(bestModel) <- c("alm","greybox");
-    return(model=bestModel);
+    return(structure(bestModel,class=c("alm","greybox"));
 }