diff --git a/.Rbuildignore b/.Rbuildignore index e9d4a74..02c5e12 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -16,3 +16,4 @@ ^debug\.log$ ^\.github$ ^CODE_OF_CONDUCT\.md$ +^revdep$ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index b0f3cd6..2782049 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,14 +1,10 @@ -# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. -# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: - - master - - develop + branches: [master, develop] pull_request: - branches: - - master - - develop + branches: [master, develop] name: R-CMD-check @@ -22,63 +18,33 @@ jobs: fail-fast: false matrix: config: - - {os: windows-latest, r: 'release'} - - {os: macOS-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - { os: macOS-latest, r: 'release' } + - { os: windows-latest, r: 'release' } + - { os: ubuntu-latest, r: 'devel', http-user-agent: 'release' } + - { os: ubuntu-latest, r: 'release' } + - { os: ubuntu-latest, r: 'oldrel-1' } env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes steps: - uses: actions/checkout@v2 - - uses: r-lib/actions/setup-r@v1 + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true - - uses: r-lib/actions/setup-pandoc@v1 - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Cache R packages - if: runner.os != 'Windows' - uses: actions/cache@v2 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- - - - name: Install system dependencies - if: runner.os == 'Linux' - run: | - while read -r cmd - do - eval sudo $cmd - done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') - - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("rcmdcheck") - shell: Rscript {0} - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - shell: Rscript {0} + extra-packages: | + any::rcmdcheck + needs: check - - name: Upload check results - if: failure() - uses: actions/upload-artifact@main + - uses: r-lib/actions/check-r-package@v2 with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check + upload-snapshots: true diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 9245d3a..0eae2b7 100644 --- 
a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -1,48 +1,31 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: - - master - - develop + branches: [master, develop] pull_request: - branches: - - master - - develop + branches: [master, develop] name: test-coverage jobs: test-coverage: - runs-on: macOS-latest + runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: - uses: actions/checkout@v2 - - uses: r-lib/actions/setup-r@v1 - - - uses: r-lib/actions/setup-pandoc@v1 - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Cache R packages - uses: actions/cache@v2 + - uses: r-lib/actions/setup-r@v2 with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + use-public-rspm: true - - name: Install dependencies - run: | - install.packages(c("remotes")) - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("covr") - shell: Rscript {0} + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::covr + needs: coverage - name: Test coverage - run: covr::codecov() + run: covr::codecov(quiet = FALSE) shell: Rscript {0} diff --git a/DESCRIPTION b/DESCRIPTION index 8223372..2009624 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,20 +17,21 @@ Imports: gridExtra, nortest, stats, - utils + utils, + xplorerr Suggests: covr, descriptr, knitr, rmarkdown, testthat, - vdiffr, - xplorerr + vdiffr License: MIT + file LICENSE URL: https://olsrr.rsquaredacademy.com/, https://github.com/rsquaredacademy/olsrr BugReports: https://github.com/rsquaredacademy/olsrr/issues Encoding: UTF-8 LazyData: true VignetteBuilder: knitr -RoxygenNote: 7.1.1 +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.2.3 Config/testthat/edition: 3 diff --git a/NEWS.md b/NEWS.md index 74764b9..700dc7a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,22 @@ -# olsrr 0.5.3.9000 +# olsrr 0.6.0 + +This is a minor release for bug fixes and other enhancements. 
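A minimal sketch of the headline changes in this release; the calls below are taken from the updated examples further down in this diff, and the cutoff value 0.2 is purely illustrative:

```r
library(olsrr)

model <- lm(mpg ~ disp + hp + wt, data = mtcars)

# Cook's D bar plot: pick one of the built-in threshold rules (type = 1..5)
# or supply an explicit cutoff via the new `threshold` argument
ols_plot_cooksd_bar(model, type = 4)
ols_plot_cooksd_bar(model, threshold = 0.2)

# DFFITS plot: the size-adjusted threshold is the default and can be turned off
ols_plot_dffits(model, size_adj_threshold = FALSE)
```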
## New Features -- Force variables in/out in variable selection procedures -- Hierarchical selection -- Variable selection using r-squared and adjusted r-squared +- Hierarchical selection can be enabled when using `p` values as the variable selection metric + +## Enhancements + +- Force variables to be included or excluded from the model at all stages of variable selection +- Variable selection methods allow use of the following metrics: + - p value + - akaike information criterion (aic) + - schwarz bayesian criterion (sbc) + - sawa bayesian criterion (sbic) + - r-square + - adjusted r-square +- Choose threshold for determining influential observations in `ols_plot_dffits()` ## Bug Fixes diff --git a/R/ols-bartlett-test.R b/R/ols-bartlett-test.R index 8de56f7..9d8839e 100644 --- a/R/ols-bartlett-test.R +++ b/R/ols-bartlett-test.R @@ -27,8 +27,10 @@ #' #' @examples #' # using grouping variable -#' library(descriptr) -#' ols_test_bartlett(mtcarz, 'mpg', group_var = 'cyl') +#' if (require("descriptr")) { +#' library(descriptr) +#' ols_test_bartlett(mtcarz, 'mpg', group_var = 'cyl') +#' } #' #' # using variables #' ols_test_bartlett(hsb, 'read', 'write') diff --git a/R/ols-cooks-d-barplot.R b/R/ols-cooks-d-barplot.R index d5f2192..63b501e 100644 --- a/R/ols-cooks-d-barplot.R +++ b/R/ols-cooks-d-barplot.R @@ -5,8 +5,11 @@ #' fitted values of the model. #' #' @param model An object of class \code{lm}. -#' @param type An integer between 1 and 5 selecting one of the 6 methods for computing the threshold. -#' @param print_plot logical; if \code{TRUE}, prints the plot else returns a plot object. +#' @param type An integer between 1 and 5 selecting one of the 5 methods for +#' computing the threshold. +#' @param threshold Threshold for detecting outliers. +#' @param print_plot logical; if \code{TRUE}, prints the plot else returns a +#' plot object.
#' #' @details #' Cook's distance was introduced by American statistician R Dennis Cook in @@ -51,6 +54,8 @@ #' @examples #' model <- lm(mpg ~ disp + hp + wt, data = mtcars) #' ols_plot_cooksd_bar(model) +#' ols_plot_cooksd_bar(model, type = 4) +#' ols_plot_cooksd_bar(model, threshold = 0.2) #' #' @importFrom stats cooks.distance #' @importFrom ggplot2 geom_bar coord_flip ylim geom_hline geom_label @@ -59,15 +64,19 @@ #' #' @export #' -ols_plot_cooksd_bar <- function(model, type = 1, print_plot = TRUE) { +ols_plot_cooksd_bar <- function(model, type = 1, threshold = NULL, print_plot = TRUE) { check_model(model) k <- ols_prep_cdplot_data(model, type) d <- ols_prep_outlier_obs(k) f <- ols_prep_cdplot_outliers(k) + + if (is.null(threshold)) { + threshold <- k$ts + } - y_max <- max(k$maxx, k$ts) + y_max <- max(k$maxx, threshold) y_lim <- y_max + (y_max * 0.1) # geoms @@ -76,33 +85,33 @@ ols_plot_cooksd_bar <- function(model, type = 1, print_plot = TRUE) { geom_bar(width = 0.5, stat = "identity", aes(fill = fct_color)) + geom_text(hjust = -0.2, nudge_x = 0.05, size = 2, na.rm = TRUE) + geom_hline(yintercept = 0) + - geom_hline(yintercept = k$ts, colour = "red") + geom_hline(yintercept = threshold, colour = "red") # annotations p <- p + annotate("text", x = Inf, y = Inf, hjust = 1.2, vjust = 2, family = "serif", fontface = "italic", colour = "darkred", - label = paste("Threshold:", round(k$ts, 3))) + label = paste("Threshold:", round(threshold, 3))) # scales - p <- + p <- p + - scale_fill_manual(values = c("blue", "red")) + scale_fill_manual(values = c("blue", "red")) # guides - p <- + p <- p + labs(fill = "Observation") + xlab("Observation") + - ylab("Cook's D") + - ggtitle("Cook's D Bar Plot") + - ylim(0, y_lim) + ylab("Cook's D") + + ggtitle("Cook's D Bar Plot") + + ylim(0, y_lim) if (print_plot) { suppressWarnings(print(p)) } else { - return(list(plot = p, outliers = f, threshold = k$ts)) + return(list(plot = p, outliers = f, threshold = threshold)) } -} \ No newline at end of file +} diff --git a/R/ols-cooks-d-chart.R b/R/ols-cooks-d-chart.R index 051900e..5a066df 100644 --- a/R/ols-cooks-d-chart.R +++ b/R/ols-cooks-d-chart.R @@ -6,6 +6,7 @@ #' #' @param model An object of class \code{lm}. #' @param type An integer between 1 and 5 selecting one of the 6 methods for computing the threshold. +#' @param threshold Threshold for detecting outliers. #' @param print_plot logical; if \code{TRUE}, prints the plot else returns a plot object. 
#' #' @details @@ -51,6 +52,8 @@ #' @examples #' model <- lm(mpg ~ disp + hp + wt, data = mtcars) #' ols_plot_cooksd_chart(model) +#' ols_plot_cooksd_chart(model, type = 4) +#' ols_plot_cooksd_chart(model, threshold = 0.2) #' #' @importFrom ggplot2 geom_linerange #' @@ -58,7 +61,7 @@ #' #' @export #' -ols_plot_cooksd_chart <- function(model, type = 1, print_plot = TRUE) { +ols_plot_cooksd_chart <- function(model, type = 1, threshold = NULL, print_plot = TRUE) { check_model(model) @@ -66,20 +69,24 @@ ols_plot_cooksd_chart <- function(model, type = 1, print_plot = TRUE) { d <- ols_prep_outlier_obs(k) f <- ols_prep_cdplot_outliers(k) + if (is.null(threshold)) { + threshold <- k$ts + } + p <- ggplot(d, aes(x = obs, y = cd, label = txt, ymin = min(cd), ymax = cd)) + geom_linerange(colour = "blue") + geom_point(shape = 1, colour = "blue") + geom_text(vjust = -1, size = 3, family = "serif", fontface = "italic", colour = "darkred", na.rm = TRUE) + - geom_hline(yintercept = k$ts, colour = "red") + geom_hline(yintercept = threshold, colour = "red") # annotations p <- p + annotate("text", x = Inf, y = Inf, hjust = 1.2, vjust = 2, family = "serif", fontface = "italic", colour = "darkred", - label = paste("Threshold:", round(k$ts, 3))) + label = paste("Threshold:", round(threshold, 3))) # guides p <- @@ -92,7 +99,7 @@ ols_plot_cooksd_chart <- function(model, type = 1, print_plot = TRUE) { if (print_plot) { suppressWarnings(print(p)) } else { - return(list(plot = p, outliers = f, threshold = k$ts)) + return(list(plot = p, outliers = f, threshold = threshold)) } -} \ No newline at end of file +} diff --git a/R/ols-launch-app.R b/R/ols-launch-app.R index 3ac457e..d30f949 100644 --- a/R/ols-launch-app.R +++ b/R/ols-launch-app.R @@ -7,8 +7,6 @@ #' @export #' ols_launch_app <- function() { - - check_suggests('xplorerr') xplorerr::app_linear_regression() } \ No newline at end of file diff --git a/R/olsrr.R b/R/olsrr.R index 20bcc05..27284cf 100644 --- a/R/olsrr.R +++ b/R/olsrr.R @@ -8,16 +8,17 @@ #' @docType package #' @keywords internal #' @name olsrr -NULL +#' @aliases olsrr-package +"_PACKAGE" ## quiets concerns of R CMD check re: the .'s that appear in pipelines if (getRversion() >= "2.15.1") { utils::globalVariables(c(".", "owner", "repo", "tag_name", "result", "a", "b", "tx", "mindex", "n", "x", "y", "k", "size", "shape", "rsquare", "cp", "adjr", "cps", "aic", "sbic", "sbc", "index", "betas", "rsq", - "lpreds", "terms", "pvdata", "values", "d", "v", "r.squared", "obs", + "lpreds", "terms", "pvdata", "values", "d", "v", "r.squared", "obs", "txt", "cd", "fct_color", "ckd", "dbetas", "color", "pred", "ds", "dsr", - "fstatistic", "hadi", "Df", "res", "pot", "lfit", "rerror", "ybar", + "fstatistic", "hadi", "Df", "res", "pot", "lfit", "rerror", "ybar", "yhat", "predicted", "resid", "lev_thrsh", "leverage", "levrstud", "sdres")) } diff --git a/R/utils.R b/R/utils.R index 14c4f5e..7379802 100644 --- a/R/utils.R +++ b/R/utils.R @@ -67,7 +67,7 @@ l <- function(x) { } null_model_metrics <- function(model, full_model) { - + output <- summary(model) anovam <- anova(model) aic <- ols_aic(model) @@ -80,36 +80,14 @@ null_model_metrics <- function(model, full_model) { rsq <- output$r.squared adjr <- output$adj.r.squared rmse <- sqrt(mean(model$residuals ^ 2)) - + list(adjr = adjr, aic = aic, sbc = sbc, sbic = sbic, ess = ess, rsq = rsq, rss = rss, rmse = rmse) - + } max_nchar <- function(char, val, rn = 3, ns = 3) { max(nchar(char), nchar(format(round(val, rn), nsmall = ns))) } -#' @importFrom utils packageVersion menu 
install.packages -check_suggests <- function(pkg) { - - pkg_flag <- tryCatch(utils::packageVersion(pkg), error = function(e) NA) - - if (is.na(pkg_flag)) { - - msg <- message(paste0('\n', pkg, ' must be installed for this functionality.')) - - if (interactive()) { - message(msg, "\nWould you like to install it?") - if (utils::menu(c("Yes", "No")) == 1) { - utils::install.packages(pkg) - } else { - stop(msg, call. = FALSE) - } - } else { - stop(msg, call. = FALSE) - } - } - -} diff --git a/README.Rmd b/README.Rmd index 8d7b87f..7877f2a 100644 --- a/README.Rmd +++ b/README.Rmd @@ -16,9 +16,8 @@ knitr::opts_chunk$set( [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/olsrr)](https://cran.r-project.org/package=olsrr) -[![cran checks](https://badges.cranchecks.info/summary/olsrr.svg)](https://cran.r-project.org/web/checks/check_results_olsrr.html) [![R build status](https://github.com/rsquaredacademy/olsrr/workflows/R-CMD-check/badge.svg)](https://github.com/rsquaredacademy/olsrr/actions) -[![Coverage status](https://codecov.io/gh/rsquaredacademy/olsrr/branch/master/graph/badge.svg)](https://codecov.io/github/rsquaredacademy/olsrr?branch=master) [![status](https://tinyverse.netlify.com/badge/olsrr)](https://CRAN.R-project.org/package=olsrr) [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html) [![](https://cranlogs.r-pkg.org/badges/grand-total/olsrr)](https://cran.r-project.org/package=olsrr) +[![Coverage status](https://codecov.io/gh/rsquaredacademy/olsrr/branch/master/graph/badge.svg)](https://app.codecov.io/github/rsquaredacademy/olsrr?branch=master) ## Overview @@ -41,8 +40,8 @@ The olsrr package provides following tools for building OLS regression models us install.packages("olsrr") # Install development version from GitHub -# install.packages("devtools") -devtools::install_github("rsquaredacademy/olsrr") +# install.packages("pak") +pak::pak("rsquaredacademy/olsrr") ``` ## Articles @@ -56,8 +55,6 @@ devtools::install_github("rsquaredacademy/olsrr") ## Usage -olsrr uses consistent prefix `ols_` for easy tab completion. - ```{r, echo=FALSE, message=FALSE} library(olsrr) library(dplyr) @@ -67,58 +64,14 @@ library(nortest) library(goftest) ``` -olsrr is built with the aim of helping those users who are new to the R language. If you know how to -write a `formula` or build models using `lm`, you will find olsrr very useful. Most of the functions -use an object of class `lm` as input. So you just need to build a model using `lm` and then pass it onto -the functions in olsrr. Below is a quick demo: +olsrr uses consistent prefix `ols_` for easy tab completion. If you know how to write a `formula` or build models using `lm`, you will find olsrr very useful. Most of the functions use an object of class `lm` as input. So you just need to build a model using `lm` and then pass it onto the functions in olsrr. Below is +a quick demo: #### Regression ```{r regress} -ols_regress(mpg ~ disp + hp + wt + qsec, data = mtcars) -``` - -#### Stepwise Regression - -Build regression model from a set of candidate predictor variables by entering and removing predictors based on -p values, in a stepwise manner until there is no variable left to enter or remove any more. 
- -#### Variable Selection - -```{r stepwise1} -# stepwise regression -model <- lm(y ~ ., data = surgical) -ols_step_both_p(model) -``` - -#### Stepwise AIC Backward Regression - -Build regression model from a set of candidate predictor variables by removing predictors based on -Akaike Information Criteria, in a stepwise manner until there is no variable left to remove any more. - -##### Variable Selection - -```{r stepaicb1} -# stepwise aic backward regression -model <- lm(y ~ ., data = surgical) -k <- ols_step_backward_aic(model) -k -``` - -#### Breusch Pagan Test - -Breusch Pagan test is used to test for herteroskedasticity (non-constant error variance). It tests whether the variance of the errors from a regression is dependent on the values of the independent variables. It is a $\chi^{2}$ test. - -```{r bp1} -model <- lm(mpg ~ disp + hp + wt + drat, data = mtcars) -ols_test_breusch_pagan(model) -``` - -#### Collinearity Diagnostics - -```{r colldiag} model <- lm(mpg ~ disp + hp + wt + qsec, data = mtcars) -ols_coll_diag(model) +ols_regress(model) ``` ## Getting Help diff --git a/README.md b/README.md index 24e8d48..6f8cce1 100644 --- a/README.md +++ b/README.md @@ -6,16 +6,10 @@ [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/olsrr)](https://cran.r-project.org/package=olsrr) -[![cran -checks](https://badges.cranchecks.info/summary/olsrr.svg)](https://cran.r-project.org/web/checks/check_results_olsrr.html) [![R build status](https://github.com/rsquaredacademy/olsrr/workflows/R-CMD-check/badge.svg)](https://github.com/rsquaredacademy/olsrr/actions) [![Coverage -status](https://codecov.io/gh/rsquaredacademy/olsrr/branch/master/graph/badge.svg)](https://codecov.io/github/rsquaredacademy/olsrr?branch=master) -[![status](https://tinyverse.netlify.com/badge/olsrr)](https://CRAN.R-project.org/package=olsrr) -[![Lifecycle: -stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html) -[![](https://cranlogs.r-pkg.org/badges/grand-total/olsrr)](https://cran.r-project.org/package=olsrr) +status](https://codecov.io/gh/rsquaredacademy/olsrr/branch/master/graph/badge.svg)](https://app.codecov.io/github/rsquaredacademy/olsrr?branch=master) ## Overview @@ -39,8 +33,8 @@ models using R: install.packages("olsrr") # Install development version from GitHub -# install.packages("devtools") -devtools::install_github("rsquaredacademy/olsrr") +# install.packages("pak") +pak::pak("rsquaredacademy/olsrr") ``` ## Articles @@ -59,19 +53,17 @@ devtools::install_github("rsquaredacademy/olsrr") ## Usage -olsrr uses consistent prefix `ols_` for easy tab completion. - -olsrr is built with the aim of helping those users who are new to the R -language. If you know how to write a `formula` or build models using -`lm`, you will find olsrr very useful. Most of the functions use an -object of class `lm` as input. So you just need to build a model using -`lm` and then pass it onto the functions in olsrr. Below is a quick -demo: +olsrr uses consistent prefix `ols_` for easy tab completion. If you know +how to write a `formula` or build models using `lm`, you will find olsrr +very useful. Most of the functions use an object of class `lm` as input. +So you just need to build a model using `lm` and then pass it onto the +functions in olsrr. 
Below is a quick demo: #### Regression ``` r -ols_regress(mpg ~ disp + hp + wt + qsec, data = mtcars) +model <- lm(mpg ~ disp + hp + wt + qsec, data = mtcars) +ols_regress(model) #> Model Summary #> --------------------------------------------------------------- #> R 0.914 RMSE 2.409 @@ -108,195 +100,6 @@ ols_regress(mpg ~ disp + hp + wt + qsec, data = mtcars) #> ---------------------------------------------------------------------------------------- ``` -#### Stepwise Regression - -Build regression model from a set of candidate predictor variables by -entering and removing predictors based on p values, in a stepwise manner -until there is no variable left to enter or remove any more. - -#### Variable Selection - -``` r -# stepwise regression -model <- lm(y ~ ., data = surgical) -ols_step_both_p(model) -#> -#> -#> Stepwise Summary -#> ------------------------------------------------------------------------------ -#> Step Variable AIC SBC SBIC R2 Adj. R2 -#> ------------------------------------------------------------------------------ -#> 0 Base Model 802.606 806.584 646.794 0.00000 0.00000 -#> 1 liver_test (+) 771.875 777.842 616.009 0.45454 0.44405 -#> 2 alc_heavy (+) 761.439 769.395 605.506 0.56674 0.54975 -#> 3 enzyme_test (+) 750.509 760.454 595.297 0.65900 0.63854 -#> 4 pindex (+) 735.715 747.649 582.943 0.75015 0.72975 -#> 5 bcs (+) 730.620 744.543 579.638 0.78091 0.75808 -#> ------------------------------------------------------------------------------ -#> -#> Final Model Output -#> ------------------ -#> -#> Model Summary -#> ------------------------------------------------------------------- -#> R 0.884 RMSE 184.276 -#> R-Squared 0.781 MSE 38202.426 -#> Adj. R-Squared 0.758 Coef. Var 27.839 -#> Pred R-Squared 0.700 AIC 730.620 -#> MAE 137.656 SBC 744.543 -#> ------------------------------------------------------------------- -#> RMSE: Root Mean Square Error -#> MSE: Mean Square Error -#> MAE: Mean Absolute Error -#> AIC: Akaike Information Criteria -#> SBC: Schwarz Bayesian Criteria -#> -#> ANOVA -#> ----------------------------------------------------------------------- -#> Sum of -#> Squares DF Mean Square F Sig. -#> ----------------------------------------------------------------------- -#> Regression 6535804.090 5 1307160.818 34.217 0.0000 -#> Residual 1833716.447 48 38202.426 -#> Total 8369520.537 53 -#> ----------------------------------------------------------------------- -#> -#> Parameter Estimates -#> ------------------------------------------------------------------------------------------------ -#> model Beta Std. Error Std. Beta t Sig lower upper -#> ------------------------------------------------------------------------------------------------ -#> (Intercept) -1178.330 208.682 -5.647 0.000 -1597.914 -758.746 -#> liver_test 58.064 40.144 0.156 1.446 0.155 -22.652 138.779 -#> alc_heavy 317.848 71.634 0.314 4.437 0.000 173.818 461.878 -#> enzyme_test 9.748 1.656 0.521 5.887 0.000 6.419 13.077 -#> pindex 8.924 1.808 0.380 4.935 0.000 5.288 12.559 -#> bcs 59.864 23.060 0.241 2.596 0.012 13.498 106.230 -#> ------------------------------------------------------------------------------------------------ -``` - -#### Stepwise AIC Backward Regression - -Build regression model from a set of candidate predictor variables by -removing predictors based on Akaike Information Criteria, in a stepwise -manner until there is no variable left to remove any more. 
- -##### Variable Selection - -``` r -# stepwise aic backward regression -model <- lm(y ~ ., data = surgical) -k <- ols_step_backward_aic(model) -k -#> -#> -#> Stepwise Summary -#> ------------------------------------------------------------------------- -#> Step Variable AIC SBC SBIC R2 Adj. R2 -#> ------------------------------------------------------------------------- -#> 0 Full Model 736.390 756.280 586.665 0.78184 0.74305 -#> 1 alc_mod 734.407 752.308 583.884 0.78177 0.74856 -#> 2 gender 732.494 748.406 581.290 0.78142 0.75351 -#> 3 age 730.620 744.543 578.844 0.78091 0.75808 -#> ------------------------------------------------------------------------- -#> -#> Final Model Output -#> ------------------ -#> -#> Model Summary -#> ------------------------------------------------------------------- -#> R 0.884 RMSE 184.276 -#> R-Squared 0.781 MSE 38202.426 -#> Adj. R-Squared 0.758 Coef. Var 27.839 -#> Pred R-Squared 0.700 AIC 730.620 -#> MAE 137.656 SBC 744.543 -#> ------------------------------------------------------------------- -#> RMSE: Root Mean Square Error -#> MSE: Mean Square Error -#> MAE: Mean Absolute Error -#> AIC: Akaike Information Criteria -#> SBC: Schwarz Bayesian Criteria -#> -#> ANOVA -#> ----------------------------------------------------------------------- -#> Sum of -#> Squares DF Mean Square F Sig. -#> ----------------------------------------------------------------------- -#> Regression 6535804.090 5 1307160.818 34.217 0.0000 -#> Residual 1833716.447 48 38202.426 -#> Total 8369520.537 53 -#> ----------------------------------------------------------------------- -#> -#> Parameter Estimates -#> ------------------------------------------------------------------------------------------------ -#> model Beta Std. Error Std. Beta t Sig lower upper -#> ------------------------------------------------------------------------------------------------ -#> (Intercept) -1178.330 208.682 -5.647 0.000 -1597.914 -758.746 -#> bcs 59.864 23.060 0.241 2.596 0.012 13.498 106.230 -#> pindex 8.924 1.808 0.380 4.935 0.000 5.288 12.559 -#> enzyme_test 9.748 1.656 0.521 5.887 0.000 6.419 13.077 -#> liver_test 58.064 40.144 0.156 1.446 0.155 -22.652 138.779 -#> alc_heavy 317.848 71.634 0.314 4.437 0.000 173.818 461.878 -#> ------------------------------------------------------------------------------------------------ -``` - -#### Breusch Pagan Test - -Breusch Pagan test is used to test for herteroskedasticity (non-constant -error variance). It tests whether the variance of the errors from a -regression is dependent on the values of the independent variables. It -is a $\chi^{2}$ test. 
- -``` r -model <- lm(mpg ~ disp + hp + wt + drat, data = mtcars) -ols_test_breusch_pagan(model) -#> -#> Breusch Pagan Test for Heteroskedasticity -#> ----------------------------------------- -#> Ho: the variance is constant -#> Ha: the variance is not constant -#> -#> Data -#> ------------------------------- -#> Response : mpg -#> Variables: fitted values of mpg -#> -#> Test Summary -#> --------------------------- -#> DF = 1 -#> Chi2 = 1.429672 -#> Prob > Chi2 = 0.231818 -``` - -#### Collinearity Diagnostics - -``` r -model <- lm(mpg ~ disp + hp + wt + qsec, data = mtcars) -ols_coll_diag(model) -#> Tolerance and Variance Inflation Factor -#> --------------------------------------- -#> Variables Tolerance VIF -#> 1 disp 0.1252279 7.985439 -#> 2 hp 0.1935450 5.166758 -#> 3 wt 0.1445726 6.916942 -#> 4 qsec 0.3191708 3.133119 -#> -#> -#> Eigenvalue and Condition Index -#> ------------------------------ -#> Eigenvalue Condition Index intercept disp hp wt -#> 1 4.721487187 1.000000 0.000123237 0.001132468 0.001413094 0.0005253393 -#> 2 0.216562203 4.669260 0.002617424 0.036811051 0.027751289 0.0002096014 -#> 3 0.050416837 9.677242 0.001656551 0.120881424 0.392366164 0.0377028008 -#> 4 0.010104757 21.616057 0.025805998 0.777260487 0.059594623 0.7017528428 -#> 5 0.001429017 57.480524 0.969796790 0.063914571 0.518874831 0.2598094157 -#> qsec -#> 1 0.0001277169 -#> 2 0.0046789491 -#> 3 0.0001952599 -#> 4 0.0024577686 -#> 5 0.9925403056 -``` - ## Getting Help If you encounter a bug, please file a minimal reproducible example using diff --git a/cran-comments.md b/cran-comments.md index 108b5a3..e4fa3ce 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,10 +1,10 @@ -## Test environments -* local Windows 10, R 3.6.2 -* ubuntu 12.04 (on travis-ci), R 3.5.3, R 3.6.2, R-devel -* win-builder (devel and release) - ## R CMD check results 0 errors | 0 warnings | 0 note +## revdepcheck results + +We checked 4 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. + * We saw 0 new problems + * We failed to check 0 packages diff --git a/docs/articles/index.html b/docs/articles/index.html index 1ffe5a8..5db44c6 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -1,5 +1,5 @@ -Articles • olsrrArticles • olsrr @@ -86,7 +86,7 @@

[Regenerated pkgdown site under docs/ — updated articles index, changelog page for olsrr 0.6.0, function reference pages, and re-rendered example plot images; these HTML/PNG hunks mirror the NEWS.md, R/, and man/ changes elsewhere in this diff.]
diff --git a/man/ols_coll_diag.Rd b/man/ols_coll_diag.Rd index 38cce28..fa769fb 100644 --- a/man/ols_coll_diag.Rd +++ b/man/ols_coll_diag.Rd @@ -39,9 +39,9 @@ Percent of variance in the predictor that cannot be accounted for by other predi Steps to calculate tolerance: \itemize{ - \item Regress the kth predictor on rest of the predictors in the model. - \item Compute \eqn{R^2} - the coefficient of determination from the regression in the above step. - \item \eqn{Tolerance = 1 - R^2} +\item Regress the kth predictor on rest of the predictors in the model. +\item Compute \eqn{R^2} - the coefficient of determination from the regression in the above step. +\item \eqn{Tolerance = 1 - R^2} } \emph{Variance Inflation Factor} @@ -57,9 +57,9 @@ requiring correction. Steps to calculate VIF: \itemize{ - \item Regress the kth predictor on rest of the predictors in the model. - \item Compute \eqn{R^2} - the coefficient of determination from the regression in the above step. - \item \eqn{Tolerance = 1 / 1 - R^2 = 1 / Tolerance} +\item Regress the kth predictor on rest of the predictors in the model. +\item Compute \eqn{R^2} - the coefficient of determination from the regression in the above step. +\item \eqn{Tolerance = 1 / 1 - R^2 = 1 / Tolerance} } \emph{Condition Index} diff --git a/man/ols_plot_added_variable.Rd b/man/ols_plot_added_variable.Rd index 41e3af5..3f4c61d 100644 --- a/man/ols_plot_added_variable.Rd +++ b/man/ols_plot_added_variable.Rd @@ -29,9 +29,9 @@ model. Let the response variable of the model be \emph{Y} Steps to construct an added variable plot: \itemize{ - \item Regress \emph{Y} on all variables other than \emph{X} and store the residuals (\emph{Y} residuals). - \item Regress \emph{X} on all the other variables included in the model (\emph{X} residuals). - \item Construct a scatter plot of \emph{Y} residuals and \emph{X} residuals. +\item Regress \emph{Y} on all variables other than \emph{X} and store the residuals (\emph{Y} residuals). +\item Regress \emph{X} on all the other variables included in the model (\emph{X} residuals). +\item Construct a scatter plot of \emph{Y} residuals and \emph{X} residuals. } What do the \emph{Y} and \emph{X} residuals represent? The \emph{Y} residuals represent the part @@ -56,5 +56,5 @@ Kutner, MH, Nachtscheim CJ, Neter J and Li W., 2004, Applied Linear Statistical Chicago, IL., McGraw Hill/Irwin. } \seealso{ -[ols_plot_resid_regressor()], [ols_plot_comp_plus_resid()] +\code{\link[=ols_plot_resid_regressor]{ols_plot_resid_regressor()}}, \code{\link[=ols_plot_comp_plus_resid]{ols_plot_comp_plus_resid()}} } diff --git a/man/ols_plot_comp_plus_resid.Rd b/man/ols_plot_comp_plus_resid.Rd index 18cbe6a..1122dce 100644 --- a/man/ols_plot_comp_plus_resid.Rd +++ b/man/ols_plot_comp_plus_resid.Rd @@ -28,5 +28,5 @@ Kutner, MH, Nachtscheim CJ, Neter J and Li W., 2004, Applied Linear Statistical Chicago, IL., McGraw Hill/Irwin. 
} \seealso{ -[ols_plot_added_variable()], [ols_plot_resid_regressor()] +\code{\link[=ols_plot_added_variable]{ols_plot_added_variable()}}, \code{\link[=ols_plot_resid_regressor]{ols_plot_resid_regressor()}} } diff --git a/man/ols_plot_cooksd_bar.Rd b/man/ols_plot_cooksd_bar.Rd index 1082e73..9020afe 100644 --- a/man/ols_plot_cooksd_bar.Rd +++ b/man/ols_plot_cooksd_bar.Rd @@ -4,14 +4,18 @@ \alias{ols_plot_cooksd_bar} \title{Cooks' D bar plot} \usage{ -ols_plot_cooksd_bar(model, type = 1, print_plot = TRUE) +ols_plot_cooksd_bar(model, type = 1, threshold = NULL, print_plot = TRUE) } \arguments{ \item{model}{An object of class \code{lm}.} -\item{type}{An integer between 1 and 5 selecting one of the 6 methods for computing the threshold.} +\item{type}{An integer between 1 and 5 selecting one of the 5 methods for +computing the threshold.} -\item{print_plot}{logical; if \code{TRUE}, prints the plot else returns a plot object.} +\item{threshold}{Threshold for detecting outliers.} + +\item{print_plot}{logical; if \code{TRUE}, prints the plot else returns a +plot object.} } \value{ \code{ols_plot_cooksd_bar} returns a list containing the @@ -33,9 +37,9 @@ residual and leverage i.e it takes it account both the \emph{x} value and Steps to compute Cook's distance: \itemize{ - \item Delete observations one at a time. - \item Refit the regression model on remaining \eqn{n - 1} observations - \item examine how much all of the fitted values change when the ith observation is deleted. +\item Delete observations one at a time. +\item Refit the regression model on remaining \eqn{n - 1} observations +\item examine how much all of the fitted values change when the ith observation is deleted. } A data point having a large cook's d indicates that the data point strongly @@ -44,25 +48,27 @@ the threshold used for detecting or classifying observations as outliers and we list them below. 
\itemize{ - \item \strong{Type 1} : 4 / n - \item \strong{Type 2} : 4 / (n - k - 1) - \item \strong{Type 3} : ~1 - \item \strong{Type 4} : 1 / (n - k - 1) - \item \strong{Type 5} : 3 * mean(Vector of cook's distance values) +\item \strong{Type 1} : 4 / n +\item \strong{Type 2} : 4 / (n - k - 1) +\item \strong{Type 3} : ~1 +\item \strong{Type 4} : 1 / (n - k - 1) +\item \strong{Type 5} : 3 * mean(Vector of cook's distance values) } where \strong{n} and \strong{k} stand for \itemize{ - \item \strong{n}: Number of observations - \item \strong{k}: Number of predictors +\item \strong{n}: Number of observations +\item \strong{k}: Number of predictors } } \examples{ model <- lm(mpg ~ disp + hp + wt, data = mtcars) ols_plot_cooksd_bar(model) +ols_plot_cooksd_bar(model, type = 4) +ols_plot_cooksd_bar(model, threshold = 0.2) } \seealso{ -[ols_plot_cooksd_chart()] +\code{\link[=ols_plot_cooksd_chart]{ols_plot_cooksd_chart()}} } diff --git a/man/ols_plot_cooksd_chart.Rd b/man/ols_plot_cooksd_chart.Rd index c61d98e..4e90117 100644 --- a/man/ols_plot_cooksd_chart.Rd +++ b/man/ols_plot_cooksd_chart.Rd @@ -4,13 +4,15 @@ \alias{ols_plot_cooksd_chart} \title{Cooks' D chart} \usage{ -ols_plot_cooksd_chart(model, type = 1, print_plot = TRUE) +ols_plot_cooksd_chart(model, type = 1, threshold = NULL, print_plot = TRUE) } \arguments{ \item{model}{An object of class \code{lm}.} \item{type}{An integer between 1 and 5 selecting one of the 6 methods for computing the threshold.} +\item{threshold}{Threshold for detecting outliers.} + \item{print_plot}{logical; if \code{TRUE}, prints the plot else returns a plot object.} } \value{ @@ -33,9 +35,9 @@ residual and leverage i.e it takes it account both the \emph{x} value and Steps to compute Cook's distance: \itemize{ - \item Delete observations one at a time. - \item Refit the regression model on remaining \eqn{n - 1} observations - \item exmine how much all of the fitted values change when the ith observation is deleted. +\item Delete observations one at a time. +\item Refit the regression model on remaining \eqn{n - 1} observations +\item exmine how much all of the fitted values change when the ith observation is deleted. } A data point having a large cook's d indicates that the data point strongly @@ -44,25 +46,27 @@ the threshold used for detecting or classifying observations as outliers and we list them below. \itemize{ - \item \strong{Type 1} : 4 / n - \item \strong{Type 2} : 4 / (n - k - 1) - \item \strong{Type 3} : ~1 - \item \strong{Type 4} : 1 / (n - k - 1) - \item \strong{Type 5} : 3 * mean(Vector of cook's distance values) +\item \strong{Type 1} : 4 / n +\item \strong{Type 2} : 4 / (n - k - 1) +\item \strong{Type 3} : ~1 +\item \strong{Type 4} : 1 / (n - k - 1) +\item \strong{Type 5} : 3 * mean(Vector of cook's distance values) } where \strong{n} and \strong{k} stand for \itemize{ - \item \strong{n}: Number of observations - \item \strong{k}: Number of predictors +\item \strong{n}: Number of observations +\item \strong{k}: Number of predictors } } \examples{ model <- lm(mpg ~ disp + hp + wt, data = mtcars) ols_plot_cooksd_chart(model) +ols_plot_cooksd_chart(model, type = 4) +ols_plot_cooksd_chart(model, threshold = 0.2) } \seealso{ -[ols_plot_cooksd_bar()] +\code{\link[=ols_plot_cooksd_bar]{ols_plot_cooksd_bar()}} } diff --git a/man/ols_plot_dfbetas.Rd b/man/ols_plot_dfbetas.Rd index 1ee24d1..944b594 100644 --- a/man/ols_plot_dfbetas.Rd +++ b/man/ols_plot_dfbetas.Rd @@ -41,5 +41,5 @@ Wiley Series in Probability and Mathematical Statistics. 
New York: John Wiley & Sons. pp. ISBN 0-471-05856-4. } \seealso{ -[ols_plot_dffits()] +\code{\link[=ols_plot_dffits]{ols_plot_dffits()}} } diff --git a/man/ols_plot_dffits.Rd b/man/ols_plot_dffits.Rd index 29475b1..d245008 100644 --- a/man/ols_plot_dffits.Rd +++ b/man/ols_plot_dffits.Rd @@ -9,10 +9,10 @@ ols_plot_dffits(model, size_adj_threshold = TRUE, print_plot = TRUE) \arguments{ \item{model}{An object of class \code{lm}.} -\item{size_adj_threshold}{logical; if \code{TRUE} (the default), size +\item{size_adj_threshold}{logical; if \code{TRUE} (the default), size adjusted threshold is used to determine influential observations.} -\item{print_plot}{logical; if \code{TRUE}, prints the plot else returns a +\item{print_plot}{logical; if \code{TRUE}, prints the plot else returns a plot object.} } \value{ @@ -33,16 +33,16 @@ when the ith data point is omitted. Steps to compute DFFITs: \itemize{ - \item Delete observations one at a time. - \item Refit the regression model on remaining \eqn{n - 1} observations - \item examine how much all of the fitted values change when the ith observation is deleted. +\item Delete observations one at a time. +\item Refit the regression model on remaining \eqn{n - 1} observations +\item examine how much all of the fitted values change when the ith observation is deleted. } An observation is deemed influential if the absolute value of its DFFITS value is greater than: \deqn{2\sqrt((p + 1) / (n - p -1))} -A size-adjusted cutoff recommended by Belsley, Kuh, and Welsch is -\deqn{2\sqrt(p / n)} and is used by default in **olsrr**. +A size-adjusted cutoff recommended by Belsley, Kuh, and Welsch is +\deqn{2\sqrt(p / n)} and is used by default in \strong{olsrr}. where \code{n} is the number of observations and \code{p} is the number of predictors including intercept. } @@ -60,5 +60,5 @@ Wiley Series in Probability and Mathematical Statistics. New York: John Wiley & Sons. ISBN 0-471-05856-4. } \seealso{ -[ols_plot_dfbetas()] +\code{\link[=ols_plot_dfbetas]{ols_plot_dfbetas()}} } diff --git a/man/ols_plot_hadi.Rd b/man/ols_plot_hadi.Rd index 965fb60..95b0107 100644 --- a/man/ols_plot_hadi.Rd +++ b/man/ols_plot_hadi.Rd @@ -25,5 +25,5 @@ ols_plot_hadi(model) Chatterjee, Samprit and Hadi, Ali. Regression Analysis by Example. 5th ed. N.p.: John Wiley & Sons, 2012. Print. } \seealso{ -[ols_plot_resid_pot()] +\code{\link[=ols_plot_resid_pot]{ols_plot_resid_pot()}} } diff --git a/man/ols_plot_resid_fit.Rd b/man/ols_plot_resid_fit.Rd index 1a038e2..48483d5 100644 --- a/man/ols_plot_resid_fit.Rd +++ b/man/ols_plot_resid_fit.Rd @@ -19,9 +19,9 @@ x axis to detect non-linearity, unequal error variances, and outliers. Characteristics of a well behaved residual vs fitted plot: \itemize{ - \item The residuals spread randomly around the 0 line indicating that the relationship is linear. - \item The residuals form an approximate horizontal band around the 0 line indicating homogeneity of error variance. - \item No one residual is visibly away from the random pattern of the residuals indicating that there are no outliers. +\item The residuals spread randomly around the 0 line indicating that the relationship is linear. +\item The residuals form an approximate horizontal band around the 0 line indicating homogeneity of error variance. +\item No one residual is visibly away from the random pattern of the residuals indicating that there are no outliers. 
} } \examples{ diff --git a/man/ols_plot_resid_lev.Rd b/man/ols_plot_resid_lev.Rd index 666d81e..8e9600b 100644 --- a/man/ols_plot_resid_lev.Rd +++ b/man/ols_plot_resid_lev.Rd @@ -23,5 +23,5 @@ ols_plot_resid_lev(model, threshold = 3) } \seealso{ -[ols_plot_resid_stud_fit()], [ols_plot_resid_lev()] +\code{\link[=ols_plot_resid_stud_fit]{ols_plot_resid_stud_fit()}}, \code{\link[=ols_plot_resid_lev]{ols_plot_resid_lev()}} } diff --git a/man/ols_plot_resid_pot.Rd b/man/ols_plot_resid_pot.Rd index a6889c2..e043eb8 100644 --- a/man/ols_plot_resid_pot.Rd +++ b/man/ols_plot_resid_pot.Rd @@ -24,5 +24,5 @@ ols_plot_resid_pot(model) Chatterjee, Samprit and Hadi, Ali. Regression Analysis by Example. 5th ed. N.p.: John Wiley & Sons, 2012. Print. } \seealso{ -[ols_plot_hadi()] +\code{\link[=ols_plot_hadi]{ols_plot_hadi()}} } diff --git a/man/ols_plot_resid_regressor.Rd b/man/ols_plot_resid_regressor.Rd index 15f8370..cee5003 100644 --- a/man/ols_plot_resid_regressor.Rd +++ b/man/ols_plot_resid_regressor.Rd @@ -25,5 +25,5 @@ ols_plot_resid_regressor(model, 'drat') } \seealso{ -[ols_plot_added_variable()], [ols_plot_comp_plus_resid()] +\code{\link[=ols_plot_added_variable]{ols_plot_added_variable()}}, \code{\link[=ols_plot_comp_plus_resid]{ols_plot_comp_plus_resid()}} } diff --git a/man/ols_plot_resid_stand.Rd b/man/ols_plot_resid_stand.Rd index 0ded501..3dce89e 100644 --- a/man/ols_plot_resid_stand.Rd +++ b/man/ols_plot_resid_stand.Rd @@ -35,5 +35,5 @@ ols_plot_resid_stand(model, threshold = 3) } \seealso{ -[ols_plot_resid_stud()] +\code{\link[=ols_plot_resid_stud]{ols_plot_resid_stud()}} } diff --git a/man/ols_plot_resid_stud.Rd b/man/ols_plot_resid_stud.Rd index 8a461c5..aa81fc9 100644 --- a/man/ols_plot_resid_stud.Rd +++ b/man/ols_plot_resid_stud.Rd @@ -39,5 +39,5 @@ ols_plot_resid_stud(model, threshold = 2) } \seealso{ -[ols_plot_resid_stand()] +\code{\link[=ols_plot_resid_stand]{ols_plot_resid_stand()}} } diff --git a/man/ols_plot_resid_stud_fit.Rd b/man/ols_plot_resid_stud_fit.Rd index dcb8008..3fe8418 100644 --- a/man/ols_plot_resid_stud_fit.Rd +++ b/man/ols_plot_resid_stud_fit.Rd @@ -42,6 +42,6 @@ ols_plot_resid_stud_fit(model, threshold = 3) } \seealso{ -[ols_plot_resid_lev()], [ols_plot_resid_stand()], - [ols_plot_resid_stud()] +\code{\link[=ols_plot_resid_lev]{ols_plot_resid_lev()}}, \code{\link[=ols_plot_resid_stand]{ols_plot_resid_stand()}}, +\code{\link[=ols_plot_resid_stud]{ols_plot_resid_stud()}} } diff --git a/man/ols_pure_error_anova.Rd b/man/ols_pure_error_anova.Rd index 5cb7950..d758262 100644 --- a/man/ols_pure_error_anova.Rd +++ b/man/ols_pure_error_anova.Rd @@ -46,8 +46,8 @@ The residual sum of squares resulting from a regression can be decomposed into 2 components: \itemize{ - \item Due to lack of fit - \item Due to random variation +\item Due to lack of fit +\item Due to random variation } If most of the error is due to lack of fit and not just random error, the diff --git a/man/ols_regress.Rd b/man/ols_regress.Rd index 3047154..67df88e 100644 --- a/man/ols_regress.Rd +++ b/man/ols_regress.Rd @@ -73,5 +73,5 @@ ols_regress(mpg ~ disp * wt, data = mtcars, iterm = TRUE) } \references{ -https://www.ssc.wisc.edu/~hemken/Stataworkshops/stdBeta/Getting%20Standardized%20Coefficients%20Right.pdf +https://www.ssc.wisc.edu/~hemken/Stataworkshops/stdBeta/Getting\%20Standardized\%20Coefficients\%20Right.pdf } diff --git a/man/ols_test_bartlett.Rd b/man/ols_test_bartlett.Rd index 674ca69..938d4bb 100644 --- a/man/ols_test_bartlett.Rd +++ b/man/ols_test_bartlett.Rd @@ -35,8 +35,10 @@ is an 
alternative test that is less sensitive to departures from normality. } \examples{ # using grouping variable -library(descriptr) -ols_test_bartlett(mtcarz, 'mpg', group_var = 'cyl') +if (require("descriptr")) { + library(descriptr) + ols_test_bartlett(mtcarz, 'mpg', group_var = 'cyl') +} # using variables ols_test_bartlett(hsb, 'read', 'write') diff --git a/man/ols_test_breusch_pagan.Rd b/man/ols_test_breusch_pagan.Rd index 2c35ced..6faea8b 100644 --- a/man/ols_test_breusch_pagan.Rd +++ b/man/ols_test_breusch_pagan.Rd @@ -62,11 +62,11 @@ values of a independent variable. Computation \itemize{ - \item Fit a regression model - \item Regress the squared residuals from the above model on the independent variables - \item Compute \eqn{nR^2}. It follows a chi square distribution with p -1 degrees of - freedom, where p is the number of independent variables, n is the sample size and - \eqn{R^2} is the coefficient of determination from the regression in step 2. +\item Fit a regression model +\item Regress the squared residuals from the above model on the independent variables +\item Compute \eqn{nR^2}. It follows a chi square distribution with p -1 degrees of +freedom, where p is the number of independent variables, n is the sample size and +\eqn{R^2} is the coefficient of determination from the regression in step 2. } } \examples{ diff --git a/man/olsrr.Rd b/man/olsrr.Rd index a8a262f..98e7e81 100644 --- a/man/olsrr.Rd +++ b/man/olsrr.Rd @@ -3,6 +3,8 @@ \docType{package} \name{olsrr} \alias{olsrr} +\alias{_PACKAGE} +\alias{olsrr-package} \title{\code{olsrr} package} \description{ Tools for teaching and learning OLS regression @@ -10,5 +12,18 @@ Tools for teaching and learning OLS regression \details{ See the README on \href{https://github.com/rsquaredacademy/olsrr}{GitHub} +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://olsrr.rsquaredacademy.com/} + \item \url{https://github.com/rsquaredacademy/olsrr} + \item Report bugs at \url{https://github.com/rsquaredacademy/olsrr/issues} +} + +} +\author{ +\strong{Maintainer}: Aravind Hebbali \email{hebbali.aravind@gmail.com} + } \keyword{internal} diff --git a/man/surgical.Rd b/man/surgical.Rd index a4bfef0..66295d4 100644 --- a/man/surgical.Rd +++ b/man/surgical.Rd @@ -7,15 +7,15 @@ \format{ A data frame with 54 rows and 9 variables: \describe{ - \item{bcs}{blood clotting score} - \item{pindex}{prognostic index} - \item{enzyme_test}{enzyme function test score} - \item{liver_test}{liver function test score} - \item{age}{age, in years} - \item{gender}{indicator variable for gender (0 = male, 1 = female)} - \item{alc_mod}{indicator variable for history of alcohol use (0 = None, 1 = Moderate)} - \item{alc_heavy}{indicator variable for history of alcohol use (0 = None, 1 = Heavy)} - \item{y}{Survival Time} +\item{bcs}{blood clotting score} +\item{pindex}{prognostic index} +\item{enzyme_test}{enzyme function test score} +\item{liver_test}{liver function test score} +\item{age}{age, in years} +\item{gender}{indicator variable for gender (0 = male, 1 = female)} +\item{alc_mod}{indicator variable for history of alcohol use (0 = None, 1 = Moderate)} +\item{alc_heavy}{indicator variable for history of alcohol use (0 = None, 1 = Heavy)} +\item{y}{Survival Time} } } \source{ diff --git a/revdep/.gitignore b/revdep/.gitignore new file mode 100644 index 0000000..111ab32 --- /dev/null +++ b/revdep/.gitignore @@ -0,0 +1,7 @@ +checks +library +checks.noindex +library.noindex +cloud.noindex +data.sqlite +*.html diff --git a/revdep/README.md b/revdep/README.md 
new file mode 100644 index 0000000..c0c7e87 --- /dev/null +++ b/revdep/README.md @@ -0,0 +1,112 @@ +# Platform + +|field |value | +|:--------|:-----------------------------------| +|version |R version 4.3.2 (2023-10-31 ucrt) | +|os |Windows 10 x64 (build 19045) | +|system |x86_64, mingw32 | +|ui |RStudio | +|language |(EN) | +|collate |English_India.utf8 | +|ctype |en_US.UTF-8 | +|tz |Asia/Calcutta | +|date |2024-02-12 | +|rstudio |2023.12.1+402 Ocean Storm (desktop) | +|pandoc |NA | + +# Dependencies + +|package |old |new |Δ | +|:------------|:----------|:----------|:--| +|olsrr |0.5.3 |0.6.0 |* | +|abind |1.4-5 |1.4-5 | | +|backports |1.4.1 |1.4.1 | | +|base64enc |NA |0.1-3 |* | +|brio |1.1.4 |1.1.4 | | +|broom |1.0.5 |1.0.5 | | +|bslib |NA |0.6.1 |* | +|cachem |NA |1.0.8 |* | +|callr |3.7.3 |3.7.3 | | +|car |3.1-2 |3.1-2 | | +|carData |3.0-5 |3.0-5 | | +|cli |3.6.2 |3.6.2 | | +|colorspace |2.1-0 |2.1-0 | | +|commonmark |NA |1.9.1 |* | +|cpp11 |0.4.7 |0.4.7 | | +|crayon |1.5.2 |1.5.2 | | +|data.table |1.15.0 |NA |* | +|desc |1.4.3 |1.4.3 | | +|diffobj |0.3.5 |0.3.5 | | +|digest |0.6.34 |0.6.34 | | +|dplyr |1.1.4 |1.1.4 | | +|ellipsis |0.3.2 |0.3.2 | | +|evaluate |0.23 |0.23 | | +|fansi |1.0.6 |1.0.6 | | +|farver |2.1.1 |2.1.1 | | +|fastmap |NA |1.1.1 |* | +|fontawesome |NA |0.5.2 |* | +|fs |1.6.3 |1.6.3 | | +|generics |0.1.3 |0.1.3 | | +|ggplot2 |3.4.4 |3.4.4 | | +|glue |1.7.0 |1.7.0 | | +|goftest |1.2-3 |1.2-3 | | +|gridExtra |2.3 |2.3 | | +|gtable |0.3.4 |0.3.4 | | +|htmltools |NA |0.5.7 |* | +|httpuv |NA |1.6.14 |* | +|isoband |0.2.7 |0.2.7 | | +|jquerylib |NA |0.1.4 |* | +|jsonlite |1.8.8 |1.8.8 | | +|labeling |0.4.3 |0.4.3 | | +|later |NA |1.3.2 |* | +|lifecycle |1.0.4 |1.0.4 | | +|lme4 |1.1-35.1 |1.1-35.1 | | +|magrittr |2.0.3 |2.0.3 | | +|MatrixModels |0.5-3 |0.5-3 | | +|memoise |NA |2.0.1 |* | +|mime |NA |0.12 |* | +|minqa |1.2.6 |1.2.6 | | +|munsell |0.5.0 |0.5.0 | | +|nloptr |2.0.3 |2.0.3 | | +|nortest |1.0-4 |1.0-4 | | +|numDeriv |2016.8-1.1 |2016.8-1.1 | | +|pbkrtest |0.5.2 |0.5.2 | | +|pillar |1.9.0 |1.9.0 | | +|pkgbuild |1.4.3 |1.4.3 | | +|pkgconfig |2.0.3 |2.0.3 | | +|pkgload |1.3.4 |1.3.4 | | +|praise |1.0.0 |1.0.0 | | +|processx |3.8.3 |3.8.3 | | +|promises |NA |1.2.1 |* | +|ps |1.7.6 |1.7.6 | | +|purrr |1.0.2 |1.0.2 | | +|quantreg |5.97 |5.97 | | +|R6 |2.5.1 |2.5.1 | | +|rappdirs |NA |0.3.3 |* | +|RColorBrewer |1.1-3 |1.1-3 | | +|Rcpp |1.0.12 |1.0.12 | | +|RcppEigen |0.3.3.9.4 |0.3.3.9.4 | | +|rematch2 |2.1.2 |2.1.2 | | +|rlang |1.1.3 |1.1.3 | | +|rprojroot |2.0.4 |2.0.4 | | +|sass |NA |0.4.8 |* | +|scales |1.3.0 |1.3.0 | | +|shiny |NA |1.8.0 |* | +|sourcetools |NA |0.1.7-1 |* | +|SparseM |1.81 |1.81 | | +|stringi |1.8.3 |1.8.3 | | +|stringr |1.5.1 |1.5.1 | | +|testthat |3.2.1 |3.2.1 | | +|tibble |3.2.1 |3.2.1 | | +|tidyr |1.3.1 |1.3.1 | | +|tidyselect |1.2.0 |1.2.0 | | +|utf8 |1.2.4 |1.2.4 | | +|vctrs |0.6.5 |0.6.5 | | +|viridisLite |0.4.2 |0.4.2 | | +|waldo |0.5.2 |0.5.2 | | +|withr |3.0.0 |3.0.0 | | +|xplorerr |NA |0.1.2 |* | +|xtable |NA |1.8-4 |* | + +# Revdeps + diff --git a/revdep/cran.md b/revdep/cran.md new file mode 100644 index 0000000..ab1853c --- /dev/null +++ b/revdep/cran.md @@ -0,0 +1,7 @@ +## revdepcheck results + +We checked 4 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. 
+ + * We saw 0 new problems + * We failed to check 0 packages + diff --git a/revdep/failures.md b/revdep/failures.md new file mode 100644 index 0000000..9a20736 --- /dev/null +++ b/revdep/failures.md @@ -0,0 +1 @@ +*Wow, no problems at all. :)* \ No newline at end of file diff --git a/revdep/problems.md b/revdep/problems.md new file mode 100644 index 0000000..9a20736 --- /dev/null +++ b/revdep/problems.md @@ -0,0 +1 @@ +*Wow, no problems at all. :)* \ No newline at end of file diff --git a/tests/testthat/_snaps/visual/cooks-d-bar-chart-threshold.svg b/tests/testthat/_snaps/visual/cooks-d-bar-chart-threshold.svg new file mode 100644 index 0000000..0314202 --- /dev/null +++ b/tests/testthat/_snaps/visual/cooks-d-bar-chart-threshold.svg @@ -0,0 +1,122 @@ +[vdiffr SVG snapshot, 122 lines, markup not recoverable: "Cook's D Chart" with a "Threshold: 0.2" label, x axis "Observation" (0-30), y axis "Cook's D" (0.0-0.3), observations 17, 20 and 31 labelled] diff --git a/tests/testthat/_snaps/visual/cooks-d-bar-plot-threshold.svg b/tests/testthat/_snaps/visual/cooks-d-bar-plot-threshold.svg new file mode 100644 index 0000000..839a573 --- /dev/null +++ b/tests/testthat/_snaps/visual/cooks-d-bar-plot-threshold.svg @@ -0,0 +1,99 @@ +[vdiffr SVG snapshot, 99 lines, markup not recoverable: "Cook's D Bar Plot" with a "Threshold: 0.2" label, x axis "Observation" (0-30), y axis "Cook's D" (0.0-0.3), a normal/outlier legend, observations 17, 20 and 31 labelled] diff --git a/tests/testthat/test-bartlett.R b/tests/testthat/test-bartlett.R index d3ee598..47b7ea2 100644 --- a/tests/testthat/test-bartlett.R +++ b/tests/testthat/test-bartlett.R @@ -1,4 +1,4 @@ -test_that("all output from the test match the result", { +test_that("output from the test match the result when using variables", { b <- ols_test_bartlett(mtcars, 'mpg', 'disp') @@ -8,14 +8,18 @@ test_that("all output from the test match the result", { expect_equal(b$var_c, c("mpg", "disp"), ignore_attr = TRUE) expect_null(b$g_var) - b <- ols_test_bartlett(descriptr::mtcarz, 'mpg', group_var = 'vs') +}) - expect_equal(round(b$fstat, 3), 1.585) - expect_equal(round(b$pval, 3), 0.208) - expect_equal(b$df, 1) - expect_equal(b$var_c, "mpg") - expect_equal(b$g_var, "vs") +test_that("output from test match the result when using grouping variables", { + if (requireNamespace("descriptr", quietly = TRUE)) { + b <- ols_test_bartlett(descriptr::mtcarz, 'mpg', group_var = 'vs') + expect_equal(round(b$fstat, 3), 1.585) + expect_equal(round(b$pval, 3), 0.208) + expect_equal(b$df, 1) + expect_equal(b$var_c, "mpg") + expect_equal(b$g_var, "vs") + } }) test_that("bartlett test throws error messages", { diff --git a/tests/testthat/test-norm-output.R b/tests/testthat/test-norm-output.R index feee35c..e08b90c 100644 --- a/tests/testthat/test-norm-output.R +++ b/tests/testthat/test-norm-output.R @@ -8,8 +8,3 @@ test_that("output from ols_corr_test is as expected", { expect_equal(round(ols_test_correlation(model), 3), 0.97) }) -test_that("ols_test_normality returns error messages", { - model <- glm(prog ~ female + read + science, data = hsb, family = binomial(link = 'logit')) - expect_error(ols_test_normality(hsb$female), "y must be numeric") - expect_error(ols_test_normality(model), "Please specify a OLS linear regression model.") -}) \ No newline at end of file diff --git a/tests/testthat/test-visual.R
b/tests/testthat/test-visual.R index 68fd39e..692bd2c 100644 --- a/tests/testthat/test-visual.R +++ b/tests/testthat/test-visual.R @@ -70,6 +70,9 @@ test_that("cooks d bar plot is as expected", { p <- ols_plot_cooksd_bar(model, print_plot = FALSE) vdiffr::expect_doppelganger("cooks d bar plot", p$plot) + p1 <- ols_plot_cooksd_bar(model, threshold = 0.2, print_plot = FALSE) + vdiffr::expect_doppelganger("cooks d bar plot threshold", p1$plot) + p2 <- ols_plot_cooksd_bar(model, type = 2, print_plot = FALSE) vdiffr::expect_doppelganger("cooks d bar plot type 2", p2$plot) @@ -87,6 +90,9 @@ test_that("cooks d bar chart is as expected", { skip_on_cran() p <- ols_plot_cooksd_chart(model, print_plot = FALSE) vdiffr::expect_doppelganger("cooks d bar chart", p$plot) + + p1 <- ols_plot_cooksd_chart(model, threshold = 0.2, print_plot = FALSE) + vdiffr::expect_doppelganger("cooks d bar chart threshold", p1$plot) }) test_that("dffits plot is as expected", { @@ -415,4 +421,4 @@ test_that("sbic both direction regression plot is as expected", { p2 <- plot(ols_step_both_sbic(model), details = FALSE, print_plot = FALSE) vdiffr::expect_doppelganger("sbc both direction regression plot", p2$plot) -}) \ No newline at end of file +}) diff --git a/vignettes/variable_selection.Rmd b/vignettes/variable_selection.Rmd index 8b9c5eb..6147a70 100644 --- a/vignettes/variable_selection.Rmd +++ b/vignettes/variable_selection.Rmd @@ -23,7 +23,15 @@ library(goftest) ## Introduction -## All Possible Regression +Variable selection refers to the process of choosing the most relevant variables to include in a +regression model. It helps to improve model performance and avoid overfitting. + +Before we explore stepwise selection methods, let us take a quick look at all/best subset regression. +As they evaluate every possible variable combination, these methods are computationally intensive and may +crash your system if used with a large set of variables. We have included them in the package purely for +educational purposes. + +### All Possible Regression All subset regression tests all possible subsets of the set of potential independent variables. If there are K potential independent variables (besides the constant), then there are $2^{K}$ distinct subsets of them to be tested. For example, if you have 10 candidate independent variables, the number of subsets to be tested is $2^{10}$, which is 1024, and if you have 20 candidate variables, the number is $2^{20}$, which is more than one million. @@ -32,15 +40,7 @@ model <- lm(mpg ~ disp + hp + wt + qsec, data = mtcars) ols_step_all_possible(model) ``` -The `plot` method shows the panel of fit criteria for all possible regression methods. - -```{r allsubplot, fig.width=10, fig.height=10, fig.align='center'} -model <- lm(mpg ~ disp + hp + wt + qsec, data = mtcars) -k <- ols_step_all_possible(model) -plot(k) -``` - -## Best Subset Regression +### Best Subset Regression Select the subset of predictors that do the best at meeting some well-defined objective criterion, such as having the largest R2 value or the smallest MSE, Mallow's Cp or AIC. @@ -50,188 +50,134 @@ model <- lm(mpg ~ disp + hp + wt + qsec, data = mtcars) ols_step_best_subset(model) ``` -The `plot` method shows the panel of fit criteria for best subset regression methods.
- -```{r bestsubplot, fig.width=10, fig.height=10, fig.align='center'} -model <- lm(mpg ~ disp + hp + wt + qsec, data = mtcars) -k <- ols_step_best_subset(model) -plot(k) -``` +## Stepwise Selection -## Stepwise Forward Regression +Stepwise regression is a method of fitting regression models that involves the +iterative selection of independent variables to use in a model. It can be +achieved through forward selection, backward elimination, or a combination of +both methods. The forward selection approach starts with no variables and adds +each new variable incrementally, testing for statistical significance, while +the backward elimination method begins with a full model and then removes the +least statistically significant variables one at a time. -Build regression model from a set of candidate predictor variables by entering predictors based on -p values, in a stepwise manner until there is no variable left to enter any more. The model should include all the candidate predictor variables. If details is set to `TRUE`, each step is displayed. +### Model -### Variable Selection +We will use the model below throughout this article except in the case of hierarchical selection. +You can learn more about the data [here](https://olsrr.rsquaredacademy.com/reference/surgical). -```{r stepf1} -# stepwise forward regression +```{r model} model <- lm(y ~ ., data = surgical) -ols_step_forward_p(model) +summary(model) ``` -### Plot +### Model specification -```{r stepf2, fig.width=10, fig.height=10, fig.align='center'} -model <- lm(y ~ ., data = surgical) -k <- ols_step_forward_p(model) -plot(k) -``` +Irrespective of the stepwise method being used, we have to specify the full model, i.e. all the variables/predictors +under consideration, as `olsrr` extracts the candidate variables for selection/elimination from the model specified. -### Detailed Output +##### Forward selection -```{r stepwisefdetails} +```{r stepf1} # stepwise forward regression -model <- lm(y ~ ., data = surgical) -ols_step_forward_p(model, details = TRUE) +ols_step_forward_p(model) ``` -## Stepwise Backward Regression +##### Backward elimination -Build regression model from a set of candidate predictor variables by removing predictors based on -p values, in a stepwise manner until there is no variable left to remove any more. The model should include all the candidate predictor variables. If details is set to `TRUE`, each step is displayed. - -### Variable Selection - -```{r stepb, fig.width=10, fig.height=10, fig.align='center'} +```{r stepb} # stepwise backward regression -model <- lm(y ~ ., data = surgical) ols_step_backward_p(model) ``` -### Plot - -```{r stepb2, fig.width=10, fig.height=10, fig.align='center'} -model <- lm(y ~ ., data = surgical) -k <- ols_step_backward_p(model) -plot(k) -``` - -### Detailed Output - -```{r stepwisebdetails} -# stepwise backward regression -model <- lm(y ~ ., data = surgical) -ols_step_backward_p(model, details = TRUE) -``` - -## Stepwise Regression +### Criteria -Build regression model from a set of candidate predictor variables by entering and removing predictors based on -p values, in a stepwise manner until there is no variable left to enter or remove any more. The model should include all the candidate predictor variables. If details is set to `TRUE`, each step is displayed.
+The criterion for selecting variables may be one of the following: -### Variable Selection +- p value +- Akaike information criterion (AIC) +- Schwarz Bayesian criterion (SBC) +- Sawa Bayesian criterion (SBIC) +- R-square +- adjusted R-square + +### Include/exclude variables -```{r stepwise1} -# stepwise regression -model <- lm(y ~ ., data = surgical) -ols_step_both_p(model) -``` +We can force variables to be included or excluded from the model at all stages of variable selection. The +variables may be specified either by name or by position in the model specified. -### Plot +##### By name -```{r stepwise2, fig.width=10, fig.height=10, fig.align='center'} -model <- lm(y ~ ., data = surgical) -k <- ols_step_both_p(model) -plot(k) +```{r include_name} +ols_step_forward_p(model, include = c("age", "alc_mod")) ``` -### Detailed Output +##### By index -```{r stepwisedetails} -# stepwise regression -model <- lm(y ~ ., data = surgical) -ols_step_both_p(model, details = TRUE) +```{r include_index} +ols_step_forward_p(model, include = c(5, 7)) ``` -## Stepwise AIC Forward Regression - -Build regression model from a set of candidate predictor variables by entering predictors based on -Akaike Information Criteria, in a stepwise manner until there is no variable left to enter any more. -The model should include all the candidate predictor variables. If details is set to `TRUE`, each step is displayed. +### Standardized output -### Variable Selection +All stepwise selection methods display standard output, which includes: -```{r stepaicf1} -# stepwise aic forward regression -model <- lm(y ~ ., data = surgical) -ols_step_forward_aic(model) -``` +- selection summary +- model summary +- ANOVA +- parameter estimates -### Plot - -```{r stepaicf2, fig.width=5, fig.height=5, fig.align='center'} -model <- lm(y ~ ., data = surgical) -k <- ols_step_forward_aic(model) -plot(k) +```{r output} +# adjusted r-square +ols_step_forward_adj_r2(model) ``` -### Detailed Output - -```{r stepwiseaicfdetails} -# stepwise aic forward regression -model <- lm(y ~ ., data = surgical) -ols_step_forward_aic(model, details = TRUE) -``` +### Visualization -## Stepwise AIC Backward Regression +Use the `plot()` method to visualize variable selection. It will display how the variable selection criterion +changes at each step of the selection process, along with the variable selected. -Build regression model from a set of candidate predictor variables by removing predictors based on -Akaike Information Criteria, in a stepwise manner until there is no variable left to remove any more. -The model should include all the candidate predictor variables. If details is set to `TRUE`, each step is displayed. - -### Variable Selection - -```{r stepaicb1} -# stepwise aic backward regression -model <- lm(y ~ ., data = surgical) -k <- ols_step_backward_aic(model) -k +```{r visualize} +# adjusted r-square +k <- ols_step_forward_adj_r2(model) +plot(k) ``` -### Plot +### Verbose output -```{r stepaicb2, fig.width=5, fig.height=5, fig.align='center'} -model <- lm(y ~ ., data = surgical) -k <- ols_step_backward_aic(model) -plot(k) -``` +To view the detailed regression output at each stage of variable selection/elimination, set `details` to `TRUE`.
It will +display the following information at each step: -### Detailed Output +- step number +- variable selected/eliminated +- model +- value of the criterion at that stage -```{r stepwiseaicbdetails} -# stepwise aic backward regression -model <- lm(y ~ ., data = surgical) -ols_step_backward_aic(model, details = TRUE) +```{r details} +# adjusted r-square +ols_step_forward_adj_r2(model, details = TRUE) ``` -## Stepwise AIC Regression +### Progress -Build regression model from a set of candidate predictor variables by entering and removing predictors based on -Akaike Information Criteria, in a stepwise manner until there is no variable left to enter or remove any more. -The model should include all the candidate predictor variables. If details is set to `TRUE`, each step is displayed. +To view the progress in the variable selection procedure, set `progress` to `TRUE`. It will display the variable +being selected/eliminated at each step until there are no more candidate variables left. -### Variable Selection - -```{r stepwiseaic1} -# stepwise aic regression -model <- lm(y ~ ., data = surgical) -ols_step_both_aic(model) +```{r progress} +# adjusted r-square +ols_step_forward_adj_r2(model, progress = TRUE) ``` -### Plot +### Hierarchical selection -```{r stepwiseaic2, fig.width=5, fig.height=5, fig.align='center'} -model <- lm(y ~ ., data = surgical) -k <- ols_step_both_aic(model) -plot(k) -``` - -### Detailed Output +When using `p` values as the criterion for selecting/eliminating variables, we can enable hierarchical +selection. In this method, the search for the most significant variable is restricted to the next available +variable. In the example below, as `liver_test` does not meet the threshold for selection, none of the +variables after `liver_test` are considered for further selection i.e. the stepwise selection ends as soon +as it comes across a variable that does not meet the selection threshold. You can learn more about hierarchical
selection [here](https://www.stata.com/manuals/rstepwise.pdf). -```{r stepwiseaicdetails} -# stepwise aic regression -model <- lm(y ~ ., data = surgical) -ols_step_both_aic(model, details = TRUE) +```{r hierarchical} +# hierarchical selection +m <- lm(y ~ bcs + alc_heavy + pindex + enzyme_test + liver_test + age + gender + alc_mod, data = surgical) +ols_step_forward_p(m, 0.1, hierarchical = TRUE) ```
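As a quick complement to the hierarchical selection example above, a minimal sketch contrasting the two forward-selection modes. Assumptions: olsrr 0.6.0 as built by this changeset, with the `surgical` data bundled in the package; the second positional argument of `ols_step_forward_p()` is taken to be the p-value entry threshold, matching the positional call in the vignette chunk above.

```r
library(olsrr)

# Full model; with hierarchical selection the order of terms in the formula
# matters, because candidates are considered strictly in that order.
m <- lm(y ~ bcs + alc_heavy + pindex + enzyme_test + liver_test + age + gender + alc_mod,
        data = surgical)

# Default forward selection at a 0.1 entry threshold: every remaining
# candidate is scanned at each step.
ols_step_forward_p(m, 0.1)

# Hierarchical selection: per the vignette, the search stops at the first
# variable that misses the threshold (liver_test here), so the terms after
# it are never considered.
ols_step_forward_p(m, 0.1, hierarchical = TRUE)
```

The design point worth noting is that hierarchical selection makes the order of terms in the model formula part of the specification, so reordering predictors can change which variables end up selected.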