Merge pull request #15 from masurp/add-sem

Add updates to master
masurp · Dec 4, 2020 · 0fb5294 · 0fb5294
2 parents 088f665 + d642a6a
commit 0fb5294
Show file tree

Hide file tree

Showing 98 changed files with 3,195 additions and 1,238 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: specr
 Title: Conducting and Visualizing Specification Curve Analyses
-Version: 0.2.1.9000
+Version: 0.2.2
 Authors@R: c(
     person(given = "Philipp K.",
            family = "Masur",
@@ -20,16 +20,20 @@ Depends:
     R (>= 3.5.0)
 Imports:
     broom,
+    broom.mixed,
     cowplot,
     dplyr,
+    gapminder,
     ggplot2,
     ggraph,
     glue,
     igraph,
+    lavaan,
     lme4,
     magrittr,
     purrr,
     rlang, 
+    stringr,
     tibble,
     tidyr
 Suggests: 
@@ -42,5 +46,5 @@ Suggests:
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.1.0
+RoxygenNote: 7.1.1
 VignetteBuilder: knitr
diff --git a/R/helpers.R b/R/helpers.R
@@ -1,15 +1,20 @@
 
 # create regression formula based on setup_specs
-create_formula <- function(x, y, controls, ...) {
+create_formula <- function(x,
+                           y,
+                           controls,
+                           ...) {
 
   if (controls == "no covariates") controls <- 1
   paste(y, "~", x, "+", controls)
 
 }
 
-# run individual specification
-run_spec <- function(specs, df, conf.level, keep.results = FALSE) {
-
+# run specifications
+run_spec <- function(specs,
+                     df,
+                     conf.level,
+                     keep.results = FALSE) {
   results <- specs %>%
     dplyr::mutate(formula = pmap(specs, create_formula)) %>%
     tidyr::unnest(formula) %>%
@@ -21,10 +26,19 @@ run_spec <- function(specs, df, conf.level, keep.results = FALSE) {
                               broom::tidy,
                               conf.int = TRUE,
                               conf.level = conf.level),
-                  obs = map(.data$res, nobs)) %>%
+                  fit = map(.data$res, broom::glance)) %>%
     tidyr::unnest(.data$coefs) %>%
-    tidyr::unnest(.data$obs) %>%
-    dplyr::filter(.data$term == .data$x) %>%
+    tidyr::unnest(.data$fit, names_sep = "_")
+
+  if("op" %in% names(results)) {
+    results <- results %>%
+      dplyr::filter(.data$term == paste(.data$y, "~", .data$x))
+  } else {
+    results <- results %>%
+      dplyr::filter(.data$term == .data$x)
+    }
+
+  results <- results %>%
     dplyr::select(-.data$formula, -.data$term)
 
   if (isFALSE(keep.results)) {
@@ -36,7 +50,8 @@ run_spec <- function(specs, df, conf.level, keep.results = FALSE) {
 }
 
 # creates subsets
-create_subsets <- function(df, subsets) {
+create_subsets <- function(df,
+                           subsets) {
 
   subsets %>%
     stack %>%
@@ -46,20 +61,20 @@ create_subsets <- function(df, subsets) {
 
 
 # formats results
-format_results <- function(df, null = 0, desc = FALSE) {
+format_results <- function(df, var, null = 0, desc = FALSE) {
 
   # rank specs
   if (isFALSE(desc)) {
     df <- df %>%
-      dplyr::arrange(.data$estimate)
+      dplyr::arrange(!! var)
   } else {
     df <- df %>%
-      dplyr::arrange(desc(.data$estimate))
+      dplyr::arrange(desc(!! var))
   }
 
   # create rank variable and color significance
   df <- df %>%
-    dplyr::mutate(specifications = 1:n(),
+    dplyr::mutate(specifications = 1:nrow(df),
                   color = case_when(conf.low > null ~ "#377eb8",
                                     conf.high < null ~ "#e41a1c",
                                     TRUE ~ "darkgrey"))
@@ -72,3 +87,4 @@ names_from_dots <- function(...) {
   sapply(substitute(list(...))[-1], deparse)
 
 }
+
diff --git a/R/plot_choices.r b/R/plot_choices.r
@@ -3,6 +3,7 @@
 #' This function plots how analytical choices affect the obtained results (i.e., the rank within the curve). Significant results are highlighted (negative = red, positive = blue, grey = nonsignificant). This function creates the lower panel in \code{plot_specs()}.
 #'
 #' @param df a data frame resulting from \code{run_specs()}.
+#' @param var which variable should be evaluated? Defaults to estimate (the effect sizes computed by [run_specs()]).
 #' @param choices a vector specifying which analytical choices should be plotted. By default, all choices are plotted.
 #' @param desc logical value indicating whether the curve should be arranged in a descending order. Defaults to FALSE.
 #' @param null Indicate what value represents the 'null' hypothesis (Defaults to zero).
@@ -27,14 +28,18 @@
 #' plot_choices(results,
 #'              choices = c("x", "y", "controls"))
 plot_choices <- function(df,
+                         var = .data$estimate,
                          choices = c("x", "y", "model", "controls", "subsets"),
                          desc = FALSE,
                          null = 0) {
 
   value <- key <- NULL
 
+  var <- enquo(var)
+
+  # Create basic plot
   df %>%
-    format_results(desc = desc, null = null) %>%
+    format_results(var = var, null = null, desc = desc) %>%
     tidyr::gather(key, value, choices) %>%
     dplyr::mutate(key = factor(.data$key, levels = choices)) %>%
     ggplot(aes(x = .data$specifications,

diff --git a/R/plot_curve.r b/R/plot_curve.r
@@ -3,6 +3,7 @@
 #' This function plots a ranked specification curve. Confidence intervals can be included. Significant results are highlighted (negative = red, positive = blue, grey = nonsignificant). This function creates the upper panel in \code{plot_specs()}.
 #'
 #' @param df a data frame resulting from \code{run_specs()}.
+#' @param var which variable should be evaluated? Defaults to estimate (the effect sizes computed by [run_specs()]).
 #' @param desc logical value indicating whether the curve should be arranged in a descending order. Defaults to FALSE.
 #' @param ci logical value indicating whether confidence intervals should be plotted.
 #' @param ribbon logical value indicating whether a ribbon should be plotted instead.
@@ -35,17 +36,20 @@
 #'              linetype = "dashed") +
 #'   theme_linedraw()
 plot_curve <- function(df,
+                       var = .data$estimate,
                        desc = FALSE,
                        ci = TRUE,
                        ribbon = FALSE,
                        legend = FALSE,
                        null = 0){
 
+  var <- enquo(var)
+
   # Create basic plot
   plot <- df %>%
-    format_results(desc = desc, null = null) %>%
+    format_results(var = var, null = null, desc = desc) %>%
     ggplot(aes(x = .data$specifications,
-               y = .data$estimate,
+               y = !! var,
                ymin = .data$conf.low,
                ymax = .data$conf.high,
                color = .data$color)) +

diff --git a/R/plot_samplesizes.r b/R/plot_samplesizes.r
@@ -3,6 +3,7 @@
 #' This function plots a histogram of sample sizes per specification. It can be added to the overall specification curve plot (see vignettes).
 #'
 #' @param df a data frame resulting from \code{run_specs()}.
+#' @param var which variable should be evaluated? Defaults to estimate (the effect sizes computed by [run_specs()]).
 #' @param desc logical value indicating whether the curve should be arranged in a descending order. Defaults to FALSE.
 #'
 #' @return a \link[ggplot2]{ggplot} object.
@@ -24,19 +25,22 @@
 #' # plot ranked bar chart of sample sizes
 #' plot_samplesizes(results)
 #'
-#' # customize
+#' # add a horizontal line for the median sample size
 #' plot_samplesizes(results) +
-#'   geom_hline(yintercept = median(results$obs),
+#'   geom_hline(yintercept = median(results$fit_nobs),
 #'              color = "darkgrey",
 #'              linetype = "dashed") +
 #'   theme_linedraw()
 plot_samplesizes <- function(df,
+                             var = .data$estimate,
                              desc = FALSE) {
 
+  var <- enquo(var)
+
   df %>%
-    format_results(desc = desc) %>%
+    format_results(var = var, desc = desc) %>%
     ggplot(aes(x = .data$specifications,
-               y = .data$obs)) +
+               y = .data$fit_nobs)) +
     geom_bar(stat = "identity",
              fill = "grey",
              size = .2) +

diff --git a/R/plot_specs.r b/R/plot_specs.r
@@ -17,11 +17,6 @@
 #'   plotted.
 #' @param null Indicate what value represents the 'null' hypothesis (defaults to
 #'   zero).
-#' @param sample_perc numeric value denoting what percentage of the
-#'   specifications should be plotted. Needs to be strictly greater than 0 and smalle than 1.
-#'   Defaults to 1 (= all specifications). Drawing a sample from all
-#'   specification usually makes only sense of the number of specifications is
-#'   very large and one wants to simplify the visualization.
 #' @param ... additional arguments that can be passed to \code{plot_grid()}.
 #'
 #' @return a \link[ggplot2]{ggplot} object.
@@ -70,18 +65,10 @@ plot_specs <- function(df = NULL,
                        null = 0,
                        ci = TRUE,
                        ribbon = FALSE,
-                       sample_perc = 1,
                        ...) {
 
   if (!is.null(df)) {
 
-    if (sample_perc > 1 | sample_perc < 0) {
-      stop("`sample_n` must be greater than 0 and less than 1!")
-    }
-
-  # Draw sample
-  df <- dplyr::sample_n(df, size = sample_perc*nrow(df))
-
   # Create both plots
   plot_a <- plot_curve(df, ci = ci, ribbon = ribbon, desc = desc, null = null)
   plot_b <- plot_choices(df, choices = choices, desc = desc, null = null)

diff --git a/R/plot_summary.r b/R/plot_summary.r
@@ -37,7 +37,8 @@ plot_summary <- function(df,
       theme(legend.position = "none",
             axis.line = element_line("black", size = .5),
             axis.text = element_text(colour = "black"),
-            strip.text.x = element_blank())
+            strip.text.x = element_blank()) +
+    labs(x = "")
 }
 
 
diff --git a/R/summarise_specs.r b/R/summarise_specs.r
@@ -39,7 +39,8 @@
 #' # Unnamed vector instead of named list passed to `stats`
 #' summarise_specs(results,
 #'                 controls,
-#'                 stats = c(mean, median))
+#'                 stats = c(mean = mean,
+#'                           median = median))
 #'
 #' @seealso [plot_summary()] to visually investigate the effect of analytical choices.
 summarise_specs <- function(df,
@@ -68,7 +69,7 @@ summarise_specs <- function(df,
        df %>%
          summary_specs,
        df %>%
-         dplyr::summarize(obs = median(.data$obs))
+         dplyr::summarize(obs = median(.data$fit_nobs))
      )
 
   } else {
@@ -79,7 +80,7 @@ summarise_specs <- function(df,
         summary_specs,
       df %>%
         dplyr::group_by(!!! group_var) %>%
-        dplyr::summarize(obs = median(.data$obs)),
+        dplyr::summarize(obs = median(.data$fit_nobs)),
       by = names_from_dots(...)
     )
   }

diff --git a/README.Rmd b/README.Rmd
@@ -25,6 +25,7 @@ knitr::opts_chunk$set(
 <!-- badges: start -->
 [![CRAN status](https://www.r-pkg.org/badges/version/specr)](https://CRAN.R-project.org/package=specr)
 [![Travis build status](https://travis-ci.org/masurp/specr.svg?branch=master)](https://travis-ci.org/masurp/specr)
+![](https://cranlogs.r-pkg.org/badges/grand-total/specr)
 [![Lifecycle: maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing)
 <!-- badges: end -->
 
@@ -38,7 +39,9 @@ There are also some vignettes that exemplify and explain specific aspects and fu
 
 - [Getting started](https://masurp.github.io/specr/articles/specr.html): A comprehensive example. This vignette illustrates the major functions of the package. 
 - [Customizing specification curve plots](https://masurp.github.io/specr/articles/custom-plot.html): This vignette exemplifies various ways to plot the specification curve. 
-- [Decomposing the variance of the specification curve](https://masurp.github.io/specr/articles/decompose_var.html): An example of how to investigate variance components of the specification curve.
+- [Decomposing the variance of the specification curve](https://masurp.github.io/specr/articles/decompose_var.html): Investigating variance components of the specification curve.
+- [Including latent measurement models](https://masurp.github.io/specr/articles/measurement_models.html): This vignette exemplifies how to include latent measurement models and estimate structural equation models using `lavaan`. 
+- [Including random effects/Estimate multilevel models](https://masurp.github.io/specr/articles/random_effects.html): This vignette exemplifies how to include random effects and estimate multilevel models using `lme4`.
 - [Visualizing progress during estimation](https://masurp.github.io/specr/articles/progress.html): This vignette explains how to create a progress bar for longer computations.
 
 ### Disclaimer
@@ -65,6 +68,7 @@ devtools::install_github("masurp/specr")
 Using `specr` is comparatively simple. The main function is `run_specs()` in which analytical choices are specified as arguments. The function `plot_specs()` can then be used to visualize the results.
 
 ```{r, message=F, warning = F, fig.height=10, fig.width=10}
+# Load package
 library(specr)
 
 # Run specs

diff --git a/README.md b/README.md
@@ -15,7 +15,7 @@
 status](https://www.r-pkg.org/badges/version/specr)](https://CRAN.R-project.org/package=specr)
 [![Travis build
 status](https://travis-ci.org/masurp/specr.svg?branch=master)](https://travis-ci.org/masurp/specr)
-[![Lifecycle:
+![](https://cranlogs.r-pkg.org/badges/grand-total/specr) [![Lifecycle:
 maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing)
 <!-- badges: end -->
 
@@ -45,8 +45,15 @@ aspects and functions of the package:
     curve.
   - [Decomposing the variance of the specification
     curve](https://masurp.github.io/specr/articles/decompose_var.html):
-    An example of how to investigate variance components of the
-    specification curve.
+    Investigating variance components of the specification curve.
+  - [Including latent measurement
+    models](https://masurp.github.io/specr/articles/measurement_models.html):
+    This vignette exemplifies how to include latent measurement models
+    and estimate structural equation models using `lavaan`.
+  - [Including random effects/Estimate multilevel
+    models](https://masurp.github.io/specr/articles/random_effects.html):
+    This vignette exemplifies how to include random effects and estimate
+    multilevel models using `lme4`.
   - [Visualizing progress during
     estimation](https://masurp.github.io/specr/articles/progress.html):
     This vignette explains how to create a progress bar for longer
@@ -85,6 +92,7 @@ Using `specr` is comparatively simple. The main function is
 The function `plot_specs()` can then be used to visualize the results.
 
 ``` r
+# Load package
 library(specr)
 
 # Run specs
@@ -97,7 +105,7 @@ results <- run_specs(df = example_data,
                                     group2 = unique(example_data$group2)))
 # Result frame
 head(results)
-#> # A tibble: 6 x 12
+#> # A tibble: 6 x 23
 #>   x     y     model controls estimate std.error statistic  p.value conf.low
 #>   <chr> <chr> <chr> <chr>       <dbl>     <dbl>     <dbl>    <dbl>    <dbl>
 #> 1 x1    y1    lm    c1 + c2     4.95      0.525     9.43  3.11e-18    3.92 
@@ -106,7 +114,11 @@ head(results)
 #> 4 x2    y2    lm    c1 + c2     0.985     0.324     3.04  2.62e- 3    0.347
 #> 5 x1    y1    lm    c1          5.53      0.794     6.97  2.95e-11    3.96 
 #> 6 x2    y1    lm    c1          8.07      0.557    14.5   6.90e-35    6.98 
-#> # … with 3 more variables: conf.high <dbl>, obs <int>, subsets <chr>
+#> # … with 14 more variables: conf.high <dbl>, fit_r.squared <dbl>,
+#> #   fit_adj.r.squared <dbl>, fit_sigma <dbl>, fit_statistic <dbl>,
+#> #   fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>, fit_AIC <dbl>,
+#> #   fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <int>, fit_nobs <int>,
+#> #   subsets <chr>
 
 # Plot
 plot_specs(results, choices = c("x", "y", "controls", "subsets"))
@@ -121,17 +133,17 @@ citation("specr")
 #> 
 #> To cite 'specr' in publications use:
 #> 
-#>   Masur, Philipp K. & Scharkow, M. (2019). specr: Statistical functions
-#>   for conducting specification curve analyses. Available from
-#>   https://github.com/masurp/specr.
+#>   Masur, Philipp K. & Scharkow, M. (2020). specr: Conducting and
+#>   Visualizing Specification Curve Analyses. Available from
+#>   https://CRAN.R-project.org/package=specr.
 #> 
 #> A BibTeX entry for LaTeX users is
 #> 
 #>   @Misc{,
-#>     title = {specr: Statistical functions for conducting specification curve analyses (Version 0.2.1.9000)},
+#>     title = {specr: Conducting and Visualizing Specification Curve Analyses (Version 0.2.2)},
 #>     author = {Philipp K. Masur and Michael Scharkow},
-#>     year = {2019},
-#>     url = {https://github.com/masurp/specr},
+#>     year = {2020},
+#>     url = {https://CRAN.R-project.org/package=specr},
 #>   }
 ```