Skip to content

Commit

Permalink
Merge pull request #15 from masurp/add-sem
Browse files Browse the repository at this point in the history
Add updates to master
  • Loading branch information
masurp authored Dec 4, 2020
2 parents 088f665 + d642a6a commit 0fb5294
Show file tree
Hide file tree
Showing 98 changed files with 3,195 additions and 1,238 deletions.
8 changes: 6 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: specr
Title: Conducting and Visualizing Specification Curve Analyses
Version: 0.2.1.9000
Version: 0.2.2
Authors@R: c(
person(given = "Philipp K.",
family = "Masur",
Expand All @@ -20,16 +20,20 @@ Depends:
R (>= 3.5.0)
Imports:
broom,
broom.mixed,
cowplot,
dplyr,
gapminder,
ggplot2,
ggraph,
glue,
igraph,
lavaan,
lme4,
magrittr,
purrr,
rlang,
stringr,
tibble,
tidyr
Suggests:
Expand All @@ -42,5 +46,5 @@ Suggests:
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.0
RoxygenNote: 7.1.1
VignetteBuilder: knitr
40 changes: 28 additions & 12 deletions R/helpers.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@

# create regression formula based on setup_specs
create_formula <- function(x, y, controls, ...) {
create_formula <- function(x,
y,
controls,
...) {

if (controls == "no covariates") controls <- 1
paste(y, "~", x, "+", controls)

}

# run individual specification
run_spec <- function(specs, df, conf.level, keep.results = FALSE) {

# run specifications
run_spec <- function(specs,
df,
conf.level,
keep.results = FALSE) {
results <- specs %>%
dplyr::mutate(formula = pmap(specs, create_formula)) %>%
tidyr::unnest(formula) %>%
Expand All @@ -21,10 +26,19 @@ run_spec <- function(specs, df, conf.level, keep.results = FALSE) {
broom::tidy,
conf.int = TRUE,
conf.level = conf.level),
obs = map(.data$res, nobs)) %>%
fit = map(.data$res, broom::glance)) %>%
tidyr::unnest(.data$coefs) %>%
tidyr::unnest(.data$obs) %>%
dplyr::filter(.data$term == .data$x) %>%
tidyr::unnest(.data$fit, names_sep = "_")

if("op" %in% names(results)) {
results <- results %>%
dplyr::filter(.data$term == paste(.data$y, "~", .data$x))
} else {
results <- results %>%
dplyr::filter(.data$term == .data$x)
}

results <- results %>%
dplyr::select(-.data$formula, -.data$term)

if (isFALSE(keep.results)) {
Expand All @@ -36,7 +50,8 @@ run_spec <- function(specs, df, conf.level, keep.results = FALSE) {
}

# creates subsets
create_subsets <- function(df, subsets) {
create_subsets <- function(df,
subsets) {

subsets %>%
stack %>%
Expand All @@ -46,20 +61,20 @@ create_subsets <- function(df, subsets) {


# formats results
format_results <- function(df, null = 0, desc = FALSE) {
format_results <- function(df, var, null = 0, desc = FALSE) {

# rank specs
if (isFALSE(desc)) {
df <- df %>%
dplyr::arrange(.data$estimate)
dplyr::arrange(!! var)
} else {
df <- df %>%
dplyr::arrange(desc(.data$estimate))
dplyr::arrange(desc(!! var))
}

# create rank variable and color significance
df <- df %>%
dplyr::mutate(specifications = 1:n(),
dplyr::mutate(specifications = 1:nrow(df),
color = case_when(conf.low > null ~ "#377eb8",
conf.high < null ~ "#e41a1c",
TRUE ~ "darkgrey"))
Expand All @@ -72,3 +87,4 @@ names_from_dots <- function(...) {
sapply(substitute(list(...))[-1], deparse)

}

7 changes: 6 additions & 1 deletion R/plot_choices.r
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#' This function plots how analytical choices affect the obtained results (i.e., the rank within the curve). Significant results are highlighted (negative = red, positive = blue, grey = nonsignificant). This function creates the lower panel in \code{plot_specs()}.
#'
#' @param df a data frame resulting from \code{run_specs()}.
#' @param var which variable should be evaluated? Defaults to estimate (the effect sizes computed by [run_specs()]).
#' @param choices a vector specifying which analytical choices should be plotted. By default, all choices are plotted.
#' @param desc logical value indicating whether the curve should be arranged in descending order. Defaults to FALSE.
#' @param null Indicate what value represents the 'null' hypothesis (Defaults to zero).
Expand All @@ -27,14 +28,18 @@
#' plot_choices(results,
#' choices = c("x", "y", "controls"))
plot_choices <- function(df,
var = .data$estimate,
choices = c("x", "y", "model", "controls", "subsets"),
desc = FALSE,
null = 0) {

value <- key <- NULL

var <- enquo(var)

# Create basic plot
df %>%
format_results(desc = desc, null = null) %>%
format_results(var = var, null = null, desc = desc) %>%
tidyr::gather(key, value, choices) %>%
dplyr::mutate(key = factor(.data$key, levels = choices)) %>%
ggplot(aes(x = .data$specifications,
Expand Down
8 changes: 6 additions & 2 deletions R/plot_curve.r
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#' This function plots a ranked specification curve. Confidence intervals can be included. Significant results are highlighted (negative = red, positive = blue, grey = nonsignificant). This function creates the upper panel in \code{plot_specs()}.
#'
#' @param df a data frame resulting from \code{run_specs()}.
#' @param var which variable should be evaluated? Defaults to estimate (the effect sizes computed by [run_specs()]).
#' @param desc logical value indicating whether the curve should be arranged in descending order. Defaults to FALSE.
#' @param ci logical value indicating whether confidence intervals should be plotted.
#' @param ribbon logical value indicating whether a ribbon should be plotted instead.
Expand Down Expand Up @@ -35,17 +36,20 @@
#' linetype = "dashed") +
#' theme_linedraw()
plot_curve <- function(df,
var = .data$estimate,
desc = FALSE,
ci = TRUE,
ribbon = FALSE,
legend = FALSE,
null = 0){

var <- enquo(var)

# Create basic plot
plot <- df %>%
format_results(desc = desc, null = null) %>%
format_results(var = var, null = null, desc = desc) %>%
ggplot(aes(x = .data$specifications,
y = .data$estimate,
y = !! var,
ymin = .data$conf.low,
ymax = .data$conf.high,
color = .data$color)) +
Expand Down
12 changes: 8 additions & 4 deletions R/plot_samplesizes.r
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#' This function plots a histogram of sample sizes per specification. It can be added to the overall specification curve plot (see vignettes).
#'
#' @param df a data frame resulting from \code{run_specs()}.
#' @param var which variable should be evaluated? Defaults to estimate (the effect sizes computed by [run_specs()]).
#' @param desc logical value indicating whether the curve should be arranged in descending order. Defaults to FALSE.
#'
#' @return a \link[ggplot2]{ggplot} object.
Expand All @@ -24,19 +25,22 @@
#' # plot ranked bar chart of sample sizes
#' plot_samplesizes(results)
#'
#' # customize
#' # add a horizontal line for the median sample size
#' plot_samplesizes(results) +
#' geom_hline(yintercept = median(results$obs),
#' geom_hline(yintercept = median(results$fit_nobs),
#' color = "darkgrey",
#' linetype = "dashed") +
#' theme_linedraw()
plot_samplesizes <- function(df,
var = .data$estimate,
desc = FALSE) {

var <- enquo(var)

df %>%
format_results(desc = desc) %>%
format_results(var = var, desc = desc) %>%
ggplot(aes(x = .data$specifications,
y = .data$obs)) +
y = .data$fit_nobs)) +
geom_bar(stat = "identity",
fill = "grey",
size = .2) +
Expand Down
13 changes: 0 additions & 13 deletions R/plot_specs.r
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,6 @@
#' plotted.
#' @param null Indicate what value represents the 'null' hypothesis (defaults to
#' zero).
#' @param sample_perc numeric value denoting what percentage of the
#' specifications should be plotted. Needs to be strictly greater than 0 and smalle than 1.
#' Defaults to 1 (= all specifications). Drawing a sample from all
#' specification usually makes only sense of the number of specifications is
#' very large and one wants to simplify the visualization.
#' @param ... additional arguments that can be passed to \code{plot_grid()}.
#'
#' @return a \link[ggplot2]{ggplot} object.
Expand Down Expand Up @@ -70,18 +65,10 @@ plot_specs <- function(df = NULL,
null = 0,
ci = TRUE,
ribbon = FALSE,
sample_perc = 1,
...) {

if (!is.null(df)) {

if (sample_perc > 1 | sample_perc < 0) {
stop("`sample_n` must be greater than 0 and less than 1!")
}

# Draw sample
df <- dplyr::sample_n(df, size = sample_perc*nrow(df))

# Create both plots
plot_a <- plot_curve(df, ci = ci, ribbon = ribbon, desc = desc, null = null)
plot_b <- plot_choices(df, choices = choices, desc = desc, null = null)
Expand Down
3 changes: 2 additions & 1 deletion R/plot_summary.r
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ plot_summary <- function(df,
theme(legend.position = "none",
axis.line = element_line("black", size = .5),
axis.text = element_text(colour = "black"),
strip.text.x = element_blank())
strip.text.x = element_blank()) +
labs(x = "")
}


7 changes: 4 additions & 3 deletions R/summarise_specs.r
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
#' # Unnamed vector instead of named list passed to `stats`
#' summarise_specs(results,
#' controls,
#' stats = c(mean, median))
#' stats = c(mean = mean,
#' median = median))
#'
#' @seealso [plot_summary()] to visually investigate the effect of analytical choices.
summarise_specs <- function(df,
Expand Down Expand Up @@ -68,7 +69,7 @@ summarise_specs <- function(df,
df %>%
summary_specs,
df %>%
dplyr::summarize(obs = median(.data$obs))
dplyr::summarize(obs = median(.data$fit_nobs))
)

} else {
Expand All @@ -79,7 +80,7 @@ summarise_specs <- function(df,
summary_specs,
df %>%
dplyr::group_by(!!! group_var) %>%
dplyr::summarize(obs = median(.data$obs)),
dplyr::summarize(obs = median(.data$fit_nobs)),
by = names_from_dots(...)
)
}
Expand Down
6 changes: 5 additions & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ knitr::opts_chunk$set(
<!-- badges: start -->
[![CRAN status](https://www.r-pkg.org/badges/version/specr)](https://CRAN.R-project.org/package=specr)
[![Travis build status](https://travis-ci.org/masurp/specr.svg?branch=master)](https://travis-ci.org/masurp/specr)
![](https://cranlogs.r-pkg.org/badges/grand-total/specr)
[![Lifecycle: maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing)
<!-- badges: end -->

Expand All @@ -38,7 +39,9 @@ There are also some vignettes that exemplify and explain specific aspects and fu

- [Getting started](https://masurp.github.io/specr/articles/specr.html): A comprehensive example. This vignette illustrates the major functions of the package.
- [Customizing specification curve plots](https://masurp.github.io/specr/articles/custom-plot.html): This vignette exemplifies various ways to plot the specification curve.
- [Decomposing the variance of the specification curve](https://masurp.github.io/specr/articles/decompose_var.html): An example of how to investigate variance components of the specification curve.
- [Decomposing the variance of the specification curve](https://masurp.github.io/specr/articles/decompose_var.html): Investigating variance components of the specification curve.
- [Including latent measurement models](https://masurp.github.io/specr/articles/measurement_models.html): This vignette exemplifies how to include latent measurement models and estimate structural equation models using `lavaan`.
- [Including random effects/Estimate multilevel models](https://masurp.github.io/specr/articles/random_effects.html): This vignette exemplifies how to include random effects and estimate multilevel models using `lme4`.
- [Visualizing progress during estimation](https://masurp.github.io/specr/articles/progress.html): This vignette explains how to create a progress bar for longer computations.

### Disclaimer
Expand All @@ -65,6 +68,7 @@ devtools::install_github("masurp/specr")
Using `specr` is comparatively simple. The main function is `run_specs()` in which analytical choices are specified as arguments. The function `plot_specs()` can then be used to visualize the results.

```{r, message=F, warning = F, fig.height=10, fig.width=10}
# Load package
library(specr)
# Run specs
Expand Down
34 changes: 23 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
status](https://www.r-pkg.org/badges/version/specr)](https://CRAN.R-project.org/package=specr)
[![Travis build
status](https://travis-ci.org/masurp/specr.svg?branch=master)](https://travis-ci.org/masurp/specr)
[![Lifecycle:
![](https://cranlogs.r-pkg.org/badges/grand-total/specr) [![Lifecycle:
maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing)
<!-- badges: end -->

Expand Down Expand Up @@ -45,8 +45,15 @@ aspects and functions of the package:
curve.
- [Decomposing the variance of the specification
curve](https://masurp.github.io/specr/articles/decompose_var.html):
An example of how to investigate variance components of the
specification curve.
Investigating variance components of the specification curve.
- [Including latent measurement
models](https://masurp.github.io/specr/articles/measurement_models.html):
This vignette exemplifies how to include latent measurement models
and estimate structural equation models using `lavaan`.
- [Including random effects/Estimate multilevel
models](https://masurp.github.io/specr/articles/random_effects.html):
This vignette exemplifies how to include random effects and estimate
multilevel models using `lme4`.
- [Visualizing progress during
estimation](https://masurp.github.io/specr/articles/progress.html):
This vignette explains how to create a progress bar for longer
Expand Down Expand Up @@ -85,6 +92,7 @@ Using `specr` is comparatively simple. The main function is
The function `plot_specs()` can then be used to visualize the results.

``` r
# Load package
library(specr)

# Run specs
Expand All @@ -97,7 +105,7 @@ results <- run_specs(df = example_data,
group2 = unique(example_data$group2)))
# Result frame
head(results)
#> # A tibble: 6 x 12
#> # A tibble: 6 x 23
#> x y model controls estimate std.error statistic p.value conf.low
#> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 x1 y1 lm c1 + c2 4.95 0.525 9.43 3.11e-18 3.92
Expand All @@ -106,7 +114,11 @@ head(results)
#> 4 x2 y2 lm c1 + c2 0.985 0.324 3.04 2.62e- 3 0.347
#> 5 x1 y1 lm c1 5.53 0.794 6.97 2.95e-11 3.96
#> 6 x2 y1 lm c1 8.07 0.557 14.5 6.90e-35 6.98
#> # … with 3 more variables: conf.high <dbl>, obs <int>, subsets <chr>
#> # … with 14 more variables: conf.high <dbl>, fit_r.squared <dbl>,
#> # fit_adj.r.squared <dbl>, fit_sigma <dbl>, fit_statistic <dbl>,
#> # fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>, fit_AIC <dbl>,
#> # fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <int>, fit_nobs <int>,
#> # subsets <chr>

# Plot
plot_specs(results, choices = c("x", "y", "controls", "subsets"))
Expand All @@ -121,17 +133,17 @@ citation("specr")
#>
#> To cite 'specr' in publications use:
#>
#> Masur, Philipp K. & Scharkow, M. (2019). specr: Statistical functions
#> for conducting specification curve analyses. Available from
#> https://github.com/masurp/specr.
#> Masur, Philipp K. & Scharkow, M. (2020). specr: Conducting and
#> Visualizing Specification Curve Analyses. Available from
#> https://CRAN.R-project.org/package=specr.
#>
#> A BibTeX entry for LaTeX users is
#>
#> @Misc{,
#> title = {specr: Statistical functions for conducting specification curve analyses (Version 0.2.1.9000)},
#> title = {specr: Conducting and Visualizing Specification Curve Analyses (Version 0.2.2)},
#> author = {Philipp K. Masur and Michael Scharkow},
#> year = {2019},
#> url = {https://github.com/masurp/specr},
#> year = {2020},
#> url = {https://CRAN.R-project.org/package=specr},
#> }
```

Expand Down
Loading

0 comments on commit 0fb5294

Please sign in to comment.