Skip to content

Commit

Permalink
Merge branch 'main' into data_summary_ci
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke authored Mar 4, 2024
2 parents 26f70f2 + 7a1f372 commit 6f33194
Show file tree
Hide file tree
Showing 9 changed files with 231 additions and 83 deletions.
12 changes: 12 additions & 0 deletions .dev/test-value_at.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
test_that("value_at", {
data(efc, package = "datawizard")
expect_equal(value_at(efc$e42dep, 5), 4, ignore_attr = TRUE)
expect_equal(value_at(efc$c12hour, 4), NA_real_, ignore_attr = TRUE)
expect_equal(value_at(efc$c12hour, 4, remove_na = TRUE), 168, ignore_attr = TRUE)
expect_equal(value_at(efc$c12hour, 5:7), efc$c12hour[5:7], ignore_attr = TRUE)
expect_equal(value_at(efc$e42dep, 123456, default = 55), 55, ignore_attr = TRUE)
expect_null(value_at(efc$e42dep, 123456))
expect_null(value_at(efc$e42dep, NULL))
expect_error(value_at(efc$e42dep, NA), regex = "`position` can't")
expect_error(value_at(efc$e42dep, c(3, NA)), regex = "`position` can't")
})
52 changes: 52 additions & 0 deletions .dev/value_at.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#' @title Find the value(s) at a specific position in a variable
#' @name value_at
#'
#' @description This function can be used to extract one or more values at a
#' specific position in a variable.
#'
#' @param x A vector or factor.
#' @param position An integer or a vector of integers, indicating the position(s)
#' of the value(s) to be returned. Negative values are counted from the end of
#' the vector. If `NA`, an error is thrown.
#' @param remove_na Logical, if `TRUE`, missing values are removed before
#' computing the position. If `FALSE`, missing values are included in the
#' computation.
#' @param default The value to be returned if the position is out of range.
#'
#' @seealso `data_summary()` to use `value_at()` inside a `data_summary()` call.
#'
#' @return A vector with the value(s) at the specified position(s).
#'
#' @examples
#' data(mtcars)
#' # 5th value
#' value_at(mtcars$mpg, 5)
#' # last value
#' value_at(mtcars$mpg, -1)
#' # out of range, return default
#' value_at(mtcars$mpg, 150)
#' # return 2nd and fifth value
#' value_at(mtcars$mpg, c(2, 5))
#' @export
value_at <- function(x, position = 1, default = NULL, remove_na = FALSE) {
if (remove_na) {
x <- x[!is.na(x)]
}
n <- length(x)
unlist(lapply(position, .values_at, x = x, n = n, default = default), use.names = FALSE)
}

# helper ----

.values_at <- function(x, position, n, default) {
if (is.na(position)) {
insight::format_error("`position` can't be `NA`.")
}
if (position < 0L) {
position <- position + n + 1
}
if (position <= 0 || position > n) {
return(default)
}
x[position]
}
36 changes: 16 additions & 20 deletions R/adjust.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,11 @@ adjust <- function(data,
predictors[predictors == predictors_num] <- paste0("s(", predictors_num, ")")
}
formula_predictors <- paste(c("1", predictors), collapse = " + ")
formula <- paste(var, "~", formula_predictors)
model_formula <- paste(var, "~", formula_predictors)

x <- .model_adjust_for(
data = data[unique(c(var, effect, facs))],
formula,
model_formula = model_formula,
multilevel = multilevel,
additive = additive,
bayesian = bayesian,
Expand All @@ -148,7 +148,7 @@ data_adjust <- adjust

#' @keywords internal
.model_adjust_for <- function(data,
formula,
model_formula,
multilevel = FALSE,
additive = FALSE,
bayesian = FALSE,
Expand All @@ -159,32 +159,28 @@ data_adjust <- adjust
# Bayesian
if (bayesian) {
insight::check_if_installed("rstanarm")
model <- rstanarm::stan_gamm4(stats::as.formula(formula), random = formula_random, data = data, refresh = 0)
model <- rstanarm::stan_gamm4(stats::as.formula(model_formula), random = formula_random, data = data, refresh = 0)
# Frequentist
} else {
insight::check_if_installed("gamm4")
model <- gamm4::gamm4(stats::as.formula(formula), random = formula_random, data = data)
model <- gamm4::gamm4(stats::as.formula(model_formula), random = formula_random, data = data)
}

# Linear -------------------------
} else {
} else if (bayesian) {
# Bayesian
if (bayesian) {
insight::check_if_installed("rstanarm")
if (multilevel) {
model <- rstanarm::stan_lmer(paste(formula, formula_random), data = data, refresh = 0)
} else {
model <- rstanarm::stan_glm(formula, data = data, refresh = 0)
}
# Frequentist
insight::check_if_installed("rstanarm")
if (multilevel) {
model <- rstanarm::stan_lmer(paste(model_formula, formula_random), data = data, refresh = 0)
} else {
if (multilevel) {
insight::check_if_installed("lme4")
model <- lme4::lmer(paste(formula, formula_random), data = data)
} else {
model <- stats::lm(formula, data = data)
}
model <- rstanarm::stan_glm(model_formula, data = data, refresh = 0)
}
} else if (multilevel) {
# Frequentist
insight::check_if_installed("lme4")
model <- lme4::lmer(paste(model_formula, formula_random), data = data)
} else {
model <- stats::lm(model_formula, data = data)
}

adjusted <- insight::get_residuals(model)
Expand Down
10 changes: 5 additions & 5 deletions R/assign_labels.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ assign_labels.numeric <- function(x, variable = NULL, values = NULL, ...) {
attr(x, "label") <- variable
} else {
insight::format_error(
"Variable labels (argument `variable`) must be provided as a single character string, e.g. `variable = \"mylabel\"`."
"Variable labels (argument `variable`) must be provided as a single character string, e.g. `variable = \"mylabel\"`." # nolint
)
}
}
Expand All @@ -88,13 +88,13 @@ assign_labels.numeric <- function(x, variable = NULL, values = NULL, ...) {
if (!is.null(values)) {
# extract unique values
unique_values <- as.vector(sort(stats::na.omit(unique(x))))
labels <- NULL
value_labels <- NULL

# do we have a names vector for "values"?
# else check if number of labels and values match
if (is.null(names(values))) {
if (length(values) == length(unique_values)) {
labels <- stats::setNames(unique_values, values)
value_labels <- stats::setNames(unique_values, values)
} else {
insight::format_error(
"Cannot add labels. Number of unique values and number of value labels are not equal.",
Expand All @@ -114,11 +114,11 @@ assign_labels.numeric <- function(x, variable = NULL, values = NULL, ...) {

if (length(values)) {
# we need to switch names and values
labels <- stats::setNames(coerce_to_numeric(names(values)), values)
value_labels <- stats::setNames(coerce_to_numeric(names(values)), values)
}
}

attr(x, "labels") <- labels
attr(x, "labels") <- value_labels
}

x
Expand Down
112 changes: 62 additions & 50 deletions R/categorize.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,28 +145,8 @@ categorize.numeric <- function(x,
labels = NULL,
verbose = TRUE,
...) {
# check arguments
if (is.character(split)) {
split <- match.arg(
split,
choices = c(
"median", "mean", "quantile", "equal_length", "equal_range",
"equal", "equal_distance", "range", "distance"
)
)
}

if (is.character(split) && split %in% c("quantile", "equal_length") && is.null(n_groups)) {
insight::format_error(
"Recoding based on quantiles or equal-sized groups requires the `n_groups` argument to be specified."
)
}

if (is.character(split) && split == "equal_range" && is.null(n_groups) && is.null(range)) {
insight::format_error(
"Recoding into groups with equal range requires either the `range` or `n_groups` argument to be specified."
)
}
# sanity check
split <- .sanitize_split_arg(split, n_groups, range)

# handle aliases
if (identical(split, "equal_length")) split <- "length"
Expand Down Expand Up @@ -221,28 +201,7 @@ categorize.numeric <- function(x,
original_x[!is.na(original_x)] <- out

# turn into factor?
if (!is.null(labels)) {
if (length(labels) == length(unique(out))) {
original_x <- as.factor(original_x)
levels(original_x) <- labels
} else if (length(labels) == 1 && labels %in% c("mean", "median")) {
original_x <- as.factor(original_x)
no_na_x <- original_x[!is.na(original_x)]
if (labels == "mean") {
labels <- stats::aggregate(x, list(no_na_x), FUN = mean, na.rm = TRUE)$x
} else {
labels <- stats::aggregate(x, list(no_na_x), FUN = stats::median, na.rm = TRUE)$x
}
levels(original_x) <- insight::format_value(labels, ...)
} else if (isTRUE(verbose)) {
insight::format_warning(
"Argument `labels` and levels of the recoded variable are not of the same length.",
"Variable will not be converted to factor."
)
}
}

original_x
.original_x_to_factor(original_x, x, labels, out, verbose, ...)
}


Expand Down Expand Up @@ -283,15 +242,15 @@ categorize.data.frame <- function(x,
# create the new variables and updates "select", so new variables are processed
if (!isFALSE(append)) {
# process arguments
args <- .process_append(
my_args <- .process_append(
x,
select,
append,
append_suffix = "_r"
)
# update processed arguments
x <- args$x
select <- args$select
x <- my_args$x
select <- my_args$select
}

x[select] <- lapply(
Expand Down Expand Up @@ -342,15 +301,15 @@ categorize.grouped_df <- function(x,
# create the new variables and updates "select", so new variables are processed
if (!isFALSE(append)) {
# process arguments
args <- .process_append(
my_args <- .process_append(
x,
select,
append,
append_suffix = "_r"
)
# update processed arguments
x <- args$x
select <- args$select
x <- my_args$x
select <- my_args$select
}

x <- as.data.frame(x)
Expand Down Expand Up @@ -387,3 +346,56 @@ categorize.grouped_df <- function(x,
}
seq(lowest, max(x), by = range)
}


.sanitize_split_arg <- function(split, n_groups, range) {
# check arguments
if (is.character(split)) {
split <- match.arg(
split,
choices = c(
"median", "mean", "quantile", "equal_length", "equal_range",
"equal", "equal_distance", "range", "distance"
)
)
}

if (is.character(split) && split %in% c("quantile", "equal_length") && is.null(n_groups)) {
insight::format_error(
"Recoding based on quantiles or equal-sized groups requires the `n_groups` argument to be specified."
)
}

if (is.character(split) && split == "equal_range" && is.null(n_groups) && is.null(range)) {
insight::format_error(
"Recoding into groups with equal range requires either the `range` or `n_groups` argument to be specified."
)
}

split
}


.original_x_to_factor <- function(original_x, x, labels, out, verbose, ...) {
if (!is.null(labels)) {
if (length(labels) == length(unique(out))) {
original_x <- as.factor(original_x)
levels(original_x) <- labels
} else if (length(labels) == 1 && labels %in% c("mean", "median")) {
original_x <- as.factor(original_x)
no_na_x <- original_x[!is.na(original_x)]
if (labels == "mean") {
labels <- stats::aggregate(x, list(no_na_x), FUN = mean, na.rm = TRUE)$x
} else {
labels <- stats::aggregate(x, list(no_na_x), FUN = stats::median, na.rm = TRUE)$x
}
levels(original_x) <- insight::format_value(labels, ...)
} else if (isTRUE(verbose)) {
insight::format_warning(
"Argument `labels` and levels of the recoded variable are not of the same length.",
"Variable will not be converted to factor."
)
}
}
original_x
}
9 changes: 8 additions & 1 deletion R/data_modify.R
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,14 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify =
}

# finally, we can evaluate expression and get values for new variables
new_variable <- try(with(data, eval(symbol)), silent = TRUE)
symbol_string <- insight::safe_deparse(symbol)
if (!is.null(symbol_string) && all(symbol_string == "n()")) {
# "special" functions
new_variable <- nrow(data)
} else {
# default evaluation of expression
new_variable <- try(with(data, eval(symbol)), silent = TRUE)
}

# successful, or any errors, like misspelled variable name?
if (inherits(new_variable, "try-error")) {
Expand Down
Loading

0 comments on commit 6f33194

Please sign in to comment.