Skip to content

Commit

Permalink
fix demean()
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke committed May 16, 2024
1 parent bc30964 commit 37afbdf
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 57 deletions.
74 changes: 37 additions & 37 deletions R/demean.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#' @param x A data frame.
#' @param select Character vector (or formula) with names of variables to select
#' that should be group- and de-meaned.
#' @param group Character vector (or formula) with the name of the variable that
#' @param by Character vector (or formula) with the name of the variable that
#' indicates the group- or cluster-ID.
#' @param center Method for centering. `demean()` always performs
#' mean-centering, while `degroup()` can use `center = "median"` or
Expand All @@ -25,6 +25,7 @@
#' attributes to indicate the within- and between-effects. This is only
#' relevant when printing `model_parameters()` - in such cases, the
#' within- and between-effects are printed in separated blocks.
#' @param group Deprecated. Use `by` instead.
#' @inheritParams center
#'
#' @return
Expand Down Expand Up @@ -92,7 +93,7 @@
#'
#' \subsection{Terminology}{
#' The group-meaned variable is simply the mean of an independent variable
#' within each group (or id-level or cluster) represented by `group`.
#' within each group (or id-level or cluster) represented by `by`.
#' It represents the cluster-mean of an independent variable. The regression
#' coefficient of a group-meaned variable is the *between-subject-effect*.
#' The de-meaned variable is then the centered version of the group-meaned
Expand Down Expand Up @@ -199,10 +200,10 @@
#' iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID
#' iris$binary <- as.factor(rbinom(150, 1, .35)) # binary variable
#'
#' x <- demean(iris, select = c("Sepal.Length", "Petal.Length"), group = "ID")
#' x <- demean(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID")
#' head(x)
#'
#' x <- demean(iris, select = c("Sepal.Length", "binary", "Species"), group = "ID")
#' x <- demean(iris, select = c("Sepal.Length", "binary", "Species"), by = "ID")
#' head(x)
#'
#'
Expand All @@ -213,23 +214,29 @@
#' y = c(1, 2, 1, 2, 4, 3, 2, 1),
#' ID = c(1, 2, 3, 1, 2, 3, 1, 2)
#' )
#' demean(dat, select = c("a", "x*y"), group = "ID")
#' demean(dat, select = c("a", "x*y"), by = "ID")
#'
#' # or in formula-notation
#' demean(dat, select = ~ a + x * y, group = ~ID)
#' demean(dat, select = ~ a + x * y, by = ~ID)
#'
#' @export
demean <- function(x,
select,
group,
by,
suffix_demean = "_within",
suffix_groupmean = "_between",
add_attributes = TRUE,
verbose = TRUE) {
verbose = TRUE,
group = NULL) {
## TODO: deprecate later
if (!is.null(group)) {
by <- group
}

degroup(
x = x,
select = select,
group = group,
by = by,
center = "mean",
suffix_demean = suffix_demean,
suffix_groupmean = suffix_groupmean,
Expand All @@ -247,12 +254,18 @@ demean <- function(x,
#' @export
degroup <- function(x,
select,
group,
by,
center = "mean",
suffix_demean = "_within",
suffix_groupmean = "_between",
add_attributes = TRUE,
verbose = TRUE) {
verbose = TRUE,
group = NULL) {
## TODO: deprecate later
if (!is.null(group)) {
by <- group
}

# ugly tibbles again...
x <- .coerce_to_dataframe(x)

Expand All @@ -266,8 +279,8 @@ degroup <- function(x,
))
}

if (inherits(group, "formula")) {
group <- all.vars(group)
if (inherits(by, "formula")) {
by <- all.vars(by)
}

interactions_no <- select[!grepl("(\\*|\\:)", select)]
Expand Down Expand Up @@ -296,7 +309,7 @@ degroup <- function(x,
select <- intersect(colnames(x), select)

# get data to demean...
dat <- x[, c(select, group)]
dat <- x[, c(select, by)]


# find categorical predictors that are coded as factors
Expand Down Expand Up @@ -344,31 +357,18 @@ degroup <- function(x,
# for variables within each group (the group means). assign
# mean values to a vector of same length as the data

if (center == "mode") {
x_gm_list <- lapply(select, function(i) {
stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) distribution_mode(stats::na.omit(.gm)))
})
} else if (center == "median") {
x_gm_list <- lapply(select, function(i) {
stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) stats::median(.gm, na.rm = TRUE))
})
} else if (center == "min") {
x_gm_list <- lapply(select, function(i) {
stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) min(.gm, na.rm = TRUE))
})
} else if (center == "max") {
x_gm_list <- lapply(select, function(i) {
stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) max(.gm, na.rm = TRUE))
})
} else {
x_gm_list <- lapply(select, function(i) {
stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) mean(.gm, na.rm = TRUE))
})
}

gm_fun <- switch(center,
mode = function(.gm) distribution_mode(stats::na.omit(.gm)),
median = function(.gm) stats::median(.gm, na.rm = TRUE),
min = function(.gm) min(.gm, na.rm = TRUE),
max = function(.gm) max(.gm, na.rm = TRUE),
function(.gm) mean(.gm, na.rm = TRUE)
)
x_gm_list <- lapply(select, function(i) {
stats::ave(dat[[i]], dat[[by]], FUN = gm_fun)
})
names(x_gm_list) <- select


# create de-meaned variables by subtracting the group mean from each individual value

x_dm_list <- lapply(select, function(i) dat[[i]] - x_gm_list[[i]])
Expand Down
29 changes: 17 additions & 12 deletions man/demean.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/_snaps/demean.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
# demean interaction term

Code
demean(dat, select = c("a", "x*y"), group = "ID")
demean(dat, select = c("a", "x*y"), by = "ID")
Output
a_between x_y_between a_within x_y_within
1 2.666667 4.666667 -1.6666667 -0.6666667
Expand Down
20 changes: 13 additions & 7 deletions tests/testthat/test-demean.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,35 @@ test_that("demean works", {
df <- iris

set.seed(123)
df$ID <- sample(1:4, nrow(df), replace = TRUE) # fake-ID
df$ID <- sample.int(4, nrow(df), replace = TRUE) # fake-ID

set.seed(123)
df$binary <- as.factor(rbinom(150, 1, 0.35)) # binary variable

set.seed(123)
x <- demean(df, select = c("Sepal.Length", "Petal.Length"), group = "ID")
x <- demean(df, select = c("Sepal.Length", "Petal.Length"), by = "ID")
expect_snapshot(head(x))

set.seed(123)
expect_message(
x <- demean(df, select = c("Sepal.Length", "binary", "Species"), group = "ID"),
{
x <- demean(df, select = c("Sepal.Length", "binary", "Species"), by = "ID")
},
"have been coerced to numeric"
)
expect_snapshot(head(x))

set.seed(123)
expect_message(
y <- demean(df, select = ~ Sepal.Length + binary + Species, group = ~ID),
{
y <- demean(df, select = ~ Sepal.Length + binary + Species, by = ~ID)
},
"have been coerced to numeric"
)
expect_message(
z <- demean(df, select = c("Sepal.Length", "binary", "Species"), group = "ID"),
{
z <- demean(df, select = c("Sepal.Length", "binary", "Species"), by = "ID")
},
"have been coerced to numeric"
)
expect_identical(y, z)
Expand All @@ -39,7 +45,7 @@ test_that("demean interaction term", {
)

set.seed(123)
expect_snapshot(demean(dat, select = c("a", "x*y"), group = "ID"))
expect_snapshot(demean(dat, select = c("a", "x*y"), by = "ID"))
})

test_that("demean shows message if some vars don't exist", {
Expand All @@ -52,7 +58,7 @@ test_that("demean shows message if some vars don't exist", {

set.seed(123)
expect_message(
demean(dat, select = "foo", group = "ID"),
demean(dat, select = "foo", by = "ID"),
regexp = "not found"
)
})

0 comments on commit 37afbdf

Please sign in to comment.