fix demean()

easystats · May 16, 2024 · 37afbdf · 37afbdf
1 parent bc30964
commit 37afbdf
Show file tree

Hide file tree

Showing 4 changed files with 68 additions and 57 deletions.
diff --git a/R/demean.R b/R/demean.R
@@ -11,7 +11,7 @@
 #' @param x A data frame.
 #' @param select Character vector (or formula) with names of variables to select
 #'   that should be group- and de-meaned.
-#' @param group Character vector (or formula) with the name of the variable that
+#' @param by Character vector (or formula) with the name of the variable that
 #'   indicates the group- or cluster-ID.
 #' @param center Method for centering. `demean()` always performs
 #'   mean-centering, while `degroup()` can use `center = "median"` or
@@ -25,6 +25,7 @@
 #'   attributes to indicate the within- and between-effects. This is only
 #'   relevant when printing `model_parameters()` - in such cases, the
 #'   within- and between-effects are printed in separated blocks.
+#' @param group Deprecated. Use `by` instead.
 #' @inheritParams center
 #'
 #' @return
@@ -92,7 +93,7 @@
 #'
 #'   \subsection{Terminology}{
 #'     The group-meaned variable is simply the mean of an independent variable
-#'     within each group (or id-level or cluster) represented by `group`.
+#'     within each group (or id-level or cluster) represented by `by`.
 #'     It represents the cluster-mean of an independent variable. The regression
 #'     coefficient of a group-meaned variable is the *between-subject-effect*.
 #'     The de-meaned variable is then the centered version of the group-meaned
@@ -199,10 +200,10 @@
 #' iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID
 #' iris$binary <- as.factor(rbinom(150, 1, .35)) # binary variable
 #'
-#' x <- demean(iris, select = c("Sepal.Length", "Petal.Length"), group = "ID")
+#' x <- demean(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID")
 #' head(x)
 #'
-#' x <- demean(iris, select = c("Sepal.Length", "binary", "Species"), group = "ID")
+#' x <- demean(iris, select = c("Sepal.Length", "binary", "Species"), by = "ID")
 #' head(x)
 #'
 #'
@@ -213,23 +214,29 @@
 #'   y = c(1, 2, 1, 2, 4, 3, 2, 1),
 #'   ID = c(1, 2, 3, 1, 2, 3, 1, 2)
 #' )
-#' demean(dat, select = c("a", "x*y"), group = "ID")
+#' demean(dat, select = c("a", "x*y"), by = "ID")
 #'
 #' # or in formula-notation
-#' demean(dat, select = ~ a + x * y, group = ~ID)
+#' demean(dat, select = ~ a + x * y, by = ~ID)
 #'
 #' @export
 demean <- function(x,
                    select,
-                   group,
+                   by,
                    suffix_demean = "_within",
                    suffix_groupmean = "_between",
                    add_attributes = TRUE,
-                   verbose = TRUE) {
+                   verbose = TRUE,
+                   group = NULL) {
+  ## TODO: deprecate later
+  if (!is.null(group)) {
+    by <- group
+  }
+
   degroup(
     x = x,
     select = select,
-    group = group,
+    by = by,
     center = "mean",
     suffix_demean = suffix_demean,
     suffix_groupmean = suffix_groupmean,
@@ -247,12 +254,18 @@ demean <- function(x,
 #' @export
 degroup <- function(x,
                     select,
-                    group,
+                    by,
                     center = "mean",
                     suffix_demean = "_within",
                     suffix_groupmean = "_between",
                     add_attributes = TRUE,
-                    verbose = TRUE) {
+                    verbose = TRUE,
+                    group = NULL) {
+  ## TODO: deprecate later
+  if (!is.null(group)) {
+    by <- group
+  }
+
   # ugly tibbles again...
   x <- .coerce_to_dataframe(x)
 
@@ -266,8 +279,8 @@ degroup <- function(x,
     ))
   }
 
-  if (inherits(group, "formula")) {
-    group <- all.vars(group)
+  if (inherits(by, "formula")) {
+    by <- all.vars(by)
   }
 
   interactions_no <- select[!grepl("(\\*|\\:)", select)]
@@ -296,7 +309,7 @@ degroup <- function(x,
   select <- intersect(colnames(x), select)
 
   # get data to demean...
-  dat <- x[, c(select, group)]
+  dat <- x[, c(select, by)]
 
 
   # find categorical predictors that are coded as factors
@@ -344,31 +357,18 @@ degroup <- function(x,
   # for variables within each group (the group means). assign
   # mean values to a vector of same length as the data
 
-  if (center == "mode") {
-    x_gm_list <- lapply(select, function(i) {
-      stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) distribution_mode(stats::na.omit(.gm)))
-    })
-  } else if (center == "median") {
-    x_gm_list <- lapply(select, function(i) {
-      stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) stats::median(.gm, na.rm = TRUE))
-    })
-  } else if (center == "min") {
-    x_gm_list <- lapply(select, function(i) {
-      stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) min(.gm, na.rm = TRUE))
-    })
-  } else if (center == "max") {
-    x_gm_list <- lapply(select, function(i) {
-      stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) max(.gm, na.rm = TRUE))
-    })
-  } else {
-    x_gm_list <- lapply(select, function(i) {
-      stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) mean(.gm, na.rm = TRUE))
-    })
-  }
-
+  gm_fun <- switch(center,
+    mode = function(.gm) distribution_mode(stats::na.omit(.gm)),
+    median = function(.gm) stats::median(.gm, na.rm = TRUE),
+    min = function(.gm) min(.gm, na.rm = TRUE),
+    max = function(.gm) max(.gm, na.rm = TRUE),
+    function(.gm) mean(.gm, na.rm = TRUE)
+  )
+  x_gm_list <- lapply(select, function(i) {
+    stats::ave(dat[[i]], dat[[by]], FUN = gm_fun)
+  })
   names(x_gm_list) <- select
 
-
   # create de-meaned variables by subtracting the group mean from each individual value
 
   x_dm_list <- lapply(select, function(i) dat[[i]] - x_gm_list[[i]])

diff --git a/man/demean.Rd b/man/demean.Rd
diff --git a/tests/testthat/_snaps/demean.md b/tests/testthat/_snaps/demean.md
@@ -55,7 +55,7 @@
 # demean interaction term
 
     Code
-      demean(dat, select = c("a", "x*y"), group = "ID")
+      demean(dat, select = c("a", "x*y"), by = "ID")
     Output
         a_between x_y_between   a_within x_y_within
       1  2.666667    4.666667 -1.6666667 -0.6666667

diff --git a/tests/testthat/test-demean.R b/tests/testthat/test-demean.R
@@ -2,29 +2,35 @@ test_that("demean works", {
   df <- iris
 
   set.seed(123)
-  df$ID <- sample(1:4, nrow(df), replace = TRUE) # fake-ID
+  df$ID <- sample.int(4, nrow(df), replace = TRUE) # fake-ID
 
   set.seed(123)
   df$binary <- as.factor(rbinom(150, 1, 0.35)) # binary variable
 
   set.seed(123)
-  x <- demean(df, select = c("Sepal.Length", "Petal.Length"), group = "ID")
+  x <- demean(df, select = c("Sepal.Length", "Petal.Length"), by = "ID")
   expect_snapshot(head(x))
 
   set.seed(123)
   expect_message(
-    x <- demean(df, select = c("Sepal.Length", "binary", "Species"), group = "ID"),
+    {
+      x <- demean(df, select = c("Sepal.Length", "binary", "Species"), by = "ID")
+    },
     "have been coerced to numeric"
   )
   expect_snapshot(head(x))
 
   set.seed(123)
   expect_message(
-    y <- demean(df, select = ~ Sepal.Length + binary + Species, group = ~ID),
+    {
+      y <- demean(df, select = ~ Sepal.Length + binary + Species, by = ~ID)
+    },
     "have been coerced to numeric"
   )
   expect_message(
-    z <- demean(df, select = c("Sepal.Length", "binary", "Species"), group = "ID"),
+    {
+      z <- demean(df, select = c("Sepal.Length", "binary", "Species"), by = "ID")
+    },
     "have been coerced to numeric"
   )
   expect_identical(y, z)
@@ -39,7 +45,7 @@ test_that("demean interaction term", {
   )
 
   set.seed(123)
-  expect_snapshot(demean(dat, select = c("a", "x*y"), group = "ID"))
+  expect_snapshot(demean(dat, select = c("a", "x*y"), by = "ID"))
 })
 
 test_that("demean shows message if some vars don't exist", {
@@ -52,7 +58,7 @@ test_that("demean shows message if some vars don't exist", {
 
   set.seed(123)
   expect_message(
-    demean(dat, select = "foo", group = "ID"),
+    demean(dat, select = "foo", by = "ID"),
     regexp = "not found"
   )
 })