From 0e3991be778dc48ad763c61499982312e1d98170 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 18 Dec 2023 13:34:26 +0100 Subject: [PATCH] Preserve correct label oder (#473) * Preserve correct label oder * update news * capture more exceptions * lintr * lintr * update lintr --- .lintr | 1 + DESCRIPTION | 2 +- NEWS.md | 2 + R/data_reverse.R | 34 ++++++++--------- R/to_numeric.R | 6 +-- R/utils_labels.R | 25 +++++++++++-- tests/testthat/test-data_to_numeric.R | 31 ++++++++++++++++ tests/testthat/test-standardize_models.R | 47 +++++++++++++++++------- 8 files changed, 109 insertions(+), 39 deletions(-) diff --git a/.lintr b/.lintr index 7fa66fe1c..8aebdfc14 100644 --- a/.lintr +++ b/.lintr @@ -13,6 +13,7 @@ linters: linters_with_defaults( todo_comment_linter = NULL, undesirable_function_linter(c("mapply" = NA, "sapply" = NA, "setwd" = NA)), undesirable_operator_linter = NULL, + if_not_else_linter(exceptions = character(0L)), unnecessary_concatenation_linter(allow_single_expression = FALSE), defaults = linters_with_tags(tags = NULL) ) diff --git a/DESCRIPTION b/DESCRIPTION index 7d8051b0e..eedc0aed6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.9.0.6 +Version: 0.9.0.7 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")), diff --git a/NEWS.md b/NEWS.md index 9242a0913..7f3987170 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,8 @@ BUG FIXES * `to_numeric()` now correctly deals with inversed factor levels when `preserve_levels = TRUE`. +* `to_numeric()` inversed order of value labels when `dummy_factors = FALSE`. + * `convert_to_na()` now preserves attributes for factors when `drop_levels = TRUE`. # datawizard 0.9.0 diff --git a/R/data_reverse.R b/R/data_reverse.R index b9615417e..2fc9ef493 100644 --- a/R/data_reverse.R +++ b/R/data_reverse.R @@ -93,24 +93,24 @@ reverse.numeric <- function(x, } # old minimum and maximum - min <- min(range) - max <- max(range) + min_value <- min(range) + max_value <- max(range) # check if a valid range (i.e. vector of length 2) is provided if (length(range) > 2) { insight::format_error( "`range` must be a numeric vector of length two, indicating lowest and highest value of the required range.", - sprintf("Did you want to provide `range = c(%g, %g)`?", min, max) + sprintf("Did you want to provide `range = c(%g, %g)`?", min_value, max_value) ) } - new_min <- max - new_max <- min + new_min <- max_value + new_max <- min_value - out <- as.vector((new_max - new_min) / (max - min) * (x - min) + new_min) + out <- as.vector((new_max - new_min) / (max_value - min_value) * (x - min_value) + new_min) # labelled data? - out <- .set_back_labels(out, x) + out <- .set_back_labels(out, x, reverse_values = TRUE) out } @@ -134,7 +134,9 @@ reverse.factor <- function(x, range = NULL, verbose = TRUE, ...) { # save for later use original_x <- x - if (!is.null(range)) { + if (is.null(range)) { + old_levels <- levels(x) + } else { # no missing values allowed if (anyNA(range)) { insight::format_error("`range` is not allowed to have missing values.") @@ -180,8 +182,6 @@ reverse.factor <- function(x, range = NULL, verbose = TRUE, ...) { } old_levels <- range x <- factor(x, levels = range) - } else { - old_levels <- levels(x) } int_x <- as.integer(x) @@ -189,7 +189,7 @@ reverse.factor <- function(x, range = NULL, verbose = TRUE, ...) { x <- factor(rev_x, levels = seq_len(length(old_levels)), labels = old_levels) # labelled data? - x <- .set_back_labels(x, original_x) + x <- .set_back_labels(x, original_x, reverse_values = TRUE) x } @@ -225,7 +225,7 @@ reverse.grouped_df <- function(x, # create the new variables and updates "select", so new variables are processed if (!isFALSE(append)) { # process arguments - args <- .process_append( + arguments <- .process_append( x, select, append, @@ -233,8 +233,8 @@ reverse.grouped_df <- function(x, preserve_value_labels = TRUE ) # update processed arguments - x <- args$x - select <- args$select + x <- arguments$x + select <- arguments$select } x <- as.data.frame(x) @@ -279,7 +279,7 @@ reverse.data.frame <- function(x, # create the new variables and updates "select", so new variables are processed if (!isFALSE(append)) { # process arguments - args <- .process_append( + arguments <- .process_append( x, select, append, @@ -287,8 +287,8 @@ reverse.data.frame <- function(x, preserve_value_labels = TRUE ) # update processed arguments - x <- args$x - select <- args$select + x <- arguments$x + select <- arguments$select } # Transform the range so that it is a list now diff --git a/R/to_numeric.R b/R/to_numeric.R index 9a35f9130..8bfcac6bc 100644 --- a/R/to_numeric.R +++ b/R/to_numeric.R @@ -148,7 +148,7 @@ to_numeric.data.frame <- function(x, #' @export to_numeric.numeric <- function(x, verbose = TRUE, ...) { - .set_back_labels(as.numeric(x), x) + .set_back_labels(as.numeric(x), x, reverse_values = FALSE) } #' @export @@ -233,9 +233,9 @@ to_numeric.factor <- function(x, } x <- factor(x_inverse) } - out <- .set_back_labels(as.numeric(as.character(x)), x) + out <- .set_back_labels(as.numeric(as.character(x)), x, reverse_values = FALSE) } else { - out <- .set_back_labels(as.numeric(x), x) + out <- .set_back_labels(as.numeric(x), x, reverse_values = FALSE) } # shift to requested starting value diff --git a/R/utils_labels.R b/R/utils_labels.R index a783e4fda..64b517086 100644 --- a/R/utils_labels.R +++ b/R/utils_labels.R @@ -3,12 +3,29 @@ # to the transformed vector #' @keywords internal -.set_back_labels <- function(new, old, include_values = TRUE) { +.set_back_labels <- function(new, old, include_values = TRUE, reverse_values = FALSE) { # labelled data? attr(new, "label") <- attr(old, "label", exact = TRUE) - labels <- attr(old, "labels", exact = TRUE) - if (isTRUE(include_values) && !is.null(labels)) { - attr(new, "labels") <- stats::setNames(rev(labels), names(labels)) + value_labels <- attr(old, "labels", exact = TRUE) + # "include_values" is used to preserve value labels + if (isTRUE(include_values) && !is.null(value_labels)) { + if (reverse_values) { + # reverse values? Used for "reverse_scale()" + attr(new, "labels") <- stats::setNames(rev(value_labels), names(value_labels)) + } else { + # keep value oder? Used for "to_numeric()" + if (is.numeric(new)) { + if (any(grepl("[^0-9]", value_labels))) { + # if we have any non-numeric characters, convert to numeric + attr(new, "labels") <- stats::setNames(as.numeric(as.factor(value_labels)), names(value_labels)) + } else { + # if we have numeric, or "numeric character" (like "1", "2", "3" etc.) + attr(new, "labels") <- stats::setNames(as.numeric(value_labels), names(value_labels)) + } + } else { + attr(new, "labels") <- stats::setNames(value_labels, names(value_labels)) + } + } } else if (isFALSE(include_values)) { attr(new, "labels") <- NULL } diff --git a/tests/testthat/test-data_to_numeric.R b/tests/testthat/test-data_to_numeric.R index 3e0a9d095..464c35e8d 100644 --- a/tests/testthat/test-data_to_numeric.R +++ b/tests/testthat/test-data_to_numeric.R @@ -189,3 +189,34 @@ test_that("to_numeric works with haven_labelled, convert many labels correctly", expect_identical(as.vector(table(x)), c(180L, 506L, 156L)) }) }) + + +test_that("to_numeric preserves correct label order", { + x <- factor(c(1, 2, 3, 4)) + x <- assign_labels(x, values = c("one", "two", "three", "four")) + out <- to_numeric(x, dummy_factors = FALSE) + expect_identical( + attributes(out)$labels, + c(one = 1, two = 2, three = 3, four = 4) + ) + # correctly reverse scale + out <- to_numeric(reverse_scale(x), dummy_factors = FALSE) + expect_identical( + attributes(out)$labels, + c(one = 4, two = 3, three = 2, four = 1) + ) + # factor with alphabetical values + x <- factor(letters[1:4]) + x <- assign_labels(x, values = c("one", "two", "three", "four")) + out <- to_numeric(x, dummy_factors = FALSE) + expect_identical( + attributes(out)$labels, + c(one = 1, two = 2, three = 3, four = 4) + ) + # correctly reverse scale + out <- to_numeric(reverse_scale(x), dummy_factors = FALSE) + expect_identical( + attributes(out)$labels, + c(one = 4, two = 3, three = 2, four = 1) + ) +}) diff --git a/tests/testthat/test-standardize_models.R b/tests/testthat/test-standardize_models.R index 4e1706ce5..27b527573 100644 --- a/tests/testthat/test-standardize_models.R +++ b/tests/testthat/test-standardize_models.R @@ -47,24 +47,28 @@ test_that("transformations", { fit_scale2 <- lm(scale(mpg) ~ scale(exp(hp_100)), mt) expect_equal( effectsize::standardize_parameters(fit_exp, method = "refit")[2, 2], - unname(coef(fit_scale1)[2]) + unname(coef(fit_scale1)[2]), + ignore_attr = TRUE ) expect_equal( effectsize::standardize_parameters(fit_exp, method = "basic")[2, 2], - unname(coef(fit_scale2)[2]) + unname(coef(fit_scale2)[2]), + ignore_attr = TRUE ) skip_if_not_installed("insight", minimum_version = "0.10.0") d <- data.frame( time = as.factor(c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5)), group = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2), - sum = c(0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50) + sum = c(0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50) # nolint ) m <- lm(log(sum + 1) ~ as.numeric(time) * group, data = d) - expect_message(out <- standardize(m)) + expect_message({ + out <- standardize(m) + }) expect_identical(coef(m), c( `(Intercept)` = -0.4575, `as.numeric(time)` = 0.5492, group = 0.3379, `as.numeric(time):group` = 0.15779 @@ -98,12 +102,14 @@ test_that("weights", { stdREFIT <- effectsize::standardize_parameters(m, method = "refit") expect_equal( stdREFIT[[2]], - effectsize::standardize_parameters(m, method = "posthoc")[[2]] + effectsize::standardize_parameters(m, method = "posthoc")[[2]], + ignore_attr = TRUE ) expect_equal( stdREFIT[[2]], - effectsize::standardize_parameters(m, method = "basic")[[2]] + effectsize::standardize_parameters(m, method = "basic")[[2]], + ignore_attr = TRUE ) }) @@ -230,7 +236,9 @@ test_that("standardize mediation", { ) out1 <- summary(standardize(med1)) - expect_message(out2 <- summary(standardize(med2))) + expect_message({ + out2 <- summary(standardize(med2)) + }) expect_identical(unlist(out1[c("d0", "d1", "z0", "z1", "n0", "n1", "tau.coef")]), unlist(out2[c("d0", "d1", "z0", "z1", "n0", "n1", "tau.coef")]), tolerance = 0.1 @@ -266,13 +274,22 @@ test_that("offsets", { m <- lm(mpg ~ hp + offset(wt), data = mtcars) - expect_warning(mz1 <- standardize(m)) - expect_warning(mz2 <- standardize(m, two_sd = TRUE)) + expect_warning({ + mz1 <- standardize(m) + }) + expect_warning({ + mz2 <- standardize(m, two_sd = TRUE) + }) expect_identical(c(1, 2) * coef(mz1), coef(mz2)) m <- glm(cyl ~ hp + offset(wt), family = poisson(), data = mtcars) - expect_warning(mz <- standardize(m), regexp = NA) + expect_warning( + { + mz <- standardize(m) + }, + regexp = NA + ) par1 <- parameters::model_parameters(mz) par2 <- effectsize::standardize_parameters(m, method = "basic") @@ -288,10 +305,12 @@ test_that("brms", { skip_if_not_installed("brms") invisible( - capture.output(mod <- brms::brm(mpg ~ hp, - data = mtcars, - refresh = 0, chains = 1, silent = 2 - )) + capture.output({ + mod <- brms::brm(mpg ~ hp, + data = mtcars, + refresh = 0, chains = 1, silent = 2 + ) + }) ) expect_warning(