Skip to content

Commit

Permalink
Preserve correct label oder (#473)
Browse files Browse the repository at this point in the history
* Preserve correct label oder

* update news

* capture more exceptions

* lintr

* lintr

* update lintr
  • Loading branch information
strengejacke authored Dec 18, 2023
1 parent 5f5c9c1 commit 0e3991b
Show file tree
Hide file tree
Showing 8 changed files with 109 additions and 39 deletions.
1 change: 1 addition & 0 deletions .lintr
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ linters: linters_with_defaults(
todo_comment_linter = NULL,
undesirable_function_linter(c("mapply" = NA, "sapply" = NA, "setwd" = NA)),
undesirable_operator_linter = NULL,
if_not_else_linter(exceptions = character(0L)),
unnecessary_concatenation_linter(allow_single_expression = FALSE),
defaults = linters_with_tags(tags = NULL)
)
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: datawizard
Title: Easy Data Wrangling and Statistical Transformations
Version: 0.9.0.6
Version: 0.9.0.7
Authors@R: c(
person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut",
comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ BUG FIXES
* `to_numeric()` now correctly deals with inversed factor levels when
`preserve_levels = TRUE`.

* `to_numeric()` inversed order of value labels when `dummy_factors = FALSE`.

* `convert_to_na()` now preserves attributes for factors when `drop_levels = TRUE`.

# datawizard 0.9.0
Expand Down
34 changes: 17 additions & 17 deletions R/data_reverse.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,24 +93,24 @@ reverse.numeric <- function(x,
}

# old minimum and maximum
min <- min(range)
max <- max(range)
min_value <- min(range)
max_value <- max(range)

# check if a valid range (i.e. vector of length 2) is provided
if (length(range) > 2) {
insight::format_error(
"`range` must be a numeric vector of length two, indicating lowest and highest value of the required range.",
sprintf("Did you want to provide `range = c(%g, %g)`?", min, max)
sprintf("Did you want to provide `range = c(%g, %g)`?", min_value, max_value)
)
}

new_min <- max
new_max <- min
new_min <- max_value
new_max <- min_value

out <- as.vector((new_max - new_min) / (max - min) * (x - min) + new_min)
out <- as.vector((new_max - new_min) / (max_value - min_value) * (x - min_value) + new_min)

# labelled data?
out <- .set_back_labels(out, x)
out <- .set_back_labels(out, x, reverse_values = TRUE)
out
}

Expand All @@ -134,7 +134,9 @@ reverse.factor <- function(x, range = NULL, verbose = TRUE, ...) {
# save for later use
original_x <- x

if (!is.null(range)) {
if (is.null(range)) {
old_levels <- levels(x)
} else {
# no missing values allowed
if (anyNA(range)) {
insight::format_error("`range` is not allowed to have missing values.")
Expand Down Expand Up @@ -180,16 +182,14 @@ reverse.factor <- function(x, range = NULL, verbose = TRUE, ...) {
}
old_levels <- range
x <- factor(x, levels = range)
} else {
old_levels <- levels(x)
}

int_x <- as.integer(x)
rev_x <- reverse(int_x, range = c(1, length(old_levels)))
x <- factor(rev_x, levels = seq_len(length(old_levels)), labels = old_levels)

# labelled data?
x <- .set_back_labels(x, original_x)
x <- .set_back_labels(x, original_x, reverse_values = TRUE)

x
}
Expand Down Expand Up @@ -225,16 +225,16 @@ reverse.grouped_df <- function(x,
# create the new variables and updates "select", so new variables are processed
if (!isFALSE(append)) {
# process arguments
args <- .process_append(
arguments <- .process_append(
x,
select,
append,
append_suffix = "_r",
preserve_value_labels = TRUE
)
# update processed arguments
x <- args$x
select <- args$select
x <- arguments$x
select <- arguments$select
}

x <- as.data.frame(x)
Expand Down Expand Up @@ -279,16 +279,16 @@ reverse.data.frame <- function(x,
# create the new variables and updates "select", so new variables are processed
if (!isFALSE(append)) {
# process arguments
args <- .process_append(
arguments <- .process_append(
x,
select,
append,
append_suffix = "_r",
preserve_value_labels = TRUE
)
# update processed arguments
x <- args$x
select <- args$select
x <- arguments$x
select <- arguments$select
}

# Transform the range so that it is a list now
Expand Down
6 changes: 3 additions & 3 deletions R/to_numeric.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ to_numeric.data.frame <- function(x,

#' @export
to_numeric.numeric <- function(x, verbose = TRUE, ...) {
.set_back_labels(as.numeric(x), x)
.set_back_labels(as.numeric(x), x, reverse_values = FALSE)
}

#' @export
Expand Down Expand Up @@ -233,9 +233,9 @@ to_numeric.factor <- function(x,
}
x <- factor(x_inverse)
}
out <- .set_back_labels(as.numeric(as.character(x)), x)
out <- .set_back_labels(as.numeric(as.character(x)), x, reverse_values = FALSE)
} else {
out <- .set_back_labels(as.numeric(x), x)
out <- .set_back_labels(as.numeric(x), x, reverse_values = FALSE)
}

# shift to requested starting value
Expand Down
25 changes: 21 additions & 4 deletions R/utils_labels.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,29 @@
# to the transformed vector

#' @keywords internal
.set_back_labels <- function(new, old, include_values = TRUE) {
.set_back_labels <- function(new, old, include_values = TRUE, reverse_values = FALSE) {
# labelled data?
attr(new, "label") <- attr(old, "label", exact = TRUE)
labels <- attr(old, "labels", exact = TRUE)
if (isTRUE(include_values) && !is.null(labels)) {
attr(new, "labels") <- stats::setNames(rev(labels), names(labels))
value_labels <- attr(old, "labels", exact = TRUE)
# "include_values" is used to preserve value labels
if (isTRUE(include_values) && !is.null(value_labels)) {
if (reverse_values) {
# reverse values? Used for "reverse_scale()"
attr(new, "labels") <- stats::setNames(rev(value_labels), names(value_labels))
} else {
# keep value oder? Used for "to_numeric()"
if (is.numeric(new)) {
if (any(grepl("[^0-9]", value_labels))) {
# if we have any non-numeric characters, convert to numeric
attr(new, "labels") <- stats::setNames(as.numeric(as.factor(value_labels)), names(value_labels))
} else {
# if we have numeric, or "numeric character" (like "1", "2", "3" etc.)
attr(new, "labels") <- stats::setNames(as.numeric(value_labels), names(value_labels))
}
} else {
attr(new, "labels") <- stats::setNames(value_labels, names(value_labels))
}
}
} else if (isFALSE(include_values)) {
attr(new, "labels") <- NULL
}
Expand Down
31 changes: 31 additions & 0 deletions tests/testthat/test-data_to_numeric.R
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,34 @@ test_that("to_numeric works with haven_labelled, convert many labels correctly",
expect_identical(as.vector(table(x)), c(180L, 506L, 156L))
})
})


test_that("to_numeric preserves correct label order", {
x <- factor(c(1, 2, 3, 4))
x <- assign_labels(x, values = c("one", "two", "three", "four"))
out <- to_numeric(x, dummy_factors = FALSE)
expect_identical(
attributes(out)$labels,
c(one = 1, two = 2, three = 3, four = 4)
)
# correctly reverse scale
out <- to_numeric(reverse_scale(x), dummy_factors = FALSE)
expect_identical(
attributes(out)$labels,
c(one = 4, two = 3, three = 2, four = 1)
)
# factor with alphabetical values
x <- factor(letters[1:4])
x <- assign_labels(x, values = c("one", "two", "three", "four"))
out <- to_numeric(x, dummy_factors = FALSE)
expect_identical(
attributes(out)$labels,
c(one = 1, two = 2, three = 3, four = 4)
)
# correctly reverse scale
out <- to_numeric(reverse_scale(x), dummy_factors = FALSE)
expect_identical(
attributes(out)$labels,
c(one = 4, two = 3, three = 2, four = 1)
)
})
47 changes: 33 additions & 14 deletions tests/testthat/test-standardize_models.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,24 +47,28 @@ test_that("transformations", {
fit_scale2 <- lm(scale(mpg) ~ scale(exp(hp_100)), mt)
expect_equal(
effectsize::standardize_parameters(fit_exp, method = "refit")[2, 2],
unname(coef(fit_scale1)[2])
unname(coef(fit_scale1)[2]),
ignore_attr = TRUE
)

expect_equal(
effectsize::standardize_parameters(fit_exp, method = "basic")[2, 2],
unname(coef(fit_scale2)[2])
unname(coef(fit_scale2)[2]),
ignore_attr = TRUE
)

skip_if_not_installed("insight", minimum_version = "0.10.0")
d <- data.frame(
time = as.factor(c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5)),
group = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2),
sum = c(0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50)
sum = c(0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50) # nolint
)
m <- lm(log(sum + 1) ~ as.numeric(time) * group, data = d)


expect_message(out <- standardize(m))
expect_message({
out <- standardize(m)
})
expect_identical(coef(m), c(
`(Intercept)` = -0.4575, `as.numeric(time)` = 0.5492, group = 0.3379,
`as.numeric(time):group` = 0.15779
Expand Down Expand Up @@ -98,12 +102,14 @@ test_that("weights", {
stdREFIT <- effectsize::standardize_parameters(m, method = "refit")
expect_equal(
stdREFIT[[2]],
effectsize::standardize_parameters(m, method = "posthoc")[[2]]
effectsize::standardize_parameters(m, method = "posthoc")[[2]],
ignore_attr = TRUE
)

expect_equal(
stdREFIT[[2]],
effectsize::standardize_parameters(m, method = "basic")[[2]]
effectsize::standardize_parameters(m, method = "basic")[[2]],
ignore_attr = TRUE
)
})

Expand Down Expand Up @@ -230,7 +236,9 @@ test_that("standardize mediation", {
)

out1 <- summary(standardize(med1))
expect_message(out2 <- summary(standardize(med2)))
expect_message({
out2 <- summary(standardize(med2))
})
expect_identical(unlist(out1[c("d0", "d1", "z0", "z1", "n0", "n1", "tau.coef")]),
unlist(out2[c("d0", "d1", "z0", "z1", "n0", "n1", "tau.coef")]),
tolerance = 0.1
Expand Down Expand Up @@ -266,13 +274,22 @@ test_that("offsets", {

m <- lm(mpg ~ hp + offset(wt), data = mtcars)

expect_warning(mz1 <- standardize(m))
expect_warning(mz2 <- standardize(m, two_sd = TRUE))
expect_warning({
mz1 <- standardize(m)
})
expect_warning({
mz2 <- standardize(m, two_sd = TRUE)
})
expect_identical(c(1, 2) * coef(mz1), coef(mz2))


m <- glm(cyl ~ hp + offset(wt), family = poisson(), data = mtcars)
expect_warning(mz <- standardize(m), regexp = NA)
expect_warning(
{
mz <- standardize(m)
},
regexp = NA
)

par1 <- parameters::model_parameters(mz)
par2 <- effectsize::standardize_parameters(m, method = "basic")
Expand All @@ -288,10 +305,12 @@ test_that("brms", {
skip_if_not_installed("brms")

invisible(
capture.output(mod <- brms::brm(mpg ~ hp,
data = mtcars,
refresh = 0, chains = 1, silent = 2
))
capture.output({
mod <- brms::brm(mpg ~ hp,
data = mtcars,
refresh = 0, chains = 1, silent = 2
)
})
)

expect_warning(
Expand Down

0 comments on commit 0e3991b

Please sign in to comment.