Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix to_numeric() with inversed factor levels #469

Merged
merged 6 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: datawizard
Title: Easy Data Wrangling and Statistical Transformations
Version: 0.9.0.4
Version: 0.9.0.5
Authors@R: c(
person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut",
comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
Expand Down
7 changes: 6 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# datawizard 0.9.0.9000 (development version)
# datawizard 0.9.0.x (development version)

CHANGES

Expand All @@ -7,6 +7,11 @@ CHANGES

* `to_factor()` and `to_numeric()` now support class `haven_labelled`.

BUG FIXES

* `to_numeric()` now correctly deals with inversed factor levels when
`preserve_levels = TRUE`.

# datawizard 0.9.0

NEW FUNCTIONS
Expand Down
24 changes: 12 additions & 12 deletions R/data_tabulate.R
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,10 @@
for (i in seq_along(grps)) {
rows <- grps[[i]]
# save information about grouping factors
if (!is.null(group_variables)) {
group_variable <- group_variables[i, , drop = FALSE]
} else {
if (is.null(group_variables)) {
group_variable <- NULL
} else {
group_variable <- group_variables[i, , drop = FALSE]
}
out <- c(out, data_tabulate(
data_filter(x, rows),
Expand Down Expand Up @@ -226,7 +226,7 @@
# format data frame
ftab <- insight::format_table(x, ...)
ftab[] <- lapply(ftab, function(i) {
i[i == ""] <- ifelse(identical(format, "text"), "<NA>", "(NA)")
i[i == ""] <- ifelse(identical(format, "text"), "<NA>", "(NA)") # nolint
i
})
ftab$N <- gsub("\\.00$", "", ftab$N)
Expand Down Expand Up @@ -347,10 +347,10 @@
}
} else {
x <- lapply(x, function(i) {
attr <- attributes(i)
i_attr <- attributes(i)
i <- format(i, format = "text", big_mark = big_mark, ...)
i$Variable[attr$duplicate_varnames] <- ""
if (!is.null(i$Group)) i$Group[attr$duplicate_varnames] <- ""
i$Variable[i_attr$duplicate_varnames] <- ""
if (!is.null(i$Group)) i$Group[i_attr$duplicate_varnames] <- ""
i[nrow(i) + 1, ] <- ""
i
})
Expand All @@ -375,9 +375,9 @@
print_html(x[[1]], big_mark = big_mark, ...)
} else {
x <- lapply(x, function(i) {
attr <- attributes(i)
i_attr <- attributes(i)

Check warning on line 378 in R/data_tabulate.R

View check run for this annotation

Codecov / codecov/patch

R/data_tabulate.R#L378

Added line #L378 was not covered by tests
i <- format(i, format = "html", big_mark = big_mark, ...)
i$Variable[attr$duplicate_varnames] <- ""
i$Variable[i_attr$duplicate_varnames] <- ""

Check warning on line 380 in R/data_tabulate.R

View check run for this annotation

Codecov / codecov/patch

R/data_tabulate.R#L380

Added line #L380 was not covered by tests
i
})

Expand All @@ -401,10 +401,10 @@
print_md(x[[1]], big_mark = big_mark, ...)
} else {
x <- lapply(x, function(i) {
attr <- attributes(i)
i_attr <- attributes(i)

Check warning on line 404 in R/data_tabulate.R

View check run for this annotation

Codecov / codecov/patch

R/data_tabulate.R#L404

Added line #L404 was not covered by tests
i <- format(i, format = "markdown", big_mark = big_mark, ...)
i$Variable[attr$duplicate_varnames] <- ""
if (!is.null(i$Group)) i$Group[attr$duplicate_varnames] <- ""
i$Variable[i_attr$duplicate_varnames] <- ""
if (!is.null(i$Group)) i$Group[i_attr$duplicate_varnames] <- ""

Check warning on line 407 in R/data_tabulate.R

View check run for this annotation

Codecov / codecov/patch

R/data_tabulate.R#L406-L407

Added lines #L406 - L407 were not covered by tests
i[nrow(i) + 1, ] <- ""
i
})
Expand Down
19 changes: 13 additions & 6 deletions R/to_numeric.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ to_numeric.data.frame <- function(x,
return(x)
}

attr <- attributes(x)
df_attr <- attributes(x)

# evaluate arguments
select <- .select_nse(select,
Expand All @@ -91,16 +91,16 @@ to_numeric.data.frame <- function(x,
# drop numerics, when append is not FALSE
select <- colnames(x[select])[!vapply(x[select], is.numeric, FUN.VALUE = logical(1L))]
# process arguments
args <- .process_append(
fun_args <- .process_append(
x,
select,
append,
append_suffix = "_n",
keep_factors = TRUE
)
# update processed arguments
x <- args$x
select <- args$select
x <- fun_args$x
select <- fun_args$select
}

out <- sapply(
Expand Down Expand Up @@ -129,7 +129,7 @@ to_numeric.data.frame <- function(x,
}

# due to the special handling of dummy factors, we need to take care
# of appending the data here again. usually, "args$x" includes the appended
# of appending the data here again. usually, "fun_args$x" includes the appended
# data, which does not work here...

if (!isFALSE(append)) {
Expand All @@ -141,7 +141,7 @@ to_numeric.data.frame <- function(x,
}

# add back custom attributes
out <- .replace_attrs(out, attr)
out <- .replace_attrs(out, df_attr)
out
}

Expand Down Expand Up @@ -226,6 +226,13 @@ to_numeric.factor <- function(x,
}
names(out) <- levels(x)
} else if (preserve_levels) {
if (is.unsorted(levels(x))) {
x_inverse <- rep(NA_real_, length(x))
for (i in 1:nlevels(x)) {
x_inverse[x == levels(x)[i]] <- as.numeric(levels(x)[nlevels(x) - i + 1])
}
x <- factor(x_inverse)
}
out <- .set_back_labels(as.numeric(as.character(x)), x)
} else {
out <- .set_back_labels(as.numeric(x), x)
Expand Down
1 change: 1 addition & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ ggplot's
https
interpretability
interpretable
inversed
joss
labelled
labelling
Expand Down
19 changes: 14 additions & 5 deletions tests/testthat/test-data_to_numeric.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,13 @@ test_that("convert factor to numeric", {
expect_snapshot(to_numeric(f))
})


test_that("convert factor to numeric", {
expect_identical(to_numeric(c("abc", "xyz")), c(1, 2))
expect_identical(to_numeric(c("123", "789")), c(123, 789))
expect_identical(to_numeric(c("1L", "2e-3")), c(1, 0.002))
expect_identical(to_numeric(c("1L", "2e-3", "ABC")), c(1, 2, 3))
})


test_that("convert factor to numeric, dummy factors", {
expect_identical(
to_numeric(c("abc", "xyz"), dummy_factors = TRUE),
Expand All @@ -66,7 +64,6 @@ test_that("convert factor to numeric, dummy factors", {
)
})


test_that("convert factor to numeric, append", {
data(efc)
expect_identical(
Expand Down Expand Up @@ -94,13 +91,11 @@ test_that("convert factor to numeric, append", {
)
})


test_that("convert factor to numeric, all numeric", {
data(mtcars)
expect_identical(to_numeric(mtcars), mtcars)
})


test_that("convert factor to numeric, dummy factors, with NA", {
x1 <- factor(rep(c("a", "b"), 3))
x2 <- factor(c("a", NA_character_, "a", "b", "a", "b"))
Expand Down Expand Up @@ -153,6 +148,20 @@ test_that("convert factor to numeric, dummy factors, with NA", {
expect_identical(nrow(to_numeric(x7, dummy_factors = TRUE)), length(x7))
})

test_that("to_numeric, inverse factor levels", {
f <- c(0, 0, 1, 1, 1, 0)
x1 <- factor(f, levels = c(0, 1))
x2 <- factor(f, levels = c(1, 0))
out <- to_numeric(x1, dummy_factors = FALSE, preserve_levels = FALSE)
expect_identical(out, c(1, 1, 2, 2, 2, 1))
out <- to_numeric(x2, dummy_factors = FALSE, preserve_levels = FALSE)
expect_identical(out, c(2, 2, 1, 1, 1, 2))
out <- to_numeric(x1, dummy_factors = FALSE, preserve_levels = TRUE)
expect_identical(out, c(0, 0, 1, 1, 1, 0))
out <- to_numeric(x2, dummy_factors = FALSE, preserve_levels = TRUE)
expect_identical(out, c(1, 1, 0, 0, 0, 1))
})

# select helpers ------------------------------
test_that("to_numeric regex", {
expect_identical(
Expand Down
Loading