Skip to content

Commit

Permalink
Merge pull request #1407 from tidymodels/steo_dummy-soarse-enum
Browse files Browse the repository at this point in the history
  • Loading branch information
EmilHvitfeldt authored Jan 13, 2025
2 parents 76c651b + 4964e63 commit 2326e71
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 23 deletions.
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

* All steps and checks now require arguments `trained`, `skip`, `role`, and `id` at all times.

* `step_dummy()` gained `sparse` argument. When set to `TRUE`, `step_dummy()` will produce sparse vectors. (#1392)
* `step_dummy()` gained a `sparse` argument. When set to `"yes"`, `step_dummy()` will produce sparse vectors. (#1392)

# recipes 1.1.0

Expand Down
23 changes: 13 additions & 10 deletions R/dummy.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@
#' @param levels A list that contains the information needed to create dummy
#' variables for each variable contained in `terms`. This is `NULL` until the
#' step is trained by [prep()].
#' @param sparse A logical. Should the columns produced be sparse vectors.
#' Sparsity is only supported for `"contr.treatment"` contrasts. Defaults to
#' `FALSE`.
#' @param sparse A single string. Should the columns produced be sparse vectors?
#' Can take the values `"yes"`, `"no"`, and `"auto"`. If `sparse = "auto"`
#' then workflows can determine the best option. Sparsity is only supported
#' for `"contr.treatment"` contrasts. Defaults to `"auto"`.
#' @template step-return
#' @family dummy variable and encoding steps
#' @seealso [dummy_names()]
Expand Down Expand Up @@ -125,7 +126,7 @@ step_dummy <-
preserve = deprecated(),
naming = dummy_names,
levels = NULL,
sparse = FALSE,
sparse = "auto",
keep_original_cols = FALSE,
skip = FALSE,
id = rand_id("dummy")) {
Expand Down Expand Up @@ -181,7 +182,7 @@ prep.step_dummy <- function(x, training, info = NULL, ...) {
check_type(training[, col_names], types = c("factor", "ordered"))
check_bool(x$one_hot, arg = "one_hot")
check_function(x$naming, arg = "naming", allow_empty = FALSE)
check_bool(x$sparse, arg = "sparse")
rlang::arg_match0(x$sparse, c("auto", "yes", "no"), arg_nm = "sparse")

if (length(col_names) > 0) {
## I hate doing this but currently we are going to have
Expand Down Expand Up @@ -301,19 +302,21 @@ bake.step_dummy <- function(object, new_data, ...) {
ordered = is_ordered
)

if (object$sparse) {
if (object$sparse == "yes") {
current_contrast <- getOption("contrasts")[is_ordered + 1]
if (current_contrast != "contr.treatment") {
if (!current_contrast %in% c("contr.treatment", "contr_one_hot")) {
cli::cli_abort(
"When {.code sparse = TRUE}, only {.val contr.treatment} contrasts are
supported, not {.val {current_contrast}}."
"When {.code sparse = TRUE}, only {.val contr.treatment} and
{.val contr_one_hot} contrasts are supported, not
{.val {current_contrast}}."
)
}
}

indicators <- sparsevctrs::sparse_dummy(
x = new_data[[col_name]],
one_hot = object$one_hot
)

indicators <- tibble::new_tibble(indicators)
used_lvl <- colnames(indicators)
} else {
Expand Down
9 changes: 5 additions & 4 deletions man/step_dummy.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions tests/testthat/_snaps/dummy.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,15 +154,15 @@
Caused by error in `bake()`:
! Only one factor level in `x`: "only-level".

# sparse = TRUE errors on unsupported contrasts
# sparse = 'yes' errors on unsupported contrasts

Code
recipe(~., data = tibble(x = letters)) %>% step_dummy(x, sparse = TRUE) %>%
recipe(~., data = tibble(x = letters)) %>% step_dummy(x, sparse = "yes") %>%
prep()
Condition
Error in `step_dummy()`:
Caused by error in `bake()`:
! When `sparse = TRUE`, only "contr.treatment" contrasts are supported, not "contr.helmert".
! When `sparse = TRUE`, only "contr.treatment" and "contr_one_hot" contrasts are supported, not "contr.helmert".

# bake method errors when needed non-standard role columns are missing

Expand Down
10 changes: 5 additions & 5 deletions tests/testthat/test-dummy.R
Original file line number Diff line number Diff line change
Expand Up @@ -354,13 +354,13 @@ test_that("throws an informative error for single level", {
)
})

test_that("sparse = TRUE works", {
test_that("sparse = 'yes' works", {
rec <- recipe(~ ., data = tibble(x = c(NA, letters)))

suppressWarnings({
dense <- rec %>% step_dummy(x, sparse = FALSE) %>% prep() %>% bake(NULL)
dense <- rec %>% step_dummy(x, sparse = "no") %>% prep() %>% bake(NULL)
dense <- purrr::map(dense, as.integer) %>% tibble::new_tibble()
sparse <- rec %>% step_dummy(x, sparse = TRUE) %>% prep() %>% bake(NULL)
sparse <- rec %>% step_dummy(x, sparse = "yes") %>% prep() %>% bake(NULL)
})

expect_identical(dense, sparse)
Expand All @@ -369,15 +369,15 @@ test_that("sparse = TRUE works", {
expect_true(all(vapply(sparse, sparsevctrs::is_sparse_vector, logical(1))))
})

test_that("sparse = TRUE errors on unsupported contrasts", {
test_that("sparse = 'yes' errors on unsupported contrasts", {
go_helmert <- getOption("contrasts")
go_helmert["unordered"] <- "contr.helmert"
withr::local_options(contrasts = go_helmert)

expect_snapshot(
error = TRUE,
recipe(~ ., data = tibble(x = letters)) %>%
step_dummy(x, sparse = TRUE) %>%
step_dummy(x, sparse = "yes") %>%
prep()
)
})
Expand Down

0 comments on commit 2326e71

Please sign in to comment.