Skip to content

Commit

Permalink
Merge pull request #1364 from tidymodels/sparse-tibble-recipe
Browse files Browse the repository at this point in the history
make sure `recipe()` accepts sparse tibbles
  • Loading branch information
EmilHvitfeldt authored Sep 9, 2024
2 parents 9a0376b + 02f7a73 commit dd2e0d5
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 0 deletions.
3 changes: 3 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ Imports:
Matrix,
purrr (>= 1.0.0),
rlang (>= 1.1.0),
sparsevctrs (>= 0.1.0.9000),
stats,
tibble,
tidyr (>= 1.0.0),
Expand Down Expand Up @@ -62,6 +63,8 @@ Suggests:
testthat (>= 3.0.0),
workflows,
xml2
Remotes:
r-lib/sparsevctrs
VignetteBuilder:
knitr
RdMacros:
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

* Example for `step_novel()` now better illustrates how it works. (@Edgar-Zamora, #1248)

* `recipe()` now works with sparse tibbles. (#1364)

# recipes 1.1.0

## Improvements
Expand Down
3 changes: 3 additions & 0 deletions R/sparsevctrs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Does `x` contain at least one sparse vector?
#
# Each element of `x` (each column, when `x` is a data frame) is checked with
# `sparsevctrs::is_sparse_vector()`. Returns a single logical: `TRUE` as soon
# as any element is sparse, `FALSE` otherwise (including when `x` is empty).
is_sparse_tibble <- function(x) {
  sparse_flags <- vapply(
    x,
    sparsevctrs::is_sparse_vector,
    FUN.VALUE = logical(1)
  )
  any(sparse_flags)
}
26 changes: 26 additions & 0 deletions tests/testthat/helper-sparsevctrs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# ------------------------------------------------------------------------------
# For sparse tibble testing

sparse_hotel_rates <- function() {
  # Test fixture: the `avg_price_per_room` column of `modeldata::hotel_rates`
  # placed next to one-hot encodings of `country`, `company` and `agent`, and
  # returned as a sparse matrix. The original note cites "99.2" sparsity
  # (presumably percent).
  rates <- modeldata::hotel_rates

  # One-hot encode the column called `label` and rename every resulting
  # indicator column to "<original name>_<label>".
  one_hot_labelled <- function(label) {
    indicators <- hardhat::fct_encode_one_hot(rates[[label]])
    colnames(indicators) <- paste(colnames(indicators), label, sep = "_")
    indicators
  }

  country_cols <- one_hot_labelled("country")
  company_cols <- one_hot_labelled("company")
  agent_cols <- one_hot_labelled("agent")

  combined <- dplyr::bind_cols(
    rates["avg_price_per_room"],
    country_cols,
    company_cols,
    agent_cols
  )

  # Go through a base matrix first, then request a sparse Matrix from it.
  Matrix::Matrix(as.matrix(combined), sparse = TRUE)
}
31 changes: 31 additions & 0 deletions tests/testthat/test-sparsevctrs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
test_that("recipe() accepts sparse tibbles", {
  skip_if_not_installed("modeldata")

  # Sparse matrix fixture converted to a tibble with sparse columns.
  sparse_tbl <- sparsevctrs::coerce_to_sparse_tibble(sparse_hotel_rates())

  # Formula first, data passed by name.
  expect_no_condition(
    from_formula <- recipe(avg_price_per_room ~ ., data = sparse_tbl)
  )
  expect_true(is_sparse_tibble(from_formula$template))

  # Data only, no formula.
  expect_no_condition(
    from_data <- recipe(sparse_tbl)
  )
  expect_true(is_sparse_tibble(from_data$template))

  # Data first, formula second.
  expect_no_condition(
    from_data_formula <- recipe(sparse_tbl, avg_price_per_room ~ .)
  )
  expect_true(is_sparse_tibble(from_data_formula$template))
})

0 comments on commit dd2e0d5

Please sign in to comment.