Skip to content

Commit

Permalink
Merge pull request #61 from r-lib/better-coerce
Browse files Browse the repository at this point in the history
Better coerce
  • Loading branch information
EmilHvitfeldt authored May 22, 2024
2 parents a1fe003 + a5904eb commit 2c46cfd
Show file tree
Hide file tree
Showing 4 changed files with 320 additions and 2 deletions.
35 changes: 34 additions & 1 deletion R/coerce.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#'
#' @seealso [coerce_to_sparse_data_frame()] [coerce_to_sparse_tibble()]
#' @examplesIf rlang::is_installed("Matrix")
#' set.seed(1234)
#' sparse_tbl <- lapply(1:10, function(x) sparse_double(x, x, length = 10))
#' names(sparse_tbl) <- letters[1:10]
#' sparse_tbl <- as.data.frame(sparse_tbl)
Expand All @@ -30,8 +29,32 @@ coerce_to_sparse_matrix <- function(x) {
)
}

if (!all(vapply(x, is.numeric, logical(1)))) {
offenders <- which(!vapply(x, is.numeric, logical(1)))
offenders <- names(x)[offenders]
cli::cli_abort(c(
x = "All columns of {.arg x} must be numeric.",
i = "Non-numeric columns: {.field {offenders}}."
))
}

if (!any(vapply(x, is_sparse_numeric, logical(1)))) {
res <- as.matrix(x)
res <- Matrix::Matrix(res, sparse = TRUE)
return(res)
}

if (!all(vapply(x, sparse_default, numeric(1)) == 0, na.rm = TRUE)) {
offenders <- which(vapply(x, sparse_default, numeric(1)) != 0)

for (i in offenders) {
x[[i]] <- x[[i]][]
}
}

all_positions <- lapply(x, sparse_positions)
all_values <- lapply(x, sparse_values)

all_rows <- rep(seq_along(x), times = lengths(all_positions))

all_positions <- unlist(all_positions, use.names = FALSE)
Expand Down Expand Up @@ -78,6 +101,11 @@ coerce_to_sparse_tibble <- function(x) {
)
}

if (!methods::is(x, "dgCMatrix")) {
x <- as(x, "generalMatrix")
x <- as(x, "CsparseMatrix")
}

if (is.null(colnames(x))) {
cli::cli_abort(
"{.arg x} must have column names."
Expand Down Expand Up @@ -119,6 +147,11 @@ coerce_to_sparse_data_frame <- function(x) {
)
}

if (!methods::is(x, "dgCMatrix")) {
x <- as(x, "generalMatrix")
x <- as(x, "CsparseMatrix")
}

if (is.null(colnames(x))) {
cli::cli_abort(
"{.arg x} must have column names."
Expand Down
1 change: 0 additions & 1 deletion man/coerce_to_sparse_matrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

81 changes: 81 additions & 0 deletions tests/testthat/_snaps/coerce.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# coerce_to_sparse_matrix() errors on wrong input

Code
coerce_to_sparse_matrix(1:10)
Condition
Error in `coerce_to_sparse_matrix()`:
! `x` must be a <data.frame>, not an integer vector.

---

Code
coerce_to_sparse_matrix(matrix(0, nrow = 10, ncol = 10))
Condition
Error in `coerce_to_sparse_matrix()`:
! `x` must be a <data.frame>, not a double matrix.

---

Code
coerce_to_sparse_matrix(iris)
Condition
Error in `coerce_to_sparse_matrix()`:
x All columns of `x` must be numeric.
i Non-numeric columns: Species.

# coerce_to_sparse_matrix() materializes non-zero defaulted columns

Code
res <- coerce_to_sparse_matrix(sparse_df)
Output
sparsevctrs: Sparse vector materialized
sparsevctrs: Sparse vector materialized

# coerce_to_sparse_data_frame() errors with no column names

Code
coerce_to_sparse_data_frame(sparse_mat)
Condition
Error in `coerce_to_sparse_data_frame()`:
! `x` must have column names.

# coerce_to_sparse_data_frame() errors with wrong input

Code
coerce_to_sparse_data_frame(mtcars)
Condition
Error in `coerce_to_sparse_data_frame()`:
! `x` must be a <sparseMatrix>, not a data frame.

---

Code
coerce_to_sparse_data_frame(1:10)
Condition
Error in `coerce_to_sparse_data_frame()`:
! `x` must be a <sparseMatrix>, not an integer vector.

# coerce_to_sparse_tibble() errors with no column names

Code
coerce_to_sparse_tibble(sparse_mat)
Condition
Error in `coerce_to_sparse_tibble()`:
! `x` must have column names.

# coerce_to_sparse_tibble() errors with wrong input

Code
coerce_to_sparse_tibble(mtcars)
Condition
Error in `coerce_to_sparse_tibble()`:
! `x` must be a <sparseMatrix>, not a data frame.

---

Code
coerce_to_sparse_tibble(1:10)
Condition
Error in `coerce_to_sparse_tibble()`:
! `x` must be a <sparseMatrix>, not an integer vector.

205 changes: 205 additions & 0 deletions tests/testthat/test-coerce.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
### coerce_to_sparse_matrix ----------------------------------------------------
test_that("coerce_to_sparse_matrix() works", {
skip_if_not_installed("Matrix")

sparse_df <- lapply(1:10, function(x) sparse_double(x, x, length = 10))
names(sparse_df) <- letters[1:10]
sparse_df <- as.data.frame(sparse_df)

res <- coerce_to_sparse_matrix(sparse_df)
expect_s4_class(res, "dgCMatrix")
expect_identical(dim(res), c(10L, 10L))

exp <- Matrix::diag(1:10, 10, 10)
exp <- Matrix::Matrix(exp, sparse = TRUE)
exp <- as(exp, "generalMatrix")
exp <- as(exp, "CsparseMatrix")
colnames(exp) <- colnames(res)
rownames(exp) <- rownames(res)

expect_identical(res, exp)
})

test_that("coerce_to_sparse_matrix() errors on wrong input", {
skip_if_not_installed("Matrix")

expect_snapshot(
error = TRUE,
coerce_to_sparse_matrix(1:10)
)
expect_snapshot(
error = TRUE,
coerce_to_sparse_matrix(matrix(0, nrow = 10, ncol = 10))
)
expect_snapshot(
error = TRUE,
coerce_to_sparse_matrix(iris)
)
})

test_that("coerce_to_sparse_matrix() will divert for non-sparse data.frames", {
skip_if_not_installed("Matrix")

expect_identical(
coerce_to_sparse_matrix(mtcars),
Matrix::Matrix(as.matrix(mtcars), sparse = TRUE)
)
})

test_that("coerce_to_sparse_matrix() materializes non-zero defaulted columns", {
skip_if_not_installed("Matrix")
withr::local_options("sparsevctrs.verbose_materialize" = TRUE)

sparse_df <- lapply(1:10, function(x) sparse_double(x, x, length = 10))
names(sparse_df) <- letters[1:10]
sparse_df <- as.data.frame(sparse_df)
sparse_df$nonzero1 <- sparse_double(1, 1, 10, default = 10)
sparse_df$nonzero2 <- sparse_double(1, 1, 10, default = 20)

expect_snapshot(
res <- coerce_to_sparse_matrix(sparse_df)
)

withr::local_options("sparsevctrs.verbose_materialize" = NULL)

expect_s4_class(res, "dgCMatrix")
expect_identical(dim(res), c(10L, 12L))

exp <- Matrix::diag(1:10, 10, 10)
exp <- Matrix::Matrix(exp, sparse = TRUE)
exp <- as(exp, "generalMatrix")
exp <- as(exp, "CsparseMatrix")

exp <- cbind(exp, sparse_df$nonzero1)
exp <- cbind(exp, sparse_df$nonzero2)

colnames(exp) <- colnames(res)
rownames(exp) <- rownames(res)

expect_identical(res, exp)
})

### coerce_to_sparse_data_frame ------------------------------------------------

test_that("coerce_to_sparse_data_frame() works", {
skip_if_not_installed("Matrix")

sparse_mat <- Matrix::diag(1:10, 10, 10)
sparse_mat <- Matrix::Matrix(sparse_mat, sparse = TRUE)
sparse_mat <- as(sparse_mat, "generalMatrix")
sparse_mat <- as(sparse_mat, "CsparseMatrix")
colnames(sparse_mat) <- letters[1:10]
rownames(sparse_mat) <- 1:10

res <- coerce_to_sparse_data_frame(sparse_mat)

exp <- lapply(1:10, function(x) sparse_double(x, x, length = 10))
names(exp) <- letters[1:10]
exp <- as.data.frame(exp)

expect_identical(res, exp)
})

test_that("coerce_to_sparse_data_frame() works with non-dgCMatrix input", {
skip_if_not_installed("Matrix")

sparse_mat <- Matrix::diag(1:10, 10, 10)
sparse_mat <- Matrix::Matrix(sparse_mat, sparse = TRUE)
colnames(sparse_mat) <- letters[1:10]
rownames(sparse_mat) <- 1:10

res <- coerce_to_sparse_data_frame(sparse_mat)

exp <- lapply(1:10, function(x) sparse_double(x, x, length = 10))
names(exp) <- letters[1:10]
exp <- as.data.frame(exp)

expect_identical(res, exp)
})

test_that("coerce_to_sparse_data_frame() errors with no column names", {
skip_if_not_installed("Matrix")

sparse_mat <- Matrix::diag(1:10, 10, 10)
sparse_mat <- Matrix::Matrix(sparse_mat, sparse = TRUE)

expect_snapshot(
error = TRUE,
coerce_to_sparse_data_frame(sparse_mat)
)
})

test_that("coerce_to_sparse_data_frame() errors with wrong input", {
expect_snapshot(
error = TRUE,
coerce_to_sparse_data_frame(mtcars)
)
expect_snapshot(
error = TRUE,
coerce_to_sparse_data_frame(1:10)
)
})

### coerce_to_sparse_tibble ----------------------------------------------------

test_that("coerce_to_sparse_tibble() works", {
skip_if_not_installed("Matrix")
skip_if_not_installed("tibble")

sparse_mat <- Matrix::diag(1:10, 10, 10)
sparse_mat <- Matrix::Matrix(sparse_mat, sparse = TRUE)
sparse_mat <- as(sparse_mat, "generalMatrix")
sparse_mat <- as(sparse_mat, "CsparseMatrix")
colnames(sparse_mat) <- letters[1:10]
rownames(sparse_mat) <- 1:10

res <- coerce_to_sparse_tibble(sparse_mat)

exp <- lapply(1:10, function(x) sparse_double(x, x, length = 10))
names(exp) <- letters[1:10]
exp <- tibble::as_tibble(exp)

expect_identical(res, exp)
})

test_that("coerce_to_sparse_tibble() works with non-dgCMatrix input", {
skip_if_not_installed("Matrix")
skip_if_not_installed("tibble")

sparse_mat <- Matrix::diag(1:10, 10, 10)
sparse_mat <- Matrix::Matrix(sparse_mat, sparse = TRUE)
colnames(sparse_mat) <- letters[1:10]
rownames(sparse_mat) <- 1:10

res <- coerce_to_sparse_tibble(sparse_mat)

exp <- lapply(1:10, function(x) sparse_double(x, x, length = 10))
names(exp) <- letters[1:10]
exp <- tibble::as_tibble(exp)

expect_identical(res, exp)
})

test_that("coerce_to_sparse_tibble() errors with no column names", {
skip_if_not_installed("Matrix")
skip_if_not_installed("tibble")

sparse_mat <- Matrix::diag(1:10, 10, 10)
sparse_mat <- Matrix::Matrix(sparse_mat, sparse = TRUE)

expect_snapshot(
error = TRUE,
coerce_to_sparse_tibble(sparse_mat)
)
})

test_that("coerce_to_sparse_tibble() errors with wrong input", {
expect_snapshot(
error = TRUE,
coerce_to_sparse_tibble(mtcars)
)
expect_snapshot(
error = TRUE,
coerce_to_sparse_tibble(1:10)
)
})

0 comments on commit 2c46cfd

Please sign in to comment.