diff --git a/R/coerce.R b/R/coerce.R index 01e4682..b8e3563 100644 --- a/R/coerce.R +++ b/R/coerce.R @@ -12,7 +12,6 @@ #' #' @seealso [coerce_to_sparse_data_frame()] [coerce_to_sparse_tibble()] #' @examplesIf rlang::is_installed("Matrix") -#' set.seed(1234) #' sparse_tbl <- lapply(1:10, function(x) sparse_double(x, x, length = 10)) #' names(sparse_tbl) <- letters[1:10] #' sparse_tbl <- as.data.frame(sparse_tbl) @@ -30,8 +29,32 @@ coerce_to_sparse_matrix <- function(x) { ) } + if (!all(vapply(x, is.numeric, logical(1)))) { + offenders <- which(!vapply(x, is.numeric, logical(1))) + offenders <- names(x)[offenders] + cli::cli_abort(c( + x = "All columns of {.arg x} must be numeric.", + i = "Non-numeric columns: {.field {offenders}}." + )) + } + + if (!any(vapply(x, is_sparse_numeric, logical(1)))) { + res <- as.matrix(x) + res <- Matrix::Matrix(res, sparse = TRUE) + return(res) + } + + if (!all(vapply(x, sparse_default, numeric(1)) == 0, na.rm = TRUE)) { + offenders <- which(vapply(x, sparse_default, numeric(1)) != 0) + + for (i in offenders) { + x[[i]] <- x[[i]][] + } + } + all_positions <- lapply(x, sparse_positions) all_values <- lapply(x, sparse_values) + all_rows <- rep(seq_along(x), times = lengths(all_positions)) all_positions <- unlist(all_positions, use.names = FALSE) @@ -78,6 +101,11 @@ coerce_to_sparse_tibble <- function(x) { ) } + if (!methods::is(x, "dgCMatrix")) { + x <- as(x, "generalMatrix") + x <- as(x, "CsparseMatrix") + } + if (is.null(colnames(x))) { cli::cli_abort( "{.arg x} must have column names." @@ -119,6 +147,11 @@ coerce_to_sparse_data_frame <- function(x) { ) } + if (!methods::is(x, "dgCMatrix")) { + x <- as(x, "generalMatrix") + x <- as(x, "CsparseMatrix") + } + if (is.null(colnames(x))) { cli::cli_abort( "{.arg x} must have column names." diff --git a/man/coerce_to_sparse_matrix.Rd b/man/coerce_to_sparse_matrix.Rd index 9f8bc2c..673b67d 100644 --- a/man/coerce_to_sparse_matrix.Rd +++ b/man/coerce_to_sparse_matrix.Rd @@ -20,7 +20,6 @@ a sparse matrix out of a dense data frame is not ideal. } \examples{ \dontshow{if (rlang::is_installed("Matrix")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -set.seed(1234) sparse_tbl <- lapply(1:10, function(x) sparse_double(x, x, length = 10)) names(sparse_tbl) <- letters[1:10] sparse_tbl <- as.data.frame(sparse_tbl) diff --git a/tests/testthat/_snaps/coerce.md b/tests/testthat/_snaps/coerce.md new file mode 100644 index 0000000..4160978 --- /dev/null +++ b/tests/testthat/_snaps/coerce.md @@ -0,0 +1,81 @@ +# coerce_to_sparse_matrix() errors on wrong input + + Code + coerce_to_sparse_matrix(1:10) + Condition + Error in `coerce_to_sparse_matrix()`: + ! `x` must be a , not an integer vector. + +--- + + Code + coerce_to_sparse_matrix(matrix(0, nrow = 10, ncol = 10)) + Condition + Error in `coerce_to_sparse_matrix()`: + ! `x` must be a , not a double matrix. + +--- + + Code + coerce_to_sparse_matrix(iris) + Condition + Error in `coerce_to_sparse_matrix()`: + x All columns of `x` must be numeric. + i Non-numeric columns: Species. + +# coerce_to_sparse_matrix() materializes non-zero defaulted columns + + Code + res <- coerce_to_sparse_matrix(sparse_df) + Output + sparsevctrs: Sparse vector materialized + sparsevctrs: Sparse vector materialized + +# coerce_to_sparse_data_frame() errors with no column names + + Code + coerce_to_sparse_data_frame(sparse_mat) + Condition + Error in `coerce_to_sparse_data_frame()`: + ! `x` must have column names. + +# coerce_to_sparse_data_frame() errors with wrong input + + Code + coerce_to_sparse_data_frame(mtcars) + Condition + Error in `coerce_to_sparse_data_frame()`: + ! `x` must be a , not a data frame. + +--- + + Code + coerce_to_sparse_data_frame(1:10) + Condition + Error in `coerce_to_sparse_data_frame()`: + ! `x` must be a , not an integer vector. + +# coerce_to_sparse_tibble() errors with no column names + + Code + coerce_to_sparse_tibble(sparse_mat) + Condition + Error in `coerce_to_sparse_tibble()`: + ! `x` must have column names. + +# coerce_to_sparse_tibble() errors with wrong input + + Code + coerce_to_sparse_tibble(mtcars) + Condition + Error in `coerce_to_sparse_tibble()`: + ! `x` must be a , not a data frame. + +--- + + Code + coerce_to_sparse_tibble(1:10) + Condition + Error in `coerce_to_sparse_tibble()`: + ! `x` must be a , not an integer vector. + diff --git a/tests/testthat/test-coerce.R b/tests/testthat/test-coerce.R new file mode 100644 index 0000000..cbd3e71 --- /dev/null +++ b/tests/testthat/test-coerce.R @@ -0,0 +1,205 @@ +### coerce_to_sparse_matrix ---------------------------------------------------- +test_that("coerce_to_sparse_matrix() works", { + skip_if_not_installed("Matrix") + + sparse_df <- lapply(1:10, function(x) sparse_double(x, x, length = 10)) + names(sparse_df) <- letters[1:10] + sparse_df <- as.data.frame(sparse_df) + + res <- coerce_to_sparse_matrix(sparse_df) + expect_s4_class(res, "dgCMatrix") + expect_identical(dim(res), c(10L, 10L)) + + exp <- Matrix::diag(1:10, 10, 10) + exp <- Matrix::Matrix(exp, sparse = TRUE) + exp <- as(exp, "generalMatrix") + exp <- as(exp, "CsparseMatrix") + colnames(exp) <- colnames(res) + rownames(exp) <- rownames(res) + + expect_identical(res, exp) +}) + +test_that("coerce_to_sparse_matrix() errors on wrong input", { + skip_if_not_installed("Matrix") + + expect_snapshot( + error = TRUE, + coerce_to_sparse_matrix(1:10) + ) + expect_snapshot( + error = TRUE, + coerce_to_sparse_matrix(matrix(0, nrow = 10, ncol = 10)) + ) + expect_snapshot( + error = TRUE, + coerce_to_sparse_matrix(iris) + ) +}) + +test_that("coerce_to_sparse_matrix() will divert for non-sparse data.frames", { + skip_if_not_installed("Matrix") + + expect_identical( + coerce_to_sparse_matrix(mtcars), + Matrix::Matrix(as.matrix(mtcars), sparse = TRUE) + ) +}) + +test_that("coerce_to_sparse_matrix() materializes non-zero defaulted columns", { + skip_if_not_installed("Matrix") + withr::local_options("sparsevctrs.verbose_materialize" = TRUE) + + sparse_df <- lapply(1:10, function(x) sparse_double(x, x, length = 10)) + names(sparse_df) <- letters[1:10] + sparse_df <- as.data.frame(sparse_df) + sparse_df$nonzero1 <- sparse_double(1, 1, 10, default = 10) + sparse_df$nonzero2 <- sparse_double(1, 1, 10, default = 20) + + expect_snapshot( + res <- coerce_to_sparse_matrix(sparse_df) + ) + + withr::local_options("sparsevctrs.verbose_materialize" = NULL) + + expect_s4_class(res, "dgCMatrix") + expect_identical(dim(res), c(10L, 12L)) + + exp <- Matrix::diag(1:10, 10, 10) + exp <- Matrix::Matrix(exp, sparse = TRUE) + exp <- as(exp, "generalMatrix") + exp <- as(exp, "CsparseMatrix") + + exp <- cbind(exp, sparse_df$nonzero1) + exp <- cbind(exp, sparse_df$nonzero2) + + colnames(exp) <- colnames(res) + rownames(exp) <- rownames(res) + + expect_identical(res, exp) +}) + +### coerce_to_sparse_data_frame ------------------------------------------------ + +test_that("coerce_to_sparse_data_frame() works", { + skip_if_not_installed("Matrix") + + sparse_mat <- Matrix::diag(1:10, 10, 10) + sparse_mat <- Matrix::Matrix(sparse_mat, sparse = TRUE) + sparse_mat <- as(sparse_mat, "generalMatrix") + sparse_mat <- as(sparse_mat, "CsparseMatrix") + colnames(sparse_mat) <- letters[1:10] + rownames(sparse_mat) <- 1:10 + + res <- coerce_to_sparse_data_frame(sparse_mat) + + exp <- lapply(1:10, function(x) sparse_double(x, x, length = 10)) + names(exp) <- letters[1:10] + exp <- as.data.frame(exp) + + expect_identical(res, exp) +}) + +test_that("coerce_to_sparse_data_frame() works with non-dgCMatrix input", { + skip_if_not_installed("Matrix") + + sparse_mat <- Matrix::diag(1:10, 10, 10) + sparse_mat <- Matrix::Matrix(sparse_mat, sparse = TRUE) + colnames(sparse_mat) <- letters[1:10] + rownames(sparse_mat) <- 1:10 + + res <- coerce_to_sparse_data_frame(sparse_mat) + + exp <- lapply(1:10, function(x) sparse_double(x, x, length = 10)) + names(exp) <- letters[1:10] + exp <- as.data.frame(exp) + + expect_identical(res, exp) +}) + +test_that("coerce_to_sparse_data_frame() errors with no column names", { + skip_if_not_installed("Matrix") + + sparse_mat <- Matrix::diag(1:10, 10, 10) + sparse_mat <- Matrix::Matrix(sparse_mat, sparse = TRUE) + + expect_snapshot( + error = TRUE, + coerce_to_sparse_data_frame(sparse_mat) + ) +}) + +test_that("coerce_to_sparse_data_frame() errors with wrong input", { + expect_snapshot( + error = TRUE, + coerce_to_sparse_data_frame(mtcars) + ) + expect_snapshot( + error = TRUE, + coerce_to_sparse_data_frame(1:10) + ) +}) + +### coerce_to_sparse_tibble ---------------------------------------------------- + +test_that("coerce_to_sparse_tibble() works", { + skip_if_not_installed("Matrix") + skip_if_not_installed("tibble") + + sparse_mat <- Matrix::diag(1:10, 10, 10) + sparse_mat <- Matrix::Matrix(sparse_mat, sparse = TRUE) + sparse_mat <- as(sparse_mat, "generalMatrix") + sparse_mat <- as(sparse_mat, "CsparseMatrix") + colnames(sparse_mat) <- letters[1:10] + rownames(sparse_mat) <- 1:10 + + res <- coerce_to_sparse_tibble(sparse_mat) + + exp <- lapply(1:10, function(x) sparse_double(x, x, length = 10)) + names(exp) <- letters[1:10] + exp <- tibble::as_tibble(exp) + + expect_identical(res, exp) +}) + +test_that("coerce_to_sparse_tibble() works with non-dgCMatrix input", { + skip_if_not_installed("Matrix") + skip_if_not_installed("tibble") + + sparse_mat <- Matrix::diag(1:10, 10, 10) + sparse_mat <- Matrix::Matrix(sparse_mat, sparse = TRUE) + colnames(sparse_mat) <- letters[1:10] + rownames(sparse_mat) <- 1:10 + + res <- coerce_to_sparse_tibble(sparse_mat) + + exp <- lapply(1:10, function(x) sparse_double(x, x, length = 10)) + names(exp) <- letters[1:10] + exp <- tibble::as_tibble(exp) + + expect_identical(res, exp) +}) + +test_that("coerce_to_sparse_tibble() errors with no column names", { + skip_if_not_installed("Matrix") + skip_if_not_installed("tibble") + + sparse_mat <- Matrix::diag(1:10, 10, 10) + sparse_mat <- Matrix::Matrix(sparse_mat, sparse = TRUE) + + expect_snapshot( + error = TRUE, + coerce_to_sparse_tibble(sparse_mat) + ) +}) + +test_that("coerce_to_sparse_tibble() errors with wrong input", { + expect_snapshot( + error = TRUE, + coerce_to_sparse_tibble(mtcars) + ) + expect_snapshot( + error = TRUE, + coerce_to_sparse_tibble(1:10) + ) +}) \ No newline at end of file