Skip to content

Commit

Permalink
Merge pull request #77 from r-lib/fix-coerce_to_sparse_matrix
Browse files Browse the repository at this point in the history
Improvments to `coerce_to_sparse_matrix()`
  • Loading branch information
EmilHvitfeldt authored Sep 28, 2024
2 parents 4b4774c + 155e6fd commit b29b723
Show file tree
Hide file tree
Showing 4 changed files with 286 additions and 2 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: sparsevctrs
Title: Sparse Vectors for Use in Data Frames
Version: 0.1.0.9001
Version: 0.1.0.9002
Authors@R: c(
person("Emil", "Hvitfeldt", , "emil.hvitfeldt@posit.co", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-0679-1945")),
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@

* `is_sparse_vector()` has been rewritten for speed improvement. (#76)

* Fixed bugs where `coerce_to_sparse_matrix()` would error for completely sparse columns. (#77)

* `coerce_to_sparse_matrix()` Now turns dense zeroes into sparse zeroes. (#77)

# sparsevctrs 0.1.0

* Initial CRAN submission.
18 changes: 17 additions & 1 deletion R/coerce.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,27 @@ coerce_to_sparse_matrix <- function(x, call = rlang::caller_env(0)) {
all_positions <- unlist(all_positions, use.names = FALSE)
all_values <- unlist(all_values, use.names = FALSE)

# TODO: maybe faster to do this above?
non_zero <- all_values != 0
all_rows <- all_rows[non_zero]
all_positions <- all_positions[non_zero]
all_values <- all_values[non_zero]

n_row <- nrow(x)
n_col <- ncol(x)

if (identical(rownames(x), as.character(seq_len(nrow(x))))) {
row_names <- NULL
} else {
row_names <- rownames(x)
}

res <- Matrix::sparseMatrix(
i = all_positions,
j = all_rows,
x = all_values,
dimnames = list(rownames(x), colnames(x))
dims = c(n_row, n_col),
dimnames = list(row_names, colnames(x))
)
res
}
Expand Down
264 changes: 264 additions & 0 deletions tests/testthat/test-coerce.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,270 @@ test_that("coerce_to_sparse_matrix() works", {
expect_identical(res, exp)
})

test_that("coerce_to_sparse_matrix() with zero rows and columns", {
skip_if_not_installed("Matrix")

dat <- data.frame()
exp <- Matrix::Matrix(nrow = 0, ncol = 0, sparse = TRUE)

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)

dat <- data.frame(x = integer(), y = integer())
exp <- Matrix::Matrix(nrow = 0, ncol = 2, sparse = TRUE)
colnames(exp) <- c("x", "y")

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)


dat <- data.frame(x = 1:2)[, integer()]
exp <- Matrix::Matrix(nrow = 2, ncol = 0, sparse = TRUE)

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)
})

test_that("coerce_to_sparse_matrix() works with single all sparse vector", {
skip_if_not_installed("Matrix")

exp <- Matrix::Matrix(0, nrow = 10, ncol = 1, sparse = TRUE)
colnames(exp) <- c("x")

dat <- data.frame(x = rep(0, 10))

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)

dat <- data.frame(x = sparse_integer(integer(), integer(), 10))

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)
})

test_that("coerce_to_sparse_matrix() works with multiple all sparse vector", {
skip_if_not_installed("Matrix")

exp <- Matrix::Matrix(0, nrow = 10, ncol = 2, sparse = TRUE)
colnames(exp) <- c("x", "y")

dat <- data.frame(x = rep(0, 10), y = rep(0, 10))

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)

dat <- data.frame(
x = sparse_integer(integer(), integer(), 10),
y = sparse_integer(integer(), integer(), 10)
)

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)
})

test_that("coerce_to_sparse_matrix() works with sparse between dense", {
skip_if_not_installed("Matrix")

exp <- Matrix::Matrix(c(1, 0, 0, 0, 0, 1), nrow = 2, ncol = 3, sparse = TRUE)
colnames(exp) <- c("x", "y", "z")

dat <- data.frame(
x = c(1, 0),
y = c(0, 0),
z = c(0, 1)
)

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)

dat <- data.frame(
x = sparse_integer(1, 1, 2),
y = c(0, 0),
z = c(0, 1)
)

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)

dat <- data.frame(
x = c(1, 0),
y = c(0, 0),
z = sparse_integer(1, 2, 2)
)

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)

dat <- data.frame(
x = sparse_integer(1, 1, 2),
y = c(0, 0),
z = sparse_integer(1, 2, 2)
)

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)

dat <- data.frame(
x = c(1, 0),
y = sparse_integer(integer(), integer(), 2),
z = c(0, 1)
)

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)

dat <- data.frame(
x = sparse_integer(1, 1, 2),
y = sparse_integer(integer(), integer(), 2),
z = c(0, 1)
)

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)

dat <- data.frame(
x = c(1, 0),
y = sparse_integer(integer(), integer(), 2),
z = sparse_integer(1, 2, 2)
)

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)

dat <- data.frame(
x = sparse_integer(1, 1, 2),
y = sparse_integer(integer(), integer(), 2),
z = sparse_integer(1, 2, 2)
)

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)
})

test_that("coerce_to_sparse_matrix() works with sparse before dense", {
skip_if_not_installed("Matrix")

exp <- Matrix::Matrix(c(0, 0, 0, 0, 0, 1), nrow = 3, ncol = 2, sparse = TRUE)
colnames(exp) <- c("x", "y")

dat <- data.frame(
x = c(0, 0, 0),
y = c(0, 0, 1)
)

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)

dat <- data.frame(
x = c(0, 0, 0),
y = sparse_integer(1, 3, 3)
)

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)

dat <- data.frame(
x = sparse_integer(integer(), integer(), 3),
y = c(0, 0, 1)
)

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)

dat <- data.frame(
x = sparse_integer(integer(), integer(), 3),
y = sparse_integer(1, 3, 3)
)

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)
})

test_that("coerce_to_sparse_matrix() works with sparse after dense", {
skip_if_not_installed("Matrix")

exp <- Matrix::Matrix(c(1, 0, 0, 0, 0, 0), nrow = 3, ncol = 2, sparse = TRUE)
colnames(exp) <- c("x", "y")

dat <- data.frame(
x = c(1, 0, 0),
y = c(0, 0, 0)
)

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)

dat <- data.frame(
x = sparse_integer(1, 1, 3),
y = c(0, 0, 0)
)

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)

dat <- data.frame(
x = c(1, 0, 0),
y = sparse_integer(integer(), integer(), 3)
)

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)

dat <- data.frame(
x = sparse_integer(1, 1, 3),
y = sparse_integer(integer(), integer(), 3)
)

expect_identical(
coerce_to_sparse_matrix(dat),
exp
)
})

test_that("coerce_to_sparse_matrix() errors on wrong input", {
skip_if_not_installed("Matrix")

Expand Down

0 comments on commit b29b723

Please sign in to comment.