Skip to content

Commit

Permalink
Merge pull request #54 from EmilHvitfeldt/altrep-sparse-strings
Browse files Browse the repository at this point in the history
Add sparse character
  • Loading branch information
EmilHvitfeldt authored May 16, 2024
2 parents 062dbce + 0e80f6c commit 6a458d2
Show file tree
Hide file tree
Showing 20 changed files with 1,145 additions and 13 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
# Generated by roxygen2: do not edit by hand

export(as_sparse_character)
export(as_sparse_double)
export(as_sparse_integer)
export(coerce_to_sparse_data_frame)
export(coerce_to_sparse_matrix)
export(coerce_to_sparse_tibble)
export(is_sparse_character)
export(is_sparse_double)
export(is_sparse_integer)
export(is_sparse_vector)
export(sparse_character)
export(sparse_default)
export(sparse_double)
export(sparse_integer)
Expand Down
24 changes: 23 additions & 1 deletion R/coerce-vector.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ as_sparse_double <- function(x, default = 0) {

#' @rdname coerce-vector
#' @export
as_sparse_integer <- function(x, default = 0) {
as_sparse_integer <- function(x, default = 0L) {
if (is_sparse_integer(x)) {
return(x)
}
Expand All @@ -60,4 +60,26 @@ as_sparse_integer <- function(x, default = 0) {
length = length(x),
default = default
)
}

#' @rdname coerce-vector
#' @export
as_sparse_character <- function(x, default = "") {
if (is_sparse_character(x)) {
return(x)
}

check_string(default)

values <- vctrs::vec_cast(x, character())
default <- vctrs::vec_cast(default, character())

index <- which(x != default | is.na(x))

sparse_character(
values = x[index],
positions = index,
length = length(x),
default = default
)
}
78 changes: 78 additions & 0 deletions R/sparse_character.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#' Create sparse character vector
#'
#' Construction of vectors where only values and positions are recorded. The
#' Length and default values determine all other information.
#'
#' @param values integer vector, values of non-zero entries.
#' @param positions integer vector, indices of non-zero entries.
#' @param length integer value, Length of vector.
#' @param default integer value, value at indices not specified by `positions`.
#' Defaults to `""`. Cannot be `NA`.
#'
#' @details
#'
#' `values` and `positions` are expected to be the same length, and are allowed
#' to both have zero length.
#'
#' Allowed values for `value` are character values. Missing values such as `NA`
#' and `NA_real_` are allowed as they are turned into `NA_character_`.
#' Everything else is disallowed. The values are also not allowed to take the
#' same value as `default`.
#'
#' `positions` should be integers or integer-like doubles. Everything else is
#' not allowed. Positions should furthermore be positive (`0` not allowed),
#' unique, and in increasing order. Lastly they should all be smaller that
#' `length`.
#'
#' For developers:
#'
#' setting `options("sparsevctrs.verbose_materialize" = TRUE)` will print a
#' message each time a sparse vector has been forced to materialize.
#'
#' @seealso [sparse_double()] [sparse_integer()]
#'
#' @examples
#' sparse_character(character(), integer(), 10)
#'
#' sparse_character(c("A", "C", "E"), c(2, 5, 10), 10)
#'
#' str(
#' sparse_character(c("A", "C", "E"), c(2, 5, 10), 1000000000)
#' )
#' @export
sparse_character <- function(values, positions, length, default = "") {
check_string(default)
check_number_whole(length, min = 0)
if (!is.integer(length)) {
length <- as.integer(length)
}

values <- vctrs::vec_cast(values, character())
default <- vctrs::vec_cast(default, character())

validate_positions(positions, length, len_values = length(values))
positions <- as.integer(positions)

if (any(values == default, na.rm = TRUE)) {
offenders <- which(values == default)
cli::cli_abort(
c(
x = "{.arg values} value must not be equal to the default {default}.",
i = "{default} values at index: {offenders}."
)
)
}

new_sparse_character(values, positions, length, default)
}

new_sparse_character <- function(values, positions, length, default) {
x <- list(
val = values,
pos = positions,
len = length,
default = default
)

.Call(ffi_altrep_new_sparse_string, x)
}
2 changes: 1 addition & 1 deletion R/sparse_double.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
#' setting `options("sparsevctrs.verbose_materialize" = TRUE)` will print a
#' message each time a sparse vector has been forced to materialize.
#'
#' @seealso [sparse_integer()]
#' @seealso [sparse_integer()] [sparse_character()]
#'
#' @examples
#' sparse_double(numeric(), integer(), 10)
Expand Down
4 changes: 2 additions & 2 deletions R/sparse_integer.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#' setting `options("sparsevctrs.verbose_materialize" = TRUE)` will print a
#' message each time a sparse vector has been forced to materialize.
#'
#' @seealso [sparse_double()]
#' @seealso [sparse_double()] [sparse_character()]
#'
#' @examples
#' sparse_integer(integer(), integer(), 10)
Expand All @@ -41,7 +41,7 @@
#' sparse_integer(c(4, 5, 7), c(2, 5, 10), 1000000000)
#' )
#' @export
sparse_integer <- function(values, positions, length, default = 0) {
sparse_integer <- function(values, positions, length, default = 0L) {
check_number_whole(default)
check_number_whole(length, min = 0)
if (!is.integer(length)) {
Expand Down
29 changes: 26 additions & 3 deletions R/type-predicates.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
#'
#' @details
#' `is_sparse_vector()` is a general function that detects any type of sparse
#' vector created with this package. `is_sparse_double()` and
#' `is_sparse_integer()` are more specific functions that only detects the type.
#' vector created with this package. `is_sparse_double()`,
#' `is_sparse_integer()`, and `is_sparse_character()` are more specific
#' functions that only detects the type.
#'
#' @examples
#' x_sparse <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10)
Expand All @@ -19,6 +20,9 @@
#' is_sparse_double(x_sparse)
#' is_sparse_double(x_dense)
#'
#' is_sparse_character(x_sparse)
#' is_sparse_character(x_dense)
#'
#' # Forced materialization
#' is_sparse_vector(x_sparse[])
#' @name type-predicates
Expand All @@ -34,7 +38,13 @@ is_sparse_vector <- function(x) {

res <- as.character(res[[1]])

res %in% c("altrep_sparse_double", "altrep_sparse_integer")
valid <- c(
"altrep_sparse_double",
"altrep_sparse_integer",
"altrep_sparse_string"
)

res %in% valid
}

#' @rdname type-predicates
Expand Down Expand Up @@ -62,3 +72,16 @@ is_sparse_integer <- function(x) {

res == "altrep_sparse_integer"
}

#' @rdname type-predicates
#' @export
is_sparse_character <- function(x) {
res <- .Call(ffi_extract_altrep_class, x)
if (is.null(res)) {
return(FALSE)
}

res <- as.character(res[[1]])

res == "altrep_sparse_string"
}
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ reference:
contents:
- sparse_double
- sparse_integer
- sparse_character

- title: Convertion functions
contents:
Expand Down
5 changes: 4 additions & 1 deletion man/coerce-vector.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

53 changes: 53 additions & 0 deletions man/sparse_character.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/sparse_double.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/sparse_integer.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 9 additions & 2 deletions man/type-predicates.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 6a458d2

Please sign in to comment.