From 186d435d0042671060db6f32e1dc013b616703e5 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 15 May 2024 18:44:39 -0700 Subject: [PATCH 1/6] add c side to sparse-logical --- src/altrep-sparse-logical.c | 398 ++++++++++++++++++++++++++++++++++++ src/init.c | 8 + src/sparse-utils.c | 7 + src/sparse-utils.h | 2 + 4 files changed, 415 insertions(+) create mode 100644 src/altrep-sparse-logical.c diff --git a/src/altrep-sparse-logical.c b/src/altrep-sparse-logical.c new file mode 100644 index 0000000..4b1d9f9 --- /dev/null +++ b/src/altrep-sparse-logical.c @@ -0,0 +1,398 @@ +#define R_NO_REMAP +#include +#include + +#include "sparse-utils.h" + +// Initialised at load time + +R_altrep_class_t altrep_sparse_logical_class; + +SEXP ffi_altrep_new_sparse_logical(SEXP x) { + return R_new_altrep(altrep_sparse_logical_class, x, R_NilValue); +} + +SEXP alrep_sparse_logical_Materialize(SEXP x) { + if (!Rf_isNull(Rf_GetOption1(Rf_install("sparsevctrs.verbose_materialize")) + )) { + Rprintf("sparsevctrs: Sparse vector materialized\n"); + } + + SEXP out = R_altrep_data2(x); + + if (out != R_NilValue) { + return out; + } + + SEXP val = extract_val(x); + const int* v_val = LOGICAL_RO(val); + + SEXP pos = extract_pos(x); + const int* v_pos = INTEGER_RO(pos); + + const R_xlen_t len = extract_len(x); + + const double v_default_val = extract_default_double(x); + + out = PROTECT(Rf_allocVector(LGLSXP, len)); + int* v_out = LOGICAL(out); + + for (R_xlen_t i = 0; i < len; ++i) { + v_out[i] = v_default_val; + } + + const R_xlen_t n_positions = Rf_xlength(pos); + + for (R_xlen_t i = 0; i < n_positions; ++i) { + const int loc = v_pos[i] - 1; + v_out[loc] = v_val[i]; + } + + R_set_altrep_data2(x, out); + + UNPROTECT(1); + return out; +} + +// ----------------------------------------------------------------------------- +// ALTVEC + +void* altrep_sparse_logical_Dataptr(SEXP x, Rboolean writeable) { + return STDVEC_DATAPTR(alrep_sparse_logical_Materialize(x)); +} + +const void* altrep_sparse_logical_Dataptr_or_null(SEXP x) { + SEXP out = R_altrep_data2(x); + + if (out == R_NilValue) { + return NULL; + } else { + return STDVEC_DATAPTR(out); + } +} + +static SEXP altrep_sparse_logical_Extract_subset(SEXP x, SEXP indx, SEXP call) { + if (!is_index_handleable(indx)) { + return NULL; + } + + const R_xlen_t len = extract_len(x); + + SEXP val = extract_val(x); + const int* v_val = LOGICAL_RO(val); + + SEXP pos = extract_pos(x); + const int* v_pos = INTEGER_RO(pos); + const R_xlen_t n_pos = Rf_xlength(pos); + + const int* v_indx = INTEGER_RO(indx); + + const R_xlen_t size = Rf_xlength(indx); + + R_xlen_t n_hits = 0; + SEXP matches = PROTECT(Rf_allocVector(INTSXP, size)); + int* v_matches = INTEGER(matches); + + for (R_xlen_t i = 0; i < size; ++i) { + // 1 indexed! + const int index = v_indx[i]; + + if (index == NA_INTEGER) { + v_matches[i] = NA_INTEGER; + ++n_hits; + continue; + } + + if (index > len) { + // (Uses `>` not `>=` because `index` is 1 indexed) + // OOB + v_matches[i] = NA_INTEGER; + ++n_hits; + continue; + } + + const R_xlen_t loc = binary_search(index, v_pos, n_pos); + + if (loc == n_pos) { + // Not in `pos`, gets default value + v_matches[i] = (int) n_pos; + continue; + } + + // Did find in `pos` + v_matches[i] = (int) loc; + ++n_hits; + } + + SEXP out = PROTECT(Rf_allocVector(VECSXP, 4)); + + SEXP out_val = Rf_allocVector(LGLSXP, n_hits); + SET_VECTOR_ELT(out, 0, out_val); + int* v_out_val = LOGICAL(out_val); + + SEXP out_pos = Rf_allocVector(INTSXP, n_hits); + SET_VECTOR_ELT(out, 1, out_pos); + int* v_out_pos = INTEGER(out_pos); + + SEXP out_length = Rf_ScalarInteger((int) size); + SET_VECTOR_ELT(out, 2, out_length); + + SEXP out_default = extract_default(x); + SET_VECTOR_ELT(out, 3, out_default); + + SEXP names = Rf_allocVector(STRSXP, 4); + Rf_setAttrib(out, R_NamesSymbol, names); + SET_STRING_ELT(names, 0, Rf_mkChar("val")); + SET_STRING_ELT(names, 1, Rf_mkChar("pos")); + SET_STRING_ELT(names, 2, Rf_mkChar("len")); + SET_STRING_ELT(names, 3, Rf_mkChar("default")); + + R_xlen_t i_out = 0; + + for (R_xlen_t i = 0; i < size; ++i) { + const int match = v_matches[i]; + + if (match == (int) n_pos) { + // Default value case + continue; + } + + if (match == NA_INTEGER) { + v_out_val[i_out] = NA_LOGICAL; + v_out_pos[i_out] = (int) i + 1; + ++i_out; + continue; + } + + // Otherwise we have a hit from `pos` + v_out_val[i_out] = v_val[match]; + v_out_pos[i_out] = (int) i + 1; + ++i_out; + } + + SEXP altrep = ffi_altrep_new_sparse_logical(out); + + UNPROTECT(2); + return altrep; +} + +// ----------------------------------------------------------------------------- +// ALTREP + +R_xlen_t altrep_sparse_logical_Length(SEXP x) { + R_xlen_t out = extract_len(x); + + return out; +} + +// What gets printed when .Internal(inspect()) is used +Rboolean altrep_sparse_logical_Inspect( + SEXP x, + int pre, + int deep, + int pvec, + void (*inspect_subtree)(SEXP, int, int, int) +) { + Rprintf( + "sparsevctrs_altrep_sparse_logical (materialized=%s, length=%i)\n", + R_altrep_data2(x) != R_NilValue ? "T" : "F", + (int) extract_len(x) + ); + return TRUE; +} + +SEXP altrep_sparse_logical_Duplicate(SEXP x, Rboolean deep) { + SEXP data1 = R_altrep_data1(x); + SEXP data2 = R_altrep_data2(x); + + /* If deep or already materialized, do the default behavior */ + if (deep || data2 != R_NilValue) { + return NULL; + } + + return ffi_altrep_new_sparse_logical(data1); +} + +// ----------------------------------------------------------------------------- +// ALTLOGICAL + +static int altrep_sparse_logical_Elt(SEXP x, R_xlen_t i) { + SEXP val = extract_val(x); + + SEXP pos = extract_pos(x); + const int* v_pos = INTEGER_RO(pos); + const R_xlen_t size = Rf_xlength(pos); + + const R_xlen_t len = extract_len(x); + + const double v_default_val = extract_default_double(x); + + if (i > len) { + // OOB of vector itself + return NA_LOGICAL; + } + + // TODO: Add `r_xlen_t_to_int()` + const int needle = (int) i + 1; + const R_xlen_t loc = binary_search(needle, v_pos, size); + + if (loc == size) { + // Can't find it, must be the default value + return v_default_val; + } else { + // Look it up in `val` + return LOGICAL_ELT(val, loc); + } +} + +int altrep_sparse_logical_Is_sorted(SEXP x) { + SEXP pos = extract_pos(x); + const int* v_pos = INTEGER_RO(pos); + + const R_xlen_t pos_len = Rf_xlength(pos); + + SEXP val = extract_val(x); + const int* v_val = LOGICAL_RO(val); + + const double v_default_val = extract_default_double(x); + + // zero length vector are by def sorted + if (pos_len == 0) { + return TRUE; + } + + // 1 length vector are by def sorted + if (pos_len == 1) { + if (R_IsNA(v_val[0])) { + // unless equal to NA + return FALSE; + } else { + return TRUE; + } + } + + double current_value; + + if (v_pos[0] == 1) { + current_value = v_val[0]; + } else { + current_value = v_default_val; + } + + for (R_xlen_t i = 0; i < pos_len; i++) { + if (R_IsNA(v_val[i])) { + return FALSE; + } + + if (v_val[i] < current_value) { + return FALSE; + } + + current_value = v_val[i]; + + if (i + 1 == pos_len) { + break; + } + + // If there is a gap between values check against default + if ((v_pos[i + 1] - v_pos[i]) > 1) { + if (v_default_val < current_value) { + return FALSE; + } + + current_value = v_default_val; + } + } + + return TRUE; +} + +static int altrep_sparse_logical_No_NA_method(SEXP x) { + const SEXP val = extract_val(x); + const int* v_val = LOGICAL_RO(val); + const R_xlen_t val_len = Rf_xlength(val); + + for (R_xlen_t i = 0; i < val_len; i++) { + if (R_IsNA(v_val[i])) { + return FALSE; + } + } + + return TRUE; +} + +static SEXP altrep_sparse_logical_Sum_method(SEXP x, Rboolean na_rm) { + const SEXP val = extract_val(x); + const int* v_val = LOGICAL_RO(val); + const R_xlen_t val_len = Rf_xlength(val); + const R_xlen_t len = extract_len(x); + + double sum = 0; + + if (len == 0) { + return Rf_ScalarLogical(sum); + } + + for (R_xlen_t i = 0; i < val_len; i++) { + if (R_IsNA(v_val[i])) { + if (na_rm) { + continue; + } else { + return Rf_ScalarLogical(NA_LOGICAL); + } + } + sum = sum + v_val[i]; + } + + // default can be non-zero + const double v_default_val = extract_default_double(x); + + if (v_default_val != 0) { + sum = sum + (len - val_len) * v_default_val; + } + + return Rf_ScalarLogical(sum); +} + +// ----------------------------------------------------------------------------- + +void sparsevctrs_init_altrep_sparse_logical(DllInfo* dll) { + altrep_sparse_logical_class = + R_make_altlogical_class("altrep_sparse_logical", "sparsevctrs", dll); + + // ALTVEC + R_set_altvec_Dataptr_method( + altrep_sparse_logical_class, altrep_sparse_logical_Dataptr + ); + R_set_altvec_Dataptr_or_null_method( + altrep_sparse_logical_class, altrep_sparse_logical_Dataptr_or_null + ); + R_set_altvec_Extract_subset_method( + altrep_sparse_logical_class, altrep_sparse_logical_Extract_subset + ); + + // ALTREP + R_set_altrep_Length_method( + altrep_sparse_logical_class, altrep_sparse_logical_Length + ); + R_set_altrep_Inspect_method( + altrep_sparse_logical_class, altrep_sparse_logical_Inspect + ); + R_set_altrep_Duplicate_method( + altrep_sparse_logical_class, altrep_sparse_logical_Duplicate + ); + + // ALTLOGICAL + R_set_altlogical_Elt_method( + altrep_sparse_logical_class, altrep_sparse_logical_Elt + ); + R_set_altlogical_Is_sorted_method( + altrep_sparse_logical_class, altrep_sparse_logical_Is_sorted + ); + R_set_altlogical_No_NA_method( + altrep_sparse_logical_class, altrep_sparse_logical_No_NA_method + ); + R_set_altlogical_Sum_method( + altrep_sparse_logical_class, altrep_sparse_logical_Sum_method + ); +} diff --git a/src/init.c b/src/init.c index 6fe4f98..c5d387d 100644 --- a/src/init.c +++ b/src/init.c @@ -14,6 +14,10 @@ extern void sparsevctrs_init_altrep_sparse_integer(DllInfo*); extern SEXP ffi_altrep_new_sparse_string(SEXP); extern void sparsevctrs_init_altrep_sparse_string(DllInfo*); +// Defined in altrep-sparse-logical.c +extern SEXP ffi_altrep_new_sparse_logical(SEXP); +extern void sparsevctrs_init_altrep_sparse_logical(DllInfo*); + static const R_CallMethodDef CallEntries[] = { {"ffi_altrep_new_sparse_double", (DL_FUNC) &ffi_altrep_new_sparse_double, @@ -24,6 +28,9 @@ static const R_CallMethodDef CallEntries[] = { {"ffi_altrep_new_sparse_string", (DL_FUNC) &ffi_altrep_new_sparse_string, 1}, + {"ffi_altrep_new_sparse_logical", + (DL_FUNC) &ffi_altrep_new_sparse_logical, + 1}, {"ffi_altrep_sparse_positions", (DL_FUNC) &ffi_altrep_sparse_positions, 1}, {"ffi_altrep_sparse_values", (DL_FUNC) &ffi_altrep_sparse_values, 1}, {"ffi_altrep_sparse_default", (DL_FUNC) &ffi_altrep_sparse_default, 1}, @@ -38,4 +45,5 @@ void R_init_sparsevctrs(DllInfo* dll) { sparsevctrs_init_altrep_sparse_double(dll); sparsevctrs_init_altrep_sparse_integer(dll); sparsevctrs_init_altrep_sparse_string(dll); + sparsevctrs_init_altrep_sparse_logical(dll); } diff --git a/src/sparse-utils.c b/src/sparse-utils.c index 8d0318d..22ee966 100644 --- a/src/sparse-utils.c +++ b/src/sparse-utils.c @@ -49,6 +49,13 @@ SEXP extract_default_string(SEXP x) { return out; } +Rboolean extract_default_logical(SEXP x) { + SEXP default_val = extract_default(x); + Rboolean out = LOGICAL_ELT(default_val, 0); + + return out; +} + bool is_altrep(SEXP x) { return (bool) ALTREP(x); } diff --git a/src/sparse-utils.h b/src/sparse-utils.h index a0fe022..7481b36 100644 --- a/src/sparse-utils.h +++ b/src/sparse-utils.h @@ -19,6 +19,8 @@ int extract_default_integer(SEXP x); SEXP extract_default_string(SEXP x); +Rboolean extract_default_logical(SEXP x); + bool is_altrep(SEXP x); SEXP ffi_extract_altrep_class(SEXP x); From 5f836c4ce44d03470b445d5762841f2b076a001f Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 15 May 2024 18:55:42 -0700 Subject: [PATCH 2/6] add R side of sparse logicals --- NAMESPACE | 1 + R/sparse_logical.R | 80 +++++++++++++++++++++++++++++++++++++++++++ R/validate-input.R | 10 ++++++ _pkgdown.yml | 1 + man/sparse_logical.Rd | 52 ++++++++++++++++++++++++++++ 5 files changed, 144 insertions(+) create mode 100644 R/sparse_logical.R create mode 100644 man/sparse_logical.Rd diff --git a/NAMESPACE b/NAMESPACE index 0552523..e46fdb0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,6 +14,7 @@ export(sparse_character) export(sparse_default) export(sparse_double) export(sparse_integer) +export(sparse_logical) export(sparse_positions) export(sparse_values) import(rlang) diff --git a/R/sparse_logical.R b/R/sparse_logical.R new file mode 100644 index 0000000..28e9c90 --- /dev/null +++ b/R/sparse_logical.R @@ -0,0 +1,80 @@ +#' Create sparse logical vector +#' +#' Construction of vectors where only values and positions are recorded. The +#' Length and default values determine all other information. +#' +#' @param values logical vector, values of non-zero entries. +#' @param positions integer vector, indices of non-zero entries. +#' @param length integer value, Length of vector. +#' @param default logical value, value at indices not specified by `positions`. +#' Defaults to `FALSE`. Cannot be `NA`. +#' +#' @details +#' +#' `values` and `positions` are expected to be the same length, and are allowed +#' to both have zero length. +#' +#' Allowed values for `value` are logical values. Missing values such as `NA` +#' and `NA_real_` are allowed. Everything else is disallowed, The values are +#' also not allowed to take the same value as `default`. +#' +#' `positions` should be integers or integer-like doubles. Everything else is +#' not allowed. Positions should furthermore be positive (`0` not allowed), +#' unique, and in increasing order. Lastly they should all be smaller that +#' `length`. +#' +#' For developers: +#' +#' setting `options("sparsevctrs.verbose_materialize" = TRUE)` will print a +#' message each time a sparse vector has been forced to materialize. +#' +#' @seealso [sparse_double()] [sparse_integer()] [sparse_character()] +#' +#' @examples +#' sparse_logical(logical(), integer(), 10) +#' +#' sparse_logical(c(TRUE, NA, TRUE), c(2, 5, 10), 10) +#' +#' str( +#' sparse_logical(c(TRUE, NA, TRUE), c(2, 5, 10), 1000000000) +#' ) +#' @export +sparse_logical <- function(values, positions, length, default = FALSE) { + check_bool(default) + check_number_whole(length, min = 0) + if (!is.integer(length)) { + length <- as.integer(length) + } + + if (identical(values, NA)) { + values <- NA_real_ + } + + validate_values_logical(values) + + validate_positions(positions, length, len_values = length(values)) + positions <- as.integer(positions) + + if (any(values == default, na.rm = TRUE)) { + offenders <- which(values == default) + cli::cli_abort( + c( + x = "{.arg values} value must not be equal to the default {default}.", + i = "{default} values at index: {offenders}." + ) + ) + } + + new_sparse_logical(values, positions, length, default) +} + +new_sparse_logical <- function(values, positions, length, default) { + x <- list( + val = values, + pos = positions, + len = length, + default = default + ) + + .Call(ffi_altrep_new_sparse_logical, x) +} diff --git a/R/validate-input.R b/R/validate-input.R index df660df..239a15e 100644 --- a/R/validate-input.R +++ b/R/validate-input.R @@ -140,4 +140,14 @@ validate_values_integer <- function(values, call = rlang::caller_env()) { call = call ) } +} + +validate_values_logical <- function(values, call = rlang::caller_env()) { + if (!is.logical(values)) { + cli::cli_abort( + "{.arg values} must be a logical vector, \\ + not {.obj_type_friendly {values}}.", + call = call + ) + } } \ No newline at end of file diff --git a/_pkgdown.yml b/_pkgdown.yml index 03f8876..a6b8058 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -8,6 +8,7 @@ reference: - sparse_double - sparse_integer - sparse_character + - sparse_logical - title: Convertion functions contents: diff --git a/man/sparse_logical.Rd b/man/sparse_logical.Rd new file mode 100644 index 0000000..50e3456 --- /dev/null +++ b/man/sparse_logical.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sparse_logical.R +\name{sparse_logical} +\alias{sparse_logical} +\title{Create sparse logical vector} +\usage{ +sparse_logical(values, positions, length, default = FALSE) +} +\arguments{ +\item{values}{logical vector, values of non-zero entries.} + +\item{positions}{integer vector, indices of non-zero entries.} + +\item{length}{integer value, Length of vector.} + +\item{default}{logical value, value at indices not specified by \code{positions}. +Defaults to \code{FALSE}. Cannot be \code{NA}.} +} +\description{ +Construction of vectors where only values and positions are recorded. The +Length and default values determine all other information. +} +\details{ +\code{values} and \code{positions} are expected to be the same length, and are allowed +to both have zero length. + +Allowed values for \code{value} are logical values. Missing values such as \code{NA} +and \code{NA_real_} are allowed. Everything else is disallowed, The values are +also not allowed to take the same value as \code{default}. + +\code{positions} should be integers or integer-like doubles. Everything else is +not allowed. Positions should furthermore be positive (\code{0} not allowed), +unique, and in increasing order. Lastly they should all be smaller that +\code{length}. + +For developers: + +setting \code{options("sparsevctrs.verbose_materialize" = TRUE)} will print a +message each time a sparse vector has been forced to materialize. +} +\examples{ +sparse_logical(logical(), integer(), 10) + +sparse_logical(c(TRUE, NA, TRUE), c(2, 5, 10), 10) + +str( + sparse_logical(c(TRUE, NA, TRUE), c(2, 5, 10), 1000000000) +) +} +\seealso{ +\code{\link[=sparse_double]{sparse_double()}} \code{\link[=sparse_integer]{sparse_integer()}} \code{\link[=sparse_character]{sparse_character()}} +} From f0162bcd08829ba2ff9a4ae581794a1acf602543 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 15 May 2024 19:20:00 -0700 Subject: [PATCH 3/6] add tests --- NAMESPACE | 1 + R/sparse_logical.R | 4 - R/type-predicates.R | 16 +- man/type-predicates.Rd | 3 + src/altrep-sparse-logical.c | 1 + tests/testthat/_snaps/sparse_logical.md | 251 +++++++++++++++++++++ tests/testthat/test-sparse_logical.R | 279 ++++++++++++++++++++++++ 7 files changed, 550 insertions(+), 5 deletions(-) create mode 100644 tests/testthat/_snaps/sparse_logical.md create mode 100644 tests/testthat/test-sparse_logical.R diff --git a/NAMESPACE b/NAMESPACE index e46fdb0..d63e1f5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,7 @@ export(coerce_to_sparse_tibble) export(is_sparse_character) export(is_sparse_double) export(is_sparse_integer) +export(is_sparse_logical) export(is_sparse_vector) export(sparse_character) export(sparse_default) diff --git a/R/sparse_logical.R b/R/sparse_logical.R index 28e9c90..9060d18 100644 --- a/R/sparse_logical.R +++ b/R/sparse_logical.R @@ -46,10 +46,6 @@ sparse_logical <- function(values, positions, length, default = FALSE) { length <- as.integer(length) } - if (identical(values, NA)) { - values <- NA_real_ - } - validate_values_logical(values) validate_positions(positions, length, len_values = length(values)) diff --git a/R/type-predicates.R b/R/type-predicates.R index b9fa2e0..3a71104 100644 --- a/R/type-predicates.R +++ b/R/type-predicates.R @@ -41,7 +41,8 @@ is_sparse_vector <- function(x) { valid <- c( "altrep_sparse_double", "altrep_sparse_integer", - "altrep_sparse_string" + "altrep_sparse_string", + "altrep_sparse_logical" ) res %in% valid @@ -85,3 +86,16 @@ is_sparse_character <- function(x) { res == "altrep_sparse_string" } + +#' @rdname type-predicates +#' @export +is_sparse_logical <- function(x) { + res <- .Call(ffi_extract_altrep_class, x) + if (is.null(res)) { + return(FALSE) + } + + res <- as.character(res[[1]]) + + res == "altrep_sparse_logical" +} diff --git a/man/type-predicates.Rd b/man/type-predicates.Rd index f28989b..57b468e 100644 --- a/man/type-predicates.Rd +++ b/man/type-predicates.Rd @@ -6,6 +6,7 @@ \alias{is_sparse_double} \alias{is_sparse_integer} \alias{is_sparse_character} +\alias{is_sparse_logical} \title{Sparse vector type checkers} \usage{ is_sparse_vector(x) @@ -15,6 +16,8 @@ is_sparse_double(x) is_sparse_integer(x) is_sparse_character(x) + +is_sparse_logical(x) } \arguments{ \item{x}{value to be checked.} diff --git a/src/altrep-sparse-logical.c b/src/altrep-sparse-logical.c index 4b1d9f9..061f459 100644 --- a/src/altrep-sparse-logical.c +++ b/src/altrep-sparse-logical.c @@ -322,6 +322,7 @@ static int altrep_sparse_logical_No_NA_method(SEXP x) { } static SEXP altrep_sparse_logical_Sum_method(SEXP x, Rboolean na_rm) { + Rprintf("const char *, ..."); const SEXP val = extract_val(x); const int* v_val = LOGICAL_RO(val); const R_xlen_t val_len = Rf_xlength(val); diff --git a/tests/testthat/_snaps/sparse_logical.md b/tests/testthat/_snaps/sparse_logical.md new file mode 100644 index 0000000..aab9592 --- /dev/null +++ b/tests/testthat/_snaps/sparse_logical.md @@ -0,0 +1,251 @@ +# input checking is done correctly + + Code + sparse_logical("1", 1, 1) + Condition + Error in `sparse_logical()`: + ! `values` must be a logical vector, not a string. + +--- + + Code + sparse_logical(1, 1, 1) + Condition + Error in `sparse_logical()`: + ! `values` must be a logical vector, not a number. + +--- + + Code + sparse_logical(NULL, 1, 1) + Condition + Error in `sparse_logical()`: + ! `values` must be a logical vector, not NULL. + +--- + + Code + sparse_logical(Inf, 1, 1) + Condition + Error in `sparse_logical()`: + ! `values` must be a logical vector, not a number. + +--- + + Code + sparse_logical(NaN, 1, 1) + Condition + Error in `sparse_logical()`: + ! `values` must be a logical vector, not a numeric `NA`. + +--- + + Code + sparse_logical(TRUE, 1.5, 1) + Condition + Error in `sparse_logical()`: + x `positions` must contain integer values. + i Non-integer values at index: 1. + +--- + + Code + sparse_logical(TRUE, "1", 1) + Condition + Error in `sparse_logical()`: + ! `positions` must be a integer vector, not a string. + +--- + + Code + sparse_logical(TRUE, NULL, 1) + Condition + Error in `sparse_logical()`: + ! `positions` must be a integer vector, not NULL. + +--- + + Code + sparse_logical(TRUE, NA, 1) + Condition + Error in `sparse_logical()`: + ! `positions` must be a integer vector, not `NA`. + +--- + + Code + sparse_logical(TRUE, Inf, 1) + Condition + Error in `sparse_logical()`: + x `positions` must not contain infinite values. + i Infinite values at index: 1. + +--- + + Code + sparse_logical(TRUE, NaN, 1) + Condition + Error in `sparse_logical()`: + x `positions` must not contain NaN values. + i NaN values at index: 1. + +--- + + Code + sparse_logical(logical(0), integer(0), c(1, 10)) + Condition + Error in `sparse_logical()`: + ! `length` must be a whole number, not a double vector. + +--- + + Code + sparse_logical(logical(0), integer(0), 1.5) + Condition + Error in `sparse_logical()`: + ! `length` must be a whole number, not the number 1.5. + +--- + + Code + sparse_logical(logical(0), integer(0), "1") + Condition + Error in `sparse_logical()`: + ! `length` must be a whole number, not the string "1". + +--- + + Code + sparse_logical(logical(0), integer(0), NA) + Condition + Error in `sparse_logical()`: + ! `length` must be a whole number, not `NA`. + +--- + + Code + sparse_logical(logical(0), integer(0), Inf) + Condition + Error in `sparse_logical()`: + ! `length` must be a whole number, not `Inf`. + +--- + + Code + sparse_logical(logical(0), integer(0), NULL) + Condition + Error in `sparse_logical()`: + ! `length` must be a whole number, not `NULL`. + +--- + + Code + sparse_logical(logical(0), integer(0), NaN) + Condition + Error in `sparse_logical()`: + ! `length` must be a whole number, not `NaN`. + +--- + + Code + sparse_logical(c(TRUE, TRUE), 1:6, 10) + Condition + Error in `sparse_logical()`: + ! `value` (2) and `positions` (6) must have the same length. + +--- + + Code + sparse_logical(TRUE, 1:6, 10) + Condition + Error in `sparse_logical()`: + ! `value` (1) and `positions` (6) must have the same length. + +--- + + Code + sparse_logical(c(TRUE, TRUE, TRUE, TRUE), c(1, 1, 5, 6), 10) + Condition + Error in `sparse_logical()`: + x `positions` must not contain any duplicate values. + i Duplicate values at index: 2. + +--- + + Code + sparse_logical(rep(TRUE, 100), rep(1, 100), 100) + Condition + Error in `sparse_logical()`: + x `positions` must not contain any duplicate values. + i Duplicate values at index: 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, ..., 99, and 100. + +--- + + Code + sparse_logical(c(TRUE, TRUE), c(3, 1), 5) + Condition + Error in `sparse_logical()`: + ! `positions` must be sorted in increasing order. + +--- + + Code + sparse_logical(TRUE, 10, 5) + Condition + Error in `sparse_logical()`: + x `positions` value must not be larger than `length`. + i Offending values at index: 1. + +--- + + Code + sparse_logical(rep(TRUE, 50), seq(25, 74), 50) + Condition + Error in `sparse_logical()`: + x `positions` value must not be larger than `length`. + i Offending values at index: 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, ..., 49, and 50. + +--- + + Code + sparse_logical(TRUE, 0, 5) + Condition + Error in `sparse_logical()`: + x `positions` value must positive. + i Non-positive values at index: 1. + +--- + + Code + sparse_logical(rep(TRUE, 101), seq(-50, 50), 100) + Condition + Error in `sparse_logical()`: + x `positions` value must positive. + i Non-positive values at index: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, ..., 50, and 51. + +# default argument is working + + Code + sparse_logical(TRUE, 1, 10, default = TRUE) + Condition + Error in `sparse_logical()`: + x `values` value must not be equal to the default TRUE. + i TRUE values at index: 1. + +--- + + Code + sparse_logical(c(TRUE, TRUE, NA), c(1, 4, 6), 10, default = TRUE) + Condition + Error in `sparse_logical()`: + x `values` value must not be equal to the default TRUE. + i TRUE values at index: 1 and 2. + +# verbose testing + + Code + sparse_logical(TRUE, 1, 1)[] + Output + sparsevctrs: Sparse vector materialized + [1] TRUE + diff --git a/tests/testthat/test-sparse_logical.R b/tests/testthat/test-sparse_logical.R new file mode 100644 index 0000000..e2edaf3 --- /dev/null +++ b/tests/testthat/test-sparse_logical.R @@ -0,0 +1,279 @@ +test_that("input checking is done correctly", { + # value + expect_snapshot( + error = TRUE, + sparse_logical("1", 1, 1) + ) + expect_snapshot( + error = TRUE, + sparse_logical(1, 1, 1) + ) + expect_snapshot( + error = TRUE, + sparse_logical(NULL, 1, 1) + ) + expect_snapshot( + error = TRUE, + sparse_logical(Inf, 1, 1) + ) + expect_snapshot( + error = TRUE, + sparse_logical(NaN, 1, 1) + ) + + # position + expect_snapshot( + error = TRUE, + sparse_logical(TRUE, 1.5, 1) + ) + expect_snapshot( + error = TRUE, + sparse_logical(TRUE, "1", 1) + ) + expect_snapshot( + error = TRUE, + sparse_logical(TRUE, NULL, 1) + ) + expect_snapshot( + error = TRUE, + sparse_logical(TRUE, NA, 1) + ) + expect_snapshot( + error = TRUE, + sparse_logical(TRUE, Inf, 1) + ) + expect_snapshot( + error = TRUE, + sparse_logical(TRUE, NaN, 1) + ) + + # length + expect_no_error( + sparse_logical(logical(0), integer(0), 0) + ) + expect_snapshot( + error = TRUE, + sparse_logical(logical(0), integer(0), c(1, 10)) + ) + expect_snapshot( + error = TRUE, + sparse_logical(logical(0), integer(0), 1.5) + ) + expect_snapshot( + error = TRUE, + sparse_logical(logical(0), integer(0), "1") + ) + expect_snapshot( + error = TRUE, + sparse_logical(logical(0), integer(0), NA) + ) + expect_snapshot( + error = TRUE, + sparse_logical(logical(0), integer(0), Inf) + ) + expect_snapshot( + error = TRUE, + sparse_logical(logical(0), integer(0), NULL) + ) + expect_snapshot( + error = TRUE, + sparse_logical(logical(0), integer(0), NaN) + ) + + # Length restriction + expect_snapshot( + error = TRUE, + sparse_logical(c(TRUE, TRUE), 1:6, 10) + ) + expect_snapshot( + error = TRUE, + sparse_logical(TRUE, 1:6, 10) + ) + + # duplicates in position + expect_snapshot( + error = TRUE, + sparse_logical(c(TRUE, TRUE, TRUE, TRUE), c(1, 1, 5, 6), 10) + ) + expect_snapshot( + error = TRUE, + sparse_logical(rep(TRUE, 100), rep(1, 100), 100) + ) + + # Ordered position + expect_snapshot( + error = TRUE, + sparse_logical(c(TRUE, TRUE), c(3, 1), 5) + ) + + # Too large position values + expect_snapshot( + error = TRUE, + sparse_logical(TRUE, 10, 5) + ) + expect_snapshot( + error = TRUE, + sparse_logical(rep(TRUE, 50), seq(25, 74), 50) + ) + + # Too large position values + expect_snapshot( + error = TRUE, + sparse_logical(TRUE, 0, 5) + ) + expect_snapshot( + error = TRUE, + sparse_logical(rep(TRUE, 101), seq(-50, 50), 100) + ) +}) + +test_that("length() works with sparse_logical()", { + expect_identical( + length(sparse_logical(logical(), integer(), 0)), + 0L + ) + + expect_identical( + length(sparse_logical(TRUE, 1, 10)), + 10L + ) + + expect_identical( + length(sparse_logical(TRUE, 1, 100)), + 100L + ) +}) + +test_that("single subsetting works with sparse_logical()", { + x_sparse <- sparse_logical(value = c(TRUE, NA, TRUE), position = c(1, 5, 8), 10) + x_dense <- c(TRUE, FALSE, FALSE, FALSE, NA, FALSE, FALSE, TRUE, FALSE, FALSE) + + for (i in seq_len(10)) { + expect_identical(x_sparse[i], x_dense[i]) + } + + expect_identical(x_sparse[0], x_dense[0]) + + expect_identical(x_sparse[NA_integer_], x_dense[NA_integer_]) + + expect_identical(x_sparse[NULL], x_dense[NULL]) + + expect_identical(x_sparse[NaN], x_dense[NaN]) + + expect_identical(x_sparse[100], x_dense[100]) + + expect_identical(x_sparse[Inf], x_dense[Inf]) + + expect_identical(x_sparse["not a number"], x_dense["not a number"]) + + expect_identical(x_sparse[1.6], x_dense[1.6]) + expect_identical(x_sparse[2.6], x_dense[2.6]) +}) + +test_that("multiple subsetting works with sparse_logical()", { + x_sparse <- sparse_logical(value = c(TRUE, NA, TRUE), position = c(1, 5, 8), 10) + x_dense <- c(TRUE, FALSE, FALSE, FALSE, NA, FALSE, FALSE, TRUE, FALSE, FALSE) + + expect_identical(x_sparse[1:2], x_dense[1:2]) + + expect_identical(x_sparse[3:7], x_dense[3:7]) + + expect_identical(x_sparse[c(1, 5, 8, 1)], x_dense[c(1, 5, 8, 1)]) + + expect_identical(x_sparse[-1], x_dense[-1]) + + expect_identical(x_sparse[-c(5:7)], x_dense[-c(5:7)]) + + expect_identical(x_sparse[FALSE], x_dense[FALSE]) + + expect_identical(x_sparse[TRUE], x_dense[TRUE]) + + expect_identical(x_sparse[NA], x_dense[NA]) + + expect_identical(x_sparse[c(1, NA, 4)], x_dense[c(1, NA, 4)]) + + expect_identical(x_sparse[c(1, NA, 0, 4, 0)], x_dense[c(1, NA, 0, 4, 0)]) + + expect_identical(x_sparse[c(1, 11)], x_dense[c(1, 11)]) + + expect_identical(x_sparse[c(1, Inf)], x_dense[c(1, Inf)]) + + expect_identical(x_sparse[c(1, NaN)], x_dense[c(1, NaN)]) +}) + +test_that("materialization works with sparse_logical()", { + x_sparse <- sparse_logical(value = c(TRUE, NA, TRUE), position = c(1, 5, 8), 10) + x_dense <- c(TRUE, FALSE, FALSE, FALSE, NA, FALSE, FALSE, TRUE, FALSE, FALSE) + + expect_identical(x_sparse[], x_dense) +}) + +test_that("sorting works with sparse_logical()", { + x_sparse <- sparse_logical(logical(), integer(), 10) + + expect_true(is_sparse_logical(sort(x_sparse))) + + x_sparse <- sparse_logical(NA, 4, 10) + + expect_identical( + sort(x_sparse), + rep(FALSE, 9) + ) + + x_sparse <- sparse_logical(logical(), integer(), 10) + + expect_true(is_sparse_logical(sort(x_sparse))) + + x_sparse <- sparse_logical(c(TRUE, TRUE, TRUE), c(1, 4, 7), 7) + + expect_true(is_sparse_logical(sort(x_sparse))) + + x_sparse <- sparse_logical(c(TRUE, TRUE), c(1, 7), 7) + + expect_true(is_sparse_logical(sort(x_sparse))) + + x_sparse <- sparse_logical(c(TRUE, TRUE), c(1, 7), 7) + + expect_true(is_sparse_logical(sort(x_sparse))) +}) + +test_that("default argument is working", { + expect_snapshot( + error = TRUE, + sparse_logical(TRUE, 1, 10, default = TRUE) + ) + + expect_snapshot( + error = TRUE, + sparse_logical(c(TRUE, TRUE, NA), c(1, 4, 6), 10, default = TRUE) + ) + + x_sparse <- sparse_logical( + value = c(FALSE, NA, FALSE), + position = c(1, 5, 8), + length = 10, + default = TRUE + ) + + x_dense <- c(FALSE, FALSE, FALSE, FALSE, NA, FALSE, FALSE, FALSE, FALSE, FALSE) + + for (i in seq_len(10)) { + expect_identical(x_sparse[i], x_dense[i]) + } + + expect_identical(x_sparse[1:2], x_dense[1:2]) + + expect_identical(x_sparse[3:7], x_dense[3:7]) + + expect_identical(x_sparse[c(1, 5, 8, 1)], x_dense[c(1, 5, 8, 1)]) + + expect_identical(x_sparse[], x_dense) +}) + +test_that("verbose testing", { + withr::local_options("sparsevctrs.verbose_materialize" = TRUE) + + expect_snapshot( + sparse_logical(TRUE, 1, 1)[] + ) +}) From 2cd209069c1b00ca8992361c11a37032d448eb99 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 15 May 2024 19:23:06 -0700 Subject: [PATCH 4/6] add as_sparse_logical() --- NAMESPACE | 1 + R/coerce-vector.R | 19 +++++++++++++++++++ man/coerce-vector.Rd | 3 +++ tests/testthat/test-coerce-vector.R | 7 +++++++ 4 files changed, 30 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index d63e1f5..4ca34ba 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export(as_sparse_character) export(as_sparse_double) export(as_sparse_integer) +export(as_sparse_logical) export(coerce_to_sparse_data_frame) export(coerce_to_sparse_matrix) export(coerce_to_sparse_tibble) diff --git a/R/coerce-vector.R b/R/coerce-vector.R index 426360c..f23afc9 100644 --- a/R/coerce-vector.R +++ b/R/coerce-vector.R @@ -82,4 +82,23 @@ as_sparse_character <- function(x, default = "") { length = length(x), default = default ) +} + +#' @rdname coerce-vector +#' @export +as_sparse_logical <- function(x, default = FALSE) { + if (is_sparse_logical(x)) { + return(x) + } + + check_bool(default) + + index <- which(x != default | is.na(x)) + + sparse_logical( + values = x[index], + positions = index, + length = length(x), + default = default + ) } \ No newline at end of file diff --git a/man/coerce-vector.Rd b/man/coerce-vector.Rd index 3ccb539..8f6134d 100644 --- a/man/coerce-vector.Rd +++ b/man/coerce-vector.Rd @@ -5,6 +5,7 @@ \alias{as_sparse_double} \alias{as_sparse_integer} \alias{as_sparse_character} +\alias{as_sparse_logical} \title{Coerce numeric vector to sparse double} \usage{ as_sparse_double(x, default = 0) @@ -12,6 +13,8 @@ as_sparse_double(x, default = 0) as_sparse_integer(x, default = 0L) as_sparse_character(x, default = "") + +as_sparse_logical(x, default = FALSE) } \arguments{ \item{x}{a numeric vector.} diff --git a/tests/testthat/test-coerce-vector.R b/tests/testthat/test-coerce-vector.R index b5ee62c..0c77e4d 100644 --- a/tests/testthat/test-coerce-vector.R +++ b/tests/testthat/test-coerce-vector.R @@ -32,3 +32,10 @@ test_that("as_sparse_integer works", { expect_true(is_sparse_character(x_sparse)) }) + +test_that("as_sparse_logical works", { + x_dense <- c(FALSE, FALSE, FALSE, FALSE, NA, FALSE, FALSE, FALSE, FALSE, FALSE) + x_sparse <- as_sparse_logical(x_dense) + + expect_true(is_sparse_logical(x_sparse)) +}) From a45e594b02fc981d77168c26344971a4a0fcaf43 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 15 May 2024 19:33:05 -0700 Subject: [PATCH 5/6] fix broken test --- tests/testthat/test-sparse_logical.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-sparse_logical.R b/tests/testthat/test-sparse_logical.R index e2edaf3..1cc02e7 100644 --- a/tests/testthat/test-sparse_logical.R +++ b/tests/testthat/test-sparse_logical.R @@ -255,7 +255,7 @@ test_that("default argument is working", { default = TRUE ) - x_dense <- c(FALSE, FALSE, FALSE, FALSE, NA, FALSE, FALSE, FALSE, FALSE, FALSE) + x_dense <- c(FALSE, TRUE, TRUE, TRUE, NA, TRUE, TRUE, FALSE, TRUE, TRUE) for (i in seq_len(10)) { expect_identical(x_sparse[i], x_dense[i]) From 6f82a58baff965e29f13240c0bdb9fc90c624897 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 15 May 2024 19:33:20 -0700 Subject: [PATCH 6/6] don't use extract_default_double in logical code --- src/altrep-sparse-logical.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/altrep-sparse-logical.c b/src/altrep-sparse-logical.c index 061f459..4d92653 100644 --- a/src/altrep-sparse-logical.c +++ b/src/altrep-sparse-logical.c @@ -32,7 +32,7 @@ SEXP alrep_sparse_logical_Materialize(SEXP x) { const R_xlen_t len = extract_len(x); - const double v_default_val = extract_default_double(x); + const int v_default_val = extract_default_logical(x); out = PROTECT(Rf_allocVector(LGLSXP, len)); int* v_out = LOGICAL(out); @@ -225,7 +225,7 @@ static int altrep_sparse_logical_Elt(SEXP x, R_xlen_t i) { const R_xlen_t len = extract_len(x); - const double v_default_val = extract_default_double(x); + const int v_default_val = extract_default_logical(x); if (i > len) { // OOB of vector itself @@ -254,7 +254,7 @@ int altrep_sparse_logical_Is_sorted(SEXP x) { SEXP val = extract_val(x); const int* v_val = LOGICAL_RO(val); - const double v_default_val = extract_default_double(x); + const int v_default_val = extract_default_logical(x); // zero length vector are by def sorted if (pos_len == 0) { @@ -271,7 +271,7 @@ int altrep_sparse_logical_Is_sorted(SEXP x) { } } - double current_value; + int current_value; if (v_pos[0] == 1) { current_value = v_val[0]; @@ -322,13 +322,12 @@ static int altrep_sparse_logical_No_NA_method(SEXP x) { } static SEXP altrep_sparse_logical_Sum_method(SEXP x, Rboolean na_rm) { - Rprintf("const char *, ..."); const SEXP val = extract_val(x); const int* v_val = LOGICAL_RO(val); const R_xlen_t val_len = Rf_xlength(val); const R_xlen_t len = extract_len(x); - double sum = 0; + int sum = 0; if (len == 0) { return Rf_ScalarLogical(sum); @@ -346,7 +345,7 @@ static SEXP altrep_sparse_logical_Sum_method(SEXP x, Rboolean na_rm) { } // default can be non-zero - const double v_default_val = extract_default_double(x); + const int v_default_val = extract_default_integer(x); if (v_default_val != 0) { sum = sum + (len - val_len) * v_default_val;