From ea9486e2e1e3df28f85fd5cfb9104163a92c5227 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 16 May 2024 08:48:59 +0200 Subject: [PATCH 01/27] https://github.com/easystats/easystats/issues/404 --- DESCRIPTION | 2 +- NEWS.md | 7 +++++++ R/data_partition.R | 23 +++++++++++++++-------- man/data_partition.Rd | 13 ++++++++----- tests/testthat/test-data_partition.R | 4 ++-- 5 files changed, 33 insertions(+), 16 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 20b803319..7c912d49a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.10.0.3 +Version: 0.10.0.4 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")), diff --git a/NEWS.md b/NEWS.md index f5e1a3543..1667c1179 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,12 @@ # datawizard 0.10.1 +BREAKING CHANGES + +* Arguments named `group` or `group_by` will be deprecated in a future release. + Please use `by` instead. This affects following functions in *datawizard*. + + * `data_partition()` + CHANGES * `recode_into()` is more relaxed regarding checking the type of `NA` values. diff --git a/R/data_partition.R b/R/data_partition.R index 5953480c5..69af36ed4 100644 --- a/R/data_partition.R +++ b/R/data_partition.R @@ -2,19 +2,20 @@ #' #' Creates data partitions (for instance, a training and a test set) based on a #' data frame that can also be stratified (i.e., evenly spread a given factor) -#' using the `group` argument. +#' using the `by` argument. #' #' @inheritParams data_rename #' @param proportion Scalar (between 0 and 1) or numeric vector, indicating the #' proportion(s) of the training set(s). The sum of `proportion` must not be #' greater than 1. The remaining part will be used for the test set. 
-#' @param group A character vector indicating the name(s) of the column(s) used +#' @param by A character vector indicating the name(s) of the column(s) used #' for stratified partitioning. #' @param seed A random number generator seed. Enter an integer (e.g. 123) so #' that the random sampling will be the same each time you run the function. #' @param row_id Character string, indicating the name of the column that #' contains the row-id's. #' @param verbose Toggle messages and warnings. +#' @param group Deprecated. Use `by` instead. #' #' @return A list of data frames. The list includes one training set per given #' proportion and the remaining data as test set. List elements of training @@ -28,7 +29,7 @@ #' nrow(out$p_0.9) #' #' # Stratify by group (equal proportions of each species) -#' out <- data_partition(iris, proportion = 0.9, group = "Species") +#' out <- data_partition(iris, proportion = 0.9, by = "Species") #' out$test #' #' # Create multiple partitions @@ -38,21 +39,27 @@ #' # Create multiple partitions, stratified by group - 30% equally sampled #' # from species in first training set, 50% in second training set and #' # remaining 20% equally sampled from each species in test set. -#' out <- data_partition(iris, proportion = c(0.3, 0.5), group = "Species") +#' out <- data_partition(iris, proportion = c(0.3, 0.5), by = "Species") #' lapply(out, function(i) table(i$Species)) #' #' @inherit data_rename seealso #' @export data_partition <- function(data, proportion = 0.7, - group = NULL, + by = NULL, seed = NULL, row_id = ".row_id", verbose = TRUE, + group = NULL, ...) { # validation checks data <- .coerce_to_dataframe(data) + ## TODO: deprecate later + if (!is.null(group)) { + by <- group + } + if (sum(proportion) > 1) { insight::format_error("Sum of `proportion` cannot be higher than 1.") } @@ -91,12 +98,12 @@ data_partition <- function(data, # Create list of data groups. 
We generally lapply over list of # sampled row-id's by group, thus, we even create a list if not grouped. - if (is.null(group)) { + if (is.null(by)) { indices_list <- list(seq_len(nrow(data))) } else { # else, split by group(s) and extract row-ids per group indices_list <- lapply( - split(data, data[group]), + split(data, data[by]), data_extract, select = row_id, as_data_frame = FALSE @@ -130,7 +137,7 @@ data_partition <- function(data, }) # we need to move all list elements one level higher. - if (is.null(group)) { + if (is.null(by)) { training_sets <- training_sets[[1]] } else { # for grouped training sets, we need to row-bind all sampled training diff --git a/man/data_partition.Rd b/man/data_partition.Rd index 8e7cae95b..4ed71e3c1 100644 --- a/man/data_partition.Rd +++ b/man/data_partition.Rd @@ -7,10 +7,11 @@ data_partition( data, proportion = 0.7, - group = NULL, + by = NULL, seed = NULL, row_id = ".row_id", verbose = TRUE, + group = NULL, ... ) } @@ -21,7 +22,7 @@ data_partition( proportion(s) of the training set(s). The sum of \code{proportion} must not be greater than 1. The remaining part will be used for the test set.} -\item{group}{A character vector indicating the name(s) of the column(s) used +\item{by}{A character vector indicating the name(s) of the column(s) used for stratified partitioning.} \item{seed}{A random number generator seed. Enter an integer (e.g. 123) so @@ -32,6 +33,8 @@ contains the row-id's.} \item{verbose}{Toggle messages and warnings.} +\item{group}{Deprecated. Use \code{by} instead.} + \item{...}{Other arguments passed to or from other functions.} } \value{ @@ -43,7 +46,7 @@ is named \verb{$test}. \description{ Creates data partitions (for instance, a training and a test set) based on a data frame that can also be stratified (i.e., evenly spread a given factor) -using the \code{group} argument. +using the \code{by} argument. 
} \examples{ data(iris) @@ -52,7 +55,7 @@ out$test nrow(out$p_0.9) # Stratify by group (equal proportions of each species) -out <- data_partition(iris, proportion = 0.9, group = "Species") +out <- data_partition(iris, proportion = 0.9, by = "Species") out$test # Create multiple partitions @@ -62,7 +65,7 @@ lapply(out, head) # Create multiple partitions, stratified by group - 30\% equally sampled # from species in first training set, 50\% in second training set and # remaining 20\% equally sampled from each species in test set. -out <- data_partition(iris, proportion = c(0.3, 0.5), group = "Species") +out <- data_partition(iris, proportion = c(0.3, 0.5), by = "Species") lapply(out, function(i) table(i$Species)) } diff --git a/tests/testthat/test-data_partition.R b/tests/testthat/test-data_partition.R index 7443465a8..99f4535b2 100644 --- a/tests/testthat/test-data_partition.R +++ b/tests/testthat/test-data_partition.R @@ -53,8 +53,8 @@ test_that("data_partition works as expected", { data(iris) expect_snapshot(str(data_partition(iris, proportion = 0.7, seed = 123))) expect_snapshot(str(data_partition(iris, proportion = c(0.2, 0.5), seed = 123))) - expect_snapshot(str(data_partition(iris, proportion = 0.7, group = "Species", seed = 123))) - expect_snapshot(str(data_partition(iris, proportion = c(0.2, 0.5), group = "Species", seed = 123))) + expect_snapshot(str(data_partition(iris, proportion = 0.7, by = "Species", seed = 123))) + expect_snapshot(str(data_partition(iris, proportion = c(0.2, 0.5), by = "Species", seed = 123))) }) test_that("data_partition warns if no testing set", { From 1dfe031584883525f35684b669c52b00df433eb7 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 16 May 2024 08:51:56 +0200 Subject: [PATCH 02/27] update snapshots --- tests/testthat/_snaps/data_partition.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/_snaps/data_partition.md b/tests/testthat/_snaps/data_partition.md index 5cf3ffbf1..fdf76fc36 100644 
--- a/tests/testthat/_snaps/data_partition.md +++ b/tests/testthat/_snaps/data_partition.md @@ -88,7 +88,7 @@ --- Code - str(data_partition(iris, proportion = 0.7, group = "Species", seed = 123)) + str(data_partition(iris, proportion = 0.7, by = "Species", seed = 123)) Output List of 2 $ p_0.7:'data.frame': 105 obs. of 6 variables: @@ -109,7 +109,7 @@ --- Code - str(data_partition(iris, proportion = c(0.2, 0.5), group = "Species", seed = 123)) + str(data_partition(iris, proportion = c(0.2, 0.5), by = "Species", seed = 123)) Output List of 3 $ p_0.2:'data.frame': 30 obs. of 6 variables: From bc30964d0a2bd0723b218ac2a8d064c6c0d2e331 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 16 May 2024 08:56:03 +0200 Subject: [PATCH 03/27] lintr, comments --- R/data_codebook.R | 22 +++++++++++----------- R/data_xtabulate.R | 6 ++++++ 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/R/data_codebook.R b/R/data_codebook.R index 7608f08d5..cec95196f 100644 --- a/R/data_codebook.R +++ b/R/data_codebook.R @@ -232,9 +232,9 @@ data_codebook <- function(data, # add proportions, but not for ranges, since these are always 100% if (is_range) { - proportions <- "" + frq_proportions <- "" } else { - proportions <- sprintf("%.1f%%", round(100 * (frq / sum(frq)), 1)) + frq_proportions <- sprintf("%.1f%%", round(100 * (frq / sum(frq)), 1)) } # make sure we have not too long rows, e.g. 
for variables that @@ -245,9 +245,9 @@ data_codebook <- function(data, } if (length(frq) > max_values) { frq <- frq[1:max_values] - proportions <- proportions[1:max_values] + frq_proportions <- frq_proportions[1:max_values] frq[max_values] <- NA - proportions[max_values] <- NA + frq_proportions[max_values] <- NA } if (length(values) > max_values) { values <- values[1:max_values] @@ -273,7 +273,7 @@ data_codebook <- function(data, values, value_labels, frq, - proportions, + proportions = frq_proportions, stringsAsFactors = FALSE )) @@ -347,12 +347,12 @@ format.data_codebook <- function(x, format = "text", ...) { x$Prop[x$Prop == "NA" | is.na(x$Prop)] <- "" # align only for text format if (identical(format, "text")) { - x$Prop[x$Prop != ""] <- format(x$Prop[x$Prop != ""], justify = "right") + x$Prop[x$Prop != ""] <- format(x$Prop[x$Prop != ""], justify = "right") # nolint } - x[["N"]][x$Prop != ""] <- sprintf( + x[["N"]][x$Prop != ""] <- sprintf( # nolint "%s (%s)", - as.character(x[["N"]][x$Prop != ""]), - x$Prop[x$Prop != ""] + as.character(x[["N"]][x$Prop != ""]), # nolint + x$Prop[x$Prop != ""] # nolint ) x$Prop <- NULL } @@ -388,7 +388,7 @@ print_html.data_codebook <- function(x, # since we have each value at its own row, the HTML table contains # horizontal borders for each cell/row. 
We want to remove those borders # from rows that actually belong to one variable - separator_lines <- which(duplicated(x$.row_id) & x$N == "") + separator_lines <- which(duplicated(x$.row_id) & x$N == "") # nolint # remove separator lines, as we don't need these for HTML tables x <- x[-separator_lines, ] # check row IDs, and find odd rows @@ -405,7 +405,7 @@ print_html.data_codebook <- function(x, out <- gt::tab_style( out, style = list(gt::cell_borders(sides = "top", style = "hidden")), - locations = gt::cells_body(rows = which(x$ID == "")) + locations = gt::cells_body(rows = which(x$ID == "")) # nolint ) # highlight odd rows if (!is.null(row_color)) { diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index 0e38c9c07..6cbe065e7 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -229,6 +229,9 @@ print_html.dw_data_xtabulate <- function(x, big_mark = NULL, ...) { x$Group <- NULL } + + ## FIXME: change group_by argument later + # print table insight::export_table( format(x, big_mark = big_mark, format = "html", ...), @@ -265,6 +268,9 @@ print_html.dw_data_xtabulates <- function(x, big_mark = NULL, ...) { out <- do.call(rbind, x) + + ## FIXME: change group_by argument later + # print table insight::export_table( out, From 37afbdf7dccebca385c9db7e44543b78af3a7299 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 16 May 2024 10:19:50 +0200 Subject: [PATCH 04/27] fix demean() --- R/demean.R | 74 ++++++++++++++++----------------- man/demean.Rd | 29 +++++++------ tests/testthat/_snaps/demean.md | 2 +- tests/testthat/test-demean.R | 20 +++++---- 4 files changed, 68 insertions(+), 57 deletions(-) diff --git a/R/demean.R b/R/demean.R index 69e833a4d..fc797fc9e 100644 --- a/R/demean.R +++ b/R/demean.R @@ -11,7 +11,7 @@ #' @param x A data frame. #' @param select Character vector (or formula) with names of variables to select #' that should be group- and de-meaned. 
-#' @param group Character vector (or formula) with the name of the variable that +#' @param by Character vector (or formula) with the name of the variable that #' indicates the group- or cluster-ID. #' @param center Method for centering. `demean()` always performs #' mean-centering, while `degroup()` can use `center = "median"` or @@ -25,6 +25,7 @@ #' attributes to indicate the within- and between-effects. This is only #' relevant when printing `model_parameters()` - in such cases, the #' within- and between-effects are printed in separated blocks. +#' @param group Deprecated. Use `by` instead. #' @inheritParams center #' #' @return @@ -92,7 +93,7 @@ #' #' \subsection{Terminology}{ #' The group-meaned variable is simply the mean of an independent variable -#' within each group (or id-level or cluster) represented by `group`. +#' within each group (or id-level or cluster) represented by `by`. #' It represents the cluster-mean of an independent variable. The regression #' coefficient of a group-meaned variable is the *between-subject-effect*. 
#' The de-meaned variable is then the centered version of the group-meaned @@ -199,10 +200,10 @@ #' iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID #' iris$binary <- as.factor(rbinom(150, 1, .35)) # binary variable #' -#' x <- demean(iris, select = c("Sepal.Length", "Petal.Length"), group = "ID") +#' x <- demean(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID") #' head(x) #' -#' x <- demean(iris, select = c("Sepal.Length", "binary", "Species"), group = "ID") +#' x <- demean(iris, select = c("Sepal.Length", "binary", "Species"), by = "ID") #' head(x) #' #' @@ -213,23 +214,29 @@ #' y = c(1, 2, 1, 2, 4, 3, 2, 1), #' ID = c(1, 2, 3, 1, 2, 3, 1, 2) #' ) -#' demean(dat, select = c("a", "x*y"), group = "ID") +#' demean(dat, select = c("a", "x*y"), by = "ID") #' #' # or in formula-notation -#' demean(dat, select = ~ a + x * y, group = ~ID) +#' demean(dat, select = ~ a + x * y, by = ~ID) #' #' @export demean <- function(x, select, - group, + by, suffix_demean = "_within", suffix_groupmean = "_between", add_attributes = TRUE, - verbose = TRUE) { + verbose = TRUE, + group = NULL) { + ## TODO: deprecate later + if (!is.null(group)) { + by <- group + } + degroup( x = x, select = select, - group = group, + by = by, center = "mean", suffix_demean = suffix_demean, suffix_groupmean = suffix_groupmean, @@ -247,12 +254,18 @@ demean <- function(x, #' @export degroup <- function(x, select, - group, + by, center = "mean", suffix_demean = "_within", suffix_groupmean = "_between", add_attributes = TRUE, - verbose = TRUE) { + verbose = TRUE, + group = NULL) { + ## TODO: deprecate later + if (!is.null(group)) { + by <- group + } + # ugly tibbles again... 
x <- .coerce_to_dataframe(x) @@ -266,8 +279,8 @@ degroup <- function(x, )) } - if (inherits(group, "formula")) { - group <- all.vars(group) + if (inherits(by, "formula")) { + by <- all.vars(by) } interactions_no <- select[!grepl("(\\*|\\:)", select)] @@ -296,7 +309,7 @@ degroup <- function(x, select <- intersect(colnames(x), select) # get data to demean... - dat <- x[, c(select, group)] + dat <- x[, c(select, by)] # find categorical predictors that are coded as factors @@ -344,31 +357,18 @@ degroup <- function(x, # for variables within each group (the group means). assign # mean values to a vector of same length as the data - if (center == "mode") { - x_gm_list <- lapply(select, function(i) { - stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) distribution_mode(stats::na.omit(.gm))) - }) - } else if (center == "median") { - x_gm_list <- lapply(select, function(i) { - stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) stats::median(.gm, na.rm = TRUE)) - }) - } else if (center == "min") { - x_gm_list <- lapply(select, function(i) { - stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) min(.gm, na.rm = TRUE)) - }) - } else if (center == "max") { - x_gm_list <- lapply(select, function(i) { - stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) max(.gm, na.rm = TRUE)) - }) - } else { - x_gm_list <- lapply(select, function(i) { - stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) mean(.gm, na.rm = TRUE)) - }) - } - + gm_fun <- switch(center, + mode = function(.gm) distribution_mode(stats::na.omit(.gm)), + median = function(.gm) stats::median(.gm, na.rm = TRUE), + min = function(.gm) min(.gm, na.rm = TRUE), + max = function(.gm) max(.gm, na.rm = TRUE), + function(.gm) mean(.gm, na.rm = TRUE) + ) + x_gm_list <- lapply(select, function(i) { + stats::ave(dat[[i]], dat[[by]], FUN = gm_fun) + }) names(x_gm_list) <- select - # create de-meaned variables by subtracting the group mean from each individual value x_dm_list <- lapply(select, function(i) dat[[i]] - 
x_gm_list[[i]]) diff --git a/man/demean.Rd b/man/demean.Rd index 422c8d32e..d03a1010b 100644 --- a/man/demean.Rd +++ b/man/demean.Rd @@ -9,33 +9,36 @@ demean( x, select, - group, + by, suffix_demean = "_within", suffix_groupmean = "_between", add_attributes = TRUE, - verbose = TRUE + verbose = TRUE, + group = NULL ) degroup( x, select, - group, + by, center = "mean", suffix_demean = "_within", suffix_groupmean = "_between", add_attributes = TRUE, - verbose = TRUE + verbose = TRUE, + group = NULL ) detrend( x, select, - group, + by, center = "mean", suffix_demean = "_within", suffix_groupmean = "_between", add_attributes = TRUE, - verbose = TRUE + verbose = TRUE, + group = NULL ) } \arguments{ @@ -44,7 +47,7 @@ detrend( \item{select}{Character vector (or formula) with names of variables to select that should be group- and de-meaned.} -\item{group}{Character vector (or formula) with the name of the variable that +\item{by}{Character vector (or formula) with the name of the variable that indicates the group- or cluster-ID.} \item{suffix_demean, suffix_groupmean}{String value, will be appended to the @@ -59,6 +62,8 @@ within- and between-effects are printed in separated blocks.} \item{verbose}{Toggle warnings and messages.} +\item{group}{Deprecated. Use \code{by} instead.} + \item{center}{Method for centering. \code{demean()} always performs mean-centering, while \code{degroup()} can use \code{center = "median"} or \code{center = "mode"} for median- or mode-centering, and also \code{"min"} @@ -131,7 +136,7 @@ intervals and low statistical power} (\cite{Heisig et al. 2017}). \subsection{Terminology}{ The group-meaned variable is simply the mean of an independent variable -within each group (or id-level or cluster) represented by \code{group}. +within each group (or id-level or cluster) represented by \code{by}. It represents the cluster-mean of an independent variable. The regression coefficient of a group-meaned variable is the \emph{between-subject-effect}. 
The de-meaned variable is then the centered version of the group-meaned @@ -209,10 +214,10 @@ data(iris) iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID iris$binary <- as.factor(rbinom(150, 1, .35)) # binary variable -x <- demean(iris, select = c("Sepal.Length", "Petal.Length"), group = "ID") +x <- demean(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID") head(x) -x <- demean(iris, select = c("Sepal.Length", "binary", "Species"), group = "ID") +x <- demean(iris, select = c("Sepal.Length", "binary", "Species"), by = "ID") head(x) @@ -223,10 +228,10 @@ dat <- data.frame( y = c(1, 2, 1, 2, 4, 3, 2, 1), ID = c(1, 2, 3, 1, 2, 3, 1, 2) ) -demean(dat, select = c("a", "x*y"), group = "ID") +demean(dat, select = c("a", "x*y"), by = "ID") # or in formula-notation -demean(dat, select = ~ a + x * y, group = ~ID) +demean(dat, select = ~ a + x * y, by = ~ID) } \references{ diff --git a/tests/testthat/_snaps/demean.md b/tests/testthat/_snaps/demean.md index f61ba9fcb..7f12d263d 100644 --- a/tests/testthat/_snaps/demean.md +++ b/tests/testthat/_snaps/demean.md @@ -55,7 +55,7 @@ # demean interaction term Code - demean(dat, select = c("a", "x*y"), group = "ID") + demean(dat, select = c("a", "x*y"), by = "ID") Output a_between x_y_between a_within x_y_within 1 2.666667 4.666667 -1.6666667 -0.6666667 diff --git a/tests/testthat/test-demean.R b/tests/testthat/test-demean.R index a2f803cd7..566bd6097 100644 --- a/tests/testthat/test-demean.R +++ b/tests/testthat/test-demean.R @@ -2,29 +2,35 @@ test_that("demean works", { df <- iris set.seed(123) - df$ID <- sample(1:4, nrow(df), replace = TRUE) # fake-ID + df$ID <- sample.int(4, nrow(df), replace = TRUE) # fake-ID set.seed(123) df$binary <- as.factor(rbinom(150, 1, 0.35)) # binary variable set.seed(123) - x <- demean(df, select = c("Sepal.Length", "Petal.Length"), group = "ID") + x <- demean(df, select = c("Sepal.Length", "Petal.Length"), by = "ID") expect_snapshot(head(x)) set.seed(123) expect_message( - x <- 
demean(df, select = c("Sepal.Length", "binary", "Species"), group = "ID"), + { + x <- demean(df, select = c("Sepal.Length", "binary", "Species"), by = "ID") + }, "have been coerced to numeric" ) expect_snapshot(head(x)) set.seed(123) expect_message( - y <- demean(df, select = ~ Sepal.Length + binary + Species, group = ~ID), + { + y <- demean(df, select = ~ Sepal.Length + binary + Species, by = ~ID) + }, "have been coerced to numeric" ) expect_message( - z <- demean(df, select = c("Sepal.Length", "binary", "Species"), group = "ID"), + { + z <- demean(df, select = c("Sepal.Length", "binary", "Species"), by = "ID") + }, "have been coerced to numeric" ) expect_identical(y, z) @@ -39,7 +45,7 @@ test_that("demean interaction term", { ) set.seed(123) - expect_snapshot(demean(dat, select = c("a", "x*y"), group = "ID")) + expect_snapshot(demean(dat, select = c("a", "x*y"), by = "ID")) }) test_that("demean shows message if some vars don't exist", { @@ -52,7 +58,7 @@ test_that("demean shows message if some vars don't exist", { set.seed(123) expect_message( - demean(dat, select = "foo", group = "ID"), + demean(dat, select = "foo", by = "ID"), regexp = "not found" ) }) From 68e3c56ba6f844938432813e6edc670e0614fa57 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 16 May 2024 10:37:55 +0200 Subject: [PATCH 05/27] fix means_by_group --- R/means_by_group.R | 65 ++++++++++++++++++++++++++----------------- man/means_by_group.Rd | 25 ++++++++++++----- 2 files changed, 57 insertions(+), 33 deletions(-) diff --git a/R/means_by_group.R b/R/means_by_group.R index faa73eba6..fbec716ec 100644 --- a/R/means_by_group.R +++ b/R/means_by_group.R @@ -4,10 +4,10 @@ #' @description Computes summary table of means by groups. #' #' @param x A vector or a data frame. -#' @param group If `x` is a numeric vector, `group` should be a factor that -#' indicates the group-classifying categories. 
If `x` is a data frame, `group` +#' @param by If `x` is a numeric vector, `by` should be a factor that +#' indicates the group-classifying categories. If `x` is a data frame, `by` #' should be a character string, naming the variable in `x` that is used for -#' grouping. Numeric vectors are coerced to factors. Not that `group` should +#' grouping. Numeric vectors are coerced to factors. Not that `by` should #' only refer to a single variable. #' @param ci Level of confidence interval for mean estimates. Default is `0.95`. #' Use `ci = NA` to suppress confidence intervals. @@ -19,14 +19,15 @@ #' @param digits Optional scalar, indicating the amount of digits after decimal #' point when rounding estimates and values. #' @param ... Currently not used +#' @param group Deprecated. Use `by` instead. #' @inheritParams find_columns #' #' @return A data frame with information on mean and further summary statistics #' for each sub-group. #' -#' @details This function is comparable to `aggregate(x, group, mean)`, but provides +#' @details This function is comparable to `aggregate(x, by, mean)`, but provides #' some further information, including summary statistics from a One-Way-ANOVA -#' using `x` as dependent and `group` as independent variable. [`emmeans::contrast()`] +#' using `x` as dependent and `by` as independent variable. [`emmeans::contrast()`] #' is used to get p-values for each sub-group. P-values indicate whether each #' group-mean is significantly different from the total mean. #' @@ -55,21 +56,27 @@ means_by_group.default <- function(x, ...) { #' @rdname means_by_group #' @export means_by_group.numeric <- function(x, - group = NULL, + by = NULL, ci = 0.95, weights = NULL, digits = NULL, + group = NULL, ...) 
{ + ## TODO: deprecate later + if (is.null(group)) { + by <- group + } + # validation check for arguments - # "group" must be provided - if (is.null(group)) { - insight::format_error("Argument `group` is missing.") + # "by" must be provided + if (is.null(by)) { + insight::format_error("Argument `by` is missing.") } - # group must be of same length as x - if (length(group) != length(x)) { - insight::format_error("Argument `group` must be of same length as `x`.") + # by must be of same length as x + if (length(by) != length(x)) { + insight::format_error("Argument `by` must be of same length as `x`.") } # if weights are provided, must be of same length as x @@ -82,32 +89,32 @@ means_by_group.numeric <- function(x, # retrieve labels var_mean_label <- attr(x, "label", exact = TRUE) - var_grp_label <- attr(group, "label", exact = TRUE) + var_grp_label <- attr(by, "label", exact = TRUE) # if no labels present, use variable names directly if (is.null(var_mean_label)) { var_mean_label <- deparse(substitute(x)) } if (is.null(var_grp_label)) { - var_grp_label <- deparse(substitute(group)) + var_grp_label <- deparse(substitute(by)) } # coerce group to factor if numeric, or convert labels to levels, if factor - if (is.factor(group)) { - group <- tryCatch(labels_to_levels(group, verbose = FALSE), error = function(e) group) + if (is.factor(by)) { + by <- tryCatch(labels_to_levels(by, verbose = FALSE), error = function(e) by) } else { - group <- to_factor(group) + by <- to_factor(by) } - data <- stats::na.omit(data.frame( + my_data <- stats::na.omit(data.frame( x = x, - group = group, + group = by, weights = weights, stringsAsFactors = FALSE )) # get grouped means table - out <- .means_by_group(data, ci = ci) + out <- .means_by_group(my_data, ci = ci) # attributes attr(out, "var_mean_label") <- var_mean_label @@ -123,7 +130,7 @@ means_by_group.numeric <- function(x, #' @export means_by_group.data.frame <- function(x, select = NULL, - group = NULL, + by = NULL, ci = 0.95, weights = 
NULL, digits = NULL, @@ -131,7 +138,13 @@ means_by_group.data.frame <- function(x, ignore_case = FALSE, regex = FALSE, verbose = TRUE, + group = NULL, ...) { + ## TODO: deprecate later + if (is.null(group)) { + by <- group + } + # evaluate select/exclude, may be select-helpers select <- .select_nse(select, x, @@ -154,11 +167,11 @@ means_by_group.data.frame <- function(x, if (is.null(attr(x[[i]], "label", exact = TRUE))) { attr(x[[i]], "label") <- i } - if (is.null(attr(x[[group]], "label", exact = TRUE))) { - attr(x[[group]], "label") <- group + if (is.null(attr(x[[by]], "label", exact = TRUE))) { + attr(x[[by]], "label") <- by } # compute means table - means_by_group(x[[i]], group = x[[group]], ci = ci, weights = w, digits = digits, ...) + means_by_group(x[[i]], by = x[[by]], ci = ci, weights = w, digits = digits, ...) }) class(out) <- c("dw_groupmeans_list", "list") @@ -195,14 +208,14 @@ means_by_group.data.frame <- function(x, if (insight::check_if_installed("emmeans", quietly = TRUE)) { # create summary table of contrasts, for p-values and confidence intervals predicted <- emmeans::emmeans(fit, specs = "group", level = ci) - contrasts <- emmeans::contrast(predicted, method = "eff") + emm_contrasts <- emmeans::contrast(predicted, method = "eff") # add p-values and confidence intervals to "out" if (!is.null(ci) && !is.na(ci)) { summary_table <- as.data.frame(predicted) out$CI_low <- summary_table$lower.CL out$CI_high <- summary_table$upper.CL } - summary_table <- as.data.frame(contrasts) + summary_table <- as.data.frame(emm_contrasts) out$p <- summary_table$p.value } diff --git a/man/means_by_group.Rd b/man/means_by_group.Rd index 9434452ad..5473daeec 100644 --- a/man/means_by_group.Rd +++ b/man/means_by_group.Rd @@ -8,12 +8,20 @@ \usage{ means_by_group(x, ...) -\method{means_by_group}{numeric}(x, group = NULL, ci = 0.95, weights = NULL, digits = NULL, ...) 
+\method{means_by_group}{numeric}( + x, + by = NULL, + ci = 0.95, + weights = NULL, + digits = NULL, + group = NULL, + ... +) \method{means_by_group}{data.frame}( x, select = NULL, - group = NULL, + by = NULL, ci = 0.95, weights = NULL, digits = NULL, @@ -21,6 +29,7 @@ means_by_group(x, ...) ignore_case = FALSE, regex = FALSE, verbose = TRUE, + group = NULL, ... ) } @@ -29,10 +38,10 @@ means_by_group(x, ...) \item{...}{Currently not used} -\item{group}{If \code{x} is a numeric vector, \code{group} should be a factor that -indicates the group-classifying categories. If \code{x} is a data frame, \code{group} +\item{by}{If \code{x} is a numeric vector, \code{by} should be a factor that +indicates the group-classifying categories. If \code{x} is a data frame, \code{by} should be a character string, naming the variable in \code{x} that is used for -grouping. Numeric vectors are coerced to factors. Not that \code{group} should +grouping. Numeric vectors are coerced to factors. Not that \code{by} should only refer to a single variable.} \item{ci}{Level of confidence interval for mean estimates. Default is \code{0.95}. @@ -47,6 +56,8 @@ weights are used.} \item{digits}{Optional scalar, indicating the amount of digits after decimal point when rounding estimates and values.} +\item{group}{Deprecated. Use \code{by} instead.} + \item{select}{Variables that will be included when performing the required tasks. Can be either \itemize{ @@ -106,9 +117,9 @@ for each sub-group. Computes summary table of means by groups. } \details{ -This function is comparable to \code{aggregate(x, group, mean)}, but provides +This function is comparable to \code{aggregate(x, by, mean)}, but provides some further information, including summary statistics from a One-Way-ANOVA -using \code{x} as dependent and \code{group} as independent variable. \code{\link[emmeans:contrast]{emmeans::contrast()}} +using \code{x} as dependent and \code{by} as independent variable. 
\code{\link[emmeans:contrast]{emmeans::contrast()}} is used to get p-values for each sub-group. P-values indicate whether each group-mean is significantly different from the total mean. } From bda876b5e0849e0f2d1b62c79279f53e9bd092a7 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 16 May 2024 10:41:13 +0200 Subject: [PATCH 06/27] fix --- R/means_by_group.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/means_by_group.R b/R/means_by_group.R index fbec716ec..63a6d078e 100644 --- a/R/means_by_group.R +++ b/R/means_by_group.R @@ -63,7 +63,7 @@ means_by_group.numeric <- function(x, group = NULL, ...) { ## TODO: deprecate later - if (is.null(group)) { + if (!is.null(group)) { by <- group } @@ -141,7 +141,7 @@ means_by_group.data.frame <- function(x, group = NULL, ...) { ## TODO: deprecate later - if (is.null(group)) { + if (!is.null(group)) { by <- group } From 7eca0774bbe6e36e85e9c65eae9d5e3687b63362 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 16 May 2024 10:42:34 +0200 Subject: [PATCH 07/27] update news --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 1667c1179..9abe08e93 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,8 @@ BREAKING CHANGES Please use `by` instead. This affects following functions in *datawizard*. 
* `data_partition()` + * `demean()` and `degroup()` + * `means_by_group()` CHANGES From ad303358eed36a4f4350a20baeb9565143d35202 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 16 May 2024 10:47:57 +0200 Subject: [PATCH 08/27] fix rescale_weights --- NEWS.md | 1 + R/rescale_weights.R | 32 ++++++++++++++---------- man/rescale_weights.Rd | 12 +++++---- tests/testthat/_snaps/rescale_weights.md | 2 +- tests/testthat/test-rescale_weights.R | 18 +++++++------ 5 files changed, 38 insertions(+), 27 deletions(-) diff --git a/NEWS.md b/NEWS.md index 9abe08e93..9ea8245bc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,7 @@ BREAKING CHANGES * `data_partition()` * `demean()` and `degroup()` * `means_by_group()` + * `rescale_weights()` CHANGES diff --git a/R/rescale_weights.R b/R/rescale_weights.R index 6f82acdad..9622e8599 100644 --- a/R/rescale_weights.R +++ b/R/rescale_weights.R @@ -10,16 +10,17 @@ #' models, which then can be used for multilevel modelling. #' #' @param data A data frame. -#' @param group Variable names (as character vector, or as formula), indicating +#' @param by Variable names (as character vector, or as formula), indicating #' the grouping structure (strata) of the survey data (level-2-cluster #' variable). It is also possible to create weights for multiple group #' variables; in such cases, each created weighting variable will be suffixed #' by the name of the group variable. #' @param probability_weights Variable indicating the probability (design or #' sampling) weights of the survey data (level-1-weight). -#' @param nest Logical, if `TRUE` and `group` indicates at least two +#' @param nest Logical, if `TRUE` and `by` indicates at least two #' group variables, then groups are "nested", i.e. groups are now a -#' combination from each group level of the variables in `group`. +#' combination from each group level of the variables in `by`. +#' @param group Deprecated. Use `by` instead. 
#' #' @return `data`, including the new weighting variables: `pweights_a` #' and `pweights_b`, which represent the rescaled design weights to use @@ -71,7 +72,7 @@ #' # or nested structures. #' x <- rescale_weights( #' data = nhanes_sample, -#' group = c("SDMVSTRA", "SDMVPSU"), +#' by = c("SDMVSTRA", "SDMVPSU"), #' probability_weights = "WTINT2YR", #' nest = TRUE #' ) @@ -87,9 +88,14 @@ #' ) #' } #' @export -rescale_weights <- function(data, group, probability_weights, nest = FALSE) { - if (inherits(group, "formula")) { - group <- all.vars(group) +rescale_weights <- function(data, by, probability_weights, nest = FALSE, group = NULL) { + ## TODO: deprecate later + if (!is.null(group)) { + by <- group + } + + if (inherits(by, "formula")) { + by <- all.vars(by) } # check if weight has missings. we need to remove them first, @@ -107,22 +113,22 @@ rescale_weights <- function(data, group, probability_weights, nest = FALSE) { # sort id data_tmp$.bamboozled <- seq_len(nrow(data_tmp)) - if (nest && length(group) < 2) { + if (nest && length(by) < 2) { insight::format_warning( sprintf( - "Only one group variable selected, no nested structure possible. Rescaling weights for grout '%s' now.", - group + "Only one group variable selected in `by`, no nested structure possible. 
Rescaling weights for grout '%s' now.", + by ) ) nest <- FALSE } if (nest) { - out <- .rescale_weights_nested(data_tmp, group, probability_weights, nrow(data), weight_non_na) + out <- .rescale_weights_nested(data_tmp, group = by, probability_weights, nrow(data), weight_non_na) } else { - out <- lapply(group, function(i) { + out <- lapply(by, function(i) { x <- .rescale_weights(data_tmp, i, probability_weights, nrow(data), weight_non_na) - if (length(group) > 1) { + if (length(by) > 1) { colnames(x) <- sprintf(c("pweight_a_%s", "pweight_b_%s"), i) } x diff --git a/man/rescale_weights.Rd b/man/rescale_weights.Rd index 4a005eb99..4a67d4100 100644 --- a/man/rescale_weights.Rd +++ b/man/rescale_weights.Rd @@ -4,12 +4,12 @@ \alias{rescale_weights} \title{Rescale design weights for multilevel analysis} \usage{ -rescale_weights(data, group, probability_weights, nest = FALSE) +rescale_weights(data, by, probability_weights, nest = FALSE, group = NULL) } \arguments{ \item{data}{A data frame.} -\item{group}{Variable names (as character vector, or as formula), indicating +\item{by}{Variable names (as character vector, or as formula), indicating the grouping structure (strata) of the survey data (level-2-cluster variable). It is also possible to create weights for multiple group variables; in such cases, each created weighting variable will be suffixed @@ -18,9 +18,11 @@ by the name of the group variable.} \item{probability_weights}{Variable indicating the probability (design or sampling) weights of the survey data (level-1-weight).} -\item{nest}{Logical, if \code{TRUE} and \code{group} indicates at least two +\item{nest}{Logical, if \code{TRUE} and \code{by} indicates at least two group variables, then groups are "nested", i.e. groups are now a -combination from each group level of the variables in \code{group}.} +combination from each group level of the variables in \code{by}.} + +\item{group}{Deprecated. 
Use \code{by} instead.} } \value{ \code{data}, including the new weighting variables: \code{pweights_a} @@ -73,7 +75,7 @@ if (require("lme4")) { # or nested structures. x <- rescale_weights( data = nhanes_sample, - group = c("SDMVSTRA", "SDMVPSU"), + by = c("SDMVSTRA", "SDMVPSU"), probability_weights = "WTINT2YR", nest = TRUE ) diff --git a/tests/testthat/_snaps/rescale_weights.md b/tests/testthat/_snaps/rescale_weights.md index d158070a8..5de6d489a 100644 --- a/tests/testthat/_snaps/rescale_weights.md +++ b/tests/testthat/_snaps/rescale_weights.md @@ -34,7 +34,7 @@ # rescale_weights nested works as expected Code - rescale_weights(data = head(nhanes_sample, n = 30), group = c("SDMVSTRA", + rescale_weights(data = head(nhanes_sample, n = 30), by = c("SDMVSTRA", "SDMVPSU"), probability_weights = "WTINT2YR", nest = TRUE) Output total age RIAGENDR RIDRETH1 SDMVPSU SDMVSTRA WTINT2YR pweights_a diff --git a/tests/testthat/test-rescale_weights.R b/tests/testthat/test-rescale_weights.R index bcd279355..504157180 100644 --- a/tests/testthat/test-rescale_weights.R +++ b/tests/testthat/test-rescale_weights.R @@ -13,19 +13,21 @@ test_that("rescale_weights nested works as expected", { expect_snapshot( rescale_weights( data = head(nhanes_sample, n = 30), - group = c("SDMVSTRA", "SDMVPSU"), + by = c("SDMVSTRA", "SDMVPSU"), probability_weights = "WTINT2YR", nest = TRUE ) ) expect_warning( - x <- rescale_weights( - data = head(nhanes_sample), - group = "SDMVPSU", - probability_weights = "WTINT2YR", - nest = TRUE - ), + { + x <- rescale_weights( + data = head(nhanes_sample), + by = "SDMVPSU", + probability_weights = "WTINT2YR", + nest = TRUE + ) + }, "Only one group variable selected" ) @@ -33,7 +35,7 @@ test_that("rescale_weights nested works as expected", { x, rescale_weights( data = head(nhanes_sample), - group = "SDMVPSU", + by = "SDMVPSU", probability_weights = "WTINT2YR" ) ) From 5ccb554b0e9f8a87092744bcb8fcf87bc9cbe9f5 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 16 May 
2024 11:07:22 +0200 Subject: [PATCH 09/27] silence tests --- tests/testthat/test-recode_into.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-recode_into.R b/tests/testthat/test-recode_into.R index b9b0d4da3..53c75d3a7 100644 --- a/tests/testthat/test-recode_into.R +++ b/tests/testthat/test-recode_into.R @@ -264,12 +264,14 @@ test_that("recode_into, NA doesn't need to be of exact type", { x1 <- recode_into( mpg > 10 ~ 1, gear == 5 ~ NA_real_, - data = mtcars + data = mtcars, + verbose = FALSE ) x2 <- recode_into( mpg > 10 ~ 1, gear == 5 ~ NA, - data = mtcars + data = mtcars, + verbose = FALSE ) expect_identical(x1, x2) }) From 40874e5c3b5342a299326f6441c6aef2b7f4ceb4 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 16 May 2024 20:40:17 +0200 Subject: [PATCH 10/27] Update NEWS.md Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 9ea8245bc..5b4a2f834 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,7 +3,7 @@ BREAKING CHANGES * Arguments named `group` or `group_by` will be deprecated in a future release. - Please use `by` instead. This affects following functions in *datawizard*. + Please use `by` instead. This affects the following functions in *datawizard*. 
* `data_partition()` * `demean()` and `degroup()` From 3b1aae3b4ca4423aa5da296b44627ecb1600d709 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 16 May 2024 22:21:31 +0200 Subject: [PATCH 11/27] deprecation warnings --- R/data_partition.R | 3 ++- R/demean.R | 6 ++++-- R/means_by_group.R | 6 ++++-- R/recode_values.R | 6 +++--- R/rescale_weights.R | 3 ++- R/skewness_kurtosis.R | 14 +++++++++++--- R/text_format.R | 10 +++++----- 7 files changed, 31 insertions(+), 17 deletions(-) diff --git a/R/data_partition.R b/R/data_partition.R index 69af36ed4..09add9dd7 100644 --- a/R/data_partition.R +++ b/R/data_partition.R @@ -55,9 +55,10 @@ data_partition <- function(data, # validation checks data <- .coerce_to_dataframe(data) - ## TODO: deprecate later + ## TODO: remove warning in future release if (!is.null(group)) { by <- group + insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint } if (sum(proportion) > 1) { diff --git a/R/demean.R b/R/demean.R index fc797fc9e..bbf7d2dfc 100644 --- a/R/demean.R +++ b/R/demean.R @@ -228,9 +228,10 @@ demean <- function(x, add_attributes = TRUE, verbose = TRUE, group = NULL) { - ## TODO: deprecate later + ## TODO: remove warning in future release if (!is.null(group)) { by <- group + insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint } degroup( @@ -261,9 +262,10 @@ degroup <- function(x, add_attributes = TRUE, verbose = TRUE, group = NULL) { - ## TODO: deprecate later + ## TODO: remove warning later if (!is.null(group)) { by <- group + insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint } # ugly tibbles again... 
diff --git a/R/means_by_group.R b/R/means_by_group.R index 63a6d078e..ad188f275 100644 --- a/R/means_by_group.R +++ b/R/means_by_group.R @@ -62,9 +62,10 @@ means_by_group.numeric <- function(x, digits = NULL, group = NULL, ...) { - ## TODO: deprecate later + ## TODO: remove warning in future release if (!is.null(group)) { by <- group + insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint } # validation check for arguments @@ -140,9 +141,10 @@ means_by_group.data.frame <- function(x, verbose = TRUE, group = NULL, ...) { - ## TODO: deprecate later + ## TODO: remove warning in future release if (!is.null(group)) { by <- group + insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint } # evaluate select/exclude, may be select-helpers diff --git a/R/recode_values.R b/R/recode_values.R index e0bb9540f..e355e9cb0 100644 --- a/R/recode_values.R +++ b/R/recode_values.R @@ -476,7 +476,7 @@ recode_values.data.frame <- function(x, # create the new variables and updates "select", so new variables are processed if (!isFALSE(append)) { # process arguments - args <- .process_append( + my_args <- .process_append( x, select, append, @@ -484,8 +484,8 @@ recode_values.data.frame <- function(x, preserve_value_labels = TRUE ) # update processed arguments - x <- args$x - select <- args$select + x <- my_args$x + select <- my_args$select } x[select] <- lapply( diff --git a/R/rescale_weights.R b/R/rescale_weights.R index 9622e8599..02aab1d2e 100644 --- a/R/rescale_weights.R +++ b/R/rescale_weights.R @@ -89,9 +89,10 @@ #' } #' @export rescale_weights <- function(data, by, probability_weights, nest = FALSE, group = NULL) { - ## TODO: deprecate later + ## TODO: remove warning in future release if (!is.null(group)) { by <- group + insight::format_warning("Argument `group` is deprecated and will be removed in a future release. 
Please use `by` instead.") # nolint } if (inherits(by, "formula")) { diff --git a/R/skewness_kurtosis.R b/R/skewness_kurtosis.R index e0da83c54..6142c59ad 100644 --- a/R/skewness_kurtosis.R +++ b/R/skewness_kurtosis.R @@ -115,6 +115,7 @@ skewness.numeric <- function(x, # TODO: remove deprecated argument later if (!missing(na.rm)) { # TODO: add deprecation warning in a later update + insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint remove_na <- na.rm } @@ -148,9 +149,7 @@ skewness.numeric <- function(x, ) if (!is.null(iterations)) { - if (!requireNamespace("boot", quietly = TRUE)) { - insight::format_warning("Package 'boot' needed for bootstrapping SEs.") - } else { + if (requireNamespace("boot", quietly = TRUE)) { results <- boot::boot( data = x, statistic = .boot_skewness, @@ -159,6 +158,8 @@ skewness.numeric <- function(x, type = type ) out_se <- stats::sd(results$t, na.rm = TRUE) + } else { + insight::format_warning("Package 'boot' needed for bootstrapping SEs.") } } @@ -181,6 +182,7 @@ skewness.matrix <- function(x, # TODO: remove deprecated argument later if (!missing(na.rm)) { # TODO: add deprecation warning in a later update + insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint remove_na <- na.rm } @@ -216,6 +218,7 @@ skewness.data.frame <- function(x, # TODO: remove deprecated argument later if (!missing(na.rm)) { # TODO: add deprecation warning in a later update + insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. 
Please use `remove_na` instead.") # nolint remove_na <- na.rm } @@ -243,6 +246,7 @@ skewness.default <- function(x, # TODO: remove deprecated argument later if (!missing(na.rm)) { # TODO: add deprecation warning in a later update + insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint remove_na <- na.rm } @@ -278,6 +282,7 @@ kurtosis.numeric <- function(x, # TODO: remove deprecated argument later if (!missing(na.rm)) { # TODO: add deprecation warning in a later update + insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint remove_na <- na.rm } @@ -342,6 +347,7 @@ kurtosis.matrix <- function(x, # TODO: remove deprecated argument later if (!missing(na.rm)) { # TODO: add deprecation warning in a later update + insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint remove_na <- na.rm } @@ -373,6 +379,7 @@ kurtosis.data.frame <- function(x, # TODO: remove deprecated argument later if (!missing(na.rm)) { # TODO: add deprecation warning in a later update + insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint remove_na <- na.rm } @@ -398,6 +405,7 @@ kurtosis.default <- function(x, # TODO: remove deprecated argument later if (!missing(na.rm)) { # TODO: add deprecation warning in a later update + insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. 
Please use `remove_na` instead.") # nolint remove_na <- na.rm } diff --git a/R/text_format.R b/R/text_format.R index ec70e87da..180807746 100644 --- a/R/text_format.R +++ b/R/text_format.R @@ -68,11 +68,11 @@ text_lastchar <- function(text, n = 1) { #' @rdname text_format #' @export text_concatenate <- function(text, sep = ", ", last = " and ", enclose = NULL) { - if (length(text) == 1 && nchar(text) == 0) { + if (length(text) == 1 && !nzchar(text, keepNA = TRUE)) { return(text) } - text <- text[text != ""] - if (length(text) && !is.null(enclose) && length(enclose) == 1 && nchar(enclose) > 0) { + text <- text[text != ""] # nolint + if (length(text) && !is.null(enclose) && length(enclose) == 1 && nzchar(enclose, keepNA = TRUE)) { text <- paste0(enclose, text, enclose) } if (length(text) == 1) { @@ -89,7 +89,7 @@ text_concatenate <- function(text, sep = ", ", last = " and ", enclose = NULL) { #' @export text_paste <- function(text, text2 = NULL, sep = ", ", enclose = NULL, ...) { if (!is.null(text2)) { - if (!is.null(enclose) && length(enclose) == 1 && nchar(enclose) > 0) { + if (!is.null(enclose) && length(enclose) == 1 && nzchar(enclose, keepNA = TRUE)) { text <- vapply(text, function(i) { if (i != "") { i <- paste0(enclose, i, enclose) @@ -103,7 +103,7 @@ text_paste <- function(text, text2 = NULL, sep = ", ", enclose = NULL, ...) 
{ i }, character(1L)) } - paste0(text, ifelse(text == "" | text2 == "", "", sep), text2) + paste0(text, ifelse(text == "" | text2 == "", "", sep), text2) # nolint } } From 293fb514a40aec172f90c1068f205b2ad9ea0b5f Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 16 May 2024 22:37:21 +0200 Subject: [PATCH 12/27] use insight remotes --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7c912d49a..ec594a4e7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -69,7 +69,7 @@ Suggests: tidyr, withr Remotes: - easystats/modelbased + easystats/modelbased, easystats/insight VignetteBuilder: knitr Encoding: UTF-8 From 7c672606aa94dd2ca63833b703f204896e7dc1d0 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 17 May 2024 13:10:24 +0200 Subject: [PATCH 13/27] Update NEWS.md Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> --- NEWS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 5b4a2f834..b8c4c533e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,8 @@ BREAKING CHANGES -* Arguments named `group` or `group_by` will be deprecated in a future release. +* Arguments named `group` or `group_by` are deprecated and will be removed + in a future release. Please use `by` instead. This affects the following functions in *datawizard*. 
* `data_partition()` From 620a06f37e24f99e1f277333121df4d169c019bc Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 17 May 2024 13:27:19 +0200 Subject: [PATCH 14/27] Also address #265 --- NAMESPACE | 2 +- NEWS.md | 5 + R/adjust.R | 2 +- R/assign_labels.R | 2 +- R/categorize.R | 2 +- R/center.R | 2 +- R/convert_na_to.R | 2 +- R/convert_to_na.R | 2 +- R/data_addprefix.R | 2 +- R/data_codebook.R | 2 +- R/data_duplicated.R | 2 +- R/data_extract.R | 2 +- R/data_group.R | 2 +- R/data_peek.R | 2 +- R/data_relocate.R | 2 +- R/data_remove.R | 2 +- R/data_replicate.R | 2 +- R/data_rescale.R | 2 +- R/data_reverse.R | 2 +- R/data_select.R | 24 ++- R/data_separate.R | 2 +- R/data_tabulate.R | 2 +- R/data_to_long.R | 2 +- R/data_unique.R | 2 +- R/data_unite.R | 2 +- R/describe_distribution.R | 2 +- R/{data_find.R => extract_column_names.R} | 64 ++++---- R/labels_to_levels.R | 2 +- R/means_by_group.R | 2 +- R/normalize.R | 2 +- R/ranktransform.R | 2 +- R/recode_values.R | 30 +++- R/replace_nan_inf.R | 2 +- R/row_means.R | 2 +- R/standardize.R | 2 +- R/text_format.R | 6 +- R/to_factor.R | 2 +- R/to_numeric.R | 2 +- README.Rmd | 6 +- README.md | 6 +- _pkgdown.yaml | 2 +- man/adjust.Rd | 4 +- man/assign_labels.Rd | 4 +- man/categorize.Rd | 4 +- man/center.Rd | 4 +- man/convert_na_to.Rd | 4 +- man/convert_to_na.Rd | 4 +- man/data_codebook.Rd | 4 +- man/data_duplicated.Rd | 4 +- man/data_extract.Rd | 4 +- man/data_group.Rd | 4 +- man/data_peek.Rd | 4 +- man/data_relocate.Rd | 4 +- man/data_rename.Rd | 4 +- man/data_replicate.Rd | 4 +- man/data_separate.Rd | 4 +- man/data_tabulate.Rd | 4 +- man/data_to_long.Rd | 4 +- man/data_unique.Rd | 4 +- man/data_unite.Rd | 4 +- man/describe_distribution.Rd | 4 +- ...ind_columns.Rd => extract_column_names.Rd} | 69 +++------ man/labels_to_levels.Rd | 4 +- man/means_by_group.Rd | 4 +- man/normalize.Rd | 4 +- man/ranktransform.Rd | 4 +- man/recode_values.Rd | 7 +- man/rescale.Rd | 4 +- man/reverse.Rd | 4 +- man/row_means.Rd | 4 +- man/slide.Rd | 4 +- 
man/standardize.Rd | 4 +- man/text_format.Rd | 10 -- man/to_factor.Rd | 4 +- man/to_numeric.Rd | 4 +- ...{test-get_columns.R => test-data_select.R} | 140 +++++++++--------- ..._columns.R => test-extract_column_names.R} | 80 +++++----- tests/testthat/test-select_nse.R | 2 +- vignettes/selection_syntax.Rmd | 4 +- 79 files changed, 331 insertions(+), 314 deletions(-) rename R/{data_find.R => extract_column_names.R} (72%) rename man/{find_columns.Rd => extract_column_names.Rd} (81%) rename tests/testthat/{test-get_columns.R => test-data_select.R} (61%) rename tests/testthat/{test-find_columns.R => test-extract_column_names.R} (51%) diff --git a/NAMESPACE b/NAMESPACE index eea4e22a4..9fe247ca1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -272,7 +272,7 @@ export(distribution_coef_var) export(distribution_mode) export(empty_columns) export(empty_rows) -export(find_columns) +export(extract_column_names) export(format_text) export(get_columns) export(kurtosis) diff --git a/NEWS.md b/NEWS.md index b8c4c533e..37926ea3d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,6 +11,11 @@ BREAKING CHANGES * `means_by_group()` * `rescale_weights()` +* Following aliases are deprecated and will be removed in a future release: + + * `get_columns()`, use `data_select()` instead. + * `data_find()` and `find_columns()`, use `extract_column_names()` instead. + CHANGES * `recode_into()` is more relaxed regarding checking the type of `NA` values. diff --git a/R/adjust.R b/R/adjust.R index 69abbde6b..5d50b16d0 100644 --- a/R/adjust.R +++ b/R/adjust.R @@ -25,7 +25,7 @@ #' re-added. This avoids the centering around 0 that happens by default #' when regressing out another variable (see the examples below for a #' visual representation of this). -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' @inheritParams standardize #' #' @return A data frame comparable to `data`, with adjusted variables. 
diff --git a/R/assign_labels.R b/R/assign_labels.R index bd35513bf..b5541347e 100644 --- a/R/assign_labels.R +++ b/R/assign_labels.R @@ -14,7 +14,7 @@ #' `x`, the right-hand side (RHS) the associated value label. Non-matching #' labels are omitted. #' @param ... Currently not used. -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @inheritSection center Selection of variables - the `select` argument #' diff --git a/R/categorize.R b/R/categorize.R index 341d0c0c9..e6bb8227e 100644 --- a/R/categorize.R +++ b/R/categorize.R @@ -45,7 +45,7 @@ #' variables are appended with new column names (using the defined suffix) to #' the original data frame. #' @param ... not used. -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @inherit data_rename seealso #' diff --git a/R/center.R b/R/center.R index eac550de1..021e157f7 100644 --- a/R/center.R +++ b/R/center.R @@ -29,7 +29,7 @@ #' order, unless a named vector is given. In this case, names are matched #' against the names of the selected variables. #' @param ... Currently not used. -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' @inheritParams standardize #' #' @section Selection of variables - the `select` argument: diff --git a/R/convert_na_to.R b/R/convert_na_to.R index a6aae6e11..5454d0c6f 100644 --- a/R/convert_na_to.R +++ b/R/convert_na_to.R @@ -145,7 +145,7 @@ convert_na_to.character <- function(x, replacement = NULL, verbose = TRUE, ...) #' @param replace_num Value to replace `NA` when variable is of type numeric. #' @param replace_char Value to replace `NA` when variable is of type character. #' @param replace_fac Value to replace `NA` when variable is of type factor. 
-#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @rdname convert_na_to #' @export diff --git a/R/convert_to_na.R b/R/convert_to_na.R index 0e95b7c5f..a86084974 100644 --- a/R/convert_to_na.R +++ b/R/convert_to_na.R @@ -12,7 +12,7 @@ #' @param drop_levels Logical, for factors, when specific levels are replaced #' by `NA`, should unused levels be dropped? #' @param ... Not used. -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @return #' `x`, where all values in `na` are converted to `NA`. diff --git a/R/data_addprefix.R b/R/data_addprefix.R index 91952c029..6cf292ecc 100644 --- a/R/data_addprefix.R +++ b/R/data_addprefix.R @@ -1,5 +1,5 @@ #' @rdname data_rename -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' @examples #' # Add prefix / suffix to all columns #' head(data_addprefix(iris, "NEW_")) diff --git a/R/data_codebook.R b/R/data_codebook.R index cec95196f..de312b0fd 100644 --- a/R/data_codebook.R +++ b/R/data_codebook.R @@ -22,7 +22,7 @@ #' @param line_padding For HTML tables, the distance (in pixel) between lines. #' @param row_color For HTML tables, the fill color for odd rows. #' @inheritParams standardize.data.frame -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @return A formatted data frame, summarizing the content of the data frame. #' Returned columns include the column index of the variables in the original diff --git a/R/data_duplicated.R b/R/data_duplicated.R index 5a15d7134..db3202bd6 100644 --- a/R/data_duplicated.R +++ b/R/data_duplicated.R @@ -7,7 +7,7 @@ #' values for that row, to help in the decision-making when #' selecting which duplicates to keep. 
#' -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @keywords duplicates #' @export diff --git a/R/data_extract.R b/R/data_extract.R index b5613309c..b37c6282b 100644 --- a/R/data_extract.R +++ b/R/data_extract.R @@ -26,7 +26,7 @@ #' @param verbose Toggle warnings. #' @param ... For use by future methods. #' -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @details `data_extract()` can be used to select multiple variables or pull a #' single variable from a data frame. Thus, the return value is by default not diff --git a/R/data_group.R b/R/data_group.R index 4883d69c4..e1ab00758 100644 --- a/R/data_group.R +++ b/R/data_group.R @@ -6,7 +6,7 @@ #' grouping information from a grouped data frame. #' #' @param data A data frame -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @return A grouped data frame, i.e. a data frame with additional information #' about the grouping structure saved as attributes. diff --git a/R/data_peek.R b/R/data_peek.R index 09a42a560..951cae209 100644 --- a/R/data_peek.R +++ b/R/data_peek.R @@ -9,7 +9,7 @@ #' @param width Maximum width of line length to display. If `NULL`, width will #' be determined using `options()$width`. #' @param ... not used. -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @note To show only specific or a limited number of variables, use the #' `select` argument, e.g. `select = 1:5` to show only the first five variables. diff --git a/R/data_relocate.R b/R/data_relocate.R index dea668a2f..58cc0265e 100644 --- a/R/data_relocate.R +++ b/R/data_relocate.R @@ -15,7 +15,7 @@ #' character vector, indicating the name of the destination column, or a #' numeric value, indicating the index number of the destination column. #' If `-1`, will be added before or after the last column. 
-#' @inheritParams find_columns +#' @inheritParams extract_column_names #' @inheritParams data_rename #' #' @inherit data_rename seealso diff --git a/R/data_remove.R b/R/data_remove.R index 1fcd67b0e..472bb2e08 100644 --- a/R/data_remove.R +++ b/R/data_remove.R @@ -1,4 +1,4 @@ -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' @rdname data_relocate #' @examples #' # Remove columns diff --git a/R/data_replicate.R b/R/data_replicate.R index 4ea6f998e..8ab630bb4 100644 --- a/R/data_replicate.R +++ b/R/data_replicate.R @@ -13,7 +13,7 @@ #' provided in `expand` are removed from the data frame. If `FALSE` and `expand` #' contains missing values, the function will throw an error. #' @param ... Currently not used. -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @return A dataframe with each row replicated as many times as defined in `expand`. #' diff --git a/R/data_rescale.R b/R/data_rescale.R index 85ff885c6..90c84a49e 100644 --- a/R/data_rescale.R +++ b/R/data_rescale.R @@ -4,7 +4,7 @@ #' (change the keying/scoring direction), or to expand a range. #' #' @inheritParams categorize -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' @inheritParams standardize.data.frame #' #' @param to Numeric vector of length 2 giving the new range that the variable diff --git a/R/data_reverse.R b/R/data_reverse.R index 2fc9ef493..5543e2e7f 100644 --- a/R/data_reverse.R +++ b/R/data_reverse.R @@ -12,7 +12,7 @@ #' usually only makes sense when factor levels are numeric, not characters. #' @param ... Arguments passed to or from other methods. 
#' @inheritParams categorize -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @inheritSection center Selection of variables - the `select` argument #' diff --git a/R/data_select.R b/R/data_select.R index d8468b22b..5e05154eb 100644 --- a/R/data_select.R +++ b/R/data_select.R @@ -1,6 +1,6 @@ -#' @rdname find_columns +#' @rdname extract_column_names #' @export -get_columns <- function(data, +data_select <- function(data, select = NULL, exclude = NULL, ignore_case = FALSE, @@ -34,6 +34,22 @@ get_columns <- function(data, } -#' @rdname find_columns #' @export -data_select <- get_columns +get_columns <- function(data, + select = NULL, + exclude = NULL, + ignore_case = FALSE, + regex = FALSE, + verbose = TRUE, + ...) { + insight::format_warning("Function `get_columns()` is deprecated and will be removed in a future release. Please use `data_select()` instead.") # nolint + data_select( + data, + select = select, + exclude = exclude, + ignore_case = ignore_case, + regex = regex, + verbose = verbose, + ... + ) +} diff --git a/R/data_separate.R b/R/data_separate.R index 53243fb33..53c5f72a5 100644 --- a/R/data_separate.R +++ b/R/data_separate.R @@ -42,7 +42,7 @@ #' @param convert_na Logical, if `TRUE`, character `"NA"` values are converted #' into real `NA` values. #' @param ... Currently not used. -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @seealso [`data_unite()`] #' diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 76c9733b1..a6099d6ac 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -26,7 +26,7 @@ #' not `NULL`. Can be `"row"` (row percentages), `"column"` (column percentages) #' or `"full"` (to calculate relative frequencies for the full table). #' @param ... not used. -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @section Crosstables: #' If `by` is supplied, a crosstable is created. 
The crosstable includes `` diff --git a/R/data_to_long.R b/R/data_to_long.R index 06fe91efd..3d19e5bc2 100644 --- a/R/data_to_long.R +++ b/R/data_to_long.R @@ -21,7 +21,7 @@ #' @param rows_to The name of the column that will contain the row names or row #' numbers from the original data. If `NULL`, will be removed. #' @param ... Currently not used. -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' @param cols Identical to `select`. This argument is here to ensure compatibility #' with `tidyr::pivot_longer()`. If both `select` and `cols` are provided, `cols` #' is used. diff --git a/R/data_unique.R b/R/data_unique.R index 40a252a2f..149fad082 100644 --- a/R/data_unique.R +++ b/R/data_unique.R @@ -12,7 +12,7 @@ #' #' @param keep The method to be used for duplicate selection, either "best" #' (the default), "first", or "last". -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @return A data frame, containing only the chosen duplicates. #' @seealso [data_duplicated()] diff --git a/R/data_unite.R b/R/data_unite.R index a4cf9dea5..4cf6d340f 100644 --- a/R/data_unite.R +++ b/R/data_unite.R @@ -14,7 +14,7 @@ #' in the united values. If `FALSE`, missing values are represented as `"NA"` #' in the united values. #' @param ... Currently not used. -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @seealso [`data_separate()`] #' diff --git a/R/describe_distribution.R b/R/describe_distribution.R index 37850299a..41f2a8b83 100644 --- a/R/describe_distribution.R +++ b/R/describe_distribution.R @@ -22,7 +22,7 @@ #' (based on [stats::IQR()], using `type = 6`). #' @param verbose Toggle warnings and messages. #' @inheritParams bayestestR::point_estimate -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @details If `x` is a data frame, only numeric variables are kept and will be #' displayed in the summary. 
diff --git a/R/data_find.R b/R/extract_column_names.R similarity index 72% rename from R/data_find.R rename to R/extract_column_names.R index ef6e6dfb5..b04acb045 100644 --- a/R/data_find.R +++ b/R/extract_column_names.R @@ -1,10 +1,8 @@ #' @title Find or get columns in a data frame based on search patterns -#' @name find_columns +#' @name extract_column_names #' -#' @description `find_columns()` returns column names from a data set that -#' match a certain search pattern, while `get_columns()` returns the found data. -#' `data_select()` is an alias for `get_columns()`, and `data_find()` is an alias -#' for `find_columns()`. +#' @description `extract_column_names()` returns column names from a data set that +#' match a certain search pattern, while `data_select()` returns the found data. #' #' @param data A data frame. #' @param select Variables that will be included when performing the required @@ -35,8 +33,8 @@ #' negation should not work as expected, use the `exclude` argument instead. #' #' If `NULL`, selects all columns. Patterns that found no matches are silently -#' ignored, e.g. `find_columns(iris, select = c("Species", "Test"))` will just -#' return `"Species"`. +#' ignored, e.g. `extract_column_names(iris, select = c("Species", "Test"))` +#' will just return `"Species"`. #' @param exclude See `select`, however, column names matched by the pattern #' from `exclude` will be excluded instead of selected. If `NULL` (the default), #' excludes no columns. @@ -58,9 +56,10 @@ #' #' @return #' -#' `find_columns()` returns a character vector with column names that matched -#' the pattern in `select` and `exclude`, or `NULL` if no matching column name -#' was found. `get_columns()` returns a data frame with matching columns. +#' `extract_column_names()` returns a character vector with column names that +#' matched the pattern in `select` and `exclude`, or `NULL` if no matching +#' column name was found. `data_select()` returns a data frame with matching +#' columns. 
#' #' @details #' @@ -69,12 +68,12 @@ #' #' ```r #' foo <- function(data, pattern) { -#' find_columns(data, select = starts_with(pattern)) +#' extract_column_names(data, select = starts_with(pattern)) #' } #' foo(iris, pattern = "Sep") #' #' foo2 <- function(data, pattern) { -#' find_columns(data, select = pattern) +#' extract_column_names(data, select = pattern) #' } #' foo2(iris, pattern = starts_with("Sep")) #' ``` @@ -84,7 +83,7 @@ #' ```r #' for (i in c("Sepal", "Sp")) { #' head(iris) |> -#' find_columns(select = starts_with(i)) |> +#' extract_column_names(select = starts_with(i)) |> #' print() #' } #' ``` @@ -94,7 +93,7 @@ #' #' ```r #' inner <- function(data, arg) { -#' find_columns(data, select = arg) +#' extract_column_names(data, select = arg) #' } #' outer <- function(data, arg) { #' inner(data, starts_with(arg)) @@ -114,25 +113,25 @@ #' #' @examples #' # Find columns names by pattern -#' find_columns(iris, starts_with("Sepal")) -#' find_columns(iris, ends_with("Width")) -#' find_columns(iris, regex("\\.")) -#' find_columns(iris, c("Petal.Width", "Sepal.Length")) +#' extract_column_names(iris, starts_with("Sepal")) +#' extract_column_names(iris, ends_with("Width")) +#' extract_column_names(iris, regex("\\.")) +#' extract_column_names(iris, c("Petal.Width", "Sepal.Length")) #' #' # starts with "Sepal", but not allowed to end with "width" -#' find_columns(iris, starts_with("Sepal"), exclude = contains("Width")) +#' extract_column_names(iris, starts_with("Sepal"), exclude = contains("Width")) #' #' # find numeric with mean > 3.5 #' numeric_mean_35 <- function(x) is.numeric(x) && mean(x, na.rm = TRUE) > 3.5 -#' find_columns(iris, numeric_mean_35) +#' extract_column_names(iris, numeric_mean_35) #' @export -find_columns <- function(data, - select = NULL, - exclude = NULL, - ignore_case = FALSE, - regex = FALSE, - verbose = TRUE, - ...) 
{ +extract_column_names <- function(data, + select = NULL, + exclude = NULL, + ignore_case = FALSE, + regex = FALSE, + verbose = TRUE, + ...) { columns <- .select_nse( select, data, @@ -155,6 +154,22 @@ find_columns <- function(data, } -#' @rdname find_columns #' @export -data_find <- find_columns +data_find <- function(data, + select = NULL, + exclude = NULL, + ignore_case = FALSE, + regex = FALSE, + verbose = TRUE, + ...) { + insight::format_warning("Function `data_find()` is deprecated and will be removed in a future release. Please use `extract_column_names()` instead.") # nolint + extract_column_names( + data, + select = select, + exclude = exclude, + ignore_case = ignore_case, + regex = regex, + verbose = verbose, + ... + ) +} diff --git a/R/labels_to_levels.R b/R/labels_to_levels.R index c1ff97a16..b84f9e751 100644 --- a/R/labels_to_levels.R +++ b/R/labels_to_levels.R @@ -7,7 +7,7 @@ #' @param x A data frame or factor. Other variable types (e.g. numerics) are not #' allowed. #' @param ... Currently not used. -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' @inheritParams categorize #' #' @return `x`, where for all factors former levels are replaced by their value diff --git a/R/means_by_group.R b/R/means_by_group.R index ad188f275..6fe65c5a6 100644 --- a/R/means_by_group.R +++ b/R/means_by_group.R @@ -20,7 +20,7 @@ #' point when rounding estimates and values. #' @param ... Currently not used #' @param group Deprecated. Use `by` instead. -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @return A data frame with information on mean and further summary statistics #' for each sub-group. diff --git a/R/normalize.R b/R/normalize.R index ba2eee40d..a30955b5d 100644 --- a/R/normalize.R +++ b/R/normalize.R @@ -17,7 +17,7 @@ #' `1 - include_bounds`. #' @param ... Arguments passed to or from other methods. 
#' @inheritParams standardize.data.frame -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @inheritSection center Selection of variables - the `select` argument #' diff --git a/R/ranktransform.R b/R/ranktransform.R index 37beceb60..c52cb0d78 100644 --- a/R/ranktransform.R +++ b/R/ranktransform.R @@ -11,7 +11,7 @@ #' `"first"`, `"last"`, `"random"`, `"max"` or `"min"`. See [rank()] for #' details. #' @param ... Arguments passed to or from other methods. -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' @inheritParams standardize.data.frame #' #' @inheritSection center Selection of variables - the `select` argument diff --git a/R/recode_values.R b/R/recode_values.R index e355e9cb0..84e99da94 100644 --- a/R/recode_values.R +++ b/R/recode_values.R @@ -22,7 +22,7 @@ #' `preserve_na=TRUE`, `default` will no longer convert `NA` into the specified #' default value. #' @param ... not used. -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' @inheritParams categorize #' #' @return `x`, where old values are replaced by new values. @@ -531,6 +531,30 @@ recode_values.data.frame <- function(x, ## TODO Deprecate and remove alias later -#' @rdname recode_values #' @export -change_code <- recode_values +change_code <- function(x, + select = NULL, + exclude = NULL, + recode = NULL, + default = NULL, + preserve_na = TRUE, + append = FALSE, + ignore_case = FALSE, + regex = FALSE, + verbose = TRUE, + ...) { + insight::format_warning("Function `change_code()` is deprecated. Please use `recode_values()` instead.") # nolint + recode_values( + x, + select = select, + exclude = exclude, + recode = recode, + default = default, + preserve_na = preserve_na, + append = append, + ignore_case = ignore_case, + regex = regex, + verbose = verbose, + ... 
+ ) +} diff --git a/R/replace_nan_inf.R b/R/replace_nan_inf.R index 923743d1a..3ba0397f4 100644 --- a/R/replace_nan_inf.R +++ b/R/replace_nan_inf.R @@ -34,7 +34,7 @@ replace_nan_inf.default <- function(x, ...) { x } -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' @export replace_nan_inf.data.frame <- function(x, select = NULL, diff --git a/R/row_means.R b/R/row_means.R index 2513cc3a1..fdcaa49fd 100644 --- a/R/row_means.R +++ b/R/row_means.R @@ -19,7 +19,7 @@ #' @param remove_na Logical, if `TRUE` (default), removes missing (`NA`) values #' before calculating row means. Only applies if `min_valuid` is not specified. #' @param verbose Toggle warnings. -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @return A vector with row means for those rows with at least `n` valid values. #' diff --git a/R/standardize.R b/R/standardize.R index d5082803d..27de92946 100644 --- a/R/standardize.R +++ b/R/standardize.R @@ -68,7 +68,7 @@ #' @param force Logical, if `TRUE`, forces recoding of factors and character #' vectors as well. #' @param ... Arguments passed to or from other methods. -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' #' @inheritSection center Selection of variables - the `select` argument #' diff --git a/R/text_format.R b/R/text_format.R index 180807746..9ca58219d 100644 --- a/R/text_format.R +++ b/R/text_format.R @@ -44,9 +44,11 @@ text_format <- function(text, sep = ", ", last = " and ", width = NULL, enclose ## TODO Deprecate and remove alias later -#' @rdname text_format #' @export -format_text <- text_format +format_text <- function(text, sep = ", ", last = " and ", width = NULL, enclose = NULL, ...) { + insight::format_warning("Function `format_text()` is deprecated and will be removed in a future release. Please use `text_format()` instead.") # nolint + text_format(text, sep = sep, last = last, width = width, enclose = enclose, ...) 
+} #' @rdname text_format #' @export diff --git a/R/to_factor.R b/R/to_factor.R index c31580072..8fa46d404 100644 --- a/R/to_factor.R +++ b/R/to_factor.R @@ -11,7 +11,7 @@ #' levels after `x` was converted to factor. Else, factor levels are based on #' the values of `x` (i.e. as if using `as.factor()`). #' @param ... Arguments passed to or from other methods. -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' @inheritParams categorize #' #' @inheritSection center Selection of variables - the `select` argument diff --git a/R/to_numeric.R b/R/to_numeric.R index c43956399..e38e12e80 100644 --- a/R/to_numeric.R +++ b/R/to_numeric.R @@ -14,7 +14,7 @@ #' @param lowest Numeric, indicating the lowest (minimum) value when converting #' factors or character vectors to numeric values. #' @param ... Arguments passed to or from other methods. -#' @inheritParams find_columns +#' @inheritParams extract_column_names #' @inheritParams categorize #' #' @note By default, `to_numeric()` converts factors into "binary" dummies, i.e. 
diff --git a/README.Rmd b/README.Rmd index 6a696194c..8922aa1c1 100644 --- a/README.Rmd +++ b/README.Rmd @@ -90,14 +90,14 @@ data_match(mtcars, data.frame(vs = 0, am = 1)) data_filter(mtcars, vs == 0 & am == 1) ``` -Finding columns in a data frame, or retrieving the data of selected columns, can be achieved using `find_columns()` or `get_columns()`: +Finding columns in a data frame, or retrieving the data of selected columns, can be achieved using `extract_column_names()` or `data_select()`: ```{r} # find column names matching a pattern -find_columns(iris, starts_with("Sepal")) +extract_column_names(iris, starts_with("Sepal")) # return data columns matching a pattern -get_columns(iris, starts_with("Sepal")) |> head() +data_select(iris, starts_with("Sepal")) |> head() ``` It is also possible to extract one or more variables: diff --git a/README.md b/README.md index 0d459a9cf..70c7212e5 100644 --- a/README.md +++ b/README.md @@ -130,15 +130,15 @@ data_filter(mtcars, vs == 0 & am == 1) ``` Finding columns in a data frame, or retrieving the data of selected -columns, can be achieved using `find_columns()` or `get_columns()`: +columns, can be achieved using `extract_column_names()` or `data_select()`: ``` r # find column names matching a pattern -find_columns(iris, starts_with("Sepal")) +extract_column_names(iris, starts_with("Sepal")) #> [1] "Sepal.Length" "Sepal.Width" # return data columns matching a pattern -get_columns(iris, starts_with("Sepal")) |> head() +data_select(iris, starts_with("Sepal")) |> head() #> Sepal.Length Sepal.Width #> 1 5.1 3.5 #> 2 4.9 3.0 diff --git a/_pkgdown.yaml b/_pkgdown.yaml index 65bae30c8..d52994e16 100644 --- a/_pkgdown.yaml +++ b/_pkgdown.yaml @@ -102,7 +102,7 @@ reference: - contains("rownames") - rowid_as_column - contains("colnames") - - find_columns + - extract_column_names - data_restoretype - title: Helpers for Text Formatting diff --git a/man/adjust.Rd b/man/adjust.Rd index 08c841b7a..64e50d9d3 100644 --- a/man/adjust.Rd +++ 
b/man/adjust.Rd @@ -68,8 +68,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/assign_labels.Rd b/man/assign_labels.Rd index eea07534e..cca14cc85 100644 --- a/man/assign_labels.Rd +++ b/man/assign_labels.Rd @@ -63,8 +63,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/categorize.Rd b/man/categorize.Rd index 0f2478776..d8fddde17 100644 --- a/man/categorize.Rd +++ b/man/categorize.Rd @@ -103,8 +103,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. 
If \code{NULL} (the default), diff --git a/man/center.Rd b/man/center.Rd index c09012476..f143f64b2 100644 --- a/man/center.Rd +++ b/man/center.Rd @@ -97,8 +97,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/convert_na_to.Rd b/man/convert_na_to.Rd index c3f252a7b..91121ff94 100644 --- a/man/convert_na_to.Rd +++ b/man/convert_na_to.Rd @@ -66,8 +66,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/convert_to_na.Rd b/man/convert_to_na.Rd index f0b8ce263..2529294b7 100644 --- a/man/convert_to_na.Rd +++ b/man/convert_to_na.Rd @@ -69,8 +69,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. 
\code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/data_codebook.Rd b/man/data_codebook.Rd index 32e3be5e2..4c0f935e7 100644 --- a/man/data_codebook.Rd +++ b/man/data_codebook.Rd @@ -59,8 +59,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/data_duplicated.Rd b/man/data_duplicated.Rd index 130dab85b..73c3e8de1 100644 --- a/man/data_duplicated.Rd +++ b/man/data_duplicated.Rd @@ -45,8 +45,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/data_extract.Rd b/man/data_extract.Rd index 88d89f0eb..a0cd4e402 100644 --- a/man/data_extract.Rd +++ b/man/data_extract.Rd @@ -52,8 +52,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. 
Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{...}{For use by future methods.} diff --git a/man/data_group.Rd b/man/data_group.Rd index 0f5b593d4..56f5f314e 100644 --- a/man/data_group.Rd +++ b/man/data_group.Rd @@ -49,8 +49,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/data_peek.Rd b/man/data_peek.Rd index 6cecf7a1a..4f3f88e8a 100644 --- a/man/data_peek.Rd +++ b/man/data_peek.Rd @@ -52,8 +52,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/data_relocate.Rd b/man/data_relocate.Rd index 4e3fd18eb..d51c4cf1e 100644 --- a/man/data_relocate.Rd +++ b/man/data_relocate.Rd @@ -69,8 +69,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. 
Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{before, after}{Destination of columns. Supplying neither will move columns to the left-hand side; specifying both is an error. Can be a diff --git a/man/data_rename.Rd b/man/data_rename.Rd index 81ab63d1e..ea68c613b 100644 --- a/man/data_rename.Rd +++ b/man/data_rename.Rd @@ -77,8 +77,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/data_replicate.Rd b/man/data_replicate.Rd index 4c152b371..35448155d 100644 --- a/man/data_replicate.Rd +++ b/man/data_replicate.Rd @@ -52,8 +52,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. 
If \code{NULL} (the default), diff --git a/man/data_separate.Rd b/man/data_separate.Rd index 4dad5b713..37528d46e 100644 --- a/man/data_separate.Rd +++ b/man/data_separate.Rd @@ -55,8 +55,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{new_columns}{The names of the new columns, as character vector. If more than one variable was selected (in \code{select}), the new names are prefixed diff --git a/man/data_tabulate.Rd b/man/data_tabulate.Rd index 34961481a..b744c1f1b 100644 --- a/man/data_tabulate.Rd +++ b/man/data_tabulate.Rd @@ -94,8 +94,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/data_to_long.Rd b/man/data_to_long.Rd index f6abe5f39..1455dd01e 100644 --- a/man/data_to_long.Rd +++ b/man/data_to_long.Rd @@ -69,8 +69,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. 
\code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{names_to}{The name of the new column that will contain the column names.} diff --git a/man/data_unique.Rd b/man/data_unique.Rd index f7272c87b..8a45bfc21 100644 --- a/man/data_unique.Rd +++ b/man/data_unique.Rd @@ -46,8 +46,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{keep}{The method to be used for duplicate selection, either "best" (the default), "first", or "last".} diff --git a/man/data_unite.Rd b/man/data_unite.Rd index 63c2e73a6..ba7710a8a 100644 --- a/man/data_unite.Rd +++ b/man/data_unite.Rd @@ -52,8 +52,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/describe_distribution.Rd b/man/describe_distribution.Rd index fd229567d..369bd9ef6 100644 --- a/man/describe_distribution.Rd +++ b/man/describe_distribution.Rd @@ -111,8 +111,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. 
\code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/find_columns.Rd b/man/extract_column_names.Rd similarity index 81% rename from man/find_columns.Rd rename to man/extract_column_names.Rd index db5671755..9251348b3 100644 --- a/man/find_columns.Rd +++ b/man/extract_column_names.Rd @@ -1,33 +1,11 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data_find.R, R/data_select.R -\name{find_columns} -\alias{find_columns} -\alias{data_find} -\alias{get_columns} +% Please edit documentation in R/data_select.R, R/extract_column_names.R +\name{data_select} \alias{data_select} +\alias{extract_column_names} \title{Find or get columns in a data frame based on search patterns} \usage{ -find_columns( - data, - select = NULL, - exclude = NULL, - ignore_case = FALSE, - regex = FALSE, - verbose = TRUE, - ... -) - -data_find( - data, - select = NULL, - exclude = NULL, - ignore_case = FALSE, - regex = FALSE, - verbose = TRUE, - ... -) - -get_columns( +data_select( data, select = NULL, exclude = NULL, @@ -37,7 +15,7 @@ get_columns( ... ) -data_select( +extract_column_names( data, select = NULL, exclude = NULL, @@ -79,8 +57,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. 
\code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), @@ -104,27 +82,26 @@ functions (see 'Details'), this argument may be used as workaround.} \item{...}{Arguments passed down to other functions. Mostly not used yet.} } \value{ -\code{find_columns()} returns a character vector with column names that matched -the pattern in \code{select} and \code{exclude}, or \code{NULL} if no matching column name -was found. \code{get_columns()} returns a data frame with matching columns. +\code{extract_column_names()} returns a character vector with column names that +matched the pattern in \code{select} and \code{exclude}, or \code{NULL} if no matching +column name was found. \code{data_select()} returns a data frame with matching +columns. } \description{ -\code{find_columns()} returns column names from a data set that -match a certain search pattern, while \code{get_columns()} returns the found data. -\code{data_select()} is an alias for \code{get_columns()}, and \code{data_find()} is an alias -for \code{find_columns()}. +\code{extract_column_names()} returns column names from a data set that +match a certain search pattern, while \code{data_select()} returns the found data. } \details{ Note that it is possible to either pass an entire select helper or only the pattern inside a select helper as a function argument: \if{html}{\out{
}}\preformatted{foo <- function(data, pattern) \{ - find_columns(data, select = starts_with(pattern)) + extract_column_names(data, select = starts_with(pattern)) \} foo(iris, pattern = "Sep") foo2 <- function(data, pattern) \{ - find_columns(data, select = pattern) + extract_column_names(data, select = pattern) \} foo2(iris, pattern = starts_with("Sep")) }\if{html}{\out{
}} @@ -133,7 +110,7 @@ This means that it is also possible to use loop values as arguments or patterns: \if{html}{\out{
}}\preformatted{for (i in c("Sepal", "Sp")) \{ head(iris) |> - find_columns(select = starts_with(i)) |> + extract_column_names(select = starts_with(i)) |> print() \} }\if{html}{\out{
}} @@ -142,7 +119,7 @@ However, this behavior is limited to a "single-level function". It will not work in nested functions, like below: \if{html}{\out{
}}\preformatted{inner <- function(data, arg) \{ - find_columns(data, select = arg) + extract_column_names(data, select = arg) \} outer <- function(data, arg) \{ inner(data, starts_with(arg)) @@ -161,17 +138,17 @@ outer(iris, starts_with("Sep")) } \examples{ # Find columns names by pattern -find_columns(iris, starts_with("Sepal")) -find_columns(iris, ends_with("Width")) -find_columns(iris, regex("\\\\.")) -find_columns(iris, c("Petal.Width", "Sepal.Length")) +extract_column_names(iris, starts_with("Sepal")) +extract_column_names(iris, ends_with("Width")) +extract_column_names(iris, regex("\\\\.")) +extract_column_names(iris, c("Petal.Width", "Sepal.Length")) # starts with "Sepal", but not allowed to end with "width" -find_columns(iris, starts_with("Sepal"), exclude = contains("Width")) +extract_column_names(iris, starts_with("Sepal"), exclude = contains("Width")) # find numeric with mean > 3.5 numeric_mean_35 <- function(x) is.numeric(x) && mean(x, na.rm = TRUE) > 3.5 -find_columns(iris, numeric_mean_35) +extract_column_names(iris, numeric_mean_35) } \seealso{ \itemize{ diff --git a/man/labels_to_levels.Rd b/man/labels_to_levels.Rd index 12e8c0fe3..8024eb2d3 100644 --- a/man/labels_to_levels.Rd +++ b/man/labels_to_levels.Rd @@ -58,8 +58,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. 
If \code{NULL} (the default), diff --git a/man/means_by_group.Rd b/man/means_by_group.Rd index 5473daeec..d7a6dfc96 100644 --- a/man/means_by_group.Rd +++ b/man/means_by_group.Rd @@ -87,8 +87,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/normalize.Rd b/man/normalize.Rd index 646e5b5ec..4a9a61a68 100644 --- a/man/normalize.Rd +++ b/man/normalize.Rd @@ -96,8 +96,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/ranktransform.Rd b/man/ranktransform.Rd index 01da194f2..c23105735 100644 --- a/man/ranktransform.Rd +++ b/man/ranktransform.Rd @@ -64,8 +64,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. 
\code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/recode_values.Rd b/man/recode_values.Rd index 45b127730..f807c8e7b 100644 --- a/man/recode_values.Rd +++ b/man/recode_values.Rd @@ -4,7 +4,6 @@ \alias{recode_values} \alias{recode_values.numeric} \alias{recode_values.data.frame} -\alias{change_code} \title{Recode old values of variables into new values} \usage{ recode_values(x, ...) @@ -31,8 +30,6 @@ recode_values(x, ...) verbose = TRUE, ... ) - -change_code(x, ...) } \arguments{ \item{x}{A data frame, numeric or character vector, or factor.} @@ -88,8 +85,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/rescale.Rd b/man/rescale.Rd index 83cc3d64d..016a6f841 100644 --- a/man/rescale.Rd +++ b/man/rescale.Rd @@ -92,8 +92,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. 
\code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/reverse.Rd b/man/reverse.Rd index 04066e8e6..6304dffc6 100644 --- a/man/reverse.Rd +++ b/man/reverse.Rd @@ -70,8 +70,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/row_means.Rd b/man/row_means.Rd index 21f1853d9..c347fc6f1 100644 --- a/man/row_means.Rd +++ b/man/row_means.Rd @@ -48,8 +48,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/slide.Rd b/man/slide.Rd index 2f1903ff4..554434259 100644 --- a/man/slide.Rd +++ b/man/slide.Rd @@ -59,8 +59,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. 
\code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/standardize.Rd b/man/standardize.Rd index 18c560c80..4041f2dc0 100644 --- a/man/standardize.Rd +++ b/man/standardize.Rd @@ -170,8 +170,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/text_format.Rd b/man/text_format.Rd index 87f045193..b65190f31 100644 --- a/man/text_format.Rd +++ b/man/text_format.Rd @@ -2,7 +2,6 @@ % Please edit documentation in R/text_format.R \name{text_format} \alias{text_format} -\alias{format_text} \alias{text_fullstop} \alias{text_lastchar} \alias{text_concatenate} @@ -20,15 +19,6 @@ text_format( ... ) -format_text( - text, - sep = ", ", - last = " and ", - width = NULL, - enclose = NULL, - ... -) - text_fullstop(text) text_lastchar(text, n = 1) diff --git a/man/to_factor.Rd b/man/to_factor.Rd index 6b57df59c..e035769ec 100644 --- a/man/to_factor.Rd +++ b/man/to_factor.Rd @@ -61,8 +61,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. 
\code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/to_numeric.Rd b/man/to_numeric.Rd index 7c78b1ba6..7478c9579 100644 --- a/man/to_numeric.Rd +++ b/man/to_numeric.Rd @@ -55,8 +55,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just -return \code{"Species"}.} +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. 
If \code{NULL} (the default), diff --git a/tests/testthat/test-get_columns.R b/tests/testthat/test-data_select.R similarity index 61% rename from tests/testthat/test-get_columns.R rename to tests/testthat/test-data_select.R index a23f267a7..2557a1f7b 100644 --- a/tests/testthat/test-get_columns.R +++ b/tests/testthat/test-data_select.R @@ -1,33 +1,33 @@ # input check --------------------- -test_that("get_columns checks for data frame", { - expect_error(get_columns(NULL), regexp = "provided") +test_that("data_select checks for data frame", { + expect_error(data_select(NULL), regexp = "provided") x <- list(a = 1:2, b = letters[1:3]) - expect_error(get_columns(x), regexp = "coerced") + expect_error(data_select(x), regexp = "coerced") }) # select helpers --------------------- -test_that("get_columns works with select helpers", { +test_that("data_select works with select helpers", { expect_identical( - get_columns(iris, starts_with("Sepal")), + data_select(iris, starts_with("Sepal")), iris[c("Sepal.Length", "Sepal.Width")] ) expect_identical( - get_columns(iris, ends_with("Width")), + data_select(iris, ends_with("Width")), iris[c("Sepal.Width", "Petal.Width")] ) expect_identical( - get_columns(iris, regex("\\.")), + data_select(iris, regex("\\.")), iris[c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")] ) expect_identical( - get_columns(iris, contains("Wid")), + data_select(iris, contains("Wid")), iris[c("Sepal.Width", "Petal.Width")] ) }) @@ -36,14 +36,14 @@ test_that("get_columns works with select helpers", { # select helpers, negation --------------------- -test_that("get_columns works with negation of select helpers", { +test_that("data_select works with negation of select helpers", { expect_identical( - get_columns(iris, -starts_with("Sepal")), + data_select(iris, -starts_with("Sepal")), iris[c("Petal.Length", "Petal.Width", "Species")] ) expect_identical( - get_columns(iris, -ends_with("Width")), + data_select(iris, -ends_with("Width")), 
iris[c("Sepal.Length", "Petal.Length", "Species")] ) }) @@ -52,28 +52,28 @@ test_that("get_columns works with negation of select helpers", { # select-nse with function --------------------- -test_that("get_columns works with select-functions", { +test_that("data_select works with select-functions", { expect_identical( - get_columns(iris, is.numeric()), + data_select(iris, is.numeric()), iris[sapply(iris, is.numeric)] ) expect_identical( - get_columns(iris, is.numeric), + data_select(iris, is.numeric), iris[sapply(iris, is.numeric)] ) expect_identical( - get_columns(iris, is.factor()), + data_select(iris, is.factor()), iris[sapply(iris, is.factor)] ) expect_identical( - get_columns(iris, is.factor), + data_select(iris, is.factor), iris[sapply(iris, is.factor)] ) - expect_warning(expect_null(get_columns(iris, is.logical()))) + expect_warning(expect_null(data_select(iris, is.logical()))) }) @@ -82,19 +82,19 @@ test_that("get_columns works with select-functions", { testfun <- function(i) { is.numeric(i) && mean(i, na.rm = TRUE) > 3.5 } -test_that("get_columns works with user-defined select-functions", { - expect_identical(get_columns(iris, testfun), iris[sapply(iris, testfun)]) - expect_identical(get_columns(iris, -testfun), iris[!sapply(iris, testfun)]) +test_that("data_select works with user-defined select-functions", { + expect_identical(data_select(iris, testfun), iris[sapply(iris, testfun)]) + expect_identical(data_select(iris, -testfun), iris[!sapply(iris, testfun)]) testfun2 <- function(i) { is.numeric(i) && mean(i, na.rm = TRUE) < 5 } expect_identical( - get_columns(iris, select = testfun, exclude = testfun2), + data_select(iris, select = testfun, exclude = testfun2), iris["Sepal.Length"] ) expect_identical( - get_columns(iris, select = testfun, exclude = -testfun2), + data_select(iris, select = testfun, exclude = -testfun2), iris["Petal.Length"] ) }) @@ -103,42 +103,42 @@ test_that("get_columns works with user-defined select-functions", { # select-nse with 
negation of functions --------------------- -test_that("get_columns works with negated select-functions", { +test_that("data_select works with negated select-functions", { expect_identical( - get_columns(iris, -is.numeric()), + data_select(iris, -is.numeric()), iris[sapply(iris, function(i) !is.numeric(i))] ) expect_identical( - get_columns(iris, -is.numeric), + data_select(iris, -is.numeric), iris[sapply(iris, function(i) !is.numeric(i))] ) expect_identical( - get_columns(iris, -is.factor()), + data_select(iris, -is.factor()), iris[sapply(iris, function(i) !is.factor(i))] ) expect_identical( - get_columns(iris, -is.factor), + data_select(iris, -is.factor), iris[sapply(iris, function(i) !is.factor(i))] ) - expect_identical(get_columns(iris, -is.logical), iris) + expect_identical(data_select(iris, -is.logical), iris) }) # select-nse with ranges --------------------- -test_that("get_columns works with ranges", { +test_that("data_select works with ranges", { expect_identical( - get_columns(iris, 2:3), + data_select(iris, 2:3), iris[2:3] ) expect_identical( - get_columns(iris, Sepal.Width:Petal.Length), + data_select(iris, Sepal.Width:Petal.Length), iris[2:3] ) }) @@ -147,33 +147,33 @@ test_that("get_columns works with ranges", { # select-nse with negated ranges --------------------- -test_that("get_columns works with negated ranges", { +test_that("data_select works with negated ranges", { expect_identical( - get_columns(iris, -(1:2)), + data_select(iris, -(1:2)), iris[c(3, 4, 5)] ) expect_identical( - get_columns(iris, -1:-2), + data_select(iris, -1:-2), iris[c(3, 4, 5)] ) expect_identical( - get_columns(iris, exclude = -1:-2), + data_select(iris, exclude = -1:-2), iris[1:2] ) expect_identical( - get_columns(iris, exclude = 2:3), + data_select(iris, exclude = 2:3), iris[c(1, 4, 5)] ) expect_error( - get_columns(iris, -Sepal.Width:Petal.Length), + data_select(iris, -Sepal.Width:Petal.Length), "can't mix negative and positive" ) expect_identical( - get_columns(iris, 
-(Sepal.Width:Petal.Length)), + data_select(iris, -(Sepal.Width:Petal.Length)), iris[c(1, 4, 5)] ) }) @@ -182,14 +182,14 @@ test_that("get_columns works with negated ranges", { # select-nse with formulas --------------------- -test_that("get_columns works with formulas", { +test_that("data_select works with formulas", { expect_identical( - get_columns(iris, ~ Sepal.Width + Petal.Length), + data_select(iris, ~ Sepal.Width + Petal.Length), iris[2:3] ) expect_identical( - get_columns(iris, exclude = ~ Sepal.Width + Petal.Length), + data_select(iris, exclude = ~ Sepal.Width + Petal.Length), iris[c(1, 4, 5)] ) }) @@ -198,50 +198,50 @@ test_that("get_columns works with formulas", { # select-nse, other cases --------------------- -test_that("get_columns works, other cases", { - expect_identical(get_columns(iris), iris) +test_that("data_select works, other cases", { + expect_identical(data_select(iris), iris) expect_identical( - get_columns(iris, c("Petal.Width", "Sepal.Length")), + data_select(iris, c("Petal.Width", "Sepal.Length")), iris[c("Petal.Width", "Sepal.Length")] ) expect_identical( - get_columns(iris, -c("Petal.Width", "Sepal.Length")), + data_select(iris, -c("Petal.Width", "Sepal.Length")), iris[setdiff(colnames(iris), c("Petal.Width", "Sepal.Length"))] ) expect_identical( - get_columns(iris, -Petal.Width), + data_select(iris, -Petal.Width), iris[setdiff(colnames(iris), "Petal.Width")] ) expect_identical( - get_columns(mtcars, c("am", "gear", "cyl")), + data_select(mtcars, c("am", "gear", "cyl")), mtcars[c("am", "gear", "cyl")] ) expect_identical( - get_columns(mtcars, c("vam", "gear", "cyl")), + data_select(mtcars, c("vam", "gear", "cyl")), mtcars[c("gear", "cyl")] ) - expect_warning(expect_null(get_columns(mtcars, ends_with("abc")))) + expect_warning(expect_null(data_select(mtcars, ends_with("abc")))) expect_identical( - get_columns(mtcars, regex("rb$")), + data_select(mtcars, regex("rb$")), mtcars["carb"] ) expect_identical( - get_columns(mtcars, 
regex("^c")), + data_select(mtcars, regex("^c")), mtcars[c("cyl", "carb")] ) - expect_warning(expect_null(get_columns(mtcars, "^c"))) + expect_warning(expect_null(data_select(mtcars, "^c"))) expect_identical( - get_columns(mtcars, regex("^C"), ignore_case = TRUE), + data_select(mtcars, regex("^C"), ignore_case = TRUE), mtcars[c("cyl", "carb")] ) }) @@ -250,9 +250,9 @@ test_that("get_columns works, other cases", { # select-nse works when called from other function --------------------- -test_that("get_columns from other functions", { +test_that("data_select from other functions", { test_fun1 <- function(data, i) { - get_columns(data, select = i) + data_select(data, select = i) } expect_identical( test_fun1(iris, c("Sepal.Length", "Sepal.Width")), @@ -265,7 +265,7 @@ test_that("get_columns from other functions", { ) test_fun1a <- function(data, i) { - get_columns(data, select = i, regex = TRUE) + data_select(data, select = i, regex = TRUE) } expect_identical( test_fun1a(iris, "Sep"), @@ -273,7 +273,7 @@ test_that("get_columns from other functions", { ) test_fun1b <- function(data, i) { - get_columns(data, select = i, regex = TRUE) + data_select(data, select = i, regex = TRUE) } expect_identical( test_fun1b(iris, "Width$"), @@ -281,7 +281,7 @@ test_that("get_columns from other functions", { ) test_fun1c <- function(data, i) { - get_columns(data, select = -i) + data_select(data, select = -i) } expect_identical( test_fun1c(iris, c("Sepal.Length", "Sepal.Width")), @@ -290,7 +290,7 @@ test_that("get_columns from other functions", { test_fun2 <- function(data) { - get_columns(data, select = starts_with("Sep")) + data_select(data, select = starts_with("Sep")) } expect_identical( test_fun2(iris), @@ -299,7 +299,7 @@ test_that("get_columns from other functions", { test_fun3 <- function(data) { i <- "Sep" - get_columns(data, select = starts_with(i)) + data_select(data, select = starts_with(i)) } expect_identical( test_fun3(iris), @@ -313,7 +313,7 @@ test_that("get_columns from 
other functions", { testfun2 <- function(i) { is.numeric(i) && mean(i, na.rm = TRUE) < 5 } - get_columns(x, select = testfun, exclude = -testfun2) + data_select(x, select = testfun, exclude = -testfun2) } expect_identical(test_top(iris), iris["Petal.Length"]) }) @@ -322,14 +322,14 @@ test_that("get_columns from other functions", { # preserve attributes -------------------------- -test_that("get_columns preserves attributes", { +test_that("data_select preserves attributes", { skip_if_not_installed("parameters") m <- lm(Sepal.Length ~ Species, data = iris) out <- parameters::parameters(m) a1 <- attributes(out) - out2 <- get_columns(out, 1:3) + out2 <- data_select(out, 1:3) a2 <- attributes(out2) expect_identical(sort(names(a1)), sort(names(a2))) @@ -339,7 +339,7 @@ test_that("get_columns preserves attributes", { test_that("select helpers work in functions and loops", { foo <- function(data, i) { - find_columns(data, select = starts_with(i)) + extract_column_names(data, select = starts_with(i)) } expect_identical( foo(iris, "Sep"), @@ -347,7 +347,7 @@ test_that("select helpers work in functions and loops", { ) for (i in "Sepal") { - x <- find_columns(iris, select = starts_with(i)) + x <- extract_column_names(iris, select = starts_with(i)) } expect_identical( x, @@ -355,7 +355,7 @@ test_that("select helpers work in functions and loops", { ) for (i in "Length") { - x <- find_columns(iris, select = ends_with(i)) + x <- extract_column_names(iris, select = ends_with(i)) } expect_identical( x, @@ -366,7 +366,7 @@ test_that("select helpers work in functions and loops", { test_that("select helpers work in functions and loops even if there's an object with the same name in the environment above", { i <- "Petal" foo <- function(data, i) { - find_columns(data, select = starts_with(i)) + extract_column_names(data, select = starts_with(i)) } expect_identical( foo(iris, "Sep"), @@ -374,7 +374,7 @@ test_that("select helpers work in functions and loops even if there's an object ) for 
(i in "Sepal") { - x <- find_columns(iris, select = starts_with(i)) + x <- extract_column_names(iris, select = starts_with(i)) } expect_identical( x, @@ -384,7 +384,7 @@ test_that("select helpers work in functions and loops even if there's an object i <- "Width" for (i in "Length") { - x <- find_columns(iris, select = ends_with(i)) + x <- extract_column_names(iris, select = ends_with(i)) } expect_identical( x, @@ -395,7 +395,7 @@ test_that("select helpers work in functions and loops even if there's an object test_that("old solution still works", { foo <- function(data) { i <- "Sep" - find_columns(data, select = i, regex = TRUE) + extract_column_names(data, select = i, regex = TRUE) } expect_identical( foo(iris), diff --git a/tests/testthat/test-find_columns.R b/tests/testthat/test-extract_column_names.R similarity index 51% rename from tests/testthat/test-find_columns.R rename to tests/testthat/test-extract_column_names.R index 923df8c4a..67cd680f7 100644 --- a/tests/testthat/test-find_columns.R +++ b/tests/testthat/test-extract_column_names.R @@ -1,83 +1,83 @@ -test_that("find_columns works as expected", { +test_that("extract_column_names works as expected", { expect_identical( - find_columns(iris, starts_with("Sepal")), + extract_column_names(iris, starts_with("Sepal")), c("Sepal.Length", "Sepal.Width") ) expect_identical( - find_columns(iris, starts_with("Sepal", "Petal")), + extract_column_names(iris, starts_with("Sepal", "Petal")), c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") ) expect_identical( - find_columns(iris, ends_with("Width")), + extract_column_names(iris, ends_with("Width")), c("Sepal.Width", "Petal.Width") ) expect_identical( - find_columns(iris, ends_with("Length", "Width")), + extract_column_names(iris, ends_with("Length", "Width")), c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") ) expect_identical( - find_columns(iris, regex("\\.")), + extract_column_names(iris, regex("\\.")), c("Sepal.Length", "Sepal.Width", 
"Petal.Length", "Petal.Width") ) expect_identical( - find_columns(iris, c("Petal.Width", "Sepal.Length")), + extract_column_names(iris, c("Petal.Width", "Sepal.Length")), c("Petal.Width", "Sepal.Length") ) expect_identical( - find_columns(iris, contains("Wid")), + extract_column_names(iris, contains("Wid")), c("Sepal.Width", "Petal.Width") ) expect_identical( - find_columns(iris, contains("en", "idt")), + extract_column_names(iris, contains("en", "idt")), c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") ) expect_identical( - find_columns(mtcars, c("am", "gear", "cyl")), + extract_column_names(mtcars, c("am", "gear", "cyl")), c("am", "gear", "cyl") ) expect_identical( - find_columns(mtcars, c("vam", "gear", "cyl")), + extract_column_names(mtcars, c("vam", "gear", "cyl")), c("gear", "cyl") ) - expect_warning(expect_null(find_columns(mtcars, ends_with("abc")))) + expect_warning(expect_null(extract_column_names(mtcars, ends_with("abc")))) expect_identical( - find_columns(mtcars, regex("rb$")), + extract_column_names(mtcars, regex("rb$")), "carb" ) expect_identical( - find_columns(mtcars, regex("^c")), + extract_column_names(mtcars, regex("^c")), c("cyl", "carb") ) - expect_warning(expect_null(find_columns(mtcars, "^c"))) + expect_warning(expect_null(extract_column_names(mtcars, "^c"))) expect_identical( - find_columns(mtcars, regex("^C"), ignore_case = TRUE), + extract_column_names(mtcars, regex("^C"), ignore_case = TRUE), c("cyl", "carb") ) expect_identical( - find_columns(iris, "Width$", regex = TRUE), + extract_column_names(iris, "Width$", regex = TRUE), c("Sepal.Width", "Petal.Width") ) }) -test_that("find_columns from other functions", { +test_that("extract_column_names from other functions", { test_fun1 <- function(data, i) { - find_columns(data, select = i) + extract_column_names(data, select = i) } expect_identical( test_fun1(iris, c("Sepal.Length", "Sepal.Width")), @@ -90,7 +90,7 @@ test_that("find_columns from other functions", { ) test_fun1a 
<- function(data, i) { - find_columns(data, select = i, regex = TRUE) + extract_column_names(data, select = i, regex = TRUE) } expect_identical( test_fun1a(iris, "Sep"), @@ -98,7 +98,7 @@ test_that("find_columns from other functions", { ) test_fun1b <- function(data, i) { - find_columns(data, select = i, regex = TRUE) + extract_column_names(data, select = i, regex = TRUE) } expect_identical( test_fun1b(iris, "Width$"), @@ -106,7 +106,7 @@ test_that("find_columns from other functions", { ) test_fun2 <- function(data) { - find_columns(data, select = starts_with("Sep")) + extract_column_names(data, select = starts_with("Sep")) } expect_identical( test_fun2(iris), @@ -115,7 +115,7 @@ test_that("find_columns from other functions", { test_fun3 <- function(data) { i <- "Sep" - find_columns(data, select = starts_with(i)) + extract_column_names(data, select = starts_with(i)) } expect_identical( test_fun3(iris), @@ -123,68 +123,68 @@ test_that("find_columns from other functions", { ) }) -test_that("find_columns regex", { +test_that("extract_column_names regex", { expect_identical( - find_columns(mtcars, select = "pg", regex = TRUE), - find_columns(mtcars, select = "mpg") + extract_column_names(mtcars, select = "pg", regex = TRUE), + extract_column_names(mtcars, select = "mpg") ) }) -test_that("find_columns works correctly with minus sign", { +test_that("extract_column_names works correctly with minus sign", { expect_identical( - find_columns(iris, -"Sepal.Length"), + extract_column_names(iris, -"Sepal.Length"), c("Sepal.Width", "Petal.Length", "Petal.Width", "Species") ) expect_identical( - find_columns(iris, -c("Sepal.Length", "Petal.Width")), + extract_column_names(iris, -c("Sepal.Length", "Petal.Width")), c("Sepal.Width", "Petal.Length", "Species") ) expect_identical( - find_columns(iris, -1), + extract_column_names(iris, -1), c("Sepal.Width", "Petal.Length", "Petal.Width", "Species") ) expect_error( - find_columns(iris, -1:2), + extract_column_names(iris, -1:2), regexp = 
"can't mix negative" ) expect_identical( - find_columns(iris, -(1:2)), + extract_column_names(iris, -(1:2)), c("Petal.Length", "Petal.Width", "Species") ) expect_identical( - find_columns(iris, -c(1, 3)), + extract_column_names(iris, -c(1, 3)), c("Sepal.Width", "Petal.Width", "Species") ) expect_identical( - find_columns(iris, -starts_with("Sepal", "Petal")), + extract_column_names(iris, -starts_with("Sepal", "Petal")), "Species" ) expect_identical( - find_columns(iris, -ends_with("Length", "Width")), + extract_column_names(iris, -ends_with("Length", "Width")), "Species" ) expect_identical( - find_columns(iris, -contains("en", "idt")), + extract_column_names(iris, -contains("en", "idt")), "Species" ) expect_identical( - find_columns(iris, -c("Sepal.Length", "Petal.Width"), exclude = "Species"), + extract_column_names(iris, -c("Sepal.Length", "Petal.Width"), exclude = "Species"), c("Sepal.Width", "Petal.Length") ) }) -test_that("find_columns with square brackets", { +test_that("extract_column_names with square brackets", { expect_identical( - find_columns(mtcars, select = names(mtcars)[-1]), - find_columns(mtcars, select = 2:11) + extract_column_names(mtcars, select = names(mtcars)[-1]), + extract_column_names(mtcars, select = 2:11) ) }) diff --git a/tests/testthat/test-select_nse.R b/tests/testthat/test-select_nse.R index 1f013a705..c0195ad94 100644 --- a/tests/testthat/test-select_nse.R +++ b/tests/testthat/test-select_nse.R @@ -47,7 +47,7 @@ test_that(".select_nse: arg 'select' works", { c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") ) expect_identical( - find_columns(iris, sepal.length, ignore_case = TRUE), + extract_column_names(iris, sepal.length, ignore_case = TRUE), "Sepal.Length" ) expect_identical( diff --git a/vignettes/selection_syntax.Rmd b/vignettes/selection_syntax.Rmd index b636f33b4..9b501ebd5 100644 --- a/vignettes/selection_syntax.Rmd +++ b/vignettes/selection_syntax.Rmd @@ -191,13 +191,13 @@ For example, if we want to let the 
user decide the selection they want to use: ```{r} my_function <- function(data, selection) { - find_columns(data, select = selection) + extract_column_names(data, select = selection) } my_function(iris, "Sepal.Length") my_function(iris, starts_with("Sep")) my_function_2 <- function(data, pattern) { - find_columns(data, select = starts_with(pattern)) + extract_column_names(data, select = starts_with(pattern)) } my_function_2(iris, "Sep") ``` From 99237e0d84527d4b9fb4b2eac5b9f1b252b50007 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 17 May 2024 13:33:45 +0200 Subject: [PATCH 15/27] update docs --- R/data_modify.R | 4 ++-- R/data_rename.R | 2 +- R/select_nse.R | 19 +++++++++---------- man/categorize.Rd | 2 +- man/data_match.Rd | 2 +- man/data_merge.Rd | 2 +- man/data_modify.Rd | 4 ++-- man/data_partition.Rd | 2 +- man/data_relocate.Rd | 2 +- man/data_rename.Rd | 2 +- man/data_rotate.Rd | 2 +- man/data_to_long.Rd | 2 +- man/data_to_wide.Rd | 2 +- man/extract_column_names.Rd | 2 +- man/recode_values.Rd | 2 +- man/slide.Rd | 2 +- man/winsorize.Rd | 2 +- 17 files changed, 27 insertions(+), 28 deletions(-) diff --git a/R/data_modify.R b/R/data_modify.R index c9b9d035a..6942e7d3d 100644 --- a/R/data_modify.R +++ b/R/data_modify.R @@ -121,10 +121,10 @@ #' .modify = round #' ) #' -#' # combine "data_find()" and ".at" argument +#' # combine "extract_column_names()" and ".at" argument #' out <- data_modify( #' d, -#' .at = data_find(d, select = starts_with("Sepal")), +#' .at = extract_column_names(d, select = starts_with("Sepal")), #' .modify = as.factor #' ) #' # "Sepal.Length" and "Sepal.Width" are now factors diff --git a/R/data_rename.R b/R/data_rename.R index 2a9061707..2ef72e9e7 100644 --- a/R/data_rename.R +++ b/R/data_rename.R @@ -46,7 +46,7 @@ #' - Functions to recode data: [rescale()], [reverse()], [categorize()], [recode_values()], [slide()] #' - Functions to standardize, normalize, rank-transform: [center()], [standardize()], [normalize()], [ranktransform()], 
[winsorize()] #' - Split and merge data frames: [data_partition()], [data_merge()] -#' - Functions to find or select columns: [data_select()], [data_find()] +#' - Functions to find or select columns: [data_select()], [extract_column_names()] #' - Functions to filter rows: [data_match()], [data_filter()] #' #' @export diff --git a/R/select_nse.R b/R/select_nse.R index 118d40b15..6d4806558 100644 --- a/R/select_nse.R +++ b/R/select_nse.R @@ -148,7 +148,8 @@ # 3 types of symbols: # - unquoted variables -# - objects that need to be evaluated, e.g data_find(iris, i) where i is a +# - objects that need to be evaluated, e.g extract_column_names(iris, i) where +# i is a # function arg or is defined before. This can also be a vector of names or # positions. # - functions (without parenthesis) @@ -180,24 +181,22 @@ # if starts_with() et al. come from tidyselect but need to be used in # a select environment, then the error doesn't have the same structure. - if (is.null(fn) && - grepl("must be used within a", e$message, fixed = TRUE)) { - trace <- lapply(e$trace$call, function(x) { + if (is.null(fn) && grepl("must be used within a", e$message, fixed = TRUE)) { + call_trace <- lapply(e$call_trace$call, function(x) { tmp <- insight::safe_deparse(x) if (grepl(paste0("^", .regex_select_helper()), tmp)) { tmp } }) - fn <- Filter(Negate(is.null), trace)[1] + fn <- Filter(Negate(is.null), call_trace)[1] } # if we actually obtain the select helper call, return it, else return # what we already had if (length(fn) > 0L && grepl(.regex_select_helper(), fn)) { is_select_helper <<- TRUE return(fn) - } else { - NULL } + NULL } ) @@ -249,7 +248,7 @@ switch(type, `:` = .select_seq(x, data, ignore_case, regex, verbose), `-` = .select_minus(x, data, ignore_case, regex, verbose), - `c` = .select_c(x, data, ignore_case, regex, verbose), + `c` = .select_c(x, data, ignore_case, regex, verbose), # nolint `(` = .select_bracket(x, data, ignore_case, regex, verbose), `[` = .select_square_bracket(x, 
data, ignore_case, regex, verbose), `$` = .select_dollar(x, data, ignore_case, regex, verbose), @@ -494,7 +493,7 @@ # Almost identical to dynGet(). The difference is that we deparse the expression # because get0() allows symbol only since R 4.1.0 .dynGet <- function(x, - ifnotfound = stop(gettextf("%s not found", sQuote(x)), domain = NA), + ifnotfound = stop(gettextf("%s not found", sQuote(x)), domain = NA, call. = FALSE), minframe = 1L, inherits = FALSE) { x <- insight::safe_deparse(x) @@ -518,7 +517,7 @@ # Custom arg "remove_n_top_env" to remove the first environments which are # ".select_nse()" and the other custom functions .dynEval <- function(x, - ifnotfound = stop(gettextf("%s not found", sQuote(x)), domain = NA), + ifnotfound = stop(gettextf("%s not found", sQuote(x)), domain = NA, call. = FALSE), minframe = 1L, inherits = FALSE, remove_n_top_env = 0) { diff --git a/man/categorize.Rd b/man/categorize.Rd index d8fddde17..c3401216e 100644 --- a/man/categorize.Rd +++ b/man/categorize.Rd @@ -226,7 +226,7 @@ categorize(x, "equal_length", n_groups = 3, labels = "median") \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} \item Functions to filter rows: 
\code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/data_match.Rd b/man/data_match.Rd index 5316c1244..b900d788b 100644 --- a/man/data_match.Rd +++ b/man/data_match.Rd @@ -124,7 +124,7 @@ data_filter(mtcars, fl) \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/data_merge.Rd b/man/data_merge.Rd index 5934f8fc6..176dfea80 100644 --- a/man/data_merge.Rd +++ b/man/data_merge.Rd @@ -187,7 +187,7 @@ data_merge(list(x, y, z), join = "bind", by = "id", id = "source") \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: 
\code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/data_modify.Rd b/man/data_modify.Rd index 8cac82205..042962e03 100644 --- a/man/data_modify.Rd +++ b/man/data_modify.Rd @@ -138,10 +138,10 @@ data_modify( .modify = round ) -# combine "data_find()" and ".at" argument +# combine "extract_column_names()" and ".at" argument out <- data_modify( d, - .at = data_find(d, select = starts_with("Sepal")), + .at = extract_column_names(d, select = starts_with("Sepal")), .modify = as.factor ) # "Sepal.Length" and "Sepal.Width" are now factors diff --git a/man/data_partition.Rd b/man/data_partition.Rd index 4ed71e3c1..f68e5d141 100644 --- a/man/data_partition.Rd +++ b/man/data_partition.Rd @@ -77,7 +77,7 @@ lapply(out, function(i) table(i$Species)) \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} +\item Functions to find or select columns: 
\code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/data_relocate.Rd b/man/data_relocate.Rd index d51c4cf1e..86c7464ee 100644 --- a/man/data_relocate.Rd +++ b/man/data_relocate.Rd @@ -138,7 +138,7 @@ head(data_remove(iris, starts_with("Sepal"))) \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/data_rename.Rd b/man/data_rename.Rd index ea68c613b..4c3cff00f 100644 --- a/man/data_rename.Rd +++ b/man/data_rename.Rd @@ -149,7 +149,7 @@ head(data_rename(iris, replacement = paste0("Var", 1:5))) \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, 
\code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/data_rotate.Rd b/man/data_rotate.Rd index 2f5877e1b..973b0c7ae 100644 --- a/man/data_rotate.Rd +++ b/man/data_rotate.Rd @@ -58,7 +58,7 @@ data_rotate(x, colnames = "c") \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/data_to_long.Rd b/man/data_to_long.Rd index 1455dd01e..ab3783584 100644 --- a/man/data_to_long.Rd +++ b/man/data_to_long.Rd @@ -165,7 +165,7 @@ data_to_long( \item 
Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/data_to_wide.Rd b/man/data_to_wide.Rd index 38ede8352..e04d4ac85 100644 --- a/man/data_to_wide.Rd +++ b/man/data_to_wide.Rd @@ -130,7 +130,7 @@ data_to_wide( \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, 
\code{\link[=extract_column_names]{extract_column_names()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/extract_column_names.Rd b/man/extract_column_names.Rd index 9251348b3..72fe0e6d6 100644 --- a/man/extract_column_names.Rd +++ b/man/extract_column_names.Rd @@ -158,7 +158,7 @@ extract_column_names(iris, numeric_mean_35) \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/recode_values.Rd b/man/recode_values.Rd index f807c8e7b..4c376d3f5 100644 --- a/man/recode_values.Rd +++ b/man/recode_values.Rd @@ -282,7 +282,7 @@ options(data_recode_pattern = NULL) \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, 
\code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/slide.Rd b/man/slide.Rd index 554434259..986b4b2fb 100644 --- a/man/slide.Rd +++ b/man/slide.Rd @@ -127,7 +127,7 @@ sapply(mtcars, min) \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/winsorize.Rd b/man/winsorize.Rd index a8031f334..ceeccd8ba 100644 --- a/man/winsorize.Rd +++ b/man/winsorize.Rd @@ -88,7 +88,7 @@ winsorize(iris, threshold = 0.2) \item Functions to recode data: 
\code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } From b8e81e667fca3c85bdba26ff0f8fa1e6ca333ed1 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 17 May 2024 13:47:39 +0200 Subject: [PATCH 16/27] update docs and tests --- R/data_select.R | 1 + R/data_xtabulate.R | 10 ++-------- R/extract_column_names.R | 1 + R/recode_values.R | 1 + R/text_format.R | 1 + man/extract_column_names.Rd | 22 ++++++++++++++++++++++ man/recode_values.Rd | 15 +++++++++++++++ man/text_format.Rd | 10 ++++++++++ tests/testthat/_snaps/text_format.md | 10 +++++----- tests/testthat/test-attributes.R | 4 ++-- tests/testthat/test-labelled_data.R | 8 ++++---- tests/testthat/test-standardize_datagrid.R | 4 ++-- tests/testthat/test-text_format.R | 10 +++++----- 13 files changed, 71 insertions(+), 26 deletions(-) diff --git a/R/data_select.R b/R/data_select.R index 5e05154eb..c59d41b72 100644 --- a/R/data_select.R +++ b/R/data_select.R @@ -34,6 +34,7 @@ data_select <- function(data, } +#' @rdname extract_column_names #' @export get_columns <- function(data, select = NULL, diff --git a/R/data_xtabulate.R 
b/R/data_xtabulate.R index 6cbe065e7..3cb25d62b 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -229,15 +229,12 @@ print_html.dw_data_xtabulate <- function(x, big_mark = NULL, ...) { x$Group <- NULL } - - ## FIXME: change group_by argument later - # print table insight::export_table( format(x, big_mark = big_mark, format = "html", ...), missing = "(NA)", format = "html", - group_by = "groups" + by = "groups" ) } @@ -268,15 +265,12 @@ print_html.dw_data_xtabulates <- function(x, big_mark = NULL, ...) { out <- do.call(rbind, x) - - ## FIXME: change group_by argument later - # print table insight::export_table( out, missing = "(NA)", format = "html", - group_by = "groups" + by = "groups" ) } } diff --git a/R/extract_column_names.R b/R/extract_column_names.R index b04acb045..ab6f4897b 100644 --- a/R/extract_column_names.R +++ b/R/extract_column_names.R @@ -154,6 +154,7 @@ extract_column_names <- function(data, } +#' @rdname extract_column_names #' @export data_find <- function(data, select = NULL, diff --git a/R/recode_values.R b/R/recode_values.R index 84e99da94..b4570bf44 100644 --- a/R/recode_values.R +++ b/R/recode_values.R @@ -531,6 +531,7 @@ recode_values.data.frame <- function(x, ## TODO Deprecate and remove alias later +#' @rdname recode_values #' @export change_code <- function(x, select = NULL, diff --git a/R/text_format.R b/R/text_format.R index 9ca58219d..cbbb455e4 100644 --- a/R/text_format.R +++ b/R/text_format.R @@ -44,6 +44,7 @@ text_format <- function(text, sep = ", ", last = " and ", width = NULL, enclose ## TODO Deprecate and remove alias later +#' @rdname text_format #' @export format_text <- function(text, sep = ", ", last = " and ", width = NULL, enclose = NULL, ...) { insight::format_warning("Function `format_text()` is deprecated and will be removed in a future release. 
Please use `text_format()` instead.") # nolint diff --git a/man/extract_column_names.Rd b/man/extract_column_names.Rd index 72fe0e6d6..788a78151 100644 --- a/man/extract_column_names.Rd +++ b/man/extract_column_names.Rd @@ -2,7 +2,9 @@ % Please edit documentation in R/data_select.R, R/extract_column_names.R \name{data_select} \alias{data_select} +\alias{get_columns} \alias{extract_column_names} +\alias{data_find} \title{Find or get columns in a data frame based on search patterns} \usage{ data_select( @@ -15,6 +17,16 @@ data_select( ... ) +get_columns( + data, + select = NULL, + exclude = NULL, + ignore_case = FALSE, + regex = FALSE, + verbose = TRUE, + ... +) + extract_column_names( data, select = NULL, @@ -24,6 +36,16 @@ extract_column_names( verbose = TRUE, ... ) + +data_find( + data, + select = NULL, + exclude = NULL, + ignore_case = FALSE, + regex = FALSE, + verbose = TRUE, + ... +) } \arguments{ \item{data}{A data frame.} diff --git a/man/recode_values.Rd b/man/recode_values.Rd index 4c376d3f5..e4384593c 100644 --- a/man/recode_values.Rd +++ b/man/recode_values.Rd @@ -4,6 +4,7 @@ \alias{recode_values} \alias{recode_values.numeric} \alias{recode_values.data.frame} +\alias{change_code} \title{Recode old values of variables into new values} \usage{ recode_values(x, ...) @@ -30,6 +31,20 @@ recode_values(x, ...) verbose = TRUE, ... ) + +change_code( + x, + select = NULL, + exclude = NULL, + recode = NULL, + default = NULL, + preserve_na = TRUE, + append = FALSE, + ignore_case = FALSE, + regex = FALSE, + verbose = TRUE, + ... +) } \arguments{ \item{x}{A data frame, numeric or character vector, or factor.} diff --git a/man/text_format.Rd b/man/text_format.Rd index b65190f31..87f045193 100644 --- a/man/text_format.Rd +++ b/man/text_format.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/text_format.R \name{text_format} \alias{text_format} +\alias{format_text} \alias{text_fullstop} \alias{text_lastchar} \alias{text_concatenate} @@ -19,6 +20,15 @@ text_format( ... 
) +format_text( + text, + sep = ", ", + last = " and ", + width = NULL, + enclose = NULL, + ... +) + text_fullstop(text) text_lastchar(text, n = 1) diff --git a/tests/testthat/_snaps/text_format.md b/tests/testthat/_snaps/text_format.md index 78716bb34..6516c72af 100644 --- a/tests/testthat/_snaps/text_format.md +++ b/tests/testthat/_snaps/text_format.md @@ -1,7 +1,7 @@ # text formatting helpers work as expected Code - format_text(c("A very long First", "Some similar long Second", "Shorter Third", + text_format(c("A very long First", "Some similar long Second", "Shorter Third", "More or less long Fourth", "And finally the Last"), width = 20) Output [1] "A very long First,\nSome similar long\nSecond, Shorter\nThird, More or less\nlong Fourth and And\nfinally the Last\n" @@ -9,7 +9,7 @@ --- Code - format_text(c("A very long First", "Some similar long Second", "Shorter Third", + text_format(c("A very long First", "Some similar long Second", "Shorter Third", "More or less long Fourth", "And finally the Last"), last = " or ", enclose = "`", width = 20) Output @@ -19,7 +19,7 @@ Code long_text <- strrep("abc ", 100) - cat(format_text(long_text, width = 50)) + cat(text_format(long_text, width = 50)) Output abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc @@ -31,7 +31,7 @@ abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc Code - cat(format_text(long_text, width = 80)) + cat(text_format(long_text, width = 80)) Output abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc @@ -40,7 +40,7 @@ abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc Code withr::with_options(list(width = 50), code = { - cat(format_text(long_text)) + cat(text_format(long_text)) }) Output abc abc abc abc abc abc abc abc abc abc abc abc diff --git a/tests/testthat/test-attributes.R 
b/tests/testthat/test-attributes.R index 2fc88ecc9..ebd26de99 100644 --- a/tests/testthat/test-attributes.R +++ b/tests/testthat/test-attributes.R @@ -177,10 +177,10 @@ test_that("categorize, attributes preserved", { # change_code ----------------------------------- -test_that("change_code, attributes preserved", { +test_that("recode_values, attributes preserved", { x <- mtcars attr(x, "myattri") <- "I'm here" - x2 <- change_code(x, select = "am", recode = list(`5` = 0, `10` = 1)) + x2 <- recode_values(x, select = "am", recode = list(`5` = 0, `10` = 1)) expect_identical(attr(x2, "myattri", exact = TRUE), "I'm here") }) diff --git a/tests/testthat/test-labelled_data.R b/tests/testthat/test-labelled_data.R index 0b7e37a4d..2e933e5dc 100644 --- a/tests/testthat/test-labelled_data.R +++ b/tests/testthat/test-labelled_data.R @@ -311,10 +311,10 @@ test_that("convert_to_na, labels preserved", { -# get_columns ----------------------------------- +# data_select ----------------------------------- -test_that("get_columns, labels preserved", { - x <- get_columns(efc, starts_with("c")) +test_that("data_select, labels preserved", { + x <- data_select(efc, starts_with("c")) # numeric expect_equal( attr(x$c12hour, "label", exact = TRUE), @@ -322,7 +322,7 @@ test_that("get_columns, labels preserved", { ignore_attr = TRUE ) - x <- get_columns(efc, starts_with("e")) + x <- data_select(efc, starts_with("e")) # factor expect_equal( attr(x$e42dep, "label", exact = TRUE), diff --git a/tests/testthat/test-standardize_datagrid.R b/tests/testthat/test-standardize_datagrid.R index 511fe0045..c32616bdc 100644 --- a/tests/testthat/test-standardize_datagrid.R +++ b/tests/testthat/test-standardize_datagrid.R @@ -1,11 +1,11 @@ # standardize ----------------------------------------------------- test_that("standardize.datagrid", { - x <- insight::get_datagrid(iris, at = "Sepal.Length", range = "sd", length = 3) + x <- insight::get_datagrid(iris, by = "Sepal.Length", range = "sd", length = 3) out 
<- standardize(x) expect_equal(as.numeric(out$Sepal.Length), c(-1, 0, 1)) expect_equal(as.numeric(out$Sepal.Width), c(0, 0, 0)) - x <- insight::get_datagrid(iris, at = "Sepal.Length = c(-1, 0)") + x <- insight::get_datagrid(iris, by = "Sepal.Length = c(-1, 0)") out <- unstandardize(x, select = "Sepal.Length") expect_equal(out$Sepal.Length[1:2], c(mean(iris$Sepal.Length) - sd(iris$Sepal.Length), mean(iris$Sepal.Length))) }) diff --git a/tests/testthat/test-text_format.R b/tests/testthat/test-text_format.R index 90dab36dd..5d4dfd4b1 100644 --- a/tests/testthat/test-text_format.R +++ b/tests/testthat/test-text_format.R @@ -1,5 +1,5 @@ test_that("text formatting helpers work as expected", { - expect_snapshot(format_text( + expect_snapshot(text_format( c( "A very long First", "Some similar long Second", @@ -10,7 +10,7 @@ test_that("text formatting helpers work as expected", { width = 20 )) - expect_snapshot(format_text( + expect_snapshot(text_format( c( "A very long First", "Some similar long Second", @@ -73,11 +73,11 @@ test_that("text formatting helpers work as expected", { test_that("text formatters respect `width` argument", { expect_snapshot({ long_text <- strrep("abc ", 100) - cat(format_text(long_text, width = 50)) - cat(format_text(long_text, width = 80)) + cat(text_format(long_text, width = 50)) + cat(text_format(long_text, width = 80)) withr::with_options(list(width = 50), code = { - cat(format_text(long_text)) + cat(text_format(long_text)) }) }) }) From 764bd9e83959220f272da52f4ebc228e4a6b38c3 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 17 May 2024 13:51:38 +0200 Subject: [PATCH 17/27] Update extract_column_names.R --- R/extract_column_names.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/R/extract_column_names.R b/R/extract_column_names.R index ab6f4897b..74cd2d2d3 100644 --- a/R/extract_column_names.R +++ b/R/extract_column_names.R @@ -164,4 +164,13 @@ data_find <- function(data, verbose = TRUE, ...) 
{ insight::format_warning("Function `data_find()` is deprecated and will be removed in a future release. Please use `extract_column_names()` instead.") # nolint + extract_column_names( + data, + select = select, + exclude = exclude, + ignore_case = ignore_case, + regex = regex, + verbose = verbose, + ... + ) } From af3f9f5e17904e4d71fadbccd663e11cc5ed0487 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 17 May 2024 13:55:52 +0200 Subject: [PATCH 18/27] update readme --- README.Rmd | 2 +- README.md | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/README.Rmd b/README.Rmd index 8922aa1c1..7610c5924 100644 --- a/README.Rmd +++ b/README.Rmd @@ -314,7 +314,7 @@ iris |> # all rows where Species is "versicolor" or "virginica" data_filter(Species %in% c("versicolor", "virginica")) |> # select only columns with "." in names (i.e. drop Species) - get_columns(contains("\\.")) |> + data_select(contains("\\.")) |> # move columns that ends with "Length" to start of data frame data_relocate(ends_with("Length")) |> # remove fourth column diff --git a/README.md b/README.md index 70c7212e5..411ad4c72 100644 --- a/README.md +++ b/README.md @@ -130,7 +130,8 @@ data_filter(mtcars, vs == 0 & am == 1) ``` Finding columns in a data frame, or retrieving the data of selected -columns, can be achieved using `extract_column_names()` or `data_select()`: +columns, can be achieved using `extract_column_names()` or +`data_select()`: ``` r # find column names matching a pattern @@ -138,7 +139,7 @@ extract_column_names(iris, starts_with("Sepal")) #> [1] "Sepal.Length" "Sepal.Width" # return data columns matching a pattern -data(iris, starts_with("Sepal")) |> head() +data_select(iris, starts_with("Sepal")) |> head() #> Sepal.Length Sepal.Width #> 1 5.1 3.5 #> 2 4.9 3.0 @@ -584,7 +585,7 @@ iris |> # all rows where Species is "versicolor" or "virginica" data_filter(Species %in% c("versicolor", "virginica")) |> # select only columns with "." in names (i.e. 
drop Species) - get_columns(contains("\\.")) |> + data_select(contains("\\.")) |> # move columns that ends with "Length" to start of data frame data_relocate(ends_with("Length")) |> # remove fourth column From 227e0208b76aa76ab97a6697a1e86e3ffc246387 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 18 May 2024 11:54:58 +0200 Subject: [PATCH 19/27] trigger CI --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ec594a4e7..29c879ed8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.10.0.4 +Version: 0.10.0.5 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")), From 6f7036df2f0222c3643623fe34f1ba51cb5b09ac Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 18 May 2024 20:44:43 +0100 Subject: [PATCH 20/27] revert commits related to aliases --- NAMESPACE | 2 +- NEWS.md | 5 - R/adjust.R | 2 +- R/assign_labels.R | 2 +- R/categorize.R | 2 +- R/center.R | 2 +- R/convert_na_to.R | 2 +- R/convert_to_na.R | 2 +- R/data_addprefix.R | 2 +- R/data_codebook.R | 2 +- R/data_duplicated.R | 2 +- R/data_extract.R | 2 +- R/{extract_column_names.R => data_find.R} | 74 ++++----- R/data_group.R | 2 +- R/data_peek.R | 2 +- R/data_relocate.R | 2 +- R/data_remove.R | 2 +- R/data_replicate.R | 2 +- R/data_rescale.R | 2 +- R/data_reverse.R | 2 +- R/data_select.R | 25 +--- R/data_separate.R | 2 +- R/data_tabulate.R | 2 +- R/data_to_long.R | 2 +- R/data_unique.R | 2 +- R/data_unite.R | 2 +- R/describe_distribution.R | 2 +- R/labels_to_levels.R | 2 +- R/means_by_group.R | 2 +- R/normalize.R | 2 +- R/ranktransform.R | 2 +- R/recode_values.R | 29 +--- R/replace_nan_inf.R | 2 +- R/row_means.R | 2 +- R/standardize.R | 2 +- R/text_format.R | 5 +- R/to_factor.R 
| 2 +- R/to_numeric.R | 2 +- README.Rmd | 6 +- README.md | 7 +- _pkgdown.yaml | 2 +- man/adjust.Rd | 4 +- man/assign_labels.Rd | 4 +- man/categorize.Rd | 4 +- man/center.Rd | 4 +- man/convert_na_to.Rd | 4 +- man/convert_to_na.Rd | 4 +- man/data_codebook.Rd | 4 +- man/data_duplicated.Rd | 4 +- man/data_extract.Rd | 4 +- man/data_group.Rd | 4 +- man/data_peek.Rd | 4 +- man/data_relocate.Rd | 4 +- man/data_rename.Rd | 4 +- man/data_replicate.Rd | 4 +- man/data_separate.Rd | 4 +- man/data_tabulate.Rd | 4 +- man/data_to_long.Rd | 4 +- man/data_unique.Rd | 4 +- man/data_unite.Rd | 4 +- man/describe_distribution.Rd | 4 +- ...xtract_column_names.Rd => find_columns.Rd} | 55 +++---- man/labels_to_levels.Rd | 4 +- man/means_by_group.Rd | 4 +- man/normalize.Rd | 4 +- man/ranktransform.Rd | 4 +- man/recode_values.Rd | 18 +-- man/rescale.Rd | 4 +- man/reverse.Rd | 4 +- man/row_means.Rd | 4 +- man/slide.Rd | 4 +- man/standardize.Rd | 4 +- man/to_factor.Rd | 4 +- man/to_numeric.Rd | 4 +- ...act_column_names.R => test-find_columns.R} | 80 +++++----- ...{test-data_select.R => test-get_columns.R} | 140 +++++++++--------- tests/testthat/test-select_nse.R | 2 +- vignettes/selection_syntax.Rmd | 4 +- 78 files changed, 282 insertions(+), 360 deletions(-) rename R/{extract_column_names.R => data_find.R} (70%) rename man/{extract_column_names.Rd => find_columns.Rd} (84%) rename tests/testthat/{test-extract_column_names.R => test-find_columns.R} (51%) rename tests/testthat/{test-data_select.R => test-get_columns.R} (61%) diff --git a/NAMESPACE b/NAMESPACE index 9fe247ca1..eea4e22a4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -272,7 +272,7 @@ export(distribution_coef_var) export(distribution_mode) export(empty_columns) export(empty_rows) -export(extract_column_names) +export(find_columns) export(format_text) export(get_columns) export(kurtosis) diff --git a/NEWS.md b/NEWS.md index 37926ea3d..b8c4c533e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,11 +11,6 @@ BREAKING CHANGES * `means_by_group()` * 
`rescale_weights()` -* Following aliases are deprecated and will be removed in a future release: - - * `get_columns()`, use `data_select()` instead. - * `data_find()` and `find_columns()`, use `extract_column_names()` instead. - CHANGES * `recode_into()` is more relaxed regarding checking the type of `NA` values. diff --git a/R/adjust.R b/R/adjust.R index 5d50b16d0..69abbde6b 100644 --- a/R/adjust.R +++ b/R/adjust.R @@ -25,7 +25,7 @@ #' re-added. This avoids the centering around 0 that happens by default #' when regressing out another variable (see the examples below for a #' visual representation of this). -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' @inheritParams standardize #' #' @return A data frame comparable to `data`, with adjusted variables. diff --git a/R/assign_labels.R b/R/assign_labels.R index b5541347e..bd35513bf 100644 --- a/R/assign_labels.R +++ b/R/assign_labels.R @@ -14,7 +14,7 @@ #' `x`, the right-hand side (RHS) the associated value label. Non-matching #' labels are omitted. #' @param ... Currently not used. -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @inheritSection center Selection of variables - the `select` argument #' diff --git a/R/categorize.R b/R/categorize.R index e6bb8227e..341d0c0c9 100644 --- a/R/categorize.R +++ b/R/categorize.R @@ -45,7 +45,7 @@ #' variables are appended with new column names (using the defined suffix) to #' the original data frame. #' @param ... not used. -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @inherit data_rename seealso #' diff --git a/R/center.R b/R/center.R index 021e157f7..eac550de1 100644 --- a/R/center.R +++ b/R/center.R @@ -29,7 +29,7 @@ #' order, unless a named vector is given. In this case, names are matched #' against the names of the selected variables. #' @param ... Currently not used. 
-#' @inheritParams extract_column_names +#' @inheritParams find_columns #' @inheritParams standardize #' #' @section Selection of variables - the `select` argument: diff --git a/R/convert_na_to.R b/R/convert_na_to.R index 5454d0c6f..a6aae6e11 100644 --- a/R/convert_na_to.R +++ b/R/convert_na_to.R @@ -145,7 +145,7 @@ convert_na_to.character <- function(x, replacement = NULL, verbose = TRUE, ...) #' @param replace_num Value to replace `NA` when variable is of type numeric. #' @param replace_char Value to replace `NA` when variable is of type character. #' @param replace_fac Value to replace `NA` when variable is of type factor. -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @rdname convert_na_to #' @export diff --git a/R/convert_to_na.R b/R/convert_to_na.R index a86084974..0e95b7c5f 100644 --- a/R/convert_to_na.R +++ b/R/convert_to_na.R @@ -12,7 +12,7 @@ #' @param drop_levels Logical, for factors, when specific levels are replaced #' by `NA`, should unused levels be dropped? #' @param ... Not used. -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @return #' `x`, where all values in `na` are converted to `NA`. diff --git a/R/data_addprefix.R b/R/data_addprefix.R index 6cf292ecc..91952c029 100644 --- a/R/data_addprefix.R +++ b/R/data_addprefix.R @@ -1,5 +1,5 @@ #' @rdname data_rename -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' @examples #' # Add prefix / suffix to all columns #' head(data_addprefix(iris, "NEW_")) diff --git a/R/data_codebook.R b/R/data_codebook.R index de312b0fd..cec95196f 100644 --- a/R/data_codebook.R +++ b/R/data_codebook.R @@ -22,7 +22,7 @@ #' @param line_padding For HTML tables, the distance (in pixel) between lines. #' @param row_color For HTML tables, the fill color for odd rows. 
#' @inheritParams standardize.data.frame -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @return A formatted data frame, summarizing the content of the data frame. #' Returned columns include the column index of the variables in the original diff --git a/R/data_duplicated.R b/R/data_duplicated.R index db3202bd6..5a15d7134 100644 --- a/R/data_duplicated.R +++ b/R/data_duplicated.R @@ -7,7 +7,7 @@ #' values for that row, to help in the decision-making when #' selecting which duplicates to keep. #' -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @keywords duplicates #' @export diff --git a/R/data_extract.R b/R/data_extract.R index b37c6282b..b5613309c 100644 --- a/R/data_extract.R +++ b/R/data_extract.R @@ -26,7 +26,7 @@ #' @param verbose Toggle warnings. #' @param ... For use by future methods. #' -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @details `data_extract()` can be used to select multiple variables or pull a #' single variable from a data frame. Thus, the return value is by default not diff --git a/R/extract_column_names.R b/R/data_find.R similarity index 70% rename from R/extract_column_names.R rename to R/data_find.R index 74cd2d2d3..ef6e6dfb5 100644 --- a/R/extract_column_names.R +++ b/R/data_find.R @@ -1,8 +1,10 @@ #' @title Find or get columns in a data frame based on search patterns -#' @name extract_column_names +#' @name find_columns #' -#' @description `extract_column_names()` returns column names from a data set that -#' match a certain search pattern, while `data_select()` returns the found data. +#' @description `find_columns()` returns column names from a data set that +#' match a certain search pattern, while `get_columns()` returns the found data. +#' `data_select()` is an alias for `get_columns()`, and `data_find()` is an alias +#' for `find_columns()`. #' #' @param data A data frame. 
#' @param select Variables that will be included when performing the required @@ -33,8 +35,8 @@ #' negation should not work as expected, use the `exclude` argument instead. #' #' If `NULL`, selects all columns. Patterns that found no matches are silently -#' ignored, e.g. `extract_column_names(iris, select = c("Species", "Test"))` -#' will just return `"Species"`. +#' ignored, e.g. `find_columns(iris, select = c("Species", "Test"))` will just +#' return `"Species"`. #' @param exclude See `select`, however, column names matched by the pattern #' from `exclude` will be excluded instead of selected. If `NULL` (the default), #' excludes no columns. @@ -56,10 +58,9 @@ #' #' @return #' -#' `extract_column_names()` returns a character vector with column names that -#' matched the pattern in `select` and `exclude`, or `NULL` if no matching -#' column name was found. `data_select()` returns a data frame with matching -#' columns. +#' `find_columns()` returns a character vector with column names that matched +#' the pattern in `select` and `exclude`, or `NULL` if no matching column name +#' was found. `get_columns()` returns a data frame with matching columns. 
#' #' @details #' @@ -68,12 +69,12 @@ #' #' ```r #' foo <- function(data, pattern) { -#' extract_column_names(data, select = starts_with(pattern)) +#' find_columns(data, select = starts_with(pattern)) #' } #' foo(iris, pattern = "Sep") #' #' foo2 <- function(data, pattern) { -#' extract_column_names(data, select = pattern) +#' find_columns(data, select = pattern) #' } #' foo2(iris, pattern = starts_with("Sep")) #' ``` @@ -83,7 +84,7 @@ #' ```r #' for (i in c("Sepal", "Sp")) { #' head(iris) |> -#' extract_column_names(select = starts_with(i)) |> +#' find_columns(select = starts_with(i)) |> #' print() #' } #' ``` @@ -93,7 +94,7 @@ #' #' ```r #' inner <- function(data, arg) { -#' extract_column_names(data, select = arg) +#' find_columns(data, select = arg) #' } #' outer <- function(data, arg) { #' inner(data, starts_with(arg)) @@ -113,25 +114,25 @@ #' #' @examples #' # Find columns names by pattern -#' extract_column_names(iris, starts_with("Sepal")) -#' extract_column_names(iris, ends_with("Width")) -#' extract_column_names(iris, regex("\\.")) -#' extract_column_names(iris, c("Petal.Width", "Sepal.Length")) +#' find_columns(iris, starts_with("Sepal")) +#' find_columns(iris, ends_with("Width")) +#' find_columns(iris, regex("\\.")) +#' find_columns(iris, c("Petal.Width", "Sepal.Length")) #' #' # starts with "Sepal", but not allowed to end with "width" -#' extract_column_names(iris, starts_with("Sepal"), exclude = contains("Width")) +#' find_columns(iris, starts_with("Sepal"), exclude = contains("Width")) #' #' # find numeric with mean > 3.5 #' numeric_mean_35 <- function(x) is.numeric(x) && mean(x, na.rm = TRUE) > 3.5 -#' extract_column_names(iris, numeric_mean_35) +#' find_columns(iris, numeric_mean_35) #' @export -extract_column_names <- function(data, - select = NULL, - exclude = NULL, - ignore_case = FALSE, - regex = FALSE, - verbose = TRUE, - ...) 
{ +find_columns <- function(data, + select = NULL, + exclude = NULL, + ignore_case = FALSE, + regex = FALSE, + verbose = TRUE, + ...) { columns <- .select_nse( select, data, @@ -154,23 +155,6 @@ extract_column_names <- function(data, } -#' @rdname extract_column_names +#' @rdname find_columns #' @export -data_find <- function(data, - select = NULL, - exclude = NULL, - ignore_case = FALSE, - regex = FALSE, - verbose = TRUE, - ...) { - insight::format_warning("Function `data_find()` is deprecated and will be removed in a future release. Please use `extract_column_names()` instead.") # nolint - extract_column_names( - data, - select = select, - exclude = exclude, - ignore_case = ignore_case, - regex = regex, - verbose = verbose, - ... - ) -} +data_find <- find_columns diff --git a/R/data_group.R b/R/data_group.R index e1ab00758..4883d69c4 100644 --- a/R/data_group.R +++ b/R/data_group.R @@ -6,7 +6,7 @@ #' grouping information from a grouped data frame. #' #' @param data A data frame -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @return A grouped data frame, i.e. a data frame with additional information #' about the grouping structure saved as attributes. diff --git a/R/data_peek.R b/R/data_peek.R index 951cae209..09a42a560 100644 --- a/R/data_peek.R +++ b/R/data_peek.R @@ -9,7 +9,7 @@ #' @param width Maximum width of line length to display. If `NULL`, width will #' be determined using `options()$width`. #' @param ... not used. -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @note To show only specific or a limited number of variables, use the #' `select` argument, e.g. `select = 1:5` to show only the first five variables. 
diff --git a/R/data_relocate.R b/R/data_relocate.R index 58cc0265e..dea668a2f 100644 --- a/R/data_relocate.R +++ b/R/data_relocate.R @@ -15,7 +15,7 @@ #' character vector, indicating the name of the destination column, or a #' numeric value, indicating the index number of the destination column. #' If `-1`, will be added before or after the last column. -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' @inheritParams data_rename #' #' @inherit data_rename seealso diff --git a/R/data_remove.R b/R/data_remove.R index 472bb2e08..1fcd67b0e 100644 --- a/R/data_remove.R +++ b/R/data_remove.R @@ -1,4 +1,4 @@ -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' @rdname data_relocate #' @examples #' # Remove columns diff --git a/R/data_replicate.R b/R/data_replicate.R index 8ab630bb4..4ea6f998e 100644 --- a/R/data_replicate.R +++ b/R/data_replicate.R @@ -13,7 +13,7 @@ #' provided in `expand` are removed from the data frame. If `FALSE` and `expand` #' contains missing values, the function will throw an error. #' @param ... Currently not used. -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @return A dataframe with each row replicated as many times as defined in `expand`. #' diff --git a/R/data_rescale.R b/R/data_rescale.R index 90c84a49e..85ff885c6 100644 --- a/R/data_rescale.R +++ b/R/data_rescale.R @@ -4,7 +4,7 @@ #' (change the keying/scoring direction), or to expand a range. #' #' @inheritParams categorize -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' @inheritParams standardize.data.frame #' #' @param to Numeric vector of length 2 giving the new range that the variable diff --git a/R/data_reverse.R b/R/data_reverse.R index 5543e2e7f..2fc9ef493 100644 --- a/R/data_reverse.R +++ b/R/data_reverse.R @@ -12,7 +12,7 @@ #' usually only makes sense when factor levels are numeric, not characters. #' @param ... Arguments passed to or from other methods. 
#' @inheritParams categorize -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @inheritSection center Selection of variables - the `select` argument #' diff --git a/R/data_select.R b/R/data_select.R index c59d41b72..d8468b22b 100644 --- a/R/data_select.R +++ b/R/data_select.R @@ -1,6 +1,6 @@ -#' @rdname extract_column_names +#' @rdname find_columns #' @export -data_select <- function(data, +get_columns <- function(data, select = NULL, exclude = NULL, ignore_case = FALSE, @@ -34,23 +34,6 @@ data_select <- function(data, } -#' @rdname extract_column_names +#' @rdname find_columns #' @export -get_columns <- function(data, - select = NULL, - exclude = NULL, - ignore_case = FALSE, - regex = FALSE, - verbose = TRUE, - ...) { - insight::format_warning("Function `get_columns()` is deprecated and will be removed in a future release. Please use `data_select()` instead.") # nolint - data_select( - data, - select = select, - exclude = exclude, - ignore_case = ignore_case, - regex = regex, - verbose = verbose, - ... - ) -} +data_select <- get_columns diff --git a/R/data_separate.R b/R/data_separate.R index 53c5f72a5..53243fb33 100644 --- a/R/data_separate.R +++ b/R/data_separate.R @@ -42,7 +42,7 @@ #' @param convert_na Logical, if `TRUE`, character `"NA"` values are converted #' into real `NA` values. #' @param ... Currently not used. -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @seealso [`data_unite()`] #' diff --git a/R/data_tabulate.R b/R/data_tabulate.R index a6099d6ac..76c9733b1 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -26,7 +26,7 @@ #' not `NULL`. Can be `"row"` (row percentages), `"column"` (column percentages) #' or `"full"` (to calculate relative frequencies for the full table). #' @param ... not used. -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @section Crosstables: #' If `by` is supplied, a crosstable is created. 
The crosstable includes `` diff --git a/R/data_to_long.R b/R/data_to_long.R index 3d19e5bc2..06fe91efd 100644 --- a/R/data_to_long.R +++ b/R/data_to_long.R @@ -21,7 +21,7 @@ #' @param rows_to The name of the column that will contain the row names or row #' numbers from the original data. If `NULL`, will be removed. #' @param ... Currently not used. -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' @param cols Identical to `select`. This argument is here to ensure compatibility #' with `tidyr::pivot_longer()`. If both `select` and `cols` are provided, `cols` #' is used. diff --git a/R/data_unique.R b/R/data_unique.R index 149fad082..40a252a2f 100644 --- a/R/data_unique.R +++ b/R/data_unique.R @@ -12,7 +12,7 @@ #' #' @param keep The method to be used for duplicate selection, either "best" #' (the default), "first", or "last". -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @return A data frame, containing only the chosen duplicates. #' @seealso [data_duplicated()] diff --git a/R/data_unite.R b/R/data_unite.R index 4cf6d340f..a4cf9dea5 100644 --- a/R/data_unite.R +++ b/R/data_unite.R @@ -14,7 +14,7 @@ #' in the united values. If `FALSE`, missing values are represented as `"NA"` #' in the united values. #' @param ... Currently not used. -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @seealso [`data_separate()`] #' diff --git a/R/describe_distribution.R b/R/describe_distribution.R index 41f2a8b83..37850299a 100644 --- a/R/describe_distribution.R +++ b/R/describe_distribution.R @@ -22,7 +22,7 @@ #' (based on [stats::IQR()], using `type = 6`). #' @param verbose Toggle warnings and messages. #' @inheritParams bayestestR::point_estimate -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @details If `x` is a data frame, only numeric variables are kept and will be #' displayed in the summary. 
diff --git a/R/labels_to_levels.R b/R/labels_to_levels.R index b84f9e751..c1ff97a16 100644 --- a/R/labels_to_levels.R +++ b/R/labels_to_levels.R @@ -7,7 +7,7 @@ #' @param x A data frame or factor. Other variable types (e.g. numerics) are not #' allowed. #' @param ... Currently not used. -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' @inheritParams categorize #' #' @return `x`, where for all factors former levels are replaced by their value diff --git a/R/means_by_group.R b/R/means_by_group.R index 6fe65c5a6..ad188f275 100644 --- a/R/means_by_group.R +++ b/R/means_by_group.R @@ -20,7 +20,7 @@ #' point when rounding estimates and values. #' @param ... Currently not used #' @param group Deprecated. Use `by` instead. -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @return A data frame with information on mean and further summary statistics #' for each sub-group. diff --git a/R/normalize.R b/R/normalize.R index a30955b5d..ba2eee40d 100644 --- a/R/normalize.R +++ b/R/normalize.R @@ -17,7 +17,7 @@ #' `1 - include_bounds`. #' @param ... Arguments passed to or from other methods. #' @inheritParams standardize.data.frame -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @inheritSection center Selection of variables - the `select` argument #' diff --git a/R/ranktransform.R b/R/ranktransform.R index c52cb0d78..37beceb60 100644 --- a/R/ranktransform.R +++ b/R/ranktransform.R @@ -11,7 +11,7 @@ #' `"first"`, `"last"`, `"random"`, `"max"` or `"min"`. See [rank()] for #' details. #' @param ... Arguments passed to or from other methods. 
-#' @inheritParams extract_column_names +#' @inheritParams find_columns #' @inheritParams standardize.data.frame #' #' @inheritSection center Selection of variables - the `select` argument diff --git a/R/recode_values.R b/R/recode_values.R index b4570bf44..e355e9cb0 100644 --- a/R/recode_values.R +++ b/R/recode_values.R @@ -22,7 +22,7 @@ #' `preserve_na=TRUE`, `default` will no longer convert `NA` into the specified #' default value. #' @param ... not used. -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' @inheritParams categorize #' #' @return `x`, where old values are replaced by new values. @@ -533,29 +533,4 @@ recode_values.data.frame <- function(x, #' @rdname recode_values #' @export -change_code <- function(x, - select = NULL, - exclude = NULL, - recode = NULL, - default = NULL, - preserve_na = TRUE, - append = FALSE, - ignore_case = FALSE, - regex = FALSE, - verbose = TRUE, - ...) { - insight::format_warning("Function `change_code()` is deprecated. Please use `recode_values()` instead.") # nolint - recode_values( - x, - select = select, - exclude = exclude, - recode = recode, - default = default, - preserve_na = preserve_na, - append = append, - ignore_case = ignore_case, - regex = regex, - verbose = verbose, - ... - ) -} +change_code <- recode_values diff --git a/R/replace_nan_inf.R b/R/replace_nan_inf.R index 3ba0397f4..923743d1a 100644 --- a/R/replace_nan_inf.R +++ b/R/replace_nan_inf.R @@ -34,7 +34,7 @@ replace_nan_inf.default <- function(x, ...) { x } -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' @export replace_nan_inf.data.frame <- function(x, select = NULL, diff --git a/R/row_means.R b/R/row_means.R index fdcaa49fd..2513cc3a1 100644 --- a/R/row_means.R +++ b/R/row_means.R @@ -19,7 +19,7 @@ #' @param remove_na Logical, if `TRUE` (default), removes missing (`NA`) values #' before calculating row means. Only applies if `min_valuid` is not specified. #' @param verbose Toggle warnings. 
-#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @return A vector with row means for those rows with at least `n` valid values. #' diff --git a/R/standardize.R b/R/standardize.R index 27de92946..d5082803d 100644 --- a/R/standardize.R +++ b/R/standardize.R @@ -68,7 +68,7 @@ #' @param force Logical, if `TRUE`, forces recoding of factors and character #' vectors as well. #' @param ... Arguments passed to or from other methods. -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' #' @inheritSection center Selection of variables - the `select` argument #' diff --git a/R/text_format.R b/R/text_format.R index cbbb455e4..180807746 100644 --- a/R/text_format.R +++ b/R/text_format.R @@ -46,10 +46,7 @@ text_format <- function(text, sep = ", ", last = " and ", width = NULL, enclose #' @rdname text_format #' @export -format_text <- function(text, sep = ", ", last = " and ", width = NULL, enclose = NULL, ...) { - insight::format_warning("Function `format_text()` is deprecated and will be removed in a future release. Please use `text_format()` instead.") # nolint - text_format(text, sep = sep, last = last, width = width, enclose = enclose, ...) -} +format_text <- text_format #' @rdname text_format #' @export diff --git a/R/to_factor.R b/R/to_factor.R index 8fa46d404..c31580072 100644 --- a/R/to_factor.R +++ b/R/to_factor.R @@ -11,7 +11,7 @@ #' levels after `x` was converted to factor. Else, factor levels are based on #' the values of `x` (i.e. as if using `as.factor()`). #' @param ... Arguments passed to or from other methods. 
-#' @inheritParams extract_column_names +#' @inheritParams find_columns #' @inheritParams categorize #' #' @inheritSection center Selection of variables - the `select` argument diff --git a/R/to_numeric.R b/R/to_numeric.R index e38e12e80..c43956399 100644 --- a/R/to_numeric.R +++ b/R/to_numeric.R @@ -14,7 +14,7 @@ #' @param lowest Numeric, indicating the lowest (minimum) value when converting #' factors or character vectors to numeric values. #' @param ... Arguments passed to or from other methods. -#' @inheritParams extract_column_names +#' @inheritParams find_columns #' @inheritParams categorize #' #' @note By default, `to_numeric()` converts factors into "binary" dummies, i.e. diff --git a/README.Rmd b/README.Rmd index 7610c5924..35de33f56 100644 --- a/README.Rmd +++ b/README.Rmd @@ -90,14 +90,14 @@ data_match(mtcars, data.frame(vs = 0, am = 1)) data_filter(mtcars, vs == 0 & am == 1) ``` -Finding columns in a data frame, or retrieving the data of selected columns, can be achieved using `extract_column_names()` or `data_select()`: +Finding columns in a data frame, or retrieving the data of selected columns, can be achieved using `find_columns()` or `get_columns()`: ```{r} # find column names matching a pattern -extract_column_names(iris, starts_with("Sepal")) +find_columns(iris, starts_with("Sepal")) # return data columns matching a pattern -data_select(iris, starts_with("Sepal")) |> head() +get_columns(iris, starts_with("Sepal")) |> head() ``` It is also possible to extract one or more variables: diff --git a/README.md b/README.md index 411ad4c72..b164c693f 100644 --- a/README.md +++ b/README.md @@ -130,16 +130,15 @@ data_filter(mtcars, vs == 0 & am == 1) ``` Finding columns in a data frame, or retrieving the data of selected -columns, can be achieved using `extract_column_names()` or -`data_select()`: +columns, can be achieved using `find_columns()` or `get_columns()`: ``` r # find column names matching a pattern -extract_column_names(iris, 
starts_with("Sepal")) +find_columns(iris, starts_with("Sepal")) #> [1] "Sepal.Length" "Sepal.Width" # return data columns matching a pattern -data_select(iris, starts_with("Sepal")) |> head() +get_columns(iris, starts_with("Sepal")) |> head() #> Sepal.Length Sepal.Width #> 1 5.1 3.5 #> 2 4.9 3.0 diff --git a/_pkgdown.yaml b/_pkgdown.yaml index d52994e16..65bae30c8 100644 --- a/_pkgdown.yaml +++ b/_pkgdown.yaml @@ -102,7 +102,7 @@ reference: - contains("rownames") - rowid_as_column - contains("colnames") - - extract_column_names + - find_columns - data_restoretype - title: Helpers for Text Formatting diff --git a/man/adjust.Rd b/man/adjust.Rd index 64e50d9d3..08c841b7a 100644 --- a/man/adjust.Rd +++ b/man/adjust.Rd @@ -68,8 +68,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/assign_labels.Rd b/man/assign_labels.Rd index cca14cc85..eea07534e 100644 --- a/man/assign_labels.Rd +++ b/man/assign_labels.Rd @@ -63,8 +63,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. 
If \code{NULL} (the default), diff --git a/man/categorize.Rd b/man/categorize.Rd index c3401216e..767030a44 100644 --- a/man/categorize.Rd +++ b/man/categorize.Rd @@ -103,8 +103,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/center.Rd b/man/center.Rd index f143f64b2..c09012476 100644 --- a/man/center.Rd +++ b/man/center.Rd @@ -97,8 +97,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/convert_na_to.Rd b/man/convert_na_to.Rd index 91121ff94..c3f252a7b 100644 --- a/man/convert_na_to.Rd +++ b/man/convert_na_to.Rd @@ -66,8 +66,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. 
\code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/convert_to_na.Rd b/man/convert_to_na.Rd index 2529294b7..f0b8ce263 100644 --- a/man/convert_to_na.Rd +++ b/man/convert_to_na.Rd @@ -69,8 +69,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/data_codebook.Rd b/man/data_codebook.Rd index 4c0f935e7..32e3be5e2 100644 --- a/man/data_codebook.Rd +++ b/man/data_codebook.Rd @@ -59,8 +59,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/data_duplicated.Rd b/man/data_duplicated.Rd index 73c3e8de1..130dab85b 100644 --- a/man/data_duplicated.Rd +++ b/man/data_duplicated.Rd @@ -45,8 +45,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. 
Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/data_extract.Rd b/man/data_extract.Rd index a0cd4e402..88d89f0eb 100644 --- a/man/data_extract.Rd +++ b/man/data_extract.Rd @@ -52,8 +52,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{...}{For use by future methods.} diff --git a/man/data_group.Rd b/man/data_group.Rd index 56f5f314e..0f5b593d4 100644 --- a/man/data_group.Rd +++ b/man/data_group.Rd @@ -49,8 +49,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/data_peek.Rd b/man/data_peek.Rd index 4f3f88e8a..6cecf7a1a 100644 --- a/man/data_peek.Rd +++ b/man/data_peek.Rd @@ -52,8 +52,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. 
Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/data_relocate.Rd b/man/data_relocate.Rd index 86c7464ee..c20628563 100644 --- a/man/data_relocate.Rd +++ b/man/data_relocate.Rd @@ -69,8 +69,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{before, after}{Destination of columns. Supplying neither will move columns to the left-hand side; specifying both is an error. Can be a diff --git a/man/data_rename.Rd b/man/data_rename.Rd index 4c3cff00f..2d28c3400 100644 --- a/man/data_rename.Rd +++ b/man/data_rename.Rd @@ -77,8 +77,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. 
If \code{NULL} (the default), diff --git a/man/data_replicate.Rd b/man/data_replicate.Rd index 35448155d..4c152b371 100644 --- a/man/data_replicate.Rd +++ b/man/data_replicate.Rd @@ -52,8 +52,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/data_separate.Rd b/man/data_separate.Rd index 37528d46e..4dad5b713 100644 --- a/man/data_separate.Rd +++ b/man/data_separate.Rd @@ -55,8 +55,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{new_columns}{The names of the new columns, as character vector. If more than one variable was selected (in \code{select}), the new names are prefixed diff --git a/man/data_tabulate.Rd b/man/data_tabulate.Rd index b744c1f1b..34961481a 100644 --- a/man/data_tabulate.Rd +++ b/man/data_tabulate.Rd @@ -94,8 +94,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. 
\code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/data_to_long.Rd b/man/data_to_long.Rd index ab3783584..2e402746b 100644 --- a/man/data_to_long.Rd +++ b/man/data_to_long.Rd @@ -69,8 +69,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{names_to}{The name of the new column that will contain the column names.} diff --git a/man/data_unique.Rd b/man/data_unique.Rd index 8a45bfc21..f7272c87b 100644 --- a/man/data_unique.Rd +++ b/man/data_unique.Rd @@ -46,8 +46,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{keep}{The method to be used for duplicate selection, either "best" (the default), "first", or "last".} diff --git a/man/data_unite.Rd b/man/data_unite.Rd index ba7710a8a..63c2e73a6 100644 --- a/man/data_unite.Rd +++ b/man/data_unite.Rd @@ -52,8 +52,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. 
\code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/describe_distribution.Rd b/man/describe_distribution.Rd index 369bd9ef6..fd229567d 100644 --- a/man/describe_distribution.Rd +++ b/man/describe_distribution.Rd @@ -111,8 +111,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/extract_column_names.Rd b/man/find_columns.Rd similarity index 84% rename from man/extract_column_names.Rd rename to man/find_columns.Rd index 788a78151..67b4e9fd1 100644 --- a/man/extract_column_names.Rd +++ b/man/find_columns.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data_select.R, R/extract_column_names.R -\name{data_select} -\alias{data_select} -\alias{get_columns} -\alias{extract_column_names} +% Please edit documentation in R/data_find.R, R/data_select.R +\name{find_columns} +\alias{find_columns} \alias{data_find} +\alias{get_columns} +\alias{data_select} \title{Find or get columns in a data frame based on search patterns} \usage{ -data_select( +find_columns( data, select = NULL, exclude = NULL, @@ -17,7 +17,7 @@ data_select( ... ) -get_columns( +data_find( data, select = NULL, exclude = NULL, @@ -27,7 +27,7 @@ get_columns( ... 
) -extract_column_names( +get_columns( data, select = NULL, exclude = NULL, @@ -37,7 +37,7 @@ extract_column_names( ... ) -data_find( +data_select( data, select = NULL, exclude = NULL, @@ -79,8 +79,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), @@ -104,26 +104,27 @@ functions (see 'Details'), this argument may be used as workaround.} \item{...}{Arguments passed down to other functions. Mostly not used yet.} } \value{ -\code{extract_column_names()} returns a character vector with column names that -matched the pattern in \code{select} and \code{exclude}, or \code{NULL} if no matching -column name was found. \code{data_select()} returns a data frame with matching -columns. +\code{find_columns()} returns a character vector with column names that matched +the pattern in \code{select} and \code{exclude}, or \code{NULL} if no matching column name +was found. \code{get_columns()} returns a data frame with matching columns. } \description{ -\code{extract_column_names()} returns column names from a data set that -match a certain search pattern, while \code{data_select()} returns the found data. +\code{find_columns()} returns column names from a data set that +match a certain search pattern, while \code{get_columns()} returns the found data. +\code{data_select()} is an alias for \code{get_columns()}, and \code{data_find()} is an alias +for \code{find_columns()}. 
} \details{ Note that it is possible to either pass an entire select helper or only the pattern inside a select helper as a function argument: \if{html}{\out{
}}\preformatted{foo <- function(data, pattern) \{ - extract_column_names(data, select = starts_with(pattern)) + find_columns(data, select = starts_with(pattern)) \} foo(iris, pattern = "Sep") foo2 <- function(data, pattern) \{ - extract_column_names(data, select = pattern) + find_columns(data, select = pattern) \} foo2(iris, pattern = starts_with("Sep")) }\if{html}{\out{
}} @@ -132,7 +133,7 @@ This means that it is also possible to use loop values as arguments or patterns: \if{html}{\out{
}}\preformatted{for (i in c("Sepal", "Sp")) \{ head(iris) |> - extract_column_names(select = starts_with(i)) |> + find_columns(select = starts_with(i)) |> print() \} }\if{html}{\out{
}} @@ -141,7 +142,7 @@ However, this behavior is limited to a "single-level function". It will not work in nested functions, like below: \if{html}{\out{
}}\preformatted{inner <- function(data, arg) \{ - extract_column_names(data, select = arg) + find_columns(data, select = arg) \} outer <- function(data, arg) \{ inner(data, starts_with(arg)) @@ -160,17 +161,17 @@ outer(iris, starts_with("Sep")) } \examples{ # Find columns names by pattern -extract_column_names(iris, starts_with("Sepal")) -extract_column_names(iris, ends_with("Width")) -extract_column_names(iris, regex("\\\\.")) -extract_column_names(iris, c("Petal.Width", "Sepal.Length")) +find_columns(iris, starts_with("Sepal")) +find_columns(iris, ends_with("Width")) +find_columns(iris, regex("\\\\.")) +find_columns(iris, c("Petal.Width", "Sepal.Length")) # starts with "Sepal", but not allowed to end with "width" -extract_column_names(iris, starts_with("Sepal"), exclude = contains("Width")) +find_columns(iris, starts_with("Sepal"), exclude = contains("Width")) # find numeric with mean > 3.5 numeric_mean_35 <- function(x) is.numeric(x) && mean(x, na.rm = TRUE) > 3.5 -extract_column_names(iris, numeric_mean_35) +find_columns(iris, numeric_mean_35) } \seealso{ \itemize{ diff --git a/man/labels_to_levels.Rd b/man/labels_to_levels.Rd index 8024eb2d3..12e8c0fe3 100644 --- a/man/labels_to_levels.Rd +++ b/man/labels_to_levels.Rd @@ -58,8 +58,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. 
If \code{NULL} (the default), diff --git a/man/means_by_group.Rd b/man/means_by_group.Rd index d7a6dfc96..5473daeec 100644 --- a/man/means_by_group.Rd +++ b/man/means_by_group.Rd @@ -87,8 +87,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/normalize.Rd b/man/normalize.Rd index 4a9a61a68..646e5b5ec 100644 --- a/man/normalize.Rd +++ b/man/normalize.Rd @@ -96,8 +96,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/ranktransform.Rd b/man/ranktransform.Rd index c23105735..01da194f2 100644 --- a/man/ranktransform.Rd +++ b/man/ranktransform.Rd @@ -64,8 +64,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. 
\code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/recode_values.Rd b/man/recode_values.Rd index e4384593c..3e6ddb7f6 100644 --- a/man/recode_values.Rd +++ b/man/recode_values.Rd @@ -32,19 +32,7 @@ recode_values(x, ...) ... ) -change_code( - x, - select = NULL, - exclude = NULL, - recode = NULL, - default = NULL, - preserve_na = TRUE, - append = FALSE, - ignore_case = FALSE, - regex = FALSE, - verbose = TRUE, - ... -) +change_code(x, ...) } \arguments{ \item{x}{A data frame, numeric or character vector, or factor.} @@ -100,8 +88,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/rescale.Rd b/man/rescale.Rd index 016a6f841..83cc3d64d 100644 --- a/man/rescale.Rd +++ b/man/rescale.Rd @@ -92,8 +92,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. 
If \code{NULL} (the default), diff --git a/man/reverse.Rd b/man/reverse.Rd index 6304dffc6..04066e8e6 100644 --- a/man/reverse.Rd +++ b/man/reverse.Rd @@ -70,8 +70,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/row_means.Rd b/man/row_means.Rd index c347fc6f1..21f1853d9 100644 --- a/man/row_means.Rd +++ b/man/row_means.Rd @@ -48,8 +48,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/slide.Rd b/man/slide.Rd index 986b4b2fb..440f316bc 100644 --- a/man/slide.Rd +++ b/man/slide.Rd @@ -59,8 +59,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. 
\code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/standardize.Rd b/man/standardize.Rd index 4041f2dc0..18c560c80 100644 --- a/man/standardize.Rd +++ b/man/standardize.Rd @@ -170,8 +170,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/to_factor.Rd b/man/to_factor.Rd index e035769ec..6b57df59c 100644 --- a/man/to_factor.Rd +++ b/man/to_factor.Rd @@ -61,8 +61,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/man/to_numeric.Rd b/man/to_numeric.Rd index 7478c9579..7c78b1ba6 100644 --- a/man/to_numeric.Rd +++ b/man/to_numeric.Rd @@ -55,8 +55,8 @@ negation should not work as expected, use the \code{exclude} argument instead. } If \code{NULL}, selects all columns. 
Patterns that found no matches are silently -ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} -will just return \code{"Species"}.} +ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just +return \code{"Species"}.} \item{exclude}{See \code{select}, however, column names matched by the pattern from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), diff --git a/tests/testthat/test-extract_column_names.R b/tests/testthat/test-find_columns.R similarity index 51% rename from tests/testthat/test-extract_column_names.R rename to tests/testthat/test-find_columns.R index 67cd680f7..923df8c4a 100644 --- a/tests/testthat/test-extract_column_names.R +++ b/tests/testthat/test-find_columns.R @@ -1,83 +1,83 @@ -test_that("extract_column_names works as expected", { +test_that("find_columns works as expected", { expect_identical( - extract_column_names(iris, starts_with("Sepal")), + find_columns(iris, starts_with("Sepal")), c("Sepal.Length", "Sepal.Width") ) expect_identical( - extract_column_names(iris, starts_with("Sepal", "Petal")), + find_columns(iris, starts_with("Sepal", "Petal")), c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") ) expect_identical( - extract_column_names(iris, ends_with("Width")), + find_columns(iris, ends_with("Width")), c("Sepal.Width", "Petal.Width") ) expect_identical( - extract_column_names(iris, ends_with("Length", "Width")), + find_columns(iris, ends_with("Length", "Width")), c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") ) expect_identical( - extract_column_names(iris, regex("\\.")), + find_columns(iris, regex("\\.")), c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") ) expect_identical( - extract_column_names(iris, c("Petal.Width", "Sepal.Length")), + find_columns(iris, c("Petal.Width", "Sepal.Length")), c("Petal.Width", "Sepal.Length") ) expect_identical( - extract_column_names(iris, contains("Wid")), + 
find_columns(iris, contains("Wid")), c("Sepal.Width", "Petal.Width") ) expect_identical( - extract_column_names(iris, contains("en", "idt")), + find_columns(iris, contains("en", "idt")), c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") ) expect_identical( - extract_column_names(mtcars, c("am", "gear", "cyl")), + find_columns(mtcars, c("am", "gear", "cyl")), c("am", "gear", "cyl") ) expect_identical( - extract_column_names(mtcars, c("vam", "gear", "cyl")), + find_columns(mtcars, c("vam", "gear", "cyl")), c("gear", "cyl") ) - expect_warning(expect_null(extract_column_names(mtcars, ends_with("abc")))) + expect_warning(expect_null(find_columns(mtcars, ends_with("abc")))) expect_identical( - extract_column_names(mtcars, regex("rb$")), + find_columns(mtcars, regex("rb$")), "carb" ) expect_identical( - extract_column_names(mtcars, regex("^c")), + find_columns(mtcars, regex("^c")), c("cyl", "carb") ) - expect_warning(expect_null(extract_column_names(mtcars, "^c"))) + expect_warning(expect_null(find_columns(mtcars, "^c"))) expect_identical( - extract_column_names(mtcars, regex("^C"), ignore_case = TRUE), + find_columns(mtcars, regex("^C"), ignore_case = TRUE), c("cyl", "carb") ) expect_identical( - extract_column_names(iris, "Width$", regex = TRUE), + find_columns(iris, "Width$", regex = TRUE), c("Sepal.Width", "Petal.Width") ) }) -test_that("extract_column_names from other functions", { +test_that("find_columns from other functions", { test_fun1 <- function(data, i) { - extract_column_names(data, select = i) + find_columns(data, select = i) } expect_identical( test_fun1(iris, c("Sepal.Length", "Sepal.Width")), @@ -90,7 +90,7 @@ test_that("extract_column_names from other functions", { ) test_fun1a <- function(data, i) { - extract_column_names(data, select = i, regex = TRUE) + find_columns(data, select = i, regex = TRUE) } expect_identical( test_fun1a(iris, "Sep"), @@ -98,7 +98,7 @@ test_that("extract_column_names from other functions", { ) test_fun1b <- 
function(data, i) { - extract_column_names(data, select = i, regex = TRUE) + find_columns(data, select = i, regex = TRUE) } expect_identical( test_fun1b(iris, "Width$"), @@ -106,7 +106,7 @@ test_that("extract_column_names from other functions", { ) test_fun2 <- function(data) { - extract_column_names(data, select = starts_with("Sep")) + find_columns(data, select = starts_with("Sep")) } expect_identical( test_fun2(iris), @@ -115,7 +115,7 @@ test_that("extract_column_names from other functions", { test_fun3 <- function(data) { i <- "Sep" - extract_column_names(data, select = starts_with(i)) + find_columns(data, select = starts_with(i)) } expect_identical( test_fun3(iris), @@ -123,68 +123,68 @@ test_that("extract_column_names from other functions", { ) }) -test_that("extract_column_names regex", { +test_that("find_columns regex", { expect_identical( - extract_column_names(mtcars, select = "pg", regex = TRUE), - extract_column_names(mtcars, select = "mpg") + find_columns(mtcars, select = "pg", regex = TRUE), + find_columns(mtcars, select = "mpg") ) }) -test_that("extract_column_names works correctly with minus sign", { +test_that("find_columns works correctly with minus sign", { expect_identical( - extract_column_names(iris, -"Sepal.Length"), + find_columns(iris, -"Sepal.Length"), c("Sepal.Width", "Petal.Length", "Petal.Width", "Species") ) expect_identical( - extract_column_names(iris, -c("Sepal.Length", "Petal.Width")), + find_columns(iris, -c("Sepal.Length", "Petal.Width")), c("Sepal.Width", "Petal.Length", "Species") ) expect_identical( - extract_column_names(iris, -1), + find_columns(iris, -1), c("Sepal.Width", "Petal.Length", "Petal.Width", "Species") ) expect_error( - extract_column_names(iris, -1:2), + find_columns(iris, -1:2), regexp = "can't mix negative" ) expect_identical( - extract_column_names(iris, -(1:2)), + find_columns(iris, -(1:2)), c("Petal.Length", "Petal.Width", "Species") ) expect_identical( - extract_column_names(iris, -c(1, 3)), + 
find_columns(iris, -c(1, 3)), c("Sepal.Width", "Petal.Width", "Species") ) expect_identical( - extract_column_names(iris, -starts_with("Sepal", "Petal")), + find_columns(iris, -starts_with("Sepal", "Petal")), "Species" ) expect_identical( - extract_column_names(iris, -ends_with("Length", "Width")), + find_columns(iris, -ends_with("Length", "Width")), "Species" ) expect_identical( - extract_column_names(iris, -contains("en", "idt")), + find_columns(iris, -contains("en", "idt")), "Species" ) expect_identical( - extract_column_names(iris, -c("Sepal.Length", "Petal.Width"), exclude = "Species"), + find_columns(iris, -c("Sepal.Length", "Petal.Width"), exclude = "Species"), c("Sepal.Width", "Petal.Length") ) }) -test_that("extract_column_names with square brackets", { +test_that("find_columns with square brackets", { expect_identical( - extract_column_names(mtcars, select = names(mtcars)[-1]), - extract_column_names(mtcars, select = 2:11) + find_columns(mtcars, select = names(mtcars)[-1]), + find_columns(mtcars, select = 2:11) ) }) diff --git a/tests/testthat/test-data_select.R b/tests/testthat/test-get_columns.R similarity index 61% rename from tests/testthat/test-data_select.R rename to tests/testthat/test-get_columns.R index 2557a1f7b..a23f267a7 100644 --- a/tests/testthat/test-data_select.R +++ b/tests/testthat/test-get_columns.R @@ -1,33 +1,33 @@ # input check --------------------- -test_that("data_select checks for data frame", { - expect_error(data_select(NULL), regexp = "provided") +test_that("get_columns checks for data frame", { + expect_error(get_columns(NULL), regexp = "provided") x <- list(a = 1:2, b = letters[1:3]) - expect_error(data_select(x), regexp = "coerced") + expect_error(get_columns(x), regexp = "coerced") }) # select helpers --------------------- -test_that("data_select works with select helpers", { +test_that("get_columns works with select helpers", { expect_identical( - data_select(iris, starts_with("Sepal")), + get_columns(iris, 
starts_with("Sepal")), iris[c("Sepal.Length", "Sepal.Width")] ) expect_identical( - data_select(iris, ends_with("Width")), + get_columns(iris, ends_with("Width")), iris[c("Sepal.Width", "Petal.Width")] ) expect_identical( - data_select(iris, regex("\\.")), + get_columns(iris, regex("\\.")), iris[c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")] ) expect_identical( - data_select(iris, contains("Wid")), + get_columns(iris, contains("Wid")), iris[c("Sepal.Width", "Petal.Width")] ) }) @@ -36,14 +36,14 @@ test_that("data_select works with select helpers", { # select helpers, negation --------------------- -test_that("data_select works with negation of select helpers", { +test_that("get_columns works with negation of select helpers", { expect_identical( - data_select(iris, -starts_with("Sepal")), + get_columns(iris, -starts_with("Sepal")), iris[c("Petal.Length", "Petal.Width", "Species")] ) expect_identical( - data_select(iris, -ends_with("Width")), + get_columns(iris, -ends_with("Width")), iris[c("Sepal.Length", "Petal.Length", "Species")] ) }) @@ -52,28 +52,28 @@ test_that("data_select works with negation of select helpers", { # select-nse with function --------------------- -test_that("data_select works with select-functions", { +test_that("get_columns works with select-functions", { expect_identical( - data_select(iris, is.numeric()), + get_columns(iris, is.numeric()), iris[sapply(iris, is.numeric)] ) expect_identical( - data_select(iris, is.numeric), + get_columns(iris, is.numeric), iris[sapply(iris, is.numeric)] ) expect_identical( - data_select(iris, is.factor()), + get_columns(iris, is.factor()), iris[sapply(iris, is.factor)] ) expect_identical( - data_select(iris, is.factor), + get_columns(iris, is.factor), iris[sapply(iris, is.factor)] ) - expect_warning(expect_null(data_select(iris, is.logical()))) + expect_warning(expect_null(get_columns(iris, is.logical()))) }) @@ -82,19 +82,19 @@ test_that("data_select works with select-functions", { testfun 
<- function(i) { is.numeric(i) && mean(i, na.rm = TRUE) > 3.5 } -test_that("data_select works with user-defined select-functions", { - expect_identical(data_select(iris, testfun), iris[sapply(iris, testfun)]) - expect_identical(data_select(iris, -testfun), iris[!sapply(iris, testfun)]) +test_that("get_columns works with user-defined select-functions", { + expect_identical(get_columns(iris, testfun), iris[sapply(iris, testfun)]) + expect_identical(get_columns(iris, -testfun), iris[!sapply(iris, testfun)]) testfun2 <- function(i) { is.numeric(i) && mean(i, na.rm = TRUE) < 5 } expect_identical( - data_select(iris, select = testfun, exclude = testfun2), + get_columns(iris, select = testfun, exclude = testfun2), iris["Sepal.Length"] ) expect_identical( - data_select(iris, select = testfun, exclude = -testfun2), + get_columns(iris, select = testfun, exclude = -testfun2), iris["Petal.Length"] ) }) @@ -103,42 +103,42 @@ test_that("data_select works with user-defined select-functions", { # select-nse with negation of functions --------------------- -test_that("data_select works with negated select-functions", { +test_that("get_columns works with negated select-functions", { expect_identical( - data_select(iris, -is.numeric()), + get_columns(iris, -is.numeric()), iris[sapply(iris, function(i) !is.numeric(i))] ) expect_identical( - data_select(iris, -is.numeric), + get_columns(iris, -is.numeric), iris[sapply(iris, function(i) !is.numeric(i))] ) expect_identical( - data_select(iris, -is.factor()), + get_columns(iris, -is.factor()), iris[sapply(iris, function(i) !is.factor(i))] ) expect_identical( - data_select(iris, -is.factor), + get_columns(iris, -is.factor), iris[sapply(iris, function(i) !is.factor(i))] ) - expect_identical(data_select(iris, -is.logical), iris) + expect_identical(get_columns(iris, -is.logical), iris) }) # select-nse with ranges --------------------- -test_that("data_select works with ranges", { +test_that("get_columns works with ranges", { expect_identical( 
- data_select(iris, 2:3), + get_columns(iris, 2:3), iris[2:3] ) expect_identical( - data_select(iris, Sepal.Width:Petal.Length), + get_columns(iris, Sepal.Width:Petal.Length), iris[2:3] ) }) @@ -147,33 +147,33 @@ test_that("data_select works with ranges", { # select-nse with negated ranges --------------------- -test_that("data_select works with negated ranges", { +test_that("get_columns works with negated ranges", { expect_identical( - data_select(iris, -(1:2)), + get_columns(iris, -(1:2)), iris[c(3, 4, 5)] ) expect_identical( - data_select(iris, -1:-2), + get_columns(iris, -1:-2), iris[c(3, 4, 5)] ) expect_identical( - data_select(iris, exclude = -1:-2), + get_columns(iris, exclude = -1:-2), iris[1:2] ) expect_identical( - data_select(iris, exclude = 2:3), + get_columns(iris, exclude = 2:3), iris[c(1, 4, 5)] ) expect_error( - data_select(iris, -Sepal.Width:Petal.Length), + get_columns(iris, -Sepal.Width:Petal.Length), "can't mix negative and positive" ) expect_identical( - data_select(iris, -(Sepal.Width:Petal.Length)), + get_columns(iris, -(Sepal.Width:Petal.Length)), iris[c(1, 4, 5)] ) }) @@ -182,14 +182,14 @@ test_that("data_select works with negated ranges", { # select-nse with formulas --------------------- -test_that("data_select works with formulas", { +test_that("get_columns works with formulas", { expect_identical( - data_select(iris, ~ Sepal.Width + Petal.Length), + get_columns(iris, ~ Sepal.Width + Petal.Length), iris[2:3] ) expect_identical( - data_select(iris, exclude = ~ Sepal.Width + Petal.Length), + get_columns(iris, exclude = ~ Sepal.Width + Petal.Length), iris[c(1, 4, 5)] ) }) @@ -198,50 +198,50 @@ test_that("data_select works with formulas", { # select-nse, other cases --------------------- -test_that("data_select works, other cases", { - expect_identical(data_select(iris), iris) +test_that("get_columns works, other cases", { + expect_identical(get_columns(iris), iris) expect_identical( - data_select(iris, c("Petal.Width", "Sepal.Length")), + 
get_columns(iris, c("Petal.Width", "Sepal.Length")), iris[c("Petal.Width", "Sepal.Length")] ) expect_identical( - data_select(iris, -c("Petal.Width", "Sepal.Length")), + get_columns(iris, -c("Petal.Width", "Sepal.Length")), iris[setdiff(colnames(iris), c("Petal.Width", "Sepal.Length"))] ) expect_identical( - data_select(iris, -Petal.Width), + get_columns(iris, -Petal.Width), iris[setdiff(colnames(iris), "Petal.Width")] ) expect_identical( - data_select(mtcars, c("am", "gear", "cyl")), + get_columns(mtcars, c("am", "gear", "cyl")), mtcars[c("am", "gear", "cyl")] ) expect_identical( - data_select(mtcars, c("vam", "gear", "cyl")), + get_columns(mtcars, c("vam", "gear", "cyl")), mtcars[c("gear", "cyl")] ) - expect_warning(expect_null(data_select(mtcars, ends_with("abc")))) + expect_warning(expect_null(get_columns(mtcars, ends_with("abc")))) expect_identical( - data_select(mtcars, regex("rb$")), + get_columns(mtcars, regex("rb$")), mtcars["carb"] ) expect_identical( - data_select(mtcars, regex("^c")), + get_columns(mtcars, regex("^c")), mtcars[c("cyl", "carb")] ) - expect_warning(expect_null(data_select(mtcars, "^c"))) + expect_warning(expect_null(get_columns(mtcars, "^c"))) expect_identical( - data_select(mtcars, regex("^C"), ignore_case = TRUE), + get_columns(mtcars, regex("^C"), ignore_case = TRUE), mtcars[c("cyl", "carb")] ) }) @@ -250,9 +250,9 @@ test_that("data_select works, other cases", { # select-nse works when called from other function --------------------- -test_that("data_select from other functions", { +test_that("get_columns from other functions", { test_fun1 <- function(data, i) { - data_select(data, select = i) + get_columns(data, select = i) } expect_identical( test_fun1(iris, c("Sepal.Length", "Sepal.Width")), @@ -265,7 +265,7 @@ test_that("data_select from other functions", { ) test_fun1a <- function(data, i) { - data_select(data, select = i, regex = TRUE) + get_columns(data, select = i, regex = TRUE) } expect_identical( test_fun1a(iris, "Sep"), @@ 
-273,7 +273,7 @@ test_that("data_select from other functions", { ) test_fun1b <- function(data, i) { - data_select(data, select = i, regex = TRUE) + get_columns(data, select = i, regex = TRUE) } expect_identical( test_fun1b(iris, "Width$"), @@ -281,7 +281,7 @@ test_that("data_select from other functions", { ) test_fun1c <- function(data, i) { - data_select(data, select = -i) + get_columns(data, select = -i) } expect_identical( test_fun1c(iris, c("Sepal.Length", "Sepal.Width")), @@ -290,7 +290,7 @@ test_that("data_select from other functions", { test_fun2 <- function(data) { - data_select(data, select = starts_with("Sep")) + get_columns(data, select = starts_with("Sep")) } expect_identical( test_fun2(iris), @@ -299,7 +299,7 @@ test_that("data_select from other functions", { test_fun3 <- function(data) { i <- "Sep" - data_select(data, select = starts_with(i)) + get_columns(data, select = starts_with(i)) } expect_identical( test_fun3(iris), @@ -313,7 +313,7 @@ test_that("data_select from other functions", { testfun2 <- function(i) { is.numeric(i) && mean(i, na.rm = TRUE) < 5 } - data_select(x, select = testfun, exclude = -testfun2) + get_columns(x, select = testfun, exclude = -testfun2) } expect_identical(test_top(iris), iris["Petal.Length"]) }) @@ -322,14 +322,14 @@ test_that("data_select from other functions", { # preserve attributes -------------------------- -test_that("data_select preserves attributes", { +test_that("get_columns preserves attributes", { skip_if_not_installed("parameters") m <- lm(Sepal.Length ~ Species, data = iris) out <- parameters::parameters(m) a1 <- attributes(out) - out2 <- data_select(out, 1:3) + out2 <- get_columns(out, 1:3) a2 <- attributes(out2) expect_identical(sort(names(a1)), sort(names(a2))) @@ -339,7 +339,7 @@ test_that("data_select preserves attributes", { test_that("select helpers work in functions and loops", { foo <- function(data, i) { - extract_column_names(data, select = starts_with(i)) + find_columns(data, select = 
starts_with(i)) } expect_identical( foo(iris, "Sep"), @@ -347,7 +347,7 @@ test_that("select helpers work in functions and loops", { ) for (i in "Sepal") { - x <- extract_column_names(iris, select = starts_with(i)) + x <- find_columns(iris, select = starts_with(i)) } expect_identical( x, @@ -355,7 +355,7 @@ test_that("select helpers work in functions and loops", { ) for (i in "Length") { - x <- extract_column_names(iris, select = ends_with(i)) + x <- find_columns(iris, select = ends_with(i)) } expect_identical( x, @@ -366,7 +366,7 @@ test_that("select helpers work in functions and loops", { test_that("select helpers work in functions and loops even if there's an object with the same name in the environment above", { i <- "Petal" foo <- function(data, i) { - extract_column_names(data, select = starts_with(i)) + find_columns(data, select = starts_with(i)) } expect_identical( foo(iris, "Sep"), @@ -374,7 +374,7 @@ test_that("select helpers work in functions and loops even if there's an object ) for (i in "Sepal") { - x <- extract_column_names(iris, select = starts_with(i)) + x <- find_columns(iris, select = starts_with(i)) } expect_identical( x, @@ -384,7 +384,7 @@ test_that("select helpers work in functions and loops even if there's an object i <- "Width" for (i in "Length") { - x <- extract_column_names(iris, select = ends_with(i)) + x <- find_columns(iris, select = ends_with(i)) } expect_identical( x, @@ -395,7 +395,7 @@ test_that("select helpers work in functions and loops even if there's an object test_that("old solution still works", { foo <- function(data) { i <- "Sep" - extract_column_names(data, select = i, regex = TRUE) + find_columns(data, select = i, regex = TRUE) } expect_identical( foo(iris), diff --git a/tests/testthat/test-select_nse.R b/tests/testthat/test-select_nse.R index c0195ad94..1f013a705 100644 --- a/tests/testthat/test-select_nse.R +++ b/tests/testthat/test-select_nse.R @@ -47,7 +47,7 @@ test_that(".select_nse: arg 'select' works", { 
c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") ) expect_identical( - extract_column_names(iris, sepal.length, ignore_case = TRUE), + find_columns(iris, sepal.length, ignore_case = TRUE), "Sepal.Length" ) expect_identical( diff --git a/vignettes/selection_syntax.Rmd b/vignettes/selection_syntax.Rmd index 9b501ebd5..b636f33b4 100644 --- a/vignettes/selection_syntax.Rmd +++ b/vignettes/selection_syntax.Rmd @@ -191,13 +191,13 @@ For example, if we want to let the user decide the selection they want to use: ```{r} my_function <- function(data, selection) { - extract_column_names(data, select = selection) + find_columns(data, select = selection) } my_function(iris, "Sepal.Length") my_function(iris, starts_with("Sep")) my_function_2 <- function(data, pattern) { - extract_column_names(data, select = starts_with(pattern)) + find_columns(data, select = starts_with(pattern)) } my_function_2(iris, "Sep") ``` From d19c4c0dff2136e255e91b9a15f84a35335f2435 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 18 May 2024 20:45:26 +0100 Subject: [PATCH 21/27] version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 29c879ed8..ec594a4e7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.10.0.5 +Version: 0.10.0.4 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")), From e484ee0ac69ae96eb79998705b8915be39cb5079 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 18 May 2024 20:46:05 +0100 Subject: [PATCH 22/27] news --- NEWS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index b8c4c533e..1ceb53061 100644 --- a/NEWS.md +++ 
b/NEWS.md @@ -3,8 +3,8 @@ BREAKING CHANGES * Arguments named `group` or `group_by` are deprecated and will be removed - in a future release. - Please use `by` instead. This affects the following functions in *datawizard*. + in a future release. Please use `by` instead. This affects the following + functions in *datawizard* (#502). * `data_partition()` * `demean()` and `degroup()` From 0274388ebbccd83798ab009498cabb04381ee139 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 18 May 2024 20:47:19 +0100 Subject: [PATCH 23/27] other remnants --- R/data_modify.R | 4 ++-- R/data_rename.R | 2 +- R/select_nse.R | 2 +- man/categorize.Rd | 2 +- man/data_match.Rd | 2 +- man/data_merge.Rd | 2 +- man/data_modify.Rd | 4 ++-- man/data_partition.Rd | 2 +- man/data_relocate.Rd | 2 +- man/data_rename.Rd | 2 +- man/data_rotate.Rd | 2 +- man/data_to_long.Rd | 2 +- man/data_to_wide.Rd | 2 +- man/find_columns.Rd | 2 +- man/recode_values.Rd | 2 +- man/slide.Rd | 2 +- man/winsorize.Rd | 2 +- 17 files changed, 19 insertions(+), 19 deletions(-) diff --git a/R/data_modify.R b/R/data_modify.R index 6942e7d3d..c9b9d035a 100644 --- a/R/data_modify.R +++ b/R/data_modify.R @@ -121,10 +121,10 @@ #' .modify = round #' ) #' -#' # combine "extract_column_names()" and ".at" argument +#' # combine "data_find()" and ".at" argument #' out <- data_modify( #' d, -#' .at = extract_column_names(d, select = starts_with("Sepal")), +#' .at = data_find(d, select = starts_with("Sepal")), #' .modify = as.factor #' ) #' # "Sepal.Length" and "Sepal.Width" are now factors diff --git a/R/data_rename.R b/R/data_rename.R index 2ef72e9e7..2a9061707 100644 --- a/R/data_rename.R +++ b/R/data_rename.R @@ -46,7 +46,7 @@ #' - Functions to recode data: [rescale()], [reverse()], [categorize()], [recode_values()], [slide()] #' - Functions to standardize, normalize, rank-transform: [center()], [standardize()], [normalize()], [ranktransform()], [winsorize()] #' - Split and 
merge data frames: [data_partition()], [data_merge()] -#' - Functions to find or select columns: [data_select()], [extract_column_names()] +#' - Functions to find or select columns: [data_select()], [data_find()] #' - Functions to filter rows: [data_match()], [data_filter()] #' #' @export diff --git a/R/select_nse.R b/R/select_nse.R index 6d4806558..dca6fc4b7 100644 --- a/R/select_nse.R +++ b/R/select_nse.R @@ -148,7 +148,7 @@ # 3 types of symbols: # - unquoted variables -# - objects that need to be evaluated, e.g extract_column_names(iris, i) where +# - objects that need to be evaluated, e.g data_find(iris, i) where # i is a # function arg or is defined before. This can also be a vector of names or # positions. diff --git a/man/categorize.Rd b/man/categorize.Rd index 767030a44..0f2478776 100644 --- a/man/categorize.Rd +++ b/man/categorize.Rd @@ -226,7 +226,7 @@ categorize(x, "equal_length", n_groups = 3, labels = "median") \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/data_match.Rd b/man/data_match.Rd index 
b900d788b..5316c1244 100644 --- a/man/data_match.Rd +++ b/man/data_match.Rd @@ -124,7 +124,7 @@ data_filter(mtcars, fl) \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/data_merge.Rd b/man/data_merge.Rd index 176dfea80..5934f8fc6 100644 --- a/man/data_merge.Rd +++ b/man/data_merge.Rd @@ -187,7 +187,7 @@ data_merge(list(x, y, z), join = "bind", by = "id", id = "source") \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: 
\code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/data_modify.Rd b/man/data_modify.Rd index 042962e03..8cac82205 100644 --- a/man/data_modify.Rd +++ b/man/data_modify.Rd @@ -138,10 +138,10 @@ data_modify( .modify = round ) -# combine "extract_column_names()" and ".at" argument +# combine "data_find()" and ".at" argument out <- data_modify( d, - .at = extract_column_names(d, select = starts_with("Sepal")), + .at = data_find(d, select = starts_with("Sepal")), .modify = as.factor ) # "Sepal.Length" and "Sepal.Width" are now factors diff --git a/man/data_partition.Rd b/man/data_partition.Rd index f68e5d141..4ed71e3c1 100644 --- a/man/data_partition.Rd +++ b/man/data_partition.Rd @@ -77,7 +77,7 @@ lapply(out, function(i) table(i$Species)) \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} \item Functions to filter rows: 
\code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/data_relocate.Rd b/man/data_relocate.Rd index c20628563..4e3fd18eb 100644 --- a/man/data_relocate.Rd +++ b/man/data_relocate.Rd @@ -138,7 +138,7 @@ head(data_remove(iris, starts_with("Sepal"))) \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/data_rename.Rd b/man/data_rename.Rd index 2d28c3400..81ab63d1e 100644 --- a/man/data_rename.Rd +++ b/man/data_rename.Rd @@ -149,7 +149,7 @@ head(data_rename(iris, replacement = paste0("Var", 1:5))) \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split 
and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/data_rotate.Rd b/man/data_rotate.Rd index 973b0c7ae..2f5877e1b 100644 --- a/man/data_rotate.Rd +++ b/man/data_rotate.Rd @@ -58,7 +58,7 @@ data_rotate(x, colnames = "c") \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/data_to_long.Rd b/man/data_to_long.Rd index 2e402746b..f6abe5f39 100644 --- a/man/data_to_long.Rd +++ b/man/data_to_long.Rd @@ -165,7 +165,7 @@ data_to_long( \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, 
\code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/data_to_wide.Rd b/man/data_to_wide.Rd index e04d4ac85..38ede8352 100644 --- a/man/data_to_wide.Rd +++ b/man/data_to_wide.Rd @@ -130,7 +130,7 @@ data_to_wide( \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} \item Functions to filter rows: 
\code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/find_columns.Rd b/man/find_columns.Rd index 67b4e9fd1..db5671755 100644 --- a/man/find_columns.Rd +++ b/man/find_columns.Rd @@ -181,7 +181,7 @@ find_columns(iris, numeric_mean_35) \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/recode_values.Rd b/man/recode_values.Rd index 3e6ddb7f6..45b127730 100644 --- a/man/recode_values.Rd +++ b/man/recode_values.Rd @@ -285,7 +285,7 @@ options(data_recode_pattern = NULL) \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: 
\code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/slide.Rd b/man/slide.Rd index 440f316bc..2f1903ff4 100644 --- a/man/slide.Rd +++ b/man/slide.Rd @@ -127,7 +127,7 @@ sapply(mtcars, min) \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } diff --git a/man/winsorize.Rd b/man/winsorize.Rd index ceeccd8ba..a8031f334 100644 --- a/man/winsorize.Rd +++ b/man/winsorize.Rd @@ -88,7 +88,7 @@ winsorize(iris, threshold = 0.2) \item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}}, \code{\link[=recode_values]{recode_values()}}, 
\code{\link[=slide]{slide()}} \item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}}, \code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}} \item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}} -\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}} +\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=data_find]{data_find()}} \item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}} } } From b8ac8eaab4121616fcf85facc9b50a18b7451618 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 18 May 2024 21:02:34 +0100 Subject: [PATCH 24/27] fix --- R/select_nse.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/select_nse.R b/R/select_nse.R index dca6fc4b7..1578a9fa1 100644 --- a/R/select_nse.R +++ b/R/select_nse.R @@ -182,7 +182,7 @@ # if starts_with() et al. come from tidyselect but need to be used in # a select environment, then the error doesn't have the same structure. 
if (is.null(fn) && grepl("must be used within a", e$message, fixed = TRUE)) { - call_trace <- lapply(e$call_trace$call, function(x) { + call_trace <- lapply(e$trace$call, function(x) { tmp <- insight::safe_deparse(x) if (grepl(paste0("^", .regex_select_helper()), tmp)) { tmp From 01a858cfbb1bf80dba480987ccd5aa1588705c44 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 18 May 2024 21:13:10 +0100 Subject: [PATCH 25/27] do not use devel pkgdown --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ec594a4e7..5817c1a0d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -80,5 +80,5 @@ Config/testthat/edition: 3 Config/testthat/parallel: true Config/Needs/website: rstudio/bslib, - r-lib/pkgdown, + pkgdown, easystats/easystatstemplate From 5c1a25573e07e3c2740ef46410c28f730f44d92b Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 18 May 2024 21:18:50 +0100 Subject: [PATCH 26/27] lintr --- R/text_format.R | 6 +++--- tests/testthat/test-standardize_datagrid.R | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/text_format.R b/R/text_format.R index 180807746..b15935542 100644 --- a/R/text_format.R +++ b/R/text_format.R @@ -78,8 +78,8 @@ text_concatenate <- function(text, sep = ", ", last = " and ", enclose = NULL) { if (length(text) == 1) { s <- text } else { - s <- paste0(text[1:(length(text) - 1)], collapse = sep) - s <- paste0(c(s, text[length(text)]), collapse = last) + s <- paste(text[1:(length(text) - 1)], collapse = sep) + s <- paste(c(s, text[length(text)]), collapse = last) } s } @@ -130,7 +130,7 @@ text_wrap <- function(text, width = NULL, ...) 
{ if (nchar(s) > width) { leading_spaces <- nchar(s) - nchar(insight::trim_ws(s)) s <- strwrap(s, width = width) - s <- paste0(s, collapse = "\n") + s <- paste(s, collapse = "\n") s <- paste0(strrep(" ", leading_spaces), s) } wrapped <- paste0(wrapped, s, "\n") diff --git a/tests/testthat/test-standardize_datagrid.R b/tests/testthat/test-standardize_datagrid.R index c32616bdc..2c095e6bc 100644 --- a/tests/testthat/test-standardize_datagrid.R +++ b/tests/testthat/test-standardize_datagrid.R @@ -2,10 +2,10 @@ test_that("standardize.datagrid", { x <- insight::get_datagrid(iris, by = "Sepal.Length", range = "sd", length = 3) out <- standardize(x) - expect_equal(as.numeric(out$Sepal.Length), c(-1, 0, 1)) - expect_equal(as.numeric(out$Sepal.Width), c(0, 0, 0)) + expect_identical(as.numeric(out$Sepal.Length), c(-1, 0, 1), tolerance = 1e-3) + expect_identical(as.numeric(out$Sepal.Width), c(0, 0, 0), tolerance = 1e-3) x <- insight::get_datagrid(iris, by = "Sepal.Length = c(-1, 0)") out <- unstandardize(x, select = "Sepal.Length") - expect_equal(out$Sepal.Length[1:2], c(mean(iris$Sepal.Length) - sd(iris$Sepal.Length), mean(iris$Sepal.Length))) + expect_identical(out$Sepal.Length[1:2], c(mean(iris$Sepal.Length) - sd(iris$Sepal.Length), mean(iris$Sepal.Length)), tolerance = 1e-3) }) From 814fae98c27adb0d465712cfd0a46c07b5d5d650 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 18 May 2024 21:24:21 +0100 Subject: [PATCH 27/27] same --- DESCRIPTION | 2 +- R/data_codebook.R | 2 +- tests/testthat/test-standardize_datagrid.R | 6 +++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5817c1a0d..ec594a4e7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -80,5 +80,5 @@ Config/testthat/edition: 3 Config/testthat/parallel: true Config/Needs/website: rstudio/bslib, - pkgdown, + r-lib/pkgdown, easystats/easystatstemplate diff --git a/R/data_codebook.R b/R/data_codebook.R index 
cec95196f..d6fe46beb 100644 --- a/R/data_codebook.R +++ b/R/data_codebook.R @@ -466,5 +466,5 @@ print_md.data_codebook <- function(x, ...) { N = "r" ) align <- align[colnames(x)] - paste0(unname(align), collapse = "") + paste(unname(align), collapse = "") } diff --git a/tests/testthat/test-standardize_datagrid.R b/tests/testthat/test-standardize_datagrid.R index 2c095e6bc..35a11fdb5 100644 --- a/tests/testthat/test-standardize_datagrid.R +++ b/tests/testthat/test-standardize_datagrid.R @@ -7,5 +7,9 @@ test_that("standardize.datagrid", { x <- insight::get_datagrid(iris, by = "Sepal.Length = c(-1, 0)") out <- unstandardize(x, select = "Sepal.Length") - expect_identical(out$Sepal.Length[1:2], c(mean(iris$Sepal.Length) - sd(iris$Sepal.Length), mean(iris$Sepal.Length)), tolerance = 1e-3) + expect_identical( + out$Sepal.Length[1:2], + c(mean(iris$Sepal.Length) - sd(iris$Sepal.Length), mean(iris$Sepal.Length)), + tolerance = 1e-3 + ) })