From 051221249c09717478e3bf119a212c070a53f2b2 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Mon, 2 Dec 2024 11:45:42 +0100 Subject: [PATCH] Revamp `data_rename()` (#568) * init * update wordlist * redoc * fix vignette * doc for text_format * comments * pkgdown * lints * seealso * pkgdown * do not use column indices as replacement * forbid partially named vector * add arg ifnotfound in select_nse() * simplify * simplify again [skip ci] * fix tests and examples * lint * use dev styler * fix --- DESCRIPTION | 2 +- NEWS.md | 17 +++- R/data_addprefix.R | 12 ++- R/data_rename.R | 143 +++++++++++++------------- R/select_nse.R | 151 +++++++++++++++++----------- R/text_format.R | 2 +- inst/WORDLIST | 13 +-- man/categorize.Rd | 3 +- man/data_match.Rd | 3 +- man/data_merge.Rd | 3 +- man/data_partition.Rd | 3 +- man/data_prefix_suffix.Rd | 103 +++++++++++++++++++ man/data_relocate.Rd | 3 +- man/data_rename.Rd | 116 ++++++--------------- man/data_rotate.Rd | 3 +- man/data_to_long.Rd | 3 +- man/data_to_wide.Rd | 3 +- man/extract_column_names.Rd | 3 +- man/recode_values.Rd | 3 +- man/rescale.Rd | 4 + man/slide.Rd | 3 +- man/text_format.Rd | 12 +-- man/winsorize.Rd | 3 +- pkgdown/_pkgdown.yaml | 1 + tests/testthat/test-attributes.R | 2 +- tests/testthat/test-data_rename.R | 107 ++++++++++---------- vignettes/tidyverse_translation.Rmd | 4 +- 27 files changed, 395 insertions(+), 330 deletions(-) create mode 100644 man/data_prefix_suffix.Rd diff --git a/DESCRIPTION b/DESCRIPTION index bcb41528b..a6869430a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.13.0.15 +Version: 0.13.0.16 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531")), diff --git a/NEWS.md b/NEWS.md index a701ba2b8..eccbbe98d 100644 --- a/NEWS.md +++ b/NEWS.md @@ 
-1,9 +1,18 @@ # datawizard (development) -BREAKING CHANGES - -* Argument `drop_na` in `data_match()` is deprecated now. Please use `remove_na` - instead. +BREAKING CHANGES AND DEPRECATIONS + +* Argument `drop_na` in `data_match()` is deprecated now. Please use + `remove_na` instead. + +* In `data_rename()` (#567): + - argument `pattern` is deprecated. Use `select` instead. + - argument `safe` is deprecated. The function now errors when `select` + contains unknown column names. + - when `replacement` is `NULL`, an error is now thrown (previously, column + indices were used as new names). + - if `select` (previously `pattern`) is a named vector, then all elements + must be named, e.g. `c(length = "Sepal.Length", "Sepal.Width")` errors. CHANGES diff --git a/R/data_addprefix.R b/R/data_addprefix.R index 6cf292ecc..9c20e6d71 100644 --- a/R/data_addprefix.R +++ b/R/data_addprefix.R @@ -1,5 +1,13 @@ -#' @rdname data_rename +#' Add a prefix or suffix to column names +#' +#' @rdname data_prefix_suffix #' @inheritParams extract_column_names +#' @param pattern A character string, which will be added as prefix or suffix +#' to the column names. +#' @param ... Other arguments passed to or from other functions. +#' +#' @seealso +#' [data_rename()] for more fine-grained column renaming. #' @examples #' # Add prefix / suffix to all columns #' head(data_addprefix(iris, "NEW_")) @@ -29,7 +37,7 @@ data_addprefix <- function(data, } -#' @rdname data_rename +#' @rdname data_prefix_suffix #' @export data_addsuffix <- function(data, pattern, diff --git a/R/data_rename.R b/R/data_rename.R index f5d6e0e03..eb2415d67 100644 --- a/R/data_rename.R +++ b/R/data_rename.R @@ -1,36 +1,27 @@ #' @title Rename columns and variable names -#' @name data_rename #' #' @description Safe and intuitive functions to rename variables or rows in #' data frames. `data_rename()` will rename column names, i.e. 
it facilitates -#' renaming variables `data_addprefix()` or `data_addsuffix()` add prefixes -#' or suffixes to column names. `data_rename_rows()` is a convenient shortcut +#' renaming variables. `data_rename_rows()` is a convenient shortcut #' to add or rename row names of a data frame, but unlike `row.names()`, its -#' input and output is a data frame, thus, integrating smoothly into a possible -#' pipe-workflow. +#' input and output is a data frame, thus, integrating smoothly into a +#' possible pipe-workflow. #' +#' @inheritParams extract_column_names #' @param data A data frame, or an object that can be coerced to a data frame. -#' @param pattern Character vector. -#' - For `data_addprefix()` or `data_addsuffix()`, a character string, which -#' will be added as prefix or suffix to the column names. -#' - For `data_rename()`, indicates columns that should be selected for -#' renaming. Can be `NULL` (in which case all columns are selected). -#' `pattern` can also be a named vector. In this case, names are used as -#' values for the `replacement` argument (i.e. `pattern` can be a character -#' vector using ` = ""` and argument `replacement` will -#' be ignored then). #' @param replacement Character vector. Can be one of the following: #' - A character vector that indicates the new names of the columns selected -#' in `pattern`. `pattern` and `replacement` must be of the same length. -#' - `NULL`, in which case columns are numbered in sequential order. -#' - A string (i.e. character vector of length 1) with a "glue" styled pattern. -#' Currently supported tokens are: +#' in `select`. `select` and `replacement` must be of the same length. +#' - A string (i.e. character vector of length 1) with a "glue" styled +#' pattern. Currently supported tokens are: #' - `{col}` which will be replaced by the column name, i.e. the -#' corresponding value in `pattern`. +#' corresponding value in `select`. #' - `{n}` will be replaced by the number of the variable that is replaced. 
-#' - `{letter}` will be replaced by alphabetical letters in sequential order. +#' - `{letter}` will be replaced by alphabetical letters in sequential +#' order. #' If more than 26 letters are required, letters are repeated, but have -#' sequential numeric indices (e.g., `a1` to `z1`, followed by `a2` to `z2`). +#' sequential numeric indices (e.g., `a1` to `z1`, followed by `a2` to +#' `z2`). #' - Finally, the name of a user-defined object that is available in the #' environment can be used. Note that the object's name is not allowed to #' be one of the pre-defined tokens, `"col"`, `"n"` and `"letter"`. @@ -39,35 +30,32 @@ #' ```r #' data_rename( #' mtcars, -#' pattern = c("am", "vs"), +#' select = c("am", "vs"), #' replacement = "new_name_from_{col}" #' ) #' ``` #' ... which would return new column names `new_name_from_am` and #' `new_name_from_vs`. See 'Examples'. #' -#' If `pattern` is a named vector, `replacement` is ignored. +#' If `select` is a named vector, `replacement` is ignored. #' @param rows Vector of row names. -#' @param safe Do not throw error if for instance the variable to be -#' renamed/removed doesn't exist. -#' @param verbose Toggle warnings and messages. +#' @param safe Deprecated. Passing unknown column names now always errors. +#' @param pattern Deprecated. Use `select` instead. #' @param ... Other arguments passed to or from other functions. #' +#' @details +#' `select` can also be a named character vector. In this case, the names are +#' used to rename the columns in the output data frame. See 'Examples'. +#' #' @return A modified data frame. 
#' #' @examples #' # Rename columns #' head(data_rename(iris, "Sepal.Length", "length")) -#' # data_rename(iris, "FakeCol", "length", safe=FALSE) # This fails -#' head(data_rename(iris, "FakeCol", "length")) # This doesn't -#' head(data_rename(iris, c("Sepal.Length", "Sepal.Width"), c("length", "width"))) #' #' # use named vector to rename #' head(data_rename(iris, c(length = "Sepal.Length", width = "Sepal.Width"))) #' -#' # Reset names -#' head(data_rename(iris, NULL)) -#' #' # Change all #' head(data_rename(iris, replacement = paste0("Var", 1:5))) #' @@ -80,8 +68,7 @@ #' x <- c("hi", "there", "!") #' head(data_rename(mtcars[1:3], c("mpg", "cyl", "disp"), "col_{x}")) #' @seealso -#' - Functions to rename stuff: [data_rename()], [data_rename_rows()], -#' [data_addprefix()], [data_addsuffix()] +#' - Add a prefix or suffix to column names: [data_addprefix()], [data_addsuffix()] #' - Functions to reorder or remove columns: [data_reorder()], [data_relocate()], #' [data_remove()] #' - Functions to reshape, pivot or rotate data frames: [data_to_long()], @@ -96,28 +83,48 @@ #' #' @export data_rename <- function(data, - pattern = NULL, + select = NULL, replacement = NULL, safe = TRUE, verbose = TRUE, + pattern = NULL, ...) 
{ - # change all names if no pattern specified - if (is.null(pattern)) { - pattern <- names(data) + # If the user does data_rename(iris, pattern = "Sepal.Length", "length"), + # then "length" is matched to select by position while it's the replacement + # => do the switch manually + if (!is.null(pattern)) { + .is_deprecated("pattern", "select") + if (!is.null(select)) { + replacement <- select + } + select <- pattern } - - if (!is.character(pattern)) { - insight::format_error("Argument `pattern` must be of type character.") + if (isFALSE(safe)) { + insight::format_warning("In `data_rename()`, argument `safe` is no longer used and will be removed in a future release.") # nolint } - # check if `pattern` has names, and if so, use as "replacement" - if (!is.null(names(pattern))) { - replacement <- names(pattern) + # evaluate `select` (all columns if `NULL`); unknown column names throw an error + select <- .select_nse( + select, + data, + exclude = NULL, + ignore_case = NULL, + regex = NULL, + allow_rename = TRUE, + verbose = verbose, + ifnotfound = "error" + ) + + # Forbid partially named "select", + # Ex: if select = c("foo" = "Species", "Sepal.Length") then the 2nd name and + # 2nd value are "Sepal.Length" + if (!is.null(names(select)) && any(names(select) == select)) { + insight::format_error("When `select` is a named vector, all elements must be named.") } - # name columns 1, 2, 3 etc. 
if no replacement - if (is.null(replacement)) { - replacement <- paste0(seq_along(pattern)) + # check if `select` has names, and if so, use as "replacement" + if (!is.null(names(select))) { + replacement <- names(select) } # coerce to character @@ -126,22 +133,22 @@ data_rename <- function(data, # check if `replacement` has no empty strings and no NA values invalid_replacement <- is.na(replacement) | !nzchar(replacement) if (any(invalid_replacement)) { - if (is.null(names(pattern))) { - # when user did not match `pattern` with `replacement` + if (is.null(names(select))) { + # when user did not match `select` with `replacement` msg <- c( "`replacement` is not allowed to have `NA` or empty strings.", sprintf( - "Following values in `pattern` have no match in `replacement`: %s", - toString(pattern[invalid_replacement]) + "Following values in `select` have no match in `replacement`: %s", + toString(select[invalid_replacement]) ) ) } else { - # when user did not name all elements of `pattern` + # when user did not name all elements of `select` msg <- c( - "Either name all elements of `pattern` or use `replacement`.", + "Either name all elements of `select` or use `replacement`.", sprintf( - "Following values in `pattern` were not named: %s", - toString(pattern[invalid_replacement]) + "Following values in `select` were not named: %s", + toString(select[invalid_replacement]) ) ) } @@ -163,30 +170,20 @@ data_rename <- function(data, # check if we have "glue" styled replacement-string glue_style <- length(replacement) == 1 && grepl("{", replacement, fixed = TRUE) - if (length(replacement) > length(pattern) && verbose) { - insight::format_alert( - paste0( - "There are more names in `replacement` than in `pattern`. The last ", - length(replacement) - length(pattern), " names of `replacement` are not used." 
- ) - ) - } else if (length(replacement) < length(pattern) && verbose && !glue_style) { - insight::format_alert( - paste0( - "There are more names in `pattern` than in `replacement`. The last ", - length(pattern) - length(replacement), " names of `pattern` are not modified." - ) - ) + if (length(replacement) > length(select)) { + insight::format_error("There are more names in `replacement` than in `select`.") + } else if (length(replacement) < length(select) && !glue_style) { + insight::format_error("There are more names in `select` than in `replacement`") } - # if we have glue-styled replacement-string, create replacement pattern now + # if we have glue-styled replacement-string, create replacement names now if (glue_style) { - replacement <- .glue_replacement(pattern, replacement) + replacement <- .glue_replacement(select, replacement) } - for (i in seq_along(pattern)) { + for (i in seq_along(select)) { if (!is.na(replacement[i])) { - data <- .data_rename(data, pattern[i], replacement[i], safe, verbose) + data <- .data_rename(data, select[i], replacement[i], safe, verbose) } } diff --git a/R/select_nse.R b/R/select_nse.R index 5120691a9..2d0255d0a 100644 --- a/R/select_nse.R +++ b/R/select_nse.R @@ -3,7 +3,7 @@ .select_nse <- function(select, data, exclude, ignore_case, regex = FALSE, remove_group_var = FALSE, allow_rename = FALSE, - verbose = FALSE) { + verbose = FALSE, ifnotfound = "warn") { .check_data(data) columns <- colnames(data) @@ -38,14 +38,16 @@ data, ignore_case = ignore_case, regex = regex, - verbose = verbose + verbose = verbose, + ifnotfound = ifnotfound ) excluded <- .eval_expr( expr_exclude, data, ignore_case = ignore_case, regex = regex, - verbose = verbose + verbose = verbose, + ifnotfound = ifnotfound ) selected_has_mix_idx <- any(selected < 0L) && any(selected > 0L) @@ -113,7 +115,7 @@ # * cyl:gear -> function (`:`) so find which function it is, then get the # position for each variable, then evaluate the function with the positions 
-.eval_expr <- function(x, data, ignore_case, regex, verbose) { +.eval_expr <- function(x, data, ignore_case, regex, verbose, ifnotfound) { if (is.null(x)) { return(NULL) } @@ -123,9 +125,18 @@ out <- switch(type, integer = x, double = as.integer(x), - character = .select_char(data, x, ignore_case, regex = regex, verbose), - symbol = .select_symbol(data, x, ignore_case, regex = regex, verbose), - language = .eval_call(data, x, ignore_case, regex = regex, verbose), + character = .select_char( + data, x, ignore_case, + regex = regex, verbose, ifnotfound + ), + symbol = .select_symbol( + data, x, ignore_case, + regex = regex, verbose, ifnotfound + ), + language = .eval_call( + data, x, ignore_case, + regex = regex, verbose, ifnotfound + ), insight::format_error(paste0( "Expressions of type <", typeof(x), "> cannot be evaluated for use when subsetting." @@ -143,7 +154,7 @@ # - character that should be regex-ed on variable names # - special word "all" to return all vars -.select_char <- function(data, x, ignore_case, regex, verbose) { +.select_char <- function(data, x, ignore_case, regex, verbose, ifnotfound) { # use colnames because names() doesn't work for matrices columns <- colnames(data) if (isTRUE(regex)) { @@ -160,7 +171,7 @@ colon_vars <- unlist(strsplit(x, ":", fixed = TRUE)) colon_match <- match(colon_vars, columns) if (anyNA(colon_match)) { - .warn_not_found(colon_vars, columns, colon_match, verbose) + .action_if_not_found(colon_vars, columns, colon_match, verbose, ifnotfound) matches <- NA } else { start_pos <- match(colon_vars[1], columns) @@ -180,26 +191,34 @@ # find columns, case sensitive matches <- match(x, columns) if (anyNA(matches)) { - .warn_not_found(x, columns, matches, verbose) + .action_if_not_found(x, columns, matches, verbose, ifnotfound) } matches[!is.na(matches)] } } # small helper, to avoid duplicated code -.warn_not_found <- function(x, columns, matches, verbose = TRUE) { - if (verbose) { - insight::format_warning( - paste0( - "Following 
variable(s) were not found: ", - toString(x[is.na(matches)]) - ), - .misspelled_string( - columns, - x[is.na(matches)], - default_message = "Possibly misspelled?" - ) - ) +.action_if_not_found <- function( + x, + columns, + matches, + verbose, + ifnotfound +) { + msg <- paste0( + "Following variable(s) were not found: ", + toString(x[is.na(matches)]) + ) + msg2 <- .misspelled_string( + columns, + x[is.na(matches)], + default_message = "Possibly misspelled?" + ) + if (ifnotfound == "error") { + insight::format_error(msg, msg2) + } + if (ifnotfound == "warn" && verbose) { + insight::format_warning(msg, msg2) } } @@ -217,7 +236,7 @@ # value but it errors because the function doesn't exist then it means that # it is a select helper that we grab from the error message. -.select_symbol <- function(data, x, ignore_case, regex, verbose) { +.select_symbol <- function(data, x, ignore_case, regex, verbose, ifnotfound) { try_eval <- try(eval(x), silent = TRUE) x_dep <- insight::safe_deparse(x) is_select_helper <- FALSE @@ -300,54 +319,59 @@ # Dispatch expressions to various select helpers according to the function call. 
-.eval_call <- function(data, x, ignore_case, regex, verbose) { +.eval_call <- function(data, x, ignore_case, regex, verbose, ifnotfound) { type <- insight::safe_deparse(x[[1]]) switch(type, - `:` = .select_seq(x, data, ignore_case, regex, verbose), - `-` = .select_minus(x, data, ignore_case, regex, verbose), - `c` = .select_c(x, data, ignore_case, regex, verbose), # nolint - `(` = .select_bracket(x, data, ignore_case, regex, verbose), - `[` = .select_square_bracket(x, data, ignore_case, regex, verbose), - `$` = .select_dollar(x, data, ignore_case, regex, verbose), - `~` = .select_tilde(x, data, ignore_case, regex, verbose), - list = .select_list(x, data, ignore_case, regex, verbose), - names = .select_names(x, data, ignore_case, regex, verbose), + `:` = .select_seq(x, data, ignore_case, regex, verbose, ifnotfound), + `-` = .select_minus(x, data, ignore_case, regex, verbose, ifnotfound), + `c` = .select_c(x, data, ignore_case, regex, verbose, ifnotfound), # nolint + `(` = .select_bracket(x, data, ignore_case, regex, verbose, ifnotfound), + `[` = .select_square_bracket( + x, data, ignore_case, regex, verbose, ifnotfound + ), + `$` = .select_dollar(x, data, ignore_case, regex, verbose, ifnotfound), + `~` = .select_tilde(x, data, ignore_case, regex, verbose, ifnotfound), + list = .select_list(x, data, ignore_case, regex, verbose, ifnotfound), + names = .select_names(x, data, ignore_case, regex, verbose, ifnotfound), starts_with = , ends_with = , matches = , contains = , - regex = .select_helper(x, data, ignore_case, regex, verbose), - .select_context(x, data, ignore_case, regex, verbose) + regex = .select_helper(x, data, ignore_case, regex, verbose, ifnotfound), + .select_context(x, data, ignore_case, regex, verbose, ifnotfound) ) } # e.g 1:3, or gear:cyl -.select_seq <- function(expr, data, ignore_case, regex, verbose) { +.select_seq <- function(expr, data, ignore_case, regex, verbose, ifnotfound) { x <- .eval_expr( expr[[2]], data = data, ignore_case = ignore_case, 
regex = regex, - verbose = verbose + verbose = verbose, + ifnotfound = ifnotfound ) y <- .eval_expr( expr[[3]], data = data, ignore_case = ignore_case, regex = regex, - verbose = verbose + verbose = verbose, + ifnotfound = ifnotfound ) x:y } # e.g -cyl -.select_minus <- function(expr, data, ignore_case, regex, verbose) { +.select_minus <- function(expr, data, ignore_case, regex, verbose, ifnotfound) { x <- .eval_expr( expr[[2]], data, ignore_case = ignore_case, regex = regex, - verbose = verbose + verbose = verbose, + ifnotfound = ifnotfound ) if (length(x) == 0L) { seq_along(data) @@ -357,7 +381,7 @@ } # e.g c("gear", "cyl") -.select_c <- function(expr, data, ignore_case, regex, verbose) { +.select_c <- function(expr, data, ignore_case, regex, verbose, ifnotfound) { lst_expr <- as.list(expr) lst_expr[[1]] <- NULL unlist(lapply( @@ -366,40 +390,44 @@ data, ignore_case = ignore_case, regex = regex, - verbose = verbose + verbose = verbose, + ifnotfound = ifnotfound ), use.names = FALSE) } # e.g -(gear:cyl) -.select_bracket <- function(expr, data, ignore_case, regex, verbose) { +.select_bracket <- function(expr, data, ignore_case, regex, verbose, ifnotfound) { .eval_expr( expr[[2]], data, ignore_case = ignore_case, regex = regex, - verbose = verbose + verbose = verbose, + ifnotfound = ifnotfound ) } # e.g myvector[3] -.select_square_bracket <- function(expr, data, ignore_case, regex, verbose) { +.select_square_bracket <- function(expr, data, ignore_case, regex, verbose, ifnotfound) { first_obj <- .eval_expr( expr[[2]], data, ignore_case = ignore_case, regex = regex, - verbose = verbose + verbose = verbose, + ifnotfound = ifnotfound ) .eval_expr( first_obj[eval(expr[[3]])], data, ignore_case = ignore_case, regex = regex, - verbose = verbose + verbose = verbose, + ifnotfound = ifnotfound ) } -.select_names <- function(expr, data, ignore_case, regex, verbose) { +.select_names <- function(expr, data, ignore_case, regex, verbose, ifnotfound) { first_obj <- .dynEval(expr, 
inherits = FALSE, minframe = 0L) .eval_expr( first_obj, @@ -411,7 +439,7 @@ } # e.g starts_with("Sep") -.select_helper <- function(expr, data, ignore_case, regex, verbose) { +.select_helper <- function(expr, data, ignore_case, regex, verbose, ifnotfound) { lst_expr <- as.list(expr) # need this if condition to distinguish between starts_with("Sep") (that we @@ -435,7 +463,7 @@ } # e.g args$select (happens when we use grouped_data (see center.grouped_df())) -.select_dollar <- function(expr, data, ignore_case, regex, verbose) { +.select_dollar <- function(expr, data, ignore_case, regex, verbose, ifnotfound) { first_obj <- .dynGet(expr[[2]], ifnotfound = NULL, inherits = FALSE, minframe = 0L) if (is.null(first_obj)) { first_obj <- .dynEval(expr[[2]], inherits = FALSE, minframe = 0L) @@ -445,12 +473,13 @@ data, ignore_case = ignore_case, regex = regex, - verbose = verbose + verbose = verbose, + ifnotfound = ifnotfound ) } # e.g ~ gear + cyl -.select_tilde <- function(expr, data, ignore_case, regex, verbose) { +.select_tilde <- function(expr, data, ignore_case, regex, verbose, ifnotfound) { vars <- all.vars(expr) unlist(lapply( vars, @@ -458,12 +487,13 @@ data = data, ignore_case = ignore_case, regex = regex, - verbose = verbose + verbose = verbose, + ifnotfound = ifnotfound ), use.names = FALSE) } # e.g list(gear = 4, cyl = 5) -.select_list <- function(expr, data, ignore_case, regex, verbose) { +.select_list <- function(expr, data, ignore_case, regex, verbose, ifnotfound) { vars <- names(.dynEval(expr, inherits = FALSE, minframe = 0L)) unlist(lapply( vars, @@ -471,12 +501,13 @@ data = data, ignore_case = ignore_case, regex = regex, - verbose = verbose + verbose = verbose, + ifnotfound = ifnotfound ), use.names = FALSE) } # e.g is.numeric() -.select_context <- function(expr, data, ignore_case, regex, verbose) { +.select_context <- function(expr, data, ignore_case, regex, verbose, ifnotfound) { x_dep <- insight::safe_deparse(expr) if (endsWith(x_dep, "()")) { new_expr <- 
gsub("\\(\\)$", "", x_dep) @@ -486,7 +517,8 @@ data = data, ignore_case = ignore_case, regex = regex, - verbose = verbose + verbose = verbose, + ifnotfound = ifnotfound ) } else { out <- .dynEval(expr, inherits = FALSE, minframe = 0L) @@ -495,7 +527,8 @@ data = data, ignore_case = ignore_case, regex = regex, - verbose = verbose + verbose = verbose, + ifnotfound = ifnotfound ) } } diff --git a/R/text_format.R b/R/text_format.R index 0fa75bcac..46cbf2ee0 100644 --- a/R/text_format.R +++ b/R/text_format.R @@ -6,7 +6,7 @@ #' @param width Positive integer giving the target column width for wrapping #' lines in the output. Can be "auto", in which case it will select 90\% of the #' default width. -#' @inheritParams data_rename +#' @param pattern Regex pattern to remove from `text`. #' @param sep Separator. #' @param last Last separator. #' @param n The number of characters to find. diff --git a/inst/WORDLIST b/inst/WORDLIST index eda7dc71c..a8b4ff08d 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -8,14 +8,14 @@ CMD Carle Catran Crosstables -Dhaliwal -Disaggregating +DEPRECATIONS DOI De -Dom +Dhaliwal +Disaggregating EFC -Enders EUROFAMCARE +Enders Fairbrother GLMM Gelman @@ -54,7 +54,6 @@ Winsorizing al behaviour behaviours -bmwiernik codebook codebooks coercible @@ -77,7 +76,6 @@ joss labelled labelling leptokurtic -lifecycle lm lme meaned @@ -88,7 +86,6 @@ modelling nd panelr partialization -patilindrajeets platykurtic poorman pre @@ -102,7 +99,6 @@ recodes recoding recodings relevel -rempsyc reproducibility rescale rescaled @@ -111,7 +107,6 @@ rio rowid sd stackexchange -strengejacke tailedness th tibble diff --git a/man/categorize.Rd b/man/categorize.Rd index 0fed929de..aaca6014c 100644 --- a/man/categorize.Rd +++ b/man/categorize.Rd @@ -244,8 +244,7 @@ categorize(mtcars$mpg, "equal_length", n_groups = 5, labels = "observed") } \seealso{ \itemize{ -\item Functions to rename stuff: \code{\link[=data_rename]{data_rename()}}, 
\code{\link[=data_rename_rows]{data_rename_rows()}}, -\code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} +\item Add a prefix or suffix to column names: \code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} \item Functions to reorder or remove columns: \code{\link[=data_reorder]{data_reorder()}}, \code{\link[=data_relocate]{data_relocate()}}, \code{\link[=data_remove]{data_remove()}} \item Functions to reshape, pivot or rotate data frames: \code{\link[=data_to_long]{data_to_long()}}, diff --git a/man/data_match.Rd b/man/data_match.Rd index 0354a44f4..84ad56664 100644 --- a/man/data_match.Rd +++ b/man/data_match.Rd @@ -128,8 +128,7 @@ data_filter(mtcars, fl) } \seealso{ \itemize{ -\item Functions to rename stuff: \code{\link[=data_rename]{data_rename()}}, \code{\link[=data_rename_rows]{data_rename_rows()}}, -\code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} +\item Add a prefix or suffix to column names: \code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} \item Functions to reorder or remove columns: \code{\link[=data_reorder]{data_reorder()}}, \code{\link[=data_relocate]{data_relocate()}}, \code{\link[=data_remove]{data_remove()}} \item Functions to reshape, pivot or rotate data frames: \code{\link[=data_to_long]{data_to_long()}}, diff --git a/man/data_merge.Rd b/man/data_merge.Rd index 169771b27..6876b06a1 100644 --- a/man/data_merge.Rd +++ b/man/data_merge.Rd @@ -190,8 +190,7 @@ data_merge(list(x, y, z), join = "bind", by = "id", id = "source") } \seealso{ \itemize{ -\item Functions to rename stuff: \code{\link[=data_rename]{data_rename()}}, \code{\link[=data_rename_rows]{data_rename_rows()}}, -\code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} +\item Add a prefix or suffix to column names: \code{\link[=data_addprefix]{data_addprefix()}}, 
\code{\link[=data_addsuffix]{data_addsuffix()}} \item Functions to reorder or remove columns: \code{\link[=data_reorder]{data_reorder()}}, \code{\link[=data_relocate]{data_relocate()}}, \code{\link[=data_remove]{data_remove()}} \item Functions to reshape, pivot or rotate data frames: \code{\link[=data_to_long]{data_to_long()}}, diff --git a/man/data_partition.Rd b/man/data_partition.Rd index 73eb28286..23015e1b3 100644 --- a/man/data_partition.Rd +++ b/man/data_partition.Rd @@ -68,8 +68,7 @@ lapply(out, function(i) table(i$Species)) } \seealso{ \itemize{ -\item Functions to rename stuff: \code{\link[=data_rename]{data_rename()}}, \code{\link[=data_rename_rows]{data_rename_rows()}}, -\code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} +\item Add a prefix or suffix to column names: \code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} \item Functions to reorder or remove columns: \code{\link[=data_reorder]{data_reorder()}}, \code{\link[=data_relocate]{data_relocate()}}, \code{\link[=data_remove]{data_remove()}} \item Functions to reshape, pivot or rotate data frames: \code{\link[=data_to_long]{data_to_long()}}, diff --git a/man/data_prefix_suffix.Rd b/man/data_prefix_suffix.Rd new file mode 100644 index 000000000..eab848058 --- /dev/null +++ b/man/data_prefix_suffix.Rd @@ -0,0 +1,103 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data_addprefix.R +\name{data_addprefix} +\alias{data_addprefix} +\alias{data_addsuffix} +\title{Add a prefix or suffix to column names} +\usage{ +data_addprefix( + data, + pattern, + select = NULL, + exclude = NULL, + ignore_case = FALSE, + regex = FALSE, + verbose = TRUE, + ... +) + +data_addsuffix( + data, + pattern, + select = NULL, + exclude = NULL, + ignore_case = FALSE, + regex = FALSE, + verbose = TRUE, + ... 
+) +} +\arguments{ +\item{data}{A data frame.} + +\item{pattern}{A character string, which will be added as prefix or suffix +to the column names.} + +\item{select}{Variables that will be included when performing the required +tasks. Can be either +\itemize{ +\item a variable specified as a literal variable name (e.g., \code{column_name}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), +\item a formula with variable names (e.g., \code{~column_1 + column_2}), +\item a vector of positive integers, giving the positions counting from the left +(e.g. \code{1} or \code{c(1, 3, 5)}), +\item a vector of negative integers, giving the positions counting from the +right (e.g., \code{-1} or \code{-1:-3}), +\item one of the following select-helpers: \code{starts_with()}, \code{ends_with()}, +\code{contains()}, a range using \code{:}, or \code{regex()}. \code{starts_with()}, +\code{ends_with()}, and \code{contains()} accept several patterns, e.g +\code{starts_with("Sep", "Petal")}. \code{regex()} can be used to define regular +expression patterns. +\item a function testing for logical conditions, e.g. \code{is.numeric()} (or +\code{is.numeric}), or any user-defined function that selects the variables +for which the function returns \code{TRUE} (like: \code{foo <- function(x) mean(x) > 3}), +\item ranges specified via literal variable names, select-helpers (except +\code{regex()}) and (user-defined) functions can be negated, i.e. return +non-matching elements, when prefixed with a \code{-}, e.g. \code{-ends_with()}, +\code{-is.numeric} or \code{-(Sepal.Width:Petal.Length)}. \strong{Note:} Negation means +that matches are \emph{excluded}, and thus, the \code{exclude} argument can be +used alternatively. 
For instance, \code{select=-ends_with("Length")} (with +\code{-}) is equivalent to \code{exclude=ends_with("Length")} (no \code{-}). In case +negation should not work as expected, use the \code{exclude} argument instead. +} + +If \code{NULL}, selects all columns. Patterns that found no matches are silently +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} + +\item{exclude}{See \code{select}, however, column names matched by the pattern +from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), +excludes no columns.} + +\item{ignore_case}{Logical, if \code{TRUE} and when one of the select-helpers or +a regular expression is used in \code{select}, ignores lower/upper case in the +search pattern when matching against variable names.} + +\item{regex}{Logical, if \code{TRUE}, the search pattern from \code{select} will be +treated as regular expression. When \code{regex = TRUE}, select \emph{must} be a +character string (or a variable containing a character string) and is not +allowed to be one of the supported select-helpers or a character vector +of length > 1. \code{regex = TRUE} is comparable to using one of the two +select-helpers, \code{select = contains()} or \code{select = regex()}, however, +since the select-helpers may not work when called from inside other +functions (see 'Details'), this argument may be used as workaround.} + +\item{verbose}{Toggle warnings.} + +\item{...}{Other arguments passed to or from other functions.} +} +\description{ +Add a prefix or suffix to column names +} +\examples{ +# Add prefix / suffix to all columns +head(data_addprefix(iris, "NEW_")) +head(data_addsuffix(iris, "_OLD")) + +} +\seealso{ +\code{\link[=data_rename]{data_rename()}} for more fine-grained column renaming. 
+} diff --git a/man/data_relocate.Rd b/man/data_relocate.Rd index 5991691cb..a1227dcfa 100644 --- a/man/data_relocate.Rd +++ b/man/data_relocate.Rd @@ -135,8 +135,7 @@ head(data_remove(iris, starts_with("Sepal"))) } \seealso{ \itemize{ -\item Functions to rename stuff: \code{\link[=data_rename]{data_rename()}}, \code{\link[=data_rename_rows]{data_rename_rows()}}, -\code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} +\item Add a prefix or suffix to column names: \code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} \item Functions to reorder or remove columns: \code{\link[=data_reorder]{data_reorder()}}, \code{\link[=data_relocate]{data_relocate()}}, \code{\link[=data_remove]{data_remove()}} \item Functions to reshape, pivot or rotate data frames: \code{\link[=data_to_long]{data_to_long()}}, diff --git a/man/data_rename.Rd b/man/data_rename.Rd index 3fea0fac4..8d99fb54f 100644 --- a/man/data_rename.Rd +++ b/man/data_rename.Rd @@ -1,40 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data_addprefix.R, R/data_rename.R -\name{data_addprefix} -\alias{data_addprefix} -\alias{data_addsuffix} +% Please edit documentation in R/data_rename.R +\name{data_rename} \alias{data_rename} \alias{data_rename_rows} \title{Rename columns and variable names} \usage{ -data_addprefix( - data, - pattern, - select = NULL, - exclude = NULL, - ignore_case = FALSE, - regex = FALSE, - verbose = TRUE, - ... -) - -data_addsuffix( - data, - pattern, - select = NULL, - exclude = NULL, - ignore_case = FALSE, - regex = FALSE, - verbose = TRUE, - ... -) - data_rename( data, - pattern = NULL, + select = NULL, replacement = NULL, safe = TRUE, verbose = TRUE, + pattern = NULL, ... ) @@ -43,18 +20,6 @@ data_rename_rows(data, rows = NULL) \arguments{ \item{data}{A data frame, or an object that can be coerced to a data frame.} -\item{pattern}{Character vector. 
-\itemize{ -\item For \code{data_addprefix()} or \code{data_addsuffix()}, a character string, which -will be added as prefix or suffix to the column names. -\item For \code{data_rename()}, indicates columns that should be selected for -renaming. Can be \code{NULL} (in which case all columns are selected). -\code{pattern} can also be a named vector. In this case, names are used as -values for the \code{replacement} argument (i.e. \code{pattern} can be a character -vector using \verb{<new name> = "<old name>"} and argument \code{replacement} will -be ignored then). -}} - \item{select}{Variables that will be included when performing the required tasks. Can be either \itemize{ @@ -90,41 +55,21 @@ If \code{NULL}, selects all columns. Patterns that found no matches are silently ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} will just return \code{"Species"}.} -\item{exclude}{See \code{select}, however, column names matched by the pattern -from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), -excludes no columns.} - -\item{ignore_case}{Logical, if \code{TRUE} and when one of the select-helpers or -a regular expression is used in \code{select}, ignores lower/upper case in the -search pattern when matching against variable names.} - -\item{regex}{Logical, if \code{TRUE}, the search pattern from \code{select} will be -treated as regular expression. When \code{regex = TRUE}, select \emph{must} be a -character string (or a variable containing a character string) and is not -allowed to be one of the supported select-helpers or a character vector -of length > 1.
\code{regex = TRUE} is comparable to using one of the two -select-helpers, \code{select = contains()} or \code{select = regex()}, however, -since the select-helpers may not work when called from inside other -functions (see 'Details'), this argument may be used as workaround.} - -\item{verbose}{Toggle warnings and messages.} - -\item{...}{Other arguments passed to or from other functions.} - \item{replacement}{Character vector. Can be one of the following: \itemize{ \item A character vector that indicates the new names of the columns selected -in \code{pattern}. \code{pattern} and \code{replacement} must be of the same length. -\item \code{NULL}, in which case columns are numbered in sequential order. -\item A string (i.e. character vector of length 1) with a "glue" styled pattern. -Currently supported tokens are: +in \code{select}. \code{select} and \code{replacement} must be of the same length. +\item A string (i.e. character vector of length 1) with a "glue" styled +pattern. Currently supported tokens are: \itemize{ \item \code{{col}} which will be replaced by the column name, i.e. the -corresponding value in \code{pattern}. +corresponding value in \code{select}. \item \code{{n}} will be replaced by the number of the variable that is replaced. -\item \code{{letter}} will be replaced by alphabetical letters in sequential order. +\item \code{{letter}} will be replaced by alphabetical letters in sequential +order. If more than 26 letters are required, letters are repeated, but have -sequential numeric indices (e.g., \code{a1} to \code{z1}, followed by \code{a2} to \code{z2}). +sequential numeric indices (e.g., \code{a1} to \code{z1}, followed by \code{a2} to +\code{z2}). \item Finally, the name of a user-defined object that is available in the environment can be used. Note that the object's name is not allowed to be one of the pre-defined tokens, \code{"col"}, \code{"n"} and \code{"letter"}. @@ -134,7 +79,7 @@ An example for the use of tokens is... \if{html}{\out{
<div class="sourceCode">}}\preformatted{data_rename( mtcars, - pattern = c("am", "vs"), + select = c("am", "vs"), replacement = "new_name_from_\{col\}" ) }\if{html}{\out{</div>
}} @@ -143,10 +88,15 @@ An example for the use of tokens is... \code{new_name_from_vs}. See 'Examples'. } -If \code{pattern} is a named vector, \code{replacement} is ignored.} +If \code{select} is a named vector, \code{replacement} is ignored.} -\item{safe}{Do not throw error if for instance the variable to be -renamed/removed doesn't exist.} +\item{safe}{Deprecated. Passing unknown column names now always errors.} + +\item{verbose}{Toggle warnings.} + +\item{pattern}{Deprecated. Use \code{select} instead.} + +\item{...}{Other arguments passed to or from other functions.} \item{rows}{Vector of row names.} } @@ -156,29 +106,22 @@ A modified data frame. \description{ Safe and intuitive functions to rename variables or rows in data frames. \code{data_rename()} will rename column names, i.e. it facilitates -renaming variables \code{data_addprefix()} or \code{data_addsuffix()} add prefixes -or suffixes to column names. \code{data_rename_rows()} is a convenient shortcut +renaming variables. \code{data_rename_rows()} is a convenient shortcut to add or rename row names of a data frame, but unlike \code{row.names()}, its -input and output is a data frame, thus, integrating smoothly into a possible -pipe-workflow. +input and output is a data frame, thus, integrating smoothly into a +possible pipe-workflow. +} +\details{ +\code{select} can also be a named character vector. In this case, the names are +used to rename the columns in the output data frame. See 'Examples'. 
} \examples{ -# Add prefix / suffix to all columns -head(data_addprefix(iris, "NEW_")) -head(data_addsuffix(iris, "_OLD")) - # Rename columns head(data_rename(iris, "Sepal.Length", "length")) -# data_rename(iris, "FakeCol", "length", safe=FALSE) # This fails -head(data_rename(iris, "FakeCol", "length")) # This doesn't -head(data_rename(iris, c("Sepal.Length", "Sepal.Width"), c("length", "width"))) # use named vector to rename head(data_rename(iris, c(length = "Sepal.Length", width = "Sepal.Width"))) -# Reset names -head(data_rename(iris, NULL)) - # Change all head(data_rename(iris, replacement = paste0("Var", 1:5))) @@ -193,8 +136,7 @@ head(data_rename(mtcars[1:3], c("mpg", "cyl", "disp"), "col_{x}")) } \seealso{ \itemize{ -\item Functions to rename stuff: \code{\link[=data_rename]{data_rename()}}, \code{\link[=data_rename_rows]{data_rename_rows()}}, -\code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} +\item Add a prefix or suffix to column names: \code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} \item Functions to reorder or remove columns: \code{\link[=data_reorder]{data_reorder()}}, \code{\link[=data_relocate]{data_relocate()}}, \code{\link[=data_remove]{data_remove()}} \item Functions to reshape, pivot or rotate data frames: \code{\link[=data_to_long]{data_to_long()}}, diff --git a/man/data_rotate.Rd b/man/data_rotate.Rd index 25ba9a82b..6967a069a 100644 --- a/man/data_rotate.Rd +++ b/man/data_rotate.Rd @@ -52,8 +52,7 @@ data_rotate(x, colnames = "c") } \seealso{ \itemize{ -\item Functions to rename stuff: \code{\link[=data_rename]{data_rename()}}, \code{\link[=data_rename_rows]{data_rename_rows()}}, -\code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} +\item Add a prefix or suffix to column names: \code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} \item Functions to reorder or 
remove columns: \code{\link[=data_reorder]{data_reorder()}}, \code{\link[=data_relocate]{data_relocate()}}, \code{\link[=data_remove]{data_remove()}} \item Functions to reshape, pivot or rotate data frames: \code{\link[=data_to_long]{data_to_long()}}, diff --git a/man/data_to_long.Rd b/man/data_to_long.Rd index 3e587c049..36a474b83 100644 --- a/man/data_to_long.Rd +++ b/man/data_to_long.Rd @@ -223,8 +223,7 @@ head(even_longer_data) } \seealso{ \itemize{ -\item Functions to rename stuff: \code{\link[=data_rename]{data_rename()}}, \code{\link[=data_rename_rows]{data_rename_rows()}}, -\code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} +\item Add a prefix or suffix to column names: \code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} \item Functions to reorder or remove columns: \code{\link[=data_reorder]{data_reorder()}}, \code{\link[=data_relocate]{data_relocate()}}, \code{\link[=data_remove]{data_remove()}} \item Functions to reshape, pivot or rotate data frames: \code{\link[=data_to_long]{data_to_long()}}, diff --git a/man/data_to_wide.Rd b/man/data_to_wide.Rd index 3690eed53..62dc14f8d 100644 --- a/man/data_to_wide.Rd +++ b/man/data_to_wide.Rd @@ -208,8 +208,7 @@ data_to_wide( } \seealso{ \itemize{ -\item Functions to rename stuff: \code{\link[=data_rename]{data_rename()}}, \code{\link[=data_rename_rows]{data_rename_rows()}}, -\code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} +\item Add a prefix or suffix to column names: \code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} \item Functions to reorder or remove columns: \code{\link[=data_reorder]{data_reorder()}}, \code{\link[=data_relocate]{data_relocate()}}, \code{\link[=data_remove]{data_remove()}} \item Functions to reshape, pivot or rotate data frames: \code{\link[=data_to_long]{data_to_long()}}, diff --git 
a/man/extract_column_names.Rd b/man/extract_column_names.Rd index fe334e22f..d74d092c0 100644 --- a/man/extract_column_names.Rd +++ b/man/extract_column_names.Rd @@ -179,8 +179,7 @@ head(data_select(mtcars, c(`Miles per Gallon` = "mpg", Cylinders = "cyl"))) } \seealso{ \itemize{ -\item Functions to rename stuff: \code{\link[=data_rename]{data_rename()}}, \code{\link[=data_rename_rows]{data_rename_rows()}}, -\code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} +\item Add a prefix or suffix to column names: \code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} \item Functions to reorder or remove columns: \code{\link[=data_reorder]{data_reorder()}}, \code{\link[=data_relocate]{data_relocate()}}, \code{\link[=data_remove]{data_remove()}} \item Functions to reshape, pivot or rotate data frames: \code{\link[=data_to_long]{data_to_long()}}, diff --git a/man/recode_values.Rd b/man/recode_values.Rd index 11f9cfadc..d738093cb 100644 --- a/man/recode_values.Rd +++ b/man/recode_values.Rd @@ -279,8 +279,7 @@ options(data_recode_pattern = NULL) } \seealso{ \itemize{ -\item Functions to rename stuff: \code{\link[=data_rename]{data_rename()}}, \code{\link[=data_rename_rows]{data_rename_rows()}}, -\code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} +\item Add a prefix or suffix to column names: \code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} \item Functions to reorder or remove columns: \code{\link[=data_reorder]{data_reorder()}}, \code{\link[=data_relocate]{data_relocate()}}, \code{\link[=data_remove]{data_remove()}} \item Functions to reshape, pivot or rotate data frames: \code{\link[=data_to_long]{data_to_long()}}, diff --git a/man/rescale.Rd b/man/rescale.Rd index 8d6025a8d..f163a6e8c 100644 --- a/man/rescale.Rd +++ b/man/rescale.Rd @@ -132,6 +132,10 @@ A rescaled object. 
Rescale variables to a new range. Can also be used to reverse-score variables (change the keying/scoring direction), or to expand a range. } +\details{ +\code{select} can also be a named character vector. In this case, the names are +used to rename the columns in the output data frame. See 'Examples'. +} \section{Selection of variables - the \code{select} argument}{ For most functions that have a \code{select} argument (including this function), diff --git a/man/slide.Rd b/man/slide.Rd index d68f7e30a..c350ab7e4 100644 --- a/man/slide.Rd +++ b/man/slide.Rd @@ -124,8 +124,7 @@ sapply(mtcars, min) } \seealso{ \itemize{ -\item Functions to rename stuff: \code{\link[=data_rename]{data_rename()}}, \code{\link[=data_rename_rows]{data_rename_rows()}}, -\code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} +\item Add a prefix or suffix to column names: \code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} \item Functions to reorder or remove columns: \code{\link[=data_reorder]{data_reorder()}}, \code{\link[=data_relocate]{data_relocate()}}, \code{\link[=data_remove]{data_remove()}} \item Functions to reshape, pivot or rotate data frames: \code{\link[=data_to_long]{data_to_long()}}, diff --git a/man/text_format.Rd b/man/text_format.Rd index 16c76e67c..082e3cbe1 100644 --- a/man/text_format.Rd +++ b/man/text_format.Rd @@ -50,17 +50,7 @@ text elements will not be enclosed.} \item{n}{The number of characters to find.} -\item{pattern}{Character vector. -\itemize{ -\item For \code{data_addprefix()} or \code{data_addsuffix()}, a character string, which -will be added as prefix or suffix to the column names. -\item For \code{data_rename()}, indicates columns that should be selected for -renaming. Can be \code{NULL} (in which case all columns are selected). -\code{pattern} can also be a named vector. In this case, names are used as -values for the \code{replacement} argument (i.e. 
\code{pattern} can be a character -vector using \verb{<new name> = "<old name>"} and argument \code{replacement} will -be ignored then). -}} +\item{pattern}{Regex pattern to remove from \code{text}.} } \value{ A character string. diff --git a/man/winsorize.Rd b/man/winsorize.Rd index 15fa6af9b..fd9e38c64 100644 --- a/man/winsorize.Rd +++ b/man/winsorize.Rd @@ -82,8 +82,7 @@ winsorize(iris, threshold = 0.2) } \seealso{ \itemize{ -\item Functions to rename stuff: \code{\link[=data_rename]{data_rename()}}, \code{\link[=data_rename_rows]{data_rename_rows()}}, -\code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} +\item Add a prefix or suffix to column names: \code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}} \item Functions to reorder or remove columns: \code{\link[=data_reorder]{data_reorder()}}, \code{\link[=data_relocate]{data_relocate()}}, \code{\link[=data_remove]{data_remove()}} \item Functions to reshape, pivot or rotate data frames: \code{\link[=data_to_long]{data_to_long()}}, diff --git a/pkgdown/_pkgdown.yaml b/pkgdown/_pkgdown.yaml index 31ec901d0..01b701f03 100644 --- a/pkgdown/_pkgdown.yaml +++ b/pkgdown/_pkgdown.yaml @@ -98,6 +98,7 @@ reference: Primarily useful in the context of other 'easystats' packages contents: - reshape_ci + - data_rename - data_addprefix - remove_empty - contains("rownames") diff --git a/tests/testthat/test-attributes.R b/tests/testthat/test-attributes.R index ebd26de99..df1ec0302 100644 --- a/tests/testthat/test-attributes.R +++ b/tests/testthat/test-attributes.R @@ -136,7 +136,7 @@ test_that("convert_to_na, attributes preserved", { test_that("data_rename, attributes preserved", { x <- mtcars attr(x, "myattri") <- "I'm here" - x2 <- data_rename(x, pattern = "hp", replacement = "horsepower") + x2 <- data_rename(x, select = "hp", replacement = "horsepower") expect_identical(attr(x2, "myattri", exact = TRUE), "I'm here") }) diff --git
a/tests/testthat/test-data_rename.R b/tests/testthat/test-data_rename.R index 79f4427b3..3495a3355 100644 --- a/tests/testthat/test-data_rename.R +++ b/tests/testthat/test-data_rename.R @@ -20,31 +20,34 @@ test_that("data_rename works with one or several replacements", { ) }) +test_that("data_rename cannot have a partially named vector", { + expect_error( + data_rename(test, c(length = "Sepal.Length", "Sepal.Width")), + "all elements must" + ) +}) + test_that("data_rename returns a data frame", { x <- data_rename(test, "Sepal.Length", "length") expect_s3_class(x, "data.frame") }) -test_that("data_rename: pattern must be of type character", { - expect_error( - data_rename(test, pattern = 1), - regexp = "Argument `pattern` must be of type character" +test_that("data_rename: multiple selection types", { + expect_named( + data_rename(test, select = 1, "foo"), + c("foo", names(iris)[2:5]) ) - expect_error( - data_rename(test, pattern = TRUE), - regexp = "Argument `pattern` must be of type character" + expect_named( + data_rename(test, select = regex("tal"), c("foo1", "foo2")), + c("Sepal.Length", "Sepal.Width", "foo1", "foo2", "Species") ) }) test_that("data_rename: replacement not allowed to have NA or empty strings", { - expect_error( - data_rename(test, pattern = c(test = "Species", "Sepal.Length")), - regexp = "Either name all elements of `pattern`" - ) expect_error( data_rename( test, - pattern = c("Species", "Sepal.Length"), + select = c("Species", "Sepal.Length"), replacement = c("foo", NA_character_) ), regexp = "`replacement` is not allowed" @@ -53,78 +56,62 @@ test_that("data_rename: replacement not allowed to have NA or empty strings", { # replacement ------------- -test_that("data_rename uses indices when no replacement", { - x <- data_rename(test, pattern = c("Sepal.Length", "Petal.Length")) - expect_identical(dim(test), dim(x)) - expect_named(x, c("1", "Sepal.Width", "2", "Petal.Width", "Species")) +test_that("data_rename errors when no replacement", { + 
expect_error( + data_rename(test, select = c("Sepal.Length", "Petal.Length")), + "There are more names in `select` than in `replacement`" + ) }) -test_that("data_rename works when too many names in 'replacement'", { - expect_message( - { - x <- data_rename(test, replacement = paste0("foo", 1:6)) - }, - "There are more names in" +test_that("data_rename errors when too many names in 'replacement'", { + expect_error( + data_rename(test, replacement = paste0("foo", 1:6)), + "There are more names in `replacement` than in `select`" ) - expect_identical(dim(test), dim(x)) - expect_named(x, paste0("foo", 1:5)) }) test_that("data_rename works when not enough names in 'replacement'", { - expect_message( - { - x <- data_rename(test, replacement = paste0("foo", 1:2)) - }, - "There are more names in" + expect_error( + data_rename(test, replacement = paste0("foo", 1:2)), + "There are more names in `select` than in `replacement`" ) - expect_identical(dim(test), dim(x)) - expect_named(x, c("foo1", "foo2", "Petal.Length", "Petal.Width", "Species")) }) -# no pattern -------------- - -test_that("data_rename uses the whole dataset when pattern = NULL", { - x1 <- data_rename(test) - x2 <- data_rename(test, pattern = names(test)) - expect_identical(dim(test), dim(x1)) - expect_identical(x1, x2) +# no select -------------- - x3 <- data_rename(test, replacement = paste0("foo", 1:5)) - x4 <- data_rename(test, pattern = names(test), replacement = paste0("foo", 1:5)) - expect_identical(dim(test), dim(x3)) - expect_identical(x3, x4) +test_that("data_rename errors when select = NULL", { + expect_error( + data_rename(test), + "more names in `select`" + ) }) # other -------------- -test_that("data_rename: argument 'safe' works", { - expect_message( - data_rename(iris, "FakeCol", "length", safe = TRUE), - "Variable `FakeCol` is not in your data frame" +test_that("data_rename: argument 'safe' is deprecated", { + expect_error( + data_rename(iris, "FakeCol", "length", verbose = FALSE), + "were not 
found" ) expect_error( - data_rename(iris, "FakeCol", "length", safe = FALSE), - "Variable `FakeCol` is not in your data frame" + expect_warning( + data_rename(iris, "FakeCol", "length", safe = FALSE, verbose = FALSE), + "used" + ) ) }) test_that("data_rename deals correctly with duplicated replacement", { x <- data_rename(test, - pattern = names(test)[1:4], + select = names(test)[1:4], replacement = c("foo", "bar", "foo", "bar") ) expect_identical(dim(test), dim(x)) expect_named(x[1:4], c("foo", "bar", "foo.2", "bar.2")) }) -test_that("data_rename doesn't change colname if invalid pattern", { - x <- suppressMessages(data_rename(test, "FakeCol", "length")) - expect_named(x, names(test)) -}) - - # preserve attributes -------------------------- @@ -142,7 +129,7 @@ test_that("data_rename preserves attributes", { }) -# glue-styled pattern -------------------------- +# glue-styled replacement -------------------------- test_that("data_rename glue-style", { data(mtcars) @@ -226,3 +213,11 @@ withr::with_environment( ) }) ) + +test_that("Argument `pattern` is deprecated", { + expect_warning( + head(data_rename(iris, pattern = "Sepal.Length", "length")), + "Argument `pattern` is deprecated. Please use `select` instead.", + fixed = TRUE + ) +}) diff --git a/vignettes/tidyverse_translation.Rmd b/vignettes/tidyverse_translation.Rmd index ae4b339b3..a7e5225fc 100644 --- a/vignettes/tidyverse_translation.Rmd +++ b/vignettes/tidyverse_translation.Rmd @@ -515,7 +515,7 @@ a vector of new names for these columns that must be of the same length. # ---------- datawizard ----------- starwars |> data_rename( - pattern = c("sex", "hair_color"), + select = c("sex", "hair_color"), replacement = c("Sex", "Hair Color") ) ``` @@ -547,7 +547,7 @@ to_rename <- names(starwars) starwars |> data_rename( - pattern = to_rename, + select = to_rename, replacement = tools::toTitleCase(gsub("_", " ", to_rename, fixed = TRUE)) ) ```