From 34880ab540c931698d7ce71406b07c3998192c7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=98topepo=E2=80=99?= <‘mxkuhn@gmail.com’> Date: Mon, 21 Oct 2024 19:18:04 -0400 Subject: [PATCH 01/25] imported files --- R/import-standalone-types-check.R | 17 +++++++++++- R/steps_and_checks.R | 44 +++++++++++++++++++++++++++---- 2 files changed, 55 insertions(+), 6 deletions(-) diff --git a/R/import-standalone-types-check.R b/R/import-standalone-types-check.R index 6782d69b1..1ca83997d 100644 --- a/R/import-standalone-types-check.R +++ b/R/import-standalone-types-check.R @@ -13,6 +13,9 @@ # # ## Changelog # +# 2024-08-15: +# - `check_character()` gains an `allow_na` argument (@martaalcalde, #1724) +# # 2023-03-13: # - Improved error messages of number checkers (@teunbrand) # - Added `allow_infinite` argument to `check_number_whole()` (@mgirlich). @@ -461,15 +464,28 @@ check_formula <- function(x, # Vectors ----------------------------------------------------------------- +# TODO: Figure out what to do with logical `NA` and `allow_na = TRUE` + check_character <- function(x, ..., + allow_na = TRUE, allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { + if (!missing(x)) { if (is_character(x)) { + if (!allow_na && any(is.na(x))) { + abort( + sprintf("`%s` can't contain NA values.", arg), + arg = arg, + call = call + ) + } + return(invisible(NULL)) } + if (allow_null && is_null(x)) { return(invisible(NULL)) } @@ -479,7 +495,6 @@ check_character <- function(x, x, "a character vector", ..., - allow_na = FALSE, allow_null = allow_null, arg = arg, call = call diff --git a/R/steps_and_checks.R b/R/steps_and_checks.R index 695dfe780..f9a14dc2f 100644 --- a/R/steps_and_checks.R +++ b/R/steps_and_checks.R @@ -5,7 +5,8 @@ #' @param subclass A character string for the resulting class. For example, #' if `subclass = "blah"` the step object that is returned has class #' `step_blah` or `check_blah` depending on the context. -#' @param ... All arguments to the operator that should be returned. +#' @param ... All arguments to the operator that should be returned. Required +#' arguments are `trained`, `skip`, and `id`. #' @param .prefix Prefix to the subclass created. #' #' @seealso [developer_functions] @@ -13,16 +14,30 @@ #' @keywords internal #' @return An updated step or check with the new class. #' @export -step <- function(subclass, ..., .prefix = "step_") { - structure(list(...), +step <- function(subclass, ..., .prefix = "step_", + call = rlang::caller_env()) { + args <- list(...) + + check_string(subclass, call = call) + check_string(.prefix, call = call) + check_step_check_args(args, call = call) + + structure(args, class = c(paste0(.prefix, subclass), "step") ) } #' @rdname step #' @export -check <- function(subclass, ..., .prefix = "check_") { - structure(list(...), +check <- function(subclass, ..., .prefix = "check_", + call = rlang::caller_env()) { + args <- list(...) + + check_string(subclass, call = call) + check_string(.prefix, call = call) + check_step_check_args(args, call = call) + + structure(args, class = c(paste0(.prefix, subclass), "check") ) } @@ -51,3 +66,22 @@ add_check <- function(rec, object) { rec$steps[[length(rec$steps) + 1]] <- object rec } + +# ------------------------------------------------------------------------------ + +check_step_check_args <- function(x, call = rlang::caller_env()) { + req_args <- c("trained", "id", "skip") + nms <- names(x) + has_req_args <- req_args %in% nms + if (!all(has_req_args)) { + miss_args <- req_args[!has_req_args] + cli::cli_abort("Some required arguments are missing: {.arg {miss_args}}.", + call = call) + } + check_bool(x$trained, call = call, arg = "trained") + check_bool(x$skip, call = call, arg = "skip") + check_string(x$id, call = call, arg = "id") + invisible(x) +} + + From 0c4d811fc0e6d635f6bd7bec775ab343f125a259 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=98topepo=E2=80=99?= <‘mxkuhn@gmail.com’> Date: Mon, 21 Oct 2024 19:18:19 -0400 Subject: [PATCH 02/25] check for required arguments --- NEWS.md | 2 ++ R/import-standalone-obj-type.R | 11 +++++++---- man/prep.Rd | 7 ++++--- tests/testthat/test-basics.R | 2 +- tests/testthat/test-skipping.R | 1 + tests/testthat/test-update.R | 8 ++++---- 6 files changed, 19 insertions(+), 12 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3dfa7962b..cc6c60540 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,8 @@ * `prep.recipe(..., strings_as_factors = TRUE)` now only converts string variables that have role "predictor" or "outcome". (@dajmcdon, #1358, #1376) +* All steps and checks now require arguments `trained`, `skip`, and `id` at all times. + # recipes 1.1.0 ## Improvements diff --git a/R/import-standalone-obj-type.R b/R/import-standalone-obj-type.R index 8e3c07df4..646aa33fc 100644 --- a/R/import-standalone-obj-type.R +++ b/R/import-standalone-obj-type.R @@ -5,13 +5,16 @@ # --- # repo: r-lib/rlang # file: standalone-obj-type.R -# last-updated: 2023-05-01 +# last-updated: 2024-02-14 # license: https://unlicense.org # imports: rlang (>= 1.1.0) # --- # # ## Changelog # +# 2024-02-14: +# - `obj_type_friendly()` now works for S7 objects. +# # 2023-05-01: # - `obj_type_friendly()` now only displays the first class of S3 objects. # @@ -267,19 +270,19 @@ vec_type_friendly <- function(x, length = FALSE) { #' Return OO type #' @param x Any R object. #' @return One of `"bare"` (for non-OO objects), `"S3"`, `"S4"`, -#' `"R6"`, or `"R7"`. +#' `"R6"`, or `"S7"`. #' @noRd obj_type_oo <- function(x) { if (!is.object(x)) { return("bare") } - class <- inherits(x, c("R6", "R7_object"), which = TRUE) + class <- inherits(x, c("R6", "S7_object"), which = TRUE) if (class[[1]]) { "R6" } else if (class[[2]]) { - "R7" + "S7" } else if (isS4(x)) { "S4" } else { diff --git a/man/prep.Rd b/man/prep.Rd index 204a1c6e4..4385d4b40 100644 --- a/man/prep.Rd +++ b/man/prep.Rd @@ -47,9 +47,10 @@ since it does not take environments into account.} \item{log_changes}{A logical for printing a summary for each step regarding which (if any) columns were added or removed during training.} -\item{strings_as_factors}{A logical: should character columns be converted to -factors? This affects the preprocessed training set (when -\code{retain = TRUE}) as well as the results of \code{bake.recipe}.} +\item{strings_as_factors}{A logical: should character columns that have role +"predictor" or "outcome" be converted to factors? This affects the +preprocessed training set (when \code{retain = TRUE}) as well as the results of +\code{bake.recipe}.} } \value{ A recipe whose step objects have been updated with the required diff --git a/tests/testthat/test-basics.R b/tests/testthat/test-basics.R index 5da0dac85..bc0473200 100644 --- a/tests/testthat/test-basics.R +++ b/tests/testthat/test-basics.R @@ -394,7 +394,7 @@ test_that("recipe() works with odd formula usage (#1283)", { sort(recipe(mpg ~ . + disp, data = mtcars)$var_info$variable), sort(colnames(mtcars)) ) - + expect_identical( sort(recipe(mpg ~ disp + disp, mtcars)$var_info$variable), c("disp", "mpg") diff --git a/tests/testthat/test-skipping.R b/tests/testthat/test-skipping.R index 8452da60f..838dd20fe 100644 --- a/tests/testthat/test-skipping.R +++ b/tests/testthat/test-skipping.R @@ -45,6 +45,7 @@ test_that("check existing steps for `skip` arg", { step_check <- step_check[step_check != "check_new_data"] step_check <- step_check[step_check != "check_role_requirements"] step_check <- step_check[step_check != "check_bake_role_requirements"] + step_check <- step_check[step_check != "check_step_check_args"] # R/import-standalone-types-check.R step_check <- step_check[step_check != "check_bool"] diff --git a/tests/testthat/test-update.R b/tests/testthat/test-update.R index a2d469201..230ebed2c 100644 --- a/tests/testthat/test-update.R +++ b/tests/testthat/test-update.R @@ -1,6 +1,6 @@ test_that("can update a step", { - stp_4 <- recipes::step("stp", x = 4, trained = FALSE) - stp_5 <- recipes::step("stp", x = 5, trained = FALSE) + stp_4 <- recipes::step("stp", x = 4, trained = FALSE, id = "a", skip = TRUE) + stp_5 <- recipes::step("stp", x = 5, trained = FALSE, id = "a", skip = TRUE) update(stp_4, x = 5) @@ -8,7 +8,7 @@ test_that("can update a step", { }) test_that("cannot create new fields for a step", { - stp <- recipes::step("stp", x = 4, trained = FALSE) + stp <- recipes::step("stp", x = 4, trained = FALSE, id = "a", skip = TRUE) expect_snapshot(error = TRUE, update(stp, y = 5) @@ -16,7 +16,7 @@ test_that("cannot create new fields for a step", { }) test_that("cannot update trained steps", { - stp <- recipes::step("stp", x = 4, trained = TRUE) + stp <- recipes::step("stp", x = 4, trained = TRUE, id = "a", skip = TRUE) expect_snapshot(error = TRUE, update(stp, x = 5) From 203db840c5cddda07a42620330e515b19c023127 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=98topepo=E2=80=99?= <‘mxkuhn@gmail.com’> Date: Mon, 21 Oct 2024 19:38:31 -0400 Subject: [PATCH 03/25] check for required arguments --- R/misc.R | 11 ++-- R/steps_and_checks.R | 6 +- tests/testthat/_snaps/basics.md | 40 ++++++++++++ tests/testthat/_snaps/stringsAsFactors.md | 4 +- tests/testthat/test-basics.R | 76 ++++++++++++++++------- 5 files changed, 107 insertions(+), 30 deletions(-) diff --git a/R/misc.R b/R/misc.R index 87a661344..69043e4a9 100644 --- a/R/misc.R +++ b/R/misc.R @@ -98,9 +98,9 @@ get_rhs_vars <- function(formula, data, no_lhs = FALSE) { #' #' dummy_names("x", substring(after_mm, 2), ordinal = TRUE) #' @export -names0 <- function(num, prefix = "x") { +names0 <- function(num, prefix = "x", call = rlang::caller_env()) { if (num < 1) { - cli::cli_abort("{.arg num} should be > 0.") + cli::cli_abort("{.arg num} should be > 0.", call = call) } ind <- format(seq_len(num)) ind <- gsub(" ", "0", ind) @@ -635,7 +635,7 @@ rand_id <- function(prefix = "step", len = 5) { } -check_nominal_type <- function(x, lvl) { +check_nominal_type <- function(x, lvl, call = rlang::caller_env()) { all_act_cols <- names(x) # What columns do we expect to be factors based on the data @@ -667,7 +667,8 @@ check_nominal_type <- function(x, lvl) { ", "*" = "{.and {.var {was_factor}}}", "i" = "This may cause errors when processing new data." - ) + ), + call = call ) } } @@ -873,7 +874,7 @@ check_new_data <- function(req, object, new_data) { step_cls <- class(object)[1] step_id <- object$id cli::cli_abort( - "The following required {cli::qty(col_diff)} column{?s} {?is/are} missing + "The following required {cli::qty(col_diff)} column{?s} {?is/are} missing from {.arg new_data}: {col_diff}.", call = rlang::call2(step_cls) ) diff --git a/R/steps_and_checks.R b/R/steps_and_checks.R index f9a14dc2f..7dbc2497d 100644 --- a/R/steps_and_checks.R +++ b/R/steps_and_checks.R @@ -19,7 +19,8 @@ step <- function(subclass, ..., .prefix = "step_", args <- list(...) check_string(subclass, call = call) - check_string(.prefix, call = call) + .prefix <- rlang::arg_match0(.prefix, c("step_", "check_", ""), + error_call = call) check_step_check_args(args, call = call) structure(args, @@ -34,7 +35,8 @@ check <- function(subclass, ..., .prefix = "check_", args <- list(...) check_string(subclass, call = call) - check_string(.prefix, call = call) + .prefix <- rlang::arg_match0(.prefix, c("step_", "check_", ""), + error_call = call) check_step_check_args(args, call = call) structure(args, diff --git a/tests/testthat/_snaps/basics.md b/tests/testthat/_snaps/basics.md index 29c0145d6..59069b59d 100644 --- a/tests/testthat/_snaps/basics.md +++ b/tests/testthat/_snaps/basics.md @@ -238,3 +238,43 @@ Error in `as.data.frame.default()`: ! cannot coerce class '"function"' to a data.frame +# step constructor + + Code + step_lightly(trained = "yes") + Condition + Error in `step_lightly()`: + ! `trained` must be `TRUE` or `FALSE`, not the string "yes". + +--- + + Code + step_lightly(id = TRUE) + Condition + Error in `step_lightly()`: + ! `id` must be a single string, not `TRUE`. + +--- + + Code + step_lightly(skip = "you betcha") + Condition + Error in `step_lightly()`: + ! `skip` must be `TRUE` or `FALSE`, not the string "you betcha". + +--- + + Code + step(subclass = "heavy") + Condition + Error: + ! Some required arguments are missing: `trained`, `id`, and `skip`. + +--- + + Code + step() + Condition + Error: + ! `subclass` must be a single string, not absent. + diff --git a/tests/testthat/_snaps/stringsAsFactors.md b/tests/testthat/_snaps/stringsAsFactors.md index b7bea98be..e5b373a11 100644 --- a/tests/testthat/_snaps/stringsAsFactors.md +++ b/tests/testthat/_snaps/stringsAsFactors.md @@ -3,7 +3,7 @@ Code rec1_as_str <- bake(rec1, new_data = as_str) Condition - Warning: + Warning in `bake()`: ! There were 2 columns that were factors when the recipe was prepped: * `fact` and `ord` i This may cause errors when processing new data. @@ -13,7 +13,7 @@ Code rec2_as_str <- bake(rec2, new_data = as_str) Condition - Warning: + Warning in `bake()`: ! There were 2 columns that were factors when the recipe was prepped: * `fact` and `ord` i This may cause errors when processing new data. diff --git a/tests/testthat/test-basics.R b/tests/testthat/test-basics.R index bc0473200..f1ff5783c 100644 --- a/tests/testthat/test-basics.R +++ b/tests/testthat/test-basics.R @@ -17,16 +17,16 @@ test_that("Recipe correctly identifies output variable", { test_that("Recipe fails on in-line functions", { expect_snapshot(error = TRUE, - recipe(HHV ~ log(nitrogen), data = biomass) + recipe(HHV ~ log(nitrogen), data = biomass) ) expect_snapshot(error = TRUE, - recipe(HHV ~ (.)^2, data = biomass) + recipe(HHV ~ (.)^2, data = biomass) ) expect_snapshot(error = TRUE, - recipe(HHV ~ nitrogen + sulfur + nitrogen:sulfur, data = biomass) + recipe(HHV ~ nitrogen + sulfur + nitrogen:sulfur, data = biomass) ) expect_snapshot(error = TRUE, - recipe(HHV ~ nitrogen^2, data = biomass) + recipe(HHV ~ nitrogen^2, data = biomass) ) }) @@ -60,9 +60,9 @@ test_that("return character or factor values", { test_that("Using prepare", { expect_snapshot(error = TRUE, - prepare(recipe(HHV ~ ., data = biomass), - training = biomass - ) + prepare(recipe(HHV ~ ., data = biomass), + training = biomass + ) ) }) @@ -126,10 +126,10 @@ test_that("bake without prep", { step_scale(all_predictors()) %>% step_spatialsign(all_predictors()) expect_snapshot(error = TRUE, - bake(sp_signed, new_data = biomass_te) + bake(sp_signed, new_data = biomass_te) ) expect_snapshot(error = TRUE, - juice(sp_signed) + juice(sp_signed) ) }) @@ -170,7 +170,7 @@ test_that("bake without newdata", { prep(training = biomass) expect_snapshot(error = TRUE, - bake(rec, newdata = biomass) + bake(rec, newdata = biomass) ) }) @@ -198,17 +198,17 @@ test_that("tunable arguments at prep-time", { .tune <- function() rlang::call2("tune") expect_snapshot(error = TRUE, - recipe(Species ~ ., data = iris) %>% - step_ns(all_predictors(), deg_free = .tune()) %>% - prep() + recipe(Species ~ ., data = iris) %>% + step_ns(all_predictors(), deg_free = .tune()) %>% + prep() ) expect_snapshot(error = TRUE, - recipe(~., data = mtcars) %>% - step_pca(all_predictors(), threshold = .tune()) %>% - step_kpca(all_predictors(), num_comp = .tune()) %>% - step_bs(all_predictors(), deg_free = .tune()) %>% - prep() + recipe(~., data = mtcars) %>% + step_pca(all_predictors(), threshold = .tune()) %>% + step_kpca(all_predictors(), num_comp = .tune()) %>% + step_bs(all_predictors(), deg_free = .tune()) %>% + prep() ) }) @@ -278,7 +278,7 @@ test_that("case weights are being infered correctly for formula interface", { mtcars2$cyl <- importance_weights(mtcars2$cyl) expect_snapshot(error = TRUE, - recipe(mpg ~ cyl + disp, data = mtcars2) + recipe(mpg ~ cyl + disp, data = mtcars2) ) }) @@ -304,7 +304,7 @@ test_that("case weights are being infered correctly for x interface", { mtcars2$cyl <- importance_weights(mtcars2$cyl) expect_snapshot(error = TRUE, - recipe(mtcars2) + recipe(mtcars2) ) }) @@ -346,7 +346,7 @@ test_that("`internal data is kept as tibbles when prepping", { # Will ignore new_data and return `output` expect_snapshot(error = TRUE, - bake(rec_prepped, new_data = as_tibble(mtcars)) + bake(rec_prepped, new_data = as_tibble(mtcars)) ) rec_spec <- recipe(mpg ~ ., data = mtcars) %>% @@ -426,3 +426,37 @@ test_that("data argument is checked in recipe.formula() (#1325)", { recipe(~ ., data = data) ) }) + +test_that("step constructor", { + + step_lightly <- + function(trained = FALSE, skip = FALSE, id = "id") { + step( + subclass = "lightly", + trained = trained, + skip = skip, + id = id + ) + } + + expect_snapshot( + step_lightly(trained = "yes"), + error = TRUE + ) + expect_snapshot( + step_lightly(id = TRUE), + error = TRUE + ) + expect_snapshot( + step_lightly(skip = "you betcha"), + error = TRUE + ) + expect_snapshot( + step(subclass = "heavy"), + error = TRUE + ) + expect_snapshot( + step(), + error = TRUE + ) +}) From db81d9895a7992cfb90581ada4ba5e8af151bef9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=98topepo=E2=80=99?= <‘mxkuhn@gmail.com’> Date: Mon, 21 Oct 2024 19:52:29 -0400 Subject: [PATCH 04/25] add bake() to the warning --- tests/testthat/_snaps/nomial_types.md | 10 ++++++++++ tests/testthat/test-nomial_types.R | 5 ++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/testthat/_snaps/nomial_types.md b/tests/testthat/_snaps/nomial_types.md index 3424d386c..ece8ec9f4 100644 --- a/tests/testthat/_snaps/nomial_types.md +++ b/tests/testthat/_snaps/nomial_types.md @@ -28,3 +28,13 @@ * `city` and `zip` i This may cause errors when processing new data. +--- + + Code + res <- bake(rec, te %>% mutate(city = as.character(city))) + Condition + Warning in `bake()`: + ! There were 2 columns that were factors when the recipe was prepped: + * `city` and `zip` + i This may cause errors when processing new data. + diff --git a/tests/testthat/test-nomial_types.R b/tests/testthat/test-nomial_types.R index 70165bd50..4af29f3c4 100644 --- a/tests/testthat/test-nomial_types.R +++ b/tests/testthat/test-nomial_types.R @@ -147,7 +147,7 @@ test_that("missing single factor", { slice(1:500) te <- Sacramento_chr %>% - select(city) %>% + select(city) %>% slice(501:932) rec <- @@ -171,4 +171,7 @@ test_that("missing factors with skipping", { prep(training = tr) expect_snapshot(check_nominal_type(te, rec$orig_lvls)) + expect_snapshot( + res <- bake(rec, te %>% mutate(city = as.character(city))) + ) }) From 3b50659deec89ea4c14b287fe83d1c334adcb48f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=98topepo=E2=80=99?= <‘mxkuhn@gmail.com’> Date: Tue, 22 Oct 2024 06:38:49 -0400 Subject: [PATCH 05/25] add call --- R/tune_args.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/tune_args.R b/R/tune_args.R index c5a58650d..b20997aa6 100644 --- a/R/tune_args.R +++ b/R/tune_args.R @@ -65,14 +65,16 @@ tune_tbl <- function(name = character(), source = character(), component = character(), component_id = character(), - full = FALSE) { + full = FALSE, + call = rlang::caller_env()) { complete_id <- id[!is.na(id)] dups <- duplicated(complete_id) if (any(dups)) { offenders <- unique(complete_id[dups]) cli::cli_abort( "There are duplicate {.field id} values listed in {.fn tune}: \\ - {.val {offenders}}." + {.val {offenders}}.", + call = call ) } From 8dbd5c4525ffd5d9b726a735691eba74965cfbeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=98topepo=E2=80=99?= Date: Tue, 22 Oct 2024 06:53:49 -0400 Subject: [PATCH 06/25] redoc --- R/misc.R | 5 ++++- man/names0.Rd | 7 ++++++- man/step.Rd | 7 ++++--- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/R/misc.R b/R/misc.R index 69043e4a9..c97063a85 100644 --- a/R/misc.R +++ b/R/misc.R @@ -65,7 +65,10 @@ get_rhs_vars <- function(formula, data, no_lhs = FALSE) { #' @param ordinal A logical; was the original factor ordered? #' @param sep A single character value for the separator between the names and #' levels. -#' +#' @param call The execution environment of a currently running function, e.g. +#' `caller_env()`. The function will be mentioned in error messages as the +#' source of the error. See the call argument of [rlang::abort()] for more +#' information. #' @details When using `dummy_names()`, factor levels that are not valid #' variable names (e.g. "some text with spaces") will be changed to valid #' names by [base::make.names()]; see example below. This function will also diff --git a/man/names0.Rd b/man/names0.Rd index a560a60e4..edd944974 100644 --- a/man/names0.Rd +++ b/man/names0.Rd @@ -6,7 +6,7 @@ \alias{dummy_extract_names} \title{Naming Tools} \usage{ -names0(num, prefix = "x") +names0(num, prefix = "x", call = rlang::caller_env()) dummy_names(var, lvl, ordinal = FALSE, sep = "_") @@ -17,6 +17,11 @@ dummy_extract_names(var, lvl, ordinal = FALSE, sep = "_") \item{prefix}{A character string that will start each name.} +\item{call}{The execution environment of a currently running function, e.g. +\code{caller_env()}. The function will be mentioned in error messages as the +source of the error. See the call argument of \code{\link[rlang:abort]{rlang::abort()}} for more +information.} + \item{var}{A single string for the original factor name.} \item{lvl}{A character vectors of the factor levels (in order). diff --git a/man/step.Rd b/man/step.Rd index afa4c6b44..5390ed172 100644 --- a/man/step.Rd +++ b/man/step.Rd @@ -5,16 +5,17 @@ \alias{check} \title{\code{step} sets the class of the \code{step} and \code{check} is for checks.} \usage{ -step(subclass, ..., .prefix = "step_") +step(subclass, ..., .prefix = "step_", call = rlang::caller_env()) -check(subclass, ..., .prefix = "check_") +check(subclass, ..., .prefix = "check_", call = rlang::caller_env()) } \arguments{ \item{subclass}{A character string for the resulting class. For example, if \code{subclass = "blah"} the step object that is returned has class \code{step_blah} or \code{check_blah} depending on the context.} -\item{...}{All arguments to the operator that should be returned.} +\item{...}{All arguments to the operator that should be returned. Required +arguments are \code{trained}, \code{skip}, and \code{id}.} \item{.prefix}{Prefix to the subclass created.} } From bf5c15a850f74c5f51255a321cde297b509d0bab Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 29 Oct 2024 08:08:05 -0400 Subject: [PATCH 07/25] Apply suggestions from code review Co-authored-by: Emil Hvitfeldt --- R/steps_and_checks.R | 4 ++-- tests/testthat/test-basics.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/steps_and_checks.R b/R/steps_and_checks.R index 7dbc2497d..bb9734ad4 100644 --- a/R/steps_and_checks.R +++ b/R/steps_and_checks.R @@ -15,7 +15,7 @@ #' @return An updated step or check with the new class. #' @export step <- function(subclass, ..., .prefix = "step_", - call = rlang::caller_env()) { + call = rlang::caller_env(2)) { args <- list(...) check_string(subclass, call = call) @@ -31,7 +31,7 @@ step <- function(subclass, ..., .prefix = "step_", #' @rdname step #' @export check <- function(subclass, ..., .prefix = "check_", - call = rlang::caller_env()) { + call = rlang::caller_env(2)) { args <- list(...) check_string(subclass, call = call) diff --git a/tests/testthat/test-basics.R b/tests/testthat/test-basics.R index f1ff5783c..e8c3e09dc 100644 --- a/tests/testthat/test-basics.R +++ b/tests/testthat/test-basics.R @@ -440,7 +440,7 @@ test_that("step constructor", { } expect_snapshot( - step_lightly(trained = "yes"), + recipe(~., mtcars) %>% step_normalize(trained = "yes"), error = TRUE ) expect_snapshot( From 2e3f931b7b951662644b40643da1d9a5bf6aa7df Mon Sep 17 00:00:00 2001 From: topepo Date: Tue, 29 Oct 2024 08:25:13 -0400 Subject: [PATCH 08/25] add role as a req step argument --- NEWS.md | 2 +- R/steps_and_checks.R | 9 ++++----- man/step.Rd | 4 ++-- tests/testthat/_snaps/basics.md | 24 ++++++++++++++++-------- tests/testthat/test-basics.R | 10 +++++++--- tests/testthat/test-update.R | 8 ++++---- 6 files changed, 34 insertions(+), 23 deletions(-) diff --git a/NEWS.md b/NEWS.md index cc6c60540..6173cfbbe 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,7 +8,7 @@ * `prep.recipe(..., strings_as_factors = TRUE)` now only converts string variables that have role "predictor" or "outcome". (@dajmcdon, #1358, #1376) -* All steps and checks now require arguments `trained`, `skip`, and `id` at all times. +* All steps and checks now require arguments `trained`, `skip`, `role`, and `id` at all times. # recipes 1.1.0 diff --git a/R/steps_and_checks.R b/R/steps_and_checks.R index bb9734ad4..25380518b 100644 --- a/R/steps_and_checks.R +++ b/R/steps_and_checks.R @@ -23,8 +23,7 @@ step <- function(subclass, ..., .prefix = "step_", error_call = call) check_step_check_args(args, call = call) - structure(args, - class = c(paste0(.prefix, subclass), "step") + structure(args, class = c(paste0(.prefix, subclass), "step") ) } @@ -39,8 +38,7 @@ check <- function(subclass, ..., .prefix = "check_", error_call = call) check_step_check_args(args, call = call) - structure(args, - class = c(paste0(.prefix, subclass), "check") + structure(args, class = c(paste0(.prefix, subclass), "check") ) } @@ -72,7 +70,7 @@ add_check <- function(rec, object) { # ------------------------------------------------------------------------------ check_step_check_args <- function(x, call = rlang::caller_env()) { - req_args <- c("trained", "id", "skip") + req_args <- c("trained", "id", "skip", "role") nms <- names(x) has_req_args <- req_args %in% nms if (!all(has_req_args)) { @@ -83,6 +81,7 @@ check_step_check_args <- function(x, call = rlang::caller_env()) { check_bool(x$trained, call = call, arg = "trained") check_bool(x$skip, call = call, arg = "skip") check_string(x$id, call = call, arg = "id") + check_string(x$role, allow_empty = FALSE, allow_na = TRUE, call = call) invisible(x) } diff --git a/man/step.Rd b/man/step.Rd index 5390ed172..9e5460851 100644 --- a/man/step.Rd +++ b/man/step.Rd @@ -5,9 +5,9 @@ \alias{check} \title{\code{step} sets the class of the \code{step} and \code{check} is for checks.} \usage{ -step(subclass, ..., .prefix = "step_", call = rlang::caller_env()) +step(subclass, ..., .prefix = "step_", call = rlang::caller_env(2)) -check(subclass, ..., .prefix = "check_", call = rlang::caller_env()) +check(subclass, ..., .prefix = "check_", call = rlang::caller_env(2)) } \arguments{ \item{subclass}{A character string for the resulting class. For example, diff --git a/tests/testthat/_snaps/basics.md b/tests/testthat/_snaps/basics.md index f27758fc2..933e84908 100644 --- a/tests/testthat/_snaps/basics.md +++ b/tests/testthat/_snaps/basics.md @@ -241,34 +241,42 @@ # step constructor Code - step_lightly(trained = "yes") + recipe(~., mtcars) %>% step_normalize(trained = "yes") Condition - Error in `step_lightly()`: + Error in `step_normalize()`: ! `trained` must be `TRUE` or `FALSE`, not the string "yes". --- Code - step_lightly(id = TRUE) + recipe(~., mtcars) %>% step_normalize(id = TRUE) Condition - Error in `step_lightly()`: + Error in `step_normalize()`: ! `id` must be a single string, not `TRUE`. --- Code - step_lightly(skip = "you betcha") + recipe(~., mtcars) %>% step_normalize(skip = "you betcha") Condition - Error in `step_lightly()`: + Error in `step_normalize()`: ! `skip` must be `TRUE` or `FALSE`, not the string "you betcha". --- Code - step(subclass = "heavy") + recipe(~., mtcars) %>% step_normalize(role = 13) + Condition + Error in `step_normalize()`: + ! `x$role` must be a single string or `NA`, not the number 13. + +--- + + Code + step(subclass = list()) Condition Error: - ! Some required arguments are missing: `trained`, `id`, and `skip`. + ! `subclass` must be a single string, not an empty list. --- diff --git a/tests/testthat/test-basics.R b/tests/testthat/test-basics.R index e8c3e09dc..cb81898da 100644 --- a/tests/testthat/test-basics.R +++ b/tests/testthat/test-basics.R @@ -444,15 +444,19 @@ test_that("step constructor", { error = TRUE ) expect_snapshot( - step_lightly(id = TRUE), + recipe(~., mtcars) %>% step_normalize(id = TRUE), error = TRUE ) expect_snapshot( - step_lightly(skip = "you betcha"), + recipe(~., mtcars) %>% step_normalize(skip = "you betcha"), error = TRUE ) expect_snapshot( - step(subclass = "heavy"), + recipe(~., mtcars) %>% step_normalize(role = 13), + error = TRUE + ) + expect_snapshot( + step(subclass = list()), error = TRUE ) expect_snapshot( diff --git a/tests/testthat/test-update.R b/tests/testthat/test-update.R index 0f9577d68..d5f77dc28 100644 --- a/tests/testthat/test-update.R +++ b/tests/testthat/test-update.R @@ -1,6 +1,6 @@ test_that("can update a step", { - stp_4 <- recipes::step("stp", x = 4, trained = FALSE, id = "a", skip = TRUE) - stp_5 <- recipes::step("stp", x = 5, trained = FALSE, id = "a", skip = TRUE) + stp_4 <- recipes::step("stp", x = 4, trained = FALSE, id = "a", skip = TRUE, role = "a") + stp_5 <- recipes::step("stp", x = 5, trained = FALSE, id = "a", skip = TRUE, role = "a") update(stp_4, x = 5) @@ -8,7 +8,7 @@ test_that("can update a step", { }) test_that("cannot create new fields for a step", { - stp <- recipes::step("stp", x = 4, trained = FALSE, id = "a", skip = TRUE) + stp <- recipes::step("stp", x = 4, trained = FALSE, id = "a", skip = TRUE, role = "a") expect_snapshot( error = TRUE, @@ -17,7 +17,7 @@ test_that("cannot create new fields for a step", { }) test_that("cannot update trained steps", { - stp <- recipes::step("stp", x = 4, trained = TRUE, id = "a", skip = TRUE) + stp <- recipes::step("stp", x = 4, trained = TRUE, id = "a", skip = TRUE, role = "a") expect_snapshot(error = TRUE, update(stp, x = 5) From 654a2b50cd71132f56725005857885a0168fce38 Mon Sep 17 00:00:00 2001 From: topepo Date: Tue, 29 Oct 2024 09:02:48 -0400 Subject: [PATCH 09/25] update checks and snapshots --- R/bin2factor.R | 4 +++- man/step_dummy_extract.Rd | 4 ++-- tests/testthat/_snaps/bin2factor.md | 11 ++++++++++- tests/testthat/test-bin2factor.R | 5 +++++ 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/R/bin2factor.R b/R/bin2factor.R index 322a8f278..11abbf977 100644 --- a/R/bin2factor.R +++ b/R/bin2factor.R @@ -65,7 +65,7 @@ step_bin2factor <- if (length(levels) != 2) { msg <- c( msg, - i = "{length(levels)} element{?s} were supplied." + i = "{length(levels)} element{?s} were supplied; two were expected." ) } if (!is.character(levels)) { @@ -76,6 +76,8 @@ step_bin2factor <- } cli::cli_abort(msg) } + check_bool(ref_first) + add_step( recipe, step_bin2factor_new( diff --git a/man/step_dummy_extract.Rd b/man/step_dummy_extract.Rd index 9ce0b62cc..554b3a3e3 100644 --- a/man/step_dummy_extract.Rd +++ b/man/step_dummy_extract.Rd @@ -161,10 +161,10 @@ dummies_color <- recipe(~colors, data = color_examples) \%>\% step_dummy_extract(colors, pattern = "(?<=')[^',]+(?=')") \%>\% prep() -dommies_data_color <- dummies_color \%>\% +dummies_data_color <- dummies_color \%>\% bake(new_data = NULL) -dommies_data_color +dummies_data_color \dontshow{\}) # examplesIf} } \seealso{ diff --git a/tests/testthat/_snaps/bin2factor.md b/tests/testthat/_snaps/bin2factor.md index e06e2aa5a..cae5c59fd 100644 --- a/tests/testthat/_snaps/bin2factor.md +++ b/tests/testthat/_snaps/bin2factor.md @@ -1,3 +1,12 @@ +# works with logicals + + Code + recipe(~., data = mtcars) %>% step_bin2factor(all_logical_predictors(), + ref_first = 1) + Condition + Error in `step_bin2factor()`: + ! `ref_first` must be `TRUE` or `FALSE`, not the number 1. + # bad options Code @@ -15,7 +24,7 @@ Condition Error in `step_bin2factor()`: x `levels` should be a 2-element character string. - i 5 elements were supplied. + i 5 elements were supplied; two were expected. --- diff --git a/tests/testthat/test-bin2factor.R b/tests/testthat/test-bin2factor.R index cc0333979..c9c39bf1d 100644 --- a/tests/testthat/test-bin2factor.R +++ b/tests/testthat/test-bin2factor.R @@ -31,6 +31,11 @@ test_that("works with logicals", { factor(mtcars$am, levels = c(TRUE, FALSE), labels = c("yes", "no")), res$am ) + expect_snapshot( + recipe(~., data = mtcars) %>% + step_bin2factor(all_logical_predictors(), ref_first = 1), + error = TRUE + ) }) From 12778c6e3a40b219e009efff735d00282dd03306 Mon Sep 17 00:00:00 2001 From: topepo Date: Tue, 29 Oct 2024 09:08:03 -0400 Subject: [PATCH 10/25] check keep_original_cols when present --- R/steps_and_checks.R | 5 +++++ tests/testthat/_snaps/basics.md | 8 ++++++++ tests/testthat/test-basics.R | 4 ++++ 3 files changed, 17 insertions(+) diff --git a/R/steps_and_checks.R b/R/steps_and_checks.R index 25380518b..da8110a58 100644 --- a/R/steps_and_checks.R +++ b/R/steps_and_checks.R @@ -82,6 +82,11 @@ check_step_check_args <- function(x, call = rlang::caller_env()) { check_bool(x$skip, call = call, arg = "skip") check_string(x$id, call = call, arg = "id") check_string(x$role, allow_empty = FALSE, allow_na = TRUE, call = call) + + if (any(names(x) == "keep_original_cols")) { + check_bool(x$keep_original_cols, call = call, arg = "keep_original_cols") + } + invisible(x) } diff --git a/tests/testthat/_snaps/basics.md b/tests/testthat/_snaps/basics.md index 933e84908..4bb753274 100644 --- a/tests/testthat/_snaps/basics.md +++ b/tests/testthat/_snaps/basics.md @@ -270,6 +270,14 @@ Error in `step_normalize()`: ! `x$role` must be a single string or `NA`, not the number 13. +--- + + Code + recipe(~., mtcars) %>% step_pca(all_predictors(), keep_original_cols = 0) + Condition + Error in `step_pca()`: + ! `keep_original_cols` must be `TRUE` or `FALSE`, not the number 0. + --- Code diff --git a/tests/testthat/test-basics.R b/tests/testthat/test-basics.R index cb81898da..943dfb290 100644 --- a/tests/testthat/test-basics.R +++ b/tests/testthat/test-basics.R @@ -455,6 +455,10 @@ test_that("step constructor", { recipe(~., mtcars) %>% step_normalize(role = 13), error = TRUE ) + expect_snapshot( + recipe(~., mtcars) %>% step_pca(all_predictors(), keep_original_cols = 0), + error = TRUE + ) expect_snapshot( step(subclass = list()), error = TRUE From b574a41827510049bd46243339d97b5d24d0f3d0 Mon Sep 17 00:00:00 2001 From: topepo Date: Tue, 29 Oct 2024 09:11:43 -0400 Subject: [PATCH 11/25] update checks and snapshots --- R/class.R | 2 ++ tests/testthat/_snaps/class.md | 16 ++++++++++++++++ tests/testthat/test-class.R | 13 +++++++++++++ 3 files changed, 31 insertions(+) diff --git a/R/class.R b/R/class.R index 4b6c0fac4..2bd1ff34d 100644 --- a/R/class.R +++ b/R/class.R @@ -102,6 +102,8 @@ check_class <- check_class_new <- function(terms, role, trained, class_nm, allow_additional, class_list, skip, id) { + check_character(class_nm, allow_null = TRUE, call = rlang::caller_env(2)) + check_bool(allow_additional, call = rlang::caller_env(2)) check( subclass = "class", terms = terms, diff --git a/tests/testthat/_snaps/class.md b/tests/testthat/_snaps/class.md index 449b36970..484461798 100644 --- a/tests/testthat/_snaps/class.md +++ b/tests/testthat/_snaps/class.md @@ -151,3 +151,19 @@ -- Operations * Checking the class(es) for: cyl, disp, hp, drat, wt, qsec, ... | Trained +# bad args + + Code + recipe(mpg ~ ., mtcars) %>% check_class(all_predictors(), class_nm = 1) + Condition + Error: + ! `class_nm` must be a character vector or `NULL`, not the number 1. + +--- + + Code + recipe(mpg ~ ., mtcars) %>% check_class(all_predictors(), allow_additional = "yes") + Condition + Error: + ! `allow_additional` must be `TRUE` or `FALSE`, not the string "yes". + diff --git a/tests/testthat/test-class.R b/tests/testthat/test-class.R index 7aada07a9..7f68ec499 100644 --- a/tests/testthat/test-class.R +++ b/tests/testthat/test-class.R @@ -169,3 +169,16 @@ test_that("printing", { expect_snapshot(print(rec7)) expect_snapshot(prep(rec7)) }) + + +test_that("bad args", { + expect_snapshot( + recipe(mpg ~ ., mtcars) %>% check_class(all_predictors(), class_nm = 1), + error = TRUE + ) + expect_snapshot( + recipe(mpg ~ ., mtcars) %>% check_class(all_predictors(), allow_additional = "yes"), + error = TRUE + ) +}) + From 4c8770bff7d32c13afa8665c074894cd72a1dfe7 Mon Sep 17 00:00:00 2001 From: topepo Date: Tue, 29 Oct 2024 09:15:55 -0400 Subject: [PATCH 12/25] update checks and snapshots --- R/classdist_shrunken.R | 2 ++ tests/testthat/_snaps/classdist_shrunken.md | 20 ++++++++++++++++++++ tests/testthat/test-classdist_shrunken.R | 18 ++++++++++++++++++ 3 files changed, 40 insertions(+) diff --git a/R/classdist_shrunken.R b/R/classdist_shrunken.R index 1b277abcf..2f87d0f95 100644 --- a/R/classdist_shrunken.R +++ b/R/classdist_shrunken.R @@ -289,6 +289,8 @@ prep.step_classdist_shrunken <- function(x, training, info = NULL, ...) { sd_offset <- x$sd_offset check_number_decimal(sd_offset, min = 0, max = 1) + check_bool(x$log) + check_string(x$prefix) wts <- get_case_weights(info, training) were_weights_used <- are_weights_used(wts) diff --git a/tests/testthat/_snaps/classdist_shrunken.md b/tests/testthat/_snaps/classdist_shrunken.md index a1d45f08c..deb64e8c2 100644 --- a/tests/testthat/_snaps/classdist_shrunken.md +++ b/tests/testthat/_snaps/classdist_shrunken.md @@ -90,6 +90,26 @@ Caused by error in `prep()`: ! `sd_offset` must be a number between 0 and 1, not the number -1. +--- + + Code + recipe(class ~ x + y, data = nsc_test) %>% step_classdist_shrunken( + all_numeric_predictors(), class = "class", log = 2) %>% prep() + Condition + Error in `step_classdist_shrunken()`: + Caused by error in `prep()`: + ! `x$log` must be `TRUE` or `FALSE`, not the number 2. + +--- + + Code + recipe(class ~ x + y, data = nsc_test) %>% step_classdist_shrunken( + all_numeric_predictors(), class = "class", prefix = 2) %>% prep() + Condition + Error in `step_classdist_shrunken()`: + Caused by error in `prep()`: + ! `x$prefix` must be a single string, not the number 2. + # bake method errors when needed non-standard role columns are missing Code diff --git a/tests/testthat/test-classdist_shrunken.R b/tests/testthat/test-classdist_shrunken.R index 0b9dfbd6f..66ec95901 100644 --- a/tests/testthat/test-classdist_shrunken.R +++ b/tests/testthat/test-classdist_shrunken.R @@ -115,6 +115,24 @@ test_that("shrunken centroids", { ) %>% prep(), error = TRUE ) + expect_snapshot( + recipe(class ~ x + y, data = nsc_test) %>% + step_classdist_shrunken( + all_numeric_predictors(), + class = "class", + log = 2 + ) %>% prep(), + error = TRUE + ) + expect_snapshot( + recipe(class ~ x + y, data = nsc_test) %>% + step_classdist_shrunken( + all_numeric_predictors(), + class = "class", + prefix = 2 + ) %>% prep(), + error = TRUE + ) # ------------------------------------------------------------------------------ From 5f36f5b09482591d6f27c828a49473eae0a452e1 Mon Sep 17 00:00:00 2001 From: topepo Date: Tue, 29 Oct 2024 09:29:25 -0400 Subject: [PATCH 13/25] update checks and snapshots --- R/classdist.R | 5 ++++ tests/testthat/_snaps/classdist.md | 40 ++++++++++++++++++++++++++++++ tests/testthat/test-classdist.R | 29 ++++++++++++++++++++++ 3 files changed, 74 insertions(+) diff --git a/R/classdist.R b/R/classdist.R index 639a5a4d4..225aacc91 100644 --- a/R/classdist.R +++ b/R/classdist.R @@ -195,6 +195,11 @@ prep.step_classdist <- function(x, training, info = NULL, ...) { wts <- NULL } + check_function(x$mean_func) + check_function(x$cov_func) + check_bool(x$pool) + check_string(x$prefix) + x_dat <- split(training[, x_names], training[[class_var]]) if (is.null(wts)) { wts_split <- map(x_dat, ~NULL) diff --git a/tests/testthat/_snaps/classdist.md b/tests/testthat/_snaps/classdist.md index 8165b3c3e..6abe9ed27 100644 --- a/tests/testthat/_snaps/classdist.md +++ b/tests/testthat/_snaps/classdist.md @@ -139,3 +139,43 @@ -- Operations * Distances to Species for: Sepal.Length and Sepal.Width, ... | Trained +# bad args + + Code + recipe(Species ~ ., data = iris) %>% step_classdist(all_predictors(), class = "Species", + mean_func = 2) %>% prep() + Condition + Error in `step_classdist()`: + Caused by error in `prep()`: + ! `x$mean_func` must be a function, not the number 2. + +--- + + Code + recipe(Species ~ ., data = iris) %>% step_classdist(all_predictors(), class = "Species", + cov_func = NULL) %>% prep() + Condition + Error in `step_classdist()`: + Caused by error in `prep()`: + ! `x$cov_func` must be a function, not `NULL`. + +--- + + Code + recipe(Species ~ ., data = iris) %>% step_classdist(all_predictors(), class = "Species", + prefix = NULL) %>% prep() + Condition + Error in `step_classdist()`: + Caused by error in `prep()`: + ! `x$prefix` must be a single string, not `NULL`. + +--- + + Code + recipe(Species ~ ., data = iris) %>% step_classdist(all_predictors(), class = "Species", + pool = NULL) %>% prep() + Condition + Error in `step_classdist()`: + Caused by error in `prep()`: + ! `x$pool` must be `TRUE` or `FALSE`, not `NULL`. + diff --git a/tests/testthat/test-classdist.R b/tests/testthat/test-classdist.R index 75bd58da6..f36b0fd2b 100644 --- a/tests/testthat/test-classdist.R +++ b/tests/testthat/test-classdist.R @@ -277,3 +277,32 @@ test_that("printing", { expect_snapshot(print(rec)) expect_snapshot(prep(rec)) }) + + +test_that("bad args", { + expect_snapshot( + recipe(Species ~ ., data = iris) %>% + step_classdist(all_predictors(), class = "Species", mean_func = 2) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(Species ~ ., data = iris) %>% + step_classdist(all_predictors(), class = "Species", cov_func = NULL) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(Species ~ ., data = iris) %>% + step_classdist(all_predictors(), class = "Species", prefix = NULL) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(Species ~ ., data = iris) %>% + step_classdist(all_predictors(), class = "Species", pool = NULL) %>% + prep(), + error = TRUE + ) +}) + From 04c3762c053c16af44d7325c9cb1151efb6ddcdf Mon Sep 17 00:00:00 2001 From: topepo Date: Tue, 29 Oct 2024 10:54:49 -0400 Subject: [PATCH 14/25] update checks and snapshots --- R/corr.R | 6 ++++++ tests/testthat/_snaps/corr.md | 28 ++++++++++++++++++++++++++++ tests/testthat/test-corr.R | 22 ++++++++++++++++++++++ 3 files changed, 56 insertions(+) diff --git a/R/corr.R b/R/corr.R index 76f219f48..52180e690 100644 --- a/R/corr.R +++ b/R/corr.R @@ -129,6 +129,12 @@ step_corr_new <- prep.step_corr <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) check_type(training[, col_names], types = c("double", "integer")) + check_number_decimal(x$threshold, min = 0, max = 1, arg = "threshold") + use <- x$use + rlang::arg_match(use, c("all.obs", "complete.obs", "pairwise.complete.obs", + "everything", "na.or.complete")) + method <- x$method + rlang::arg_match(method, c("pearson", "kendall", "spearman")) wts <- get_case_weights(info, training) were_weights_used <- are_weights_used(wts, unsupervised = TRUE) diff --git a/tests/testthat/_snaps/corr.md b/tests/testthat/_snaps/corr.md index 25541ab76..fb0d27e37 100644 --- a/tests/testthat/_snaps/corr.md +++ b/tests/testthat/_snaps/corr.md @@ -128,3 +128,31 @@ -- Operations * Correlation filter on: V6 and V1 | Trained +# bad args + + Code + recipe(mpg ~ ., mtcars) %>% step_corr(all_predictors(), threshold = 2) %>% prep() + Condition + Error in `step_corr()`: + Caused by error in `prep()`: + ! `threshold` must be a number between 0 and 1, not the number 2. + +--- + + Code + recipe(mpg ~ ., mtcars) %>% step_corr(all_predictors(), use = "this") %>% prep() + Condition + Error in `step_corr()`: + Caused by error in `prep()`: + ! `use` must be one of "all.obs", "complete.obs", "pairwise.complete.obs", "everything", or "na.or.complete", not "this". + +--- + + Code + recipe(mpg ~ ., mtcars) %>% step_corr(all_predictors(), method = "my dissertation") %>% + prep() + Condition + Error in `step_corr()`: + Caused by error in `prep()`: + ! `method` must be one of "pearson", "kendall", or "spearman", not "my dissertation". + diff --git a/tests/testthat/test-corr.R b/tests/testthat/test-corr.R index ab891cb4b..e16036c68 100644 --- a/tests/testthat/test-corr.R +++ b/tests/testthat/test-corr.R @@ -207,3 +207,25 @@ test_that("tunable is setup to work with extract_parameter_set_dials", { expect_s3_class(params, "parameters") expect_identical(nrow(params), 1L) }) + +test_that("bad args", { + expect_snapshot( + recipe(mpg ~ ., mtcars) %>% + step_corr(all_predictors(), threshold = 2) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(mpg ~ ., mtcars) %>% + step_corr(all_predictors(), use = "this") %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(mpg ~ ., mtcars) %>% + step_corr(all_predictors(), method = "my dissertation") %>% + prep(), + error = TRUE + ) +}) + From 66bb8115042fa3a7d8c5d01efa8e56abe03b7c97 Mon Sep 17 00:00:00 2001 From: topepo Date: Tue, 29 Oct 2024 11:58:56 -0400 Subject: [PATCH 15/25] update checks and snapshots --- R/count.R | 3 +++ R/cut.R | 4 ++-- R/date.R | 3 +++ tests/testthat/_snaps/count.md | 29 +++++++++++++++++++++++++++++ tests/testthat/_snaps/cut.md | 10 ++++++++++ tests/testthat/_snaps/date.md | 30 ++++++++++++++++++++++++++++++ tests/testthat/test-count.R | 24 ++++++++++++++++++++++++ tests/testthat/test-cut.R | 9 ++++++++- tests/testthat/test-date.R | 23 +++++++++++++++++++++++ 9 files changed, 132 insertions(+), 3 deletions(-) diff --git a/R/count.R b/R/count.R index 36f10fe61..73cf6f10e 100644 --- a/R/count.R +++ b/R/count.R @@ -128,6 +128,9 @@ step_count_new <- prep.step_count <- function(x, training, info = NULL, ...) { col_name <- recipes_eval_select(x$terms, training, info) check_type(training[, col_name], types = c("string", "factor", "ordered")) + check_string(x$pattern, allow_empty = TRUE, arg = "pattern") + check_string(x$result, allow_empty = FALSE, arg = "result") + check_bool(x$normalize, arg = "normalize") step_count_new( terms = x$terms, diff --git a/R/cut.R b/R/cut.R index 6a709c804..f1d6f6863 100644 --- a/R/cut.R +++ b/R/cut.R @@ -115,10 +115,10 @@ prep.step_cut <- function(x, training, info = NULL, ...) { if (!is.numeric(x$breaks)) { cli::cli_abort( - "{.arg breaks} must be a numeric vector, \\ - not {.obj_type_friendly {x$breaks}}." + "{.arg breaks} must be a numeric vector, not {.obj_type_friendly {x$breaks}}." ) } + check_bool(x$include_outside_range, arg = "include_outside_range") all_breaks <- vector("list", length(col_names)) names(all_breaks) <- col_names diff --git a/R/date.R b/R/date.R index 5bdb45fd5..77d30fd04 100644 --- a/R/date.R +++ b/R/date.R @@ -160,6 +160,9 @@ step_date_new <- prep.step_date <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) check_type(training[, col_names], types = c("date", "datetime")) + check_bool(x$abbr, arg = "abbr") + check_bool(x$label, arg = "label") + check_bool(x$ordinal, arg = "ordinal") step_date_new( terms = x$terms, diff --git a/tests/testthat/_snaps/count.md b/tests/testthat/_snaps/count.md index 1637a214d..cfdf01410 100644 --- a/tests/testthat/_snaps/count.md +++ b/tests/testthat/_snaps/count.md @@ -122,3 +122,32 @@ -- Operations * Regular expression counts using: description | Trained +# bad args + + Code + recipe(~description, covers) %>% step_count(description, pattern = character(0)) %>% + prep() + Condition + Error in `step_count()`: + ! `pattern` must be a single string, not an empty character vector. + +--- + + Code + recipe(~description, covers) %>% step_count(description, pattern = "(rock|stony)", + result = letters) %>% prep() + Condition + Error in `step_count()`: + Caused by error in `prep()`: + ! `result` must be a single string, not a character vector. + +--- + + Code + recipe(~description, covers) %>% step_count(description, pattern = "(rock|stony)", + normalize = "yes") %>% prep() + Condition + Error in `step_count()`: + Caused by error in `prep()`: + ! `normalize` must be `TRUE` or `FALSE`, not the string "yes". + diff --git a/tests/testthat/_snaps/cut.md b/tests/testthat/_snaps/cut.md index e8e20e1cb..e0ba6dfa3 100644 --- a/tests/testthat/_snaps/cut.md +++ b/tests/testthat/_snaps/cut.md @@ -18,6 +18,16 @@ x All columns selected for the step should be double or integer. * 1 factor variable found: `cat_var` +--- + + Code + recipe(~., x) %>% step_cut(num_var, breaks = 2, include_outside_range = 2) %>% + prep() + Condition + Error in `step_cut()`: + Caused by error in `prep()`: + ! `include_outside_range` must be `TRUE` or `FALSE`, not the number 2. + # full_breaks_check will give warnings Code diff --git a/tests/testthat/_snaps/date.md b/tests/testthat/_snaps/date.md index db44e6cd4..d4ab15f9a 100644 --- a/tests/testthat/_snaps/date.md +++ b/tests/testthat/_snaps/date.md @@ -123,3 +123,33 @@ -- Operations * Date features from: Dan and Stefan | Trained +# bad args + + Code + date_rec <- recipe(~ Dan + Stefan, examples) %>% step_date(all_predictors(), + abbr = "nope") %>% prep() + Condition + Error in `step_date()`: + Caused by error in `prep()`: + ! `abbr` must be `TRUE` or `FALSE`, not the string "nope". + +--- + + Code + date_rec <- recipe(~ Dan + Stefan, examples) %>% step_date(all_predictors(), + label = "no!") %>% prep() + Condition + Error in `step_date()`: + Caused by error in `prep()`: + ! `label` must be `TRUE` or `FALSE`, not the string "no!". + +--- + + Code + date_rec <- recipe(~ Dan + Stefan, examples) %>% step_date(all_predictors(), + ordinal = "never") %>% prep() + Condition + Error in `step_date()`: + Caused by error in `prep()`: + ! `ordinal` must be `TRUE` or `FALSE`, not the string "never". + diff --git a/tests/testthat/test-count.R b/tests/testthat/test-count.R index 3433498bb..58c3f3a48 100644 --- a/tests/testthat/test-count.R +++ b/tests/testthat/test-count.R @@ -181,3 +181,27 @@ test_that("printing", { expect_snapshot(print(rec)) expect_snapshot(prep(rec)) }) + +test_that("bad args", { + skip_if_not_installed("modeldata") + data(covers, package = "modeldata") + + expect_snapshot( + recipe(~description, covers) %>% + step_count(description, pattern = character(0)) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(~description, covers) %>% + step_count(description, pattern = "(rock|stony)", result = letters) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(~description, covers) %>% + step_count(description, pattern = "(rock|stony)", normalize = "yes") %>% + prep(), + error = TRUE + ) +}) diff --git a/tests/testthat/test-cut.R b/tests/testthat/test-cut.R index 5d95c7a4b..fb7ad3445 100644 --- a/tests/testthat/test-cut.R +++ b/tests/testthat/test-cut.R @@ -7,6 +7,11 @@ test_that("step_cut throws error on non-numerics", { expect_snapshot(error = TRUE, recipe(~., x) %>% step_cut(all_predictors(), breaks = 2) %>% prep() ) + expect_snapshot(error = TRUE, + recipe(~., x) %>% + step_cut(num_var, breaks = 2, include_outside_range = 2) %>% + prep() + ) }) test_that("create_full_breaks helper function", { @@ -174,7 +179,7 @@ test_that("breaks argument are type checked", { step_cut(disp, hp, breaks = TRUE) %>% prep() ) - + expect_snapshot( error = TRUE, recipe(~., data = mtcars) %>% @@ -244,3 +249,5 @@ test_that("printing", { expect_snapshot(print(rec)) expect_snapshot(prep(rec)) }) + + diff --git a/tests/testthat/test-date.R b/tests/testthat/test-date.R index 791bdca74..bda63ce8b 100644 --- a/tests/testthat/test-date.R +++ b/tests/testthat/test-date.R @@ -311,3 +311,26 @@ test_that("printing", { expect_snapshot(print(rec)) expect_snapshot(prep(rec)) }) + +test_that("bad args", { + + expect_snapshot( + date_rec <- recipe(~ Dan + Stefan, examples) %>% + step_date(all_predictors(), abbr = "nope") %>% + prep(), + error = TRUE + ) + expect_snapshot( + date_rec <- recipe(~ Dan + Stefan, examples) %>% + step_date(all_predictors(), label = "no!") %>% + prep(), + error = TRUE + ) + expect_snapshot( + date_rec <- recipe(~ Dan + Stefan, examples) %>% + step_date(all_predictors(), ordinal = "never") %>% + prep(), + error = TRUE + ) +}) + From e386811e302305461ab006b5640e247d85eda685 Mon Sep 17 00:00:00 2001 From: topepo Date: Tue, 29 Oct 2024 15:40:41 -0400 Subject: [PATCH 16/25] update checks and snapshots --- R/depth.R | 6 ++++++ R/discretize.R | 25 ++++++++++++------------- R/dummy_extract.R | 24 ++++++++++++------------ man/step_dummy_extract.Rd | 4 ++-- tests/testthat/_snaps/depth.md | 20 ++++++++++++++++++++ tests/testthat/_snaps/discretize.md | 22 +++++++++++++++++++++- tests/testthat/test-depth.R | 19 +++++++++++++++++++ tests/testthat/test-discretize.R | 17 +++++++++++++++++ 8 files changed, 109 insertions(+), 28 deletions(-) diff --git a/R/depth.R b/R/depth.R index 6b07ca83c..deb3e4c7d 100644 --- a/R/depth.R +++ b/R/depth.R @@ -140,10 +140,16 @@ step_depth_new <- ) } +depth_metric <- c("potential", "halfspace", "Mahalanobis", "simplicialVolume", + "spatial", "zonoid") + #' @export prep.step_depth <- function(x, training, info = NULL, ...) { x_names <- recipes_eval_select(x$terms, training, info) check_type(training[, x_names], types = c("double", "integer")) + metric <- x$metric + rlang::arg_match(metric, depth_metric) + check_string(x$prefix, allow_empty = FALSE, arg = "prefix") class_var <- x$class[1] diff --git a/R/discretize.R b/R/discretize.R index 8e1e81f16..d395ddacb 100644 --- a/R/discretize.R +++ b/R/discretize.R @@ -90,12 +90,7 @@ discretize.numeric <- ...) { unique_vals <- length(unique(x)) missing_lab <- "_missing" - - if (cuts < 2) { - cli::cli_abort( - "There should be at least 2 {.arg cuts} but {.val {cuts}} was supplied." - ) - } + check_number_whole(cuts, min = 2) dots <- list(...) if (keep_na) { @@ -115,8 +110,8 @@ discretize.numeric <- breaks <- unique(breaks) if (num_breaks > length(breaks)) { cli::cli_warn( - "Not enough data for {cuts} breaks. \\ - Only {length(breaks)} breaks were used." + "Not enough data for {cuts} breaks. Only {length(breaks)} breaks + were used." ) } if (infs) { @@ -129,8 +124,8 @@ discretize.numeric <- prefix <- prefix[1] if (make.names(prefix) != prefix && !is.null(prefix)) { cli::cli_warn( - "The prefix {.val {prefix}} is not a valid R name. \\ - It has been changed to {.val {make.names(prefix)}}." + "The prefix {.val {prefix}} is not a valid R name. It has been + changed to {.val {make.names(prefix)}}." ) prefix <- make.names(prefix) } @@ -150,8 +145,8 @@ discretize.numeric <- } else { out <- list(bins = 0) cli::cli_warn( - "Data not binned; too few unique values per bin. \\ - Adjust {.arg min_unique} as needed." + "Data not binned; too few unique values per bin. Adjust + {.arg min_unique} as needed." ) } class(out) <- "discretize" @@ -301,8 +296,10 @@ step_discretize <- function(recipe, options = list(prefix = "bin"), skip = FALSE, id = rand_id("discretize")) { - if (any(names(options) %in% c("cuts", "min_unique"))) { + if (any(names(options) == "cuts")) { num_breaks <- options$cuts + } + if (any(names(options) == "min_unique")) { min_unique <- options$min_unique } @@ -348,6 +345,8 @@ bin_wrapper <- function(x, args) { prep.step_discretize <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) check_type(training[, col_names], types = c("double", "integer")) + check_number_whole(x$num_breaks, min = 1, arg = "num_breaks") + check_number_whole(x$min_unique, min = 2, arg = "min_unique") if (length(col_names) > 1 & any(names(x$options) %in% c("prefix", "labels"))) { cli::cli_warn( diff --git a/R/dummy_extract.R b/R/dummy_extract.R index afeef628a..205750d9c 100644 --- a/R/dummy_extract.R +++ b/R/dummy_extract.R @@ -7,10 +7,10 @@ #' @inheritParams step_center #' @inheritParams step_other #' @inheritParams step_dummy -#' @param sep Character vector containing a regular expression to use +#' @param sep Character string containing a regular expression to use #' for splitting. [strsplit()] is used to perform the split. `sep` takes #' priority if `pattern` is also specified. -#' @param pattern Character vector containing a regular expression used +#' @param pattern Character string containing a regular expression used #' for extraction. [gregexpr()] and [regmatches()] are used to perform #' pattern extraction using `perl = TRUE`. #' @template step-return @@ -88,10 +88,10 @@ #' step_dummy_extract(colors, pattern = "(?<=')[^',]+(?=')") %>% #' prep() #' -#' dommies_data_color <- dummies_color %>% +#' dummies_data_color <- dummies_color %>% #' bake(new_data = NULL) #' -#' dommies_data_color +#' dummies_data_color step_dummy_extract <- function(recipe, ..., @@ -107,14 +107,6 @@ step_dummy_extract <- skip = FALSE, id = rand_id("dummy_extract")) { - if (!is_tune(threshold)) { - if (threshold >= 1) { - check_number_whole(threshold) - } else { - check_number_decimal(threshold, min = 0) - } - } - add_step( recipe, step_dummy_extract_new( @@ -160,6 +152,14 @@ step_dummy_extract_new <- prep.step_dummy_extract <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) check_type(training[, col_names], types = c("string", "factor", "ordered")) + if (x$threshold >= 1) { + check_number_whole(x$threshold, arg = "threshold") + } else { + check_number_decimal(x$threshold, min = 0, arg = "threshold") + } + check_string(x$sep, arg = "sep", allow_null = TRUE) + check_string(x$pattern, arg = "pattern", allow_null = TRUE) + check_function(x$naming, arg = "naming", allow_empty = FALSE) wts <- get_case_weights(info, training) were_weights_used <- are_weights_used(wts, unsupervised = TRUE) diff --git a/man/step_dummy_extract.Rd b/man/step_dummy_extract.Rd index 554b3a3e3..ddfa40ced 100644 --- a/man/step_dummy_extract.Rd +++ b/man/step_dummy_extract.Rd @@ -33,11 +33,11 @@ created.} \item{trained}{A logical to indicate if the quantities for preprocessing have been estimated.} -\item{sep}{Character vector containing a regular expression to use +\item{sep}{Character string containing a regular expression to use for splitting. \code{\link[=strsplit]{strsplit()}} is used to perform the split. \code{sep} takes priority if \code{pattern} is also specified.} -\item{pattern}{Character vector containing a regular expression used +\item{pattern}{Character string containing a regular expression used for extraction. \code{\link[=gregexpr]{gregexpr()}} and \code{\link[=regmatches]{regmatches()}} are used to perform pattern extraction using \code{perl = TRUE}.} diff --git a/tests/testthat/_snaps/depth.md b/tests/testthat/_snaps/depth.md index 5d903e306..1572bc65e 100644 --- a/tests/testthat/_snaps/depth.md +++ b/tests/testthat/_snaps/depth.md @@ -85,3 +85,23 @@ -- Operations * Data depth by Species for: Sepal.Length and Sepal.Width, ... | Trained +# bad args + + Code + recipe(Species ~ ., data = iris) %>% step_depth(all_numeric_predictors(), + class = "Species", metric = "circular") %>% prep() + Condition + Error in `step_depth()`: + Caused by error in `prep()`: + ! `metric` must be one of "potential", "halfspace", "Mahalanobis", "simplicialVolume", "spatial", or "zonoid", not "circular". + +--- + + Code + recipe(Species ~ ., data = iris) %>% step_depth(all_numeric_predictors(), + class = "Species", prefix = 0L) %>% prep() + Condition + Error in `step_depth()`: + Caused by error in `prep()`: + ! `prefix` must be a single string, not the number 0. + diff --git a/tests/testthat/_snaps/discretize.md b/tests/testthat/_snaps/discretize.md index 9a6ef4145..55cf46804 100644 --- a/tests/testthat/_snaps/discretize.md +++ b/tests/testthat/_snaps/discretize.md @@ -74,7 +74,7 @@ Condition Error in `step_discretize()`: Caused by error in `recipes::discretize()`: - ! There should be at least 2 `cuts` but 1 was supplied. + ! `cuts` must be a whole number larger than or equal to 2, not the number 1. --- @@ -119,6 +119,26 @@ -- Operations * Discretize numeric variables from: x1 | Trained +--- + + Code + recipe(mpg ~ ., data = mtcars) %>% step_discretize(disp, num_breaks = 0) %>% + prep() + Condition + Error in `step_discretize()`: + Caused by error in `prep()`: + ! `num_breaks` must be a whole number larger than or equal to 1, not the number 0. + +--- + + Code + recipe(mpg ~ ., data = mtcars) %>% step_discretize(disp, min_unique = -1) %>% + prep() + Condition + Error in `step_discretize()`: + Caused by error in `prep()`: + ! `min_unique` must be a whole number larger than or equal to 2, not the number -1. + # war when less breaks are generated Code diff --git a/tests/testthat/test-depth.R b/tests/testthat/test-depth.R index bbc0a6149..9a32913d6 100644 --- a/tests/testthat/test-depth.R +++ b/tests/testthat/test-depth.R @@ -187,3 +187,22 @@ test_that("printing", { expect_snapshot(print(rec)) expect_snapshot(prep(rec)) }) + +test_that("bad args", { + skip_if_not_installed("ddalpha") + expect_snapshot( + recipe(Species ~ ., data = iris) %>% + step_depth(all_numeric_predictors(), class = "Species", + metric = "circular") %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(Species ~ ., data = iris) %>% + step_depth(all_numeric_predictors(), class = "Species", + prefix = 0L) %>% + prep(), + error = TRUE + ) +}) + diff --git a/tests/testthat/test-discretize.R b/tests/testthat/test-discretize.R index db5ea1ce5..962908887 100644 --- a/tests/testthat/test-discretize.R +++ b/tests/testthat/test-discretize.R @@ -247,3 +247,20 @@ test_that("tunable is setup to work with extract_parameter_set_dials", { expect_s3_class(params, "parameters") expect_identical(nrow(params), 2L) }) + + +test_that("bad args", { + expect_snapshot( + recipe(mpg ~ ., data = mtcars) %>% + step_discretize(disp, num_breaks = 0) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(mpg ~ ., data = mtcars) %>% + step_discretize(disp, min_unique = -1) %>% + prep(), + error = TRUE + ) +}) + From 99454d221b2f4ae53d15a94e00ed6ba02ff18c44 Mon Sep 17 00:00:00 2001 From: topepo Date: Tue, 29 Oct 2024 15:49:54 -0400 Subject: [PATCH 17/25] update checks and snapshots --- R/dummy_extract.R | 1 + R/dummy_multi_choice.R | 33 +++++++------- tests/testthat/_snaps/dummy_extract.md | 50 +++++++++++++++++++++ tests/testthat/_snaps/dummy_multi_choice.md | 20 +++++++++ tests/testthat/test-dummy_extract.R | 35 +++++++++++++++ tests/testthat/test-dummy_multi_choice.R | 18 ++++++++ 6 files changed, 140 insertions(+), 17 deletions(-) diff --git a/R/dummy_extract.R b/R/dummy_extract.R index 205750d9c..ce356e83f 100644 --- a/R/dummy_extract.R +++ b/R/dummy_extract.R @@ -157,6 +157,7 @@ prep.step_dummy_extract <- function(x, training, info = NULL, ...) { } else { check_number_decimal(x$threshold, min = 0, arg = "threshold") } + check_string(x$other, arg = "other", allow_null = TRUE) check_string(x$sep, arg = "sep", allow_null = TRUE) check_string(x$pattern, arg = "pattern", allow_null = TRUE) check_function(x$naming, arg = "naming", allow_empty = FALSE) diff --git a/R/dummy_multi_choice.R b/R/dummy_multi_choice.R index 1a7a9414f..27f1e3fa9 100644 --- a/R/dummy_multi_choice.R +++ b/R/dummy_multi_choice.R @@ -12,23 +12,23 @@ #' This is `NULL` until the step is trained by [prep()]. #' @template step-return #' @family dummy variable and encoding steps -#' +#' #' @details -#' The overall proportion (or total counts) of the categories are computed. The -#' `"other"` category is used in place of any categorical levels whose +#' The overall proportion (or total counts) of the categories are computed. The +#' `"other"` category is used in place of any categorical levels whose #' individual proportion (or frequency) in the training set is less than #' `threshold`. -#' +#' #' This step produces a number of columns, based on the number of categories it -#' finds. The naming of the columns is determined by the function based on the -#' `naming` argument. The default is to return `_`. By -#' default `prefix` is `NULL`, which means the name of the first column +#' finds. The naming of the columns is determined by the function based on the +#' `naming` argument. The default is to return `_`. By +#' default `prefix` is `NULL`, which means the name of the first column #' selected will be used in place. #' #' @template dummy-naming #' #' @details -#' +#' #' ```{r, echo = FALSE, results="asis"} #' step <- "step_dummy_multi_choice" #' result <- knitr::knit_child("man/rmd/tunable-args.Rmd") @@ -74,7 +74,7 @@ #' #' bake(dummy_multi_choice_rec2, new_data = NULL) #' tidy(dummy_multi_choice_rec2, number = 1) -#' +#' #' @export step_dummy_multi_choice <- function(recipe, ..., @@ -90,14 +90,6 @@ step_dummy_multi_choice <- function(recipe, skip = FALSE, id = rand_id("dummy_multi_choice")) { - if (!is_tune(threshold)) { - if (threshold >= 1) { - check_number_whole(threshold) - } else { - check_number_decimal(threshold, min = 0) - } - } - add_step( recipe, step_dummy_multi_choice_new( @@ -141,6 +133,13 @@ step_dummy_multi_choice_new <- prep.step_dummy_multi_choice <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) check_type(training[, col_names], types = c("nominal", "logical")) + if (x$threshold >= 1) { + check_number_whole(x$threshold, arg = "threshold") + } else { + check_number_decimal(x$threshold, min = 0, arg = "threshold") + } + check_string(x$other, arg = "other", allow_null = TRUE) + check_function(x$naming, arg = "naming", allow_empty = FALSE) levels <- purrr::map(training[, col_names], levels) levels <- vctrs::list_unchop(levels, ptype = character(), name_spec = rlang::zap()) diff --git a/tests/testthat/_snaps/dummy_extract.md b/tests/testthat/_snaps/dummy_extract.md index c65d1a4ab..5665645c9 100644 --- a/tests/testthat/_snaps/dummy_extract.md +++ b/tests/testthat/_snaps/dummy_extract.md @@ -140,3 +140,53 @@ -- Operations * Extract patterns from: medium | Trained +# bad args + + Code + recipe(~colors, data = color_examples) %>% step_dummy_extract(colors, pattern = "(?<=')[^',]+(?=')", + other = 2) %>% prep() + Condition + Error in `step_dummy_extract()`: + Caused by error in `prep()`: + ! `other` must be a single string or `NULL`, not the number 2. + +--- + + Code + recipe(~colors, data = color_examples) %>% step_dummy_extract(colors, pattern = "(?<=')[^',]+(?=')", + other = 2) %>% prep() + Condition + Error in `step_dummy_extract()`: + Caused by error in `prep()`: + ! `other` must be a single string or `NULL`, not the number 2. + +--- + + Code + recipe(~colors, data = color_examples) %>% step_dummy_extract(colors, pattern = "(?<=')[^',]+(?=')", + sep = 2) %>% prep() + Condition + Error in `step_dummy_extract()`: + Caused by error in `prep()`: + ! `sep` must be a single string or `NULL`, not the number 2. + +--- + + Code + recipe(~colors, data = color_examples) %>% step_dummy_extract(colors, pattern = 2) %>% + prep() + Condition + Error in `step_dummy_extract()`: + Caused by error in `prep()`: + ! `pattern` must be a single string or `NULL`, not the number 2. + +--- + + Code + recipe(~colors, data = color_examples) %>% step_dummy_extract(colors, pattern = "(?<=')[^',]+(?=')", + naming = NULL) %>% prep() + Condition + Error in `step_dummy_extract()`: + Caused by error in `prep()`: + ! `naming` must be a function, not `NULL`. + diff --git a/tests/testthat/_snaps/dummy_multi_choice.md b/tests/testthat/_snaps/dummy_multi_choice.md index f0db3e1f4..0b890bd92 100644 --- a/tests/testthat/_snaps/dummy_multi_choice.md +++ b/tests/testthat/_snaps/dummy_multi_choice.md @@ -103,3 +103,23 @@ -- Operations * Multi-choice dummy variables from: lang_1, lang_2, lang_3, ... | Trained +# bad args + + Code + dummy_multi_choice_rec <- recipe(~., data = languages) %>% + step_dummy_multi_choice(starts_with("lang"), other = 2) %>% prep() + Condition + Error in `step_dummy_multi_choice()`: + Caused by error in `prep()`: + ! `other` must be a single string or `NULL`, not the number 2. + +--- + + Code + dummy_multi_choice_rec <- recipe(~., data = languages) %>% + step_dummy_multi_choice(starts_with("lang"), naming = NULL) %>% prep() + Condition + Error in `step_dummy_multi_choice()`: + Caused by error in `prep()`: + ! `naming` must be a function, not `NULL`. + diff --git a/tests/testthat/test-dummy_extract.R b/tests/testthat/test-dummy_extract.R index ba57de7b5..51e47ae08 100644 --- a/tests/testthat/test-dummy_extract.R +++ b/tests/testthat/test-dummy_extract.R @@ -396,3 +396,38 @@ test_that("printing", { expect_snapshot(print(rec)) expect_snapshot(prep(rec)) }) + +test_that("bad args", { + + expect_snapshot( + recipe(~colors, data = color_examples) %>% + step_dummy_extract(colors, pattern = "(?<=')[^',]+(?=')", other = 2) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(~colors, data = color_examples) %>% + step_dummy_extract(colors, pattern = "(?<=')[^',]+(?=')", other = 2) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(~colors, data = color_examples) %>% + step_dummy_extract(colors, pattern = "(?<=')[^',]+(?=')", sep = 2) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(~colors, data = color_examples) %>% + step_dummy_extract(colors, pattern = 2) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(~colors, data = color_examples) %>% + step_dummy_extract(colors, pattern = "(?<=')[^',]+(?=')", naming = NULL) %>% + prep(), + error = TRUE + ) +}) + diff --git a/tests/testthat/test-dummy_multi_choice.R b/tests/testthat/test-dummy_multi_choice.R index c0252f6d2..c01f5d308 100644 --- a/tests/testthat/test-dummy_multi_choice.R +++ b/tests/testthat/test-dummy_multi_choice.R @@ -248,3 +248,21 @@ test_that("tunable is setup to work with extract_parameter_set_dials", { expect_s3_class(params, "parameters") expect_identical(nrow(params), 1L) }) + + +test_that("bad args", { + + expect_snapshot( + dummy_multi_choice_rec <- recipe(~., data = languages) %>% + step_dummy_multi_choice(starts_with("lang"), other = 2) %>% + prep(), + error = TRUE + ) + expect_snapshot( + dummy_multi_choice_rec <- recipe(~., data = languages) %>% + step_dummy_multi_choice(starts_with("lang"), naming = NULL) %>% + prep(), + error = TRUE + ) +}) + From 02fd389e6e7b2fda1ee5290ceff60359da2c38ee Mon Sep 17 00:00:00 2001 From: topepo Date: Tue, 29 Oct 2024 15:56:34 -0400 Subject: [PATCH 18/25] update checks and snapshots --- R/dummy.R | 14 ++++++++------ R/filter_missing.R | 1 + tests/testthat/_snaps/dummy.md | 20 ++++++++++++++++++++ tests/testthat/_snaps/filter_missing.md | 10 ++++++++++ tests/testthat/test-dummy.R | 21 ++++++++++++++++++++- tests/testthat/test-filter_missing.R | 12 ++++++++++++ 6 files changed, 71 insertions(+), 7 deletions(-) diff --git a/R/dummy.R b/R/dummy.R index 8395d698b..226b4955b 100644 --- a/R/dummy.R +++ b/R/dummy.R @@ -124,7 +124,7 @@ step_dummy <- keep_original_cols = FALSE, skip = FALSE, id = rand_id("dummy")) { - + if (lifecycle::is_present(preserve)) { lifecycle::deprecate_stop( "0.1.16", @@ -172,6 +172,8 @@ step_dummy_new <- prep.step_dummy <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) check_type(training[, col_names], types = c("factor", "ordered")) + check_bool(x$one_hot, arg = "one_hot") + check_function(x$naming, arg = "naming", allow_empty = FALSE) if (length(col_names) > 0) { ## I hate doing this but currently we are going to have @@ -229,14 +231,14 @@ warn_new_levels <- function(dat, lvl, column, step, details = NULL) { msg <- c("!" = "There are new levels in {.var {column}}: {.val {lvl2}}.") if (any(is.na(lvl2))) { msg <- c( - msg, + msg, "i" = "Consider using {.help [step_unknown()](recipes::step_unknown)} \\ before {.fn {step}} to handle missing values." ) } if (!all(is.na(lvl2))) { msg <- c( - msg, + msg, "i" = "Consider using {.help [step_novel()](recipes::step_novel)} \\ before {.fn {step}} to handle unseen values." ) @@ -278,9 +280,9 @@ bake.step_dummy <- function(object, new_data, ...) { } warn_new_levels( - new_data[[col_name]], - levels_values, - col_name, + new_data[[col_name]], + levels_values, + col_name, step = "step_dummy" ) diff --git a/R/filter_missing.R b/R/filter_missing.R index e662f4fde..62ae2924d 100644 --- a/R/filter_missing.R +++ b/R/filter_missing.R @@ -91,6 +91,7 @@ step_filter_missing_new <- #' @export prep.step_filter_missing <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) + check_number_decimal(x$threshold, min = 0, max = 1, arg = "threshold") wts <- get_case_weights(info, training) were_weights_used <- are_weights_used(wts, unsupervised = TRUE) diff --git a/tests/testthat/_snaps/dummy.md b/tests/testthat/_snaps/dummy.md index 18c6da906..f64386c0c 100644 --- a/tests/testthat/_snaps/dummy.md +++ b/tests/testthat/_snaps/dummy.md @@ -241,3 +241,23 @@ -- Operations * Dummy variables from: city and zip | Trained +# bad args + + Code + recipe(~ city + sqft + price, data = Sacramento) %>% step_dummy(city, one_hot = 2) %>% + prep() + Condition + Error in `step_dummy()`: + Caused by error in `prep()`: + ! `one_hot` must be `TRUE` or `FALSE`, not the number 2. + +--- + + Code + recipe(~ city + sqft + price, data = Sacramento) %>% step_dummy(city, naming = NULL) %>% + prep() + Condition + Error in `step_dummy()`: + Caused by error in `prep()`: + ! `naming` must be a function, not `NULL`. + diff --git a/tests/testthat/_snaps/filter_missing.md b/tests/testthat/_snaps/filter_missing.md index 7efe3cf85..6221fd0e3 100644 --- a/tests/testthat/_snaps/filter_missing.md +++ b/tests/testthat/_snaps/filter_missing.md @@ -104,3 +104,13 @@ -- Operations * Missing value column filter removed: dbl2, dbl3, dbl4, dbl5, ... | Trained +# bad args + + Code + recipe(~., data = dat) %>% step_filter_missing(all_predictors(), threshold = - + 0.2) %>% prep() + Condition + Error in `step_filter_missing()`: + Caused by error in `prep()`: + ! `threshold` must be a number between 0 and 1, not the number -0.2. + diff --git a/tests/testthat/test-dummy.R b/tests/testthat/test-dummy.R index caf62dfc1..2a576e524 100644 --- a/tests/testthat/test-dummy.R +++ b/tests/testthat/test-dummy.R @@ -364,7 +364,7 @@ test_that("bake method errors when needed non-standard role columns are missing" dummy_trained <- prep(dummy, training = sacr_fac, verbose = FALSE, strings_as_factors = FALSE) expect_snapshot( - error = TRUE, + error = TRUE, bake(dummy_trained, new_data = sacr_fac[, 3:4], all_predictors()) ) }) @@ -454,3 +454,22 @@ test_that("printing", { expect_snapshot(print(rec)) expect_snapshot(prep(rec)) }) + + +test_that("bad args", { + skip_if_not_installed("modeldata") + data(Sacramento, package = "modeldata") + + expect_snapshot( + recipe(~ city + sqft + price, data = Sacramento) %>% + step_dummy(city, one_hot = 2) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(~ city + sqft + price, data = Sacramento) %>% + step_dummy(city, naming = NULL) %>% + prep(), + error = TRUE + ) +}) diff --git a/tests/testthat/test-filter_missing.R b/tests/testthat/test-filter_missing.R index bbb331fc9..afaac62da 100644 --- a/tests/testthat/test-filter_missing.R +++ b/tests/testthat/test-filter_missing.R @@ -161,3 +161,15 @@ test_that("tunable is setup to work with extract_parameter_set_dials", { expect_s3_class(params, "parameters") expect_identical(nrow(params), 1L) }) + + +test_that("bad args", { + + expect_snapshot( + recipe(~., data = dat) %>% + step_filter_missing(all_predictors(), threshold = -.2) %>% + prep(), + error = TRUE + ) + +}) From 051f3dcd3217ff0bd815932ded8ad15076816047 Mon Sep 17 00:00:00 2001 From: topepo Date: Wed, 30 Oct 2024 11:45:38 -0400 Subject: [PATCH 19/25] update checks and snapshots --- R/hyperbolic.R | 8 +++++-- R/ica.R | 1 + R/impute_bag.R | 2 ++ R/impute_knn.R | 1 + R/impute_mean.R | 1 + R/impute_roll.R | 14 ++++++------ man/step_impute_roll.Rd | 2 +- tests/testthat/_snaps/hyperbolic.md | 19 +++++++++++++--- tests/testthat/_snaps/impute_bag.md | 20 +++++++++++++++++ tests/testthat/_snaps/impute_knn.md | 10 +++++++++ tests/testthat/_snaps/impute_mean.md | 10 +++++++++ tests/testthat/_snaps/impute_roll.md | 31 ++++++++++++++++++++++++++ tests/testthat/test-hyperbolic.R | 7 +++--- tests/testthat/test-ica.R | 13 ++++++++++- tests/testthat/test-impute_bag.R | 26 +++++++++++++++++++++- tests/testthat/test-impute_knn.R | 15 ++++++++++++- tests/testthat/test-impute_mean.R | 14 ++++++++++++ tests/testthat/test-impute_roll.R | 33 +++++++++++++++++++++++++++- 18 files changed, 207 insertions(+), 20 deletions(-) diff --git a/R/hyperbolic.R b/R/hyperbolic.R index 4f376bc01..16d0100e0 100644 --- a/R/hyperbolic.R +++ b/R/hyperbolic.R @@ -58,8 +58,9 @@ step_hyperbolic <- skip = FALSE, id = rand_id("hyperbolic")) { - func <- rlang::arg_match(func) - + if (!is_tune(func)) { + func <- rlang::arg_match(func) + } add_step( recipe, step_hyperbolic_new( @@ -94,6 +95,9 @@ step_hyperbolic_new <- prep.step_hyperbolic <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) check_type(training[, col_names], types = c("double", "integer")) + func <- x$func + x$func <- rlang::arg_match(func, c("sinh", "cosh", "tanh"), error_arg = "func") + check_bool(x$inverse, error_arg = "inverse") step_hyperbolic_new( terms = x$terms, diff --git a/R/ica.R b/R/ica.R index c3c3a13bf..fb7beb14b 100644 --- a/R/ica.R +++ b/R/ica.R @@ -146,6 +146,7 @@ step_ica_new <- prep.step_ica <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) check_type(training[, col_names], types = c("double", "integer")) + check_string(x$prefix, arg = "prefix") if (x$num_comp > 0 && length(col_names) > 0) { x$num_comp <- min(x$num_comp, length(col_names)) diff --git a/R/impute_bag.R b/R/impute_bag.R index 990be8381..dbec16d8d 100644 --- a/R/impute_bag.R +++ b/R/impute_bag.R @@ -210,6 +210,8 @@ impute_var_lists <- function(to_impute, impute_using, training, info) { #' @export prep.step_impute_bag <- function(x, training, info = NULL, ...) { + check_number_whole(x$trees, arg = "trees", min = 1) + check_number_whole(x$seed_val, arg = "seed_val") var_lists <- impute_var_lists( to_impute = x$terms, diff --git a/R/impute_knn.R b/R/impute_knn.R index b47f5324d..a8f67914d 100644 --- a/R/impute_knn.R +++ b/R/impute_knn.R @@ -168,6 +168,7 @@ step_impute_knn_new <- #' @export prep.step_impute_knn <- function(x, training, info = NULL, ...) { + check_number_whole(x$neighbors, arg = "neighbors", min = 1) var_lists <- impute_var_lists( to_impute = x$terms, diff --git a/R/impute_mean.R b/R/impute_mean.R index dd8c105e3..143c52639 100644 --- a/R/impute_mean.R +++ b/R/impute_mean.R @@ -129,6 +129,7 @@ trim <- function(x, trim) { prep.step_impute_mean <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) check_type(training[, col_names], types = c("double", "integer")) + check_number_decimal(x$trim, arg = "trim", min = 0, max = 1/2) wts <- get_case_weights(info, training) were_weights_used <- are_weights_used(wts, unsupervised = TRUE) diff --git a/R/impute_roll.R b/R/impute_roll.R index c11a7d69c..c076c6bc3 100644 --- a/R/impute_roll.R +++ b/R/impute_roll.R @@ -85,15 +85,9 @@ step_impute_roll <- trained = FALSE, columns = NULL, statistic = median, - window = 5, + window = 5L, skip = FALSE, id = rand_id("impute_roll")) { - if (!is_tune(window)) { - if (window < 3 | window %% 2 != 1) { - cli::cli_abort("{.arg window} should be an odd integer >= 3.") - } - window <- as.integer(floor(window)) - } add_step( recipe, @@ -129,6 +123,12 @@ step_impute_roll_new <- prep.step_impute_roll <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) check_type(training[, col_names], types = "double") + check_function(x$statistic, arg = "statistic", ) + check_number_whole(x$window, arg = "window", min = 3) + if (x$window %% 2 != 1) { + cli::cli_abort("{.arg window} should be an odd integer >= 3.") + } + x$window <- as.integer(x$window) step_impute_roll_new( terms = x$terms, diff --git a/man/step_impute_roll.Rd b/man/step_impute_roll.Rd index 3a4dab74c..553e6302c 100644 --- a/man/step_impute_roll.Rd +++ b/man/step_impute_roll.Rd @@ -11,7 +11,7 @@ step_impute_roll( trained = FALSE, columns = NULL, statistic = median, - window = 5, + window = 5L, skip = FALSE, id = rand_id("impute_roll") ) diff --git a/tests/testthat/_snaps/hyperbolic.md b/tests/testthat/_snaps/hyperbolic.md index d2a29ae84..902bde024 100644 --- a/tests/testthat/_snaps/hyperbolic.md +++ b/tests/testthat/_snaps/hyperbolic.md @@ -1,7 +1,20 @@ -# wrong function +# wrong arguments - `func` must be one of "sinh", "cosh", or "tanh", not "cos". - i Did you mean "cosh"? + Code + step_hyperbolic(rec, func = "cos") %>% prep() + Condition + Error in `step_hyperbolic()`: + ! `func` must be one of "sinh", "cosh", or "tanh", not "cos". + i Did you mean "cosh"? + +--- + + Code + step_hyperbolic(rec, inverse = 2) %>% prep() + Condition + Error in `step_hyperbolic()`: + Caused by error in `prep()`: + ! `x$inverse` must be `TRUE` or `FALSE`, not the number 2. # bake method errors when needed non-standard role columns are missing diff --git a/tests/testthat/_snaps/impute_bag.md b/tests/testthat/_snaps/impute_bag.md index b27599d9a..0742b7e45 100644 --- a/tests/testthat/_snaps/impute_bag.md +++ b/tests/testthat/_snaps/impute_bag.md @@ -97,3 +97,23 @@ -- Operations * Bagged tree imputation for: carbon | Trained +# bad args + + Code + recipe(~., data = mtcars) %>% step_impute_bag(all_predictors(), trees = -1) %>% + prep() + Condition + Error in `step_impute_bag()`: + Caused by error in `prep()`: + ! `trees` must be a whole number larger than or equal to 1, not the number -1. + +--- + + Code + recipe(~., data = mtcars) %>% step_impute_bag(all_predictors(), seed_val = 1:4) %>% + prep() + Condition + Error in `step_impute_bag()`: + Caused by error in `prep()`: + ! `seed_val` must be a whole number, not an integer vector. + diff --git a/tests/testthat/_snaps/impute_knn.md b/tests/testthat/_snaps/impute_knn.md index b545960e6..04fdf0915 100644 --- a/tests/testthat/_snaps/impute_knn.md +++ b/tests/testthat/_snaps/impute_knn.md @@ -123,3 +123,13 @@ -- Operations * K-nearest neighbor imputation for: carbon and nitrogen | Trained +# bad args + + Code + recipe(~., data = mtcars) %>% step_impute_knn(all_predictors(), neighbors = 0L) %>% + prep() + Condition + Error in `step_impute_knn()`: + Caused by error in `prep()`: + ! `neighbors` must be a whole number larger than or equal to 1, not the number 0. + diff --git a/tests/testthat/_snaps/impute_mean.md b/tests/testthat/_snaps/impute_mean.md index 76c073961..3528eb3c6 100644 --- a/tests/testthat/_snaps/impute_mean.md +++ b/tests/testthat/_snaps/impute_mean.md @@ -126,3 +126,13 @@ -- Operations * Mean imputation for: Age, Assets, Income | Trained +# bad args + + Code + recipe(~., data = mtcars) %>% step_impute_mean(all_predictors(), trim = 0.6) %>% + prep() + Condition + Error in `step_impute_mean()`: + Caused by error in `prep()`: + ! `trim` must be a number between 0 and 0.5, not the number 0.6. + diff --git a/tests/testthat/_snaps/impute_roll.md b/tests/testthat/_snaps/impute_roll.md index 9d7ab7e47..7b6ad030a 100644 --- a/tests/testthat/_snaps/impute_roll.md +++ b/tests/testthat/_snaps/impute_roll.md @@ -16,6 +16,7 @@ step_impute_roll(all_predictors(), window = 4) %>% prep(training = example_data) Condition Error in `step_impute_roll()`: + Caused by error in `prep()`: ! `window` should be an odd integer >= 3. --- @@ -29,6 +30,36 @@ x All columns selected for the step should be double. * 1 integer variable found: `x4` +--- + + Code + recipe(~., data = mtcars) %>% step_impute_roll(all_predictors(), statistic = mean, + window = 1) %>% prep() + Condition + Error in `step_impute_roll()`: + Caused by error in `prep()`: + ! `window` must be a whole number larger than or equal to 3, not the number 1. + +--- + + Code + recipe(~., data = mtcars) %>% step_impute_roll(all_predictors(), statistic = mean, + window = 4) %>% prep() + Condition + Error in `step_impute_roll()`: + Caused by error in `prep()`: + ! `window` should be an odd integer >= 3. + +--- + + Code + recipe(~., data = mtcars) %>% step_impute_roll(all_predictors(), statistic = NULL) %>% + prep() + Condition + Error in `step_impute_roll()`: + Caused by error in `prep()`: + ! `statistic` must be a function, not `NULL`. + # bake method errors when needed non-standard role columns are missing Code diff --git a/tests/testthat/test-hyperbolic.R b/tests/testthat/test-hyperbolic.R index 510ddef39..3f7fed891 100644 --- a/tests/testthat/test-hyperbolic.R +++ b/tests/testthat/test-hyperbolic.R @@ -54,9 +54,10 @@ test_that("simple hyperbolic trans", { }) -test_that("wrong function", { +test_that("wrong arguments", { rec <- recipe(mpg ~ ., mtcars) - expect_snapshot_error(step_hyperbolic(rec, func = "cos")) + expect_snapshot(step_hyperbolic(rec, func = "cos") %>% prep(), error = TRUE) + expect_snapshot(step_hyperbolic(rec, inverse = 2) %>% prep(), error = TRUE) }) # Infrastructure --------------------------------------------------------------- @@ -70,7 +71,7 @@ test_that("bake method errors when needed non-standard role columns are missing" rec_trained <- prep(rec, training = ex_dat, verbose = FALSE) expect_snapshot( - error = TRUE, + error = TRUE, bake(rec_trained, new_data = ex_dat[, 2, drop = FALSE]) ) }) diff --git a/tests/testthat/test-ica.R b/tests/testthat/test-ica.R index 96c773d17..0fba1d69b 100644 --- a/tests/testthat/test-ica.R +++ b/tests/testthat/test-ica.R @@ -174,7 +174,7 @@ test_that("bake method errors when needed non-standard role columns are missing" ica_extract_trained <- prep(ica_extract, training = biomass_tr, verbose = FALSE) expect_snapshot( - error = TRUE, + error = TRUE, bake(ica_extract_trained, new_data = biomass_tr[, c(-3)]) ) }) @@ -312,3 +312,14 @@ test_that("tunable is setup to work with extract_parameter_set_dials", { expect_s3_class(params, "parameters") expect_identical(nrow(params), 1L) }) + + +test_that("bad args", { + + expect_snapshot( + rec %>% + step_ica(carbon, hydrogen, prefix = 2) %>% + prep(), + error = TRUE + ) +}) diff --git a/tests/testthat/test-impute_bag.R b/tests/testthat/test-impute_bag.R index 3260f88be..cc3b06bb0 100644 --- a/tests/testthat/test-impute_bag.R +++ b/tests/testthat/test-impute_bag.R @@ -144,7 +144,7 @@ test_that("bake method errors when needed non-standard role columns are missing" imputed_trained <- prep(imputed, training = biomass, verbose = FALSE) expect_snapshot( - error = TRUE, + error = TRUE, bake(imputed_trained, new_data = biomass[, c(-3, -9)]) ) }) @@ -208,3 +208,27 @@ test_that("tunable is setup to work with extract_parameter_set_dials", { expect_s3_class(params, "parameters") expect_identical(nrow(params), 1L) }) + + +test_that("bad args", { + + expect_snapshot( + recipe(~., data = mtcars) %>% + step_impute_bag( + all_predictors(), + trees = -1 + ) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(~., data = mtcars) %>% + step_impute_bag( + all_predictors(), + seed_val = 1:4 + ) %>% + prep(), + error = TRUE + ) +}) + diff --git a/tests/testthat/test-impute_knn.R b/tests/testthat/test-impute_knn.R index d65e27f4a..f17aa8a76 100644 --- a/tests/testthat/test-impute_knn.R +++ b/tests/testthat/test-impute_knn.R @@ -217,7 +217,7 @@ test_that("bake method errors when needed non-standard role columns are missing" imputed_trained <- prep(imputed, training = biomass, verbose = FALSE) expect_snapshot( - error = TRUE, + error = TRUE, bake(imputed_trained, new_data = biomass[, c(-4)]) ) }) @@ -289,3 +289,16 @@ test_that("tunable is setup to work with extract_parameter_set_dials", { expect_s3_class(params, "parameters") expect_identical(nrow(params), 1L) }) + + +test_that("bad args", { + expect_snapshot( + recipe(~., data = mtcars) %>% + step_impute_knn( + all_predictors(), + neighbors = 0L + ) %>% + prep(), + error = TRUE + ) +}) diff --git a/tests/testthat/test-impute_mean.R b/tests/testthat/test-impute_mean.R index 356dd30f1..a637d186e 100644 --- a/tests/testthat/test-impute_mean.R +++ b/tests/testthat/test-impute_mean.R @@ -269,3 +269,17 @@ test_that("tunable is setup to work with extract_parameter_set_dials", { expect_s3_class(params, "parameters") expect_identical(nrow(params), 1L) }) + + +test_that("bad args", { + expect_snapshot( + recipe(~., data = mtcars) %>% + step_impute_mean( + all_predictors(), + trim = 0.6 + ) %>% + prep(), + error = TRUE + ) +}) + diff --git a/tests/testthat/test-impute_roll.R b/tests/testthat/test-impute_roll.R index 591ac8a3c..b36f4a7fe 100644 --- a/tests/testthat/test-impute_roll.R +++ b/tests/testthat/test-impute_roll.R @@ -118,7 +118,7 @@ test_that("bake method errors when needed non-standard role columns are missing" prep(training = example_data) expect_snapshot( - error = TRUE, + error = TRUE, bake(seven_pt, new_data = example_data[, c(-2)]) ) }) @@ -182,3 +182,34 @@ test_that("tunable is setup to work with extract_parameter_set_dials", { expect_s3_class(params, "parameters") expect_identical(nrow(params), 2L) }) + + +test_that("bad args", { + expect_snapshot( + recipe(~., data = mtcars) %>% + step_impute_roll( + all_predictors(), + statistic = mean, window = 1 + ) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(~., data = mtcars) %>% + step_impute_roll( + all_predictors(), + statistic = mean, window = 4 + ) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(~., data = mtcars) %>% + step_impute_roll( + all_predictors(), + statistic = NULL + ) %>% + prep(), + error = TRUE + ) +}) From 79bc114e7f6308e2678b81dfa0d256970f4c1346 Mon Sep 17 00:00:00 2001 From: topepo Date: Wed, 30 Oct 2024 12:13:47 -0400 Subject: [PATCH 20/25] update checks and snapshots --- R/integer.R | 2 ++ R/interact.R | 5 ++-- R/inverse.R | 1 + R/isomap.R | 2 ++ R/kpca_poly.R | 6 +++++ R/kpca_rbf.R | 4 +++ tests/testthat/_snaps/integer.md | 20 +++++++++++++++ tests/testthat/_snaps/interact.md | 18 ++++++++++++++ tests/testthat/_snaps/inverse.md | 10 ++++++++ tests/testthat/_snaps/isomap.md | 20 +++++++++++++++ tests/testthat/_snaps/kpca_poly.md | 40 ++++++++++++++++++++++++++++++ tests/testthat/_snaps/kpca_rbf.md | 20 +++++++++++++++ tests/testthat/test-integer.R | 16 ++++++++++++ tests/testthat/test-interact.R | 32 +++++++++++++++++------- tests/testthat/test-inverse.R | 9 +++++++ tests/testthat/test-isomap.R | 15 +++++++++++ tests/testthat/test-kpca_poly.R | 27 ++++++++++++++++++++ tests/testthat/test-kpca_rbf.R | 16 ++++++++++++ 18 files changed, 252 insertions(+), 11 deletions(-) diff --git a/R/integer.R b/R/integer.R index 7a8712cc5..666858098 100644 --- a/R/integer.R +++ b/R/integer.R @@ -121,6 +121,8 @@ prep.step_integer <- function(x, training, info = NULL, ...) { types = c("string", "factor", "ordered", "integer", "double", "logical", "date", "datetime") ) + check_bool(x$strict, arg = "strict") + check_bool(x$zero_based, arg = "zero_based") step_integer_new( terms = x$terms, diff --git a/R/interact.R b/R/interact.R index 0ed8a8bd8..22527602c 100644 --- a/R/interact.R +++ b/R/interact.R @@ -114,6 +114,7 @@ step_interact <- ## Initializes a new object step_interact_new <- function(terms, role, trained, objects, sep, keep_original_cols, skip, id) { + check_string(sep, call = rlang::call2("step_interact")) step( subclass = "interact", terms = terms, @@ -403,7 +404,7 @@ find_selectors <- function(f) { } else { # User supplied incorrect input cli::cli_abort( - "Don't know how to handle type {.code {typeof(f)}}.", + "Don't know how to handle type {.code {typeof(f)}}.", .internal = TRUE ) } @@ -423,7 +424,7 @@ replace_selectors <- function(x, elem, value) { } else { # User supplied incorrect input cli::cli_abort( - "Don't know how to handle type {.code {typeof(f)}}.", + "Don't know how to handle type {.code {typeof(f)}}.", .internal = TRUE ) } diff --git a/R/inverse.R b/R/inverse.R index 88fccff32..acd86f116 100644 --- a/R/inverse.R +++ b/R/inverse.R @@ -82,6 +82,7 @@ step_inverse_new <- prep.step_inverse <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) check_type(training[, col_names], types = c("double", "integer")) + check_number_decimal(x$offset, arg = "offset") step_inverse_new( terms = x$terms, diff --git a/R/isomap.R b/R/isomap.R index 17ef23d10..4369fc369 100644 --- a/R/isomap.R +++ b/R/isomap.R @@ -155,6 +155,8 @@ step_isomap_new <- prep.step_isomap <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) check_type(training[, col_names], types = c("double", "integer")) + check_number_whole(x$neighbors, arg = "neighbors", min = 1) + check_string(x$prefix, arg = "prefix") if (x$num_terms > 0 && length(col_names) > 0L) { x$num_terms <- min(x$num_terms, ncol(training)) diff --git a/R/kpca_poly.R b/R/kpca_poly.R index dd93a99cf..d729a2191 100644 --- a/R/kpca_poly.R +++ b/R/kpca_poly.R @@ -124,8 +124,14 @@ step_kpca_poly_new <- prep.step_kpca_poly <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) check_type(training[, col_names], types = c("double", "integer")) + check_number_whole(x$degree, arg = "degree", min = 1) + check_number_decimal(x$scale_factor, arg = "scale_factor", min = 0) + check_number_decimal(x$offset, arg = "offset") + check_string(x$prefix, arg = "prefix") if (x$num_comp > 0 && length(col_names) > 0) { + check_number_whole(x$num_comp, arg = "num_comp", min = 0) + cl <- rlang::call2( "kpca", diff --git a/R/kpca_rbf.R b/R/kpca_rbf.R index d96afec52..08220a104 100644 --- a/R/kpca_rbf.R +++ b/R/kpca_rbf.R @@ -118,8 +118,12 @@ step_kpca_rbf_new <- prep.step_kpca_rbf <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) check_type(training[, col_names], types = c("double", "integer")) + check_number_decimal(x$sigma, arg = "sigma", min = .Machine$double.eps) + check_string(x$prefix, arg = "prefix") if (x$num_comp > 0 && length(col_names) > 0) { + check_number_whole(x$num_comp, arg = "num_comp", min = 0) + cl <- rlang::call2( "kpca", diff --git a/tests/testthat/_snaps/integer.md b/tests/testthat/_snaps/integer.md index e6dfa7168..bf8e72760 100644 --- a/tests/testthat/_snaps/integer.md +++ b/tests/testthat/_snaps/integer.md @@ -74,3 +74,23 @@ -- Operations * Integer encoding for: x, y, z | Trained +# bad args + + Code + recipe(~ x + y + z, data = tr_dat) %>% step_integer(all_predictors(), strict = "yes") %>% + prep() + Condition + Error in `step_integer()`: + Caused by error in `prep()`: + ! `strict` must be `TRUE` or `FALSE`, not the string "yes". + +--- + + Code + recipe(~ x + y + z, data = tr_dat) %>% step_integer(all_predictors(), + zero_based = "sure!") %>% prep() + Condition + Error in `step_integer()`: + Caused by error in `prep()`: + ! `zero_based` must be `TRUE` or `FALSE`, not the string "sure!". + diff --git a/tests/testthat/_snaps/interact.md b/tests/testthat/_snaps/interact.md index 5f44474ad..c82d9cc17 100644 --- a/tests/testthat/_snaps/interact.md +++ b/tests/testthat/_snaps/interact.md @@ -114,3 +114,21 @@ -- Operations * Interactions with: x1:x2 | Trained +# bad args + + Code + recipe(mpg ~ ., data = mtcars) %>% step_interact(~ disp::wt, sep = TRUE) %>% + prep() + Condition + Error in `step_interact()`: + ! `sep` must be a single string, not `TRUE`. + +--- + + Code + recipe(~ x + y + z, data = tr_dat) %>% step_integer(all_predictors(), + zero_based = "sure!") %>% prep() + Condition + Error: + ! object 'tr_dat' not found + diff --git a/tests/testthat/_snaps/inverse.md b/tests/testthat/_snaps/inverse.md index 5d396ca68..1899a7f9f 100644 --- a/tests/testthat/_snaps/inverse.md +++ b/tests/testthat/_snaps/inverse.md @@ -74,3 +74,13 @@ -- Operations * Inverse transformation on: x1, x2, x3, x4 | Trained +# bad args + + Code + recipe(~., data = ex_dat) %>% step_inverse(x1, x2, x3, x4, offset = function(x) + x / 3) %>% prep() + Condition + Error in `step_inverse()`: + Caused by error in `prep()`: + ! `offset` must be a number, not a function. + diff --git a/tests/testthat/_snaps/isomap.md b/tests/testthat/_snaps/isomap.md index 74096af2a..65c2e7230 100644 --- a/tests/testthat/_snaps/isomap.md +++ b/tests/testthat/_snaps/isomap.md @@ -127,3 +127,23 @@ -- Operations * Isomap approximation with: x1, x2, x3 | Trained +# bad args + + Code + recipe(~., data = mtcars) %>% step_isomap(all_predictors(), num_terms = 2, + neighbors = -1 / 3) %>% prep() + Condition + Error in `step_isomap()`: + Caused by error in `prep()`: + ! `neighbors` must be a whole number, not the number -0.33. + +--- + + Code + recipe(~., data = mtcars) %>% step_isomap(all_predictors(), prefix = NULL) %>% + prep() + Condition + Error in `step_isomap()`: + Caused by error in `prep()`: + ! `prefix` must be a single string, not `NULL`. + diff --git a/tests/testthat/_snaps/kpca_poly.md b/tests/testthat/_snaps/kpca_poly.md index b5bf211a3..1fa7623f7 100644 --- a/tests/testthat/_snaps/kpca_poly.md +++ b/tests/testthat/_snaps/kpca_poly.md @@ -118,3 +118,43 @@ -- Training information Training data contained 100 data points and no incomplete rows. +# bad args + + Code + recipe(~., data = tr_dat) %>% step_kpca_poly(all_numeric_predictors(), degree = 1.1) %>% + prep() + Condition + Error in `step_kpca_poly()`: + Caused by error in `prep()`: + ! `degree` must be a whole number, not the number 1.1. + +--- + + Code + recipe(~., data = tr_dat) %>% step_kpca_poly(all_numeric_predictors(), + scale_factor = -1.1) %>% prep() + Condition + Error in `step_kpca_poly()`: + Caused by error in `prep()`: + ! `scale_factor` must be a number larger than or equal to 0, not the number -1.1. + +--- + + Code + recipe(~., data = tr_dat) %>% step_kpca_poly(all_numeric_predictors(), offset = "a") %>% + prep() + Condition + Error in `step_kpca_poly()`: + Caused by error in `prep()`: + ! `offset` must be a number, not the string "a". + +--- + + Code + recipe(~., data = tr_dat) %>% step_kpca_poly(all_numeric_predictors(), prefix = 1) %>% + prep() + Condition + Error in `step_kpca_poly()`: + Caused by error in `prep()`: + ! `prefix` must be a single string, not the number 1. + diff --git a/tests/testthat/_snaps/kpca_rbf.md b/tests/testthat/_snaps/kpca_rbf.md index 41cb8b233..f6719ae11 100644 --- a/tests/testthat/_snaps/kpca_rbf.md +++ b/tests/testthat/_snaps/kpca_rbf.md @@ -108,3 +108,23 @@ -- Training information Training data contained 100 data points and no incomplete rows. +# bad args + + Code + recipe(~., data = tr_dat) %>% step_kpca_rbf(all_numeric_predictors(), sigma = 0) %>% + prep() + Condition + Error in `step_kpca_rbf()`: + Caused by error in `prep()`: + ! `sigma` must be a number larger than or equal to 2.22044604925031e-16, not the number 0. + +--- + + Code + recipe(~., data = tr_dat) %>% step_kpca_rbf(all_numeric_predictors(), prefix = 1) %>% + prep() + Condition + Error in `step_kpca_rbf()`: + Caused by error in `prep()`: + ! `prefix` must be a single string, not the number 1. + diff --git a/tests/testthat/test-integer.R b/tests/testthat/test-integer.R index 7d8c74287..3196eea80 100644 --- a/tests/testthat/test-integer.R +++ b/tests/testthat/test-integer.R @@ -137,3 +137,19 @@ test_that("printing", { expect_snapshot(print(rec)) expect_snapshot(prep(rec)) }) + + +test_that("bad args", { + expect_snapshot( + recipe(~ x + y + z, data = tr_dat) %>% + step_integer(all_predictors(), strict = "yes") %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(~ x + y + z, data = tr_dat) %>% + step_integer(all_predictors(), zero_based = "sure!") %>% + prep(), + error = TRUE + ) +}) diff --git a/tests/testthat/test-interact.R b/tests/testthat/test-interact.R index 5ab8521f0..4e13dfe7f 100644 --- a/tests/testthat/test-interact.R +++ b/tests/testthat/test-interact.R @@ -236,7 +236,7 @@ test_that("replacing selectors in formulas", { test_that('with factors', { int_rec <- recipe(Sepal.Width ~ ., data = iris) %>% step_interact(~ (. - Sepal.Width)^2, sep = ":") - + suppressWarnings( int_rec_trained <- prep(int_rec, iris) ) @@ -285,7 +285,7 @@ test_that("works with long formulas (#1231)", { ccccccccccccccccccc = 1:10, d = 1:10 ) - + df_short <- data.frame( a = 1:10, b = 1:10, @@ -294,14 +294,14 @@ test_that("works with long formulas (#1231)", { ) res_long <- recipe(df_long) %>% - step_interact(~starts_with('bbbbbbbbbbbbbb'):starts_with('cccccccccccccc') + + step_interact(~starts_with('bbbbbbbbbbbbbb'):starts_with('cccccccccccccc') + starts_with('bbbbbbbbbbbbbb'):starts_with('d')) %>% prep() %>% bake(new_data = NULL) %>% unname() res_short <- recipe(df_short) %>% - step_interact(~starts_with('b'):starts_with('c') + + step_interact(~starts_with('b'):starts_with('c') + starts_with('b'):starts_with('d')) %>% prep() %>% bake(new_data = NULL) %>% @@ -313,8 +313,8 @@ test_that("works with long formulas (#1231)", { test_that("gives informative error if terms isn't a formula (#1299)", { expect_snapshot( error = TRUE, - recipe(mpg ~ ., data = mtcars) %>% - step_interact(terms = starts_with("dis")) %>% + recipe(mpg ~ ., data = mtcars) %>% + step_interact(terms = starts_with("dis")) %>% prep() ) }) @@ -323,8 +323,8 @@ test_that("gives informative error if terms isn't a formula (#1299)", { mtcars$am <- as.character(mtcars$am) expect_snapshot( - tmp <- recipe(mpg ~ ., data = mtcars) %>% - step_interact(~disp:am) %>% + tmp <- recipe(mpg ~ ., data = mtcars) %>% + step_interact(~disp:am) %>% prep(strings_as_factors = FALSE) ) }) @@ -343,7 +343,7 @@ test_that("bake method errors when needed non-standard role columns are missing" ) expect_snapshot( - error = TRUE, + error = TRUE, bake(int_rec_trained, dat_tr[, 4:6]) ) }) @@ -434,3 +434,17 @@ test_that("printing", { expect_snapshot(prep(rec)) }) +test_that("bad args", { + expect_snapshot( + recipe(mpg ~ ., data = mtcars) %>% + step_interact(~ disp::wt, sep = TRUE) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(~ x + y + z, data = tr_dat) %>% + step_integer(all_predictors(), zero_based = "sure!") %>% + prep(), + error = TRUE + ) +}) diff --git a/tests/testthat/test-inverse.R b/tests/testthat/test-inverse.R index 66dc80f9b..82602583a 100644 --- a/tests/testthat/test-inverse.R +++ b/tests/testthat/test-inverse.R @@ -92,3 +92,12 @@ test_that("printing", { expect_snapshot(print(rec)) expect_snapshot(prep(rec)) }) + +test_that("bad args", { + expect_snapshot( + recipe(~., data = ex_dat) %>% + step_inverse(x1, x2, x3, x4, offset = function(x) x/3) %>% + prep(), + error = TRUE + ) +}) diff --git a/tests/testthat/test-isomap.R b/tests/testthat/test-isomap.R index 994610dbb..37b393479 100644 --- a/tests/testthat/test-isomap.R +++ b/tests/testthat/test-isomap.R @@ -284,3 +284,18 @@ test_that("tunable is setup to work with extract_parameter_set_dials", { expect_s3_class(params, "parameters") expect_identical(nrow(params), 2L) }) + +test_that("bad args", { + expect_snapshot( + recipe(~., data = mtcars) %>% + step_isomap(all_predictors(), num_terms = 2, neighbors = -1/3) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(~., data = mtcars) %>% + step_isomap(all_predictors(), prefix = NULL) %>% + prep(), + error = TRUE + ) +}) diff --git a/tests/testthat/test-kpca_poly.R b/tests/testthat/test-kpca_poly.R index b24a24ff1..09b5eb3f1 100644 --- a/tests/testthat/test-kpca_poly.R +++ b/tests/testthat/test-kpca_poly.R @@ -240,3 +240,30 @@ test_that("tunable is setup to work with extract_parameter_set_dials", { expect_s3_class(params, "parameters") expect_identical(nrow(params), 4L) }) + +test_that("bad args", { + expect_snapshot( + recipe(~ ., data = tr_dat) %>% + step_kpca_poly(all_numeric_predictors(), degree = 1.1) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(~ ., data = tr_dat) %>% + step_kpca_poly(all_numeric_predictors(), scale_factor = -1.1) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(~ ., data = tr_dat) %>% + step_kpca_poly(all_numeric_predictors(), offset = "a") %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(~ ., data = tr_dat) %>% + step_kpca_poly(all_numeric_predictors(), prefix = 1) %>% + prep(), + error = TRUE + ) +}) diff --git a/tests/testthat/test-kpca_rbf.R b/tests/testthat/test-kpca_rbf.R index b3ef7947c..f522f9b40 100644 --- a/tests/testthat/test-kpca_rbf.R +++ b/tests/testthat/test-kpca_rbf.R @@ -214,3 +214,19 @@ test_that("tunable is setup to work with extract_parameter_set_dials", { expect_s3_class(params, "parameters") expect_identical(nrow(params), 2L) }) + + +test_that("bad args", { + expect_snapshot( + recipe(~ ., data = tr_dat) %>% + step_kpca_rbf(all_numeric_predictors(), sigma = 0) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(~ ., data = tr_dat) %>% + step_kpca_rbf(all_numeric_predictors(), prefix = 1) %>% + prep(), + error = TRUE + ) +}) From dcc992de7144b0a2177f11feb30aaa4d5a6ddfb4 Mon Sep 17 00:00:00 2001 From: topepo Date: Wed, 30 Oct 2024 12:23:43 -0400 Subject: [PATCH 21/25] updates to the kpca tests/checks --- R/kpca.R | 2 ++ R/kpca_poly.R | 6 +++--- R/kpca_rbf.R | 6 +++--- man/step_kpca_poly.Rd | 3 ++- man/step_kpca_rbf.Rd | 3 ++- tests/testthat/_snaps/ica.md | 9 +++++++++ tests/testthat/_snaps/kpca.md | 20 ++++++++++++++++++++ tests/testthat/_snaps/kpca_poly.md | 10 ++++++++++ tests/testthat/_snaps/kpca_rbf.md | 10 ++++++++++ tests/testthat/test-kpca.R | 15 +++++++++++++++ tests/testthat/test-kpca_poly.R | 6 ++++++ tests/testthat/test-kpca_rbf.R | 6 ++++++ 12 files changed, 88 insertions(+), 8 deletions(-) diff --git a/R/kpca.R b/R/kpca.R index 9f2666a93..2ffef2f8d 100644 --- a/R/kpca.R +++ b/R/kpca.R @@ -126,6 +126,8 @@ step_kpca_new <- prep.step_kpca <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) check_type(training[, col_names], types = c("double", "integer")) + check_string(x$prefix, arg = "prefix") + check_number_whole(x$num_comp, arg = "num_comp", min = 0) if (x$num_comp > 0 && length(col_names) > 0) { cl <- diff --git a/R/kpca_poly.R b/R/kpca_poly.R index d729a2191..ff29332e9 100644 --- a/R/kpca_poly.R +++ b/R/kpca_poly.R @@ -6,7 +6,8 @@ #' #' @inheritParams step_pca #' @inheritParams step_center -#' @param degree,scale_factor,offset Numeric values for the polynomial kernel function. +#' @param degree,scale_factor,offset Numeric values for the polynomial kernel +#' function. See the documentation at [kernlab::polydot()]. #' @param res An S4 [kernlab::kpca()] object is stored #' here once this preprocessing step has be trained by #' [prep()]. @@ -128,10 +129,9 @@ prep.step_kpca_poly <- function(x, training, info = NULL, ...) { check_number_decimal(x$scale_factor, arg = "scale_factor", min = 0) check_number_decimal(x$offset, arg = "offset") check_string(x$prefix, arg = "prefix") + check_number_whole(x$num_comp, arg = "num_comp", min = 0) if (x$num_comp > 0 && length(col_names) > 0) { - check_number_whole(x$num_comp, arg = "num_comp", min = 0) - cl <- rlang::call2( "kpca", diff --git a/R/kpca_rbf.R b/R/kpca_rbf.R index 08220a104..fe64fc81c 100644 --- a/R/kpca_rbf.R +++ b/R/kpca_rbf.R @@ -6,7 +6,8 @@ #' #' @inheritParams step_pca #' @inheritParams step_center -#' @param sigma A numeric value for the radial basis function parameter. +#' @param sigma A numeric value for the radial basis function parameter. See +#' the documentation at [kernlab::rbfdot()]. #' @param res An S4 [kernlab::kpca()] object is stored #' here once this preprocessing step has be trained by #' [prep()]. @@ -120,10 +121,9 @@ prep.step_kpca_rbf <- function(x, training, info = NULL, ...) { check_type(training[, col_names], types = c("double", "integer")) check_number_decimal(x$sigma, arg = "sigma", min = .Machine$double.eps) check_string(x$prefix, arg = "prefix") + check_number_whole(x$num_comp, arg = "num_comp", min = 0) if (x$num_comp > 0 && length(col_names) > 0) { - check_number_whole(x$num_comp, arg = "num_comp", min = 0) - cl <- rlang::call2( "kpca", diff --git a/man/step_kpca_poly.Rd b/man/step_kpca_poly.Rd index 3c6c3299b..06ea9a120 100644 --- a/man/step_kpca_poly.Rd +++ b/man/step_kpca_poly.Rd @@ -48,7 +48,8 @@ here once this preprocessing step has be trained by \item{columns}{A character string of the selected variable names. This field is a placeholder and will be populated once \code{\link[=prep]{prep()}} is used.} -\item{degree, scale_factor, offset}{Numeric values for the polynomial kernel function.} +\item{degree, scale_factor, offset}{Numeric values for the polynomial kernel +function. See the documentation at \code{\link[kernlab:dots]{kernlab::polydot()}}.} \item{prefix}{A character string for the prefix of the resulting new variables. See notes below.} diff --git a/man/step_kpca_rbf.Rd b/man/step_kpca_rbf.Rd index fa0f4b926..4ce56837e 100644 --- a/man/step_kpca_rbf.Rd +++ b/man/step_kpca_rbf.Rd @@ -46,7 +46,8 @@ here once this preprocessing step has be trained by \item{columns}{A character string of the selected variable names. This field is a placeholder and will be populated once \code{\link[=prep]{prep()}} is used.} -\item{sigma}{A numeric value for the radial basis function parameter.} +\item{sigma}{A numeric value for the radial basis function parameter. See +the documentation at \code{\link[kernlab:dots]{kernlab::rbfdot()}}.} \item{prefix}{A character string for the prefix of the resulting new variables. See notes below.} diff --git a/tests/testthat/_snaps/ica.md b/tests/testthat/_snaps/ica.md index 49cc4d787..18b8b51e3 100644 --- a/tests/testthat/_snaps/ica.md +++ b/tests/testthat/_snaps/ica.md @@ -127,3 +127,12 @@ * Centering and scaling for: carbon, hydrogen, oxygen, ... | Trained * ICA extraction with: carbon and hydrogen | Trained +# bad args + + Code + rec %>% step_ica(carbon, hydrogen, prefix = 2) %>% prep() + Condition + Error in `step_ica()`: + Caused by error in `prep()`: + ! `prefix` must be a single string, not the number 2. + diff --git a/tests/testthat/_snaps/kpca.md b/tests/testthat/_snaps/kpca.md index 431c967fa..db265df35 100644 --- a/tests/testthat/_snaps/kpca.md +++ b/tests/testthat/_snaps/kpca.md @@ -130,3 +130,23 @@ -- Operations * Kernel PCA extraction with: X2, X3, X4, X5, X6 | Trained +# bad args + + Code + recipe(~., data = tr_dat) %>% step_kpca(all_numeric_predictors(), num_comp = -1) %>% + prep() + Condition + Error in `step_kpca()`: + Caused by error in `prep()`: + ! `num_comp` must be a whole number larger than or equal to 0, not the number -1. + +--- + + Code + recipe(~., data = tr_dat) %>% step_kpca(all_numeric_predictors(), prefix = 1) %>% + prep() + Condition + Error in `step_kpca()`: + Caused by error in `prep()`: + ! `prefix` must be a single string, not the number 1. + diff --git a/tests/testthat/_snaps/kpca_poly.md b/tests/testthat/_snaps/kpca_poly.md index 1fa7623f7..4e52b9ccf 100644 --- a/tests/testthat/_snaps/kpca_poly.md +++ b/tests/testthat/_snaps/kpca_poly.md @@ -120,6 +120,16 @@ # bad args + Code + recipe(~., data = tr_dat) %>% step_kpca_poly(all_numeric_predictors(), + num_comp = -1) %>% prep() + Condition + Error in `step_kpca_poly()`: + Caused by error in `prep()`: + ! `num_comp` must be a whole number larger than or equal to 0, not the number -1. + +--- + Code recipe(~., data = tr_dat) %>% step_kpca_poly(all_numeric_predictors(), degree = 1.1) %>% prep() diff --git a/tests/testthat/_snaps/kpca_rbf.md b/tests/testthat/_snaps/kpca_rbf.md index f6719ae11..aa884e7cc 100644 --- a/tests/testthat/_snaps/kpca_rbf.md +++ b/tests/testthat/_snaps/kpca_rbf.md @@ -110,6 +110,16 @@ # bad args + Code + recipe(~., data = tr_dat) %>% step_kpca_rbf(all_numeric_predictors(), num_comp = - + 1) %>% prep() + Condition + Error in `step_kpca_rbf()`: + Caused by error in `prep()`: + ! `num_comp` must be a whole number larger than or equal to 0, not the number -1. + +--- + Code recipe(~., data = tr_dat) %>% step_kpca_rbf(all_numeric_predictors(), sigma = 0) %>% prep() diff --git a/tests/testthat/test-kpca.R b/tests/testthat/test-kpca.R index d8718d982..e4d91175e 100644 --- a/tests/testthat/test-kpca.R +++ b/tests/testthat/test-kpca.R @@ -209,3 +209,18 @@ test_that("printing", { expect_snapshot(print(rec)) expect_snapshot(prep(rec)) }) + +test_that("bad args", { + expect_snapshot( + recipe(~ ., data = tr_dat) %>% + step_kpca(all_numeric_predictors(), num_comp = -1) %>% + prep(), + error = TRUE + ) + expect_snapshot( + recipe(~ ., data = tr_dat) %>% + step_kpca(all_numeric_predictors(), prefix = 1) %>% + prep(), + error = TRUE + ) +}) diff --git a/tests/testthat/test-kpca_poly.R b/tests/testthat/test-kpca_poly.R index 09b5eb3f1..52deba407 100644 --- a/tests/testthat/test-kpca_poly.R +++ b/tests/testthat/test-kpca_poly.R @@ -242,6 +242,12 @@ test_that("tunable is setup to work with extract_parameter_set_dials", { }) test_that("bad args", { + expect_snapshot( + recipe(~ ., data = tr_dat) %>% + step_kpca_poly(all_numeric_predictors(), num_comp = -1) %>% + prep(), + error = TRUE + ) expect_snapshot( recipe(~ ., data = tr_dat) %>% step_kpca_poly(all_numeric_predictors(), degree = 1.1) %>% diff --git a/tests/testthat/test-kpca_rbf.R b/tests/testthat/test-kpca_rbf.R index f522f9b40..8f48280cd 100644 --- a/tests/testthat/test-kpca_rbf.R +++ b/tests/testthat/test-kpca_rbf.R @@ -217,6 +217,12 @@ test_that("tunable is setup to work with extract_parameter_set_dials", { test_that("bad args", { + expect_snapshot( + recipe(~ ., data = tr_dat) %>% + step_kpca_rbf(all_numeric_predictors(), num_comp = -1) %>% + prep(), + error = TRUE + ) expect_snapshot( recipe(~ ., data = tr_dat) %>% step_kpca_rbf(all_numeric_predictors(), sigma = 0) %>% From b7d4f8b93a52b8f2140e4c017a034a5e3ae351c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=98topepo=E2=80=99?= Date: Wed, 30 Oct 2024 17:46:56 -0400 Subject: [PATCH 22/25] missing skips --- tests/testthat/test-isomap.R | 6 ++++++ tests/testthat/test-kpca.R | 2 ++ tests/testthat/test-kpca_poly.R | 2 ++ tests/testthat/test-kpca_rbf.R | 2 ++ 4 files changed, 12 insertions(+) diff --git a/tests/testthat/test-isomap.R b/tests/testthat/test-isomap.R index 37b393479..152fc6e85 100644 --- a/tests/testthat/test-isomap.R +++ b/tests/testthat/test-isomap.R @@ -286,6 +286,12 @@ test_that("tunable is setup to work with extract_parameter_set_dials", { }) test_that("bad args", { + skip_on_cran() + skip_if_not_installed("RSpectra") + skip_if_not_installed("igraph") + skip_if_not_installed("RANN") + skip_if_not_installed("dimRed") + expect_snapshot( recipe(~., data = mtcars) %>% step_isomap(all_predictors(), num_terms = 2, neighbors = -1/3) %>% diff --git a/tests/testthat/test-kpca.R b/tests/testthat/test-kpca.R index e4d91175e..b02fbb991 100644 --- a/tests/testthat/test-kpca.R +++ b/tests/testthat/test-kpca.R @@ -211,6 +211,8 @@ test_that("printing", { }) test_that("bad args", { + skip_if_not_installed("kernlab") + expect_snapshot( recipe(~ ., data = tr_dat) %>% step_kpca(all_numeric_predictors(), num_comp = -1) %>% diff --git a/tests/testthat/test-kpca_poly.R b/tests/testthat/test-kpca_poly.R index 52deba407..e2f61cdaa 100644 --- a/tests/testthat/test-kpca_poly.R +++ b/tests/testthat/test-kpca_poly.R @@ -242,6 +242,8 @@ test_that("tunable is setup to work with extract_parameter_set_dials", { }) test_that("bad args", { + skip_if_not_installed("kernlab") + expect_snapshot( recipe(~ ., data = tr_dat) %>% step_kpca_poly(all_numeric_predictors(), num_comp = -1) %>% diff --git a/tests/testthat/test-kpca_rbf.R b/tests/testthat/test-kpca_rbf.R index 8f48280cd..e14a7bccf 100644 --- a/tests/testthat/test-kpca_rbf.R +++ b/tests/testthat/test-kpca_rbf.R @@ -217,6 +217,8 @@ test_that("tunable is setup to work with extract_parameter_set_dials", { test_that("bad args", { + skip_if_not_installed("kernlab") + expect_snapshot( recipe(~ ., data = tr_dat) %>% step_kpca_rbf(all_numeric_predictors(), num_comp = -1) %>% From 630bea2d93a4eacfa9c11fa4df19c0ebcf84f001 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=98topepo=E2=80=99?= Date: Wed, 30 Oct 2024 18:21:43 -0400 Subject: [PATCH 23/25] small updates --- tests/testthat/_snaps/misc.md | 2 +- tests/testthat/_snaps/tune_args.md | 2 +- tests/testthat/test-ica.R | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/testthat/_snaps/misc.md b/tests/testthat/_snaps/misc.md index 0cfe46036..fe8d85153 100644 --- a/tests/testthat/_snaps/misc.md +++ b/tests/testthat/_snaps/misc.md @@ -85,7 +85,7 @@ Code names0(0) Condition - Error in `names0()`: + Error: ! `num` must be a whole number larger than or equal to 1, not the number 0. # ellipse_check() errors on empty selection diff --git a/tests/testthat/_snaps/tune_args.md b/tests/testthat/_snaps/tune_args.md index 544568233..a49aff306 100644 --- a/tests/testthat/_snaps/tune_args.md +++ b/tests/testthat/_snaps/tune_args.md @@ -13,6 +13,6 @@ tune_tbl(name = c("a", "b"), tunable = c(TRUE, TRUE), id = c("a", "a"), source = c( "a", "b"), component = c("a", "b"), component_id = c("a", "b"), full = TRUE) Condition - Error in `tune_tbl()`: + Error: ! There are duplicate id values listed in `tune()`: "a". diff --git a/tests/testthat/test-ica.R b/tests/testthat/test-ica.R index 0fba1d69b..032c1da40 100644 --- a/tests/testthat/test-ica.R +++ b/tests/testthat/test-ica.R @@ -315,6 +315,7 @@ test_that("tunable is setup to work with extract_parameter_set_dials", { test_that("bad args", { + skip_if_not_installed("fastICA") expect_snapshot( rec %>% From 141f1e9019851f64ada3182bb6a1301dacb5f0b6 Mon Sep 17 00:00:00 2001 From: topepo Date: Thu, 31 Oct 2024 08:14:49 -0400 Subject: [PATCH 24/25] updated test --- tests/testthat/_snaps/update.md | 4 ++-- tests/testthat/test-update.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/_snaps/update.md b/tests/testthat/_snaps/update.md index 7f23d8a04..00d6cf223 100644 --- a/tests/testthat/_snaps/update.md +++ b/tests/testthat/_snaps/update.md @@ -19,6 +19,6 @@ Code update(step, x = 5, x = 6) Condition - Error in `validate_has_unique_names()`: - ! All of the changes supplied in `...` must be uniquely named. + Error in `update()`: + ! To update `step_stp()`, it must not be trained. diff --git a/tests/testthat/test-update.R b/tests/testthat/test-update.R index 8420b9583..95fb2c72d 100644 --- a/tests/testthat/test-update.R +++ b/tests/testthat/test-update.R @@ -23,7 +23,7 @@ test_that("cannot update trained steps", { }) test_that("update() errors on duplicate assignments", { - step <- recipes::step("stp", x = 4, trained = FALSE) + step <- recipes::step("stp", x = 4, trained = TRUE, id = "a", skip = TRUE, role = "a") expect_snapshot( error = TRUE, From f7dedb198540ce4de4452299033dfc02eff464b3 Mon Sep 17 00:00:00 2001 From: topepo Date: Thu, 31 Oct 2024 08:40:33 -0400 Subject: [PATCH 25/25] remove bad test --- tests/testthat/_snaps/interact.md | 9 --------- tests/testthat/test-interact.R | 6 ------ 2 files changed, 15 deletions(-) diff --git a/tests/testthat/_snaps/interact.md b/tests/testthat/_snaps/interact.md index c82d9cc17..f48e898cb 100644 --- a/tests/testthat/_snaps/interact.md +++ b/tests/testthat/_snaps/interact.md @@ -123,12 +123,3 @@ Error in `step_interact()`: ! `sep` must be a single string, not `TRUE`. ---- - - Code - recipe(~ x + y + z, data = tr_dat) %>% step_integer(all_predictors(), - zero_based = "sure!") %>% prep() - Condition - Error: - ! object 'tr_dat' not found - diff --git a/tests/testthat/test-interact.R b/tests/testthat/test-interact.R index 4e13dfe7f..668d89bd7 100644 --- a/tests/testthat/test-interact.R +++ b/tests/testthat/test-interact.R @@ -441,10 +441,4 @@ test_that("bad args", { prep(), error = TRUE ) - expect_snapshot( - recipe(~ x + y + z, data = tr_dat) %>% - step_integer(all_predictors(), zero_based = "sure!") %>% - prep(), - error = TRUE - ) })