diff --git a/DESCRIPTION b/DESCRIPTION index 49466795a..1aaf0d501 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.9.0.1 +Version: 0.9.0.2 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")), diff --git a/NEWS.md b/NEWS.md index 4e88f6690..404961f86 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # datawizard 0.9.0.9000 (development version) +CHANGES + +* `rescale()` gains `multiply` and `add` arguments, to expand ranges by a given + factor or value. + # datawizard 0.9.0 NEW FUNCTIONS diff --git a/R/data_rescale.R b/R/data_rescale.R index ce5059160..85ff885c6 100644 --- a/R/data_rescale.R +++ b/R/data_rescale.R @@ -1,15 +1,26 @@ #' Rescale Variables to a New Range #' -#' Rescale variables to a new range. -#' Can also be used to reverse-score variables (change the keying/scoring direction). +#' Rescale variables to a new range. Can also be used to reverse-score variables +#' (change the keying/scoring direction), or to expand a range. #' #' @inheritParams categorize #' @inheritParams find_columns #' @inheritParams standardize.data.frame #' -#' @param to Numeric vector of length 2 giving the new range that the variable will have after rescaling. -#' To reverse-score a variable, the range should be given with the maximum value first. -#' See examples. +#' @param to Numeric vector of length 2 giving the new range that the variable +#' will have after rescaling. To reverse-score a variable, the range should +#' be given with the maximum value first. See examples. +#' @param multiply If not `NULL`, `to` is ignored and `multiply` will be used, +#' giving the factor by which the actual range of `x` should be expanded. 
+#' For example, if a vector ranges from 5 to 15 and `multiply = 1.1`, the current +#' range of 10 will be expanded by the factor of 1.1, giving a new range of +#' 11. Thus, the rescaled vector would range from 4.5 to 15.5. +#' @param add A vector of length 1 or 2. If not `NULL`, `to` is ignored and `add` +#' will be used, giving the amount by which the minimum and maximum of the +#' actual range of `x` should be expanded. For example, if a vector ranges from +#' 5 to 15 and `add = 1`, the range will be expanded from 4 to 16. If `add` is +#' of length 2, then the first value is used for the lower bound and the second +#' value for the upper bound. #' @param range Initial (old) range of values. If `NULL`, will take the range of #' the input vector (`range(x)`). #' @param ... Arguments passed to or from other methods. @@ -37,6 +48,21 @@ #' "Sepal.Length" = c(0, 1), #' "Petal.Length" = c(-1, 0) #' ))) +#' +#' # "expand" ranges by a factor or a given value +#' x <- 5:15 +#' x +#' # both will expand the range by 10% +#' rescale(x, multiply = 1.1) +#' rescale(x, add = 0.5) +#' +#' # expand range by different values +#' rescale(x, add = c(1, 3)) +#' +#' # Specify list of multipliers +#' d <- data.frame(x = 5:15, y = 5:15) +#' rescale(d, multiply = list(x = 1.1, y = 0.5)) +#' #' @inherit data_rename #' #' @return A rescaled object. @@ -75,6 +101,8 @@ rescale.default <- function(x, verbose = TRUE, ...) { #' @export rescale.numeric <- function(x, to = c(0, 100), + multiply = NULL, + add = NULL, range = NULL, verbose = TRUE, ...) { @@ -91,6 +119,9 @@ rescale.numeric <- function(x, range <- c(min(x, na.rm = TRUE), max(x, na.rm = TRUE)) } + # check if user specified "multiply" or "add", and then update "to" + to <- .update_to(x, to, multiply, add) + # called from "makepredictcal()"? Then we have additional arguments dot_args <- list(...) 
# helper ----------------------------------------------------------------------

# Expand the target range by multiplying or adding.
#
# Arguments:
#   x         Vector whose observed range (ignoring missing values) is the
#             basis for the expansion.
#   to        Target range; returned unchanged when neither `multiply` nor
#             `add` is given, or when `x` has no non-missing values.
#   multiply  `NULL`, or a single factor by which the observed range of `x`
#             is expanded (the extra width is split evenly between both ends).
#   add       `NULL`, or a vector of length 1 or 2 giving the amount that is
#             subtracted from the minimum and added to the maximum of `x`.
#             With two values, the first applies to the lower bound and the
#             second to the upper bound.
#
# Returns a vector of length 2 with the (possibly expanded) target range.
# Errors if both `multiply` and `add` are given, or if either has an
# invalid length.
.update_to <- function(x, to, multiply, add) {
  # check if user specified "multiply" or "add", and if not, return "to"
  if (is.null(multiply) && is.null(add)) {
    return(to)
  }
  # only one of "multiply" or "add" can be specified
  if (!is.null(multiply) && !is.null(add)) {
    insight::format_error("Only one of `multiply` or `add` can be specified.")
  }
  # validate lengths up front. Zero-length input is rejected as well, because
  # it would otherwise silently produce a zero-length target range.
  if (!is.null(multiply) && length(multiply) != 1) {
    insight::format_error("The length of `multiply` must be 1.")
  }
  if (!is.null(add) && !length(add) %in% c(1, 2)) {
    insight::format_error("The length of `add` must be 1 or 2.")
  }
  # without any non-missing value there is no observed range to expand;
  # return "to" untouched instead of letting range() warn and yield
  # infinite bounds
  if (all(is.na(x))) {
    return(to)
  }
  # observed range, computed once and used for both "multiply" and "add"
  x_range <- range(x, na.rm = TRUE)
  # multiply? If yes, calculate the "add" value: half of the additional
  # range width goes to each end
  if (!is.null(multiply)) {
    add <- (diff(x_range) * (multiply - 1)) / 2
  }
  # a single value is used for both bounds; with two values, the first is
  # used for the lower bound and the second for the upper bound
  add <- rep_len(add, 2)
  c(x_range[1] - add[1], x_range[2] + add[2])
}
+For example, if a vector ranges from 5 to 15 and \code{multiply = 1.1}, the current +range of 10 will be expanded by the factor of 1.1, giving a new range of +11. Thus, the rescaled vector would range from 4.5 to 15.5.} + +\item{add}{A vector of length 1 or 2. If not \code{NULL}, \code{to} is ignored and \code{add} +will be used, giving the amount by which the minimum and maximum of the +actual range of \code{x} should be expanded. For example, if a vector ranges from +5 to 15 and \code{add = 1}, the range will be expanded from 4 to 16. If \code{add} is +of length 2, then the first value is used for the lower bound and the second +value for the upper bound.} \item{range}{Initial (old) range of values. If \code{NULL}, will take the range of the input vector (\code{range(x)}).} @@ -103,8 +126,8 @@ functions (see 'Details'), this argument may be used as workaround.} A rescaled object. } \description{ -Rescale variables to a new range. -Can also be used to reverse-score variables (change the keying/scoring direction). +Rescale variables to a new range. Can also be used to reverse-score variables +(change the keying/scoring direction), or to expand a range. } \section{Selection of variables - the \code{select} argument}{ @@ -138,6 +161,21 @@ head(rescale(iris, to = list( "Sepal.Length" = c(0, 1), "Petal.Length" = c(-1, 0) ))) + +# "expand" ranges by a factor or a given value +x <- 5:15 +x +# both will expand the range by 10\% +rescale(x, multiply = 1.1) +rescale(x, add = 0.5) + +# expand range by different values +rescale(x, add = c(1, 3)) + +# Specify list of multipliers +d <- data.frame(x = 5:15, y = 5:15) +rescale(d, multiply = list(x = 1.1, y = 0.5)) + } \seealso{ See \code{\link[=makepredictcall.dw_transformer]{makepredictcall.dw_transformer()}} for use in model formulas. 
# expanding range ------------------------------
# Covers the `multiply` and `add` arguments of `rescale()` for vectors and
# data frames, including the input validation errors.
test_that("data_rescale can expand range", {
  # for vectors
  x <- 5:15
  expect_equal(
    rescale(x, multiply = 1.1),
    c(4.5, 5.6, 6.7, 7.8, 8.9, 10, 11.1, 12.2, 13.3, 14.4, 15.5),
    ignore_attr = TRUE
  )
  # a range of 10 expanded by factor 1.1 equals adding 0.5 at both ends
  expect_equal(rescale(x, multiply = 1.1), rescale(x, add = 0.5), ignore_attr = TRUE)
  # `regexp` is spelled out in full rather than relying on partial matching
  expect_error(rescale(x, multiply = 0.9, add = 1), regexp = "Only one of")
  expect_error(rescale(x, multiply = c(1.2, 1.4)), regexp = "The length of")

  # different values for add
  expect_equal(
    rescale(x, add = c(1, 3)),
    c(4, 5.4, 6.8, 8.2, 9.6, 11, 12.4, 13.8, 15.2, 16.6, 18),
    ignore_attr = TRUE
  )
  expect_error(rescale(x, add = 1:3), regexp = "The length of")

  # works with NA
  expect_equal(
    rescale(rep(NA_real_, 3), multiply = 1.1),
    rep(NA_real_, 3),
    ignore_attr = TRUE
  )
  expect_equal(
    rescale(rep(NA_real_, 3), add = 2),
    rep(NA_real_, 3),
    ignore_attr = TRUE
  )

  # for data frames
  d <- data.frame(x = 5:15, y = 5:15)
  expect_equal(
    rescale(d, multiply = 1.1),
    rescale(d, add = 0.5),
    ignore_attr = TRUE
  )
  expect_equal(
    rescale(d, multiply = list(x = 1.1, y = 0.5)),
    rescale(d, add = list(x = 0.5, y = -2.5)),
    ignore_attr = TRUE
  )
  # data frames accept multiple add-values per column
  out <- rescale(d, add = list(x = c(1, 3), y = c(2, 4)))
  expect_equal(
    out$x,
    rescale(d$x, add = c(1, 3)),
    ignore_attr = TRUE
  )
  expect_equal(
    out$y,
    rescale(d$y, add = c(2, 4)),
    ignore_attr = TRUE
  )

  expect_error(rescale(d, multiply = 0.9, add = 1), regexp = "Only one of")
  expect_error(
    rescale(d, multiply = list(x = 0.9, y = 2), add = list(y = 1)),
    regexp = "Only one of"
  )
  expect_error(rescale(d, multiply = c(0.9, 1.5)), regexp = "The length of")
})