From 2ca5c6e726fef42eecd3c655738acf1dce8517bd Mon Sep 17 00:00:00 2001 From: Edgar-Zamora Date: Thu, 15 Aug 2024 14:08:35 -0700 Subject: [PATCH 1/4] update namespace and add mday dep --- NAMESPACE | 1 + NEWS.md | 2 ++ 2 files changed, 3 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index b1488b87d..ed6a8d583 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -748,6 +748,7 @@ importFrom(lubridate,am) importFrom(lubridate,decimal_date) importFrom(lubridate,hour) importFrom(lubridate,is.Date) +importFrom(lubridate,mday) importFrom(lubridate,minute) importFrom(lubridate,month) importFrom(lubridate,quarter) diff --git a/NEWS.md b/NEWS.md index e17b3a5eb..c54c5848a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -34,6 +34,8 @@ * `step_dummy()` now throws more informative warnings for `NA` values. (#450) +* `step_date()` now accepts `"mday"` as a possible feature. (@Edgar-Zamora, #1211) + ## Bug Fixes * `NA` levels in factors aren't dropped when passed to `recipe()`. (#1291) From 9275b59c47b0b0056d8121ee17d2e591998912da Mon Sep 17 00:00:00 2001 From: Edgar-Zamora Date: Thu, 15 Aug 2024 14:09:01 -0700 Subject: [PATCH 2/4] add tests --- tests/testthat/test-date.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-date.R b/tests/testthat/test-date.R index 5c4337e1b..1bfc137f8 100644 --- a/tests/testthat/test-date.R +++ b/tests/testthat/test-date.R @@ -51,7 +51,7 @@ test_that("default option", { test_that("nondefault options", { date_rec <- recipe(~ Dan + Stefan, examples) %>% - step_date(all_predictors(), features = c("dow", "month"), label = FALSE) + step_date(all_predictors(), features = c("dow", "month", "mday"), label = FALSE) date_rec <- prep(date_rec, training = examples) date_res <- bake(date_rec, new_data = examples) @@ -61,8 +61,10 @@ test_that("nondefault options", { Stefan = examples$Stefan, Dan_dow = wday(examples$Dan, label = FALSE), Dan_month = month(examples$Dan, label = FALSE), + Dan_mday = mday(examples$Dan), Stefan_dow = 
wday(examples$Stefan, label = FALSE), - Stefan_month = month(examples$Stefan, label = FALSE) + Stefan_month = month(examples$Stefan, label = FALSE), + Stefan_mday = mday(examples$Stefan) ) expect_equal(date_res, date_exp) @@ -237,7 +239,7 @@ test_that("empty selection tidy method works", { }) test_that("keep_original_cols works", { - new_names <- c("Dan_dow", "Dan_month", "Dan_year") + new_names <- c("Dan_dow", "Dan_month", "Dan_year", "Dan_mday") rec <- recipe(~ Dan, examples) %>% step_date(all_predictors(), keep_original_cols = FALSE) From f83aaaab9e7dae4e32331fdf587c28ec6b9b2000 Mon Sep 17 00:00:00 2001 From: Edgar-Zamora Date: Thu, 15 Aug 2024 14:09:17 -0700 Subject: [PATCH 3/4] add mday option --- R/date.R | 6 +++++- R/recipes-package.R | 1 + man/roles.Rd | 7 ++++--- man/step_date.Rd | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/R/date.R b/R/date.R index 23e7b0d9d..5dbdeabe2 100644 --- a/R/date.R +++ b/R/date.R @@ -9,7 +9,7 @@ #' for this step. The selected variables should have class `Date` or #' `POSIXct`. See [selections()] for more details. #' @param features A character string that includes at least one -#' of the following values: `month`, `dow` (day of week), +#' of the following values: `month`, `dow` (day of week), `mday` (day of month), #' `doy` (day of year), `week`, `month`, #' `decimal` (decimal date, e.g. 2002.197), `quarter`, #' `semester`, `year`. 
@@ -98,6 +98,7 @@ step_date <- c( "year", "doy", + "mday", "week", "decimal", "semester", @@ -202,6 +203,9 @@ get_date_features <- if ("doy" %in% feats) { res[, grepl("doy$", names(res))] <- vec_cast(yday(dt), integer()) } + if ("mday" %in% feats) { + res[, grepl("mday$", names(res))] <- vec_cast(mday(dt), integer()) + } if ("week" %in% feats) { res[, grepl("week$", names(res))] <- vec_cast(week(dt), integer()) } diff --git a/R/recipes-package.R b/R/recipes-package.R index bfe956b67..636d085bf 100644 --- a/R/recipes-package.R +++ b/R/recipes-package.R @@ -62,6 +62,7 @@ #' @importFrom lubridate wday #' @importFrom lubridate week #' @importFrom lubridate yday +#' @importFrom lubridate mday #' @importFrom lubridate year #' @importFrom Matrix Matrix #' @importFrom purrr map diff --git a/man/roles.Rd b/man/roles.Rd index 8e4a9c770..72ab16fa7 100644 --- a/man/roles.Rd +++ b/man/roles.Rd @@ -93,9 +93,10 @@ If you really aren't using \code{sample} in your recipe, we recommend that you i bake(rec, biomass_test) #> Error in `bake()`: -#> x The following required columns are missing from `new_data`: `sample`. -#> i These columns have one of the following roles, which are required at `bake()` -#> time: `id variable`. +#> x The following required columns are missing from `new_data`: +#> `sample`. +#> i These columns have one of the following roles, which are required +#> at `bake()` time: `id variable`. #> i If these roles are not required at `bake()` time, use #> `update_role_requirements(role = "your_role", bake = FALSE)`. 
}\if{html}{\out{}} diff --git a/man/step_date.Rd b/man/step_date.Rd index d91d99bae..38c80e65e 100644 --- a/man/step_date.Rd +++ b/man/step_date.Rd @@ -36,7 +36,7 @@ the original variables will be used as \emph{predictors} in a model.} preprocessing have been estimated.} \item{features}{A character string that includes at least one -of the following values: \code{month}, \code{dow} (day of week), +of the following values: \code{month}, \code{dow} (day of week), \code{mday} (day of month), \code{doy} (day of year), \code{week}, \code{month}, \code{decimal} (decimal date, e.g. 2002.197), \code{quarter}, \code{semester}, \code{year}.} From eb9c9b7aee9488fbbf4ab3af77ac653d14ced591 Mon Sep 17 00:00:00 2001 From: Edgar-Zamora Date: Thu, 15 Aug 2024 14:15:46 -0700 Subject: [PATCH 4/4] remove mday from expected --- tests/testthat/test-date.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-date.R b/tests/testthat/test-date.R index 1bfc137f8..2bbca3ece 100644 --- a/tests/testthat/test-date.R +++ b/tests/testthat/test-date.R @@ -239,7 +239,7 @@ test_that("empty selection tidy method works", { }) test_that("keep_original_cols works", { - new_names <- c("Dan_dow", "Dan_month", "Dan_year", "Dan_mday") + new_names <- c("Dan_dow", "Dan_month", "Dan_year") rec <- recipe(~ Dan, examples) %>% step_date(all_predictors(), keep_original_cols = FALSE)