Skip to content

Commit

Permalink
feat: add message embedding to check returns (#61)
Browse files Browse the repository at this point in the history
This (hopefully) should avoid the issue of having side-effecty checks
whose messages get lost in distributed workloads.
  • Loading branch information
psanker authored Mar 27, 2024
1 parent 56d1da3 commit 2ee0092
Show file tree
Hide file tree
Showing 7 changed files with 182 additions and 27 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ Suggests:
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.1
RoxygenNote: 7.2.3
URL: https://github.com/nyuglobalties/blueprintr
BugReports: https://github.com/nyuglobalties/blueprintr/issues
Depends:
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# blueprintr 0.2.5
* Add capability to embed custom messages to check results, using `check.errors` attribute in returned logical value
* Refactor side-effect messages from built-in checks to `check.errors`

# blueprintr 0.2.4
* Fix for issue where labelling large datasets would take a very long time

Expand Down
117 changes: 107 additions & 10 deletions R/checks-base.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,19 +58,72 @@ is_variable_check_func <- function(func) {
identical(arg_ast$head, "$")
}

check_errors_attr <- "check.errors"

has_check_errors <- function(x) {
has_attr(x, check_errors_attr)
}

get_check_errors <- function(x) {
get_attr(x, check_errors_attr)
}

set_check_errors <- function(x, reasons) {
set_attr(x, check_errors_attr, reasons)
}

#' Helper function to embed reasons
#' @noRd
fail_check <- function(reasons = NULL) {
set_check_errors(FALSE, reasons)
}

#' Helper function to embed reasons
#' @noRd
warn_check <- function(reasons = NULL) {
set_check_errors(TRUE, reasons)
}

#' Evaluate all checks on a blueprint
#'
#' Runs all checks -- dataset and variable -- on a blueprint
#' to determine if a built dataset passes all restrictions.
#'
#' @details # Check functions
#'
#' Check functions are simple functions that take in either
#' a data.frame or variable at the minimum, plus some extra
#' arguments if need, and returns a logical value: `TRUE` or `FALSE.`
#' In blueprintr, the entire check passes or fails unlike other
#' testing frameworks like {pointblank}. If you'd like to embed
#' extra context for your test result, modify the "check.errors"
#' attribute of the returned logical value with a character vector
#' which will be rendered into a bulleted list. Note: if you embed
#' reasons for a `TRUE`, the check will produce a warning in the drake
#' or targets pipeline.
#'
#' @param ... All quoted check calls
#' @param .env The environment in which the calls are evaluated
#'
#' @export
eval_checks <- function(..., .env = parent.frame()) {
checks_dt <- checks(...)

checks_dt$.pass <- vlapply(checks_dt$check_func, function(f) eval(f, envir = .env))
checks_results <- lapply(checks_dt$check_func, function(f) eval(f, envir = .env))
checks_dt$.pass <- vlapply(checks_results, as.logical)
checks_dt$.fail_meta <- vcapply(checks_results, \(x) {
if (has_check_errors(x)) {
errs <- get_check_errors(x)
errs <- vcapply(errs, \(x) paste0(" * ", x))
paste0(":\n", paste0(errs, collapse = "\n"))
} else {
""
}
})

if (any(checks_dt$.pass == TRUE & checks_dt$.fail_meta != "")) {
checks_warn(checks_dt)
}

if (any(checks_dt$.pass == FALSE)) {
checks_error(checks_dt)
Expand All @@ -80,18 +133,62 @@ eval_checks <- function(..., .env = parent.frame()) {
}

checks_error <- function(checks) {
false_funcs <-
checks %>%
dplyr::filter(.data$.pass == FALSE) %>%
dplyr::pull(.data$check_func) %>%
vcapply(safe_deparse, collapse = " ", trim = TRUE)

err_msgs <- glue("`{false_funcs}` is not TRUE")
checks <- checks %>%
dplyr::mutate(
.call = vcapply(.data$check_func, safe_deparse, collapse = " ", trim = TRUE),
) %>%
dplyr::mutate(
.message = dplyr::if_else(
.data$.pass == FALSE,
paste0(
"`", .data$.call, "` is not TRUE", .data$.fail_meta
),
NA_character_
),
# Include the warning here for user experience
.message = dplyr::if_else(
.data$.pass == TRUE & .data$.fail_meta != "",
paste0(
"`", .data$.call, "` has some potential issues", .data$.fail_meta
),
.data$.message
)
)

err_msgs <- checks %>%
dplyr::filter(!is.na(.data$.message)) %>%
dplyr::pull(.data$.message)

rlang::abort(
glue_collapse(err_msgs, "\n"),
checks = checks,
.subclass = "checks_error"
class = "checks_error",
checks = checks
)
}

checks_warn <- function(checks) {
checks <- checks %>%
dplyr::mutate(
.call = vcapply(.data$check_func, safe_deparse, collapse = " ", trim = TRUE),
) %>%
dplyr::mutate(
.message = dplyr::if_else(
.data$.pass == TRUE & .data$.fail_meta != "",
paste0(
"`", .data$.call, "` has some potential issues", .data$.fail_meta
),
NA_character_
)
)

warn_msgs <- checks %>%
dplyr::filter(!is.na(.data$.message)) %>%
dplyr::pull(.data$.message)

rlang::warn(
message = glue_collapse(warn_msgs, "\n"),
class = "checks_warn",
checks = checks
)
}

Expand Down
34 changes: 19 additions & 15 deletions R/checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,28 +29,36 @@ all_variables_present <- function(df, meta, blueprint) {
new_vars <- setdiff(names(df), known_vars)
missing_vars <- setdiff(known_vars, names(df))

err_reasons <- NULL

if (length(missing_vars) > 0) {
message(
glue("Expected variables are missing: {glue_collapse(missing_vars, ', ')}")
)
err_reasons <- glue("Expected variables are missing: [{glue_collapse(missing_vars, ', ')}]")
}

if (length(new_vars) > 0) {
message(
glue("Unexpected new variables: [{glue_collapse(new_vars, ', ')}]. "),
"Please edit documentation if this is intended."
)
err_reasons <- c(err_reasons, paste0(
paste0(
glue("Unexpected new variables: [{glue_collapse(new_vars, ', ')}]."),
"\n Please edit documentation if this is intended."
)
))
}

res <- TRUE

if (length(missing_vars) > 0) {
return(FALSE)
res <- FALSE
}

if (length(new_vars) > 0 && isTRUE(blueprint$stop_on_new_vars)) {
return(FALSE)
res <- FALSE
}

TRUE
if (!res) {
return(fail_check(err_reasons))
}

warn_check(err_reasons)
}

#' @rdname checks
Expand Down Expand Up @@ -89,11 +97,7 @@ all_types_match <- function(df, meta) {
dplyr::filter(.data$issue == TRUE) %>%
dplyr::pull(.data$.err)

for (.err in errors) {
message(.err)
}

return(FALSE)
return(fail_check(errors))
}

TRUE
Expand Down
2 changes: 1 addition & 1 deletion R/cleanup-default.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ drop_columns <- function(df, blueprint, meta) {
dropped_cols <- meta[!is.na(meta$dropped) & meta$dropped == TRUE, "name", drop = TRUE]

if (length(dropped_cols) > 0) {
df <- dplyr::select(df, -dropped_cols)
df <- dplyr::select(df, -tidyselect::all_of(dropped_cols))
}

df
Expand Down
13 changes: 13 additions & 0 deletions man/eval_checks.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 37 additions & 0 deletions tests/testthat/test-02-checking.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,40 @@ test_that("Variable checks work", {
meta <- drake::diagnose(mtcars_vartests_checks)
expect_true(inherits(meta$error, "checks_error"))
})

test_that("Content checking embeds extra information if desired", {
mtcars_bp <- blueprint(
"mtcars_vartests",
description = "It's just mtcars, OK?",
metadata_directory = bp_path("blueprints"),
command = {
df <- mtcars
dplyr::rename(df, ma = am)
}
)

plan <- plan_from_blueprint(mtcars_bp)

drake::clean()
err <- expect_error(drake::make(plan))
expect_true(any(grepl(" \\* ", err$message))) # Embeds reasons into err message

mtcars_bp <- blueprint(
"mtcars_vartests",
description = "It's just mtcars, OK?",
metadata_directory = bp_path("blueprints"),
command = {
df <- mtcars
df$new_col <- 0
df
}
)

plan <- plan_from_blueprint(mtcars_bp)

drake::clean()
expect_warning(drake::make(plan))

meta <- drake::diagnose(mtcars_vartests_checks)
expect_true(any(grepl(" \\* ", meta$warnings))) # Allow passing with warning messages!
})

0 comments on commit 2ee0092

Please sign in to comment.