Skip to content

Commit

Permalink
Add helper functions to refactor PKCT01 (#1048)
Browse files Browse the repository at this point in the history
Closes #277 #543
  • Loading branch information
edelarua authored Sep 8, 2023
1 parent b9c0944 commit 5dee25f
Show file tree
Hide file tree
Showing 35 changed files with 822 additions and 31 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ Collate:
'h_step.R'
'h_survival_biomarkers_subgroups.R'
'h_survival_duration_subgroups.R'
'imputation_rule.R'
'incidence_rate.R'
'individual_patient_plot.R'
'kaplan_meier_plot.R'
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ export(format_extreme_values_ci)
export(format_fraction)
export(format_fraction_fixed_dp)
export(format_fraction_threshold)
export(format_sigfig)
export(format_xx)
export(g_forest)
export(g_ipp)
Expand Down Expand Up @@ -211,6 +212,7 @@ export(has_counts_difference)
export(has_fraction_in_any_col)
export(has_fraction_in_cols)
export(has_fractions_difference)
export(imputation_rule)
export(keep_content_rows)
export(keep_rows)
export(logistic_regression_cols)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# tern 0.9.0.9001
### New Features
* Added `imputation_rule` function to apply imputation rule to data.
* Added new format function `format_sigfig` to allow for numeric value formatting by a specified number of significant figures.

### Enhancements
* Refactored `tabulate_rsp_subgroups` to pass sanitation checks by preventing creation of degenerate subtables.
* Updated `analyze_vars_in_cols` to use caching, allow implementation of imputation rule via the `imp_rule` argument, and allow user to specify cell alignment via the `.aligns` argument.
* Updated `add_rowcounts` to allow addition of row counts from `alt_counts_df` using the `alt_counts` argument.

# tern 0.9.0
### New Features
Expand Down
2 changes: 1 addition & 1 deletion R/analyze_variables.R
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ s_summary.factor <- function(x,
}
)

y$n_blq <- sum(grepl("BLQ|LTR|<[1-9]", x))
y$n_blq <- sum(grepl("BLQ|LTR|<[1-9]|<PCLLOQ", x))

y
}
Expand Down
87 changes: 76 additions & 11 deletions R/analyze_vars_in_cols.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,17 @@
#'
#' @inheritParams argument_convention
#' @inheritParams rtables::analyze_colvars
#' @param imp_rule (`character`)\cr imputation rule setting. Defaults to `NULL` for no imputation rule. Can
#' also be `"1/3"` to implement 1/3 imputation rule or `"1/2"` to implement 1/2 imputation rule. In order
#' to use an imputation rule, the `avalcat_var` argument must be specified. See [imputation_rule()]
#' for more details on imputation.
#' @param avalcat_var (`character`)\cr if `imp_rule` is not `NULL`, name of variable that indicates whether a
#' row in the data corresponds to an analysis value in category `"BLQ"`, `"LTR"`, `"<PCLLOQ"`, or none of
#' the above (defaults to `"AVALCAT1"`). Variable must be present in the data and should match the variable
#' used to calculate the `n_blq` statistic (if included in `.stats`).
#' @param cache (`flag`)\cr whether to store computed values in a temporary caching environment. This will
#' speed up calculations in large tables, but should be set to `FALSE` if the same `rtable` layout is
#' used for multiple tables with different data. Defaults to `FALSE`.
#' @param row_labels (`character`)\cr as this function works in columns space, usual `.labels`
#' character vector applies on the column space. You can change the row labels by defining this
#' parameter to a named character vector with names corresponding to the split values. It defaults
Expand Down Expand Up @@ -143,10 +154,14 @@ analyze_vars_in_cols <- function(lyt,
row_labels = NULL,
do_summarize_row_groups = FALSE,
split_col_vars = TRUE,
imp_rule = NULL,
avalcat_var = "AVALCAT1",
cache = FALSE,
.indent_mods = NULL,
nested = TRUE,
na_level = NULL,
.formats = NULL) {
.formats = NULL,
.aligns = NULL) {
checkmate::assert_string(na_level, null.ok = TRUE)
checkmate::assert_character(row_labels, null.ok = TRUE)
checkmate::assert_int(.indent_mods, null.ok = TRUE)
Expand Down Expand Up @@ -201,17 +216,40 @@ analyze_vars_in_cols <- function(lyt,
)
}

env <- new.env() # create caching environment

if (do_summarize_row_groups) {
if (length(unique(vars)) > 1) {
stop("When using do_summarize_row_groups only one label level var should be inserted.")
}

# Function list for do_summarize_row_groups. Slightly different handling of labels
cfun_list <- Map(
function(stat) {
function(u, .spl_context, labelstr, ...) {
function(stat, use_cache, cache_env) {
function(u, .spl_context, labelstr, .df_row, ...) {
# Statistic
res <- s_summary(u, ...)[[stat]]
var_row_val <- paste(
gsub("\\._\\[\\[[0-9]+\\]\\]_\\.", "", paste(tail(.spl_context$cur_col_split_val, 1)[[1]], collapse = "_")),
paste(.spl_context$value, collapse = "_"),
sep = "_"
)
if (use_cache) {
if (is.null(cache_env[[var_row_val]])) cache_env[[var_row_val]] <- s_summary(u, ...)
x_stats <- cache_env[[var_row_val]]
} else {
x_stats <- s_summary(u, ...)
}

if (is.null(imp_rule) || !stat %in% c("mean", "sd", "cv", "geom_mean", "geom_cv", "median", "min", "max")) {
res <- x_stats[[stat]]
} else {
res_imp <- imputation_rule(
.df_row, x_stats, stat,
imp_rule = imp_rule, post = as.numeric(tail(.spl_context$value, 1)) > 0, avalcat_var = avalcat_var
)
res <- res_imp[["val"]]
na_level <- res_imp[["na_level"]]
}

# Label check and replacement
if (length(row_labels) > 1) {
Expand All @@ -234,11 +272,14 @@ analyze_vars_in_cols <- function(lyt,
label = lbl,
format = formats_v[names(formats_v) == stat][[1]],
format_na_str = na_level,
indent_mod = ifelse(is.null(.indent_mods), 0L, .indent_mods)
indent_mod = ifelse(is.null(.indent_mods), 0L, .indent_mods),
align = .aligns
)
}
},
stat = .stats
stat = .stats,
use_cache = cache,
cache_env = replicate(length(.stats), env)
)

# Main call to rtables
Expand All @@ -251,10 +292,31 @@ analyze_vars_in_cols <- function(lyt,
} else {
# Function list for analyze_colvars
afun_list <- Map(
function(stat) {
function(u, .spl_context, ...) {
function(stat, use_cache, cache_env) {
function(u, .spl_context, .df_row, ...) {
# Main statistics
res <- s_summary(u, ...)[[stat]]
var_row_val <- paste(
gsub("\\._\\[\\[[0-9]+\\]\\]_\\.", "", paste(tail(.spl_context$cur_col_split_val, 1)[[1]], collapse = "_")),
paste(.spl_context$value, collapse = "_"),
sep = "_"
)
if (use_cache) {
if (is.null(cache_env[[var_row_val]])) cache_env[[var_row_val]] <- s_summary(u, ...)
x_stats <- cache_env[[var_row_val]]
} else {
x_stats <- s_summary(u, ...)
}

if (is.null(imp_rule) || !stat %in% c("mean", "sd", "cv", "geom_mean", "geom_cv", "median", "min", "max")) {
res <- x_stats[[stat]]
} else {
res_imp <- imputation_rule(
.df_row, x_stats, stat,
imp_rule = imp_rule, post = as.numeric(tail(.spl_context$value, 1)) > 0, avalcat_var = avalcat_var
)
res <- res_imp[["val"]]
na_level <- res_imp[["na_level"]]
}

if (is.list(res)) {
if (length(res) > 1) {
Expand Down Expand Up @@ -292,11 +354,14 @@ analyze_vars_in_cols <- function(lyt,
label = lbl,
format = formats_v[names(formats_v) == stat][[1]],
format_na_str = na_level,
indent_mod = ifelse(is.null(.indent_mods), 0L, .indent_mods)
indent_mod = ifelse(is.null(.indent_mods), 0L, .indent_mods),
align = .aligns
)
}
},
stat = .stats
stat = .stats,
use_cache = cache,
cache_env = replicate(length(.stats), env)
)

# Main call to rtables
Expand Down
2 changes: 2 additions & 0 deletions R/argument_convention.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#' that are used repeatedly to express an analysis.
#'
#' @param ... additional arguments for the lower level functions.
#' @param .aligns (`character`)\cr alignment for table contents (not including labels). When `NULL`, `"center"`
#' is applied. See [formatters::list_valid_aligns()] for a list of all currently supported alignments.
#' @param .all_col_counts (`vector` of `integer`)\cr each value represents a global count for a column. Values are
#' taken from `alt_counts_df` if specified (see [rtables::build_table()]).
#' @param .df_row (`data.frame`)\cr data frame across all of the columns for the given row split.
Expand Down
27 changes: 27 additions & 0 deletions R/formatting_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,33 @@ format_xx <- function(str) {
return(rtable_format)
}

#' Formatting Numeric Values By Significant Figures
#'
#' Format numeric values to print with a specified number of significant figures.
#'
#' @param sigfig (`integer`)\cr number of significant figures to display.
#'
#' @return An `rtables` formatting function.
#'
#' @examples
#' fmt_3sf <- format_sigfig(3)
#' fmt_3sf(1.658)
#' fmt_3sf(1e1)
#'
#' fmt_5sf <- format_sigfig(5)
#' fmt_5sf(0.57)
#' fmt_5sf(0.000025645)
#'
#' @family formatting functions
#' @export
format_sigfig <- function(sigfig) {
checkmate::assert_integerish(sigfig)
function(x, ...) {
if (!is.numeric(x)) stop("`format_sigfig` cannot be used for non-numeric values. Please choose another format.")
formatC(signif(x, digits = sigfig), digits = sigfig, format = "fg", flag = "#")
}
}

#' Formatting Fraction with Lower Threshold
#'
#' @description `r lifecycle::badge("stable")`
Expand Down
60 changes: 60 additions & 0 deletions R/imputation_rule.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#' Apply 1/3 or 1/2 Imputation Rule to Data
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @param x_stats (`named list`)\cr a named list of statistics, typically the results of [s_summary()].
#' @param stat (`character`)\cr statistic to return the value/NA level of according to the imputation
#' rule applied.
#' @param imp_rule (`character`)\cr imputation rule setting. Set to `"1/3"` to implement 1/3 imputation
#' rule or `"1/2"` to implement 1/2 imputation rule.
#' @param post (`flag`)\cr whether the data corresponds to a post-dose time-point (defaults to `FALSE`).
#' This parameter is only used when `imp_rule` is set to `"1/3"`.
#' @param avalcat_var (`character`)\cr name of variable that indicates whether a row in `df` corresponds
#' to an analysis value in category `"BLQ"`, `"LTR"`, `"<PCLLOQ"`, or none of the above
#' (defaults to `"AVALCAT1"`). Variable `avalcat_var` must be present in `df`.
#'
#' @return A `list` containing statistic value (`val`) and NA level (`na_level`) that should be displayed
#' according to the specified imputation rule.
#'
#' @seealso [analyze_vars_in_cols()] where this function can be implemented by setting the `imp_rule`
#' argument.
#'
#' @examples
#' set.seed(1)
#' df <- data.frame(
#' AVAL = runif(50, 0, 1),
#' AVALCAT1 = sample(c(1, "BLQ"), 50, replace = TRUE)
#' )
#' x_stats <- s_summary(df$AVAL)
#' imputation_rule(df, x_stats, "max", "1/3")
#' imputation_rule(df, x_stats, "geom_mean", "1/3")
#' imputation_rule(df, x_stats, "mean", "1/2")
#'
#' @export
imputation_rule <- function(df, x_stats, stat, imp_rule, post = FALSE, avalcat_var = "AVALCAT1") {
checkmate::assert_choice(avalcat_var, names(df))
checkmate::assert_choice(imp_rule, c("1/3", "1/2"))
n_blq <- sum(grepl("BLQ|LTR|<[1-9]|<PCLLOQ", df[[avalcat_var]]))
ltr_blq_ratio <- n_blq / max(1, nrow(df))

# defaults
val <- x_stats[[stat]]
na_level <- "NE"

if (imp_rule == "1/3") {
if (!post && stat == "geom_mean") val <- NA # 1/3_pre_LT, 1/3_pre_GT
if (ltr_blq_ratio > 1 / 3) {
if (stat != "geom_mean") na_level <- "ND" # 1/3_pre_GT, 1/3_post_GT
if (!post && !stat %in% c("median", "max")) val <- NA # 1/3_pre_GT
if (post && !stat %in% c("median", "max", "geom_mean")) val <- NA # 1/3_post_GT
}
} else if (imp_rule == "1/2") {
if (ltr_blq_ratio > 1 / 2 && !stat == "max") {
val <- NA # 1/2_GT
na_level <- "ND" # 1/2_GT
}
}

list(val = val, na_level = na_level)
}
50 changes: 40 additions & 10 deletions R/utils_rtables.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,21 +89,54 @@ cfun_by_flag <- function(analysis_var,

#' Content Row Function to Add Row Total to Labels
#'
#' This takes the label of the latest row split level and adds the row total in parentheses.
#' This takes the label of the latest row split level and adds the row total from `df` in parentheses.
#' This function differs from [c_label_n_alt()] by taking row counts from `df` rather than
#' `alt_counts_df`, and is used by [add_rowcounts()] when `alt_counts` is set to `FALSE`.
#'
#' @inheritParams argument_convention
#'
#' @return A `list` containing "row_count" with the row count value and the correct label.
#' @return A list with formatted [rtables::CellValue()] with the row count value and the correct label.
#'
#' @note It is important here to not use `df` but rather `.N_row` in the implementation, because
#' the former is already split by columns and will refer to the first column of the data only.
#'
#' @seealso [c_label_n_alt()] which performs the same function but retrieves row counts from
#' `alt_counts_df` instead of `df`.
#'
#' @keywords internal
c_label_n <- function(df,
labelstr,
.N_row) { # nolint
label <- paste0(labelstr, " (N=", .N_row, ")")
list(row_count = formatters::with_label(c(.N_row, .N_row), label))
in_rows(
.list = list(row_count = formatters::with_label(c(.N_row, .N_row), label)),
.formats = c(row_count = function(x, ...) "")
)
}

#' Content Row Function to Add `alt_counts_df` Row Total to Labels
#'
#' This takes the label of the latest row split level and adds the row total from `alt_counts_df`
#' in parentheses. This function differs from [c_label_n()] by taking row counts from `alt_counts_df`
#' rather than `df`, and is used by [add_rowcounts()] when `alt_counts` is set to `TRUE`.
#'
#' @inheritParams argument_convention
#'
#' @return A list with formatted [rtables::CellValue()] with the row count value and the correct label.
#'
#' @seealso [c_label_n()] which performs the same function but retrieves row counts from `df` instead
#' of `alt_counts_df`.
#'
#' @keywords internal
c_label_n_alt <- function(df,
labelstr,
.alt_df_row) {
N_row_alt <- nrow(.alt_df_row) # nolint
label <- paste0(labelstr, " (N=", N_row_alt, ")")
in_rows(
.list = list(row_count = formatters::with_label(c(N_row_alt, N_row_alt), label)),
.formats = c(row_count = function(x, ...) "")
)
}

#' Layout Creating Function to Add Row Total Counts
Expand All @@ -114,6 +147,8 @@ c_label_n <- function(df,
#' is a wrapper for [rtables::summarize_row_groups()].
#'
#' @inheritParams argument_convention
#' @param alt_counts (`flag`)\cr whether row counts should be taken from `alt_counts_df` (`TRUE`)
#' or from `df` (`FALSE`). Defaults to `FALSE`.
#'
#' @return A modified layout where the latest row split labels now have the row-wise
#' total counts (i.e. without column-based subsetting) attached in parentheses.
Expand All @@ -131,15 +166,10 @@ c_label_n <- function(df,
#' build_table(DM)
#'
#' @export
add_rowcounts <- function(lyt) {
c_lbl_n_fun <- make_afun(
c_label_n,
.stats = c("row_count"),
.formats = c(row_count = function(x, ...) "")
)
add_rowcounts <- function(lyt, alt_counts = FALSE) {
summarize_row_groups(
lyt,
cfun = c_lbl_n_fun
cfun = if (alt_counts) c_label_n_alt else c_label_n
)
}

Expand Down
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ reference:
desc: These functions are useful in defining an analysis.
contents:
- starts_with("h_")
- imputation_rule
- starts_with("or_")
- starts_with("prop_")
- summary_stats
Expand Down
Loading

0 comments on commit 5dee25f

Please sign in to comment.