diff --git a/release-candidate/coverage-report/index.html b/release-candidate/coverage-report/index.html new file mode 100644 index 00000000..74ab6a14 --- /dev/null +++ b/release-candidate/coverage-report/index.html @@ -0,0 +1,11247 @@ + + +
+ + + + + + + + + + + + + + + + + + + + + + +1 | ++ |
+ #' Transforming data.frame into Wide Format+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' @details instead of nesting duplicated values, the function will throw an error if the same parameter is+ |
+
4 | ++ |
+ #' provided twice for the same observation.+ |
+
5 | ++ |
+ #'+ |
+
6 | ++ |
+ #' @param data (`data.frame`) to be pivoted.+ |
+
7 | ++ |
+ #' @param id (`character`) the name of the column identifying the observations. It will correspond to the row names+ |
+
8 | ++ |
+ #' of the output.+ |
+
9 | ++ |
+ #' @param param_from (`character`) the name of the column containing the names of the parameters to be pivoted. The+ |
+
10 | ++ |
+ #' unique values in this column will become column names in the output.+ |
+
11 | ++ |
+ #' @param value_from (`character`) the name of the column containing the values that will populate the output.+ |
+
12 | ++ |
+ #'+ |
+
13 | ++ |
+ #' @return `data.frame` in a wide format.+ |
+
14 | ++ |
+ #'+ |
+
15 | ++ |
+ #' @export+ |
+
16 | ++ |
+ #' @examples+ |
+
17 | ++ |
+ #' test_data <- data.frame(+ |
+
18 | ++ |
+ #' the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"),+ |
+
19 | ++ |
+ #' the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"),+ |
+
20 | ++ |
+ #' the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"),+ |
+
21 | ++ |
+ #' the_val = c(65, 165, "M", 66, "F", 166, 155, TRUE)+ |
+
22 | ++ |
+ #' )+ |
+
23 | ++ |
+ #'+ |
+
24 | ++ |
+ #' mini_pivot_wider(test_data, "the_obs", "the_param", "the_val")+ |
+
25 | ++ |
+ mini_pivot_wider <- function(data,+ |
+
26 | ++ |
+ id,+ |
+
27 | ++ |
+ param_from,+ |
+
28 | ++ |
+ value_from) {+ |
+
29 | ++ |
+ # check for duplication of observation-parameter+ |
+
30 | +3x | +
+ checkmate::assert_data_frame(data, min.rows = 1, min.cols = 3)+ |
+
31 | +3x | +
+ checkmate::assert_character(id, len = 1)+ |
+
32 | +3x | +
+ checkmate::assert_character(param_from, len = 1)+ |
+
33 | +1x | +
+ checkmate::assert_character(value_from, len = 1)+ |
+
34 | +1x | +
+ checkmate::assert_subset(c(id, param_from, value_from), colnames(data))+ |
+
35 | +1x | +
+ checkmate::assert_false(any(duplicated(data[, c(id, param_from)])))+ |
+
36 | ++ | + + | +
37 | +1x | +
+ unique_id <- sort(unique(data[[id]]))+ |
+
38 | +1x | +
+ param <- data[[param_from]]+ |
+
39 | ++ | + + | +
40 | +1x | +
+ mini_data <- data[, c(id, param_from, value_from)]+ |
+
41 | +1x | +
+ data_ls <- split(mini_data, param)+ |
+
42 | ++ | + + | +
43 | ++ |
+ # transform to named vector+ |
+
44 | +1x | +
+ data_vec <-+ |
+
45 | +1x | +
+ lapply(+ |
+
46 | +1x | +
+ data_ls,+ |
+
47 | +1x | +
+ function(x) setNames(x[[value_from]], x[[id]])+ |
+
48 | ++ |
+ )+ |
+
49 | ++ | + + | +
50 | ++ |
+ # query each id in each param+ |
+
51 | +1x | +
+ all_vec <- lapply(data_vec, function(x) setNames(x[unique_id], unique_id))+ |
+
52 | ++ | + + | +
53 | +1x | +
+ bind_data <- as.data.frame(all_vec)+ |
+
54 | ++ | + + | +
55 | +1x | +
+ res <- cbind(id = unique_id, bind_data)+ |
+
56 | +1x | +
+ rownames(res) <- NULL+ |
+
57 | ++ | + + | +
58 | +1x | +
+ res+ |
+
59 | ++ |
+ }+ |
+
60 | ++ | + + | +
61 | ++ |
+ #' Transforming data.frame with Complex Identifiers into Wide Format+ |
+
62 | ++ |
+ #'+ |
+
63 | ++ |
+ #' @details This function allows to identify observations on the basis of several columns. Warning: Instead of nesting+ |
+
64 | ++ |
+ #' duplicated values, the function will throw an error if the same parameter is provided twice for the same+ |
+
65 | ++ |
+ #' observation.+ |
+
66 | ++ |
+ #'+ |
+
67 | ++ |
+ #' @param data (`data.frame`) to be pivoted.+ |
+
68 | ++ |
+ #' @param id (`character`) the name of the columns whose combination uniquely identify the observations.+ |
+
69 | ++ |
+ #' @param param_from (`character`) the name of the column containing the names of the parameters to be pivoted. The+ |
+
70 | ++ |
+ #' unique values in this column will become column names in the output.+ |
+
71 | ++ |
+ #' @param value_from (`character`) the name of the column containing the values that will populate the output.+ |
+
72 | ++ |
+ #' @param drop_na (`logical`) should column containing only `NAs` be dropped.+ |
+
73 | ++ |
+ #'+ |
+
74 | ++ |
+ #' @return `data.frame` in a wide format.+ |
+
75 | ++ |
+ #'+ |
+
76 | ++ |
+ #' @export+ |
+
77 | ++ |
+ #' @examples+ |
+
78 | ++ |
+ #' test_data <- data.frame(+ |
+
79 | ++ |
+ #' the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"),+ |
+
80 | ++ |
+ #' the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"),+ |
+
81 | ++ |
+ #' the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"),+ |
+
82 | ++ |
+ #' the_val = c(65, 165, "M", 66, "F", 166, 155, TRUE)+ |
+
83 | ++ |
+ #' )+ |
+
84 | ++ |
+ #'+ |
+
85 | ++ |
+ #' multi_pivot_wider(test_data, c("the_obs", "the_obs2"), "the_param", "the_val")+ |
+
86 | ++ |
+ #' multi_pivot_wider(test_data, "the_obs2", "the_param", "the_val")+ |
+
87 | ++ |
+ multi_pivot_wider <- function(data,+ |
+
88 | ++ |
+ id,+ |
+
89 | ++ |
+ param_from,+ |
+
90 | ++ |
+ value_from,+ |
+
91 | ++ |
+ drop_na = FALSE) {+ |
+
92 | ++ |
+ # check for duplication of observation-parameter+ |
+
93 | +19x | +
+ checkmate::assert_data_frame(data, min.rows = 1, min.cols = 3)+ |
+
94 | +19x | +
+ checkmate::assert_character(id)+ |
+
95 | +19x | +
+ checkmate::assert_character(param_from, len = 1)+ |
+
96 | +19x | +
+ checkmate::assert_character(value_from, len = 1)+ |
+
97 | +19x | +
+ checkmate::assert_false(any(duplicated(data[, c(id, param_from)])))+ |
+
98 | +18x | +
+ checkmate::assert_subset(c(id, param_from, value_from), colnames(data))+ |
+
99 | ++ | + + | +
100 | ++ |
+ # find a way to sort+ |
+
101 | +18x | +
+ unique_id <- unique(data[id])+ |
+
102 | +18x | +
+ key <- apply(unique_id[id], 1, paste, collapse = "-")+ |
+
103 | +18x | +
+ unique_id <- cbind(key, unique_id)+ |
+
104 | ++ | + + | +
105 | +18x | +
+ param <- data[[param_from]]+ |
+
106 | ++ | + + | +
107 | +18x | +
+ mini_data <- data[, c(param_from, value_from)]+ |
+
108 | +18x | +
+ f_key <- apply(data[id], 1, paste, collapse = "-")+ |
+
109 | +18x | +
+ mini_data <- cbind(f_key, mini_data)+ |
+
110 | ++ | + + | +
111 | +18x | +
+ data_ls <- split(mini_data, param)+ |
+
112 | ++ | + + | +
113 | ++ |
+ # Transform to named vector, the first column is the key.+ |
+
114 | +18x | +
+ data_vec <-+ |
+
115 | +18x | +
+ lapply(+ |
+
116 | +18x | +
+ data_ls,+ |
+
117 | +18x | +
+ function(x) setNames(x[[value_from]], x[, 1])+ |
+
118 | ++ |
+ )+ |
+
119 | ++ | + + | +
120 | ++ |
+ # query each id in each param+ |
+
121 | +18x | +
+ all_vec <- lapply(data_vec, function(x) x[unique_id[, 1]])+ |
+
122 | ++ | + + | +
123 | +16x | +
+ if (drop_na) all_vec <- Filter(function(x) !all(is.na(x)), all_vec)+ |
+
124 | ++ | + + | +
125 | +18x | +
+ bind_data <- do.call(cbind, all_vec)+ |
+
126 | ++ | + + | +
127 | +18x | +
+ res <- cbind(unique_id[, -1, drop = FALSE], bind_data)+ |
+
128 | ++ | + + | +
129 | +18x | +
+ rownames(res) <- NULL+ |
+
130 | +18x | +
+ res+ |
+
131 | ++ |
+ }+ |
+
132 | ++ | + + | +
133 | ++ |
+ #' Transforming data.frame with multiple Data Column into Wide Format+ |
+
134 | ++ |
+ #'+ |
+
135 | ++ |
+ #' @details This function is adapted to cases where the data are distributed in several columns while the name of the+ |
+
136 | ++ |
+ #' parameter is in one. Typical example is `adsub` where numeric data are stored in `AVAL` while categorical data are+ |
+
137 | ++ |
+ #' in `AVALC`.+ |
+
138 | ++ |
+ #'+ |
+
139 | ++ |
+ #' @param data (`data.frame`) to be pivoted.+ |
+
140 | ++ |
+ #' @param id (`character`) the name of the columns whose combination uniquely identify the observations.+ |
+
141 | ++ |
+ #' @param param_from (`character`) the name of the columns containing the names of the parameters to be pivoted. The+ |
+
142 | ++ |
+ #' unique values in this column will become column names in the output.+ |
+
143 | ++ |
+ #' @param value_from (`character`) the name of the column containing the values that will populate the output.+ |
+
144 | ++ |
+ #' @param labels_from (`character`) the name of the column congaing the labels of the new columns. from. If not+ |
+
145 | ++ |
+ #' provided, the labels will be equal to the column names. When several labels are available for the same column, the+ |
+
146 | ++ |
+ #' first one will be selected.+ |
+
147 | ++ |
+ #'+ |
+
148 | ++ |
+ #' @return `list` of `data.frame` in a wide format with label attribute attached to each columns.+ |
+
149 | ++ |
+ #'+ |
+
150 | ++ |
+ #' @export+ |
+
151 | ++ |
+ #' @examples+ |
+
152 | ++ |
+ #' test_data <- data.frame(+ |
+
153 | ++ |
+ #' the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"),+ |
+
154 | ++ |
+ #' the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"),+ |
+
155 | ++ |
+ #' the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"),+ |
+
156 | ++ |
+ #' the_label = c(+ |
+
157 | ++ |
+ #' "Weight (Kg)", "Height (cm)", "Gender", "Weight (Kg)",+ |
+
158 | ++ |
+ #' "Gender", "Height (cm)", "Height (cm)", "Pre-condition"+ |
+
159 | ++ |
+ #' ),+ |
+
160 | ++ |
+ #' the_val = c(65, 165, NA, 66, NA, 166, 155, NA),+ |
+
161 | ++ |
+ #' the_val2 = c(65, 165, "M", 66, "F", 166, 155, TRUE)+ |
+
162 | ++ |
+ #' )+ |
+
163 | ++ |
+ #'+ |
+
164 | ++ |
+ #' x <- poly_pivot_wider(+ |
+
165 | ++ |
+ #' test_data,+ |
+
166 | ++ |
+ #' c("the_obs", "the_obs2"),+ |
+
167 | ++ |
+ #' "the_param",+ |
+
168 | ++ |
+ #' c("the_val", "the_val2"),+ |
+
169 | ++ |
+ #' "the_label"+ |
+
170 | ++ |
+ #' )+ |
+
171 | ++ |
+ #' x+ |
+
172 | ++ |
+ #' Reduce(function(u, v) merge(u, v, all = TRUE), x)+ |
+
173 | ++ |
+ poly_pivot_wider <- function(data,+ |
+
174 | ++ |
+ id,+ |
+
175 | ++ |
+ param_from,+ |
+
176 | ++ |
+ value_from,+ |
+
177 | ++ |
+ labels_from = NULL) {+ |
+
178 | ++ |
+ # other tests are performed at lower levels.+ |
+
179 | +7x | +
+ checkmate::assert_character(value_from, unique = TRUE)+ |
+
180 | ++ | + + | +
181 | ++ |
+ # Create new labels for new columns.+ |
+
182 | +7x | +
+ if (is.null(labels_from) || labels_from == param_from) {+ |
+
183 | +1x | +
+ new_labels <- unique(data[[param_from]])+ |
+
184 | +1x | +
+ names(new_labels) <- new_labels+ |
+
185 | ++ |
+ } else {+ |
+
186 | +6x | +
+ checkmate::assert_character(labels_from, len = 1)+ |
+
187 | +6x | +
+ checkmate::assert_subset(labels_from, colnames(data))+ |
+
188 | ++ | + + | +
189 | +6x | +
+ new_labels_df <- data[, c(labels_from, param_from)]+ |
+
190 | +6x | +
+ new_labels_df <- unique(new_labels_df)+ |
+
191 | ++ | + + | +
192 | +6x | +
+ new_labels <- as.character(new_labels_df[[labels_from]])+ |
+
193 | +6x | +
+ names(new_labels) <- as.character(new_labels_df[[param_from]])+ |
+
194 | ++ |
+ }+ |
+
195 | ++ | + + | +
196 | ++ |
+ # Retrieve old labels.+ |
+
197 | +7x | +
+ old_labels <- lapply(data, attr, "label")+ |
+
198 | +7x | +
+ n_old_label <- names(old_labels)+ |
+
199 | +7x | +
+ null_label <- unlist(lapply(old_labels, is.null))+ |
+
200 | +7x | +
+ old_labels[null_label] <- n_old_label[null_label]+ |
+
201 | +7x | +
+ old_labels <- unlist(old_labels)+ |
+
202 | ++ | + + | +
203 | +7x | +
+ all_labels <- c(new_labels, old_labels)+ |
+
204 | ++ | + + | +
205 | +7x | +
+ res_ls <- list()+ |
+
206 | +7x | +
+ for (n_value_from in value_from) {+ |
+
207 | +14x | +
+ res <- multi_pivot_wider(+ |
+
208 | +14x | +
+ data = data,+ |
+
209 | +14x | +
+ id = id,+ |
+
210 | +14x | +
+ param_from = param_from,+ |
+
211 | +14x | +
+ value_from = n_value_from,+ |
+
212 | +14x | +
+ drop_na = TRUE+ |
+
213 | ++ |
+ )+ |
+
214 | ++ | + + | +
215 | +14x | +
+ res <- attr_label_df(res, all_labels[colnames(res)])+ |
+
216 | +14x | +
+ res_ls[[n_value_from]] <- res+ |
+
217 | ++ |
+ }+ |
+
218 | +7x | +
+ res_ls+ |
+
219 | ++ |
+ }+ |
+
1 | ++ |
+ #' Join `adsub` to `adsl`+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' @param adam_db (`list` of `data.frame`) object input with an `adsl` and `adsub` table.+ |
+
4 | ++ |
+ #' @param keys (`character`) the name of the columns in `adsl` uniquely identifying a row.+ |
+
5 | ++ |
+ #' @param continuous_var (`character`) the value of a parameter in the `PARAMCD` column of the `adsub` table from which+ |
+
6 | ++ |
+ #' columns containing continuous values should be created. If `"all"`, all parameter values are selected, if `NULL`,+ |
+
7 | ++ |
+ #' none are selected.+ |
+
8 | ++ |
+ #' @param categorical_var (`character`) the value of a parameter in the `PARAMCD` column of the `adsub` table from which+ |
+
9 | ++ |
+ #' columns containing categorical values should be created. If `"all"`, all parameter values are selected, if `NULL`,+ |
+
10 | ++ |
+ #' none are selected.+ |
+
11 | ++ |
+ #' @param continuous_suffix (`string`) the suffixes to add to the newly generated columns containing continuous values.+ |
+
12 | ++ |
+ #' @param categorical_suffix (`string`) the suffixes to add to the newly generated columns containing categorical+ |
+
13 | ++ |
+ #' values.+ |
+
14 | ++ |
+ #'+ |
+
15 | ++ |
+ #' @return a `list` of `data.frame` with new columns in the `adsl` table.+ |
+
16 | ++ |
+ #'+ |
+
17 | ++ |
+ #' @rdname join_adsub_adsl+ |
+
18 | ++ |
+ #' @export+ |
+
19 | ++ |
+ #'+ |
+
20 | ++ |
+ join_adsub_adsl <- function(adam_db,+ |
+
21 | ++ |
+ keys,+ |
+
22 | ++ |
+ continuous_var,+ |
+
23 | ++ |
+ categorical_var,+ |
+
24 | ++ |
+ continuous_suffix,+ |
+
25 | ++ |
+ categorical_suffix) {+ |
+
26 | +5x | +
+ UseMethod("join_adsub_adsl")+ |
+
27 | ++ |
+ }+ |
+
28 | ++ | + + | +
29 | ++ |
+ #' @rdname join_adsub_adsl+ |
+
30 | ++ |
+ #' @export+ |
+
31 | ++ |
+ #'+ |
+
32 | ++ |
+ #' @examples+ |
+
33 | ++ |
+ #' adsl <- data.frame(+ |
+
34 | ++ |
+ #' USUBJID = c("S1", "S2", "S3", "S4"),+ |
+
35 | ++ |
+ #' STUDYID = "My_study",+ |
+
36 | ++ |
+ #' AGE = c(60, 44, 23, 31)+ |
+
37 | ++ |
+ #' )+ |
+
38 | ++ |
+ #'+ |
+
39 | ++ |
+ #' adsub <- data.frame(+ |
+
40 | ++ |
+ #' USUBJID = c("S1", "S2", "S3", "S4", "S1", "S2", "S3"),+ |
+
41 | ++ |
+ #' STUDYID = "My_study",+ |
+
42 | ++ |
+ #' PARAM = c("weight", "weight", "weight", "weight", "height", "height", "height"),+ |
+
43 | ++ |
+ #' PARAMCD = c("w", "w", "w", "w", "h", "h", "h"),+ |
+
44 | ++ |
+ #' AVAL = c(98, 75, 70, 71, 182, 155, 152),+ |
+
45 | ++ |
+ #' AVALC = c(">80", "<=80", "<=80", "<=80", ">180", "<=180", "<=180")+ |
+
46 | ++ |
+ #' )+ |
+
47 | ++ |
+ #'+ |
+
48 | ++ |
+ #' db <- list(adsl = adsl, adsub = adsub)+ |
+
49 | ++ |
+ #'+ |
+
50 | ++ |
+ #' x <- join_adsub_adsl(adam_db = db)+ |
+
51 | ++ |
+ #' x <- join_adsub_adsl(adam_db = db, continuous_var = c("w", "h"), categorical_var = "h")+ |
+
52 | ++ |
+ join_adsub_adsl.list <- function(adam_db,+ |
+
53 | ++ |
+ keys = c("USUBJID", "STUDYID"),+ |
+
54 | ++ |
+ continuous_var = "all",+ |
+
55 | ++ |
+ categorical_var = "all",+ |
+
56 | ++ |
+ continuous_suffix = "",+ |
+
57 | ++ |
+ categorical_suffix = "_CAT") {+ |
+
58 | +5x | +
+ checkmate::assert_list(adam_db, types = "data.frame")+ |
+
59 | +5x | +
+ checkmate::assert_names(names(adam_db), must.include = c("adsl", "adsub"))+ |
+
60 | +5x | +
+ checkmate::assert_names(names(adam_db$adsub), must.include = c("PARAM", "PARAMCD", "AVAL", "AVALC", keys))+ |
+
61 | +5x | +
+ checkmate::assert_names(names(adam_db$adsl), must.include = keys)+ |
+
62 | +5x | +
+ checkmate::assert_string(continuous_suffix)+ |
+
63 | +5x | +
+ checkmate::assert_string(categorical_suffix)+ |
+
64 | ++ | + + | +
65 | +5x | +
+ value_col <- c("AVAL", "AVALC")+ |
+
66 | +5x | +
+ vars_ls <- list(continuous_var, categorical_var)+ |
+
67 | +5x | +
+ suffix_ls <- list(continuous_suffix, categorical_suffix)+ |
+
68 | ++ | + + | +
69 | ++ |
+ # Select variables names.+ |
+
70 | +5x | +
+ vars_ls <- lapply(vars_ls, function(x) {+ |
+
71 | +10x | +
+ if (identical(x, "all")) {+ |
+
72 | +6x | +
+ unique(adam_db$adsub$PARAMCD)+ |
+
73 | ++ |
+ } else {+ |
+
74 | +4x | +
+ x+ |
+
75 | ++ |
+ }+ |
+
76 | ++ |
+ })+ |
+
77 | ++ | + + | +
78 | ++ |
+ # Create new variable names.+ |
+
79 | +5x | +
+ vars_nam <- mapply(+ |
+
80 | +5x | +
+ function(x, y) {+ |
+
81 | +10x | +
+ if (!is.null(x)) {+ |
+
82 | +6x | +
+ names(x) <- paste0(x, y)+ |
+
83 | +6x | +
+ x+ |
+
84 | ++ |
+ } else {+ |
+
85 | +4x | +
+ NULL+ |
+
86 | ++ |
+ }+ |
+
87 | ++ |
+ },+ |
+
88 | +5x | +
+ vars_ls,+ |
+
89 | +5x | +
+ suffix_ls,+ |
+
90 | +5x | +
+ SIMPLIFY = FALSE+ |
+
91 | ++ |
+ )+ |
+
92 | ++ | + + | +
93 | ++ |
+ # Test if new columns already exist in adsl.+ |
+
94 | +5x | +
+ assert_names_notadsl(vars_nam, adam_db$adsl)+ |
+
95 | ++ | + + | +
96 | ++ |
+ # Test if categorical and continuous column will result in the same column name.+ |
+
97 | +5x | +
+ assert_names_collision(vars_nam)+ |
+
98 | ++ | + + | +
99 | ++ |
+ # Pivot and keep labels.+ |
+
100 | +5x | +
+ adsub_wide_ls <-+ |
+
101 | +5x | +
+ adam_db$adsub %>%+ |
+
102 | +5x | +
+ poly_pivot_wider(id = keys, param_from = "PARAMCD", value_from = value_col, labels_from = "PARAM")+ |
+
103 | ++ | + + | +
104 | ++ |
+ # Merge categorical and continuous variables.+ |
+
105 | +5x | +
+ for (i in seq_along(value_col)) {+ |
+
106 | +10x | +
+ adsub_df <- adsub_wide_ls[[value_col[i]]]+ |
+
107 | +10x | +
+ adsub_df <- adsub_df[, c(keys, vars_nam[[i]])]+ |
+
108 | +10x | +
+ colnames(adsub_df) <- c(keys, names(vars_nam[[i]]))+ |
+
109 | ++ | + + | +
110 | +10x | +
+ adam_db$adsl <- dplyr::left_join(+ |
+
111 | +10x | +
+ x = adam_db$adsl,+ |
+
112 | +10x | +
+ y = adsub_df,+ |
+
113 | +10x | +
+ by = keys+ |
+
114 | ++ |
+ )+ |
+
115 | ++ |
+ }+ |
+
116 | ++ | + + | +
117 | +5x | +
+ adam_db+ |
+
118 | ++ |
+ }+ |
+
119 | ++ | + + | +
120 | ++ |
+ # Utility functions ----+ |
+
121 | ++ | + + | +
122 | ++ |
+ assert_names_collision <- function(vars_nam) {+ |
+
123 | +5x | +
+ final_names_ls <- lapply(vars_nam, names)+ |
+
124 | +5x | +
+ in_both <- final_names_ls[[1]] %in% final_names_ls[[2]]+ |
+
125 | +5x | +
+ if (any(in_both)) {+ |
+
126 | +! | +
+ warning(+ |
+
127 | +! | +
+ paste(+ |
+
128 | +! | +
+ toString(final_names_ls[[1]][in_both]),+ |
+
129 | +! | +
+ "are new columns for continuous and categorical variable,+ |
+
130 | +! | +
+ Please set different `continuous_suffix` or `categorical_suffix`+ |
+
131 | +! | +
+ or select different columns to avoid automatic renaming."+ |
+
132 | ++ |
+ )+ |
+
133 | ++ |
+ )+ |
+
134 | ++ |
+ }+ |
+
135 | ++ |
+ }+ |
+
136 | ++ | + + | +
137 | ++ |
+ assert_names_notadsl <- function(vars_nam, df) {+ |
+
138 | +5x | +
+ final_names <- unique(sapply(vars_nam, names))+ |
+
139 | +5x | +
+ already_in_adsl <- final_names %in% colnames(df)+ |
+
140 | +5x | +
+ if (any(already_in_adsl)) {+ |
+
141 | +1x | +
+ warning(+ |
+
142 | +1x | +
+ paste(+ |
+
143 | +1x | +
+ toString(final_names[already_in_adsl]),+ |
+
144 | +1x | +
+ "already exist in adsl, the name will default to another values.+ |
+
145 | +1x | +
+ Please change `continuous_suffix` or `categorical_suffix` to avoid automatic reneaming"+ |
+
146 | ++ |
+ )+ |
+
147 | ++ |
+ )+ |
+
148 | ++ |
+ }+ |
+
149 | ++ |
+ }+ |
+
1 | ++ |
+ #' Filter Data with Log+ |
+
2 | ++ |
+ #' @param data (`data.frame`) input data to subset, or named (`list`) of (`data.frame`).+ |
+
3 | ++ |
+ #' @param condition (`call`) of subset condition. Must evaluate as logical.+ |
+
4 | ++ |
+ #' @param suffix (`string`) optional argument describing the filter.+ |
+
5 | ++ |
+ #' @param ... further arguments to be passed to or from other methods.+ |
+
6 | ++ |
+ #'+ |
+
7 | ++ |
+ #' @details+ |
+
8 | ++ |
+ #' `log_filter` will filter the data/named list of data according to the `condition`.+ |
+
9 | ++ |
+ #' All the variables in `condition` must exist in the data (as variables) or in the parent+ |
+
10 | ++ |
+ #' frame(e.g., in global environment).+ |
+
11 | ++ |
+ #' For named list of data, if `ADSL` is available, `log_filter` will also try to subset all+ |
+
12 | ++ |
+ #' other datasets with `USUBJID`.+ |
+
13 | ++ |
+ #' @export+ |
+
14 | ++ |
+ log_filter <- function(data, condition, ...) {+ |
+
15 | +40x | +
+ UseMethod("log_filter")+ |
+
16 | ++ |
+ }+ |
+
17 | ++ | + + | +
18 | ++ |
+ #' @rdname log_filter+ |
+
19 | ++ |
+ #' @export+ |
+
20 | ++ |
+ #' @examples+ |
+
21 | ++ |
+ #' data <- iris+ |
+
22 | ++ |
+ #' attr(data$Sepal.Length, "label") <- "cm"+ |
+
23 | ++ |
+ #' log_filter(data, Sepal.Length >= 7)+ |
+
24 | ++ |
+ #'+ |
+
25 | ++ |
+ log_filter.data.frame <- function(data, condition, suffix = NULL, ...) {+ |
+
26 | +23x | +
+ checkmate::assert_string(suffix, null.ok = TRUE)+ |
+
27 | ++ | + + | +
28 | +23x | +
+ condition <- match.call()$condition+ |
+
29 | +23x | +
+ vars <- all.vars(condition)+ |
+
30 | +23x | +
+ var_in_env <- vapply(vars, exists, envir = parent.frame(), inherits = TRUE, FUN.VALUE = TRUE)+ |
+
31 | +23x | +
+ var_in_data <- vapply(vars, `%in%`, table = names(data), FUN.VALUE = TRUE)+ |
+
32 | +23x | +
+ if (!all(var_in_env | var_in_data)) {+ |
+
33 | +1x | +
+ stop(sprintf("Variable %s not found in data or environment.", toString(vars[!(var_in_data | var_in_env)])))+ |
+
34 | ++ |
+ }+ |
+
35 | +22x | +
+ res <- eval(bquote(dplyr::filter(data, .(condition))))+ |
+
36 | +22x | +
+ rows <- list(list(init = nrow(data), final = nrow(res), suffix = suffix))+ |
+
37 | +22x | +
+ rlbl <- paste0(deparse(condition), collapse = "")+ |
+
38 | +22x | +
+ rlbl <- stringr::str_replace_all(rlbl, "\\s+", " ")+ |
+
39 | +22x | +
+ names(rows) <- rlbl+ |
+
40 | +22x | +
+ attr(res, "rows") <- c(attr(data, "rows"), rows)+ |
+
41 | ++ | + + | +
42 | +22x | +
+ res+ |
+
43 | ++ |
+ }+ |
+
44 | ++ | + + | +
45 | ++ |
+ #' @rdname log_filter+ |
+
46 | ++ |
+ #' @param table (`string`) table name.+ |
+
47 | ++ |
+ #' @param by (`character`) variable names shared by `adsl` and other datasets for filtering.+ |
+
48 | ++ |
+ #' @export+ |
+
49 | ++ |
+ #' @examples+ |
+
50 | ++ |
+ #' log_filter(list(iris = iris), Sepal.Length >= 7, "iris", character(0))+ |
+
51 | ++ |
+ log_filter.list <- function(data, condition, table, by = c("USUBJID", "STUDYID"), suffix = NULL, ...) {+ |
+
52 | +17x | +
+ checkmate::assert_list(data, types = "data.frame", names = "unique")+ |
+
53 | +17x | +
+ checkmate::assert_subset(table, names(data))+ |
+
54 | +17x | +
+ checkmate::assert_names(colnames(data[[table]]), must.include = by)+ |
+
55 | +17x | +
+ condition <- match.call()$condition+ |
+
56 | +17x | +
+ data[[table]] <- eval(bquote(log_filter(data[[table]], .(condition), .(suffix))))+ |
+
57 | +17x | +
+ if (identical(table, "adsl")) {+ |
+
58 | +8x | +
+ for (k in setdiff(names(data), "adsl")) {+ |
+
59 | +8x | +
+ if (all(by %in% names(data[[k]]))) {+ |
+
60 | +1x | +
+ if (length(by) == 0) by <- intersect(names(data[[k]]), names(data$adsl))+ |
+
61 | ++ | + + | +
62 | +8x | +
+ ori_n <- nrow(data[[k]])+ |
+
63 | +8x | +
+ ori_att <- attr(data[[k]], "rows")+ |
+
64 | ++ | + + | +
65 | +8x | +
+ data[[k]] <- dplyr::semi_join(data[[k]], data$adsl, by = by)+ |
+
66 | ++ | + + | +
67 | +8x | +
+ rows <- list(list(init = ori_n, final = nrow(data[[k]]), suffix = suffix))+ |
+
68 | +8x | +
+ names(rows) <- paste0("Filtered by adsl: ", deparse(condition), collapse = "")+ |
+
69 | +8x | +
+ attr(data[[k]], "rows") <- c(ori_att, rows)+ |
+
70 | ++ |
+ }+ |
+
71 | ++ |
+ }+ |
+
72 | ++ |
+ }+ |
+
73 | +17x | +
+ return(data)+ |
+
74 | ++ |
+ }+ |
+
75 | ++ | + + | +
76 | ++ |
+ # Get Log ----+ |
+
77 | ++ | + + | +
78 | ++ |
+ #' Get Log+ |
+
79 | ++ |
+ #'+ |
+
80 | ++ |
+ #' @param data (`list` of `data.frame` or `data.frame`) filtered with `log_filter`.+ |
+
81 | ++ |
+ #' @param incl (`flag`) should information about unfiltered `data.frame` be printed.+ |
+
82 | ++ |
+ #' @param incl.adsl (`flag`) should indication of filtering performed through `adsl` be printed.+ |
+
83 | ++ |
+ #'+ |
+
84 | ++ |
+ #' @export+ |
+
85 | ++ |
+ get_log <- function(data, incl, incl.adsl) {+ |
+
86 | +39x | +
+ UseMethod("get_log")+ |
+
87 | ++ |
+ }+ |
+
88 | ++ | + + | +
89 | ++ |
+ #' @rdname get_log+ |
+
90 | ++ |
+ #' @export+ |
+
91 | ++ |
+ #' @examples+ |
+
92 | ++ |
+ #' data <- log_filter(iris, Sepal.Length >= 7, "xx")+ |
+
93 | ++ |
+ #' data <- log_filter(data, Sepal.Length < 2)+ |
+
94 | ++ |
+ #' data <- log_filter(data, Sepal.Length >= 2, "yy")+ |
+
95 | ++ |
+ #' get_log(data)+ |
+
96 | ++ |
+ #'+ |
+
97 | ++ |
+ get_log.data.frame <- function(data, incl = TRUE, incl.adsl = TRUE) {+ |
+
98 | +27x | +
+ checkmate::assert_flag(incl)+ |
+
99 | ++ | + + | +
100 | +27x | +
+ att <- attr(data, "rows")+ |
+
101 | +27x | +
+ if (!incl.adsl) {+ |
+
102 | +4x | +
+ sel <- grepl("Filtered by adsl", names(att))+ |
+
103 | +4x | +
+ att <- att[!sel]+ |
+
104 | ++ |
+ }+ |
+
105 | ++ | + + | +
106 | +27x | +
+ if (length(att) != 0L) {+ |
+
107 | +17x | +
+ start_row <- lapply(att, "[[", "init")+ |
+
108 | +17x | +
+ end_row <- lapply(att, "[[", "final")+ |
+
109 | +17x | +
+ suffix <- lapply(att, "[[", "suffix")+ |
+
110 | +17x | +
+ suffix <- vapply(suffix, function(x) ifelse(is.null(x), "", paste0(x, ": ")), character(1))+ |
+
111 | +17x | +
+ res <- paste0(suffix, names(att), " [", start_row, " --> ", end_row, " rows.]")+ |
+
112 | +10x | +
+ } else if (incl) {+ |
+
113 | +6x | +
+ paste0("No filtering [", nrow(data), " rows.]")+ |
+
114 | ++ |
+ } else {+ |
+
115 | +4x | +
+ NULL+ |
+
116 | ++ |
+ }+ |
+
117 | ++ |
+ }+ |
+
118 | ++ | + + | +
119 | ++ | + + | +
120 | ++ |
+ #' @rdname get_log+ |
+
121 | ++ |
+ #' @export+ |
+
122 | ++ |
+ #' @examples+ |
+
123 | ++ |
+ #' data <- log_filter(+ |
+
124 | ++ |
+ #' list(iris1 = iris, iris2 = iris),+ |
+
125 | ++ |
+ #' Sepal.Length >= 7,+ |
+
126 | ++ |
+ #' "iris1",+ |
+
127 | ++ |
+ #' character(0),+ |
+
128 | ++ |
+ #' "Sep"+ |
+
129 | ++ |
+ #' )+ |
+
130 | ++ |
+ #' get_log(data)+ |
+
131 | ++ |
+ #'+ |
+
132 | ++ |
+ get_log.list <- function(data, incl = TRUE, incl.adsl = TRUE) {+ |
+
133 | +12x | +
+ checkmate::assert_list(data, types = "data.frame", names = "unique")+ |
+
134 | +12x | +
+ checkmate::assert_flag(incl)+ |
+
135 | ++ | + + | +
136 | +12x | +
+ lapply(data, get_log, incl = incl, incl.adsl = incl.adsl)+ |
+
137 | ++ |
+ }+ |
+
138 | ++ | + + | +
139 | ++ |
+ # Print Log ----+ |
+
140 | ++ | + + | +
141 | ++ |
+ #' Print Log+ |
+
142 | ++ |
+ #'+ |
+
143 | ++ |
+ #' @inheritParams get_log+ |
+
144 | ++ |
+ #' @export+ |
+
145 | ++ |
+ #'+ |
+
146 | ++ |
+ print_log <- function(data, incl, incl.adsl) {+ |
+
147 | +10x | +
+ UseMethod("print_log")+ |
+
148 | ++ |
+ }+ |
+
149 | ++ | + + | +
150 | ++ |
+ #' @rdname print_log+ |
+
151 | ++ |
+ #' @export+ |
+
152 | ++ |
+ #' @examples+ |
+
153 | ++ |
+ #' data <- log_filter(iris, Sepal.Length >= 7, "Sep")+ |
+
154 | ++ |
+ #' print_log(data)+ |
+
155 | ++ |
+ print_log.data.frame <- function(data, incl = TRUE, incl.adsl = TRUE) {+ |
+
156 | +3x | +
+ checkmate::assert_flag(incl)+ |
+
157 | ++ | + + | +
158 | +3x | +
+ cat("Filter Log:")+ |
+
159 | +3x | +
+ cat(paste0("\n ", get_log(data, incl = incl, incl.adsl = incl.adsl)))+ |
+
160 | +3x | +
+ cat("\n")+ |
+
161 | +3x | +
+ invisible()+ |
+
162 | ++ |
+ }+ |
+
163 | ++ | + + | +
164 | ++ |
+ #' @rdname print_log+ |
+
165 | ++ |
+ #' @export+ |
+
166 | ++ |
+ #' @examples+ |
+
167 | ++ |
+ #' data <- log_filter(+ |
+
168 | ++ |
+ #' list(+ |
+
169 | ++ |
+ #' adsl = iris,+ |
+
170 | ++ |
+ #' iris2 = iris,+ |
+
171 | ++ |
+ #' mtcars = mtcars,+ |
+
172 | ++ |
+ #' iris3 = iris+ |
+
173 | ++ |
+ #' ),+ |
+
174 | ++ |
+ #' Sepal.Length >= 7,+ |
+
175 | ++ |
+ #' "adsl",+ |
+
176 | ++ |
+ #' character(0),+ |
+
177 | ++ |
+ #' "adsl filter"+ |
+
178 | ++ |
+ #' )+ |
+
179 | ++ |
+ #' data <- log_filter(data, Sepal.Length >= 7, "iris2", character(0), "iris2 filter")+ |
+
180 | ++ |
+ #' print_log(data)+ |
+
181 | ++ |
+ #' print_log(data, incl = FALSE)+ |
+
182 | ++ |
+ #' print_log(data, incl.adsl = FALSE, incl = FALSE)+ |
+
183 | ++ |
+ print_log.list <- function(data, incl = TRUE, incl.adsl = TRUE) {+ |
+
184 | +7x | +
+ checkmate::assert_list(data, types = "data.frame", names = "unique")+ |
+
185 | +7x | +
+ checkmate::assert_flag(incl)+ |
+
186 | ++ | + + | +
187 | +7x | +
+ filter_log <- get_log(data, incl = incl, incl.adsl = incl.adsl)+ |
+
188 | ++ | + + | +
189 | +7x | +
+ if (!incl) {+ |
+
190 | +2x | +
+ filter_log <- filter_log[!vapply(filter_log, is.null, logical(1))]+ |
+
191 | ++ |
+ }+ |
+
192 | ++ | + + | +
193 | +7x | +
+ cat("Filter Log:")+ |
+
194 | +7x | +
+ if (length(filter_log) == 0) {+ |
+
195 | +1x | +
+ cat("\n No filtering")+ |
+
196 | ++ |
+ } else {+ |
+
197 | +6x | +
+ mapply(+ |
+
198 | +6x | +
+ function(x, y) {+ |
+
199 | +11x | +
+ cat(paste0("\n - ", x, ":"))+ |
+
200 | +11x | +
+ cat(paste0("\n ", y, ""))+ |
+
201 | ++ |
+ },+ |
+
202 | +6x | +
+ as.list(names(filter_log)),+ |
+
203 | +6x | +
+ filter_log+ |
+
204 | ++ |
+ )+ |
+
205 | ++ |
+ }+ |
+
206 | +7x | +
+ cat("\n")+ |
+
207 | ++ | + + | +
208 | +7x | +
+ invisible()+ |
+
209 | ++ |
+ }+ |
+
1 | ++ |
+ #' Reformat Values+ |
+
2 | ++ |
+ #' @param obj object to reformat.+ |
+
3 | ++ |
+ #' @param format (`rule`) or (`list`) of `rule` depending on the class of obj.+ |
+
4 | ++ |
+ #' @param ... for compatibility between methods and pass additional special mapping to transform rules.+ |
+
5 | ++ |
+ #' * `.string_as_fct` (`flag`) whether the reformatted character object should be converted to factor.+ |
+
6 | ++ |
+ #' * `.to_NA` (`character`) values that should be converted to `NA`. For `factor`, the corresponding levels are+ |
+
7 | ++ |
+ #' dropped. If `NULL`, the argument will be taken from the `to_NA`attribute of the rule.+ |
+
8 | ++ |
+ #' * `.drop` (`flag`) whether to drop empty levels. If `NULL`, the argument will be taken from the `drop`attribute of+ |
+
9 | ++ |
+ #' the rule.+ |
+
10 | ++ |
+ #' * `.na_last` (`flag`) whether the level replacing `NA` should be last.+ |
+
11 | ++ |
+ #'+ |
+
12 | ++ |
+ #' @export+ |
+
13 | ++ |
+ #' @note When the rule is empty rule or when values subject to reformatting are absent from the object, no error is+ |
+
14 | ++ |
+ #' raised. The conversion to factor if `.string_as_fct = TRUE`) is still carried out. The conversion of the levels+ |
+
15 | ++ |
+ #' declared in `.to_NA` to `NA` values occurs after the remapping. `NA` values created this way are not affected by a+ |
+
16 | ++ |
+ #' rule declaring a remapping of `NA` values. For factors, level dropping is the last step, hence, levels converted to+ |
+
17 | ++ |
+ #' `NA` by the `.to_NA` argument, will be removed if `.drop` is `TRUE`. Arguments passed via `reformat` override the+ |
+
18 | ++ |
+ #' ones defined during rule creation.+ |
+
19 | ++ |
+ #'+ |
+
20 | ++ |
+ #' @rdname reformat+ |
+
21 | ++ |
+ #'+ |
+
22 | ++ |
+ reformat <- function(obj, ...) {+ |
+
23 | +51x | +
+ UseMethod("reformat")+ |
+
24 | ++ |
+ }+ |
+
25 | ++ | + + | +
26 | ++ |
+ #' @export+ |
+
27 | ++ |
+ #' @rdname reformat+ |
+
28 | ++ |
+ reformat.default <- function(obj, format, ...) {+ |
+
29 | +2x | +
+ if (!is(format, "empty_rule")) {+ |
+
30 | +1x | +
+ warning(paste0(c("Not implemented for class: ", toString(class(obj)), "! Only empty rule allowed.")))+ |
+
31 | ++ |
+ }+ |
+
32 | +2x | +
+ return(obj)+ |
+
33 | ++ |
+ }+ |
+
34 | ++ | + + | +
35 | ++ |
+ #' @export+ |
+
36 | ++ |
+ #' @rdname reformat+ |
+
37 | ++ |
+ #'+ |
+
38 | ++ |
+ #' @examples+ |
+
39 | ++ |
+ #'+ |
+
40 | ++ |
+ #' # Reformatting of character.+ |
+
41 | ++ |
+ #' obj <- c("a", "b", "x", NA)+ |
+
42 | ++ |
+ #' attr(obj, "label") <- "my label"+ |
+
43 | ++ |
+ #' format <- rule("A" = "a", "NN" = NA)+ |
+
44 | ++ |
+ #'+ |
+
45 | ++ |
+ #' reformat(obj, format)+ |
+
46 | ++ |
+ #' reformat(obj, format, .string_as_fct = FALSE, .to_NA = "x")+ |
+
47 | ++ |
+ #' reformat(obj, empty_rule, .string_as_fct = FALSE, .to_NA = "x")+ |
+
48 | ++ |
+ #'+ |
+
49 | ++ |
+ reformat.character <- function(obj, format, ...) {+ |
+
50 | +19x | +
+ checkmate::assert_class(format, "rule")+ |
+
51 | ++ | + + | +
52 | ++ |
+ # Give priority to argument defined in reformat.+ |
+
53 | +19x | +
+ format <- do.call(rule, modifyList(as.list(format), list(...)))+ |
+
54 | ++ | + + | +
55 | +19x | +
+ if (attr(format, ".string_as_fct")) {+ |
+
56 | ++ |
+ # Keep attributes.+ |
+
57 | +11x | +
+ att <- attributes(obj)+ |
+
58 | +11x | +
+ obj_fact <- as.factor(obj)+ |
+
59 | +11x | +
+ supp_att_name <- setdiff(names(att), attributes(obj_fact))+ |
+
60 | +11x | +
+ supp_att <- att[supp_att_name]+ |
+
61 | +11x | +
+ attributes(obj_fact) <- c(attributes(obj_fact), supp_att)+ |
+
62 | ++ | + + | +
63 | +11x | +
+ reformat(obj_fact, format)+ |
+
64 | ++ |
+ } else {+ |
+
65 | +8x | +
+ if (!is(format, "empty_rule")) {+ |
+
66 | +6x | +
+ value_match <- unlist(format)+ |
+
67 | +6x | +
+ m <- match(obj, value_match)+ |
+
68 | +6x | +
+ obj[!is.na(m)] <- names(format)[m[!is.na(m)]]+ |
+
69 | ++ |
+ }+ |
+
70 | ++ | + + | +
71 | ++ | + + | +
72 | +8x | +
+ val_to_NA <- attr(format, ".to_NA")+ |
+
73 | +8x | +
+ if (!is.null(val_to_NA)) {+ |
+
74 | +4x | +
+ obj[obj %in% val_to_NA] <- NA_character_+ |
+
75 | ++ |
+ }+ |
+
76 | ++ | + + | +
77 | +8x | +
+ obj+ |
+
78 | ++ |
+ }+ |
+
79 | ++ |
+ }+ |
+
80 | ++ | + + | +
81 | ++ |
+ #' @export+ |
+
82 | ++ |
+ #' @rdname reformat+ |
+
83 | ++ |
+ #'+ |
+
84 | ++ |
+ #' @examples+ |
+
85 | ++ |
+ #'+ |
+
86 | ++ |
+ #' # Reformatting of factor.+ |
+
87 | ++ |
+ #' obj <- factor(c("first", "a", "aa", "b", "x", NA), levels = c("first", "x", "b", "aa", "a", "z"))+ |
+
88 | ++ |
+ #' attr(obj, "label") <- "my label"+ |
+
89 | ++ |
+ #' format <- rule("A" = c("a", "aa"), "NN" = c(NA, "x"), "Not_present" = "z", "Not_a_level" = "P")+ |
+
90 | ++ |
+ #'+ |
+
91 | ++ |
+ #' reformat(obj, format)+ |
+
92 | ++ |
+ #' reformat(obj, format, .na_last = FALSE, .to_NA = "b", .drop = FALSE)+ |
+
93 | ++ |
+ #' reformat(obj, empty_rule, .na_last = FALSE, .to_NA = "b", .drop = FALSE)+ |
+
94 | ++ |
+ #'+ |
+
95 | ++ |
+ reformat.factor <- function(obj, format, ...) {+ |
+
96 | +27x | +
+ checkmate::assert_class(format, "rule")+ |
+
97 | ++ | + + | +
98 | +27x | +
+ format <- do.call(rule, modifyList(as.list(format), list(...)))+ |
+
99 | ++ | + + | +
100 | +27x | +
+ if (!is(format, "empty_rule")) {+ |
+
101 | +20x | +
+ any_na <- anyNA(obj)+ |
+
102 | +20x | +
+ if (any(is.na(format)) && any_na) {+ |
+
103 | +18x | +
+ obj <- forcats::fct_na_value_to_level(obj)+ |
+
104 | ++ |
+ }+ |
+
105 | ++ | + + | +
106 | +20x | +
+ absent_format <- format[!format %in% levels(obj)]+ |
+
107 | +20x | +
+ sel_format <- format[format %in% levels(obj)]+ |
+
108 | +20x | +
+ obj <- forcats::fct_recode(obj, !!!sel_format)+ |
+
109 | +20x | +
+ obj <- forcats::fct_expand(obj, unique(names(absent_format)))+ |
+
110 | +20x | +
+ obj <- forcats::fct_relevel(obj, unique(names(format)))+ |
+
111 | ++ | + + | +
112 | +20x | +
+ if (any(is.na(format)) && attr(format, ".na_last")) {+ |
+
113 | +13x | +
+ na_lvl <- names(format)[is.na(format)]+ |
+
114 | +13x | +
+ obj <- forcats::fct_relevel(obj, na_lvl, after = Inf)+ |
+
115 | ++ |
+ }+ |
+
116 | ++ |
+ }+ |
+
117 | ++ | + + | +
118 | +27x | +
+ drop_lvl <- attr(format, ".drop")+ |
+
119 | +27x | +
+ if (drop_lvl) {+ |
+
120 | +3x | +
+ obj <- forcats::fct_drop(obj)+ |
+
121 | ++ |
+ }+ |
+
122 | ++ | + + | +
123 | ++ |
+ # Levels converted to NA are dropped.+ |
+
124 | +27x | +
+ val_to_NA <- attr(format, ".to_NA")+ |
+
125 | +27x | +
+ if (!is.null(val_to_NA)) {+ |
+
126 | +4x | +
+ obj <- forcats::fct_na_level_to_value(obj, val_to_NA)+ |
+
127 | ++ |
+ }+ |
+
128 | ++ | + + | +
129 | +27x | +
+ obj+ |
+
130 | ++ |
+ }+ |
+
131 | ++ | + + | +
132 | ++ |
+ #' @export+ |
+
133 | ++ |
+ #' @rdname reformat+ |
+
134 | ++ |
+ #'+ |
+
135 | ++ |
+ #' @examples+ |
+
136 | ++ |
+ #'+ |
+
137 | ++ |
+ #' # Reformatting of list of data.frame.+ |
+
138 | ++ |
+ #' df1 <- data.frame(+ |
+
139 | ++ |
+ #' var1 = c("a", "b", NA),+ |
+
140 | ++ |
+ #' var2 = factor(c("F1", "F2", NA))+ |
+
141 | ++ |
+ #' )+ |
+
142 | ++ |
+ #'+ |
+
143 | ++ |
+ #' df2 <- data.frame(+ |
+
144 | ++ |
+ #' var1 = c("x", NA, "y"),+ |
+
145 | ++ |
+ #' var2 = factor(c("F11", NA, "F22"))+ |
+
146 | ++ |
+ #' )+ |
+
147 | ++ |
+ #'+ |
+
148 | ++ |
+ #' db <- list(df1 = df1, df2 = df2)+ |
+
149 | ++ |
+ #'+ |
+
150 | ++ |
+ #' format <- list(+ |
+
151 | ++ |
+ #' df1 = list(+ |
+
152 | ++ |
+ #' var1 = rule("X" = "x", "N" = NA, .to_NA = "b")+ |
+
153 | ++ |
+ #' ),+ |
+
154 | ++ |
+ #' df2 = list(+ |
+
155 | ++ |
+ #' var1 = empty_rule,+ |
+
156 | ++ |
+ #' var2 = rule("f11" = "F11", "NN" = NA)+ |
+
157 | ++ |
+ #' )+ |
+
158 | ++ |
+ #' )+ |
+
159 | ++ |
+ #'+ |
+
160 | ++ |
+ #' reformat(db, format)+ |
+
161 | ++ |
+ reformat.list <- function(obj, format, ...) {+ |
+
162 | +3x | +
+ checkmate::assert_list(obj, types = c("data.frame", "tibble"))+ |
+
163 | +3x | +
+ checkmate::assert_named(obj)+ |
+
164 | +3x | +
+ checkmate::assert_list(format, names = "unique", types = "list", null.ok = TRUE)+ |
+
165 | ++ | + + | +
166 | +3x | +
+ if (length(format) == 0) {+ |
+
167 | +1x | +
+ return(obj)+ |
+
168 | ++ |
+ }+ |
+
169 | ++ | + + | +
170 | +2x | +
+ assert_valid_format(format)+ |
+
171 | ++ | + + | +
172 | +2x | +
+ for (tab in names(format)) {+ |
+
173 | +3x | +
+ local_map <- format[[tab]]+ |
+
174 | +3x | +
+ local_map <- local_map[names(local_map) %in% names(obj[[tab]])]+ |
+
175 | ++ | + + | +
176 | +3x | +
+ obj[[tab]][names(local_map)] <- mapply(+ |
+
177 | +3x | +
+ function(rl, col) reformat(obj[[tab]][[col]], format = rl, ...),+ |
+
178 | +3x | +
+ local_map,+ |
+
179 | +3x | +
+ names(local_map),+ |
+
180 | +3x | +
+ SIMPLIFY = FALSE+ |
+
181 | ++ |
+ )+ |
+
182 | ++ |
+ }+ |
+
183 | ++ | + + | +
184 | +2x | +
+ obj+ |
+
185 | ++ |
+ }+ |
+
1 | ++ |
+ #' Create rule based on mappings+ |
+
2 | ++ |
+ #' @param ... Mapping pairs, the argument name is the transformed while+ |
+
3 | ++ |
+ #' its values are original values.+ |
+
4 | ++ |
+ #' @param .lst (`list`) of mapping.+ |
+
5 | ++ |
+ #' @param .string_as_fct (`flag`) whether to convert characters to factors.+ |
+
6 | ++ |
+ #' @param .na_last (`flag`) whether the level replacing `NA` should be last.+ |
+
7 | ++ |
+ #' @param .drop (`flag`) whether to drop empty levels.+ |
+
8 | ++ |
+ #' @param .to_NA (`character`) values that should be converted to `NA`.+ |
+
9 | ++ |
+ #'+ |
+
10 | ++ |
+ #' @note Conversion to `NA` is the last step of the remapping process.+ |
+
11 | ++ | + + | +
12 | ++ |
+ #'+ |
+
13 | ++ |
+ #' @export+ |
+
14 | ++ |
+ #' @examples+ |
+
15 | ++ |
+ #' rule("X" = "x", "Y" = c("y", "z"))+ |
+
16 | ++ |
+ #' rule("X" = "x", "Y" = c("y", "z"), .drop = TRUE, .to_NA = c("a", "b"), .na_last = FALSE)+ |
+
17 | ++ |
+ #'+ |
+
18 | ++ |
+ rule <- function(..., .lst = list(...), .string_as_fct = TRUE, .na_last = TRUE, .drop = FALSE, .to_NA = NULL) {+ |
+
19 | +97x | +
+ checkmate::assert_flag(.string_as_fct)+ |
+
20 | +97x | +
+ checkmate::assert_flag(.na_last)+ |
+
21 | +97x | +
+ checkmate::assert_flag(.drop)+ |
+
22 | +97x | +
+ checkmate::assert_character(.to_NA, null.ok = TRUE, any.missing = FALSE)+ |
+
23 | ++ | + + | +
24 | +97x | +
+ if (length(.lst) == 0) {+ |
+
25 | +23x | +
+ res <- empty_rule+ |
+
26 | +23x | +
+ attr(res, ".string_as_fct") <- .string_as_fct %||% TRUE+ |
+
27 | +23x | +
+ attr(res, ".drop") <- .drop %||% FALSE+ |
+
28 | +23x | +
+ attr(res, ".to_NA") <- .to_NA %||% NULL+ |
+
29 | +23x | +
+ return(res)+ |
+
30 | ++ |
+ } else {+ |
+
31 | +74x | +
+ .lst[is.na(.lst)] <- NA_character_+ |
+
32 | +74x | +
+ if (!checkmate::test_list(.lst, types = c("character"))) {+ |
+
33 | +1x | +
+ stop("Value mapping may only contain the type: {character}")+ |
+
34 | ++ |
+ }+ |
+
35 | +73x | +
+ vals <- as.character(unlist(.lst, use.names = FALSE))+ |
+
36 | +73x | +
+ checkmate::assert_character(vals, unique = TRUE)+ |
+
37 | +71x | +
+ nms <- unlist(lapply(seq_len(length(.lst)), function(x) {+ |
+
38 | +166x | +
+ rep(names(.lst)[x], length(.lst[[x]]))+ |
+
39 | ++ |
+ }))+ |
+
40 | ++ | + + | +
41 | +71x | +
+ res <- structure(+ |
+
42 | +71x | +
+ setNames(vals, nms),+ |
+
43 | +71x | +
+ class = c("rule", "character"),+ |
+
44 | +71x | +
+ .string_as_fct = .string_as_fct,+ |
+
45 | +71x | +
+ .na_last = .na_last,+ |
+
46 | +71x | +
+ .drop = .drop,+ |
+
47 | +71x | +
+ .to_NA = .to_NA+ |
+
48 | ++ |
+ )+ |
+
49 | ++ | + + | +
50 | +71x | +
+ res+ |
+
51 | ++ |
+ }+ |
+
52 | ++ |
+ }+ |
+
53 | ++ | + + | +
54 | ++ |
+ #' Create empty rule+ |
+
55 | ++ |
+ #' @export+ |
+
56 | ++ |
+ empty_rule <- structure(+ |
+
57 | ++ |
+ character(0L),+ |
+
58 | ++ |
+ class = c("empty_rule", "rule", "character"),+ |
+
59 | ++ |
+ .string_as_fct = TRUE,+ |
+
60 | ++ |
+ .na_last = FALSE,+ |
+
61 | ++ |
+ .drop = FALSE+ |
+
62 | ++ |
+ )+ |
+
63 | ++ | + + | +
64 | ++ |
+ #' @export+ |
+
65 | ++ |
+ #'+ |
+
66 | ++ |
+ print.rule <- function(x, ...) {+ |
+
67 | +1x | +
+ cat("Mapping of:\n")+ |
+
68 | +1x | +
+ nms <- names(x)+ |
+
69 | +1x | +
+ for (i in seq_len(length(x))) {+ |
+
70 | +2x | +
+ cat(nms[i], " <- ", if (length(x[[i]]) > 1) sprintf("[%s]", toString(x[[i]])) else x[[i]], "\n")+ |
+
71 | ++ |
+ }+ |
+
72 | +! | +
+ if (!is.null(attr(x, ".to_NA"))) cat("NA <- ", toString(attr(x, ".to_NA")), "\n")+ |
+
73 | +1x | +
+ cat("Convert to factor:", attr(x, ".string_as_fct"), "\n")+ |
+
74 | +1x | +
+ cat("Drop unused level:", attr(x, ".drop"), "\n")+ |
+
75 | +1x | +
+ cat("NA-replacing level in last position:", attr(x, ".na_last"), "\n")+ |
+
76 | ++ |
+ }+ |
+
77 | ++ | + + | +
78 | ++ |
+ #' Convert nested list into list of `rule`+ |
+
79 | ++ |
+ #' @param obj (`nested list`) to convert into list of rules.+ |
+
80 | ++ |
+ #' @export+ |
+
81 | ++ |
+ #' @examples+ |
+
82 | ++ |
+ #' obj <- list(+ |
+
83 | ++ |
+ #' rule1 = list("X" = c("a", "b"), "Z" = "c", .to_NA = "xxxx"),+ |
+
84 | ++ |
+ #' rule2 = list(Missing = c(NA, "")),+ |
+
85 | ++ |
+ #' rule3 = list(Missing = c(NA, ""), .drop = TRUE),+ |
+
86 | ++ |
+ #' rule4 = list(Absent = c(NA, ""), .drop = TRUE, .to_NA = "yyyy")+ |
+
87 | ++ |
+ #' )+ |
+
88 | ++ |
+ #' list2rules(obj)+ |
+
89 | ++ |
+ #'+ |
+
90 | ++ |
+ list2rules <- function(obj) {+ |
+
91 | +2x | +
+ coll <- checkmate::makeAssertCollection()+ |
+
92 | +2x | +
+ checkmate::assert_list(obj, unique = TRUE, types = "list", add = coll)+ |
+
93 | +2x | +
+ checkmate::assert_names(names(obj), type = "unique", add = coll)+ |
+
94 | +2x | +
+ checkmate::reportAssertions(coll)+ |
+
95 | ++ | + + | +
96 | +1x | +
+ lapply(obj, function(x) {+ |
+
97 | +3x | +
+ do.call("rule", x)+ |
+
98 | ++ |
+ })+ |
+
99 | ++ |
+ }+ |
+
100 | ++ | + + | +
101 | ++ |
+ #' Convert Rule to List+ |
+
102 | ++ |
+ #' @param x (`rule`) to convert.+ |
+
103 | ++ |
+ #' @param ... not used.+ |
+
104 | ++ |
+ #'+ |
+
105 | ++ |
+ #' @export+ |
+
106 | ++ |
+ #' @examples+ |
+
107 | ++ |
+ #'+ |
+
108 | ++ |
+ #' x <- rule("a" = c("a", "b"), "X" = "x")+ |
+
109 | ++ |
+ #' as.list(x)+ |
+
110 | ++ |
+ as.list.rule <- function(x, ...) {+ |
+
111 | +48x | +
+ nms <- names(x)+ |
+
112 | +48x | +
+ unames <- unique(nms)+ |
+
113 | +48x | +
+ res <- lapply(unames, function(i) {+ |
+
114 | +80x | +
+ unname(x[nms == i])+ |
+
115 | ++ |
+ })+ |
+
116 | ++ | + + | +
117 | ++ | + + | +
118 | +48x | +
+ att <- attributes(x)+ |
+
119 | +48x | +
+ arg <- att[!names(att) %in% c("names", "class")]+ |
+
120 | ++ | + + | +
121 | +48x | +
+ res <- c(res, unname(arg))+ |
+
122 | +48x | +
+ unames <- c(unames, names(arg))+ |
+
123 | ++ | + + | +
124 | +48x | +
+ setNames(res, unames)+ |
+
125 | ++ |
+ }+ |
+
126 | ++ | + + | +
127 | ++ |
+ #' @export+ |
+
128 | ++ |
+ print.empty_rule <- function(x, ...) {+ |
+
129 | +2x | +
+ cat("Empty mapping\n")+ |
+
130 | +! | +
+ if (!is.null(attr(x, ".to_NA"))) cat("NA <- ", toString(attr(x, ".to_NA")), "\n")+ |
+
131 | +2x | +
+ cat("Convert to factor:", attr(x, ".string_as_fct"), "\n")+ |
+
132 | +2x | +
+ cat("Drop unused level:", attr(x, ".drop"), "\n")+ |
+
133 | ++ |
+ }+ |
+
1 | ++ |
+ #' Propagate Column+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' `propagate`copy columns from a given table of a `list` of `data.frame` to all tables based on other+ |
+
4 | ++ |
+ #' common columns. If several rows are associated with the same key, the rows will be duplicated in the receiving+ |
+
5 | ++ |
+ #' tables. In safe mode, the key must be unique in the original table.+ |
+
6 | ++ |
+ #'+ |
+
7 | ++ |
+ #' @param db (`list` of `data.frame`) object for which some variable need to be propagated.+ |
+
8 | ++ |
+ #' @param from (`string`) the name of the table where the variables to propagate are stored.+ |
+
9 | ++ |
+ #' @param add (`character`) the names of the variables to propagate.+ |
+
10 | ++ |
+ #' @param by (`character`) the key binding the `from` table to the other tables.+ |
+
11 | ++ |
+ #' @param safe (`flag`) should the key be checked for uniqueness in the `from` table.+ |
+
12 | ++ |
+ #'+ |
+
13 | ++ |
+ #' @return updated `list` of `data.frame`.+ |
+
14 | ++ |
+ #'+ |
+
15 | ++ |
+ #' @rdname propagate+ |
+
16 | ++ |
+ #' @export+ |
+
17 | ++ |
+ #'+ |
+
18 | ++ |
+ propagate <- function(db, from, add, by, safe = TRUE) {+ |
+
19 | +3x | +
+ UseMethod("propagate")+ |
+
20 | ++ |
+ }+ |
+
21 | ++ | + + | +
22 | ++ |
+ #' @rdname propagate+ |
+
23 | ++ |
+ #' @export+ |
+
24 | ++ |
+ #'+ |
+
25 | ++ |
+ #'+ |
+
26 | ++ |
+ #' @examples+ |
+
27 | ++ |
+ #' df1 <- data.frame(+ |
+
28 | ++ |
+ #' id1 = c("a", "a", "c", "d", "e", "f"),+ |
+
29 | ++ |
+ #' id2 = c("A", "B", "A", "A", "A", "A"),+ |
+
30 | ++ |
+ #' int = c(1, 2, 3, 4, 5, 6),+ |
+
31 | ++ |
+ #' bool = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE)+ |
+
32 | ++ |
+ #' )+ |
+
33 | ++ |
+ #'+ |
+
34 | ++ |
+ #' df2 <- data.frame(+ |
+
35 | ++ |
+ #' id1 = c("a", "a", "d", "e", "f", "g"),+ |
+
36 | ++ |
+ #' id2 = c("A", "B", "A", "A", "A", "A")+ |
+
37 | ++ |
+ #' )+ |
+
38 | ++ |
+ #'+ |
+
39 | ++ |
+ #' df3 <- data.frame(+ |
+
40 | ++ |
+ #' id1 = c("a", "c", "d", "e", "f", "x"),+ |
+
41 | ++ |
+ #' id2 = c("A", "A", "A", "A", "B", "A"),+ |
+
42 | ++ |
+ #' int = c(11, 22, 33, 44, 55, 66)+ |
+
43 | ++ |
+ #' )+ |
+
44 | ++ |
+ #'+ |
+
45 | ++ |
+ #' db <- list(df1 = df1, fd2 = df2, df3 = df3)+ |
+
46 | ++ |
+ #' propagate(db, from = "df1", add = c("int", "bool"), by = c("id1", "id2"))+ |
+
47 | ++ |
+ #'+ |
+
48 | ++ |
+ propagate.list <- function(db, from, add, by, safe = TRUE) {+ |
+
49 | +3x | +
+ checkmate::assert_list(db, types = "data.frame", names = "unique")+ |
+
50 | +3x | +
+ checkmate::assert_names(names(db), must.include = from)+ |
+
51 | +3x | +
+ checkmate::assert_names(colnames(db[[from]]), must.include = add)+ |
+
52 | +3x | +
+ checkmate::assert_names(colnames(db[[from]]), must.include = by)+ |
+
53 | +3x | +
+ checkmate::assert_flag(safe)+ |
+
54 | ++ | + + | +
55 | +3x | +
+ if (safe) {+ |
+
56 | +2x | +
+ keys <- db[[from]][, by]+ |
+
57 | +1x | +
+ if (anyDuplicated(keys)) stop(paste("Duplicated key"))+ |
+
58 | ++ |
+ }+ |
+
59 | ++ | + + | +
60 | +2x | +
+ toJoin <- db[[from]]+ |
+
61 | ++ | + + | +
62 | +2x | +
+ for (tab_name in setdiff(names(db), from)) {+ |
+
63 | +3x | +
+ tab_colnames <- colnames(db[[tab_name]])+ |
+
64 | +3x | +
+ if (!all(add %in% tab_colnames) && all(by %in% tab_colnames)) {+ |
+
65 | +2x | +
+ missing_var <- setdiff(add, tab_colnames)+ |
+
66 | +2x | +
+ sel_var <- c(missing_var, by)+ |
+
67 | +2x | +
+ sel_tab <- toJoin[, sel_var]+ |
+
68 | ++ | + + | +
69 | +2x | +
+ cat(paste0("\nUpdating: ", tab_name, " with: ", toString(missing_var)))+ |
+
70 | ++ | + + | +
71 | +2x | +
+ db[[tab_name]] <- db[[tab_name]] %>%+ |
+
72 | +2x | +
+ dplyr::left_join(sel_tab, by = by)+ |
+
73 | ++ |
+ } else {+ |
+
74 | +1x | +
+ cat(paste0("\nSkipping: ", tab_name))+ |
+
75 | ++ |
+ }+ |
+
76 | ++ |
+ }+ |
+
77 | +2x | +
+ cat("\n")+ |
+
78 | +2x | +
+ return(db)+ |
+
79 | ++ |
+ }+ |
+
1 | ++ |
+ #' Safe transformer+ |
+
2 | ++ |
+ #' @details Obtain content in global environment by default.+ |
+
3 | ++ |
+ #' If not found, use the environment here.+ |
+
4 | ++ |
+ #' @keywords internal+ |
+
5 | ++ |
+ safe_transformer <- function(text, envir) {+ |
+
6 | +7x | +
+ text_lower <- tolower(text)+ |
+
7 | +7x | +
+ res <- if (exists(text_lower, envir = envir, inherits = FALSE)) {+ |
+
8 | +6x | +
+ get(text_lower, envir = envir)+ |
+
9 | ++ |
+ } else {+ |
+
10 | +1x | +
+ text+ |
+
11 | ++ |
+ }+ |
+
12 | +7x | +
+ if (is.character(res)) {+ |
+
13 | +7x | +
+ if (identical(text, tolower(text))) {+ |
+
14 | +3x | +
+ res <- tolower(res)+ |
+
15 | +4x | +
+ } else if (identical(text, toupper(text))) {+ |
+
16 | +2x | +
+ res <- toupper(res)+ |
+
17 | +2x | +
+ } else if (identical(text, stringr::str_to_title(text))) {+ |
+
18 | +2x | +
+ res <- stringr::str_to_title(res)+ |
+
19 | ++ |
+ }+ |
+
20 | ++ |
+ }+ |
+
21 | +7x | +
+ res+ |
+
22 | ++ |
+ }+ |
+
23 | ++ | + + | +
24 | ++ |
+ #' Render whiskers safely+ |
+
25 | ++ |
+ #' @param x (`character`) input to be rendered safely.+ |
+
26 | ++ |
+ #' @export+ |
+
27 | ++ |
+ render_safe <- function(x) {+ |
+
28 | +4x | +
+ checkmate::assert_character(x, null.ok = TRUE)+ |
+
29 | +4x | +
+ if (is.null(x)) {+ |
+
30 | +! | +
+ return(NULL)+ |
+
31 | ++ |
+ }+ |
+
32 | +4x | +
+ ret <- lapply(+ |
+
33 | +4x | +
+ x,+ |
+
34 | +4x | +
+ glue::glue,+ |
+
35 | +4x | +
+ .transformer = safe_transformer,+ |
+
36 | +4x | +
+ .envir = whisker_env,+ |
+
37 | +4x | +
+ .null = "NULL",+ |
+
38 | +4x | +
+ .open = "{",+ |
+
39 | +4x | +
+ .close = "}"+ |
+
40 | ++ |
+ )+ |
+
41 | +4x | +
+ ret <- vapply(ret, `[[`, i = 1L, FUN.VALUE = "")+ |
+
42 | +4x | +
+ setNames(ret, names(x))+ |
+
43 | ++ |
+ }+ |
+
44 | ++ |
+ #' Add whisker values+ |
+
45 | ++ |
+ #' @param x Named (`character`) input.+ |
+
46 | ++ |
+ #' @export+ |
+
47 | ++ |
+ add_whisker <- function(x) {+ |
+
48 | +1x | +
+ checkmate::assert_character(x, names = "unique", any.missing = FALSE)+ |
+
49 | +1x | +
+ lapply(+ |
+
50 | +1x | +
+ names(x),+ |
+
51 | +1x | +
+ function(i) {+ |
+
52 | +2x | +
+ assign(i, x[i], envir = whisker_env)+ |
+
53 | ++ |
+ }+ |
+
54 | ++ |
+ )+ |
+
55 | +1x | +
+ invisible()+ |
+
56 | ++ |
+ }+ |
+
57 | ++ | + + | +
58 | ++ |
+ #' Remove whisker values+ |
+
59 | ++ |
+ #' @param x Named (`character`) input.+ |
+
60 | ++ |
+ #' @export+ |
+
61 | ++ |
+ remove_whisker <- function(x) {+ |
+
62 | +1x | +
+ checkmate::assert_character(x, any.missing = FALSE)+ |
+
63 | +1x | +
+ rm(list = x, envir = whisker_env)+ |
+
64 | ++ |
+ }+ |
+
1 | ++ |
+ #' Unite Columns of a Table in a `list` of `data.frame`.+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' @param adam_db (`list` of `data.frames`) to be transformed.+ |
+
4 | ++ |
+ #' @param tab (`string`) the name of a table in the `adam_db` object.+ |
+
5 | ++ |
+ #' @param cols (`character`) the name of the columns to unite.+ |
+
6 | ++ |
+ #' @param sep (`string`) the separator for the new column name.+ |
+
7 | ++ |
+ #' @param new (`string`) the name of the new column. If `NULL` the concatenation of `cols` separated by `sep` is used.+ |
+
8 | ++ |
+ #'+ |
+
9 | ++ |
+ #' @return `list` of `data.frames` object with a united column.+ |
+
10 | ++ |
+ #' @export+ |
+
11 | ++ |
+ #'+ |
+
12 | ++ |
+ #' @examples+ |
+
13 | ++ |
+ #' db <- list(mtcars = mtcars, iris = iris)+ |
+
14 | ++ |
+ #'+ |
+
15 | ++ |
+ #' x <- ls_unite(db, "mtcars", c("mpg", "hp"), new = "FUSION")+ |
+
16 | ++ |
+ #' x$mtcars+ |
+
17 | ++ |
+ ls_unite <- function(adam_db, tab, cols, sep = ".", new = NULL) {+ |
+
18 | +4x | +
+ checkmate::assert_list(adam_db, types = "data.frame")+ |
+
19 | +4x | +
+ checkmate::assert_string(tab)+ |
+
20 | +4x | +
+ checkmate::assert_names(names(adam_db), must.include = tab)+ |
+
21 | +4x | +
+ checkmate::assert_character(cols, min.len = 1)+ |
+
22 | +4x | +
+ checkmate::assert_names(names(adam_db[[tab]]), must.include = cols)+ |
+
23 | +4x | +
+ checkmate::assert_string(sep)+ |
+
24 | +4x | +
+ checkmate::assert_string(new, null.ok = TRUE)+ |
+
25 | ++ | + + | +
26 | +4x | +
+ x_interaction <- if (!is.null(new)) {+ |
+
27 | +3x | +
+ new+ |
+
28 | ++ |
+ } else {+ |
+
29 | +1x | +
+ paste(cols, collapse = sep)+ |
+
30 | ++ |
+ }+ |
+
31 | ++ | + + | +
32 | +4x | +
+ x_df <- adam_db[[tab]][, cols, drop = FALSE]+ |
+
33 | +4x | +
+ lvl <- lapply(x_df, function(y) {+ |
+
34 | +9x | +
+ uni <- if (is.factor(y)) {+ |
+
35 | +7x | +
+ levels(y)+ |
+
36 | ++ |
+ } else {+ |
+
37 | +2x | +
+ unique(y)+ |
+
38 | ++ |
+ }+ |
+
39 | +9x | +
+ factor(uni, levels = uni)+ |
+
40 | ++ |
+ })+ |
+
41 | ++ | + + | +
42 | +4x | +
+ all_lvl_df <- expand.grid(lvl)+ |
+
43 | ++ | + + | +
44 | +4x | +
+ all_lvl <- all_lvl_df[, cols, drop = FALSE] %>%+ |
+
45 | +4x | +
+ arrange(across(all_of(cols))) %>%+ |
+
46 | +4x | +
+ apply(1, paste, collapse = sep)+ |
+
47 | ++ | + + | +
48 | +4x | +
+ x_vec <- x_df[, cols, drop = FALSE] %>%+ |
+
49 | +4x | +
+ apply(1, paste, collapse = sep)+ |
+
50 | ++ | + + | +
51 | +4x | +
+ existing_lvl <- intersect(all_lvl, x_vec)+ |
+
52 | +4x | +
+ x_fact <- factor(x_vec, existing_lvl)+ |
+
53 | ++ | + + | +
54 | +4x | +
+ adam_db[[tab]][, x_interaction] <- x_fact+ |
+
55 | +4x | +
+ adam_db+ |
+
56 | ++ |
+ }+ |
+
1 | ++ |
+ #' Cutting data by group+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' @details Function used to categorize numeric data stored in long format depending on their group. Intervals are+ |
+
4 | ++ |
+ #' closed on the right (and open on the left).+ |
+
5 | ++ |
+ #'+ |
+
6 | ++ |
+ #' @param df (`dataframe`) with a column of data to be cut and a column specifying the group of each observation.+ |
+
7 | ++ |
+ #' @param col_data (`character`) the column containing the data to be cut.+ |
+
8 | ++ |
+ #' @param col_group (`character`) the column containing the names of the groups according to which the data should be+ |
+
9 | ++ |
+ #' split.+ |
+
10 | ++ |
+ #' @param group (`nested list`) providing for each parameter value that should be analyzed in a categorical way: the+ |
+
11 | ++ |
+ #' name of the parameter (`character`), a series of breakpoints (`numeric`) where the first breakpoints is typically+ |
+
12 | ++ |
+ #' `-Inf` and the last `Inf`, and a series of name which will describe each category (`character`).+ |
+
13 | ++ |
+ #' @param cat_col (`character`) the name of the new column in which the cut label should he stored.+ |
+
14 | ++ |
+ #'+ |
+
15 | ++ |
+ #' @export+ |
+
16 | ++ |
+ #'+ |
+
17 | ++ |
+ #' @return `data.frame` with a column containing categorical values.+ |
+
18 | ++ |
+ #' @examples+ |
+
19 | ++ |
+ #' group <- list(+ |
+
20 | ++ |
+ #' list(+ |
+
21 | ++ |
+ #' "Height",+ |
+
22 | ++ |
+ #' c(-Inf, 150, 170, Inf),+ |
+
23 | ++ |
+ #' c("=<150", "150-170", ">170")+ |
+
24 | ++ |
+ #' ),+ |
+
25 | ++ |
+ #' list(+ |
+
26 | ++ |
+ #' "Weight",+ |
+
27 | ++ |
+ #' c(-Inf, 65, Inf),+ |
+
28 | ++ |
+ #' c("=<65", ">65")+ |
+
29 | ++ |
+ #' ),+ |
+
30 | ++ |
+ #' list(+ |
+
31 | ++ |
+ #' "Age",+ |
+
32 | ++ |
+ #' c(-Inf, 31, Inf),+ |
+
33 | ++ |
+ #' c("=<31", ">31")+ |
+
34 | ++ |
+ #' ),+ |
+
35 | ++ |
+ #' list(+ |
+
36 | ++ |
+ #' "PreCondition",+ |
+
37 | ++ |
+ #' c(-Inf, 1, Inf),+ |
+
38 | ++ |
+ #' c("=<1", "<1")+ |
+
39 | ++ |
+ #' )+ |
+
40 | ++ |
+ #' )+ |
+
41 | ++ |
+ #' data <- data.frame(+ |
+
42 | ++ |
+ #' SUBJECT = rep(letters[1:10], 4),+ |
+
43 | ++ |
+ #' PARAM = rep(c("Height", "Weight", "Age", "other"), each = 10),+ |
+
44 | ++ |
+ #' AVAL = c(rnorm(10, 165, 15), rnorm(10, 65, 5), runif(10, 18, 65), rnorm(10, 0, 1)),+ |
+
45 | ++ |
+ #' index = 1:40+ |
+
46 | ++ |
+ #' )+ |
+
47 | ++ |
+ #'+ |
+
48 | ++ |
+ #' cut_by_group(data, "AVAL", "PARAM", group, "my_new_categories")+ |
+
49 | ++ |
+ cut_by_group <- function(df,+ |
+
50 | ++ |
+ col_data,+ |
+
51 | ++ |
+ col_group,+ |
+
52 | ++ |
+ group,+ |
+
53 | ++ |
+ cat_col) {+ |
+
54 | +4x | +
+ checkmate::assert_data_frame(df)+ |
+
55 | +4x | +
+ checkmate::assert_subset(c(col_data, col_group), colnames(df))+ |
+
56 | +4x | +
+ checkmate::assert_numeric(df[, col_data])+ |
+
57 | +4x | +
+ checkmate::assert_list(group)+ |
+
58 | ++ | + + | +
59 | +4x | +
+ lapply(+ |
+
60 | +4x | +
+ group,+ |
+
61 | +4x | +
+ function(list_element) {+ |
+
62 | +11x | +
+ checkmate::assert_list(list_element, len = 3, types = c("character", "numeric", "character"))+ |
+
63 | ++ |
+ }+ |
+
64 | ++ |
+ )+ |
+
65 | ++ | + + | +
66 | +4x | +
+ df[cat_col] <- NA+ |
+
67 | ++ | + + | +
68 | +4x | +
+ for (g in group) {+ |
+
69 | +10x | +
+ selected_row <- df[[col_group]] == g[[1]]+ |
+
70 | ++ | + + | +
71 | +10x | +
+ df[selected_row, cat_col] <- as.character(cut(df[[col_data]][selected_row], breaks = g[[2]], labels = g[[3]]))+ |
+
72 | ++ |
+ }+ |
+
73 | +3x | +
+ df+ |
+
74 | ++ |
+ }+ |
+
1 | ++ |
+ #' Reorder Two Columns Levels Simultaneously+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' @details The function expect a 1:1 matching between the elements of the two selected column.+ |
+
4 | ++ |
+ #'+ |
+
5 | ++ |
+ #' @param df (`data.frame`) with two column whose factors should be reordered.+ |
+
6 | ++ |
+ #' @param primary (`string`) the name of the column on which the levels reordering should be based.+ |
+
7 | ++ |
+ #' @param secondary (`string`) the name of the column whose levels should be reordered following the levels of the+ |
+
8 | ++ |
+ #' primary column.+ |
+
9 | ++ |
+ #' @param levels_primary (`character`) the levels in the desired order. Existing levels that are not included will be+ |
+
10 | ++ |
+ #' placed afterward in their current order.+ |
+
11 | ++ |
+ #'+ |
+
12 | ++ |
+ #' @export+ |
+
13 | ++ |
+ #'+ |
+
14 | ++ |
+ #' @examples+ |
+
15 | ++ |
+ #'+ |
+
16 | ++ |
+ #' df <- data.frame(+ |
+
17 | ++ |
+ #' SUBJID = 1:3,+ |
+
18 | ++ |
+ #' PARAMCD = factor(c("A", "B", "C")),+ |
+
19 | ++ |
+ #' PARAM = factor(paste("letter", LETTERS[1:3]))+ |
+
20 | ++ |
+ #' )+ |
+
21 | ++ |
+ #' co_relevels(df, "PARAMCD", "PARAM", levels_primary = c("C", "A", "B"))+ |
+
22 | ++ |
+ co_relevels <- function(df, primary, secondary, levels_primary) {+ |
+
23 | +6x | +
+ checkmate::assert_data_frame(df, min.rows = 1)+ |
+
24 | +6x | +
+ checkmate::assert_subset(c(primary, secondary), colnames(df))+ |
+
25 | +6x | +
+ checkmate::assert_character(levels_primary, min.len = 1)+ |
+
26 | +6x | +
+ checkmate::assert_vector(df[[primary]], any.missing = FALSE)+ |
+
27 | +5x | +
+ checkmate::assert_vector(df[[secondary]], any.missing = FALSE)+ |
+
28 | ++ | + + | +
29 | +5x | +
+ df[, primary] <- as.factor(df[[primary]])+ |
+
30 | +5x | +
+ df[, secondary] <- as.factor(df[[secondary]])+ |
+
31 | ++ | + + | +
32 | ++ |
+ # check unique relationship+ |
+
33 | +5x | +
+ df_key <- df[, c(primary, secondary)]+ |
+
34 | +5x | +
+ df_key <- unique(df_key)+ |
+
35 | ++ | + + | +
36 | +5x | +
+ if (any(duplicated(df_key[[primary]])) || any(duplicated(df_key[[secondary]]))) {+ |
+
37 | +1x | +
+ stop("non univoque relation between values in primary and secondary column")+ |
+
38 | ++ |
+ }+ |
+
39 | ++ | + + | +
40 | +4x | +
+ keys <- setNames(as.character(df_key[[secondary]]), as.character(df_key[[primary]]))+ |
+
41 | ++ | + + | +
42 | +4x | +
+ all_levels_primary <- c(levels_primary, setdiff(levels(df[[primary]]), levels_primary))+ |
+
43 | +4x | +
+ all_levels_secondary <- keys[all_levels_primary]+ |
+
44 | ++ | + + | +
45 | +4x | +
+ df[, primary] <- factor(df[[primary]], all_levels_primary)+ |
+
46 | +4x | +
+ df[, secondary] <- factor(df[[secondary]], all_levels_secondary)+ |
+
47 | ++ | + + | +
48 | +4x | +
+ df+ |
+
49 | ++ |
+ }+ |
+
1 | ++ |
+ #' Transforming Empty Strings and White Spaces to NAs+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' @description `r lifecycle::badge("experimental")`+ |
+
4 | ++ |
+ #'+ |
+
5 | ++ |
+ #' SAS imports missing data as empty strings or white spaces. This helper function replaces the empty strings and white+ |
+
6 | ++ |
+ #' space-only character and levels by `NAs`.+ |
+
7 | ++ |
+ #'+ |
+
8 | ++ |
+ #' @param x (`vector`) where empty of white space should be transformed to `NAs`.+ |
+
9 | ++ |
+ #'+ |
+
10 | ++ |
+ #' @return `character` or `factor` without explicit NA. `logical` and `numeric` are returned as `character`.+ |
+
11 | ++ |
+ #'+ |
+
12 | ++ |
+ #' @export+ |
+
13 | ++ |
+ #' @examples+ |
+
14 | ++ |
+ #' char1 <- c(" ", " ", "a", "b", "", "")+ |
+
15 | ++ |
+ #' h_ws_to_na(char1)+ |
+
16 | ++ |
+ #'+ |
+
17 | ++ |
+ #' fact1 <- as.factor(char1)+ |
+
18 | ++ |
+ #' h_ws_to_na(fact1)+ |
+
19 | ++ |
+ #'+ |
+
20 | ++ |
+ #' num1 <- c(1:10)+ |
+
21 | ++ |
+ #' h_ws_to_na(num1)+ |
+
22 | ++ |
+ #'+ |
+
23 | ++ |
+ #' logi1 <- c(TRUE, FALSE, NA)+ |
+
24 | ++ |
+ #' h_ws_to_na(logi1)+ |
+
25 | ++ |
+ h_ws_to_na <- function(x) {+ |
+
26 | +9x | +
+ if (is.factor(x)) {+ |
+
27 | +2x | +
+ levels_x <- levels(x)+ |
+
28 | ++ | + + | +
29 | +2x | +
+ ws_levels <- grepl("^\\s*$", levels_x) | levels_x == ""+ |
+
30 | ++ | + + | +
31 | +2x | +
+ levels(x)[ws_levels] <- NA+ |
+
32 | +7x | +
+ } else if (is.character(x)) {+ |
+
33 | +6x | +
+ ws_char <- grepl("^\\s*$", x) | x == ""+ |
+
34 | ++ | + + | +
35 | +6x | +
+ x[ws_char] <- NA+ |
+
36 | ++ |
+ } else {+ |
+
37 | +1x | +
+ x <- as.character(x)+ |
+
38 | ++ |
+ }+ |
+
39 | +9x | +
+ x+ |
+
40 | ++ |
+ }+ |
+
41 | ++ | + + | +
42 | ++ |
+ #' Transforming Empty Strings and White Spaces to Explicit NAs+ |
+
43 | ++ |
+ #'+ |
+
44 | ++ |
+ #' @description `r lifecycle::badge("experimental")`+ |
+
45 | ++ |
+ #'+ |
+
46 | ++ |
+ #' SAS imports missing data as empty strings or white spaces. This helper function is a thin wrapper around+ |
+
47 | ++ |
+ #' [dunlin::h_ws_to_na] which replaces them with explicit missing level.+ |
+
48 | ++ |
+ #'+ |
+
49 | ++ |
+ #' @param x (`vector`) where empty of white space should be transformed to `NAs`.+ |
+
50 | ++ |
+ #' @param na_level (`character`) replacement of the missing levels.+ |
+
51 | ++ |
+ #'+ |
+
52 | ++ |
+ #' @return `factor` with explicit NA+ |
+
53 | ++ |
+ #'+ |
+
54 | ++ |
+ #' @export+ |
+
55 | ++ |
+ #' @examples+ |
+
56 | ++ |
+ #' char1 <- c(" ", " ", "a", "b", "", "")+ |
+
57 | ++ |
+ #' h_ws_to_explicit_na(char1)+ |
+
58 | ++ |
+ #'+ |
+
59 | ++ |
+ #' fact1 <- as.factor(char1)+ |
+
60 | ++ |
+ #' h_ws_to_explicit_na(fact1)+ |
+
61 | ++ |
+ #'+ |
+
62 | ++ |
+ #' num1 <- c(1, 2, NA)+ |
+
63 | ++ |
+ #' h_ws_to_explicit_na(num1)+ |
+
64 | ++ |
+ #'+ |
+
65 | ++ |
+ #' logi1 <- c(TRUE, FALSE, NA)+ |
+
66 | ++ |
+ #' h_ws_to_explicit_na(logi1)+ |
+
67 | ++ |
+ h_ws_to_explicit_na <- function(x, na_level = "<Missing>") {+ |
+
68 | +7x | +
+ checkmate::assert_character(na_level)+ |
+
69 | ++ | + + | +
70 | +7x | +
+ res <- forcats::fct_na_value_to_level(h_ws_to_na(x), na_level)+ |
+
71 | ++ | + + | +
72 | +7x | +
+ if (na_level %in% res) {+ |
+
73 | +5x | +
+ forcats::fct_relevel(res, na_level, after = Inf)+ |
+
74 | ++ |
+ } else {+ |
+
75 | +2x | +
+ forcats::fct_drop(res, only = na_level)+ |
+
76 | ++ |
+ }+ |
+
77 | ++ |
+ }+ |
+
78 | ++ | + + | +
79 | ++ |
+ #' Transforming Empty Strings and White Spaces to Explicit NAs while Preserving Label+ |
+
80 | ++ |
+ #'+ |
+
81 | ++ |
+ #' @details This function preserves the label attribute.+ |
+
82 | ++ |
+ #'+ |
+
83 | ++ |
+ #' @param x (`vector`) input to be turned into factor with explicit missing level.+ |
+
84 | ++ |
+ #' @param na_level (`character`) the label to encode missing levels.+ |
+
85 | ++ |
+ #'+ |
+
86 | ++ |
+ #' @return `factor` with explicit NA and the same label as the input.+ |
+
87 | ++ |
+ #'+ |
+
88 | ++ |
+ #' @export+ |
+
89 | ++ |
+ #' @examples+ |
+
90 | ++ |
+ #' char1 <- c(" ", " ", "a", "b", "", "", NA)+ |
+
91 | ++ |
+ #' attr(char1, "label") <- "my_label"+ |
+
92 | ++ |
+ #'+ |
+
93 | ++ |
+ #' h_as_factor(char1)+ |
+
94 | ++ |
+ h_as_factor <- function(x, na_level = "<Missing>") {+ |
+
95 | +4x | +
+ checkmate::assert_vector(x)+ |
+
96 | ++ | + + | +
97 | +4x | +
+ init_lab <- attr(x, "label")+ |
+
98 | ++ | + + | +
99 | +4x | +
+ res <- h_ws_to_explicit_na(x, na_level = na_level)+ |
+
100 | ++ | + + | +
101 | +4x | +
+ attr(res, "label") <- init_lab+ |
+
102 | ++ | + + | +
103 | +4x | +
+ res+ |
+
104 | ++ |
+ }+ |
+
105 | ++ | + + | +
106 | ++ |
+ #' Setting the Label Attribute+ |
+
107 | ++ |
+ #'+ |
+
108 | ++ |
+ #' @param var (`object`) whose label attribute can be set.+ |
+
109 | ++ |
+ #' @param label (`character`) the label to add.+ |
+
110 | ++ |
+ #'+ |
+
111 | ++ |
+ #' @return `object` with label attribute.+ |
+
112 | ++ |
+ #'+ |
+
113 | ++ |
+ #' @export+ |
+
114 | ++ |
+ #' @examples+ |
+
115 | ++ |
+ #' x <- c(1:10)+ |
+
116 | ++ |
+ #' attr(x, "label")+ |
+
117 | ++ |
+ #'+ |
+
118 | ++ |
+ #' y <- attr_label(x, "my_label")+ |
+
119 | ++ |
+ #' attr(y, "label")+ |
+
120 | ++ |
+ attr_label <- function(var, label) {+ |
+
121 | +89x | +
+ checkmate::assert_character(label)+ |
+
122 | ++ | + + | +
123 | +88x | +
+ x <- var+ |
+
124 | +88x | +
+ attr(x, "label") <- label+ |
+
125 | ++ | + + | +
126 | +88x | +
+ x+ |
+
127 | ++ |
+ }+ |
+
128 | ++ | + + | +
129 | ++ |
+ #' Setting the Label Attribute to Data Frame Columns+ |
+
130 | ++ |
+ #'+ |
+
131 | ++ |
+ #' @param df (`data.frame`).+ |
+
132 | ++ |
+ #' @param label (`character`) the labels to add.+ |
+
133 | ++ |
+ #'+ |
+
134 | ++ |
+ #' @return `data.frame` with label attributes.+ |
+
135 | ++ |
+ #'+ |
+
136 | ++ |
+ #' @export+ |
+
137 | ++ |
+ #' @examples+ |
+
138 | ++ |
+ #' res <- attr_label_df(mtcars, letters[1:11])+ |
+
139 | ++ |
+ #' res+ |
+
140 | ++ |
+ #' lapply(res, attr, "label")+ |
+
141 | ++ |
+ attr_label_df <- function(df, label) {+ |
+
142 | +20x | +
+ checkmate::assert_data_frame(df)+ |
+
143 | +20x | +
+ checkmate::assert_character(label, len = ncol(df))+ |
+
144 | ++ | + + | +
145 | +19x | +
+ res <- mapply(attr_label, var = df, label = as.list(label), SIMPLIFY = FALSE)+ |
+
146 | +19x | +
+ as.data.frame(res)+ |
+
147 | ++ |
+ }+ |
+
1 | ++ |
+ #' Encode Categorical Missing Values in a `list` of `data.frame`+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' @details This is a helper function to encode missing values (i.e `NA` and `empty string`) of every `character` and+ |
+
4 | ++ |
+ #' `factor` variable found in a `list` of `data.frame`. The `label` attribute of the columns is preserved.+ |
+
5 | ++ |
+ #'+ |
+
6 | ++ |
+ #' @param data (`list` of `data.frame`) to be transformed.+ |
+
7 | ++ |
+ #' @param omit_tables (`character`) the names of the tables to omit from processing.+ |
+
8 | ++ |
+ #' @param omit_columns (`character`) the names of the columns to omit from processing.+ |
+
9 | ++ |
+ #' @param char_as_factor (`logical`) should character columns be converted into factor.+ |
+
10 | ++ |
+ #' @param na_level (`string`) the label to encode missing levels.+ |
+
11 | ++ |
+ #'+ |
+
12 | ++ |
+ #' @return `list` of `data.frame` object with explicit missing levels.+ |
+
13 | ++ |
+ #' @export+ |
+
14 | ++ |
+ #'+ |
+
15 | ++ |
+ #' @examples+ |
+
16 | ++ |
+ #'+ |
+
17 | ++ |
+ #' df1 <- data.frame(+ |
+
18 | ++ |
+ #' "char" = c("a", "b", NA, "a", "k", "x"),+ |
+
19 | ++ |
+ #' "char2" = c("A", "B", NA, "A", "K", "X"),+ |
+
20 | ++ |
+ #' "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")),+ |
+
21 | ++ |
+ #' "logi" = c(NA, FALSE, TRUE, NA, FALSE, NA)+ |
+
22 | ++ |
+ #' )+ |
+
23 | ++ |
+ #' df2 <- data.frame(+ |
+
24 | ++ |
+ #' "char" = c("a", "b", NA, "a", "k", "x"),+ |
+
25 | ++ |
+ #' "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")),+ |
+
26 | ++ |
+ #' "num" = c(1:5, NA)+ |
+
27 | ++ |
+ #' )+ |
+
28 | ++ |
+ #' df3 <- data.frame(+ |
+
29 | ++ |
+ #' "char" = c(NA, NA, "A")+ |
+
30 | ++ |
+ #' )+ |
+
31 | ++ |
+ #'+ |
+
32 | ++ |
+ #' db <- list(df1 = df1, df2 = df2, df3 = df3)+ |
+
33 | ++ |
+ #'+ |
+
34 | ++ |
+ #' ls_explicit_na(db)+ |
+
35 | ++ |
+ #' ls_explicit_na(db, omit_tables = "df3", omit_columns = "char2")+ |
+
36 | ++ |
+ #'+ |
+
37 | ++ |
+ ls_explicit_na <- function(data,+ |
+
38 | ++ |
+ omit_tables = NULL,+ |
+
39 | ++ |
+ omit_columns = NULL,+ |
+
40 | ++ |
+ char_as_factor = TRUE,+ |
+
41 | ++ |
+ na_level = "<Missing>") {+ |
+
42 | +3x | +
+ checkmate::assert_list(data, types = "data.frame", names = "unique")+ |
+
43 | +3x | +
+ checkmate::assert_character(omit_tables, null.ok = TRUE)+ |
+
44 | +3x | +
+ checkmate::assert_character(omit_columns, null.ok = TRUE)+ |
+
45 | +3x | +
+ checkmate::assert_flag(char_as_factor)+ |
+
46 | +3x | +
+ checkmate::assert_string(na_level)+ |
+
47 | ++ | + + | +
48 | +3x | +
+ modif_tab <- setdiff(names(data), omit_tables)+ |
+
49 | +3x | +
+ if (length(modif_tab) < 1) {+ |
+
50 | +1x | +
+ return(data)+ |
+
51 | ++ |
+ }+ |
+
52 | ++ | + + | +
53 | +2x | +
+ data[modif_tab] <- lapply(+ |
+
54 | +2x | +
+ data[modif_tab],+ |
+
55 | +2x | +
+ h_df_explicit,+ |
+
56 | +2x | +
+ omit_columns = omit_columns,+ |
+
57 | +2x | +
+ char_as_factor = char_as_factor,+ |
+
58 | +2x | +
+ na_level = na_level+ |
+
59 | ++ |
+ )+ |
+
60 | ++ | + + | +
61 | +2x | +
+ data+ |
+
62 | ++ |
+ }+ |
+
63 | ++ | + + | +
64 | ++ |
+ #' Encode Categorical Missing Values in a `data.frame`.+ |
+
65 | ++ |
+ #'+ |
+
66 | ++ |
+ #' @inheritParams ls_explicit_na+ |
+
67 | ++ |
+ #' @keywords internal+ |
+
68 | ++ |
+ #'+ |
+
69 | ++ |
+ #' @examples+ |
+
70 | ++ |
+ #' \dontrun{+ |
+
71 | ++ |
+ #' df <- data.frame(+ |
+
72 | ++ |
+ #' "char" = c("a", "b", NA, "a", "k", "x"),+ |
+
73 | ++ |
+ #' "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")),+ |
+
74 | ++ |
+ #' "logi" = c(NA, FALSE, TRUE, NA, FALSE, NA),+ |
+
75 | ++ |
+ #' "num" = c(1:5, NA)+ |
+
76 | ++ |
+ #' )+ |
+
77 | ++ |
+ #'+ |
+
78 | ++ |
+ #' h_df_explicit(df)+ |
+
79 | ++ |
+ #' h_df_explicit(df, omit_columns = c("fact", "x"))+ |
+
80 | ++ |
+ #' }+ |
+
81 | ++ |
+ h_df_explicit <- function(df,+ |
+
82 | ++ |
+ omit_columns = NULL,+ |
+
83 | ++ |
+ char_as_factor = TRUE,+ |
+
84 | ++ |
+ na_level = "<Missing>") {+ |
+
85 | +3x | +
+ na_list <- list(x = c("", NA))+ |
+
86 | +3x | +
+ names(na_list) <- na_level+ |
+
87 | +3x | +
+ na_rule <- rule(.lst = na_list)+ |
+
88 | ++ | + + | +
89 | +3x | +
+ df %>%+ |
+
90 | +3x | +
+ mutate(+ |
+
91 | +3x | +
+ across(+ |
+
92 | +3x | +
+ where(~ is.character(.x) | is.factor(.x)) & !any_of(.env$omit_columns),+ |
+
93 | +3x | +
+ ~ reformat(.x, format = .env$na_rule, .string_as_fct = .env$char_as_factor, .na_last = TRUE)+ |
+
94 | ++ |
+ )+ |
+
95 | ++ |
+ )+ |
+
96 | ++ |
+ }+ |
+
1 | ++ |
+ #' Assert Nested List can be used as Format Argument in Reformat.+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' @param object (`list`) to assert.+ |
+
4 | ++ |
+ #' @return invisible `TRUE` or an error message if the criteria are not fulfilled.+ |
+
5 | ++ |
+ #'+ |
+
6 | ++ |
+ #' @export+ |
+
7 | ++ |
+ #' @examples+ |
+
8 | ++ |
+ #' format <- list(+ |
+
9 | ++ |
+ #' df1 = list(+ |
+
10 | ++ |
+ #' var1 = rule("X" = "x", "N" = c(NA, ""))+ |
+
11 | ++ |
+ #' ),+ |
+
12 | ++ |
+ #' df2 = list(+ |
+
13 | ++ |
+ #' var1 = rule(),+ |
+
14 | ++ |
+ #' var2 = rule("f11" = "F11", "NN" = NA),+ |
+
15 | ++ |
+ #' var3 = empty_rule+ |
+
16 | ++ |
+ #' ),+ |
+
17 | ++ |
+ #' df3 = list()+ |
+
18 | ++ |
+ #' )+ |
+
19 | ++ |
+ #'+ |
+
20 | ++ |
+ #' assert_valid_format(format)+ |
+
21 | ++ |
+ assert_valid_format <- function(object) {+ |
+
22 | +4x | +
+ coll <- checkmate::makeAssertCollection()+ |
+
23 | ++ | + + | +
24 | ++ |
+ # Check object.+ |
+
25 | +4x | +
+ checkmate::assert_list(object, names = "unique", type = "list", add = coll)+ |
+
26 | ++ | + + | +
27 | ++ |
+ # Check table level.+ |
+
28 | +4x | +
+ mapply(+ |
+
29 | +4x | +
+ function(x, xtable) {+ |
+
30 | +8x | +
+ checkmate::assert_list(+ |
+
31 | +8x | +
+ x,+ |
+
32 | +8x | +
+ names = "unique",+ |
+
33 | +8x | +
+ types = "rule",+ |
+
34 | +8x | +
+ any.missing = FALSE,+ |
+
35 | +8x | +
+ .var.name = paste0("[", xtable, "]"),+ |
+
36 | +8x | +
+ add = coll+ |
+
37 | ++ |
+ )+ |
+
38 | ++ |
+ },+ |
+
39 | +4x | +
+ object,+ |
+
40 | +4x | +
+ names(object)+ |
+
41 | ++ |
+ )+ |
+
42 | ++ | + + | +
43 | +4x | +
+ checkmate::reportAssertions(coll)+ |
+
44 | ++ |
+ }+ |
+
45 | ++ | + + | +
46 | ++ |
+ #' Assert List can be Converted into a Nested List Compatible with the Format Argument of Reformat.+ |
+
47 | ++ |
+ #'+ |
+
48 | ++ |
+ #' @param object (`list`) to assert.+ |
+
49 | ++ |
+ #' @return invisible `TRUE` or an error message if the criteria are not fulfilled.+ |
+
50 | ++ |
+ #'+ |
+
51 | ++ |
+ #' @export+ |
+
52 | ++ |
+ #' @examples+ |
+
53 | ++ |
+ #' format <- list(+ |
+
54 | ++ |
+ #' df1 = list(+ |
+
55 | ++ |
+ #' var1 = list("X" = "x", "N" = c(NA, ""))+ |
+
56 | ++ |
+ #' ),+ |
+
57 | ++ |
+ #' df2 = list(+ |
+
58 | ++ |
+ #' var1 = list(),+ |
+
59 | ++ |
+ #' var2 = list("f11" = "F11", "NN" = NA)+ |
+
60 | ++ |
+ #' ),+ |
+
61 | ++ |
+ #' df3 = list()+ |
+
62 | ++ |
+ #' )+ |
+
63 | ++ |
+ #'+ |
+
64 | ++ |
+ #' assert_valid_list_format(format)+ |
+
65 | ++ |
+ assert_valid_list_format <- function(object) {+ |
+
66 | +2x | +
+ coll <- checkmate::makeAssertCollection()+ |
+
67 | ++ | + + | +
68 | ++ |
+ # Check object.+ |
+
69 | +2x | +
+ checkmate::assert_list(object, names = "unique", type = "list", add = coll)+ |
+
70 | ++ | + + | +
71 | ++ |
+ # Check table level.+ |
+
72 | +2x | +
+ mapply(+ |
+
73 | +2x | +
+ function(x, xtable) {+ |
+
74 | +4x | +
+ checkmate::assert_list(+ |
+
75 | +4x | +
+ x,+ |
+
76 | +4x | +
+ names = "unique",+ |
+
77 | +4x | +
+ types = "list",+ |
+
78 | +4x | +
+ any.missing = FALSE,+ |
+
79 | +4x | +
+ .var.name = paste0("[", xtable, "]"),+ |
+
80 | +4x | +
+ add = coll+ |
+
81 | ++ |
+ )+ |
+
82 | ++ |
+ },+ |
+
83 | +2x | +
+ object,+ |
+
84 | +2x | +
+ names(object)+ |
+
85 | ++ |
+ )+ |
+
86 | ++ | + + | +
87 | ++ |
+ # Check variable level.+ |
+
88 | +2x | +
+ mapply(+ |
+
89 | +2x | +
+ function(x, xtable) {+ |
+
90 | +4x | +
+ xvar <- names(x)+ |
+
91 | +4x | +
+ mapply(+ |
+
92 | +4x | +
+ function(x, xvar) {+ |
+
93 | +8x | +
+ checkmate::assert_list(+ |
+
94 | +8x | +
+ x,+ |
+
95 | +8x | +
+ names = "unique",+ |
+
96 | +8x | +
+ type = c("character", "numeric", "logical"),+ |
+
97 | +8x | +
+ .var.name = paste0("[", xtable, ".", xvar, "]"),+ |
+
98 | +8x | +
+ add = coll+ |
+
99 | ++ |
+ )+ |
+
100 | ++ |
+ },+ |
+
101 | +4x | +
+ x,+ |
+
102 | +4x | +
+ xvar+ |
+
103 | ++ |
+ )+ |
+
104 | ++ |
+ },+ |
+
105 | +2x | +
+ object,+ |
+
106 | +2x | +
+ names(object)+ |
+
107 | ++ |
+ )+ |
+
108 | ++ | + + | +
109 | +2x | +
+ checkmate::reportAssertions(coll)+ |
+
110 | ++ |
+ }+ |
+
1 | ++ |
+ whisker_env <- NULL+ |
+
2 | ++ | + + | +
3 | ++ |
+ .onLoad <- function(libname, pkgname) {+ |
+
4 | +! | +
+ default_whiskers <- c(patient_label = "patients")+ |
+
5 | +! | +
+ whisker_env <<- new.env(parent = globalenv())+ |
+
6 | +! | +
+ add_whisker(default_whiskers)+ |
+
7 | ++ |
+ }+ |
+
"+y.value+"
";t=p.firstChild.firstChild;p.firstChild.cN=s.cN;s.parentNode.replaceChild(p.firstChild,s)}else{t.innerHTML=y.value}t.className=u;t.result={language:v,kw:y.keyword_count,re:y.r};if(y.second_best){t.second_best={language:y.second_best.language,kw:y.second_best.keyword_count,re:y.second_best.r}}}function o(){if(o.called){return}o.called=true;var r=document.getElementsByTagName("pre");for(var p=0;p