Added report template within the package TODO - more explanation with…

…in the report to explain how to use it...
unhcRverse · Nov 7, 2023 · 74d6325 · 74d6325
1 parent aefa89b
commit 74d6325
Show file tree

Hide file tree

Showing 92 changed files with 150,104 additions and 25,990 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -4,6 +4,12 @@
 ^LICENSE\.md$
 ^\.github$
 ^_pkgdown\.yml$
+^codecov\.yml$
+^README\.Rmd$
+^setting_testthat\.R$
 ^docs$
+^data-raw$
+^dev$
 ^pkgdown$
 ^CODE_OF_CONDUCT\.md$
+^cleaningtools_ed\.Rproj$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,11 +1,12 @@
 Package: cleaningtools
 Title: cleaningtools package focuses on data cleaning
-Version: 0.0.0.9002
+Version: 0.0.0.9003
 Authors@R: c(
     person("Mehedi", "Khan", , "mh.khan@reach-initiative.org", role = "aut",
            comment = c(ORCID = "0000-0002-4276-4485")),
     person("Yann", "Say", , "yann.say@impact-initiatives.org", role = c("cre", "aut"),
-           comment = c(ORCID = "0000-0002-7390-4209"))
+           comment = c(ORCID = "0000-0002-7390-4209")),
+    person("Edouard", "Legoupil", , "legoupil@unhcr.org", role = "ctb")
   )
 Description: The cleaningtools package focuses on cleaning, and has three
     components: **Check**, which includes a set of functions that flag
@@ -32,11 +33,13 @@ Imports:
     randomcoloR,
     rlang,
     snakecase,
+    stats,
     stringi,
     stringr,
     tibble,
     tidyr,
-    tidyselect
+    tidyselect,
+    utils
 Suggests: 
     rmarkdown,
     testthat (>= 3.0.0),

diff --git a/NEWS.md b/NEWS.md
@@ -1,7 +1,13 @@
-# cleaningtools (development version)
+# cleaningtools 0.0.0.9003
 
- Code review - transferred all function to a fusen notebook to review checking with examples
+ * Code review - transferred all functions to a [fusen notebook](https://thinkr-open.github.io/fusen/index.html) to review checks with examples. All development can be done there to enforce better documentation
 
- fixed globals.R
+ * fixed globals.R and a few missing library imports
+
+ * added a built-in report template with all the function parameters (`inst/rmarkdown/templates/clean/skeleton/skeleton.Rmd`)
+
+ * added demo files within a data-raw folder directly as xlsx files
+
+# cleaningtools 0.0.0.9002
 
-* Initial CRAN submission.
+  Initial version
diff --git a/R/add_duration_from_audit.R b/R/add_duration_from_audit.R
@@ -151,12 +151,12 @@ add_duration_from_audit <- function(dataset,
 
   if (exists("duration_with_sum_all")) {
     dataset <- dataset %>%
-      dplyr::left_join(duration_with_sum_all, by = setNames("uuid", uuid_column))
+      dplyr::left_join(duration_with_sum_all, by = stats::setNames("uuid", uuid_column))
   }
 
   if (exists("duration_with_start_end")) {
     dataset <- dataset %>%
-      dplyr::left_join(duration_with_start_end, by = setNames("uuid", uuid_column))
+      dplyr::left_join(duration_with_start_end, by = stats::setNames("uuid", uuid_column))
   }
 
   return(dataset)

diff --git a/R/check_outliers.R b/R/check_outliers.R
@@ -76,7 +76,7 @@ check_outliers <- function(dataset,
   #######################
 
 
-  dataset <- type.convert(dataset, as.is = TRUE, na.string = c("", " ")) |>
+  dataset <- utils::type.convert(dataset, as.is = TRUE, na.string = c("", " ")) |>
     dplyr::rename(
       uuid = !!rlang::sym(uuid_column)
     ) |>
@@ -154,13 +154,13 @@ check_outliers <- function(dataset,
     variable_value <- variable_value[!is.na(variable_value) & !is.null(variable_value) & !is.infinite(variable_value)]
 
     if (!is.null(minimum_unique_value_of_variable)) {
-      outliers_tf_nr <- (abs(variable_value - mean(variable_value)) > strongness_factor * sd(variable_value)) &
+      outliers_tf_nr <- (abs(variable_value - mean(variable_value)) > strongness_factor * stats::sd(variable_value)) &
         (length(unique(variable_value)) > minimum_unique_value_of_variable)
     }
 
 
     if (is.null(minimum_unique_value_of_variable)) {
-      outliers_tf_nr <- abs(variable_value - mean(variable_value)) > strongness_factor * sd(variable_value)
+      outliers_tf_nr <- abs(variable_value - mean(variable_value)) > strongness_factor * stats::sd(variable_value)
     }
 
 
@@ -188,12 +188,12 @@ check_outliers <- function(dataset,
     log_variable <- log_variable[!is.na(log_variable) & !is.null(log_variable) & !is.infinite(log_variable)]
 
     if (!is.null(minimum_unique_value_of_variable)) {
-      outliers_tf <- abs(log_variable - mean(log_variable)) > strongness_factor * sd(log_variable) &
+      outliers_tf <- abs(log_variable - mean(log_variable)) > strongness_factor * stats::sd(log_variable) &
         length(unique(log_variable)) > minimum_unique_value_of_variable
     }
 
     if (is.null(minimum_unique_value_of_variable)) {
-      outliers_tf <- abs(log_variable - mean(log_variable)) > strongness_factor * sd(log_variable)
+      outliers_tf <- abs(log_variable - mean(log_variable)) > strongness_factor * stats::sd(log_variable)
     }
 
 

diff --git a/R/create_audit_list.R b/R/create_audit_list.R
@@ -15,15 +15,15 @@
 create_audit_list <- function(audit_zip_path,
                               dataset = NULL,
                               uuid_column = "uuid") {
-  list_of_files <- unzip(audit_zip_path, list = TRUE) %>%
+  list_of_files <- utils::unzip(audit_zip_path, list = TRUE) %>%
     dplyr::rename(path = Name) %>%
     dplyr::filter(stringr::str_detect(path, pattern = "audit.csv"))
 
   locatation_audit <- list_of_files %>%
     dplyr::pull(path) %>%
     stringr::str_split("/") %>%
     purrr::set_names(list_of_files$path) %>%
-    purrr:::map_dbl(~ which(stringr::str_detect(.x, "audit.csv"))) %>%
+    purrr::map_dbl(~ which(stringr::str_detect(.x, "audit.csv"))) %>%
     unique()
 
   if (length(locatation_audit) != 1) {

diff --git a/R/create_clean_data.R b/R/create_clean_data.R
@@ -170,5 +170,5 @@ create_clean_data <- function(raw_dataset,
     print("no surveys to remove in log")
   }
 
-  return(raw_dataset %>% type.convert(as.is = T))
+  return(raw_dataset %>% utils::type.convert(as.is = T))
 }
diff --git a/R/create_validation_list.R b/R/create_validation_list.R
@@ -29,7 +29,7 @@ create_validation_list <- function(choices, tool) {
   ) %>%
     do.call(rbind, .) %>%
     as.data.frame() %>%
-    setNames(c("name", "choices"))
+    stats::setNames(c("name", "choices"))
 
   choicelist <- new_lists %>%
     dplyr::bind_rows(create_formatted_choices(choices, tool) %>%
@@ -38,7 +38,7 @@ create_validation_list <- function(choices, tool) {
   choice_validation <- choicelist %>%
     unique() %>%
     data.table::transpose() %>%
-    setNames(.[1, ]) %>%
+    stats::setNames(.[1, ]) %>%
     dplyr::slice(-1) %>%
     dplyr::mutate_all(~ stringr::str_split(., ";\n"))
 

diff --git a/R/detect_uuid.R b/R/detect_uuid.R
@@ -9,6 +9,8 @@
 #'  If there are 0 or 2 or more possible answers, it will
 #' give an error.
 #' 
+#' @keywords internal
+#' 
 #' @export
 #' @examples
 #' test1_df <- data.frame(uuid = letters,

diff --git a/R/knit_big_table.R b/R/knit_big_table.R
@@ -6,6 +6,8 @@
 #' @param .height Height in the html output
 #'
 #' @return Table with some features for quarto html output
+#' 
+#' @keywords internal
 #' @export
 #' @examples
 #' #knit_big_table(table_to_print)

diff --git a/R/print_log.R b/R/print_log.R
@@ -5,7 +5,10 @@
 #' @param log_to_print A data frame to print
 #' @param message A message to print if the data frame is empty
 #'
-#' @return The data frame to print or an alternative message if the dataframe is empty
+#' @return The data frame to print or an alternative message 
+#' if the dataframe is empty
+#' 
+#' @keywords internal
 #' @export
 #' @examples
 #' #print_log(log_to_print, "No values flagged")

diff --git a/R/review_sample_frame_with_dataset.R b/R/review_sample_frame_with_dataset.R
@@ -70,7 +70,7 @@ review_sample_frame_with_dataset <- function(sample_frame,
 
   sample_frame |>
     dplyr::select(-dplyr::any_of(c("Collected", "Remaining"))) |>
-    dplyr::left_join(actual_df, by = setNames(clean_dataset_strata_column, sampling_frame_strata_column)) |>
+    dplyr::left_join(actual_df, by = stats::setNames(clean_dataset_strata_column, sampling_frame_strata_column)) |>
     dplyr::mutate(Collected = dplyr::case_when(is.na(Collected) ~ 0, T ~ Collected)) |>
     dplyr::mutate(
       Remaining = !!rlang::sym(sampling_frame_target_survey_column) - Collected

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -43,3 +43,7 @@ reference:
   desc: functions building cleaning
 - contents:
   - starts_with("coerce_")
+- title: checks
+  desc: functions building cleaning
+- contents:
+  - starts_with("check_")
diff --git a/...down/templates/clean/skeleton/review.xlsx → data-raw/review.xlsx b/...down/templates/clean/skeleton/review.xlsx → data-raw/review.xlsx
diff --git a/dev/function_documentation.Rmd b/dev/function_documentation.Rmd
@@ -452,12 +452,12 @@ add_duration_from_audit <- function(dataset,
 
   if (exists("duration_with_sum_all")) {
     dataset <- dataset %>%
-      dplyr::left_join(duration_with_sum_all, by = setNames("uuid", uuid_column))
+      dplyr::left_join(duration_with_sum_all, by = stats::setNames("uuid", uuid_column))
   }
 
   if (exists("duration_with_start_end")) {
     dataset <- dataset %>%
-      dplyr::left_join(duration_with_start_end, by = setNames("uuid", uuid_column))
+      dplyr::left_join(duration_with_start_end, by = stats::setNames("uuid", uuid_column))
   }
 
   return(dataset)
@@ -553,15 +553,15 @@ test_that("add_duration_from_audit works", {
 create_audit_list <- function(audit_zip_path,
                               dataset = NULL,
                               uuid_column = "uuid") {
-  list_of_files <- unzip(audit_zip_path, list = TRUE) %>%
+  list_of_files <- utils::unzip(audit_zip_path, list = TRUE) %>%
     dplyr::rename(path = Name) %>%
     dplyr::filter(stringr::str_detect(path, pattern = "audit.csv"))
 
   locatation_audit <- list_of_files %>%
     dplyr::pull(path) %>%
     stringr::str_split("/") %>%
     purrr::set_names(list_of_files$path) %>%
-    purrr:::map_dbl(~ which(stringr::str_detect(.x, "audit.csv"))) %>%
+    purrr::map_dbl(~ which(stringr::str_detect(.x, "audit.csv"))) %>%
     unique()
 
   if (length(locatation_audit) != 1) {
@@ -1460,7 +1460,7 @@ check_outliers <- function(dataset,
   #######################
 
 
-  dataset <- type.convert(dataset, as.is = TRUE, na.string = c("", " ")) |>
+  dataset <- utils::type.convert(dataset, as.is = TRUE, na.string = c("", " ")) |>
     dplyr::rename(
       uuid = !!rlang::sym(uuid_column)
     ) |>
@@ -1538,13 +1538,13 @@ check_outliers <- function(dataset,
     variable_value <- variable_value[!is.na(variable_value) & !is.null(variable_value) & !is.infinite(variable_value)]
 
     if (!is.null(minimum_unique_value_of_variable)) {
-      outliers_tf_nr <- (abs(variable_value - mean(variable_value)) > strongness_factor * sd(variable_value)) &
+      outliers_tf_nr <- (abs(variable_value - mean(variable_value)) > strongness_factor * stats::sd(variable_value)) &
         (length(unique(variable_value)) > minimum_unique_value_of_variable)
     }
 
 
     if (is.null(minimum_unique_value_of_variable)) {
-      outliers_tf_nr <- abs(variable_value - mean(variable_value)) > strongness_factor * sd(variable_value)
+      outliers_tf_nr <- abs(variable_value - mean(variable_value)) > strongness_factor * stats::sd(variable_value)
     }
 
 
@@ -1572,12 +1572,12 @@ check_outliers <- function(dataset,
     log_variable <- log_variable[!is.na(log_variable) & !is.null(log_variable) & !is.infinite(log_variable)]
 
     if (!is.null(minimum_unique_value_of_variable)) {
-      outliers_tf <- abs(log_variable - mean(log_variable)) > strongness_factor * sd(log_variable) &
+      outliers_tf <- abs(log_variable - mean(log_variable)) > strongness_factor * stats::sd(log_variable) &
         length(unique(log_variable)) > minimum_unique_value_of_variable
     }
 
     if (is.null(minimum_unique_value_of_variable)) {
-      outliers_tf <- abs(log_variable - mean(log_variable)) > strongness_factor * sd(log_variable)
+      outliers_tf <- abs(log_variable - mean(log_variable)) > strongness_factor * stats::sd(log_variable)
     }
 
 
@@ -2231,7 +2231,7 @@ create_clean_data <- function(raw_dataset,
     print("no surveys to remove in log")
   }
 
-  return(raw_dataset %>% type.convert(as.is = T))
+  return(raw_dataset %>% utils::type.convert(as.is = T))
 }
 ```
 
@@ -3774,7 +3774,7 @@ review_sample_frame_with_dataset <- function(sample_frame,
 
   sample_frame |>
     dplyr::select(-dplyr::any_of(c("Collected", "Remaining"))) |>
-    dplyr::left_join(actual_df, by = setNames(clean_dataset_strata_column, sampling_frame_strata_column)) |>
+    dplyr::left_join(actual_df, by = stats::setNames(clean_dataset_strata_column, sampling_frame_strata_column)) |>
     dplyr::mutate(Collected = dplyr::case_when(is.na(Collected) ~ 0, T ~ Collected)) |>
     dplyr::mutate(
       Remaining = !!rlang::sym(sampling_frame_target_survey_column) - Collected
@@ -4270,7 +4270,7 @@ create_validation_list <- function(choices, tool) {
   ) %>%
     do.call(rbind, .) %>%
     as.data.frame() %>%
-    setNames(c("name", "choices"))
+    stats::setNames(c("name", "choices"))
 
   choicelist <- new_lists %>%
     dplyr::bind_rows(create_formatted_choices(choices, tool) %>%
@@ -4279,7 +4279,7 @@ create_validation_list <- function(choices, tool) {
   choice_validation <- choicelist %>%
     unique() %>%
     data.table::transpose() %>%
-    setNames(.[1, ]) %>%
+    stats::setNames(.[1, ]) %>%
     dplyr::slice(-1) %>%
     dplyr::mutate_all(~ stringr::str_split(., ";\n"))
 
@@ -4455,6 +4455,8 @@ test_that("coerce_to_character works", {
 #'  If there are 0 or 2 or more possible answers, it will
 #' give an error.
 #' 
+#' @keywords internal
+#' 
 #' @export
  
 
@@ -4497,6 +4499,8 @@ test_that("detect_uuid works", {
 #' @param .height Height in the html output
 #'
 #' @return Table with some features for quarto html output
+#' 
+#' @keywords internal
 #' @export
  
 knit_big_table <- function(table_to_print, .height = "500px") {
@@ -4526,7 +4530,10 @@ test_that("knit_big_table works", {
 #' @param log_to_print A data frame to print
 #' @param message A message to print if the data frame is empty
 #'
-#' @return The data frame to print or an alternative message if the dataframe is empty
+#' @return The data frame to print or an alternative message 
+#' if the dataframe is empty
+#' 
+#' @keywords internal
 #' @export
  
 print_log <- function(log_to_print, message) {

diff --git a/docs/404.html b/docs/404.html
diff --git a/docs/CODE_OF_CONDUCT.html b/docs/CODE_OF_CONDUCT.html
diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html
diff --git a/docs/LICENSE.html b/docs/LICENSE.html