diff --git a/.Rbuildignore b/.Rbuildignore index c48cb73..e291d8b 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,5 @@ +^codecov\.yml$ +^appveyor\.yml$ ^cran-comments\.md$ ^CRAN-RELEASE$ ^.*\.Rproj$ @@ -5,3 +7,7 @@ ^\.travis\.yml$ ^\.idea$ ^img$ +^\.gitignore$ +^\.Rhistory$ +^\.RData$ +^\.gitattributes$ diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..fce2bdc --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +* text=auto +data/* binary +src/* text=lf +R/* text=lf diff --git a/.travis.yml b/.travis.yml index 88caf2a..4da42ab 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,4 +2,17 @@ language: R sudo: false -cache: packages \ No newline at end of file +cache: packages + +r: + - oldrel + - release + - devel + +r_check_args: "--as-cran" + +r_packages: + - covr + +after_success: + - Rscript -e 'library(covr); codecov()' \ No newline at end of file diff --git a/CRAN-RELEASE b/CRAN-RELEASE new file mode 100644 index 0000000..dc484bd --- /dev/null +++ b/CRAN-RELEASE @@ -0,0 +1,2 @@ +This package was submitted to CRAN on 2019-04-10. +Once it is accepted, delete this file and tag the release (commit 1faa2f9a78). diff --git a/DESCRIPTION b/DESCRIPTION index 25d2a67..10d417c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,9 +1,9 @@ Package: bdclean Type: Package Title: A User-Friendly Biodiversity Data Cleaning App for the Inexperienced R User -Description: Provides features to manage complete work flow for biodiversity data cleaning, from uploading the data; gathering input from the user, in order to adjust cleaning procedures; perform the cleaning; and finally, generating various reports and several versions of the data. Facilitates user-level data cleaning, designed for the inexperienced R user. T Gueta et al (2018) . T Gueta et al (2017) . -Version: 0.1.12 -Date: 2019-03-02 +Description: Provides features to manage the complete workflow for biodiversity data cleaning. Uploading data, gathering input from users (in order to adjust cleaning procedures), cleaning data and finally, generating various reports and several versions of the data. Facilitates user-level data cleaning, designed for the inexperienced R user. T Gueta et al (2018) . T Gueta et al (2017) . +Version: 0.1.15 +Date: 2019-04-10 License: GPL-3 URL: https://github.com/bd-R/bdclean, https://bd-r.github.io/The-bdverse/index.html BugReports: https://github.com/bd-R/bdclean/issues @@ -42,5 +42,7 @@ Imports: rmarkdown, knitr, shiny, shinydashboard, shinyjs, leaflet, DT, data.tab Depends: R (>= 2.10) RoxygenNote: 6.1.1 Suggests: - testthat + testthat, + roxygen2, + covr LazyData: true diff --git a/NAMESPACE b/NAMESPACE index 5bab878..ec6fc4d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,13 +6,13 @@ export(clean_data) export(cleaning_function) export(create_default_questionnaire) export(create_report_data) -export(earliestDate) +export(earliest_date) export(get_checks_list) export(run_bdclean) export(run_questionnaire) -export(spatialResolution) -export(taxoLevel) -export(temporalResolution) +export(spatial_resolution) +export(taxo_level) +export(temporal_resolution) import(bdDwC) import(bdchecks) import(data.table) diff --git a/R/bdclean.R b/R/bdclean.R index 69a5b36..c114f6a 100644 --- a/R/bdclean.R +++ b/R/bdclean.R @@ -8,17 +8,17 @@ #' in order to achieve minimum quality to use the data further for any #' analysis or modelling. #' -#'@section Data cleaning: -#'\itemize{ -#'\item \link{run_bdclean} -#'\item \link{clean_data} -#'} +#' @section Data cleaning: +#' \itemize{ +#' \item \link{run_bdclean} +#' \item \link{clean_data} +#' } #' #' -#'@section Citation: -#'\itemize{ -#'\item Gueta, T., Barve, V., Nagarajah, T., Agrawal, A. & Carmel, Y. (2018). bdclean: Biodiversity data cleaning workflows (R package V 1.0.0). Retrieved from https://github.com/bd-R/bdclean/ -#'} +#' @section Citation: +#' \itemize{ +#' \item Gueta, T., Barve, V., Nagarajah, T., Agrawal, A. & Carmel, Y. (2019). bdclean: Biodiversity data cleaning workflows (R package V 0.1.13). Retrieved from https://github.com/bd-R/bdclean/ +#' } #' #' @docType package #' @name bdclean diff --git a/R/clean_data.R b/R/clean_data.R index f5c1fc0..f295cbd 100644 --- a/R/clean_data.R +++ b/R/clean_data.R @@ -7,69 +7,69 @@ #' You can add your custom questions to this questionnaire and then pass it to this #' function to process the data. #' -#'@param data Biodiversity data in a data frame -#'@param customQuestionnaire Custom user created questionnaire responses if to pypass answering questions each time. -#'@param clean Whether to clean after flagging. If false only flagging will be done. -#'@param missing How to treat data with missing values. Default: false - will be treated as bad data. -#'@param report Whether to print report of cleaning done. -#'@param format Formats of the cleaning report required. Options are: Markdown, HTML or / and PDF +#' @param data Biodiversity data in a data frame +#' @param custom_questionnaire Custom user created questionnaire responses if to pypass answering questions each time. +#' @param clean Whether to clean after flagging. If false only flagging will be done. +#' @param missing How to treat data with missing values. Default: false - will be treated as bad data. +#' @param report Whether to print report of cleaning done. +#' @param format Formats of the cleaning report required. Options are: Markdown, HTML or / and PDF #' -#'@return data frame with clean data +#' @return data frame with clean data #' -#'@examples \dontrun{ -#'library(rgbif) -#'occdat <- occ_data( -#' country = "AU", # Country code for australia -#' classKey= 359, # Class code for mammalia -#' limit=5000 # Get only 5000 records -#' ) -#' myData<-occdat$data -#' -#' cleanedData <- clean_data(myData) -#' -#' responses <- run_questionnaire() -#' cleanedData <- clean_data(myData, responses) -#' -#' customQuestionnaire <- create_default_questionnaire() -#' customResponses <- run_questionnaire(customQuestionnaire) -#' cleanedData <- clean_data(myData, customResponses) -#' } +#' @examples +#' +#' custom_questionnaire <- create_default_questionnaire() +#' +#' if(interactive()){ +#' +#' library(rgbif) +#' occdat <- occ_data( +#' country = 'AU', # Country code for australia +#' classKey = 359, # Class code for mammalia +#' limit = 50 # Get only 50 records +#' ) +#' myData <- occdat$data +#' +#' responses <- run_questionnaire() +#' cleaned_data <- clean_data(myData, responses) +#' +#' cleaned_data2 <- clean_data(myData) +#' +#' } #' -#'@export +#' @export clean_data <- function(data, - customQuestionnaire = NULL, + custom_questionnaire = NULL, clean = TRUE, missing = FALSE, report = TRUE, format = c("html_document", "pdf_document")) { responses <- list() - inputData <- data - flaggedData <- data - cleanedData <- data - + input_data <- data + flagged_data <- data + cleaned_data <- data # Questionnaire - if (is.null(customQuestionnaire)) { + if (is.null(custom_questionnaire)) { responses <- run_questionnaire() } else { - responses <- customQuestionnaire + responses <- custom_questionnaire } # Flagging - flaggedData <- responses$flagData(inputData, missing) - + flagged_data <- responses$flag_data(input_data, missing) # Decision Making if (clean) { - cleanedData <- cleaning_function(flaggedData) + cleaned_data <- cleaning_function(flagged_data) } # Report if (report) { create_report_data(data, - flaggedData, - cleanedData, + flagged_data, + cleaned_data, responses, clean, format) @@ -77,61 +77,51 @@ clean_data <- # Cleaning if (clean) { - return(cleanedData) + return(cleaned_data) } - return(flaggedData) + return(flagged_data) } #' Execute the Questionnaire and save user responses. #' #' -#'@param customQuestionnaire Custom User Created Questionnaire if already available. +#' @param custom_questionnaire Custom User Created Questionnaire if already available. #' -#'@return list with BdQuestionObjects containing user answers +#' @return list with BdQuestionObjects containing user answers #' -#'@examples \dontrun{ -#'library(rgbif) -#'occdat1 <- occ_data( -#' country = "AU", # Country code for australia -#' classKey= 359, # Class code for mammalia -#' limit=5000, # Get only 5000 records -#' ) -#' myData<-occdat1$data +#' @examples +#' +#' if(interactive()){ #' -#' responses <- run_questionnaire() -#' cleanedData <- clean_data_new(myData, responses) -#'} +#' responses <- run_questionnaire() +#' +#' } #' -#'@export -run_questionnaire <- function(customQuestionnaire = NULL) { +#' @export +run_questionnaire <- function(custom_questionnaire = NULL) { responses <- list() - - if (is.null(customQuestionnaire)) { + if (is.null(custom_questionnaire)) { message("Custom Questionnaire not given. Using package default Questionnaire...") responses <- create_default_questionnaire() - } else { - if (class(customQuestionnaire) != "BdQuestionContainer") { + if (class(custom_questionnaire) != "BdQuestionContainer") { message( "Provided Custom Questionnaire is not of class BdQuestionContainer. Using package default Questionnaire" ) responses <- create_default_questionnaire() - } else { message("Custom Questionnaire detected.") - responses <- customQuestionnaire + responses <- custom_questionnaire } } - message("Please answer the following questions to initiate cleaning process.") - - for (question in responses$BdQuestions) { + for (question in responses$bdquestions) { if (question$question.type != "Child" && question$question.type != "ChildRouter") { - getUserResponse(question) + get_user_response(question) } } message("Thank you! Cleaning can be started now based on your responses.") @@ -139,24 +129,30 @@ run_questionnaire <- function(customQuestionnaire = NULL) { } #' Internal function for getting user response +#' +#' @param bd_question The BDQuestion object to get users responses. +#' +#' @examples #' -#'@param bdQuestion The BDQuestion object to get users responses. +#' if(interactive()){ +#' +#' question <- BdQuestion() +#' responses <- get_user_response(question) #' -getUserResponse <- function(bdQuestion) { +#' } +get_user_response <- function(bd_question) { # Child & ChildRouter already filtered in first loop above - - if (bdQuestion$question.type == "Atomic") { + if (bd_question$question.type == "Atomic") { # Atomic is filtered - bdQuestion$printQuestion() - bdQuestion$getResponse() - + bd_question$print_question() + bd_question$get_response() } else { # Router , Child as child & ChildRouter as child is filtered - bdQuestion$printQuestion() - bdQuestion$getResponse() - if (bdQuestion$users.answer %in% bdQuestion$router.condition) { - for (question in bdQuestion$child.questions) { - getUserResponse(question) + bd_question$print_question() + bd_question$get_response() + if (bd_question$users.answer %in% bd_question$router.condition) { + for (question in bd_question$child.questions) { + get_user_response(question) } } } diff --git a/R/decision_making.R b/R/decision_making.R index e433893..4195857 100644 --- a/R/decision_making.R +++ b/R/decision_making.R @@ -1,150 +1,179 @@ #' Data decision function (binary decision) required in bdclean internal usage. #' -#' NOTE: This is an package internal function. Do not use for external uses. +#' NOTE: This is an package internal function. Do not use for external uses. Exported to make it available for shiny app. #' #' @param bddata The dataframe to clean #' -#'@export +#' @examples +#' +#' if(interactive()){ +#' +#' library(rgbif) +#' occdat <- occ_data( +#' country = 'AU', # Country code for australia +#' classKey = 359, # Class code for mammalia +#' limit = 50 # Get only 50 records +#' ) +#' myData <- occdat$data +#' cleaned_data <- cleaning_function(myData) +#' +#' } +#' +#' @export cleaning_function <- function(bddata) { bddata <- as.data.frame(bddata) - checkColumns <- which(grepl("bdclean", names(bddata))) + check_columns <- which(grepl("bdclean", names(bddata))) - if (length(checkColumns) == 0) { + if (length(check_columns) == 0) { warning("Dataset has no flag columns! Skipping cleaning") return(bddata) } - checkData <- bddata[, checkColumns] + check_data <- bddata[, check_columns] # ------------- Decision Making of Cleaning ------------- - # Cleaning criteria is binary: Pass, Fail - # Records with cleanliness-score 10 for all checks will pass. - # Records with cleanliness-score less than 10 in atleast 1 check will fail + # Cleaning criteria is binary: Pass, Fail Records with cleanliness-score 10 for all + # checks will pass. Records with cleanliness-score less than 10 in atleast 1 check will fail # ------------- Decision Making of Cleaning ------------- - if (class(checkData) == "logical") { - failedDataLogical <- checkData != TRUE + if (class(check_data) == "logical") { + failed_data_logical <- check_data != TRUE } else { - failedDataLogical <- rowSums(checkData != TRUE, na.rm = T) >= 1 + failed_data_logical <- rowSums(check_data != TRUE, na.rm = T) >= 1 } # ------------- End of Decision Making of Cleaning ------------- - message("Records remaining:", - nrow(bddata) - sum(failedDataLogical)) + nrow(bddata) - sum(failed_data_logical)) - return(bddata[!failedDataLogical, !grepl("bdclean", names(bddata))]) + return(bddata[!failed_data_logical, !grepl("bdclean", names(bddata))]) } #' Data decision function (threshold tuning) required in bdclean internal usage. #' #' NOTE: This is an package internal function. Do not use for external uses. #' -#'@param flaggedData The dataset with flags to be cleaned. -#'@param cleaningThreshold The Cleaning tolerance. Not used in current version. +#' @param flagged_data The dataset with flags to be cleaned. +#' @param cleaning_threshold The Cleaning tolerance. Not used in current version. #' -perform_Cleaning <- function(flaggedData, cleaningThreshold = 5) { - flagColumns <- which(grepl("bdclean", names(flaggedData))) +#' @examples +#' +#' if(interactive()){ +#' +#' library(rgbif) +#' occdat <- occ_data( +#' country = 'AU', # Country code for australia +#' classKey = 359, # Class code for mammalia +#' limit = 50 # Get only 50 records +#' ) +#' myData <- occdat$data +#' cleaned_data <- perform_Cleaning(myData) +#' +#' } +perform_Cleaning <- function(flagged_data, cleaning_threshold = 5) { + flag_columns <- which(grepl("bdclean", names(flagged_data))) - if (length(flagColumns) == 0) { + if (length(flag_columns) == 0) { warning("Dataset has no flag columns! Skipping cleaning") - return(flaggedData) + return(flagged_data) } + cleaned_data <- flagged_data + cleaned_data$cleanliness_score <- 0 - cleanedData <- flaggedData - cleanedData$cleanlinessScore <- 0 - - for (columnIndex in flagColumns) { - cleanedData$cleanlinessScore <- - cleanedData$cleanlinessScore + cleanedData[, columnIndex] + for (column_index in flag_columns) { + cleaned_data$cleanliness_score <- + cleaned_data$cleanliness_score + cleaned_data[, column_index] } - cleanedData$cleanlinessScore <- - cleanedData$cleanlinessScore / length(flagColumns) - cleanedData <- - cleanedData[cleanedData$cleanlinessScore >= cleaningThreshold, c(flagColumns, length(cleanedData)) * -1] - - return(cleanedData) + cleaned_data$cleanliness_score <- + cleaned_data$cleanliness_score / length(flag_columns) + cleaned_data <- + cleaned_data[cleaned_data$cleanliness_score >= cleaning_threshold, c(flag_columns, length(cleaned_data)) * -1] + return(cleaned_data) } #' Returning checks list, function required in bdclean internal usage. #' #' NOTE: This is an package internal function. Do not use for external uses. +#' +#' @examples +#' +#' if(interactive()){ +#' +#' all_checks <- get_checks_list() +#' +#' } #' -#'@export +#' @export get_checks_list <- function() { - # Uncomment if both suppoprted in customized checks. Right now, - # only bdcecks supported as it doenst require user input. - # bdcleanDocumentation <- tools::Rd_db("bdclean") - # bdchecksDocumentation <- tools::Rd_db("bdchecks") - # packageDocumentation <- c(bdcleanDocumentation, bdchecksDocumentation) - - bdchecksDocumentation <- tools::Rd_db("bdchecks") - packageDocumentation <- bdchecksDocumentation + bdchecks_documentation <- tools::Rd_db("bdchecks") + package_documentation <- bdchecks_documentation - qualityChecks <- list() - - for (i in 1:length(packageDocumentation)) { - string <- paste(packageDocumentation[i], collapse = " ") + quality_checks <- list() + for (i in 1:length(package_documentation)) { + string <- paste(package_documentation[i], collapse = " ") if (grepl("checkCategory", string)) { - nameOfQualityCheck <- - gsub(".Rd", "", names(packageDocumentation)[i]) - - functionDocumentation <- packageDocumentation[i] + name_of_quality_check <- + gsub(".Rd", "", names(package_documentation)[i]) - brokenDocumentation <- + function_documentation <- package_documentation[i] + broken_documentation <- unlist(strsplit( - paste(functionDocumentation[[1]], collapse = " "), - split = '\\', + paste(function_documentation[[1]], collapse = " "), + split = "\\", fixed = TRUE )) - - brokenDocumentation <- gsub("\\n", "", - gsub( - "[{}]", "", brokenDocumentation - ) - ) + broken_documentation <- + gsub("\\n", "", gsub("[{}]", "", broken_documentation)) description <- - brokenDocumentation[grep("title", brokenDocumentation)] + broken_documentation[grep("title", broken_documentation)] description <- gsub("title Data check", "", description, fixed = T) - samplePassData <- - brokenDocumentation[grep("samplePassData", brokenDocumentation)] - samplePassData <- - gsub("section samplePassData", "", samplePassData, fixed = T) + sample_pass_data <- + broken_documentation[grep("samplePassData", broken_documentation)] + sample_pass_data <- + gsub("section samplePassData", + "", + sample_pass_data, + fixed = T) - sampleFailData <- - brokenDocumentation[grep("sampleFailData", brokenDocumentation)] - sampleFailData <- - gsub("section sampleFailData", "", sampleFailData, fixed = T) + sample_fail_data <- + broken_documentation[grep("sampleFailData", broken_documentation)] + sample_fail_data <- + gsub("section sampleFailData", + "", + sample_fail_data, + fixed = T) - checkCategory <- - brokenDocumentation[grep("checkCategory", brokenDocumentation)] - checkCategory <- - gsub("section checkCategory", "", checkCategory, fixed = T) + check_category <- + broken_documentation[grep("checkCategory", broken_documentation)] + check_category <- + gsub("section checkCategory", + "", + check_category, + fixed = T) - targetDWCField <- - brokenDocumentation[grep("targetDWCField", brokenDocumentation)] - targetDWCField <- - gsub("section targetDWCField", "", targetDWCField, fixed = T) + target_dwc_field <- + broken_documentation[grep("targetDWCField", broken_documentation)] + target_dwc_field <- + gsub("section targetDWCField", + "", + target_dwc_field, + fixed = T) temp <- list() - - temp$nameOfQualityCheck <- - paste("DC_", nameOfQualityCheck, sep = "") - temp$description <- - paste(description, collapse = " ") - temp$samplePassData <- samplePassData - temp$sampleFailData <- sampleFailData - temp$checkCategory <- checkCategory - temp$targetDWCField <- targetDWCField - - qualityChecks[nameOfQualityCheck] <- - list(temp) + temp$name_of_quality_check <- + paste("DC_", name_of_quality_check, sep = "") + temp$description <- paste(description, collapse = " ") + temp$sample_pass_data <- sample_pass_data + temp$sample_fail_data <- sample_fail_data + temp$check_category <- check_category + temp$target_dwc_field <- target_dwc_field + quality_checks[name_of_quality_check] <- list(temp) } } - return(qualityChecks) + return(quality_checks) } diff --git a/R/generate_report.R b/R/generate_report.R index 77a71c8..9b127fe 100644 --- a/R/generate_report.R +++ b/R/generate_report.R @@ -1,202 +1,198 @@ #' Generate data required to create report, function required in bdclean internal usage. #' -#' NOTE: This is an package internal function. Do not use for external uses. -#' -#' @param inputData The input dataframe before cleaning -#' @param flaggedData The flagged data for cleaning -#' @param cleanedData The data with flagged records removed +#' NOTE: This is an package internal function. Do not use for external uses. Exported to make it available for shiny app. +#' +#' @param input_data The input dataframe before cleaning +#' @param flagged_data The flagged data for cleaning +#' @param cleaned_data The data with flagged records removed #' @param responses The BDQuestions object with user responses -#' @param cleaningTrue Flag specifying if the cleaning should be done, or just flagging +#' @param cleaning_true Flag specifying if the cleaning should be done, or just flagging #' @param format The format of the report to be generated +#' +#' @examples +#' +#' if(interactive()){ +#' +#' library(rgbif) +#' occdat <- occ_data( +#' country = 'AU', # Country code for australia +#' classKey = 359, # Class code for mammalia +#' limit = 50 # Get only 50 records +#' ) +#' myData <- occdat$data +#' +#' question <- BdQuestion() +#' responses <- get_user_response(question) +#' +#' cleaned_data <- create_report_data(myData, myData, myData, responses, T, 'pdf') +#' +#' } #' -#'@export +#' @export create_report_data <- - function(inputData, - flaggedData, - cleanedData, + function(input_data, + flagged_data, + cleaned_data, responses, - cleaningTrue, + cleaning_true, format) { - for (question in responses$BdQuestions) { + for (question in responses$bdquestions) { if (length(question$quality.checks) > 0 && length(question$users.answer) > 0) { - question$addToReport(flaggedData, cleaningTrue) + question$add_to_report(flagged_data, cleaning_true) } } - if (!cleaningTrue) { - cleanedData <- flaggedData + if (!cleaning_true) { + cleaned_data <- flagged_data } # --------------- Data required for detailed report --------------- - inputSize <- dim(inputData) - outputSize <- dim(cleanedData) + input_size <- dim(input_data) + output_size <- dim(cleaned_data) - inputUniqueSpecies <- - length(unique(inputData$scientificName)) - outputUniqueSpecies <- - length(unique(cleanedData$scientificName)) + input_unique_species <- + length(unique(input_data[, "scientificName"])) + output_unique_species <- + length(unique(cleaned_data[, "scientificName"])) - # Sys.setenv( TZ="Etc/GMT+5" ) - earliestInputDate <- - try(as.POSIXct(unique(inputData$eventDate), tz="UTC"), silent = T) + earliest_input_date <- + try(as.POSIXct(unique(input_data[, "eventDate"]), tz = "UTC"), silent = T) - if(class(earliestInputDate)!="try-error"){ - earliestInputDate <- min(earliestInputDate) - latestInputDate <- - max(as.POSIXct(unique(inputData$eventDate), tz="UTC")) + if (class(earliest_input_date) != "try-error") { + earliest_input_date <- min(earliest_input_date) + latest_input_date <- + max(as.POSIXct(unique(input_data[, "eventDate"]), tz = "UTC")) } else { - earliestInputDate <- "Not Available" - latestInputDate <- "Not Available" + earliest_input_date <- "Not Available" + latest_input_date <- "Not Available" } - - earliestOutputDate <- - try(as.POSIXct(unique(cleanedData$eventDate), tz="UTC"), silent = T) - if(class(earliestOutputDate)!="try-error"){ - earliestOutputDate <- min(earliestInputDate) - latestOutputDate <- - max(as.POSIXct(unique(cleanedData$eventDate), tz="UTC")) + earliest_output_date <- + try(as.POSIXct(unique(cleaned_data[, "eventDate"]), tz = "UTC"), silent = T) + + if (class(earliest_output_date) != "try-error") { + earliest_output_date <- min(earliest_input_date) + latest_output_date <- + max(as.POSIXct(unique(cleaned_data[, "eventDate"]), tz = "UTC")) } else { - earliestOutputDate <- "Not Available" - latestOutputDate <- "Not Available" + earliest_output_date <- "Not Available" + latest_output_date <- "Not Available" } - - InputData <- + input_data <- c( - inputSize[1], - inputSize[2], - inputUniqueSpecies, - paste(earliestInputDate, "-", latestInputDate) + input_size[1], + input_size[2], + input_unique_species, + paste(earliest_input_date, "-", latest_input_date) ) - - CleanedData <- + cleaned_data <- c( - outputSize[1], - outputSize[2], - outputUniqueSpecies, - paste(earliestOutputDate, "-", latestOutputDate) + output_size[1], + output_size[2], + output_unique_species, + paste(earliest_output_date, "-", latest_output_date) ) - - data.summary <- data.frame(InputData, CleanedData) # One + data.summary <- data.frame(input_data, cleaned_data) # One row.names(data.summary) <- c("Rows", "Columns", "Number of unique scientific names", "Date Range") - checks.records <- list() # Three + checks.records <- list() # Three index <- 1 - for (question in responses$BdQuestions) { + for (question in responses$bdquestions) { # length(question$quality.checks) > 0 && if (length(question$users.answer) > 0) { checks.records[[paste("question", index, sep = "")]] <- list( - "question" = question$question, - "answer" = question$users.answer, - "checks" = question$cleaning.details + question = question$question, + answer = question$users.answer, + checks = question$cleaning.details ) - index = index + 1 + index <- index + 1 } } - - - # ------------------- End of data required for detailed report --------------- # ------------------ Data required for short report --------------- - - recordsTable <- data.frame( - DataCleaningProcedure = "Initial Records", - NoOfRecords = NROW(inputData), - Action = "" - ) - + records_table <- + data.frame( + DataCleaningProcedure = "Initial Records", + NoOfRecords = NROW(input_data), + Action = "" + ) for (question in checks.records) { - checkIndex <- 1 + check_index <- 1 for (check in question$checks) { - recordsTable <- + + print(names(question$checks)) + print(names(question$checks)[check_index]) + + records_table <- rbind( - recordsTable, + records_table, data.frame( - DataCleaningProcedure = names(question$checks)[checkIndex], + DataCleaningProcedure = names(question$checks)[check_index], NoOfRecords = check$affectedData, - Action = ifelse(cleaningTrue, "Removal", "Flagging") + Action = ifelse(cleaning_true, "Removal", "Flagging") ) ) - checkIndex <- checkIndex + 1 + check_index <- check_index + 1 } } - - remainingRecords <- - (nrow(cleanedData)) - removedRecords <- nrow(inputData) - nrow(cleanedData) - repairedRecords <- 0 - - recordsTable <- + remaining_records <- (nrow(cleaned_data)) + removed_records <- nrow(input_data) - nrow(cleaned_data) + records_table <- rbind( - recordsTable, + records_table, data.frame( DataCleaningProcedure = "Total", NoOfRecords = paste( - "Remaining " , - remainingRecords, + "Remaining ", + remaining_records, " Records (", - (remainingRecords / recordsTable[1, 2]) * 100, + (remaining_records / records_table[1, 2]) * 100, "%)", sep = "" ), - Action = paste ( - ifelse(cleaningTrue, "Removal of ", "Flagging of"), - removedRecords, + Action = paste( + ifelse(cleaning_true, "Removal of ", "Flagging of"), + removed_records, " Records (", - (removedRecords / recordsTable[1, 2]) * 100, + (removed_records / records_table[1, 2]) * 100, "%)", sep = "" ) ) ) - # ------------ End of data required for short report --------------- - - print(knitr::kable(recordsTable, format = "markdown")) - - generateShortReport(recordsTable, format) - generateDetailedReport(data.summary, checks.records, format) + message(knitr::kable(records_table, format = "markdown")) + generate_short_report(records_table, format) + generate_detailed_report(data.summary, checks.records, format) } -generateShortReport <- function(recordsTable, format) { +generate_short_report <- function(records_table, format) { message("Generating Reports...") - - dir.create(file.path(getwd(), "CleaningReports"), showWarnings = FALSE) - - message("Created folder") - try(rmarkdown::render( system.file("rmd/generateShortReport.Rmd", package = "bdclean"), format, quiet = T, - output_dir = "CleaningReports" + output_dir = tempdir() )) - message("generated simple") } -generateDetailedReport <- +generate_detailed_report <- function(data.summary, checks.records, format) { try(rmarkdown::render( system.file("rmd/generateDetailedReport.Rmd", package = "bdclean"), format, quiet = T, - output_dir = "CleaningReports" - )) - - message(paste( - "Saved generated reports to '", - getwd() , - "/CleaningReports'", - sep = "" + output_dir = tempdir() )) + message(paste("Saved generated reports to '", tempdir(), sep = "")) } diff --git a/R/quality_checks.R b/R/quality_checks.R index 10b5909..8bca579 100644 --- a/R/quality_checks.R +++ b/R/quality_checks.R @@ -13,12 +13,28 @@ #' #' @section checkCategory: #' taxonomic +#' +#' @param bddata Bio diversity data in a data frame +#' @param res The low rank of species required +#' +#' @examples +#' +#' if(interactive()){ #' -#'@param bddata Bio diversity data in a data frame -#'@param res The low rank of species required +#' library(rgbif) +#' occdat <- occ_data( +#' country = 'AU', # Country code for australia +#' classKey = 359, # Class code for mammalia +#' limit = 50 # Get only 50 records +#' ) +#' myData <- occdat$data +#' +#' responses <- taxo_level(myData, 'SPECIES') +#' +#' } #' #' @export -taxoLevel <- function(bddata, res = "SPECIES") { +taxo_level <- function(bddata, res = "SPECIES") { ranks <- c("CLASS", "ORDER", @@ -29,16 +45,16 @@ taxoLevel <- function(bddata, res = "SPECIES") { res <- toupper(res) if (!(res %in% ranks)) { - print("Rank Value unknown. It should be FAMILY, GENUS, SPECIES or SUBSPECIES") + warning("Rank Value unknown. It should be FAMILY, GENUS, SPECIES or SUBSPECIES") return(bddata) } idx <- which(ranks == res) - cat(paste("taxoLevel:", "\n Removing records above :", res, "\n")) - bddata$bdclean.taxoLevel <- FALSE + message(paste("taxoLevel:", "\n Removing records above :", res, "\n")) + bddata[, "bdclean.taxoLevel"] <- FALSE if (idx > 0) { for (i in idx:length(ranks)) { - bddata[which(bddata$taxonRank == ranks[i]), 'bdclean.taxoLevel'] <- + bddata[which(bddata[, "taxonRank"] == ranks[i]), "bdclean.taxoLevel"] <- TRUE } } @@ -61,22 +77,38 @@ taxoLevel <- function(bddata, res = "SPECIES") { #' #' @section checkCategory: #' spatial +#' +#' @param bddata Bio diversity data in a data frame +#' @param res The highest coordinate uncertainty required #' -#'@param bddata Bio diversity data in a data frame -#'@param res The highest coordinate uncertainty required +#' @examples +#' +#' if(interactive()){ +#' +#' library(rgbif) +#' occdat <- occ_data( +#' country = 'AU', # Country code for australia +#' classKey = 359, # Class code for mammalia +#' limit = 50 # Get only 50 records +#' ) +#' myData <- occdat$data +#' +#' responses <- spatial_resolution(myData, 1500) +#' +#' } #' -#'@export -spatialResolution <- function(bddata, res = 100) { - cat(paste( +#' @export +spatial_resolution <- function(bddata, res = 100) { + message(paste( "spatialResolution:", "\n Removing records above :", res, "\n" )) res <- as.numeric(res) - bddata$bdclean.spatialResolution <- FALSE + bddata[, "bdclean.spatialResolution"] <- FALSE if (res > 0) { - bddata[which(bddata$coordinateUncertaintyInMeters < res), 'bdclean.spatialResolution'] <- + bddata[which(bddata[, "coordinateUncertaintyInMeters"] < res), "bdclean.spatialResolution"] <- TRUE } return(bddata) @@ -97,22 +129,38 @@ spatialResolution <- function(bddata, res = 100) { #' #' @section checkCategory: #' temporal +#' +#' @param bddata Bio diversity data in a data frame +#' @param res The earliest data required #' -#'@param bddata Bio diversity data in a data frame -#'@param res The earliest data required +#' @examples +#' +#' if(interactive()){ +#' +#' library(rgbif) +#' occdat <- occ_data( +#' country = 'AU', # Country code for australia +#' classKey = 359, # Class code for mammalia +#' limit = 50 # Get only 50 records +#' ) +#' myData <- occdat$data +#' +#' responses <- earliest_date(myData, '2000-01-01') +#' +#' } #' -#'@export -earliestDate <- function(bddata, res = "1700-01-01") { - cat(paste("earliestDate:", "\n Removing records above :", res, "\n")) +#' @export +earliest_date <- function(bddata, res = "1700-01-01") { + message(paste("earliestDate:", "\n Removing records above :", res, "\n")) dates <- strsplit(res, " ")[[1]] bddata <- as.data.frame(bddata) ed <- try(as.Date(dates[1], format = "%Y-%m-%d")) if (class(ed) == "try-error" || is.na(ed)) { - print("That date wasn't correct!") + warning("That date wasn't correct!") return(bddata) } - bddata$bdclean.earliestDate <- FALSE - bddata[which(as.Date(bddata$eventDate) > ed), 'bdclean.earliestDate'] <- + bddata[, "bdclean.earliestDate"] <- FALSE + bddata[which(as.Date(bddata[, "eventDate"]) > ed), "bdclean.earliestDate"] <- TRUE return(bddata) } @@ -132,20 +180,36 @@ earliestDate <- function(bddata, res = "1700-01-01") { #' #' @section checkCategory: #' temporal +#' +#' @param bddata Bio diversity data in a data frame +#' @param res restriction of records with/without data, month, year fields #' -#'@param bddata Bio diversity data in a data frame -#'@param res restriction of records with/without data, month, year fields +#' @examples +#' +#' if(interactive()){ +#' +#' library(rgbif) +#' occdat <- occ_data( +#' country = 'AU', # Country code for australia +#' classKey = 359, # Class code for mammalia +#' limit = 50 # Get only 50 records +#' ) +#' myData <- occdat$data +#' +#' responses <- taxo_level(temporal_resolution, 'Day') +#' +#' } #' -#'@export -temporalResolution <- function(bddata, res = "Day") { - cat(paste( +#' @export +temporal_resolution <- function(bddata, res = "Day") { + message(paste( "temporalResolution:", "\n Removing records above :", res, "\n" )) bddata <- as.data.frame(bddata) - bddata$bdclean.temporalResolution <- FALSE + bddata[, "bdclean.temporalResolution"] <- FALSE if (res == "Day") { bddata[which(!is.na(bddata$day)), "bdclean.temporalResolution"] <- TRUE @@ -159,4 +223,4 @@ temporalResolution <- function(bddata, res = "Day") { TRUE } return(bddata) -} \ No newline at end of file +} diff --git a/R/questionnaire.R b/R/questionnaire.R index abf6044..df59b1a 100644 --- a/R/questionnaire.R +++ b/R/questionnaire.R @@ -1,33 +1,19 @@ #' Create the package default Questionnaire. #' +#' @return BdQuestionContainer object with default Questions #' -#'@return BdQuestionContainer object with default Questions +#' @examples #' -#'@examples \dontrun{ -#'library(rgbif) -#'occdat1 <- occ_data( -#' country = "AU", # Country code for australia -#' classKey= 359, # Class code for mammalia -#' limit=5000, # Get only 5000 records -#' ) -#' myData<-occdat1$data +#' customQuestionnaire <- create_default_questionnaire() #' -#' responses <- run_questionnaire() -#' cleanedData <- clean_data_new(myData, responses) -#' -#' customQuestionnaire <- create_default_questionnaire() -#' customResponses <- run_questionnaire(customQuestionnaire) -#' cleanedData <- clean_data_new(myData, customResponses) -#'} -#' -#'@export +#' @export create_default_questionnaire <- function() { question1 <- BdQuestion( question = "Do you worry about taxonomical aspect of the data?", - possible.responses = c("Yes" , "No"), + possible.responses = c("Yes", "No"), question.type = "Router", - router.condition = c("Yes", "Y", "yes" , 1, TRUE, "TRUE"), + router.condition = c("Yes", "Y", "yes", 1, TRUE, "TRUE"), question.id = "taxonMain", ui.type = "single-checkbox" ) @@ -44,19 +30,19 @@ create_default_questionnaire <- function() { "Class" ), question.type = "Child", - quality.checks = c("taxoLevel"), + quality.checks = c("taxo_level"), question.id = "taxonLevel", ui.type = "select" ) - question1$addChildQuestion(c(question2)) + question1$add_child_question(c(question2)) question3 <- BdQuestion( question = "Do you worry about spatial aspect of the data?", - possible.responses = c("Yes" , "No"), + possible.responses = c("Yes", "No"), question.type = "Router", - router.condition = c("Yes", "Y", "yes" , 1, TRUE, "TRUE"), + router.condition = c("Yes", "Y", "yes", 1, TRUE, "TRUE"), quality.checks = c("DC_coordinatesZero"), question.id = "spatialMain", ui.type = "single-checkbox" @@ -66,16 +52,14 @@ create_default_questionnaire <- function() { BdQuestion( question = "What is the spatial resolution required for your data? (in meteres)", question.type = "Child", - quality.checks = c("spatialResolution"), + quality.checks = c("spatial_resolution"), question.id = "spatialResolution", ui.type = "numericInput" ) - question4$addValidationFunction(function(answer) { + question4$add_validation_function(function(answer) { answer <- suppressWarnings(as.numeric(answer)) - check <- - (!is.na(answer) && - answer > 0 && answer < 100000) + check <- (!is.na(answer) && answer > 0 && answer < 1e+05) if (!check) { message( "Spatial resolution should be a number between 0 to 100 KM. Please give a correct value." @@ -84,12 +68,12 @@ create_default_questionnaire <- function() { return(check) }) - questionSub01 <- + question_sub_01 <- BdQuestion( question = "Do you worry about precision of coordinates?", - possible.responses = c("Yes" , "No"), + possible.responses = c("Yes", "No"), question.type = "ChildRouter", - router.condition = c("Yes", "Y", "yes" , 1, TRUE, "TRUE"), + router.condition = c("Yes", "Y", "yes", 1, TRUE, "TRUE"), quality.checks = c( "DC_coordinatePrecisionMismatch", "DC_precisionRangeMismatch", @@ -99,36 +83,42 @@ create_default_questionnaire <- function() { ui.type = "single-checkbox" ) - questionSub02 <- + question_sub_02 <- BdQuestion( question = "Do you worry about countries of occurrences?", - possible.responses = c("Yes" , "No"), + possible.responses = c("Yes", "No"), question.type = "ChildRouter", - router.condition = c("Yes", "Y", "yes" , 1, TRUE, "TRUE"), + router.condition = c("Yes", "Y", "yes", 1, TRUE, "TRUE"), quality.checks = c("DC_countryMismatch", "DC_countryNameUnknown"), question.id = "countryCoord", ui.type = "single-checkbox" ) - questionSub03 <- + question_sub_03 <- BdQuestion( question = "Do you worry about elevation of occurrences?", - possible.responses = c("Yes" , "No"), - router.condition = c("Yes", "Y", "yes" , 1, TRUE, "TRUE"), + possible.responses = c("Yes", "No"), + router.condition = c("Yes", + "Y", "yes", 1, TRUE, "TRUE"), question.type = "ChildRouter", quality.checks = c("DC_depthOutOfRange", "DC_elevationOutOfRange"), question.id = "elevationCoord", ui.type = "single-checkbox" ) - question3$addChildQuestion(c(question4, questionSub01, questionSub02, questionSub03)) + question3$add_child_question(c( + question4, + question_sub_01, + question_sub_02, + question_sub_03 + )) question5 <- BdQuestion( question = "Do you worry about temporal aspect of your data?", - possible.responses = c("Yes" , "No"), + possible.responses = c("Yes", "No"), question.type = "Router", - router.condition = c("Yes", "Y", "yes" , 1, TRUE, "TRUE"), + router.condition = c("Yes", "Y", "yes", 1, TRUE, "TRUE"), quality.checks = c( "DC_dateNull", "DC_dayInvalid", @@ -144,12 +134,12 @@ create_default_questionnaire <- function() { BdQuestion( question = "What is the range of dates of the observations in this data set? In format (YYYY-mm-dd YYYY-mm-dd)", question.type = "Child", - quality.checks = c("earliestDate"), + quality.checks = c("earliest_date"), question.id = "temporalEarliest", ui.type = "date-range" ) - question6$addValidationFunction(function(answer) { + question6$add_validation_function(function(answer) { dates <- strsplit(answer, " ")[[1]] d <- try(as.Date(dates[1])) if (class(d) == "try-error" || is.na(d)) { @@ -169,17 +159,18 @@ create_default_questionnaire <- function() { question = "What temporal resolution are you interested in?", possible.responses = c("Day", "Month", "Year"), question.type = "Child", - quality.checks = c("temporalResolution"), + quality.checks = c("temporal_resolution"), question.id = "temporalResolution", ui.type = "radio" ) - questionSub04 <- + question_sub_04 <- BdQuestion( question = "Do you worry about dates other than occured date (published date/identified date)?", - possible.responses = c("Yes" , "No"), + possible.responses = c("Yes", + "No"), question.type = "ChildRouter", - router.condition = c("Yes", "Y", "yes" , 1, TRUE, "TRUE"), + router.condition = c("Yes", "Y", "yes", 1, TRUE, "TRUE"), quality.checks = c( "DC_identifiedDateImprobable", "DC_modifiedInFuture", @@ -189,14 +180,14 @@ create_default_questionnaire <- function() { ui.type = "single-checkbox" ) - question5$addChildQuestion(c(question6, question7, questionSub04)) + question5$add_child_question(c(question6, question7, question_sub_04)) - questionSub05 <- + question_sub_05 <- BdQuestion( question = "Do you worry about other properties of occurrence? (GBIF issues/publisher/occuranceremark, etc)?", - possible.responses = c("Yes" , "No"), + possible.responses = c("Yes", "No"), question.type = "Router", - router.condition = c("Yes", "Y", "yes" , 1, TRUE, "TRUE"), + router.condition = c("Yes", "Y", "yes", 1, TRUE, "TRUE"), quality.checks = c( "DC_basisOfRecordBadlyFormed", "DC_classUnknown", @@ -208,23 +199,22 @@ create_default_questionnaire <- function() { ui.type = "single-checkbox" ) - allQuestions <- + all_questions <- BdQuestionContainer( c( question1, question2, question3, question4, - questionSub01, - questionSub02, - questionSub03, + question_sub_01, + question_sub_02, + question_sub_03, question5, question6, question7, - questionSub04, - questionSub05 + question_sub_04, + question_sub_05 ) ) - - return(allQuestions) + return(all_questions) } diff --git a/R/reference_classes.R b/R/reference_classes.R index 96f03b7..f62b059 100644 --- a/R/reference_classes.R +++ b/R/reference_classes.R @@ -35,8 +35,7 @@ BdQuestion <- .self$question.id <- question.id .self$ui.type <- ui.type }, - - printQuestion = function() { + print_question = function() { cat(.self$question, "\n") if (length(.self$possible.responses) > 0) { for (i in 1:length(.self$possible.responses)) { @@ -44,30 +43,23 @@ BdQuestion <- } } }, - - addValidationFunction = function(valFunction) { - .self$validation.function <- valFunction + add_validation_function = function(val_function) { + .self$validation.function <- val_function }, - - setResponse = function(response) { + set_response = function(response) { if (class(response) == "logical") { .self$users.answer <- ifelse(response, "yes", "no") } else { .self$users.answer <- as.character(response) } }, - - getResponse = function() { + get_response = function() { ans <- readline() length <- length(.self$possible.responses) - if (length > 0) { # Means it was a menu question, and not an open answer ans <- suppressWarnings(as.numeric(ans)) - - if (!is.na(ans) && - ans > 0 && - ans <= length) { + if (!is.na(ans) && ans > 0 && ans <= length) { # Validating user renponse is a menu number. .self$users.answer <- .self$possible.responses[as.numeric(ans)] @@ -75,16 +67,13 @@ BdQuestion <- message("Please choose number from menu...") .self$getResponse() } - } else { # Means answer is open ended - if (is.null(.self$validation.function)) { # If a validation function is not given .self$users.answer <- ans } else { - val = .self$validation.function(ans) - + val <- .self$validation.function(ans) if (val) { # If the validation function passes (returns true) .self$users.answer <- ans @@ -94,72 +83,62 @@ BdQuestion <- } } }, - - addChildQuestion = function(questions) { + add_child_question = function(questions) { .self$child.questions <- questions }, - - addQualityChecks = function(newChecks) { - cat("Adding Quality Checks.") - .self$quality.checks <- newChecks + add_quality_checks = function(new_checks) { + message("Adding Quality Checks.") + .self$quality.checks <- new_checks }, - - flagData = function(data, missing = FALSE) { - flaggedData <- data + flag_data = function(data, missing = FALSE) { + flagged_data <- data if (length(.self$quality.checks) > 0) { for (i in 1:length(.self$quality.checks)) { - checkName <- .self$quality.checks[i] - - if (grepl("DC_", checkName)) { + check_name <- .self$quality.checks[i] + if (grepl("DC_", check_name)) { # bdchecks quality checks - checkTemp <- - bdchecks::performDataCheck(data = flaggedData, - DConly = c(checkName)) + check_temp <- + bdchecks::performDataCheck(data = flagged_data, + DConly = c(check_name)) - - if (!is.null(checkTemp) && - length(checkTemp@flags) > 0 && - length(checkTemp@flags[[1]]@result) > 0) { - checkTemp <- checkTemp@flags[[1]]@result + if (!is.null(check_temp) && + length(check_temp@flags) > 0 && + length(check_temp@flags[[1]]@result) > 0) { + check_temp <- check_temp@flags[[1]]@result if (missing) { - checkTemp[is.na(checkTemp)] <- - FALSE # Treating mising values as fails + check_temp[is.na(check_temp)] <- + FALSE # Treating mising values as fails } else { - checkTemp[is.na(checkTemp)] <- - TRUE + check_temp[is.na(check_temp)] <- TRUE } - - flaggedData[, paste("bdclean", checkName, sep = ".")] <- - checkTemp + flagged_data[, paste("bdclean", check_name, sep = ".")] <- + check_temp } - } else { # bdclean quality checks - flaggedData <- - get(checkName)(flaggedData, .self$users.answer) + flagged_data <- + get(check_name)(flagged_data, .self$users.answer) } } } - - return(flaggedData) + return(flagged_data) }, - - addToReport = function(flaggedData, - clean = TRUE, - CleaningThreshold = 5) { - packageDocumentation <- tools::Rd_db("bdchecks") - flaggedData <- as.data.frame(flaggedData) + add_to_report = function(flagged_data, + clean = TRUE, + cleaning_threshold = 5) { + package_documentation <- tools::Rd_db("bdchecks") + flagged_data <- as.data.frame(flagged_data) for (i in 1:length(.self$quality.checks)) { - nameOfQualityCheck <- .self$quality.checks[i] + name_of_quality_check <- .self$quality.checks[i] - if (!(paste("bdclean", nameOfQualityCheck, sep = ".") %in% names(flaggedData))) { + if (!(paste("bdclean", name_of_quality_check, sep = ".") %in% names(flagged_data))) { # both bdchecks and bdclean columns have bdcelan prefix warning( "Required column ", - paste("bdclean", nameOfQualityCheck, sep = "."), + paste("bdclean", name_of_quality_check, sep = "."), " not found! Probably, quality check is missing from environment and check was not performed." ) @@ -167,90 +146,75 @@ BdQuestion <- } flag <- - flaggedData[,paste("bdclean", nameOfQualityCheck, sep = ".")] - - # Uncomment if using threshold - # countOfFlaggedData <- - # sum(flag < CleaningThreshold, na.rm = TRUE) - - countOfFlaggedData <- - sum(flag != TRUE, na.rm = T) + flagged_data[, paste("bdclean", name_of_quality_check, sep = ".")] + count_of_flagged_data <- sum(flag != TRUE, na.rm = T) # ------ Parsing MetaData for check from .Rd file - functionDocumentation <- - packageDocumentation[grep(nameOfQualityCheck, names(packageDocumentation))] + function_documentation <- + package_documentation[grep(name_of_quality_check, names(package_documentation))] - if (length(functionDocumentation) == 0) { + if (length(function_documentation) == 0) { warning( "Could not find function documentation for ", - nameOfQualityCheck, + name_of_quality_check, ". Skipping report." ) next } - - brokenDocumentation <- + broken_documentation <- unlist(strsplit( - paste(functionDocumentation[[1]], collapse = " "), - split = '\\', + paste(function_documentation[[1]], collapse = " "), + split = "\\", fixed = TRUE )) - brokenDocumentation <- gsub("\\n", "", - gsub( - "[{}]", "", brokenDocumentation - ) - ) + broken_documentation <- + gsub("\\n", "", gsub("[{}]", "", broken_documentation)) description <- - brokenDocumentation[grep("title", brokenDocumentation)] + broken_documentation[grep("title", broken_documentation)] description <- gsub("title Data check", "", description, fixed = T) - samplePassData <- - brokenDocumentation[grep("samplePassData", brokenDocumentation)] - samplePassData <- - gsub("section samplePassData", "", samplePassData, fixed = T) + sample_pass_data <- + broken_documentation[grep("samplePassData", broken_documentation)] + sample_pass_data <- + gsub("section samplePassData", "", sample_pass_data, fixed = T) - sampleFailData <- - brokenDocumentation[grep("sampleFailData", brokenDocumentation)] - sampleFailData <- - gsub("section sampleFailData", "", sampleFailData, fixed = T) + sample_fail_data <- + broken_documentation[grep("sampleFailData", broken_documentation)] + sample_fail_data <- + gsub("section sampleFailData", "", sample_fail_data, fixed = T) - checkCategory <- - brokenDocumentation[grep("checkCategory", brokenDocumentation)] - checkCategory <- - gsub("section checkCategory", "", checkCategory, fixed = T) + check_category <- + broken_documentation[grep("checkCategory", broken_documentation)] + check_category <- + gsub("section checkCategory", "", check_category, fixed = T) - targetDWCField <- - brokenDocumentation[grep("targetDWCField", brokenDocumentation)] - targetDWCField <- - gsub("section targetDWCField", "", targetDWCField, fixed = T) + target_dwc_field <- + broken_documentation[grep("targetDWCField", broken_documentation)] + target_dwc_field <- + gsub("section targetDWCField", "", target_dwc_field, fixed = T) # ------ End of Parsing MetaData for check from .Rd file temp <- list() + temp$description <- paste(description, collapse = " ") + temp$sample_pass_data <- sample_pass_data + temp$sample_fail_data <- sample_fail_data + temp$check_category <- check_category + temp$target_dwc_field <- target_dwc_field + temp$affected_data <- count_of_flagged_data - temp$description <- - paste(description, collapse = " ") - temp$samplePassData <- samplePassData - temp$sampleFailData <- sampleFailData - temp$checkCategory <- checkCategory - temp$targetDWCField <- targetDWCField - temp$affectedData <- countOfFlaggedData - - .self$cleaning.details[nameOfQualityCheck] <- - list(temp) + .self$cleaning.details[name_of_quality_check] <- list(temp) } }, - notify = function() { - cat("New Question object created.") + message("New Question object created.") }, - - printSelf = function() { + print_self = function() { print(.self$question) for (i in 1:length(.self$possible.responses)) { cat(" ", i, " ", .self$possible.responses[i], "\n") @@ -268,22 +232,21 @@ BdQuestion <- BdQuestionContainer <- setRefClass( "BdQuestionContainer", - fields = list(BdQuestions = "list"), + fields = list(bdquestions = "list"), methods = list( - initialize = function(BdQuestions = NA) { + initialize = function(bdquestions = NA) { "Construct an instance of BdQuestionContainer after validating the type." - if (class(BdQuestions[[1]]) != "BdQuestion") { + if (class(bdquestions[[1]]) != "BdQuestion") { stop("Incompatible input type. Provide a list of BdQuestion") } - .self$BdQuestions <- BdQuestions + .self$bdquestions <- bdquestions .self$notify() }, - - flagData = function(inputData, missing) { - message("Initial records: ", paste(dim(inputData), collapse = "x")) - flaggedData <- inputData - for (question in .self$BdQuestions) { + flag_data = function(input_data, missing) { + message("Initial records: ", paste(dim(input_data), collapse = "x")) + flagged_data <- input_data + for (question in .self$bdquestions) { if (length(question$quality.checks) > 0 && length(question$users.answer) > 0) { if (question$question.type == "Router" && @@ -295,33 +258,22 @@ BdQuestionContainer <- # If its ChildRouter and condition fails next } - - flaggedData <- - question$flagData(flaggedData, missing) + flagged_data <- + question$flag_data(flagged_data, missing) } - - # temp <- try({ - # question$flagData(tempData) - # }) - # - # if (!is(temp, "try-error")) { - # tempData <- temp - # } } - return(flaggedData) + return(flagged_data) }, - notify = function() { message(paste( "New BdQuestionContainer instance created with", - length(.self$BdQuestions), + length(.self$bdquestions), "questions." )) }, - - printSelf = function() { - for (question in .self$BdQuestions) { - question$printSelf() + print_self = function() { + for (question in .self$bdquestions) { + question$print_self() } } ) diff --git a/R/run_bdclean.R b/R/run_bdclean.R index 79cb045..9451422 100644 --- a/R/run_bdclean.R +++ b/R/run_bdclean.R @@ -1,10 +1,18 @@ #' Launch bdclean Shiny Application #' -#'@import shinydashboard shinyjs leaflet data.table rgbif spocc finch bdDwC bdchecks tools -#'@importFrom shiny runApp +#' @import shinydashboard shinyjs leaflet data.table rgbif spocc finch bdDwC bdchecks tools +#' @importFrom shiny runApp +#' +#' @examples +#' +#' if(interactive()){ +#' +#' run_bdclean() +#' +#' } #' -#'@export +#' @export run_bdclean <- function() { app_path <- system.file("shiny/bdclean", package = "bdclean") return(shiny::runApp(app_path, launch.browser = TRUE)) -} \ No newline at end of file +} diff --git a/README.md b/README.md index 0b2da83..a9b564b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,6 @@ -[![Build Status](https://travis-ci.org/thiloshon/bdclean.svg?branch=master)](https://travis-ci.org/thiloshon/bdclean) +[![Build Status](https://travis-ci.org/bd-R/bdclean.svg?branch=new_bdclean)](https://travis-ci.org/bd-R/bdclean) +[![AppVeyor build status](https://ci.appveyor.com/api/projects/status/github/thiloshon/bdclean?branch=master&svg=true)](https://ci.appveyor.com/project/thiloshon/bdclean) +[![Coverage status](https://codecov.io/gh/thiloshon/bdclean/branch/master/graph/badge.svg)](https://codecov.io/github/thiloshon/bdclean?branch=master) # bdclean ### User-friendly biodiversity data cleaning pipeline diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 0000000..c6c1438 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,45 @@ +# DO NOT CHANGE the "init" and "install" sections below + +# Download script file from GitHub +init: + ps: | + $ErrorActionPreference = "Stop" + Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" + Import-Module '..\appveyor-tool.ps1' + +install: + ps: Bootstrap + +cache: + - C:\RLibrary + +# Adapt as necessary starting from here + +build_script: + - travis-tool.sh install_deps + +test_script: + - travis-tool.sh run_tests + +on_failure: + - 7z a failure.zip *.Rcheck\* + - appveyor PushArtifact failure.zip + +artifacts: + - path: '*.Rcheck\**\*.log' + name: Logs + + - path: '*.Rcheck\**\*.out' + name: Logs + + - path: '*.Rcheck\**\*.fail' + name: Logs + + - path: '*.Rcheck\**\*.Rout' + name: Logs + + - path: '\*_*.tar.gz' + name: Bits + + - path: '\*_*.zip' + name: Bits diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..8f36b6c --- /dev/null +++ b/codecov.yml @@ -0,0 +1,12 @@ +comment: false + +coverage: + status: + project: + default: + target: auto + threshold: 1% + patch: + default: + target: auto + threshold: 1% diff --git a/cran-comments.md b/cran-comments.md index 9377352..6274ae1 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,11 +1,13 @@ ## Resubmission This is a resubmission. In this version I have: -* Removed importsFrom from Description +* Addded small executables (outside of \dontrun{}) in all exported functions. As the package has mainly a shiny app, most examples are in interactive mode. -* Added DOI references and URLs +* Fixed description as suggested. --- R CMD check results -------------------------------------------------------------------- bdclean 0.1.12 ---- -Duration: 1m 33.8s + + + +-- R CMD check results ----------------------------------------------------------------------------------------------- 0 errors √ | 0 warnings √ | 0 notes √ \ No newline at end of file diff --git a/inst/rmd/generateShortReport.Rmd b/inst/rmd/generateShortReport.Rmd index 7e2de96..390d489 100644 --- a/inst/rmd/generateShortReport.Rmd +++ b/inst/rmd/generateShortReport.Rmd @@ -12,5 +12,5 @@ suppressPackageStartupMessages(library(knitr)) ## Data Cleaning Summary Table ```{r echo=FALSE} -kable(recordsTable) +kable(records_table) ``` \ No newline at end of file diff --git a/inst/shiny/bdclean/functions.R b/inst/shiny/bdclean/functions.R index eacb37b..fe32a75 100644 --- a/inst/shiny/bdclean/functions.R +++ b/inst/shiny/bdclean/functions.R @@ -2,9 +2,9 @@ summarizeDataframe <- function(data) { if (nrow(data) == 0) { return(data) } - tempData <- as.data.frame(data) - tempData <- - tempData[, names(tempData) %in% c( + temp_data <- as.data.frame(data) + temp_data <- + temp_data[, names(temp_data) %in% c( "scientificName", "taxonRank", "eventDate", @@ -12,27 +12,27 @@ summarizeDataframe <- function(data) { "decimalLatitude", "decimalLongitude" )] - tempData <- cbind(tempData, data) + temp_data <- cbind(temp_data, data) + hiding_cols <- c() + temp_data[] <- lapply(temp_data, as.character) - hidingCols <- c() - tempData[] <- lapply(tempData, as.character) - - for (i in 1:length(names(tempData))) { + for (i in 1:length(names(temp_data))) { + size <- ifelse(nrow(temp_data) > 1000, 1000, nrow(temp_data)) sample <- - sample(1:nrow(tempData), size = ifelse(nrow(tempData) > 1000, 1000, nrow(tempData))) + sample(1:nrow(temp_data), size = size) f <- - mean(sapply(tempData[sample, i], function(x) + mean(sapply(temp_data[sample, i], function(x) nchar(x)), na.rm = T) if (!is.nan(f)) { if (f > 50) { - hidingCols <- c(hidingCols, i) + hiding_cols <- c(hiding_cols, i) } } } - if (length(hidingCols) > 0) { - tempData <- tempData[, c(hidingCols * -1)] + if (length(hiding_cols) > 0) { + temp_data <- temp_data[, c(hiding_cols * -1)] } - tempData + temp_data } diff --git a/inst/shiny/bdclean/server.R b/inst/shiny/bdclean/server.R index 568ae3f..54f2b23 100644 --- a/inst/shiny/bdclean/server.R +++ b/inst/shiny/bdclean/server.R @@ -2,33 +2,20 @@ options(shiny.maxRequestSize = 50 * 1024 ^ 2) library(bdchecks) shinyServer(function(input, output, session) { - # End session with browser close - # session$onSessionEnded(function() { - # stopApp() - # }) - - # jscode <- "shinyjs.closeWindow = function() { window.close(); }" - # ------------- Local Data store ------------------------ - dataStore <- + data_store <- list( inputData = data.frame(), inputReceived = FALSE, - configuredCleaning = FALSE, customizedChecks = c(), customizedCheck = FALSE, - flaggedData = data.frame(), flaggingDone = FALSE, - cleanedData = data.frame(), cleaningDone = FALSE, - questionnaire = bdclean::create_default_questionnaire(), - # bdclean:: qualityChecks = bdclean::get_checks_list(), - # bdclean:: warningData = data.frame( @@ -55,8 +42,8 @@ shinyServer(function(input, output, session) { # for (stringIndex in length(warnings):1) { # print("--------") # print(warnings[stringIndex]) - # - # + # + # # if(grepl('[,:-]$', warnings[stringIndex])){ # print("in") # warnings[stringIndex - 1] <- paste(warnings[stringIndex - 1], warnings[stringIndex]) @@ -73,7 +60,9 @@ shinyServer(function(input, output, session) { time = format(Sys.time(), "%I:%M %p"), icon = icon ) - dataStore$warningData <<- rbind(temp, dataStore$warningData) + data_store$warningData <<- + rbind(temp, data_store$warningData) + # Assiging to the parent environment of this function which isn't global env. } # ------------- End of Warning Menu Notifiation ------------------------ @@ -88,7 +77,7 @@ shinyServer(function(input, output, session) { p( "Click the tabs in the left and follow the instructions to customize cleaning." ), - img(src = 'bdverse.png', align = "center"), + img(src = "bdverse.png", align = "center"), helpText( "GPL-3 ©Tomer Gueta, Vijay Barve, Thiloshon Nagarajah, Ashwin Agrawal and Carmel Yohay (2018). bdclean: Biodiversity Data Cleaning Workflow. R package version 0.1.900" @@ -108,7 +97,7 @@ shinyServer(function(input, output, session) { # ------------- Next Buttons Navigation Control ------------------- observeEvent(input$dataToConfigure, { - if (dataStore$inputReceived) { + if (data_store$inputReceived) { updateTabItems(session, "sideBar", "configure") } else { showNotification("Please add data first!", duration = 2) @@ -121,9 +110,10 @@ shinyServer(function(input, output, session) { duration = 2) dummyQuestion <- - bdclean::BdQuestion( # bdclean:: + bdclean::BdQuestion( + # bdclean:: question = "Customized Quality Checks", - possible.responses = c("Yes" , "No"), + possible.responses = c("Yes", "No"), question.type = "ChildRouter", router.condition = c("Yes"), quality.checks = input$typeInput, @@ -133,9 +123,9 @@ shinyServer(function(input, output, session) { ) dummyQuestion$users.answer <- "Yes" - dataStore$customizedChecks <<- + data_store$customizedChecks <<- BdQuestionContainer(c(dummyQuestion)) - dataStore$customizedCheck <<- TRUE + data_store$customizedCheck <<- TRUE } else { getResponse <- function(bdQuestion) { @@ -153,19 +143,19 @@ shinyServer(function(input, output, session) { } } - for (question in dataStore$questionnaire$BdQuestions) { + for (question in data_store$questionnaire$BdQuestions) { if (question$question.type != "Child") { getResponse(question) } } } - dataStore$configuredCleaning <<- TRUE + data_store$configuredCleaning <<- TRUE updateTabItems(session, "sideBar", "flag") }) observeEvent(input$flagToClean, { - if (!dataStore$flaggingDone) { + if (!data_store$flaggingDone) { showNotification("Please click Flag first!", duration = 2) return() } @@ -178,8 +168,8 @@ shinyServer(function(input, output, session) { warnings <- capture.output( - dataStore$cleanedData <<- - bdclean::cleaning_function(dataStore$flaggedData) # bdclean:: + data_store$cleanedData <<- + bdclean::cleaning_function(data_store$flaggedData) # bdclean:: , type = "message" ) @@ -187,16 +177,16 @@ shinyServer(function(input, output, session) { addWarnings("Warning while Cleaning", warnings, "trash") }) - shinyjs::addClass(id = 'flagToCleanDiv', - class = 'readyButton') - shinyjs::removeClass(id = 'flagToCleanDiv', - class = 'completedButton') + shinyjs::addClass(id = "flagToCleanDiv", + class = "readyButton") + shinyjs::removeClass(id = "flagToCleanDiv", + class = "completedButton") - dataStore$cleaningDone <<- TRUE + data_store$cleaningDone <<- TRUE }) observeEvent(input$flagToDocument, { - if (!dataStore$flaggingDone) { + if (!data_store$flaggingDone) { showNotification("Please click Flag first!", duration = 2) return() } @@ -204,7 +194,7 @@ shinyServer(function(input, output, session) { updateTabItems(session, "sideBar", "document") checks <- - ifelse(dataStore$customizedCheck, + ifelse(data_store$customizedCheck, "customizedChecks", "questionnaire") @@ -212,11 +202,11 @@ shinyServer(function(input, output, session) { warnings <- capture.output( bdclean::create_report_data( # bdclean:: - dataStore$inputData, - dataStore$flaggedData, - dataStore$cleanedData, - dataStore[[checks]], - dataStore$cleaningDone, + data_store$inputData, + data_store$flaggedData, + data_store$cleanedData, + data_store[[checks]], + data_store$cleaningDone, c("md_document") ), type = "message" @@ -225,8 +215,8 @@ shinyServer(function(input, output, session) { addWarnings("Warning in Report Generation", warnings, "file") }) - dataStore$cleaningDone <- FALSE - dataStore$cleanedData <- dataStore$flaggedData + data_store$cleaningDone <- FALSE + data_store$cleanedData <- data_store$flaggedData }) observeEvent(input$cleanToDocument, { @@ -234,18 +224,18 @@ shinyServer(function(input, output, session) { withProgress(message = "Generating Artifacts...", { checks <- - ifelse(dataStore$customizedCheck, + ifelse(data_store$customizedCheck, "customizedChecks", "questionnaire") warnings <- capture.output( bdclean::create_report_data( # bdclean:: - dataStore$inputData, - dataStore$flaggedData, - dataStore$cleanedData, - dataStore[[checks]], - dataStore$cleaningDone, + data_store$inputData, + data_store$flaggedData, + data_store$cleanedData, + data_store[[checks]], + data_store$cleaningDone, c("md_document") ), type = "message" @@ -276,7 +266,7 @@ shinyServer(function(input, output, session) { "3" = NULL ) ) - dataStore$inputData <<- data$data + data_store$inputData <<- data$data } else { warnings <- capture.output( @@ -295,17 +285,17 @@ shinyServer(function(input, output, session) { type = "message" ) - if (length(warnings) > 0 ){ + if (length(warnings) > 0) { showNotification(paste(warnings, collapse = " "), duration = 6) } tempData <- data[[input$queryDB]]$data[[1]] - dataStore$inputData <<- tempData + data_store$inputData <<- tempData } }) - dataLoadedTask(dataStore$inputData) + dataLoadedTask(data_store$inputData) }) observeEvent(input$inputFile, { @@ -317,31 +307,31 @@ shinyServer(function(input, output, session) { message("Reading DWCA ZIP...") finchRead <- finch::dwca_read(input$inputFile$datapath, read = T) - dataStore$inputData <<- finchRead$data[[1]] + data_store$inputData <<- finchRead$data[[1]] } else { - dataStore$inputData <<- + data_store$inputData <<- data.table::fread(input$inputFile$datapath) } }) - dataLoadedTask(dataStore$inputData) + dataLoadedTask(data_store$inputData) }) observeEvent(input$mapTexture, { - if (length(dataStore$inputData) == 0) { + if (length(data_store$inputData) == 0) { return(NULL) } - leafletProxy("mymap", data = dataStore$inputData) %>% + leafletProxy("mymap", data = data_store$inputData) %>% clearShapes() %>% addCircles(~ decimalLongitude, ~ decimalLatitude, color = input$mapColor) }) observeEvent(input$mapColor, { - if (length(dataStore$inputData) == 0) { + if (length(data_store$inputData) == 0) { return(NULL) } - leafletProxy("mymap", data = dataStore$inputData) %>% + leafletProxy("mymap", data = data_store$inputData) %>% clearShapes() %>% addCircles(~ decimalLongitude, ~ decimalLtitude, color = input$mapColor) }) @@ -363,14 +353,14 @@ shinyServer(function(input, output, session) { data.table::fread(file = dictionaryPath) darwinizer <- - bdDwC::darwinizeNames(dataStore$inputData, customDictionary) + bdDwC::darwinizeNames(data_store$inputData, customDictionary) fixed <- darwinizer[darwinizer$matchType == "Darwinized",] if (nrow(fixed) > 0) { - tidyData <- bdDwC::renameUserData(dataStore$inputData, darwinizer) - dataStore$inputData <<- tidyData + tidyData <- bdDwC::renameUserData(data_store$inputData, darwinizer) + data_store$inputData <<- tidyData showNotification(paste( "Converted Columns:", @@ -384,42 +374,41 @@ shinyServer(function(input, output, session) { } } - if ("decimalLatitude" %in% colnames(dataStore$inputData)) - { - dataStore$inputData$decimalLatitude <<- - as.numeric(dataStore$inputData$decimalLatitude) - dataStore$inputData$decimalLongitude <<- - as.numeric(dataStore$inputData$decimalLongitude) + if ("decimalLatitude" %in% colnames(data_store$inputData)) { + data_store$inputData$decimalLatitude <<- + as.numeric(data_store$inputData$decimalLatitude) + data_store$inputData$decimalLongitude <<- + as.numeric(data_store$inputData$decimalLongitude) } # ------------ End of Darwinizing Data ------------- - try(leafletProxy("mymap", data = dataStore$inputData) %>% + try(leafletProxy("mymap", data = data_store$inputData) %>% clearShapes() %>% addCircles(~ decimalLongitude, ~ decimalLatitude, color = input$mapColor)) output$inputDataTable <- DT::renderDataTable(DT::datatable({ - summarizeDataframe(dataStore$inputData) + summarizeDataframe(data_store$inputData) }, options = list(scrollX = TRUE))) - shinyjs::addClass(id = 'queryDatabaseDiv', - class = 'readyButton') - shinyjs::removeClass(id = 'queryDatabaseDiv', - class = 'activeButton') + shinyjs::addClass(id = "queryDatabaseDiv", + class = "readyButton") + shinyjs::removeClass(id = "queryDatabaseDiv", + class = "activeButton") - shinyjs::addClass(id = 'inputFileDiv', - class = 'readyButton') - shinyjs::removeClass(id = 'inputFileDiv', - class = 'activeButton') + shinyjs::addClass(id = "inputFileDiv", + class = "readyButton") + shinyjs::removeClass(id = "inputFileDiv", + class = "activeButton") - shinyjs::addClass(id = 'dataToConfigureDiv', - class = 'completedButton') - shinyjs::removeClass(id = 'queryDatabaseDiv', - class = 'activeButton') + shinyjs::addClass(id = "dataToConfigureDiv", + class = "completedButton") + shinyjs::removeClass(id = "queryDatabaseDiv", + class = "activeButton") showNotification("Read Data Successfully", duration = 2) - dataStore$inputReceived <<- TRUE + data_store$inputReceived <<- TRUE # --------- Setting flag tab statistic boxes ------- output$inputDataRows <- renderText(nrow(data)) @@ -528,7 +517,7 @@ shinyServer(function(input, output, session) { } } - for (question in dataStore$questionnaire$BdQuestions) { + for (question in data_store$questionnaire$BdQuestions) { if (question$question.type != "Child" && question$question.type != "ChildRouter") { createUIContainer(question) @@ -546,41 +535,42 @@ shinyServer(function(input, output, session) { output$qualityChecks <- renderUI({ components <- list() - for (i in 1:length(dataStore$qualityChecks)) { + for (i in 1:length(data_store$qualityChecks)) { components[[i]] <- tagList( HTML( paste( "" ) ), div( class = "checksListContent", - h4(dataStore$qualityChecks[[i]]$nameOfQualityCheck), + h4(data_store$qualityChecks[[i]]$nameOfQualityCheck), div(class = "checksListTopic col-sm-3", p("Description: ")), - div(class = "checksListTitle", p( - dataStore$qualityChecks[[i]]$description - )), + div( + class = "checksListTitle", + p(data_store$qualityChecks[[i]]$description) + ), div(class = "checksListTopic col-sm-3", p("Sample Passing Data: ")), div( class = "checksListTitle", - p(dataStore$qualityChecks[[i]]$samplePassData) + p(data_store$qualityChecks[[i]]$samplePassData) ), div(class = "checksListTopic col-sm-3", p("Sample Failing Data: ")), div( class = "checksListTitle", - p(dataStore$qualityChecks[[i]]$sampleFailData) + p(data_store$qualityChecks[[i]]$sampleFailData) ), div(class = "checksListTopic col-sm-3", p("Category of Quality Check: ")), div( class = "checksListTitle", - p(dataStore$qualityChecks[[i]]$checkCategory) + p(data_store$qualityChecks[[i]]$checkCategory) ), div(class = "checksListTopic col-sm-3", p( @@ -588,7 +578,7 @@ shinyServer(function(input, output, session) { )), div( class = "checksListTitle", - p(dataStore$qualityChecks[[i]]$targetDWCField) + p(data_store$qualityChecks[[i]]$targetDWCField) ) ), br(), @@ -598,7 +588,7 @@ shinyServer(function(input, output, session) { return( div( - id = 'typeInput', + id = "typeInput", class = "form-group shiny-input-checkboxgroup shiny-input-container shiny-bound-input", tags$br(), tags$br(), @@ -654,7 +644,7 @@ shinyServer(function(input, output, session) { return( div( - id = 'domainInput', + id = "domainInput", class = "form-group shiny-input-radiogroup shiny-input-container shiny-bound-input", tags$br(), tags$br(), @@ -670,42 +660,42 @@ shinyServer(function(input, output, session) { # ------------- Flagging Module ------------------- observeEvent(input$flagButton, { - tempData <- dataStore$inputData - dataStore$flaggedData <<- data.frame() - dataStore$cleanedData <<- data.frame() + tempData <- data_store$inputData + data_store$flaggedData <<- data.frame() + data_store$cleanedData <<- data.frame() withProgress(message = "Flagging Data...", { checks <- - ifelse(dataStore$customizedCheck, + ifelse(data_store$customizedCheck, "customizedChecks", "questionnaire") warnings <- capture.output( - dataStore$flaggedData <<- - dataStore[[checks]]$flagData(dataStore$inputData, missing = - input$missingCase), + data_store$flaggedData <<- + data_store[[checks]]$flagData(data_store$inputData, missing = + input$missingCase), type = "message" ) - dataStore$flaggingDone <<- TRUE + data_store$flaggingDone <<- TRUE addWarnings("Warning while Flagging", warnings, "flag") }) - shinyjs::addClass(id = 'flagButtonDiv', - class = 'readyButton') + shinyjs::addClass(id = "flagButtonDiv", + class = "readyButton") - shinyjs::removeClass(id = 'flagButtonDiv', - class = 'completedButton') + shinyjs::removeClass(id = "flagButtonDiv", + class = "completedButton") - shinyjs::addClass(id = 'flagToCleanDiv', - class = 'completedButton') - shinyjs::removeClass(id = 'flagToCleanDiv', - class = 'activeButton') + shinyjs::addClass(id = "flagToCleanDiv", + class = "completedButton") + shinyjs::removeClass(id = "flagToCleanDiv", + class = "activeButton") }) output$messageMenu <- renderMenu({ msgs <- - apply(as.data.frame(dataStore$warningData), 1, function(row) { + apply(as.data.frame(data_store$warningData), 1, function(row) { messageItem( from = row[["from"]], message = row[["message"]], @@ -746,7 +736,7 @@ shinyServer(function(input, output, session) { checkData <- flaggedData[, checkColumns] - if (class(checkData) == "logical"){ + if (class(checkData) == "logical") { return(nrow(flaggedData) - length(checkData[checkData != TRUE])) } @@ -759,7 +749,7 @@ shinyServer(function(input, output, session) { # dataStore$cleaningThresholdControl) warnings <- capture.output(flaggedCount <- - get_flagging_statistics(dataStore$flaggedData) , + get_flagging_statistics(data_store$flaggedData), type = "message") addWarnings("Message while Flagging", warnings, "question") @@ -798,14 +788,14 @@ shinyServer(function(input, output, session) { icon = icon("list-ol")), infoBox( "# of Newly Added Columns", - length(dataStore$flaggedData) - length(dataStore$inputData), + length(data_store$flaggedData) - length(data_store$inputData), icon = icon("th-list"), color = "purple" ), infoBox( "# of Unique Scientific Names Remaining", length(unique( - dataStore$flaggedData$scientificName + data_store$flaggedData$scientificName )), icon = icon("paw"), color = "yellow" @@ -813,7 +803,7 @@ shinyServer(function(input, output, session) { infoBox( "Clean Data", paste((( - flaggedCount / nrow(dataStore$inputData) + flaggedCount / nrow(data_store$inputData) ) * 100), "%", sep = ""), icon = icon("flag"), color = "red" @@ -823,7 +813,7 @@ shinyServer(function(input, output, session) { tabPanel( "Table View", div(class = "secondaryHeaders", h3("View 02: Summarized Table")), - DT::renderDataTable(summarizeDataframe(dataStore$flaggedData), width = 300) + DT::renderDataTable(summarizeDataframe(data_store$flaggedData), width = 300) ) ), @@ -844,7 +834,7 @@ shinyServer(function(input, output, session) { }) output$flaggedDataTable <- - reactive(DT::renderDT(summarizeDataframe(dataStore$flaggedData))) + reactive(DT::renderDT(summarizeDataframe(data_store$flaggedData))) # ------------- End of Flagging Module ------------------- @@ -855,7 +845,7 @@ shinyServer(function(input, output, session) { conditionalPanel("input.flagToClean > 0", tagList( div(id = "completedIcon", img( - src = 'completed.png', align = "center" + src = "completed.png", align = "center" )), p(paste("Cleaning is succesfully done.")), @@ -874,10 +864,6 @@ shinyServer(function(input, output, session) { )) }) - # observeEvent(input$cleanControl, { - # dataStore$cleaningThresholdControl <<- input$cleanControl - # }) - # ------------- End of Cleaning Module ------------------------ @@ -897,7 +883,7 @@ shinyServer(function(input, output, session) { downloadButton("downloadInput", "Download Input Data"), br(), br(), - DT::renderDataTable(summarizeDataframe(dataStore$inputData), width = 300) + DT::renderDataTable(summarizeDataframe(data_store$inputData), width = 300) ), tabPanel( "Flagged Data", @@ -907,7 +893,7 @@ shinyServer(function(input, output, session) { downloadButton("downloadFlagged", "Download Flagged Data"), br(), br(), - DT::renderDataTable(summarizeDataframe(dataStore$flaggedData), width = 300) + DT::renderDataTable(summarizeDataframe(data_store$flaggedData), width = 300) ), tabPanel( "Cleaned Data", @@ -915,7 +901,7 @@ shinyServer(function(input, output, session) { downloadButton("downloadCleaned", "Download Cleaned Data"), br(), br(), - DT::renderDataTable(summarizeDataframe(dataStore$cleanedData), width = 300) + DT::renderDataTable(summarizeDataframe(data_store$cleanedData), width = 300) ), tabPanel( "Cleaning Report", @@ -966,7 +952,7 @@ shinyServer(function(input, output, session) { output$downloadShortReport <- downloadHandler( filename = function() { - paste('shortReport-', Sys.Date(), switch( + paste("shortReport-", Sys.Date(), switch( input$reportFormat, "pdf_document" = ".pdf", "html_document" = ".html", @@ -977,17 +963,17 @@ shinyServer(function(input, output, session) { content = function(file) { withProgress(message = "Preparing download...", { checks <- - ifelse(dataStore$customizedCheck, + ifelse(data_store$customizedCheck, "customizedChecks", "questionnaire") bdclean::create_report_data( # bdclean:: - dataStore$inputData, - dataStore$cleanedData, - dataStore$flaggedData, - dataStore[[checks]], - dataStore$cleaningDone, + data_store$inputData, + data_store$cleanedData, + data_store$flaggedData, + data_store[[checks]], + data_store$cleaningDone, input$reportFormat ) }) @@ -1009,7 +995,7 @@ shinyServer(function(input, output, session) { output$downloadDetailedReport <- downloadHandler( filename = function() { - paste('detailedReport-', Sys.Date(), switch( + paste("detailedReport-", Sys.Date(), switch( input$reportFormat, "pdf_document" = ".pdf", "html_document" = ".html", @@ -1020,16 +1006,16 @@ shinyServer(function(input, output, session) { content = function(file) { withProgress(message = "Preparing download...", { checks <- - ifelse(dataStore$customizedCheck, + ifelse(data_store$customizedCheck, "customizedChecks", "questionnaire") bdclean::create_report_data( # bdclean:: - dataStore$inputData, - dataStore$cleanedData, - dataStore$flaggedData, - dataStore[[checks]], - dataStore$cleaningDone, + data_store$inputData, + data_store$cleanedData, + data_store$flaggedData, + data_store[[checks]], + data_store$cleaningDone, input$reportFormat ) }) @@ -1051,27 +1037,27 @@ shinyServer(function(input, output, session) { output$downloadInput <- downloadHandler( filename = function() { - paste('inputData-', Sys.Date(), '.csv') + paste("inputData-", Sys.Date(), ".csv") }, content = function(con) { - write.csv(dataStore$inputData, con) + write.csv(data_store$inputData, con) } ) output$downloadFlagged <- downloadHandler( filename = function() { - paste('flaggedData-', Sys.Date(), '.csv') + paste("flaggedData-", Sys.Date(), ".csv") }, content = function(con) { - write.csv(dataStore$flaggedData, con) + write.csv(data_store$flaggedData, con) } ) output$downloadCleaned <- downloadHandler( filename = function() { - paste('cleanedData-', Sys.Date(), '.csv') + paste("cleanedData-", Sys.Date(), ".csv") }, content = function(con) { - write.csv(dataStore$cleanedData, con) + write.csv(data_store$cleanedData, con) } ) diff --git a/inst/shiny/bdclean/ui.R b/inst/shiny/bdclean/ui.R index 52e6eab..57f6326 100644 --- a/inst/shiny/bdclean/ui.R +++ b/inst/shiny/bdclean/ui.R @@ -88,17 +88,17 @@ shinyUI(dashboardPage( "queryDB", label = h3("Online Database:"), choices = list( - "GBIF (Global Biodiversity Information Facility)" = 'gbif', - "iDigBio (Integrated Digitized Biocollections)" = 'idigbio', - "EcoEngine (Berkeley Ecoinformatics Engine)" = 'ecoengine', - "Vertnet (Vertebrate Network)" = 'vertnet', - "BISON (Biodiversity Information Serving Our Nation)" = 'bison', - "iNaturalist" = 'inat', - "ALA (Atlas of Living Australia)" = 'ala', - "OBIS (Ocean Biogeographic Information System)" = 'obis', - "AntWeb" = 'antweb' + "GBIF (Global Biodiversity Information Facility)" = "gbif", + "iDigBio (Integrated Digitized Biocollections)" = "idigbio", + "EcoEngine (Berkeley Ecoinformatics Engine)" = "ecoengine", + "Vertnet (Vertebrate Network)" = "vertnet", + "BISON (Biodiversity Information Serving Our Nation)" = "bison", + "iNaturalist" = "inat", + "ALA (Atlas of Living Australia)" = "ala", + "OBIS (Ocean Biogeographic Information System)" = "obis", + "AntWeb" = "antweb" ), - selected = 'gbif' + selected = "gbif" ), br(), @@ -246,20 +246,7 @@ shinyUI(dashboardPage( # ------------------------------- ), - # Uncomment if domain specifc cleaning is needed - # tabPanel( - # "Option 03", - # div(class = "secondaryHeaders", h3("Option 03: Cleaning Templates")), - # helpText( - # "Note: Choose the cleaning, customized for special domains and needs" - # ), - # - # # ------------------------------- - # - # uiOutput("domainCleaning") - # - # # ------------------------------- - # ), + div(class = "progressStep", taskItem( value = 30, color = "green", "Step 2 of 6" @@ -313,7 +300,7 @@ shinyUI(dashboardPage( actionButton("flagButton", label = "Flag Data") ) - ), + ), div(class = "progressStep", taskItem( value = 45, color = "yellow", @@ -329,13 +316,12 @@ shinyUI(dashboardPage( uiOutput("cleanedResultsUI") # ------------------------------- - ) + ) ))), # ------------- End of Flagging Module ------------------- # ------------- Documentation Module ------------------- - tabItem("document", fluidRow(column( 12, @@ -347,7 +333,7 @@ shinyUI(dashboardPage( "reportFormat", "Report Type", choices = list( - "PDF" = 'pdf_document', + "PDF" = "pdf_document", "HTML" = "html_document", "Word" = "word_document", "Markdown" = "md_document" @@ -358,13 +344,11 @@ shinyUI(dashboardPage( # ------------------------------- uiOutput("documentContentUI") - - # ------------------------------- ) ))) -) - -# ------------- End of Documentation Module ------------------- - + ) + + # ------------- End of Documentation Module ------------------- + ) )) diff --git a/inst/shiny/bdclean/www/style.css b/inst/shiny/bdclean/www/style.css index 910eeb2..1886b3a 100644 --- a/inst/shiny/bdclean/www/style.css +++ b/inst/shiny/bdclean/www/style.css @@ -111,3 +111,8 @@ body, .h1, .h2, .h3, .h4, .h5, .h6, h1, h2, h3, h4, h5, h6, .main-header .logo { .subSpan{ margin-left: 5%; } + + +.shiny-input-container:not(.shiny-input-container-inline){ + width: auto; +} diff --git a/man/BdQuestionContainer-class.Rd b/man/BdQuestionContainer-class.Rd index f06e6cf..0a15084 100644 --- a/man/BdQuestionContainer-class.Rd +++ b/man/BdQuestionContainer-class.Rd @@ -11,6 +11,6 @@ The Question Container Reference Class \section{Methods}{ \describe{ -\item{\code{initialize(BdQuestions = NA)}}{Construct an instance of BdQuestionContainer after validating the type.} +\item{\code{initialize(bdquestions = NA)}}{Construct an instance of BdQuestionContainer after validating the type.} }} diff --git a/man/bdclean.Rd b/man/bdclean.Rd index 02778d6..a5d07ab 100644 --- a/man/bdclean.Rd +++ b/man/bdclean.Rd @@ -25,7 +25,7 @@ analysis or modelling. \section{Citation}{ \itemize{ -\item Gueta, T., Barve, V., Nagarajah, T., Agrawal, A. & Carmel, Y. (2018). bdclean: Biodiversity data cleaning workflows (R package V 1.0.0). Retrieved from https://github.com/bd-R/bdclean/ +\item Gueta, T., Barve, V., Nagarajah, T., Agrawal, A. & Carmel, Y. (2019). bdclean: Biodiversity data cleaning workflows (R package V 0.1.13). Retrieved from https://github.com/bd-R/bdclean/ } } diff --git a/man/clean_data.Rd b/man/clean_data.Rd index 32ff8d6..269d98b 100644 --- a/man/clean_data.Rd +++ b/man/clean_data.Rd @@ -4,14 +4,14 @@ \alias{clean_data} \title{Data cleaning according to Questionnaire Responses.} \usage{ -clean_data(data, customQuestionnaire = NULL, clean = TRUE, +clean_data(data, custom_questionnaire = NULL, clean = TRUE, missing = FALSE, report = TRUE, format = c("html_document", "pdf_document")) } \arguments{ \item{data}{Biodiversity data in a data frame} -\item{customQuestionnaire}{Custom user created questionnaire responses if to pypass answering questions each time.} +\item{custom_questionnaire}{Custom user created questionnaire responses if to pypass answering questions each time.} \item{clean}{Whether to clean after flagging. If false only flagging will be done.} @@ -34,23 +34,24 @@ You can add your custom questions to this questionnaire and then pass it to this function to process the data. } \examples{ -\dontrun{ + +custom_questionnaire <- create_default_questionnaire() + +if(interactive()){ + library(rgbif) occdat <- occ_data( - country = "AU", # Country code for australia - classKey= 359, # Class code for mammalia - limit=5000 # Get only 5000 records - ) - myData<-occdat$data + country = 'AU', # Country code for australia + classKey = 359, # Class code for mammalia + limit = 50 # Get only 50 records +) +myData <- occdat$data - cleanedData <- clean_data(myData) +responses <- run_questionnaire() +cleaned_data <- clean_data(myData, responses) - responses <- run_questionnaire() - cleanedData <- clean_data(myData, responses) +cleaned_data2 <- clean_data(myData) - customQuestionnaire <- create_default_questionnaire() - customResponses <- run_questionnaire(customQuestionnaire) - cleanedData <- clean_data(myData, customResponses) - } +} } diff --git a/man/cleaning_function.Rd b/man/cleaning_function.Rd index 19f7337..60864ac 100644 --- a/man/cleaning_function.Rd +++ b/man/cleaning_function.Rd @@ -10,5 +10,21 @@ cleaning_function(bddata) \item{bddata}{The dataframe to clean} } \description{ -NOTE: This is an package internal function. Do not use for external uses. +NOTE: This is an package internal function. Do not use for external uses. Exported to make it available for shiny app. +} +\examples{ + +if(interactive()){ + +library(rgbif) +occdat <- occ_data( + country = 'AU', # Country code for australia + classKey = 359, # Class code for mammalia + limit = 50 # Get only 50 records +) +myData <- occdat$data +cleaned_data <- cleaning_function(myData) + +} + } diff --git a/man/create_default_questionnaire.Rd b/man/create_default_questionnaire.Rd index 25f044f..8410c3b 100644 --- a/man/create_default_questionnaire.Rd +++ b/man/create_default_questionnaire.Rd @@ -13,21 +13,7 @@ BdQuestionContainer object with default Questions Create the package default Questionnaire. } \examples{ -\dontrun{ -library(rgbif) -occdat1 <- occ_data( - country = "AU", # Country code for australia - classKey= 359, # Class code for mammalia - limit=5000, # Get only 5000 records - ) - myData<-occdat1$data - responses <- run_questionnaire() - cleanedData <- clean_data_new(myData, responses) - - customQuestionnaire <- create_default_questionnaire() - customResponses <- run_questionnaire(customQuestionnaire) - cleanedData <- clean_data_new(myData, customResponses) -} +customQuestionnaire <- create_default_questionnaire() } diff --git a/man/create_report_data.Rd b/man/create_report_data.Rd index cd16892..245adf8 100644 --- a/man/create_report_data.Rd +++ b/man/create_report_data.Rd @@ -4,22 +4,42 @@ \alias{create_report_data} \title{Generate data required to create report, function required in bdclean internal usage.} \usage{ -create_report_data(inputData, flaggedData, cleanedData, responses, - cleaningTrue, format) +create_report_data(input_data, flagged_data, cleaned_data, responses, + cleaning_true, format) } \arguments{ -\item{inputData}{The input dataframe before cleaning} +\item{input_data}{The input dataframe before cleaning} -\item{flaggedData}{The flagged data for cleaning} +\item{flagged_data}{The flagged data for cleaning} -\item{cleanedData}{The data with flagged records removed} +\item{cleaned_data}{The data with flagged records removed} \item{responses}{The BDQuestions object with user responses} -\item{cleaningTrue}{Flag specifying if the cleaning should be done, or just flagging} +\item{cleaning_true}{Flag specifying if the cleaning should be done, or just flagging} \item{format}{The format of the report to be generated} } \description{ -NOTE: This is an package internal function. Do not use for external uses. +NOTE: This is an package internal function. Do not use for external uses. Exported to make it available for shiny app. +} +\examples{ + +if(interactive()){ + +library(rgbif) +occdat <- occ_data( + country = 'AU', # Country code for australia + classKey = 359, # Class code for mammalia + limit = 50 # Get only 50 records +) +myData <- occdat$data + +question <- BdQuestion() +responses <- get_user_response(question) + +cleaned_data <- create_report_data(myData, myData, myData, responses, T, 'pdf') + +} + } diff --git a/man/earliestDate.Rd b/man/earliest_date.Rd similarity index 62% rename from man/earliestDate.Rd rename to man/earliest_date.Rd index 9a40bcd..7369cb0 100644 --- a/man/earliestDate.Rd +++ b/man/earliest_date.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/quality_checks.R -\name{earliestDate} -\alias{earliestDate} +\name{earliest_date} +\alias{earliest_date} \title{Clean data based on earliest date.} \usage{ -earliestDate(bddata, res = "1700-01-01") +earliest_date(bddata, res = "1700-01-01") } \arguments{ \item{bddata}{Bio diversity data in a data frame} @@ -34,3 +34,20 @@ eventDate temporal } +\examples{ + +if(interactive()){ + +library(rgbif) +occdat <- occ_data( + country = 'AU', # Country code for australia + classKey = 359, # Class code for mammalia + limit = 50 # Get only 50 records +) +myData <- occdat$data + +responses <- earliest_date(myData, '2000-01-01') + +} + +} diff --git a/man/getUserResponse.Rd b/man/getUserResponse.Rd deleted file mode 100644 index 25b3e15..0000000 --- a/man/getUserResponse.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/clean_data.R -\name{getUserResponse} -\alias{getUserResponse} -\title{Internal function for getting user response} -\usage{ -getUserResponse(bdQuestion) -} -\arguments{ -\item{bdQuestion}{The BDQuestion object to get users responses.} -} -\description{ -Internal function for getting user response -} diff --git a/man/get_checks_list.Rd b/man/get_checks_list.Rd index 8112a2c..6cccb1b 100644 --- a/man/get_checks_list.Rd +++ b/man/get_checks_list.Rd @@ -9,3 +9,12 @@ get_checks_list() \description{ NOTE: This is an package internal function. Do not use for external uses. } +\examples{ + +if(interactive()){ + +all_checks <- get_checks_list() + +} + +} diff --git a/man/get_user_response.Rd b/man/get_user_response.Rd new file mode 100644 index 0000000..7f5cf4e --- /dev/null +++ b/man/get_user_response.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_data.R +\name{get_user_response} +\alias{get_user_response} +\title{Internal function for getting user response} +\usage{ +get_user_response(bd_question) +} +\arguments{ +\item{bd_question}{The BDQuestion object to get users responses.} +} +\description{ +Internal function for getting user response +} +\examples{ + +if(interactive()){ + +question <- BdQuestion() +responses <- get_user_response(question) + +} +} diff --git a/man/perform_Cleaning.Rd b/man/perform_Cleaning.Rd index 56e10fb..afd3b1b 100644 --- a/man/perform_Cleaning.Rd +++ b/man/perform_Cleaning.Rd @@ -4,13 +4,28 @@ \alias{perform_Cleaning} \title{Data decision function (threshold tuning) required in bdclean internal usage.} \usage{ -perform_Cleaning(flaggedData, cleaningThreshold = 5) +perform_Cleaning(flagged_data, cleaning_threshold = 5) } \arguments{ -\item{flaggedData}{The dataset with flags to be cleaned.} +\item{flagged_data}{The dataset with flags to be cleaned.} -\item{cleaningThreshold}{The Cleaning tolerance. Not used in current version.} +\item{cleaning_threshold}{The Cleaning tolerance. Not used in current version.} } \description{ NOTE: This is an package internal function. Do not use for external uses. } +\examples{ + +if(interactive()){ + +library(rgbif) +occdat <- occ_data( + country = 'AU', # Country code for australia + classKey = 359, # Class code for mammalia + limit = 50 # Get only 50 records +) +myData <- occdat$data +cleaned_data <- perform_Cleaning(myData) + +} +} diff --git a/man/run_bdclean.Rd b/man/run_bdclean.Rd index 43e4a6b..e85fbb2 100644 --- a/man/run_bdclean.Rd +++ b/man/run_bdclean.Rd @@ -9,3 +9,12 @@ run_bdclean() \description{ Launch bdclean Shiny Application } +\examples{ + +if(interactive()){ + +run_bdclean() + +} + +} diff --git a/man/run_questionnaire.Rd b/man/run_questionnaire.Rd index d706542..6723857 100644 --- a/man/run_questionnaire.Rd +++ b/man/run_questionnaire.Rd @@ -4,10 +4,10 @@ \alias{run_questionnaire} \title{Execute the Questionnaire and save user responses.} \usage{ -run_questionnaire(customQuestionnaire = NULL) +run_questionnaire(custom_questionnaire = NULL) } \arguments{ -\item{customQuestionnaire}{Custom User Created Questionnaire if already available.} +\item{custom_questionnaire}{Custom User Created Questionnaire if already available.} } \value{ list with BdQuestionObjects containing user answers @@ -16,17 +16,11 @@ list with BdQuestionObjects containing user answers Execute the Questionnaire and save user responses. } \examples{ -\dontrun{ -library(rgbif) -occdat1 <- occ_data( - country = "AU", # Country code for australia - classKey= 359, # Class code for mammalia - limit=5000, # Get only 5000 records - ) - myData<-occdat1$data - responses <- run_questionnaire() - cleanedData <- clean_data_new(myData, responses) +if(interactive()){ + +responses <- run_questionnaire() + } } diff --git a/man/spatialResolution.Rd b/man/spatial_resolution.Rd similarity index 64% rename from man/spatialResolution.Rd rename to man/spatial_resolution.Rd index 0b49c8e..85f276a 100644 --- a/man/spatialResolution.Rd +++ b/man/spatial_resolution.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/quality_checks.R -\name{spatialResolution} -\alias{spatialResolution} +\name{spatial_resolution} +\alias{spatial_resolution} \title{Clean data based on spatial resolution} \usage{ -spatialResolution(bddata, res = 100) +spatial_resolution(bddata, res = 100) } \arguments{ \item{bddata}{Bio diversity data in a data frame} @@ -34,3 +34,20 @@ coordinateUncertaintyInMeters spatial } +\examples{ + +if(interactive()){ + +library(rgbif) +occdat <- occ_data( + country = 'AU', # Country code for australia + classKey = 359, # Class code for mammalia + limit = 50 # Get only 50 records +) +myData <- occdat$data + +responses <- spatial_resolution(myData, 1500) + +} + +} diff --git a/man/taxoLevel.Rd b/man/taxo_level.Rd similarity index 63% rename from man/taxoLevel.Rd rename to man/taxo_level.Rd index d242172..6837393 100644 --- a/man/taxoLevel.Rd +++ b/man/taxo_level.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/quality_checks.R -\name{taxoLevel} -\alias{taxoLevel} +\name{taxo_level} +\alias{taxo_level} \title{Clean data based on lower taxon level} \usage{ -taxoLevel(bddata, res = "SPECIES") +taxo_level(bddata, res = "SPECIES") } \arguments{ \item{bddata}{Bio diversity data in a data frame} @@ -34,3 +34,20 @@ taxonRank taxonomic } +\examples{ + +if(interactive()){ + +library(rgbif) +occdat <- occ_data( + country = 'AU', # Country code for australia + classKey = 359, # Class code for mammalia + limit = 50 # Get only 50 records +) +myData <- occdat$data + +responses <- taxo_level(myData, 'SPECIES') + +} + +} diff --git a/man/temporalResolution.Rd b/man/temporal_resolution.Rd similarity index 62% rename from man/temporalResolution.Rd rename to man/temporal_resolution.Rd index 82514ef..5cb1b72 100644 --- a/man/temporalResolution.Rd +++ b/man/temporal_resolution.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/quality_checks.R -\name{temporalResolution} -\alias{temporalResolution} +\name{temporal_resolution} +\alias{temporal_resolution} \title{Clean data based on temporal resolution} \usage{ -temporalResolution(bddata, res = "Day") +temporal_resolution(bddata, res = "Day") } \arguments{ \item{bddata}{Bio diversity data in a data frame} @@ -34,3 +34,20 @@ day, month, year temporal } +\examples{ + +if(interactive()){ + +library(rgbif) +occdat <- occ_data( + country = 'AU', # Country code for australia + classKey = 359, # Class code for mammalia + limit = 50 # Get only 50 records +) +myData <- occdat$data + +responses <- taxo_level(temporal_resolution, 'Day') + +} + +} diff --git a/tests/testthat/test-name.R b/tests/testthat/test-name.R index 8955de2..9f6c976 100644 --- a/tests/testthat/test-name.R +++ b/tests/testthat/test-name.R @@ -2,16 +2,18 @@ context("bdclean checks") test_that("Default questionnaire is generated", { - expect_that(create_default_questionnaire(), - is_a('BdQuestionContainer')) + expect_that( + create_default_questionnaire(), + is_a("BdQuestionContainer") + ) }) test_that("Quality checks are read correctly from bdchecks", { - expect_that(length(get_checks_list()) > 15, equals(TRUE)) + expect_that(length(get_checks_list()) > 15, equals(TRUE)) }) test_that("Cleaning function without data fails", { - expect_that(clean_data(), throws_error()) -}) \ No newline at end of file + expect_that(clean_data(), throws_error()) +})