Skip to content

Commit

Permalink
Merge pull request #53 from thiloshon/master
Browse files Browse the repository at this point in the history
Final CRAN version
  • Loading branch information
thiloshon authored Apr 18, 2019
2 parents f75eef6 + b69a450 commit edf5e27
Show file tree
Hide file tree
Showing 40 changed files with 1,041 additions and 814 deletions.
6 changes: 6 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
^codecov\.yml$
^appveyor\.yml$
^cran-comments\.md$
^CRAN-RELEASE$
^.*\.Rproj$
^\.Rproj\.user$
^\.travis\.yml$
^\.idea$
^img$
^\.gitignore$
^\.Rhistory$
^\.RData$
^\.gitattributes$
4 changes: 4 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
* text=auto
data/* binary
src/* text=lf
R/* text=lf
15 changes: 14 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,17 @@

language: R
sudo: false
cache: packages
cache: packages

r:
- oldrel
- release
- devel

r_check_args: "--as-cran"

r_packages:
- covr

after_success:
- Rscript -e 'library(covr); codecov()'
2 changes: 2 additions & 0 deletions CRAN-RELEASE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
This package was submitted to CRAN on 2019-04-10.
Once it is accepted, delete this file and tag the release (commit 1faa2f9a78).
10 changes: 6 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
Package: bdclean
Type: Package
Title: A User-Friendly Biodiversity Data Cleaning App for the Inexperienced R User
Description: Provides features to manage complete work flow for biodiversity data cleaning, from uploading the data; gathering input from the user, in order to adjust cleaning procedures; perform the cleaning; and finally, generating various reports and several versions of the data. Facilitates user-level data cleaning, designed for the inexperienced R user. T Gueta et al (2018) <doi:10.3897/biss.2.25564>. T Gueta et al (2017) <doi:10.3897/tdwgproceedings.1.20311>.
Version: 0.1.12
Date: 2019-03-02
Description: Provides features to manage the complete workflow for biodiversity data cleaning: uploading data, gathering input from users (in order to adjust cleaning procedures), cleaning data and, finally, generating various reports and several versions of the data. Facilitates user-level data cleaning, designed for the inexperienced R user. T Gueta et al (2018) <doi:10.3897/biss.2.25564>. T Gueta et al (2017) <doi:10.3897/tdwgproceedings.1.20311>.
Version: 0.1.15
Date: 2019-04-10
License: GPL-3
URL: https://github.com/bd-R/bdclean, https://bd-r.github.io/The-bdverse/index.html
BugReports: https://github.com/bd-R/bdclean/issues
Expand Down Expand Up @@ -42,5 +42,7 @@ Imports: rmarkdown, knitr, shiny, shinydashboard, shinyjs, leaflet, DT, data.tab
Depends: R (>= 2.10)
RoxygenNote: 6.1.1
Suggests:
testthat
testthat,
roxygen2,
covr
LazyData: true
8 changes: 4 additions & 4 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ export(clean_data)
export(cleaning_function)
export(create_default_questionnaire)
export(create_report_data)
export(earliestDate)
export(earliest_date)
export(get_checks_list)
export(run_bdclean)
export(run_questionnaire)
export(spatialResolution)
export(taxoLevel)
export(temporalResolution)
export(spatial_resolution)
export(taxo_level)
export(temporal_resolution)
import(bdDwC)
import(bdchecks)
import(data.table)
Expand Down
18 changes: 9 additions & 9 deletions R/bdclean.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,17 @@
#' in order to achieve minimum quality to use the data further for any
#' analysis or modelling.
#'
#'@section Data cleaning:
#'\itemize{
#'\item \link{run_bdclean}
#'\item \link{clean_data}
#'}
#' @section Data cleaning:
#' \itemize{
#' \item \link{run_bdclean}
#' \item \link{clean_data}
#' }
#'
#'
#'@section Citation:
#'\itemize{
#'\item Gueta, T., Barve, V., Nagarajah, T., Agrawal, A. & Carmel, Y. (2018). bdclean: Biodiversity data cleaning workflows (R package V 1.0.0). Retrieved from https://github.com/bd-R/bdclean/
#'}
#' @section Citation:
#' \itemize{
#' \item Gueta, T., Barve, V., Nagarajah, T., Agrawal, A. & Carmel, Y. (2019). bdclean: Biodiversity data cleaning workflows (R package V 0.1.13). Retrieved from https://github.com/bd-R/bdclean/
#' }
#'
#' @docType package
#' @name bdclean
Expand Down
150 changes: 73 additions & 77 deletions R/clean_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,156 +7,152 @@
#' You can add your custom questions to this questionnaire and then pass it to this
#' function to process the data.
#'
#'@param data Biodiversity data in a data frame
#'@param customQuestionnaire Custom user-created questionnaire responses, used to bypass answering the questions each time.
#'@param clean Whether to clean after flagging. If false only flagging will be done.
#'@param missing How to treat data with missing values. Default: false - will be treated as bad data.
#'@param report Whether to print report of cleaning done.
#'@param format Formats of the cleaning report required. Options are: Markdown, HTML or / and PDF
#' @param data Biodiversity data in a data frame
#' @param custom_questionnaire Custom user-created questionnaire responses, used to bypass answering the questions each time.
#' @param clean Whether to clean after flagging. If false only flagging will be done.
#' @param missing How to treat data with missing values. Default: false - will be treated as bad data.
#' @param report Whether to print report of cleaning done.
#' @param format Formats of the cleaning report required. Options are: Markdown, HTML and/or PDF
#'
#'@return data frame with clean data
#' @return data frame with clean data
#'
#'@examples \dontrun{
#'library(rgbif)
#'occdat <- occ_data(
#' country = "AU", # Country code for australia
#' classKey= 359, # Class code for mammalia
#' limit=5000 # Get only 5000 records
#' )
#' myData<-occdat$data
#'
#' cleanedData <- clean_data(myData)
#'
#' responses <- run_questionnaire()
#' cleanedData <- clean_data(myData, responses)
#'
#' customQuestionnaire <- create_default_questionnaire()
#' customResponses <- run_questionnaire(customQuestionnaire)
#' cleanedData <- clean_data(myData, customResponses)
#' }
#' @examples
#'
#' custom_questionnaire <- create_default_questionnaire()
#'
#' if(interactive()){
#'
#' library(rgbif)
#' occdat <- occ_data(
#' country = 'AU', # Country code for Australia
#' classKey = 359, # Class code for Mammalia
#' limit = 50 # Get only 50 records
#' )
#' myData <- occdat$data
#'
#' responses <- run_questionnaire()
#' cleaned_data <- clean_data(myData, responses)
#'
#' cleaned_data2 <- clean_data(myData)
#'
#' }
#'
#'@export
#' @export
clean_data <-
function(data,
customQuestionnaire = NULL,
custom_questionnaire = NULL,
clean = TRUE,
missing = FALSE,
report = TRUE,
format = c("html_document", "pdf_document")) {
responses <- list()
inputData <- data
flaggedData <- data
cleanedData <- data

input_data <- data
flagged_data <- data
cleaned_data <- data

# Questionnaire
if (is.null(customQuestionnaire)) {
if (is.null(custom_questionnaire)) {
responses <- run_questionnaire()
} else {
responses <- customQuestionnaire
responses <- custom_questionnaire
}

# Flagging
flaggedData <- responses$flagData(inputData, missing)

flagged_data <- responses$flag_data(input_data, missing)

# Decision Making
if (clean) {
cleanedData <- cleaning_function(flaggedData)
cleaned_data <- cleaning_function(flagged_data)
}

# Report
if (report) {
create_report_data(data,
flaggedData,
cleanedData,
flagged_data,
cleaned_data,
responses,
clean,
format)
}

# Cleaning
if (clean) {
return(cleanedData)
return(cleaned_data)
}

return(flaggedData)
return(flagged_data)
}


#' Execute the Questionnaire and save user responses.
#'
#'
#'@param customQuestionnaire Custom User Created Questionnaire if already available.
#' @param custom_questionnaire Custom User Created Questionnaire if already available.
#'
#'@return list with BdQuestionObjects containing user answers
#' @return list with BdQuestionObjects containing user answers
#'
#'@examples \dontrun{
#'library(rgbif)
#'occdat1 <- occ_data(
#' country = "AU", # Country code for australia
#' classKey= 359, # Class code for mammalia
#' limit=5000, # Get only 5000 records
#' )
#' myData<-occdat1$data
#' @examples
#'
#' if(interactive()){
#'
#' responses <- run_questionnaire()
#' cleanedData <- clean_data_new(myData, responses)
#'}
#' responses <- run_questionnaire()
#'
#' }
#'
#'@export
run_questionnaire <- function(customQuestionnaire = NULL) {
#' @export
run_questionnaire <- function(custom_questionnaire = NULL) {
responses <- list()

if (is.null(customQuestionnaire)) {
if (is.null(custom_questionnaire)) {
message("Custom Questionnaire not given. Using package default Questionnaire...")
responses <- create_default_questionnaire()

} else {
if (class(customQuestionnaire) != "BdQuestionContainer") {
if (class(custom_questionnaire) != "BdQuestionContainer") {
message(
"Provided Custom Questionnaire is not of class BdQuestionContainer.
Using package default Questionnaire"
)
responses <- create_default_questionnaire()

} else {
message("Custom Questionnaire detected.")
responses <- customQuestionnaire
responses <- custom_questionnaire
}
}

message("Please answer the following questions to initiate cleaning process.")

for (question in responses$BdQuestions) {
for (question in responses$bdquestions) {
if (question$question.type != "Child" &&
question$question.type != "ChildRouter") {
getUserResponse(question)
get_user_response(question)
}
}
message("Thank you! Cleaning can be started now based on your responses.")
return(responses)
}

#' Internal function for getting user response
#'
#' @param bd_question The BdQuestion object for which to get the user's response.
#'
#' @examples
#'
#'@param bdQuestion The BDQuestion object to get users responses.
#' if(interactive()){
#'
#' question <- BdQuestion()
#' responses <- get_user_response(question)
#'
getUserResponse <- function(bdQuestion) {
#' }
get_user_response <- function(bd_question) {
# Child & ChildRouter already filtered in first loop above

if (bdQuestion$question.type == "Atomic") {
if (bd_question$question.type == "Atomic") {
# Atomic is filtered
bdQuestion$printQuestion()
bdQuestion$getResponse()

bd_question$print_question()
bd_question$get_response()
} else {
# Router , Child as child & ChildRouter as child is filtered
bdQuestion$printQuestion()
bdQuestion$getResponse()
if (bdQuestion$users.answer %in% bdQuestion$router.condition) {
for (question in bdQuestion$child.questions) {
getUserResponse(question)
bd_question$print_question()
bd_question$get_response()
if (bd_question$users.answer %in% bd_question$router.condition) {
for (question in bd_question$child.questions) {
get_user_response(question)
}
}
}
Expand Down
Loading

0 comments on commit edf5e27

Please sign in to comment.