Skip to content

Commit

Permalink
More dataset tests (#132)
Browse files Browse the repository at this point in the history
- Fixes #118
- Rename check_duplicates to check_pivot_duplicates
- Add a `path_data` option to `metadata_check_custom_R_code` to allow easier testing in tests/testthat
- Add Test Datasets 5 and 6
- Update taxon_list-orig.csv
- Clean existing test metadata files
  • Loading branch information
yangsophieee authored Nov 16, 2023
1 parent 256ab7a commit c706804
Show file tree
Hide file tree
Showing 55 changed files with 1,391 additions and 1,495 deletions.
3 changes: 1 addition & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ export("%>%")
export(build_add_version)
export(build_combine)
export(build_setup_pipeline)
export(check_duplicates)
export(check_pivot_wider)
export(check_pivot_duplicates)
export(dataset_build)
export(dataset_configure)
export(dataset_find_taxon)
Expand Down
3 changes: 1 addition & 2 deletions R/pivot.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#' @param dataset Built dataset with `test_build_dataset`
#'
#' @return Invisibly, `TRUE` if no rows have duplicates preventing pivoting wider, otherwise `FALSE`
#' @export

check_pivot_wider <- function(dataset) {

Expand All @@ -27,7 +26,7 @@ check_pivot_wider <- function(dataset) {
if (duplicates == 0) {
invisible(TRUE)
} else {
invisible(FALSE)
invisible(FALSE)
}

}
Expand Down
20 changes: 11 additions & 9 deletions R/process.R
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ dataset_process <- function(filename_data_raw,
# Record methods
methods <- process_format_methods(metadata, dataset_id, sources, contributors)


# Retrieve taxonomic details for known species
taxonomic_updates <-
traits %>%
Expand Down Expand Up @@ -1902,7 +1903,7 @@ dataset_update_taxonomy <- function(austraits_raw, taxa) {

columns_in_taxon_list <- names(taxa)

# incoming table from austraits_raw is a list of all taxa for the study
# Incoming table from `austraits_raw` is a list of all taxa for the study
# `original_name` and `aligned_name` will be different if
# there were taxonomic_updates specified in metadata file
austraits_raw$taxonomic_updates <-
Expand All @@ -1922,7 +1923,8 @@ dataset_update_taxonomy <- function(austraits_raw, taxa) {
taxa %>% dplyr::select(dplyr::all_of(c("aligned_name", "taxon_name")))
) %>%
dplyr::select(dplyr::all_of(c("dataset_id", "taxon_name")), dplyr::everything()) %>%
# for taxa where there is no taxon_name to matched to a "aligned_name", maintain the "aligned_name" as the "taxon_name"
# For taxa where there is no `taxon_name` matched to an `aligned_name`,
# maintain the `aligned_name` as the `taxon_name`
dplyr::mutate(
taxon_name = ifelse(is.na(.data$taxon_name), .data$aligned_name, .data$taxon_name)#,
) %>%
Expand All @@ -1942,13 +1944,13 @@ dataset_update_taxonomy <- function(austraits_raw, taxa) {
.data$taxonomic_resolution),
taxon_rank = .data$taxonomic_resolution,
name_to_match_to = .data$taxon_name,
# Create variable `name_to_match_to` which specifies the part of the taxon name to which matches can be made.
# This step requires taxon_rank.
name_to_match_to = stringr::str_replace(.data$taxon_name, " \\[.+",""),
# Create variable `name_to_match_to` which specifies the part of the taxon name to which matches can be made
# This step requires `taxon_rank`
name_to_match_to = stringr::str_replace(.data$taxon_name, " \\[.+", ""),
name_to_match_to = ifelse(!.data$taxon_rank %in% c("species", "subspecies", "series", "variety", "form"),
stringr::word(.data$taxon_name,1), .data$name_to_match_to)
stringr::word(.data$taxon_name, 1), .data$name_to_match_to)
) %>%
# Remove taxon_rank, as it is about to be merged back in, but matches will now be possible to more rows.
# Remove `taxon_rank`, as it is about to be merged back in, but matches will now be possible to more rows
select(-dplyr::any_of(c("taxon_rank", "taxonomic_resolution"))) %>%
util_df_convert_character() %>%
# Merge in all data from taxa.
Expand All @@ -1967,7 +1969,7 @@ dataset_update_taxonomy <- function(austraits_raw, taxa) {
dplyr::distinct(.data$taxon_name, .keep_all = TRUE) %>%
dplyr::select(dplyr::any_of(columns_in_taxon_list))

# Now `taxonomic_resolution` be removed from the traits table.
# Now `taxonomic_resolution` can be removed from the traits table
austraits_raw$traits <-
austraits_raw$traits %>%
dplyr::select(-dplyr::all_of(c("taxonomic_resolution")))
Expand Down Expand Up @@ -2038,7 +2040,7 @@ write_plaintext <- function(austraits, path) {
#'
#' @return Tibble with duplicates and pivot columns
#' @export
check_duplicates <- function(
check_pivot_duplicates <- function(
database_object,
dataset_ids = unique(database_object$traits$dataset_id)
) {
Expand Down
11 changes: 6 additions & 5 deletions R/setup.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#' Path to the `metadata.yml` file for specified `dataset_id`
#'
#' @param dataset_id Identifier for a particular study in the database
#' @param path_data Path to folder with data
#'
#' @return A string
metadata_path_dataset_id <- function(dataset_id) {
file.path("data", dataset_id, "metadata.yml")
metadata_path_dataset_id <- function(dataset_id, path_data = "data") {
file.path(path_data, dataset_id, "metadata.yml")
}

#' Create a template of file `metadata.yml` for specified `dataset_id`
Expand Down Expand Up @@ -223,13 +224,13 @@ metadata_user_select_names <- function(title, vars) {
#' @inheritParams metadata_path_dataset_id
#'
#' @export
metadata_check_custom_R_code <- function(dataset_id) {
metadata_check_custom_R_code <- function(dataset_id, path_data = "data") {

# Read metadata
metadata <- read_metadata_dataset(dataset_id)
metadata <- read_metadata_dataset(dataset_id, path_data)

# Load trait data and run `custom_R_code`
readr::read_csv(file.path("data", dataset_id, "data.csv"), col_types = cols(), guess_max = 100000) %>%
readr::read_csv(file.path(path_data, dataset_id, "data.csv"), col_types = cols(), guess_max = 100000) %>%
process_custom_code(metadata[["dataset"]][["custom_R_code"]])()

}
Expand Down
Loading

0 comments on commit c706804

Please sign in to comment.