From 043402eefaf66237a70cbce27a6eb3c17f2bf878 Mon Sep 17 00:00:00 2001 From: Elizabeth Wenk Date: Thu, 18 Aug 2022 10:52:21 +1000 Subject: [PATCH] Trying to fix `move_values_to_new_trait` function Still problems as described in issue #612 --- R/process.R | 1 - data/Burrows_2001/metadata.yml | 2 +- data/Chen_2017/metadata.yml | 26 ++++++++++++++++---------- data/Crisp_2017/metadata.yml | 2 +- data/Jordan_2020/metadata.yml | 16 ++++++++-------- data/Jurado_1991/metadata.yml | 2 +- data/Kooyman_2011/metadata.yml | 6 ++++-- data/Leishman_1995/metadata.yml | 2 +- data/McGlone_2015/metadata.yml | 2 +- data/Metcalfe_2009/metadata.yml | 2 +- data/Metcalfe_2020_1/metadata.yml | 2 +- data/NTH_2014/metadata.yml | 13 +++++-------- data/Read_2003/metadata.yml | 2 +- data/Read_2005/metadata.yml | 2 +- data/Soliveres_2012/metadata.yml | 17 ++++++++++++++++- data/TMAG_2009/metadata.yml | 5 +---- data/WAH_1998/metadata.yml | 5 +++-- data/Wells_2012/metadata.yml | 2 +- scripts/custom.R | 7 ++++++- 19 files changed, 69 insertions(+), 47 deletions(-) diff --git a/R/process.R b/R/process.R index 0af73d35b..bef071fd0 100644 --- a/R/process.R +++ b/R/process.R @@ -601,7 +601,6 @@ process_flag_unsupported_values <- function(data, definitions) { dplyr::filter(is.na(.data$error)) %>% dplyr::pull(.data$trait_name) %>% unique() for(trait in traits ) { - # General categorical traits if(definitions[[trait]]$type == "categorical") { diff --git a/data/Burrows_2001/metadata.yml b/data/Burrows_2001/metadata.yml index 1a05a35f3..7b353c7da 100644 --- a/data/Burrows_2001/metadata.yml +++ b/data/Burrows_2001/metadata.yml @@ -37,7 +37,7 @@ dataset: "trifoliate_leaves","trifoliate_leaves","trifoliate_leaves") %>% move_values_to_new_trait( "leaf_type", "leaf_form_general", - "scale_leaf","scale","NA") + "scale_leaf","scale","") ' collection_date: unknown/2001 taxon_name: Genus_species diff --git a/data/Chen_2017/metadata.yml b/data/Chen_2017/metadata.yml index 8bd3085c3..2a21216b5 100644 --- 
a/data/Chen_2017/metadata.yml +++ b/data/Chen_2017/metadata.yml @@ -29,18 +29,24 @@ dataset: data %>% mutate( flowering_time = format_flowering_months(flowering_time_start, - flowering_time_end), - fruit_dehiscence = `Fruit.Type.Specific`) %>% - move_values_to_new_trait("Fruit.Type.Specific", "fruit_dehiscence", - c("pod", "capsule", "drupe", "achene", "nut", "samara", "follicle", + flowering_time_end), + fruit_dehiscence = `Fruit.Type.Specific`, + ) %>% + move_values_to_new_trait( + "Fruit.Type.Specific", "fruit_dehiscence", + c("pod", "capsule", "drupe", "achene", "nut", "samara", "follicle", "berry", "utricle", "syncarp", "syconium", "schizocarp"), - c("dehiscent", "dehiscent", "indehiscent", "indehiscent", + c("dehiscent", "dehiscent", "indehiscent", "indehiscent", "indehiscent", "indehiscent", "dehiscent", "indehiscent", - "indehiscent", "indehiscent", "indehiscent", "NA"), - c("pod", "capsule", "drupe", "achene", "nut", "samara", "follicle", - "berry", "utricle", "syncarp", "syconium", "schizocarp")) %>% - mutate(fruit_dehiscence = if_else(`Adaptive.Trait` %in% - c("indehiscent", "indehiscent, woody"), "indehiscent",fruit_dehiscence)) + "indehiscent", "indehiscent", "indehiscent", ""), + c("pod", "capsule", "drupe", "achene", "nut", "samara", "follicle", + "berry", "utricle", "syncarp", "syconium", "schizocarp") + ) %>% + mutate( + fruit_dehiscence = if_else(`Adaptive.Trait` %in% + c("indehiscent", "indehiscent, woody"), "indehiscent", fruit_dehiscence), + fruit_dehiscence = na_if(fruit_dehiscence, "") + ) ' collection_date: 2014/2014 taxon_name: species diff --git a/data/Crisp_2017/metadata.yml b/data/Crisp_2017/metadata.yml index 55500e867..211cc9ce0 100644 --- a/data/Crisp_2017/metadata.yml +++ b/data/Crisp_2017/metadata.yml @@ -36,7 +36,7 @@ dataset: ) %>% move_values_to_new_trait( "leaf_margins","leaf_margin_posture", - c("recurved", "revolute toothed"), c("recurved", "revolute"), c("NA", "toothed") + c("recurved", "revolute toothed"), c("recurved", 
"revolute"), c("", "toothed") ) ' collection_date: 1984/2017 diff --git a/data/Jordan_2020/metadata.yml b/data/Jordan_2020/metadata.yml index f42170688..1925e6fc1 100644 --- a/data/Jordan_2020/metadata.yml +++ b/data/Jordan_2020/metadata.yml @@ -68,34 +68,34 @@ dataset: leaf_lobation = leaf_margin, leaf_lobation = str_replace_all(leaf_lobation,"toothed|toothed | toothed", ""), leaf_margin = str_replace_all(leaf_margin,"lobed| lobed|lobed ", ""), - leaf_margin2 = NA, - leaf_compoundness2 = NA, - leaf_lobation2 = NA, - leaf_base_shape = NA + leaf_margin2 = NA_character_, + leaf_compoundness2 = NA_character_, + leaf_lobation2 = NA_character_, + leaf_base_shape = NA_character_ ) %>% move_values_to_new_trait( "leaf_division", "leaf_margin2", c("dentate", "crenate", "toothed", "deeply_toothed", "coarsely_toothed"), c("dentate", "crenate", "toothed", "toothed", "toothed"), - c("NA", "NA", "NA", "NA", "NA") + c("", "", "", "", "") ) %>% move_values_to_new_trait( "leaf_division", "leaf_compoundness2", c("compound"), c("compound"), - c("NA") + c("") ) %>% move_values_to_new_trait( "leaf_division", "leaf_lobation2", c("lobed", "shallowly_lobed", "deeply_segmented"), c("lobed", "lobed_shallow", "lobed_deep"), - c("NA", "NA", "NA") + c("", "", "") ) %>% move_values_to_new_trait( "leaf_shape", "leaf_base_shape", c("cuneate", "auriculate"), c("cuneate", "auriculate"), - c("NA", "NA") + c("", "") ) %>% mutate( leaf_margin = ifelse(is.na(leaf_margin2), leaf_margin, leaf_margin2), diff --git a/data/Jurado_1991/metadata.yml b/data/Jurado_1991/metadata.yml index e6a103d5e..fa6ace170 100644 --- a/data/Jurado_1991/metadata.yml +++ b/data/Jurado_1991/metadata.yml @@ -36,7 +36,7 @@ dataset: move_values_to_new_trait( "dispersal mode 1=unassisted, 2=elaiosome, 3=flesh/aril, 4=adhesive, 5=wind", "dispersal_appendage", - c("2","3"), c("2","3"), c("NA","NA")) + c("2","3"), c("2","3"), c("","")) ' collection_date: 1987/1989 taxon_name: name_original diff --git a/data/Kooyman_2011/metadata.yml 
b/data/Kooyman_2011/metadata.yml index e66f5b9ed..3228c4373 100644 --- a/data/Kooyman_2011/metadata.yml +++ b/data/Kooyman_2011/metadata.yml @@ -68,9 +68,11 @@ dataset: ungroup() %>% mutate( `leaves lobed` = str_replace_all(`leaves lobed`," NA",""), - `growth form` = str_replace_all(`growth form`," NA",""), + `growth form` = str_replace_all(`growth form`," NA",""), + `growth form` = na_if(`growth form`,"NA"), `leaf type` = str_replace_all(`leaf type`," NA",""), - `leaf longevity` = str_replace_all(`leaf longevity`," NA","") + `leaf longevity` = str_replace_all(`leaf longevity`," NA",""), + `leaf longevity` = na_if(`leaf longevity`,"NA"), ) ' collection_date: 2010/2010 diff --git a/data/Leishman_1995/metadata.yml b/data/Leishman_1995/metadata.yml index df38a84cf..17d05e6e2 100644 --- a/data/Leishman_1995/metadata.yml +++ b/data/Leishman_1995/metadata.yml @@ -68,7 +68,7 @@ dataset: "fruit_fleshiness", c("3","fleshy fruit","vertebrate ingestion (fleshy)"), c("fleshy","fleshy","fleshy"), - c("3",NA,"vertebrate ingestion (fleshy)") + c("3","","vertebrate ingestion (fleshy)") ) ' collection_date: 1996/1996 diff --git a/data/McGlone_2015/metadata.yml b/data/McGlone_2015/metadata.yml index 90bda8147..206c5831b 100644 --- a/data/McGlone_2015/metadata.yml +++ b/data/McGlone_2015/metadata.yml @@ -31,7 +31,7 @@ dataset: distinct(`species name`, `ht (m)`, `ll (mm)`, `lw (mm)`, `leaf form`, `habit`, .keep_all = TRUE) %>% move_values_to_new_trait( "leaf form", "leaf_form_general", - "scale", "scale", "NA" + "scale", "scale", "" ) ' collection_date: 2014/2014 diff --git a/data/Metcalfe_2009/metadata.yml b/data/Metcalfe_2009/metadata.yml index e86a0b70f..e0266a5f9 100644 --- a/data/Metcalfe_2009/metadata.yml +++ b/data/Metcalfe_2009/metadata.yml @@ -50,7 +50,7 @@ dataset: c("Insect","water","wind") ) %>% move_values_to_new_trait( - "Fruit_type", "dispersal_unit", "Spore", "spore", "NA" + "Fruit_type", "dispersal_unit", "Spore", "spore", "" ) ' collection_date: 2005/2005 diff --git 
a/data/Metcalfe_2020_1/metadata.yml b/data/Metcalfe_2020_1/metadata.yml index df0a9ccc4..6ac4da32b 100644 --- a/data/Metcalfe_2020_1/metadata.yml +++ b/data/Metcalfe_2020_1/metadata.yml @@ -24,7 +24,7 @@ dataset: mutate(leaf_form_general = NA) %>% move_values_to_new_trait( "Leaf_type", "leaf_form_general", - "no leaves", "leafless", "NA" + "no leaves", "leafless", "" ) ' collection_date: 2020/2020 diff --git a/data/NTH_2014/metadata.yml b/data/NTH_2014/metadata.yml index 2e2fd542a..c94b36404 100644 --- a/data/NTH_2014/metadata.yml +++ b/data/NTH_2014/metadata.yml @@ -37,25 +37,25 @@ dataset: "dispersal appendage", "fruit_type", c("drupe", "samara"), c("drupe", "samara"), - c("NA","wings") + c("","wings") ) %>% move_values_to_new_trait( "leaf shape", "leaf_lobation", - c("lobed"), c("lobed"), c("omit") + c("lobed"), c("lobed"), c("") ) %>% move_values_to_new_trait( "leaf shape", "leaf_lobation_pattern", - "palmately lobed", "palmately_lobed", "omit" + "palmately lobed", "palmately_lobed", "" ) %>% move_values_to_new_trait( "leaf shape", "plant_photosynthetic_organ2", - c("cladode"), c("cladode"), c("omit") + c("cladode"), c("cladode"), c("") ) %>% move_values_to_new_trait( "leaf shape", "plant_photosynthetic_organ3", c("article","branchlets with articles","articles"), c("cladode","cladode","cladode"), - c("omit","omit","omit") + c("","","") ) %>% mutate( leaf_type = ifelse(is.na(plant_photosynthetic_organ2),leaf_type,plant_photosynthetic_organ2), @@ -338,9 +338,6 @@ substitutions: - trait_name: plant_growth_form find: 'y' replace: succulent_short -- trait_name: leaf_shape - find: omit - replace: .na - trait_name: leaf_shape find: palmately_lobed replace: .na diff --git a/data/Read_2003/metadata.yml b/data/Read_2003/metadata.yml index e23bc4585..7dc940315 100644 --- a/data/Read_2003/metadata.yml +++ b/data/Read_2003/metadata.yml @@ -33,7 +33,7 @@ dataset: mutate(plant_photosynthetic_organ = NA) %>% move_values_to_new_trait( "leaf", "plant_photosynthetic_organ", - 
"phyllode", "phyllode", "NA" + "phyllode", "phyllode", "" ) ' collection_date: 2000-10/2000-11 diff --git a/data/Read_2005/metadata.yml b/data/Read_2005/metadata.yml index d2ff71aa4..ad9bec5e7 100644 --- a/data/Read_2005/metadata.yml +++ b/data/Read_2005/metadata.yml @@ -37,7 +37,7 @@ dataset: mutate(plant_photosynthetic_organ = NA) %>% move_values_to_new_trait( "leaf", "plant_photosynthetic_organ", - "phyllode", "phyllode", "NA" + "phyllode", "phyllode", "" ) ' collection_date: 2001-09/2001-09 diff --git a/data/Soliveres_2012/metadata.yml b/data/Soliveres_2012/metadata.yml index 117aa94da..d3d32ad81 100644 --- a/data/Soliveres_2012/metadata.yml +++ b/data/Soliveres_2012/metadata.yml @@ -22,7 +22,22 @@ contributors: austraits_curators: Rachael Gallagher dataset: data_is_long_format: no - custom_R_code: data %>% mutate(dispersers = NA, fruit_fleshiness = NA) %>% move_values_to_new_trait("Dispersal mechanism","dispersers",c("Vertebrates","Ants","Wind"),c("vertebrates","ants","wind"),c("Vertebrates","Ants","wind")) %>% move_values_to_new_trait("Dispersal mechanism","fruit_fleshiness",c("Fleshy-fruited","fleshy_fruit"),c("fleshy","fleshy"),c("NA","NA")) + custom_R_code: ' + data %>% + mutate(dispersers = NA, fruit_fleshiness = NA) %>% + move_values_to_new_trait( + "Dispersal mechanism","dispersers", + c("Vertebrates","Ants","Wind"), + c("vertebrates","ants","wind"), + c("Vertebrates","Ants","wind") + ) %>% + move_values_to_new_trait( + "Dispersal mechanism","fruit_fleshiness", + c("Fleshy-fruited","fleshy_fruit"), + c("fleshy","fleshy"), + c("","") + ) + ' collection_date: 2012/2012 taxon_name: name_original description: dispersal and growth form data for grazing land species diff --git a/data/TMAG_2009/metadata.yml b/data/TMAG_2009/metadata.yml index 865c7e235..993dddd1a 100644 --- a/data/TMAG_2009/metadata.yml +++ b/data/TMAG_2009/metadata.yml @@ -45,7 +45,7 @@ dataset: ) %>% move_values_to_new_trait( "leaf shape", "leaf_base_shape", - c("cuneate","hastate"), 
c("cuneate","hastate"), c("omit","omit") + c("cuneate","hastate"), c("cuneate","hastate"), c("","") ) ' collection_date: unknown/2009 @@ -247,9 +247,6 @@ traits: replicates: .na methods: Extracted from records of dispersal_appendage. substitutions: -- trait_name: leaf_shape - find: omit - replace: .na - trait_name: leaf_shape find: lobed replace: .na diff --git a/data/WAH_1998/metadata.yml b/data/WAH_1998/metadata.yml index f010e28f3..75e5711a5 100644 --- a/data/WAH_1998/metadata.yml +++ b/data/WAH_1998/metadata.yml @@ -67,8 +67,9 @@ dataset: mutate(leaf_form_general = NA) %>% move_values_to_new_trait( "leaf type (compound/simple)", "leaf_form_general", - "leafless", "leafless", "NA" - ) + "leafless", "leafless", "" + ) %>% + mutate(`leaf type (compound/simple)` = na_if(`leaf type (compound/simple)`, "")) ' collection_date: unknown/1998 taxon_name: SPECIES diff --git a/data/Wells_2012/metadata.yml b/data/Wells_2012/metadata.yml index 74d3eb014..4a099f31a 100644 --- a/data/Wells_2012/metadata.yml +++ b/data/Wells_2012/metadata.yml @@ -80,7 +80,7 @@ dataset: dispersal_appendage = ifelse(Fruit_type == "Dry dehiscent with fleshy reward (e.g.
aril, funicle, elaiosome, etc.)", "fleshy_reward", NA)) %>% - move_values_to_new_trait("Fruit_type","dispersal_unit","spore","spore","NA") + move_values_to_new_trait("Fruit_type","dispersal_unit","spore","spore","") ' collection_date: 2004/2005 taxon_name: Species diff --git a/scripts/custom.R b/scripts/custom.R index acd21917a..327709681 100644 --- a/scripts/custom.R +++ b/scripts/custom.R @@ -265,6 +265,8 @@ format_min_max_as_range <- function(data, min_column, max_column, range_column, #' } move_values_to_new_trait <- function(data, original_trait, new_trait, original_values, values_for_new_trait, values_to_keep) { +# data[[original_trait]] = NA_character_ + for (j in 1:length(original_values)) { i <- data[[original_trait]] == original_values[[j]] @@ -273,8 +275,11 @@ move_values_to_new_trait <- function(data, original_trait, new_trait, original_v data[[original_trait]] = ifelse(i, values_to_keep[[j]], data[[original_trait]]) data } - + + data[[original_trait]] = ifelse(data[[original_trait]] == "", NA, data[[original_trait]]) + return(data) + } #' Add values to an additional trait for datasets in long format