From a6e53133557c33747c487f3f07ba468bcab06a13 Mon Sep 17 00:00:00 2001 From: Collin Schwantes Date: Mon, 9 Dec 2024 13:41:03 -0500 Subject: [PATCH 1/5] using the number of rows in the structural metadata df to determine metadata expansion --- R/modify_frictionless_metadata.R | 22 +++++++++++++++++++--- man/expand_frictionless_metadata.Rd | 2 +- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/R/modify_frictionless_metadata.R b/R/modify_frictionless_metadata.R index 7d54c20..f49eb8d 100644 --- a/R/modify_frictionless_metadata.R +++ b/R/modify_frictionless_metadata.R @@ -27,7 +27,7 @@ #' # update structural metadata #' write.csv(data_codebook,"my/codebook.csv", row.names = FALSE) #' -#' data_codebook_updated <- read.csv(""my/codebook.csv"") +#' data_codebook_updated <- read.csv("my/codebook.csv") #' #' # create frictionless package - this is done automatically with the #' # deposits package @@ -61,9 +61,24 @@ expand_frictionless_metadata <- function(structural_metadata, ## build up schema based on structural metadata - for(idx in 1:length(my_data_schema$fields)){ + # for each row, update the schema + for(idx in 1:nrow(structural_metadata)){ # item to build out - x <- my_data_schema$fields[[idx]] + ## row may not exist in the original data. + x <- tryCatch( + expr = { + ## get the fields item we want to update + my_data_schema$fields[[idx]] + }, + error = function(e){ + ## use the first index item + msg<- sprintf("Adding %s to frictionless metadata",structural_metadata$name[[idx]]) + message(msg) + my_data_schema$fields[[1]] + } + ) + + for(idy in 1:length(structural_metadata)){ y <- structural_metadata[idx,idy][[1]] @@ -85,6 +100,7 @@ expand_frictionless_metadata <- function(structural_metadata, my_data_schema$fields[[idx]] <- x } + if(prune_datapackage){ my_data_schema <- prune_datapackage(my_data_schema,structural_metadata) } diff --git a/man/expand_frictionless_metadata.Rd b/man/expand_frictionless_metadata.Rd index c207064..c6af2dd 100644 --- a/man/expand_frictionless_metadata.Rd +++ b/man/expand_frictionless_metadata.Rd @@ -45,7 +45,7 @@ data_codebook <- create_structural_metadata(data) # update structural metadata write.csv(data_codebook,"my/codebook.csv", row.names = FALSE) -data_codebook_updated <- read.csv(""my/codebook.csv"") +data_codebook_updated <- read.csv("my/codebook.csv") # create frictionless package - this is done automatically with the # deposits package From 2351d22bb3033e2d9d1358688918677cb8675f8d Mon Sep 17 00:00:00 2001 From: Collin Schwantes Date: Mon, 9 Dec 2024 13:43:22 -0500 Subject: [PATCH 2/5] Increment version number to 0.3.12 --- DESCRIPTION | 2 +- NEWS.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6849728..babf18e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ohcleandat Type: Package Title: One Health Data Cleaning and Quality Checking Package -Version: 0.3.11 +Version: 0.3.12 Authors@R: c( person("Collin", "Schwantes", email = "schwantes@ecohealthalliance.org", role = c("cre", "aut"), comment = c(ORCID = "0000-0003-4014-4896")), person("Johana", "Teigen", email = "teigen@ecohealthalliance.org", role = "aut", comment = c(ORCID = "0000-0002-6209-2321")), diff --git a/NEWS.md b/NEWS.md index 5dcb9ef..70aa1dd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,5 @@ +# ohcleandat 0.3.12 + # ohcleandat 0.3.11 * obfuscate gps can now handle NAs From a052e78a187bd2f72326f911156665eb2a7b798c Mon Sep 17 00:00:00 2001 From: Collin Schwantes Date: Mon, 9 Dec 2024 14:23:45 -0500 Subject: [PATCH 3/5] make schema and metadata df the same length --- NEWS.md | 3 +++ R/create_structural_metadata.R | 14 +++++++------- R/modify_frictionless_metadata.R | 16 +++++++++++++--- man/create_structural_metadata.Rd | 17 +++++++++-------- man/expand_frictionless_metadata.Rd | 6 ++++-- man/prune_datapackage.Rd | 2 +- vignettes/data_examples/my_data.csv | 22 +++++++++++----------- 7 files changed, 48 insertions(+), 32 deletions(-) diff --git a/NEWS.md b/NEWS.md index 70aa1dd..ab8e724 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # ohcleandat 0.3.12 +* `expand_frictionless_metadata` can add and remove fields from the metadata depending +on the structural metadata supplied. + # ohcleandat 0.3.11 * obfuscate gps can now handle NAs diff --git a/R/create_structural_metadata.R b/R/create_structural_metadata.R index a207b41..546ad92 100644 --- a/R/create_structural_metadata.R +++ b/R/create_structural_metadata.R @@ -15,13 +15,13 @@ #' #' The metadata table produced has the following elements #' -#' `name` = The name of the field. This is taken as is from `data`. -#' `description` = Description of that field. May be provided by controlled vocabulary -#' `units` = Units of measure for that field. May or may not apply -#' `term_uri` = Universal Resource Identifier for a term from a controlled vocabulary or schema -#' `comments` = Free text providing additional details about the field -#' `primary_key` = `TRUE` or `FALSE`, Uniquely identifies each record in the data -#' `foreign_key` = `TRUE` or `FALSE`, Allows for linkages between data sets. Uniquely identifies +#' - `name` = The name of the field. This is taken as is from `data`. +#' - `description` = Description of that field. May be provided by controlled vocabulary +#' - `units` = Units of measure for that field. May or may not apply +#' - `term_uri` = Universal Resource Identifier for a term from a controlled vocabulary or schema +#' - `comments` = Free text providing additional details about the field +#' - `primary_key` = `TRUE` or `FALSE`, Uniquely identifies each record in the data +#' - `foreign_key` = `TRUE` or `FALSE`, Allows for linkages between data sets. Uniquely identifies #' records in a different data set #' #' diff --git a/R/modify_frictionless_metadata.R b/R/modify_frictionless_metadata.R index f49eb8d..d487861 100644 --- a/R/modify_frictionless_metadata.R +++ b/R/modify_frictionless_metadata.R @@ -1,7 +1,9 @@ #' Expand Frictionless Metadata with structural metadata #' -#' Loops over elements in the structural metadata and adds them to frictionless -#' metadata schema. Will overwrite existing values. +#' Loops over elements in the structural metadata and adds +#' them to the frictionless metadata schema. Will overwrite existing values and +#' remove any fields from the datapackage metadata not listed in the structural +#' metadata. #' #' @param structural_metadata Dataframe. Structural metadata from #' `create_structural_metadata` or `update_structural_metadata` @@ -61,6 +63,11 @@ expand_frictionless_metadata <- function(structural_metadata, ## build up schema based on structural metadata + ## drop fields that were removed from the structural metadata + if(nrow(structural_metadata) <= length(my_data_schema$fields)){ + my_data_schema$fields <- my_data_schema$fields[1:nrow(structural_metadata)] + } + # for each row, update the schema for(idx in 1:nrow(structural_metadata)){ # item to build out @@ -101,6 +108,7 @@ expand_frictionless_metadata <- function(structural_metadata, } + ## prune the properties of items in the schema, does not remove fields if(prune_datapackage){ my_data_schema <- prune_datapackage(my_data_schema,structural_metadata) } @@ -120,7 +128,7 @@ expand_frictionless_metadata <- function(structural_metadata, } -#' Prune data pacakge +#' Prune field properties in a data package #' #' method to remove properties from the metadata for a dataset in a datapackage #' @@ -152,3 +160,5 @@ prune_datapackage <- function(my_data_schema, structural_metadata){ return(my_data_schema_pruned) } + + diff --git a/man/create_structural_metadata.Rd b/man/create_structural_metadata.Rd index 9445cc4..d6d3ceb 100644 --- a/man/create_structural_metadata.Rd +++ b/man/create_structural_metadata.Rd @@ -31,16 +31,17 @@ generated then joined to pre-existing metadata via field names. } \details{ The metadata table produced has the following elements - -\code{name} = The name of the field. This is taken as is from \code{data}. -\code{description} = Description of that field. May be provided by controlled vocabulary -\code{units} = Units of measure for that field. May or may not apply -\code{term_uri} = Universal Resource Identifier for a term from a controlled vocabulary or schema -\code{comments} = Free text providing additional details about the field -\code{primary_key} = \code{TRUE} or \code{FALSE}, Uniquely identifies each record in the data -\code{foreign_key} = \code{TRUE} or \code{FALSE}, Allows for linkages between data sets. Uniquely identifies +\itemize{ +\item \code{name} = The name of the field. This is taken as is from \code{data}. +\item \code{description} = Description of that field. May be provided by controlled vocabulary +\item \code{units} = Units of measure for that field. May or may not apply +\item \code{term_uri} = Universal Resource Identifier for a term from a controlled vocabulary or schema +\item \code{comments} = Free text providing additional details about the field +\item \code{primary_key} = \code{TRUE} or \code{FALSE}, Uniquely identifies each record in the data +\item \code{foreign_key} = \code{TRUE} or \code{FALSE}, Allows for linkages between data sets. Uniquely identifies records in a different data set } +} \examples{ \dontrun{ df <- data.frame(a = 1:10, b = letters[1:10]) diff --git a/man/expand_frictionless_metadata.Rd b/man/expand_frictionless_metadata.Rd index c6af2dd..5adf538 100644 --- a/man/expand_frictionless_metadata.Rd +++ b/man/expand_frictionless_metadata.Rd @@ -29,8 +29,10 @@ be removed?} Updates the datapackage, returns nothing } \description{ -Loops over elements in the structural metadata and adds them to frictionless -metadata schema. Will overwrite existing values. +Loops over elements in the structural metadata and adds +them to the frictionless metadata schema. Will overwrite existing values and +remove any fields from the datapackage metadata not listed in the structural +metadata. } \examples{ \dontrun{ diff --git a/man/prune_datapackage.Rd b/man/prune_datapackage.Rd index 17dab6e..acd14d8 100644 --- a/man/prune_datapackage.Rd +++ b/man/prune_datapackage.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/modify_frictionless_metadata.R \name{prune_datapackage} \alias{prune_datapackage} -\title{Prune data pacakge} +\title{Prune field properties in a data package} \usage{ prune_datapackage(my_data_schema, structural_metadata) } diff --git a/vignettes/data_examples/my_data.csv b/vignettes/data_examples/my_data.csv index 8846be9..b849f8c 100644 --- a/vignettes/data_examples/my_data.csv +++ b/vignettes/data_examples/my_data.csv @@ -1,11 +1,11 @@ -"date","measurement","measured_by","site_name","key" -2024-08-26,43,"Johana","c",1 -2024-08-27,9,"Johana","c",2 -2024-08-28,79,"Johana","c",3 -2024-08-29,17,"Collin","a",4 -2024-08-30,61,"Johana","e",5 -2024-08-31,30,"Collin","b",6 -2024-09-01,58,"Collin","a",7 -2024-09-02,27,"Johana","d",8 -2024-09-03,52,"Johana","d",9 -2024-09-04,82,"Collin","e",10 +date,measurement,measured_by,site_name,key +2024-08-26,43,Johana,c,1 +2024-08-27,9,Johana,c,2 +2024-08-28,79,Johana,c,3 +2024-08-29,17,Collin,a,4 +2024-08-30,61,Johana,e,5 +2024-08-31,30,Collin,b,6 +2024-09-01,58,Collin,a,7 +2024-09-02,27,Johana,d,8 +2024-09-03,52,Johana,d,9 +2024-09-04,82,Collin,e,10 From 94a36002b2d7cadf968f7b8187a7eeec37374b68 Mon Sep 17 00:00:00 2001 From: Collin Schwantes Date: Mon, 9 Dec 2024 14:25:51 -0500 Subject: [PATCH 4/5] adding a debugging scripts folder --- scripts/debug_expand_df.R | 46 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 scripts/debug_expand_df.R diff --git a/scripts/debug_expand_df.R b/scripts/debug_expand_df.R new file mode 100644 index 0000000..a6e63df --- /dev/null +++ b/scripts/debug_expand_df.R @@ -0,0 +1,46 @@ +devtools::load_all() + + +# read in data + + + +df <- read.csv("vignettes/data_examples/my_data.csv") + +# create codebook + +structural_metadata <- create_structural_metadata(df,primary_key = "key", + foreign_key = c("measured_by","site_name")) + +# create data package + + +dp <- frictionless::create_package() |> + frictionless::add_resource(resource_name = "my_data", + data = df) + +frictionless::write_package(package = dp, + directory = "vignettes/data_examples") + +expand_frictionless_metadata(structural_metadata = structural_metadata, + resource_name = "my_data", + resource_path = "vignettes/data_examples/my_data.csv", + data_package_path = "vignettes/data_examples/datapackage.json" + ) + +## drop a measured_by column + +df <- read.csv("vignettes/data_examples/my_data.csv") + +# create codebook + +structural_metadata_2 <- create_structural_metadata(df, + primary_key = "key", + foreign_key = c("site_name")) + + +expand_frictionless_metadata(structural_metadata = structural_metadata_2, + resource_name = "my_data", + resource_path = "vignettes/data_examples/my_data.csv", + data_package_path = "vignettes/data_examples/datapackage.json" +) From 11ec857c15780dfa1950187ed0acf998663d3415 Mon Sep 17 00:00:00 2001 From: Collin Schwantes Date: Mon, 9 Dec 2024 14:27:01 -0500 Subject: [PATCH 5/5] adding scripts folder to build ignore --- .Rbuildignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.Rbuildignore b/.Rbuildignore index 28829a6..a7f5b35 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,3 +9,4 @@ ^\.env$ ^doc$ ^Meta$ +^scripts$