Skip to content

Commit

Permalink
Merge pull request #60 from ecohealthalliance/fix/expand_metadata
Browse files Browse the repository at this point in the history
Fix/expand metadata
  • Loading branch information
collinschwantes authored Dec 9, 2024
2 parents deb0938 + 11ec857 commit e60b0c8
Show file tree
Hide file tree
Showing 10 changed files with 118 additions and 37 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@
^\.env$
^doc$
^Meta$
^scripts$
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: ohcleandat
Type: Package
Title: One Health Data Cleaning and Quality Checking Package
Version: 0.3.11
Version: 0.3.12
Authors@R: c(
person("Collin", "Schwantes", email = "schwantes@ecohealthalliance.org", role = c("cre", "aut"), comment = c(ORCID = "0000-0003-4014-4896")),
person("Johana", "Teigen", email = "teigen@ecohealthalliance.org", role = "aut", comment = c(ORCID = "0000-0002-6209-2321")),
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ohcleandat 0.3.12

* `expand_frictionless_metadata` can now add fields to, or remove fields from, the
metadata depending on the structural metadata supplied.

# ohcleandat 0.3.11

* obfuscate gps can now handle NAs
Expand Down
14 changes: 7 additions & 7 deletions R/create_structural_metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
#'
#' The metadata table produced has the following elements
#'
#' `name` = The name of the field. This is taken as is from `data`.
#' `description` = Description of that field. May be provided by controlled vocabulary
#' `units` = Units of measure for that field. May or may not apply
#' `term_uri` = Universal Resource Identifier for a term from a controlled vocabulary or schema
#' `comments` = Free text providing additional details about the field
#' `primary_key` = `TRUE` or `FALSE`, Uniquely identifies each record in the data
#' `foreign_key` = `TRUE` or `FALSE`, Allows for linkages between data sets. Uniquely identifies
#' - `name` = The name of the field. This is taken as is from `data`.
#' - `description` = Description of that field. May be provided by controlled vocabulary
#' - `units` = Units of measure for that field. May or may not apply
#' - `term_uri` = Universal Resource Identifier for a term from a controlled vocabulary or schema
#' - `comments` = Free text providing additional details about the field
#' - `primary_key` = `TRUE` or `FALSE`, Uniquely identifies each record in the data
#' - `foreign_key` = `TRUE` or `FALSE`, Allows for linkages between data sets. Uniquely identifies
#' records in a different data set
#'
#'
Expand Down
38 changes: 32 additions & 6 deletions R/modify_frictionless_metadata.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#' Expand Frictionless Metadata with structural metadata
#'
#' Loops over elements in the structural metadata and adds them to frictionless
#' metadata schema. Will overwrite existing values.
#' Loops over elements in the structural metadata and adds
#' them to the frictionless metadata schema. Will overwrite existing values and
#' remove any fields from the datapackage metadata not listed in the structural
#' metadata.
#'
#' @param structural_metadata Dataframe. Structural metadata from
#' `create_structural_metadata` or `update_structural_metadata`
Expand All @@ -27,7 +29,7 @@
#' # update structural metadata
#' write.csv(data_codebook,"my/codebook.csv", row.names = FALSE)
#'
#' data_codebook_updated <- read.csv(""my/codebook.csv"")
#' data_codebook_updated <- read.csv("my/codebook.csv")
#'
#' # create frictionless package - this is done automatically with the
#' # deposits package
Expand Down Expand Up @@ -61,9 +63,29 @@ expand_frictionless_metadata <- function(structural_metadata,

## build up schema based on structural metadata

for(idx in 1:length(my_data_schema$fields)){
## drop fields that were removed from the structural metadata
if(nrow(structural_metadata) <= length(my_data_schema$fields)){
my_data_schema$fields <- my_data_schema$fields[1:nrow(structural_metadata)]
}

# for each row, update the schema
for(idx in 1:nrow(structural_metadata)){
# item to build out
x <- my_data_schema$fields[[idx]]
## row may not exist in the original data.
x <- tryCatch(
expr = {
## get the fields item we want to update
my_data_schema$fields[[idx]]
},
error = function(e){
## use the first index item
msg<- sprintf("Adding %s to frictionless metadata",structural_metadata$name[[idx]])
message(msg)
my_data_schema$fields[[1]]
}
)


for(idy in 1:length(structural_metadata)){

y <- structural_metadata[idx,idy][[1]]
Expand All @@ -85,6 +107,8 @@ expand_frictionless_metadata <- function(structural_metadata,
my_data_schema$fields[[idx]] <- x
}


## prune the properties of items in the schema, does not remove fields
if(prune_datapackage){
my_data_schema <- prune_datapackage(my_data_schema,structural_metadata)
}
Expand All @@ -104,7 +128,7 @@ expand_frictionless_metadata <- function(structural_metadata,
}


#' Prune data pacakge
#' Prune field properties in a data package
#'
#' method to remove properties from the metadata for a dataset in a datapackage
#'
Expand Down Expand Up @@ -136,3 +160,5 @@ prune_datapackage <- function(my_data_schema, structural_metadata){

return(my_data_schema_pruned)
}


17 changes: 9 additions & 8 deletions man/create_structural_metadata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions man/expand_frictionless_metadata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/prune_datapackage.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

46 changes: 46 additions & 0 deletions scripts/debug_expand_df.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Debug script for expand_frictionless_metadata().
#
# Builds a codebook and a frictionless data package from the example data,
# expands the package metadata, then repeats the expansion with a second
# codebook. All paths are relative (presumably to the package root -- confirm
# before running from elsewhere).
devtools::load_all()

# Paths used throughout ----
example_dir <- "vignettes/data_examples"
example_csv <- "vignettes/data_examples/my_data.csv"
example_datapackage <- "vignettes/data_examples/datapackage.json"

# Step 1: read the example data and create its codebook ----
df <- read.csv(example_csv)

structural_metadata <- create_structural_metadata(
  df,
  primary_key = "key",
  foreign_key = c("measured_by", "site_name")
)

# Step 2: create the data package and write it out ----
dp <- frictionless::add_resource(
  frictionless::create_package(),
  resource_name = "my_data",
  data = df
)

frictionless::write_package(package = dp, directory = example_dir)

# Step 3: expand the package metadata with the first codebook ----
expand_frictionless_metadata(
  structural_metadata = structural_metadata,
  resource_name = "my_data",
  resource_path = example_csv,
  data_package_path = example_datapackage
)

# Step 4: re-read the data and build a second codebook ----
# Here `measured_by` is no longer declared as a foreign key.
# NOTE(review): the column itself is still present in `df`, so the second
# codebook has the same rows as the first -- confirm whether the column was
# meant to be dropped before this call.
df <- read.csv(example_csv)

structural_metadata_2 <- create_structural_metadata(
  df,
  primary_key = "key",
  foreign_key = c("site_name")
)

# Step 5: expand the package metadata again with the second codebook ----
expand_frictionless_metadata(
  structural_metadata = structural_metadata_2,
  resource_name = "my_data",
  resource_path = example_csv,
  data_package_path = example_datapackage
)
22 changes: 11 additions & 11 deletions vignettes/data_examples/my_data.csv
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"date","measurement","measured_by","site_name","key"
2024-08-26,43,"Johana","c",1
2024-08-27,9,"Johana","c",2
2024-08-28,79,"Johana","c",3
2024-08-29,17,"Collin","a",4
2024-08-30,61,"Johana","e",5
2024-08-31,30,"Collin","b",6
2024-09-01,58,"Collin","a",7
2024-09-02,27,"Johana","d",8
2024-09-03,52,"Johana","d",9
2024-09-04,82,"Collin","e",10
date,measurement,measured_by,site_name,key
2024-08-26,43,Johana,c,1
2024-08-27,9,Johana,c,2
2024-08-28,79,Johana,c,3
2024-08-29,17,Collin,a,4
2024-08-30,61,Johana,e,5
2024-08-31,30,Collin,b,6
2024-09-01,58,Collin,a,7
2024-09-02,27,Johana,d,8
2024-09-03,52,Johana,d,9
2024-09-04,82,Collin,e,10

0 comments on commit e60b0c8

Please sign in to comment.