From 7541cb26e2516a47681cc85d8d75ff9434ddec10 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Thu, 21 Jul 2022 14:26:47 -0700 Subject: [PATCH 01/10] trying to see if we can make Python-compatible schema. Not looking good so far. --- src/lib.rs | 2 +- src/table_collection.rs | 88 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index ba670ee8c..ed66fa738 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -435,7 +435,7 @@ pub use mutation_table::{MutationTable, MutationTableRow, OwnedMutationTable}; pub use node_table::{NodeTable, NodeTableRow, OwnedNodeTable}; pub use population_table::{OwnedPopulationTable, PopulationTable, PopulationTableRow}; pub use site_table::{OwnedSiteTable, SiteTable, SiteTableRow}; -pub use table_collection::TableCollection; +pub use table_collection::{TableLevel, TableCollection}; pub use traits::IndividualLocation; pub use traits::IndividualParents; pub use traits::NodeListGenerator; diff --git a/src/table_collection.rs b/src/table_collection.rs index fb54588e3..d3e5aa866 100644 --- a/src/table_collection.rs +++ b/src/table_collection.rs @@ -24,9 +24,23 @@ use crate::TskReturnValue; use crate::TskitTypeAccess; use crate::{tsk_id_t, tsk_size_t}; use crate::{EdgeId, NodeId}; +use libc::{c_char, strlen}; use ll_bindings::tsk_table_collection_free; use mbox::MBox; +pub enum TableLevel { + Toplevel, + Edges, + Nodes, + Sites, + Mutations, + Individuals, + Populations, + Migrations, + #[cfg(feature = "provenance")] + Provenance, +} + /// A table collection. 
/// /// This is a thin wrapper around the C type @@ -1202,6 +1216,58 @@ impl TableCollection { }; handle_tsk_return_value!(rv) } + + /// Set a metadata schema + /// + /// # Examples + /// + /// ```rust + /// use tskit::TskitTypeAccess; + /// + /// let json_schema = " + /// { + /// \"codec\": \"json\", + /// \"type\": \"object\", + /// \"name\": \"Population metadata\", + /// \"properties\": {\"name\": {\"type\": \"string\"}}, + /// } + /// "; + /// let mut tables = tskit::TableCollection::new(10.).unwrap(); + /// assert!(tables.set_metadata_schema( + /// tskit::TableLevel::Populations, + /// json_schema).is_ok()); + /// assert!(unsafe{ + /// (*tables.as_ptr()).populations.metadata_schema_length + /// }> 0); + /// let schema = unsafe { + /// std::ffi::CStr::from_ptr((*tables.as_ptr()).populations.metadata_schema) + /// }; + /// assert_eq!(schema.to_str().unwrap(), json_schema); + /// ``` + pub fn set_metadata_schema( + &mut self, + level: TableLevel, + schema: impl AsRef, + ) -> TskReturnValue { + println!("{} {}", schema.as_ref(), schema.as_ref().len()); + let cstr = std::ffi::CString::new(schema.as_ref()).unwrap(); + println!("{:?}", cstr); + let len = unsafe { strlen(cstr.as_bytes_with_nul().as_ptr() as *const c_char) }; + println!("{:?}", cstr); + println!("{}", len); + let rv = match level { + TableLevel::Populations => unsafe { + ll_bindings::tsk_population_table_set_metadata_schema( + &mut (*self.inner).populations, + cstr.as_bytes_with_nul().as_ptr() as *const c_char, + len.try_into().unwrap(), + ) + }, + _ => unimplemented!("haven't done it yet"), + }; + println!("rv = {}", rv); + handle_tsk_return_value!(rv) + } } impl TableAccess for TableCollection { @@ -2296,3 +2362,25 @@ mod test_adding_migrations { } } } + +#[cfg(test)] +mod test_metadata_schema { + use super::*; + + #[test] + fn population_metadata_schema() { + let json_schema = r#"[\"codec\":\"json\",\"name\":\"Populationmetadata\",\"properties\",{\"name\":{\"type\":\"string\"}}] +"#; + let mut 
tables = TableCollection::new(10.).unwrap(); + assert!(tables + .set_metadata_schema(TableLevel::Populations, json_schema) + .is_ok()); + assert!(!unsafe { (*tables.as_ptr()).populations.metadata_schema.is_null() }); + let len = unsafe { (*tables.as_ptr()).populations.metadata_schema_length }; + assert!(len > 0, "{}", len); + let schema = + unsafe { std::ffi::CStr::from_ptr((*tables.as_ptr()).populations.metadata_schema) }; + assert_eq!(schema.to_str().unwrap(), json_schema); + tables.dump("foo.trees", 0).unwrap(); + } +} From 0d30cdcb18978eacf6278d31425028468d4620ef Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Thu, 21 Jul 2022 14:48:40 -0700 Subject: [PATCH 02/10] progress --- src/table_collection.rs | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/table_collection.rs b/src/table_collection.rs index d3e5aa866..be08c5e53 100644 --- a/src/table_collection.rs +++ b/src/table_collection.rs @@ -2369,18 +2369,36 @@ mod test_metadata_schema { #[test] fn population_metadata_schema() { - let json_schema = r#"[\"codec\":\"json\",\"name\":\"Populationmetadata\",\"properties\",{\"name\":{\"type\":\"string\"}}] -"#; + let json_schema3 = r#"{"codec":"json","type":"object","name":"Population metadata","properties:"{"name":{"type":"string"}}}"#; + let json_schema3 = r#"{"codec":"json","type":"object","name":"Population metadata","properties":{"name":{"type":"string"}}}"#; + let json_schema2 = r#"{"codec":"json","name":"Population metadata","properties":{"name":{"type":"string"}},"type":"object"}"#; + let json_schema = r#"{"codec":"json","name":"Population metadata","properties":{"name":{"type":"string"}},"type":"object"}"#; + + // YOU CANNOT HAVE A TRAILING COMMA AT THE END!!!!!! 
+ let from_fp11 = r#" + { + "codec": "json", + "type": "object", + "name": "Population metadata", + "properties": {"name": {"type": "string"}} + }"#; + + assert_eq!(json_schema, json_schema2); let mut tables = TableCollection::new(10.).unwrap(); assert!(tables - .set_metadata_schema(TableLevel::Populations, json_schema) + .set_metadata_schema(TableLevel::Populations, from_fp11) .is_ok()); assert!(!unsafe { (*tables.as_ptr()).populations.metadata_schema.is_null() }); let len = unsafe { (*tables.as_ptr()).populations.metadata_schema_length }; assert!(len > 0, "{}", len); let schema = unsafe { std::ffi::CStr::from_ptr((*tables.as_ptr()).populations.metadata_schema) }; - assert_eq!(schema.to_str().unwrap(), json_schema); + assert_eq!(schema.to_str().unwrap(), from_fp11); tables.dump("foo.trees", 0).unwrap(); + + let tables = TableCollection::new_from_file("bananas.tables").unwrap(); + let schema = + unsafe { std::ffi::CStr::from_ptr((*tables.as_ptr()).populations.metadata_schema) }; + println!("from tskit = {}", schema.to_str().unwrap()); } } From 085ae859c1cd3998b3947dda7e2b9a8f6384040c Mon Sep 17 00:00:00 2001 From: "Kevin R. 
Thornton" Date: Thu, 21 Jul 2022 14:54:11 -0700 Subject: [PATCH 03/10] rename, b/c we may not stick w/this API --- src/table_collection.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/table_collection.rs b/src/table_collection.rs index be08c5e53..ac5f3472e 100644 --- a/src/table_collection.rs +++ b/src/table_collection.rs @@ -1233,7 +1233,7 @@ impl TableCollection { /// } /// "; /// let mut tables = tskit::TableCollection::new(10.).unwrap(); - /// assert!(tables.set_metadata_schema( + /// assert!(tables.set_json_metadata_schema_from_str( /// tskit::TableLevel::Populations, /// json_schema).is_ok()); /// assert!(unsafe{ @@ -1244,7 +1244,7 @@ impl TableCollection { /// }; /// assert_eq!(schema.to_str().unwrap(), json_schema); /// ``` - pub fn set_metadata_schema( + pub fn set_json_metadata_schema_from_str( &mut self, level: TableLevel, schema: impl AsRef, @@ -2386,7 +2386,7 @@ mod test_metadata_schema { assert_eq!(json_schema, json_schema2); let mut tables = TableCollection::new(10.).unwrap(); assert!(tables - .set_metadata_schema(TableLevel::Populations, from_fp11) + .set_json_metadata_schema_from_str(TableLevel::Populations, from_fp11) .is_ok()); assert!(!unsafe { (*tables.as_ptr()).populations.metadata_schema.is_null() }); let len = unsafe { (*tables.as_ptr()).populations.metadata_schema_length }; From 099bc240f7933061cecb63e9d28af1d62e171086 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Thu, 21 Jul 2022 14:54:52 -0700 Subject: [PATCH 04/10] delete doc test b/c we ain't ready for that yet! 
--- src/table_collection.rs | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/table_collection.rs b/src/table_collection.rs index ac5f3472e..02dd4975a 100644 --- a/src/table_collection.rs +++ b/src/table_collection.rs @@ -1221,29 +1221,6 @@ impl TableCollection { /// /// # Examples /// - /// ```rust - /// use tskit::TskitTypeAccess; - /// - /// let json_schema = " - /// { - /// \"codec\": \"json\", - /// \"type\": \"object\", - /// \"name\": \"Population metadata\", - /// \"properties\": {\"name\": {\"type\": \"string\"}}, - /// } - /// "; - /// let mut tables = tskit::TableCollection::new(10.).unwrap(); - /// assert!(tables.set_json_metadata_schema_from_str( - /// tskit::TableLevel::Populations, - /// json_schema).is_ok()); - /// assert!(unsafe{ - /// (*tables.as_ptr()).populations.metadata_schema_length - /// }> 0); - /// let schema = unsafe { - /// std::ffi::CStr::from_ptr((*tables.as_ptr()).populations.metadata_schema) - /// }; - /// assert_eq!(schema.to_str().unwrap(), json_schema); - /// ``` pub fn set_json_metadata_schema_from_str( &mut self, level: TableLevel, From 20dee0940b0d83e857c12d17e07fbf1ee7b33172 Mon Sep 17 00:00:00 2001 From: "Kevin R. 
Thornton" Date: Thu, 21 Jul 2022 17:01:40 -0700 Subject: [PATCH 05/10] rename --- src/lib.rs | 2 +- src/table_collection.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ed66fa738..24a12fe9b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -435,7 +435,7 @@ pub use mutation_table::{MutationTable, MutationTableRow, OwnedMutationTable}; pub use node_table::{NodeTable, NodeTableRow, OwnedNodeTable}; pub use population_table::{OwnedPopulationTable, PopulationTable, PopulationTableRow}; pub use site_table::{OwnedSiteTable, SiteTable, SiteTableRow}; -pub use table_collection::{TableLevel, TableCollection}; +pub use table_collection::{Schema, TableCollection}; pub use traits::IndividualLocation; pub use traits::IndividualParents; pub use traits::NodeListGenerator; diff --git a/src/table_collection.rs b/src/table_collection.rs index 02dd4975a..fd1356b34 100644 --- a/src/table_collection.rs +++ b/src/table_collection.rs @@ -28,7 +28,7 @@ use libc::{c_char, strlen}; use ll_bindings::tsk_table_collection_free; use mbox::MBox; -pub enum TableLevel { +pub enum Schema { Toplevel, Edges, Nodes, @@ -1223,7 +1223,7 @@ impl TableCollection { /// pub fn set_json_metadata_schema_from_str( &mut self, - level: TableLevel, + level: Schema, schema: impl AsRef, ) -> TskReturnValue { println!("{} {}", schema.as_ref(), schema.as_ref().len()); @@ -1233,7 +1233,7 @@ impl TableCollection { println!("{:?}", cstr); println!("{}", len); let rv = match level { - TableLevel::Populations => unsafe { + Schema::Populations => unsafe { ll_bindings::tsk_population_table_set_metadata_schema( &mut (*self.inner).populations, cstr.as_bytes_with_nul().as_ptr() as *const c_char, @@ -2363,7 +2363,7 @@ mod test_metadata_schema { assert_eq!(json_schema, json_schema2); let mut tables = TableCollection::new(10.).unwrap(); assert!(tables - .set_json_metadata_schema_from_str(TableLevel::Populations, from_fp11) + 
.set_json_metadata_schema_from_str(Schema::Populations, from_fp11) .is_ok()); assert!(!unsafe { (*tables.as_ptr()).populations.metadata_schema.is_null() }); let len = unsafe { (*tables.as_ptr()).populations.metadata_schema_length }; From 702be1a261738c52e63f233c8acf8c5787d03b62 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Thu, 21 Jul 2022 17:04:28 -0700 Subject: [PATCH 06/10] rename, part deux --- src/lib.rs | 2 +- src/table_collection.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 24a12fe9b..e8fcf7ed6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -435,7 +435,7 @@ pub use mutation_table::{MutationTable, MutationTableRow, OwnedMutationTable}; pub use node_table::{NodeTable, NodeTableRow, OwnedNodeTable}; pub use population_table::{OwnedPopulationTable, PopulationTable, PopulationTableRow}; pub use site_table::{OwnedSiteTable, SiteTable, SiteTableRow}; -pub use table_collection::{Schema, TableCollection}; +pub use table_collection::{MetadataSchema, TableCollection}; pub use traits::IndividualLocation; pub use traits::IndividualParents; pub use traits::NodeListGenerator; diff --git a/src/table_collection.rs b/src/table_collection.rs index fd1356b34..356a7b1af 100644 --- a/src/table_collection.rs +++ b/src/table_collection.rs @@ -28,7 +28,7 @@ use libc::{c_char, strlen}; use ll_bindings::tsk_table_collection_free; use mbox::MBox; -pub enum Schema { +pub enum MetadataSchema { Toplevel, Edges, Nodes, @@ -1223,7 +1223,7 @@ impl TableCollection { /// pub fn set_json_metadata_schema_from_str( &mut self, - level: Schema, + level: MetadataSchema, schema: impl AsRef, ) -> TskReturnValue { println!("{} {}", schema.as_ref(), schema.as_ref().len()); @@ -1233,7 +1233,7 @@ impl TableCollection { println!("{:?}", cstr); println!("{}", len); let rv = match level { - Schema::Populations => unsafe { + MetadataSchema::Populations => unsafe { ll_bindings::tsk_population_table_set_metadata_schema( &mut 
(*self.inner).populations, cstr.as_bytes_with_nul().as_ptr() as *const c_char, @@ -2363,7 +2363,7 @@ mod test_metadata_schema { assert_eq!(json_schema, json_schema2); let mut tables = TableCollection::new(10.).unwrap(); assert!(tables - .set_json_metadata_schema_from_str(Schema::Populations, from_fp11) + .set_json_metadata_schema_from_str(MetadataSchema::Populations, from_fp11) .is_ok()); assert!(!unsafe { (*tables.as_ptr()).populations.metadata_schema.is_null() }); let len = unsafe { (*tables.as_ptr()).populations.metadata_schema_length }; From da813d06a5bffccf172171e56caba4cac36ff9ad Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Thu, 21 Jul 2022 17:05:56 -0700 Subject: [PATCH 07/10] unbreak the tests --- src/table_collection.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/table_collection.rs b/src/table_collection.rs index 356a7b1af..724fb23e2 100644 --- a/src/table_collection.rs +++ b/src/table_collection.rs @@ -2373,9 +2373,9 @@ mod test_metadata_schema { assert_eq!(schema.to_str().unwrap(), from_fp11); tables.dump("foo.trees", 0).unwrap(); - let tables = TableCollection::new_from_file("bananas.tables").unwrap(); - let schema = - unsafe { std::ffi::CStr::from_ptr((*tables.as_ptr()).populations.metadata_schema) }; - println!("from tskit = {}", schema.to_str().unwrap()); + //let tables = TableCollection::new_from_file("bananas.tables").unwrap(); + //let schema = + // unsafe { std::ffi::CStr::from_ptr((*tables.as_ptr()).populations.metadata_schema) }; + //println!("from tskit = {}", schema.to_str().unwrap()); } } From 9a8e332392e223c3949f0581275c39a77d08d8af Mon Sep 17 00:00:00 2001 From: "Kevin R. 
Thornton" Date: Thu, 21 Jul 2022 17:20:18 -0700 Subject: [PATCH 08/10] add a crate, this is probably a dumb idea --- metadata_schema_testing/Cargo.toml | 11 +++++++++++ metadata_schema_testing/src/main.rs | 29 +++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 metadata_schema_testing/Cargo.toml create mode 100644 metadata_schema_testing/src/main.rs diff --git a/metadata_schema_testing/Cargo.toml b/metadata_schema_testing/Cargo.toml new file mode 100644 index 000000000..5ecc1e587 --- /dev/null +++ b/metadata_schema_testing/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "metadata_schema_testing" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +tskit = {path = "..", features = ["derive"]} +serde = {version = "1.0.118", features = ["derive"]} +serde_json = {version = "1.0.67"} diff --git a/metadata_schema_testing/src/main.rs b/metadata_schema_testing/src/main.rs new file mode 100644 index 000000000..4a92b1451 --- /dev/null +++ b/metadata_schema_testing/src/main.rs @@ -0,0 +1,29 @@ +use tskit::prelude::*; +use tskit::TableCollection; + +#[derive(serde::Serialize, serde::Deserialize, tskit::metadata::PopulationMetadata)] +#[serializer("serde_json")] +struct PopulationMetadata { + name: String, +} + +fn main() { + let from_fp11 = r#" + { + "codec": "json", + "type": "object", + "name": "Population metadata", + "properties": {"name": {"type": "string"}} + }"#; + + let mut tables = TableCollection::new(10.0).unwrap(); + tables + .add_population_with_metadata(&PopulationMetadata { + name: "YRB".to_string(), + }) + .unwrap(); + tables + .set_json_metadata_schema_from_str(tskit::MetadataSchema::Populations, from_fp11) + .unwrap(); + tables.dump("testit.trees", 0).unwrap(); +} From f48d8de65fe372bc8d38f64bb327dba1e35ce43c Mon Sep 17 00:00:00 2001 From: "Kevin R. 
Thornton" Date: Fri, 22 Jul 2022 07:13:36 -0700 Subject: [PATCH 09/10] move metadata example program to examples/ --- Cargo.toml | 4 ++++ .../src/main.rs => examples/metadata_schema.rs | 0 metadata_schema_testing/Cargo.toml | 11 ----------- 3 files changed, 4 insertions(+), 11 deletions(-) rename metadata_schema_testing/src/main.rs => examples/metadata_schema.rs (100%) delete mode 100644 metadata_schema_testing/Cargo.toml diff --git a/Cargo.toml b/Cargo.toml index 0aea780c6..2cf882e93 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,3 +58,7 @@ name = "tree_traversals" [[example]] name = "forward_simulation" + +[[example]] +name = "metadata_schema" +required-features = ["derive"] diff --git a/metadata_schema_testing/src/main.rs b/examples/metadata_schema.rs similarity index 100% rename from metadata_schema_testing/src/main.rs rename to examples/metadata_schema.rs diff --git a/metadata_schema_testing/Cargo.toml b/metadata_schema_testing/Cargo.toml deleted file mode 100644 index 5ecc1e587..000000000 --- a/metadata_schema_testing/Cargo.toml +++ /dev/null @@ -1,11 +0,0 @@ -[package] -name = "metadata_schema_testing" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -tskit = {path = "..", features = ["derive"]} -serde = {version = "1.0.118", features = ["derive"]} -serde_json = {version = "1.0.67"} From 10cf51a4709f718abebcbdc2b27d016a26e9106a Mon Sep 17 00:00:00 2001 From: "Kevin R. 
Thornton" Date: Fri, 22 Jul 2022 07:19:51 -0700 Subject: [PATCH 10/10] add simplistic python scripts for testing metadata transfer --- python_scripts/read_tablesfile.py | 7 +++++++ python_scripts/requirements.txt | 1 + 2 files changed, 8 insertions(+) create mode 100644 python_scripts/read_tablesfile.py create mode 100644 python_scripts/requirements.txt diff --git a/python_scripts/read_tablesfile.py b/python_scripts/read_tablesfile.py new file mode 100644 index 000000000..8ed3b6838 --- /dev/null +++ b/python_scripts/read_tablesfile.py @@ -0,0 +1,7 @@ +import tskit +import sys + +for f in sys.argv[1:]: + tables = tskit.TableCollection.load(f) + for pop in tables.populations: + print(pop) diff --git a/python_scripts/requirements.txt b/python_scripts/requirements.txt new file mode 100644 index 000000000..72b8dd4e2 --- /dev/null +++ b/python_scripts/requirements.txt @@ -0,0 +1 @@ +tskit = "0.5.1"