Skip to content

Commit

Permalink
Merge pull request #54 from anergictcell/parser/p2g
Browse files Browse the repository at this point in the history
Change Gene - Term associations
  • Loading branch information
anergictcell authored Mar 26, 2024
2 parents 03bf197 + dd05d9f commit 57c7871
Show file tree
Hide file tree
Showing 6 changed files with 164 additions and 17 deletions.
43 changes: 30 additions & 13 deletions examples/compare_ontologies.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,15 @@ fn ontology(path_arg: &str) -> Ontology {
}
}

fn ontology2(path_arg: &str) -> Ontology {
let path = Path::new(path_arg);

match path.is_file() {
true => Ontology::from_binary(path).unwrap(),
false => Ontology::from_standard_transitive(&path.to_string_lossy()).unwrap(),
}
}

/// Prints some basic stats about the differences
/// between two Ontologies
fn overview(diffs: &Comparison) {
Expand Down Expand Up @@ -89,15 +98,19 @@ fn changed_terms(diffs: &Comparison) {

/// Prints info about Gene-specific changes
fn changed_genes(diffs: &Comparison) {
println!("#Gene Delta\tID\tOld Name:New Name\tAdded Parents\tRemoved Parents");
for term in diffs.changed_genes() {
print!("Delta\t{}", term.id());
if let Some(names) = term.changed_name() {
println!(
"#Gene Delta\tID\tOld Name:New Name\tn Terms Old\tn Terms New\tAdded Terms\tRemoved Terms"
);
for gene in diffs.changed_genes() {
print!("Delta\t{}", gene.id());
if let Some(names) = gene.changed_name() {
print!("\t{}:{}", names.0, names.1);
} else {
print!("\t.");
}
if let Some(added) = term.added_terms() {
let (n_l, n_r) = gene.n_terms();
print!("\t{n_l}\t{n_r}");
if let Some(added) = gene.added_terms() {
print!(
"\t{}",
added
Expand All @@ -109,7 +122,7 @@ fn changed_genes(diffs: &Comparison) {
} else {
print!("\t.");
}
if let Some(removed) = term.removed_terms() {
if let Some(removed) = gene.removed_terms() {
print!(
"\t{}",
removed
Expand All @@ -127,15 +140,19 @@ fn changed_genes(diffs: &Comparison) {

/// Prints info about Gene-specific changes
fn changed_diseases(diffs: &Comparison) {
println!("#Disease Delta\tID\tOld Name:New Name\tAdded Parents\tRemoved Parents");
for term in diffs.changed_omim_diseases() {
print!("Delta\t{}", term.id());
if let Some(names) = term.changed_name() {
println!("#Disease Delta\tID\tOld Name:New Name\tn Terms Old\tn Terms New\tAdded Terms\tRemoved Terms");
for disease in diffs.changed_omim_diseases() {
print!("Delta\t{}", disease.id());
if let Some(names) = disease.changed_name() {
print!("\t{}:{}", names.0, names.1);
} else {
print!("\t.");
}
if let Some(added) = term.added_terms() {

let (n_l, n_r) = disease.n_terms();
print!("\t{n_l}\t{n_r}");

if let Some(added) = disease.added_terms() {
print!(
"\t{}",
added
Expand All @@ -147,7 +164,7 @@ fn changed_diseases(diffs: &Comparison) {
} else {
print!("\t.");
}
if let Some(removed) = term.removed_terms() {
if let Some(removed) = disease.removed_terms() {
print!(
"\t{}",
removed
Expand Down Expand Up @@ -206,7 +223,7 @@ fn main() {
let arg_new = args.next().unwrap();

let lhs = ontology(&arg_old);
let rhs = ontology(&arg_new);
let rhs = ontology2(&arg_new);

let diffs = lhs.compare(&rhs);

Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const MAX_HPO_ID_INTEGER: usize = 10_000_000;

const OBO_FILENAME: &str = "hp.obo";
const GENE_FILENAME: &str = "phenotype_to_genes.txt";
const GENE_TO_PHENO_FILENAME: &str = "genes_to_phenotype.txt";
const DISEASE_FILENAME: &str = "phenotype.hpoa";

/// The `HpoTermId` of `HP:0000118 | Phenotypic abnormality`
Expand Down
81 changes: 78 additions & 3 deletions src/ontology.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ use termarena::Arena;
/// Then use [`Ontology::from_standard`] to load the data.
/// You need the following files:
/// - `phenotype.hpoa` (Required to connect [`OmimDisease`]s to [`HpoTerm`]s)
/// - `phenotype_to_genes.txt` (Required to connect [`Gene`]s to [`HpoTerm`]s)
/// - `genes_to_phenotype.txt` (Required to connect [`Gene`]s to [`HpoTerm`]s)
/// - alternatively: `phenotype_to_genes.txt` (use [`Ontology::from_standard_transitive`])
/// - `hp.obo` (Required for [`HpoTerm`]s and their connection to each other)
/// 2. Load the ontology from a binary build using [`Ontology::from_binary`].
///
Expand Down Expand Up @@ -112,6 +113,15 @@ use termarena::Arena;
/// [`Ontology`] does not contain a direct relationship between genes and diseases. This relation
/// is only present indirectly via the connected [`HpoTerm`]s.
///
/// # Transivity of relations
///
/// **New in 0.9.0**
///
/// During the construction of the Ontology, every [`HpoTerm`] inherits all gene and disease
/// association of its child terms.
/// But [`Gene`]s and [`OmimDisease`]s will only contain links to *direct* [`HpoTerm`]s. The annotations
/// are not transitiv.
///
/// ```mermaid
/// erDiagram
/// ONTOLOGY ||--|{ HPOTERM : contains
Expand Down Expand Up @@ -368,13 +378,22 @@ impl Ontology {
/// - [`Ontology::add_gene`] failed
/// - [`Ontology::add_omim_disease`] failed
///
///
/// # Note
///
/// Since version `0.9.0` this method will not load genes transitively. That means
/// only directly linked [`HpoTerm`]s are connected to each gene. However, every
/// [`HpoTerm`] will still inherit all gene and disease associations from its children.
/// See [this discussion](https://github.com/anergictcell/hpo/issues/44) for a more detailed
/// explanation
///
/// # Examples
///
/// ```no_run
/// use hpo::Ontology;
/// use hpo::HpoTermId;
///
/// let ontology = Ontology::from_binary("/path/to/jax_hpo_data/").unwrap();
/// let ontology = Ontology::from_standard("/path/to/jax_hpo_data/").unwrap();
///
/// assert!(ontology.len() == 26);
///
Expand All @@ -387,12 +406,68 @@ impl Ontology {
/// ```
///
pub fn from_standard(folder: &str) -> HpoResult<Self> {
let mut ont = Ontology::default();
let path = Path::new(folder);
let obo = path.join(crate::OBO_FILENAME);
let gene = path.join(crate::GENE_TO_PHENO_FILENAME);
let disease = path.join(crate::DISEASE_FILENAME);
parser::load_from_jax_files(&obo, &gene, &disease, &mut ont)?;
ont.calculate_information_content()?;
ont.set_default_categories()?;
ont.set_default_modifier()?;
Ok(ont)
}

/// Initialize the [`Ontology`] from data provided by [Jax HPO](https://hpo.jax.org/)
///
/// You must download:
///
/// - Actual OBO data: [`hp.obo`](https://hpo.jax.org/app/data/ontology)
/// - Links between HPO and OMIM diseases: [`phenotype.hpoa`](https://hpo.jax.org/app/data/annotations)
/// - Links between HPO and Genes: [`genes_to_phenotypes.txt`](http://purl.obolibrary.org/obo/hp/hpoa/genes_to_phenotype.txt)
///
/// and then specify the folder where the data is stored.
///
/// # Errors
///
/// This method can fail for various reasons:
///
/// - obo file not present or available: [`HpoError::CannotOpenFile`]
/// - [`Ontology::add_gene`] failed
/// - [`Ontology::add_omim_disease`] failed
///
/// # Note
///
/// This method has one difference to [`Ontology::from_standard`] in that every [`Gene`]
/// contains directly linked [`HpoTerm`] and all their ancestor terms.
/// See [this discussion](https://github.com/anergictcell/hpo/issues/44) for a more detailed
/// explanation
///
/// # Examples
///
/// ```no_run
/// use hpo::Ontology;
/// use hpo::HpoTermId;
///
/// let ontology = Ontology::from_standard_transitive("/path/to/jax_hpo_data/").unwrap();
///
/// assert!(ontology.len() == 26);
///
/// let absent_term = HpoTermId::try_from("HP:9999999").unwrap();
/// assert!(ontology.hpo(absent_term).is_none());
///
/// let present_term = HpoTermId::try_from("HP:0000001").unwrap();
/// let root_term = ontology.hpo(present_term).unwrap();
/// assert_eq!(root_term.name(), "All");
/// ```
///
pub fn from_standard_transitive(folder: &str) -> HpoResult<Self> {
let mut ont = Ontology::default();
let path = Path::new(folder);
let obo = path.join(crate::OBO_FILENAME);
let gene = path.join(crate::GENE_FILENAME);
let disease = path.join(crate::DISEASE_FILENAME);
parser::load_from_standard_files(&obo, &gene, &disease, &mut ont)?;
parser::load_from_jax_files_with_transivitve_genes(&obo, &gene, &disease, &mut ont)?;
ont.calculate_information_content()?;
ont.set_default_categories()?;
ont.set_default_modifier()?;
Expand Down
7 changes: 7 additions & 0 deletions src/ontology/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ impl HpoTermDelta {
pub struct AnnotationDelta {
id: String,
names: (String, String),
n_terms: (usize, usize),
added_terms: Vec<HpoTermId>,
removed_terms: Vec<HpoTermId>,
}
Expand Down Expand Up @@ -337,6 +338,7 @@ impl<'a> AnnotationDelta {
Some(Self {
id,
names,
n_terms: (lhs_terms.len(), rhs_terms.len()),
added_terms,
removed_terms,
})
Expand Down Expand Up @@ -384,4 +386,9 @@ impl<'a> AnnotationDelta {
pub fn id(&self) -> &str {
&self.id
}

/// Returns the number of terms linked to `old` and `new`
pub fn n_terms(&self) -> (usize, usize) {
self.n_terms
}
}
49 changes: 48 additions & 1 deletion src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,41 @@ pub(crate) mod binary;
/// Module to parse `hp.obo` file
pub(crate) mod hp_obo;

/// Module to parse HPO - `Gene` associations from `genes_to_phenotype.txt` file
pub(crate) mod genes_to_phenotype {
use crate::parser::Path;
use crate::HpoResult;
use std::fs::File;
use std::io::BufRead;
use std::io::BufReader;

use crate::HpoTermId;
use crate::Ontology;

/// Quick and dirty parser for development and debugging
pub fn parse<P: AsRef<Path>>(file: P, ontology: &mut Ontology) -> HpoResult<()> {
let file = File::open(file).unwrap();
let reader = BufReader::new(file);
for line in reader.lines() {
let line = line.unwrap();
// TODO: Check for the header outside of the `lines` iterator
if line.starts_with('#') || line.starts_with("ncbi_gene_id") {
continue;
}
let cols: Vec<&str> = line.trim().split('\t').collect();
let gene_id = ontology.add_gene(cols[1], cols[0])?;
let term_id = HpoTermId::try_from(cols[2])?;
ontology.link_gene_term(term_id, gene_id)?;

ontology
.gene_mut(&gene_id)
.expect("Cannot find gene {gene_id}")
.add_term(term_id);
}
Ok(())
}
}

/// Module to parse HPO - `Gene` associations from `phenotype_to_genes.txt` file
pub(crate) mod phenotype_to_genes {
use crate::parser::Path;
Expand Down Expand Up @@ -126,7 +161,7 @@ pub(crate) mod phenotype_hpoa {
}
}

pub(crate) fn load_from_standard_files<P: AsRef<Path>>(
pub(crate) fn load_from_jax_files_with_transivitve_genes<P: AsRef<Path>>(
obo_file: P,
gene_file: P,
disease_file: P,
Expand All @@ -137,3 +172,15 @@ pub(crate) fn load_from_standard_files<P: AsRef<Path>>(
phenotype_hpoa::parse(disease_file, ontology)?;
Ok(())
}

pub(crate) fn load_from_jax_files<P: AsRef<Path>>(
obo_file: P,
gene_file: P,
disease_file: P,
ontology: &mut Ontology,
) -> HpoResult<()> {
hp_obo::read_obo_file(obo_file, ontology)?;
genes_to_phenotype::parse(gene_file, ontology)?;
phenotype_hpoa::parse(disease_file, ontology)?;
Ok(())
}
Binary file modified tests/ontology.hpo
Binary file not shown.

0 comments on commit 57c7871

Please sign in to comment.