Skip to content

Commit

Permalink
Merge pull request #63 from anergictcell/fix/missing-orpha-features
Browse files Browse the repository at this point in the history
Add missing orpha features
  • Loading branch information
anergictcell authored Jun 15, 2024
2 parents 88d16d0 + f751516 commit bcc1187
Show file tree
Hide file tree
Showing 8 changed files with 152 additions and 64 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.

## Unreleased

## [0.10.1]

### Refactor

- Add some missing methods for Orpha diseases


## [0.10.0]

### Feature
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "hpo"
version = "0.10.0"
version = "0.10.1"
edition = "2021"
authors = ["Jonas Marcello <jonas.marcello@esbme.com>"]
description = "Human Phenotype Ontology Similarity"
Expand Down
6 changes: 3 additions & 3 deletions examples/bench_disease_enrichment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::path::Path;

use rayon::prelude::*;

use hpo::stats::hypergeom::disease_enrichment;
use hpo::stats::hypergeom::omim_disease_enrichment;
use hpo::Ontology;

const GENES: [&str; 6] = ["GBA1", "NPC1", "EZH2", "DMD", "MUC7", "ARID1B"];
Expand Down Expand Up @@ -34,7 +34,7 @@ fn multi_threaded_enrichment(ontology: &Ontology) -> usize {
.unwrap_or_else(|| panic!("{gene} does not exist"))
.to_hpo_set(ontology);
total
+ disease_enrichment(ontology, &set)
+ omim_disease_enrichment(ontology, &set)
.iter()
.fold(0, |count, enrichment| {
if enrichment.pvalue() < 0.0000005 && enrichment.enrichment() > 100.0 {
Expand All @@ -55,7 +55,7 @@ fn single_threaded_enrichment(ontology: &Ontology) -> usize {
.unwrap_or_else(|| panic!("{gene} does not exist"))
.to_hpo_set(ontology);
total
+ disease_enrichment(ontology, &set)
+ omim_disease_enrichment(ontology, &set)
.iter()
.fold(0, |count, enrichment| {
if enrichment.pvalue() < 0.0000005 && enrichment.enrichment() > 100.0 {
Expand Down
2 changes: 1 addition & 1 deletion examples/enrichment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ fn gene_enrichments(ontology: &Ontology, hposet: &HpoSet, output_len: usize) {

/// Prints diseases that are enriched in the hposet
fn disease_enrichments(ontology: &Ontology, hposet: &HpoSet, output_len: usize) {
let mut disease_enrichments = hpo::stats::hypergeom::disease_enrichment(ontology, hposet);
let mut disease_enrichments = hpo::stats::hypergeom::omim_disease_enrichment(ontology, hposet);

disease_enrichments.sort_by(|a, b| {
a.pvalue()
Expand Down
36 changes: 35 additions & 1 deletion src/set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
use std::collections::HashMap;

use crate::annotations::Genes;
use crate::annotations::OmimDiseases;
use crate::annotations::{OmimDiseases, OrphaDiseases};
use crate::similarity::GroupSimilarity;
use crate::similarity::Similarity;
use crate::similarity::SimilarityCombiner;
Expand Down Expand Up @@ -516,6 +516,40 @@ impl<'a> HpoSet<'a> {
.fold(OmimDiseases::default(), |acc, element| &acc | element)
}

/// Returns all [`crate::annotations::OrphaDiseaseId`]s that are associated to the set
///
/// # Panics
///
/// When an `HpoTermId` of the set is not part of the Ontology
///
/// # Examples
///
/// ```
/// use hpo::{Ontology, HpoSet};
/// use hpo::term::HpoGroup;
///
/// let ontology = Ontology::from_binary("tests/example.hpo").unwrap();
///
/// let mut hpos = HpoGroup::new();
/// hpos.insert(12639u32);
/// hpos.insert(818u32);
/// let set = HpoSet::new(&ontology, hpos);
/// for d in set.orpha_disease_ids() {println!("{}", d);};
/// // `Papilloma of choroid plexus (ORPHA:2807)` is linked to `HP:0012639`
/// assert!(set.orpha_disease_ids().contains(&2807u32.into()));
/// ```
pub fn orpha_disease_ids(&self) -> OrphaDiseases {
self.group
.iter()
.map(|term_id| {
self.ontology
.get(term_id)
.expect("HpoTermId must be in Ontology")
.orpha_diseases()
})
.fold(OrphaDiseases::default(), |acc, diseases| &acc | diseases)
}

/// Returns the counts of all categories in the set
///
/// # Examples
Expand Down
19 changes: 16 additions & 3 deletions src/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
use std::collections::HashMap;
use std::marker::PhantomData;

use crate::annotations::{AnnotationId, Disease, GeneId, OmimDiseaseId};
use crate::annotations::{AnnotationId, Disease, GeneId, OmimDiseaseId, OrphaDiseaseId};
use crate::HpoTerm;

pub mod hypergeom;
Expand All @@ -25,7 +25,7 @@ pub use linkage::Linkage;
/// The fold enrichment and p-value for an enriched Gene or Disease
///
/// [`Enrichment`] is returned from statistics enrichment methods, such as
/// [`hypergeom::gene_enrichment`] and [`hypergeom::disease_enrichment`].
/// [`hypergeom::gene_enrichment`], [`hypergeom::omim_disease_enrichment`] and [`hypergeom::orpha_disease_enrichment`].
///
#[derive(Debug)]
pub struct Enrichment<T> {
Expand Down Expand Up @@ -143,7 +143,7 @@ impl<'a> SampleSet<GeneId> {

impl<'a> SampleSet<OmimDiseaseId> {
/// Constructs a new `SampleSet` with disease counts from an iterator of [`HpoTerm`]s
pub fn disease<I: IntoIterator<Item = HpoTerm<'a>>>(terms: I) -> Self {
pub fn omim_disease<I: IntoIterator<Item = HpoTerm<'a>>>(terms: I) -> Self {
let term2omimid = |term: HpoTerm<'a>| term.omim_diseases().map(|d| d.id().as_u32());
let (size, counts) = calculate_counts(terms, term2omimid);
Self {
Expand All @@ -154,6 +154,19 @@ impl<'a> SampleSet<OmimDiseaseId> {
}
}

impl<'a> SampleSet<OrphaDiseaseId> {
/// Constructs a new `SampleSet` with disease counts from an iterator of [`HpoTerm`]s
pub fn orpha_disease<I: IntoIterator<Item = HpoTerm<'a>>>(terms: I) -> Self {
let term2omimid = |term: HpoTerm<'a>| term.orpha_diseases().map(|d| d.id().as_u32());
let (size, counts) = calculate_counts(terms, term2omimid);
Self {
size,
counts,
phantom: PhantomData,
}
}
}

impl<T: AnnotationId> SampleSet<T> {
/// Returns the total number of [`HpoTerm`]s in the [`SampleSet`]
fn len(&self) -> u64 {
Expand Down
2 changes: 1 addition & 1 deletion src/stats/hypergeom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,5 +65,5 @@
mod disease;
mod gene;
mod statrs;
pub use disease::disease_enrichment;
pub use disease::{omim_disease_enrichment, orpha_disease_enrichment};
pub use gene::gene_enrichment;
142 changes: 88 additions & 54 deletions src/stats/hypergeom/disease.rs
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
use tracing::debug;

use crate::annotations::OmimDiseaseId;
use crate::annotations::{AnnotationId, OmimDiseaseId, OrphaDiseaseId};
use crate::stats::hypergeom::statrs::Hypergeometric;
use crate::stats::{f64_from_u64, Enrichment, SampleSet};
use crate::HpoTerm;

/// Calculates the hypergeometric enrichment of diseases within the `set` compared to the `background`
/// Calculates the hypergeometric enrichment of OMIM diseases within the `set` compared to the `background`
///
/// # Examples
///
/// ```
/// use hpo::Ontology;
/// use hpo::{HpoSet, term::HpoGroup};
/// use hpo::stats::hypergeom::disease_enrichment;
/// use hpo::stats::hypergeom::omim_disease_enrichment;
///
/// let ontology = Ontology::from_binary("tests/example.hpo").unwrap();
///
/// let gene = ontology.gene_by_name("KRAS").unwrap();
/// let gene_hpo_set = gene.to_hpo_set(&ontology);
///
/// let mut enrichments = disease_enrichment(&ontology, &gene_hpo_set);
/// let mut enrichments = omim_disease_enrichment(&ontology, &gene_hpo_set);
///
/// // the results are not sorted by default
/// enrichments.sort_by(|a, b| {
Expand All @@ -28,60 +28,94 @@ use crate::HpoTerm;
/// assert!(enrichments.first().unwrap().pvalue() < enrichments.last().unwrap().pvalue());
/// assert!(enrichments.first().unwrap().enrichment() > enrichments.last().unwrap().enrichment());
/// ```
pub fn disease_enrichment<'a, T, U>(background: T, set: U) -> Vec<Enrichment<OmimDiseaseId>>
pub fn omim_disease_enrichment<'a, T, U>(background: T, set: U) -> Vec<Enrichment<OmimDiseaseId>>
where
T: IntoIterator<Item = HpoTerm<'a>>,
U: IntoIterator<Item = HpoTerm<'a>>,
{
fn inner_disease_enrichment(
background: &SampleSet<OmimDiseaseId>,
sample_set: &SampleSet<OmimDiseaseId>,
) -> Vec<Enrichment<OmimDiseaseId>> {
let mut res = Vec::new();
for (disease, observed_successes) in sample_set {
if observed_successes == 0 {
debug!("Skipping {}", disease);
continue;
}
let successes = background
.get(&disease)
.expect("disease must be present in background set");
let hyper = Hypergeometric::new(
// Total number of HPOTerms in the Ontology
// ==> population
background.len(),
// Number of terms in the Ontology that are associated to the disease
// ==> successes
*successes,
// Number of terms in the set
// ==> draws
sample_set.len(),
)
.expect("the set must not be larger than the background");
// subtracting 1, because we want to test including observed_successes
// e.g. "7 or more", but sf by default calculates "more than 7"
let pvalue = hyper.sf(observed_successes - 1);
let enrichment = (f64_from_u64(observed_successes) / f64_from_u64(sample_set.len()))
/ (f64_from_u64(*successes) / f64_from_u64(background.len()));
res.push(Enrichment::annotation(
disease,
pvalue,
observed_successes,
enrichment,
));
debug!(
"Disease:{}\tPopulation: {}, Successes: {}, Draws: {}, Observed: {}",
disease,
background.len(),
successes,
sample_set.len(),
observed_successes
);
}
res
}
let background = SampleSet::omim_disease(background);
let sample_set = SampleSet::omim_disease(set);
inner_disease_enrichment(&background, &sample_set)
}

let background = SampleSet::disease(background);
let sample_set = SampleSet::disease(set);
/// Calculates the hypergeometric enrichment of ORPHA diseases within the `set` compared to the `background`
///
/// # Examples
///
/// ```
/// use hpo::Ontology;
/// use hpo::{HpoSet, term::HpoGroup};
/// use hpo::stats::hypergeom::orpha_disease_enrichment;
///
/// let ontology = Ontology::from_binary("tests/example.hpo").unwrap();
///
/// let gene = ontology.gene_by_name("KRAS").unwrap();
/// let gene_hpo_set = gene.to_hpo_set(&ontology);
///
/// let mut enrichments = orpha_disease_enrichment(&ontology, &gene_hpo_set);
///
/// // the results are not sorted by default
/// enrichments.sort_by(|a, b| {
/// a.pvalue().partial_cmp(&b.pvalue()).unwrap()
/// });
/// assert!(enrichments.first().unwrap().pvalue() < enrichments.last().unwrap().pvalue());
/// assert!(enrichments.first().unwrap().enrichment() > enrichments.last().unwrap().enrichment());
/// ```
pub fn orpha_disease_enrichment<'a, T, U>(background: T, set: U) -> Vec<Enrichment<OrphaDiseaseId>>
where
T: IntoIterator<Item = HpoTerm<'a>>,
U: IntoIterator<Item = HpoTerm<'a>>,
{
let background = SampleSet::orpha_disease(background);
let sample_set = SampleSet::orpha_disease(set);
inner_disease_enrichment(&background, &sample_set)
}

#[inline]
fn inner_disease_enrichment<ID: AnnotationId>(
background: &SampleSet<ID>,
sample_set: &SampleSet<ID>,
) -> Vec<Enrichment<ID>> {
let mut res = Vec::new();
for (disease, observed_successes) in sample_set {
if observed_successes == 0 {
debug!("Skipping {}", disease);
continue;
}
let successes = background
.get(&disease)
.expect("disease must be present in background set");
let hyper = Hypergeometric::new(
// Total number of HPOTerms in the Ontology
// ==> population
background.len(),
// Number of terms in the Ontology that are associated to the disease
// ==> successes
*successes,
// Number of terms in the set
// ==> draws
sample_set.len(),
)
.expect("the set must not be larger than the background");
// subtracting 1, because we want to test including observed_successes
// e.g. "7 or more", but sf by default calculates "more than 7"
let pvalue = hyper.sf(observed_successes - 1);
let enrichment = (f64_from_u64(observed_successes) / f64_from_u64(sample_set.len()))
/ (f64_from_u64(*successes) / f64_from_u64(background.len()));
res.push(Enrichment::annotation(
disease,
pvalue,
observed_successes,
enrichment,
));
debug!(
"Disease:{}\tPopulation: {}, Successes: {}, Draws: {}, Observed: {}",
disease,
background.len(),
successes,
sample_set.len(),
observed_successes
);
}
res
}

0 comments on commit bcc1187

Please sign in to comment.