Skip to content

Commit

Permalink
Merge pull request #224 from monarch-initiative/ielis/issue222
Browse files Browse the repository at this point in the history
Remove `name` from `Phenotype`.
  • Loading branch information
ielis authored Aug 21, 2024
2 parents 2b745bf + 9f8acdb commit 6cc296e
Show file tree
Hide file tree
Showing 14 changed files with 152 additions and 564 deletions.
2 changes: 1 addition & 1 deletion src/genophenocorr/analysis/predicate/phenotype/_pheno.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def test(
return None

for phenotype in patient.phenotypes:
if phenotype.is_observed:
if phenotype.is_present:
if any(
self._query == anc
for anc in self._hpo.graph.get_ancestors(
Expand Down
21 changes: 11 additions & 10 deletions src/genophenocorr/data/_toy.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,17 @@ def get_toy_cohort() -> Cohort:
# - Spasticity HP:0001257
# - Chronic pancreatitis HP:0006280

arachnodactyly_T = Phenotype(TermId.from_curie('HP:0001166'), 'Arachnodactyly', True)
focal_clonic_seizure_T = Phenotype(TermId.from_curie('HP:0002266'), 'Focal clonic seizure', True)
seizure_T = Phenotype(TermId.from_curie('HP:0001250'), 'Seizure', True)
spasticity_T = Phenotype(TermId.from_curie('HP:0001257'), 'Spasticity', True)
Disease_T = Disease(TermId.from_curie('OMIM:001234'), 'Test Disease', True)
arachnodactyly_F = Phenotype(TermId.from_curie('HP:0001166'), 'Arachnodactyly', False)
focal_clonic_seizure_F = Phenotype(TermId.from_curie('HP:0002266'), 'Focal clonic seizure', False)
seizure_F = Phenotype(TermId.from_curie('HP:0001250'), 'Seizure', False)
spasticity_F = Phenotype(TermId.from_curie('HP:0001257'), 'Spasticity', False)
Disease_F = Disease(TermId.from_curie('OMIM:001234'), 'Test Disease', False)
arachnodactyly_T = Phenotype(TermId.from_curie('HP:0001166'), True)
focal_clonic_seizure_T = Phenotype(TermId.from_curie('HP:0002266'), True)
seizure_T = Phenotype(TermId.from_curie('HP:0001250'), True)
spasticity_T = Phenotype(TermId.from_curie('HP:0001257'), True)
arachnodactyly_F = Phenotype(TermId.from_curie('HP:0001166'), False)
focal_clonic_seizure_F = Phenotype(TermId.from_curie('HP:0002266'), False)
seizure_F = Phenotype(TermId.from_curie('HP:0001250'), False)
spasticity_F = Phenotype(TermId.from_curie('HP:0001257'), False)

Disease_T = Disease(TermId.from_curie('OMIM:001234'), "Test present disease", True)
Disease_F = Disease(TermId.from_curie('OMIM:001234'), "Test absent disease", False)

snv = Variant.create_variant_from_scratch(VariantCoordinates(make_region(280, 281), 'A', 'G', 0), 'FakeGene',
'NM_1234.5', 'NM_1234.5:c.180A>G', False, [VariantEffect.MISSENSE_VARIANT], [1],
Expand Down
4 changes: 1 addition & 3 deletions src/genophenocorr/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ def default(self, o):
elif isinstance(o, Phenotype):
return {
'term_id': o.identifier.value,
'name': o.name,
'is_present': o.is_present,
}
elif isinstance(o, Disease):
Expand Down Expand Up @@ -160,7 +159,7 @@ def default(self, o):
_PROTEIN_METADATA = ('protein_id', 'label', 'protein_features', 'protein_length')
_PROTEIN_FEATURE = ('info', 'feature_type')
_FEATURE_INFO = ('name', 'region')
_PHENOTYPE_FIELDS = ('term_id', 'name', 'is_present')
_PHENOTYPE_FIELDS = ('term_id', 'is_present')
_DISEASE_FIELDS = ('term_id', 'name', 'is_observed')
_PATIENT_FIELDS = ('labels', 'phenotypes', 'diseases', 'variants')
_COHORT_FIELDS = ('members', 'excluded_patient_count')
Expand Down Expand Up @@ -271,7 +270,6 @@ def object_hook(obj: typing.Dict[typing.Any, typing.Any]) -> typing.Any:
elif GenophenocorrJSONDecoder._has_all_fields(obj, _PHENOTYPE_FIELDS):
return Phenotype(
term_id=hpotk.TermId.from_curie(obj['term_id']),
name=obj['name'],
is_observed=obj['is_present'],
)
elif GenophenocorrJSONDecoder._has_all_fields(obj, _DISEASE_FIELDS):
Expand Down
2 changes: 1 addition & 1 deletion src/genophenocorr/model/_cohort.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def present_phenotypes(self) -> typing.Iterator[Phenotype]:
"""
Get an iterator over *present* phenotypes of the patient.
"""
return filter(lambda p: p.is_observed, self._phenotypes)
return filter(lambda p: p.is_present, self._phenotypes)

def excluded_phenotypes(self) -> typing.Iterator[Phenotype]:
"""
Expand Down
34 changes: 13 additions & 21 deletions src/genophenocorr/model/_phenotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,27 @@
import hpotk


class Phenotype(hpotk.model.Identified, hpotk.model.ObservableFeature, hpotk.model.Named):
"""A class that represents an HPO verified phenotype
class Phenotype(hpotk.model.Identified, hpotk.model.ObservableFeature):
"""
`Phenotype` represents a clinical sign or symptom encoded as an HPO term.
The phenotype can be either present in the patient or excluded.
Attributes:
term_id (hpotk.model.Named): The HPO ID associated with this phenotype
name (str): The official HPO name for this phenotype
term_id (hpotk.TermId): The HPO ID associated with this phenotype
is_observed (bool): Is True if this phenotype was observed in the respective patient
"""

@staticmethod
def from_term(term: hpotk.model.MinimalTerm, is_observed: bool):
return Phenotype(term.identifier, term.name, is_observed)
return Phenotype(term.identifier, is_observed)

def __init__(self, term_id: hpotk.TermId,
name: str,
is_observed: bool) -> None:
def __init__(
self,
term_id: hpotk.TermId,
is_observed: bool
):
self._term_id = hpotk.util.validate_instance(term_id, hpotk.TermId, 'term_id')
self._name = hpotk.util.validate_instance(name, str, 'name')
self._observed = hpotk.util.validate_instance(is_observed, bool, 'is_observed')

@property
Expand All @@ -34,15 +37,6 @@ def identifier(self) -> hpotk.TermId:
"""
return self._term_id

@property
def name(self):
"""Returns a string that describes this Phenotype object.
Returns:
string: phenotype name
"""
return self._name

@property
def is_present(self) -> bool:
"""
Expand Down Expand Up @@ -75,16 +69,14 @@ def is_observed(self) -> bool:
def __eq__(self, other):
return isinstance(other, Phenotype) \
and self._term_id == other._term_id \
and self._name == other._name \
and self._observed == other._observed

def __hash__(self):
return hash((self._term_id, self._name, self._observed))
return hash((self._term_id, self._observed))

def __str__(self):
return f"Phenotype(" \
f"identifier={self._term_id}, " \
f"name={self._name}, " \
f"is_present={self._observed})"

def __repr__(self):
Expand Down
59 changes: 39 additions & 20 deletions src/genophenocorr/preprocessing/_phenotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,29 @@ class PhenotypeCreator(Auditor[typing.Iterable[typing.Tuple[str, bool]], typing.
with HPO toolkit's validator.
"""

def __init__(self, hpo: hpotk.MinimalOntology,
validator: hpotk.validate.ValidationRunner):
def __init__(
self,
hpo: hpotk.MinimalOntology,
validator: hpotk.validate.ValidationRunner,
):
self._logger = logging.getLogger(__name__)
self._hpo = hpotk.util.validate_instance(hpo, hpotk.MinimalOntology, 'hpo')
self._validator = hpotk.util.validate_instance(validator, hpotk.validate.ValidationRunner, 'validator')

def process(self, inputs: typing.Iterable[typing.Tuple[str, bool]], notepad: Notepad) -> typing.Sequence[Phenotype]:
"""Creates a list of Phenotype objects from term IDs and checks if the term IDs satisfy the validation requirements.
def process(
self,
inputs: typing.Iterable[typing.Tuple[str, bool]],
notepad: Notepad,
) -> typing.Sequence[Phenotype]:
"""
Map CURIEs and observation states into phenotypes and validate the requirements.
Args:
inputs (Iterable[Tuple[str, bool]]): A list of Tuples, structured (HPO IDs, boolean- True if observed)
notepad: Node
inputs (Iterable[Tuple[str, bool]]): 2-element tuples with a CURIE `str` and observation state as `bool`
(`True` if phenotype was observed).
notepad: Notepad
Returns:
A sequence of Phenotype objects
A sequence of phenotypes
"""
phenotypes = []

Expand All @@ -42,41 +51,51 @@ def process(self, inputs: typing.Iterable[typing.Tuple[str, bool]], notepad: Not
try:
term_id = hpotk.TermId.from_curie(curie)
except ValueError as ve:
notepad.add_warning(f'#{i} {ve.args[0]}',
'Ensure the term ID consists of a prefix (e.g. `HP`) '
'and id (e.g. `0001250`) joined by colon `:` or underscore `_`')
notepad.add_warning(
f'#{i} {ve.args[0]}',
'Ensure the term ID consists of a prefix (e.g. `HP`) '
'and id (e.g. `0001250`) joined by colon `:` or underscore `_`',
)
continue

# Check the term is an HPO concept
if term_id.prefix != 'HP':
notepad.add_warning(f'#{i} {term_id} is not an HPO term',
'Remove non-HPO concepts from the analysis input')
notepad.add_warning(
f'#{i} {term_id} is not an HPO term',
'Remove non-HPO concepts from the analysis input',
)
continue

# Term must be present in HPO
term = self._hpo.get_term(term_id)
if term is None:
notepad.add_warning(f'#{i} {term_id} is not in HPO version `{self._hpo.version}`',
'Correct the HPO term or use the latest HPO for the analysis')
notepad.add_warning(
f'#{i} {term_id} is not in HPO version `{self._hpo.version}`',
'Correct the HPO term or use the latest HPO for the analysis',
)
continue


assert term is not None
if term.identifier != term_id:
# Input includes an obsolete term ID. We emit a warning and update the term ID behind the scenes,
# since `term.identifier` always returns the primary term ID.
notepad.add_warning(f'#{i} {term_id} is an obsolete identifier for {term.name}',
f'Replace {term_id} with the primary term ID {term.identifier}')
notepad.add_warning(
f'#{i} {term_id} is an obsolete identifier for {term.name}',
f'Replace {term_id} with the primary term ID {term.identifier}',
)

phenotypes.append(Phenotype.from_term(term, is_observed))

# Check we have some phenotype terms to work with.
if len(phenotypes) == 0:
notepad.add_warning(
f'No phenotype terms were left after the validation',
'Revise the phenotype terms and try again')
'No phenotype terms were left after the validation',
'Revise the phenotype terms and try again',
)
else:
vr = self._validator.validate_all(phenotypes)
for result in vr.results:
level = self._translate_level(result.level)
level = PhenotypeCreator._translate_level(result.level)
if level is None:
# Should not happen. Please let the developers know about this issue!
raise ValueError(f'Unknown result validation level {result.level}')
Expand Down
2 changes: 0 additions & 2 deletions tests/analysis/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ def degenerated_cohort(
phenotypes=(
Phenotype(
term_id=hpotk.TermId.from_curie("HP:0000118"),
name="Phenotypic abnormality",
is_observed=True,
),
),
Expand Down Expand Up @@ -75,7 +74,6 @@ def degenerated_cohort(
phenotypes=(
Phenotype(
term_id=hpotk.TermId.from_curie("HP:0000118"),
name="Phenotypic abnormality",
is_observed=True,
),
),
Expand Down
1 change: 0 additions & 1 deletion tests/analysis/predicate/phenotype/test_predicate.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ def test_a_patient(
phenotypes=(
Phenotype(
hpotk.TermId.from_curie(curie),
name="Doesn't matter",
is_observed=True,
)
for curie in curies
Expand Down
2 changes: 1 addition & 1 deletion tests/analysis/test_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,4 +132,4 @@ def test_compare_symptom_count_vs_genotype(
phenotype_group_terms=phenotype_group_terms,
)

assert phenotype_group_results.p_value == pytest.approx(0.6056948063581343)
assert phenotype_group_results.p_value == pytest.approx(0.9345982107594922)
16 changes: 8 additions & 8 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,18 +313,18 @@ def toy_cohort(
@pytest.fixture(scope='session')
def test_phenotypes() -> typing.Mapping[str, Phenotype]:
return {
'arachnodactyly_T': Phenotype(hpotk.TermId.from_curie('HP:0001166'), "Arachnodactyly", True),
'seizure_T': Phenotype(hpotk.TermId.from_curie('HP:0001250'), "Seizure", True),
'arachnodactyly_T': Phenotype(hpotk.TermId.from_curie('HP:0001166'), True),
'seizure_T': Phenotype(hpotk.TermId.from_curie('HP:0001250'), True),
'focal_clonic_seizure_T': Phenotype(
hpotk.TermId.from_curie('HP:0002266'), "Focal clonic seizure", True,
hpotk.TermId.from_curie('HP:0002266'), True,
),
'spasticity_T': Phenotype(hpotk.TermId.from_curie('HP:0001257'), "Spasticity", True),
'spasticity_T': Phenotype(hpotk.TermId.from_curie('HP:0001257'), True),

'arachnodactyly_F': Phenotype(hpotk.TermId.from_curie('HP:0001166'), "Arachnodactyly", False),
'seizure_F': Phenotype(hpotk.TermId.from_curie('HP:0001250'), "Seizure", False),
'spasticity_F': Phenotype(hpotk.TermId.from_curie('HP:0001257'), "Spasticity", False),
'arachnodactyly_F': Phenotype(hpotk.TermId.from_curie('HP:0001166'), False),
'seizure_F': Phenotype(hpotk.TermId.from_curie('HP:0001250'), False),
'spasticity_F': Phenotype(hpotk.TermId.from_curie('HP:0001257'), False),
'focal_clonic_seizure_F': Phenotype(
hpotk.TermId.from_curie('HP:0002266'), "Focal clonic seizure", False,
hpotk.TermId.from_curie('HP:0002266'), False,
),
}

Expand Down
8 changes: 2 additions & 6 deletions tests/model/test_cohort.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import pytest

from genophenocorr.model import Cohort, VariantEffect
from genophenocorr.model import Cohort


class TestCohort:
Expand All @@ -12,12 +10,10 @@ def test_all_transcript_ids(
# The SUOX cohort includes variants that affect the following transcripts:
assert suox_cohort.all_transcript_ids == {
"NM_001032386.2", # MANE
"NM_001351091.2",
"NM_001351091.2",
"NM_001032387.2",
"NM_001351089.2",
"NM_000456.3",
"NM_016373.4",
"NM_013275.6"
}

def test_variant_effect_count_by_tx(
Expand Down
Loading

0 comments on commit 6cc296e

Please sign in to comment.