Skip to content

Commit

Permalink
Merge pull request #185 from monarch-initiative/variant_formatter
Browse files Browse the repository at this point in the history
Created a Formatter
  • Loading branch information
ielis authored Jul 25, 2024
2 parents b794205 + 2444221 commit 2c7f645
Show file tree
Hide file tree
Showing 8 changed files with 117 additions and 19 deletions.
2 changes: 1 addition & 1 deletion src/genophenocorr/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# Names exported by the model package; each name is listed exactly once.
__all__ = [
    'Cohort', 'Patient', 'SampleLabels',
    'Phenotype', 'Disease',
    'Variant', 'VariantCoordinates', 'Genotype', 'Genotypes', 'Genotyped',
    'TranscriptAnnotation', 'VariantEffect', 'TranscriptInfoAware', 'TranscriptCoordinates',
    'ProteinMetadata', 'ProteinFeature', 'FeatureInfo', 'FeatureType',
]
2 changes: 1 addition & 1 deletion src/genophenocorr/model/_cohort.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import itertools
import typing

from collections import Counter, defaultdict
from collections import Counter

import hpotk

Expand Down
30 changes: 28 additions & 2 deletions src/genophenocorr/model/_variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from .genome import Region, GenomicRegion
from ._gt import Genotyped, Genotypes
from ._protein import ProteinMetadata
from ._variant_effects import VariantEffect


Expand Down Expand Up @@ -262,7 +261,17 @@ def variant_key(self) -> str:
if self.is_structural():
return f'{self.chrom}_{self.start + 1}_{self.end}_{self.alt[1:-1]}'
else:
return f'{self.chrom}_{self.start + 1}_{self.end}_{self.ref}_{self.alt}'
key = f'{self.chrom}_{self.start + 1}_{self.end}_{self.ref}_{self.alt}'
if len(key) > 50:
ref = None
alt = None
if len(self.ref) > 10:
ref = f"--{len(self.ref)}bp--"
if len(self.alt) > 10:
alt = f"--{len(self.alt)}bp--"
return f"{self.chrom}_{self.start + 1}_{self.end}_{ref if not None else self.ref}_{alt if not None else self.alt}"
else:
return key

@property
def variant_class(self) -> str:
Expand Down Expand Up @@ -385,6 +394,22 @@ def get_hgvs_cdna_by_tx_id(self, transcript_id:str) -> typing.Optional[str]:
if tx_ann.transcript_id == transcript_id:
return tx_ann.hgvs_cdna
return None

def get_preferred_tx_annotation(self) -> typing.Optional[TranscriptAnnotation]:
    """Get the `TranscriptAnnotation` computed with respect to the preferred transcript.

    The transcript annotations are scanned in order and the first one
    flagged as preferred is returned.

    Returns:
        typing.Optional[TranscriptAnnotation]: The first annotation whose `is_preferred`
        is truthy or `None` if no annotation is flagged as preferred.
    """
    preferred = (ann for ann in self.tx_annotations if ann.is_preferred)
    return next(preferred, None)


class Variant(VariantCoordinateAware, FunctionalAnnotationAware, Genotyped):
Expand Down Expand Up @@ -470,3 +495,4 @@ def __str__(self) -> str:
return (f"Variant(variant_coordinates:{str(self.variant_coordinates)}, "
f"tx_annotations:{self.tx_annotations}, "
f"genotypes:{self.genotypes})")

4 changes: 3 additions & 1 deletion src/genophenocorr/view/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
from ._stats import StatsViewer
from ._txp import VariantTranscriptVisualizer
from ._protein_visualizer import ProteinVisualizer
from ._formatter import VariantFormatter

# Names exported by the view package.
# Every entry ends with a comma: two adjacent string literals without one
# are joined into a single string, which silently drops a name from the list.
__all__ = [
    'CohortViewable',
    'ProteinVisualizer', 'ProteinVisualizable', 'ProteinViewable',
    'DiseaseViewable',
    'StatsViewer',
    'VariantTranscriptVisualizer',
    'VariantFormatter',
]
21 changes: 9 additions & 12 deletions src/genophenocorr/view/_cohort.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from hpotk import MinimalOntology
from jinja2 import Environment, PackageLoader

from genophenocorr.model import Cohort, Variant
from genophenocorr.model import Cohort
from ._formatter import VariantFormatter


class CohortViewable:
Expand Down Expand Up @@ -33,7 +34,7 @@ def __init__(
def process(
self,
cohort: Cohort,
transcript_id: str
transcript_id: typing.Optional[str] = None
) -> str:
"""
Create an HTML that should be shown with display(HTML(..)) of the ipython package.
Expand All @@ -51,7 +52,7 @@ def process(
def _prepare_context(
self,
cohort: Cohort,
transcript_id: str
transcript_id: typing.Optional[str]
) -> typing.Mapping[str, typing.Any]:

hpo_counts = list()
Expand Down Expand Up @@ -95,6 +96,8 @@ def _prepare_context(
var_effects_list.append({"effect": effect, "count": count})
else:
has_transcript = False
if transcript_id is None:
transcript_id = "MANE transcript ID"
# The following dictionary is used by the Jinja2 HTML template
return {
"n_individuals": len(cohort.all_patients),
Expand All @@ -112,17 +115,10 @@ def _prepare_context(
"transcript_id": transcript_id,
}

@staticmethod
def get_display(variant: Variant, transcript_id: str) -> str:
    """Choose the label used to display `variant`.

    Args:
        variant (Variant): the variant to label.
        transcript_id (str): the transcript whose annotation should be used.
    Returns:
        str: `hgvs_cdna` of the first annotation matching `transcript_id`
        or the variant key if no annotation matches.
    """
    on_transcript = next(
        (ann for ann in variant.tx_annotations if ann.transcript_id == transcript_id),
        None,
    )
    if on_transcript is None:
        # No annotation for the transcript, fall back to the coordinate-based key.
        return variant.variant_coordinates.variant_key
    return on_transcript.hgvs_cdna

@staticmethod
def get_variant_description(
cohort: Cohort,
transcript_id: str,
transcript_id: typing.Optional[str],
only_hgvs: bool = True,
) -> typing.Mapping[str, str]:
"""
Expand All @@ -137,9 +133,10 @@ def get_variant_description(
"""
chrom_to_display = dict()
all_var_set = cohort.all_variants()
var_formatter = VariantFormatter(transcript_id)
for var in all_var_set:
var_string = var.variant_coordinates.variant_key
display = CohortViewable.get_display(variant=var, transcript_id=transcript_id)
display = var_formatter.format_as_string(var)
if only_hgvs:
# do not show the transcript id
fields = display.split(":")
Expand Down
50 changes: 50 additions & 0 deletions src/genophenocorr/view/_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import abc
import typing

from genophenocorr.model import Variant

T = typing.TypeVar('T')

class Formatter(typing.Generic[T], metaclass=abc.ABCMeta):
    """
    Base class for objects that format an item of type `T` as a human readable string.
    """

    @abc.abstractmethod
    def format_as_string(self, item: T) -> str:
        """
        Format the item as a human readable string, e.g. for showing the item
        in tables or other visualizers.

        Args:
            item (T): an element to be formatted
        Returns:
            str: a human readable string
        """

class VariantFormatter(Formatter[Variant]):
    """
    Formats a `Variant` as a human readable string.

    The annotation is looked up for the transcript given by `tx_id`
    or for the variant's preferred transcript if `tx_id` is `None`.
    """

    def __init__(self, tx_id: typing.Optional[str] = None) -> None:
        self._tx_id = tx_id

    def format_as_string(self, item: Variant) -> str:
        """
        Format the variant, preferring the HGVS cDNA expression over the variant key.

        Args:
            item (Variant): An object of class `Variant` representing a variant.
        Returns:
            str: The HGVS cDNA expression (or the `"Long HGVS"` placeholder if the expression
            is longer than 50 characters), otherwise the variant key.
        """
        if self._tx_id is None:
            tx_ann = item.get_preferred_tx_annotation()
        else:
            tx_ann = item.get_tx_anno_by_tx_id(self._tx_id)

        hgvs = None if tx_ann is None else tx_ann.hgvs_cdna
        if hgvs is not None:
            return "Long HGVS" if len(hgvs) > 50 else hgvs

        # No usable HGVS expression, fall back to the coordinate-based key.
        key = item.variant_coordinates.variant_key
        if key is not None:
            return key

        # To be reevaluated
        return f"Variant {item} has no string format."
3 changes: 1 addition & 2 deletions tests/model/test_variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from genophenocorr.model import Variant, Cohort


class TestVariant:

@pytest.fixture
Expand All @@ -28,4 +27,4 @@ def test_get_hgvs_cdna_by_tx(
):
hgvs = some_variant.get_hgvs_cdna_by_tx_id(transcript_id=tx_id)

assert hgvs == expected
assert hgvs == expected
24 changes: 24 additions & 0 deletions tests/view/test_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import pytest

import typing

from genophenocorr.model import Cohort
from genophenocorr.view import VariantFormatter

class TestFormatter:
    """Tests of `VariantFormatter`."""

    @pytest.mark.parametrize(
        "variant, expected",
        [
            ('12_56004525_56004525_A_G', "NM_001032386.2:c.1136A>G"),
        ]
    )
    def test_variant_formatter(
        self,
        variant: str,
        expected: str,
        suox_cohort: Cohort,
    ):
        """A formatter created without a transcript ID should produce the expected string."""
        subject = suox_cohort.get_variant_by_key(variant)
        actual = VariantFormatter().format_as_string(subject)

        assert actual == expected

0 comments on commit 2c7f645

Please sign in to comment.