Skip to content

Commit

Permalink
Merge pull request #204 from monarch-initiative/large_variant
Browse files Browse the repository at this point in the history
Merging after seeing passing tests. I must force-merge because tests for 3.12 will never run on this PR but they are required to meet the specs.
  • Loading branch information
ielis authored Aug 20, 2024
2 parents 7b8a4cf + d887e6a commit 31452de
Show file tree
Hide file tree
Showing 8 changed files with 226 additions and 10 deletions.
5 changes: 3 additions & 2 deletions src/genophenocorr/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ def default(self, o):
'label': o.label,
'meta_label': o.meta_label,
}
elif isinstance(o, (Genotype, VariantEffect, Strand)):
elif isinstance(o, (Genotype, VariantEffect, Strand, VariantClass)):
# enums
return o.name
elif isinstance(o, Phenotype):
return {
Expand Down Expand Up @@ -220,7 +221,7 @@ def object_hook(obj: typing.Dict[typing.Any, typing.Any]) -> typing.Any:
elif GenophenocorrJSONDecoder._has_all_fields(obj, _IMPRECISE_SV_INFO_FIELDS):
return ImpreciseSvInfo(
structural_type=hpotk.TermId.from_curie(obj['structural_type']),
variant_class=obj['variant_class'],
variant_class=VariantClass[obj['variant_class']],
gene_id=obj['gene_id'],
gene_symbol=obj['gene_symbol'],
)
Expand Down
10 changes: 7 additions & 3 deletions src/genophenocorr/model/_variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,8 @@ def variant_key(self) -> str:
Get a readable representation of the variant's coordinates.
For instance, ``X_12345_12345_C_G`` for a sequence variant or ``22_10001_20000_INV`` for a symbolic variant.
If the key is longer than 50 characters, the 'ref' and/or 'alt' allele (whichever is longer than 10 bp) is replaced with a placeholder that shows only its length in bp.
Example: ``X_1000001_1000027_TAAAAAAAAAAAAAAAAAAAAAAAAAA_T`` -> ``X_1000001_1000027_--27bp--_T``
.. note::
Expand All @@ -471,13 +473,15 @@ def variant_key(self) -> str:
else:
key = f'{self.chrom}_{self.start + 1}_{self.end}_{self.ref}_{self.alt}'
if len(key) > 50:
ref = None
alt = None
if len(self.ref) > 10:
ref = f"--{len(self.ref)}bp--"
else:
ref = self.ref
if len(self.alt) > 10:
alt = f"--{len(self.alt)}bp--"
return f"{self.chrom}_{self.start + 1}_{self.end}_{ref if not None else self.ref}_{alt if not None else self.alt}"
else:
alt = self.alt
return f"{self.chrom}_{self.start + 1}_{self.end}_{ref}_{alt}"
else:
return key

Expand Down
1 change: 1 addition & 0 deletions src/genophenocorr/preprocessing/_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def annotate(self, item: ImpreciseSvInfo) -> typing.Sequence[TranscriptAnnotatio
variant_effects=variant_effects,
affected_exons=affected_exons,
protein_id=None,
hgvsp=None,
protein_effect_coordinates=None,
)
tx_annotations.append(annotation)
Expand Down
4 changes: 2 additions & 2 deletions tests/analysis/test_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ def test_get_count(
all_counts = results.all_counts
assert isinstance(all_counts, typing.Mapping)

# We tested 69 HPO terms
assert results.total_tests == 69
# We tested 74 HPO terms
assert len(all_counts) == 74

# The index of all_counts is a tuple of (HPO TermId, BooleanPredicate).
# Let's test Seizure - we should have one row for each Patient Predicate
Expand Down
2 changes: 2 additions & 0 deletions tests/model/test_cohort.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ def test_all_transcript_ids(
"NM_001032387.2",
"NM_001351089.2",
"NM_000456.3",
"NM_016373.4",
"NM_013275.6"
}

def test_variant_effect_count_by_tx(
Expand Down
40 changes: 38 additions & 2 deletions tests/model/test_variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

import typing

from genophenocorr.model import Variant, Cohort
from genophenocorr.model import Variant, Cohort, VariantCoordinates
from genophenocorr.model.genome import GenomeBuild, GenomicRegion, Strand


class TestVariant:

Expand All @@ -27,4 +29,38 @@ def test_get_hgvs_cdna_by_tx(
):
hgvs = some_variant.get_hgvs_cdna_by_tx_id(transcript_id=tx_id)

assert hgvs == expected
assert hgvs == expected


class TestVariantCoordinates:
    """Tests of the readable key exposed by ``VariantCoordinates.variant_key``."""

    @pytest.mark.parametrize(
        "contig_name, start, end, ref, alt, change_length, expected",
        [
            # Single-base substitution: both alleles are written out.
            ("chrX", 100, 101, "C", "T", 0, "X_101_101_C_T"),
            # Short insertion: both alleles are written out.
            ("chrY", 150, 152, "G", "GG", 1, "Y_151_152_G_GG"),
            # 40 bp alt allele: the expected key shows only the allele length.
            ("chr16", 1000, 1040, "A", "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG", 39, "16_1001_1040_A_--40bp--"),
            # Symbolic allele: the expected key ends with the bare symbol.
            ("chr2", 200, 301, "N", "<DEL>", 100, "2_201_301_DEL"),
        ],
    )
    def test_variant_key(
        self,
        genome_build: GenomeBuild,
        contig_name: str,
        start: int,
        end: int,
        ref: str,
        alt: str,
        change_length: int,
        expected: str,
    ):
        """Build coordinates on the named contig and compare the key with `expected`."""
        contig = genome_build.contig_by_name(contig_name)
        # The lookup must succeed for the test to be meaningful.
        assert contig is not None

        region = GenomicRegion(
            contig=contig,
            start=start,
            end=end,
            strand=Strand.POSITIVE,
        )
        coordinates = VariantCoordinates(
            region=region,
            ref=ref,
            alt=alt,
            change_length=change_length,
        )

        assert coordinates.variant_key == expected
173 changes: 172 additions & 1 deletion tests/test_data/SUOX.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions tests/view/test_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class TestFormatter:
"variant, expected",
[
('12_56004525_56004525_A_G', "NM_001032386.2:c.1136A>G"),
('16_78386858_78425054_--38197bp--_A', 'NM_016373.4:c.517_791del')
]
)
def test_variant_formatter(
Expand Down

0 comments on commit 31452de

Please sign in to comment.