Skip to content

Commit

Permalink
Fix branches in parsing GenomicInterpretation. Test parsing HGVS expr…
Browse files Browse the repository at this point in the history
…ession.
  • Loading branch information
ielis committed Oct 26, 2023
1 parent a198076 commit 58bf50a
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 14 deletions.
32 changes: 26 additions & 6 deletions src/genophenocorr/preprocessing/_phenopacket.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def find_coordinates(self, item: GenomicInterpretation) -> typing.Tuple[VariantC
variant_descriptor = item.variant_interpretation.variation_descriptor

vc = None
if variant_descriptor.vcf_record.IsInitialized():
if self._vcf_is_available(variant_descriptor.vcf_record):
# We have a VCF record.
contig = self._build.contig_by_name(variant_descriptor.vcf_record.chrom)
start = int(variant_descriptor.vcf_record.pos) - 1
Expand All @@ -58,9 +58,10 @@ def find_coordinates(self, item: GenomicInterpretation) -> typing.Tuple[VariantC

region = GenomicRegion(contig, start, end, Strand.POSITIVE)
vc = VariantCoordinates(region, ref, alt, change_length)
elif variant_descriptor.variation.IsInitialized():
elif self._cnv_is_available(variant_descriptor.variation):
# We have a CNV.
seq_location = variant_descriptor.variation.copy_number.allele.sequence_location
variation = variant_descriptor.variation
seq_location = variation.copy_number.allele.sequence_location
refseq_contig_name = seq_location.sequence_id.split(':')[1]
contig = self._build.contig_by_name(refseq_contig_name)

Expand All @@ -69,7 +70,7 @@ def find_coordinates(self, item: GenomicInterpretation) -> typing.Tuple[VariantC
start = int(seq_location.sequence_interval.start_number.value) - 1
end = int(seq_location.sequence_interval.end_number.value)
ref = 'N'
number = int(variant_descriptor.variation.copy_number.number.value)
number = int(variation.copy_number.number.value)
if number > 2:
alt = '<DUP>'
else:
Expand All @@ -84,8 +85,8 @@ def find_coordinates(self, item: GenomicInterpretation) -> typing.Tuple[VariantC
if expression.syntax == 'hgvs.c':
vc = self._hgvs_finder.find_coordinates(expression.value)
break
else:
# No way we can get something from this `GenomicInterpretation`!

if vc is None:
raise ValueError('Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` '
'but did not find one')

Expand All @@ -95,6 +96,25 @@ def find_coordinates(self, item: GenomicInterpretation) -> typing.Tuple[VariantC

return vc, gt

@staticmethod
def _vcf_is_available(vcf_record) -> bool:
"""
Check if we can parse data out of VCF record.
"""
return (vcf_record.genome_assembly != ''
and vcf_record.chrom != ''
and vcf_record.pos >= 0
and vcf_record.ref != ''
and vcf_record.alt != '')

@staticmethod
def _cnv_is_available(variation):
seq_location = variation.copy_number.allele.sequence_location
return (seq_location.sequence_id != ''
and seq_location.sequence_interval.start_number.value >= 0
and seq_location.sequence_interval.end_number.value >= 0
and variation.copy_number.number.value != '')

@staticmethod
def _map_geno_genotype_label(genotype: str) -> Genotype:
"""
Expand Down
30 changes: 22 additions & 8 deletions src/genophenocorr/preprocessing/_test_variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,36 +7,50 @@
# pyright: reportGeneralTypeIssues=false
from phenopackets import Phenopacket, GenomicInterpretation

from genophenocorr.model.genome import GRCh38
from genophenocorr.model.genome import GenomeBuild, GRCh38

from ._api import VariantCoordinateFinder
from ._phenopacket import PhenopacketVariantCoordinateFinder
from ._protein import ProteinAnnotationCache, ProtCachingFunctionalAnnotator
from ._uniprot import UniprotProteinMetadataService
from ._variant import VariantAnnotationCache, VarCachingFunctionalAnnotator
from ._vep import VepFunctionalAnnotator
from ._vv import VVHgvsVariantCoordinateFinder


@pytest.fixture
def pp_vc_finder() -> PhenopacketVariantCoordinateFinder:
return PhenopacketVariantCoordinateFinder(GRCh38)
def build() -> GenomeBuild:
return GRCh38


@pytest.fixture
def hgvs_vc_finder(build: GenomeBuild) -> VariantCoordinateFinder:
return VVHgvsVariantCoordinateFinder(build)


@pytest.fixture
def pp_vc_finder(build: GenomeBuild,
hgvs_vc_finder: VariantCoordinateFinder) -> PhenopacketVariantCoordinateFinder:
return PhenopacketVariantCoordinateFinder(build, hgvs_vc_finder)


@pytest.mark.parametrize("pp_path, expected",
[('test_data/deletion_test.json', '16_89284129_89284134_CTTTTT_C'),
('test_data/insertion_test.json', '16_89280829_89280830_C_CA'),
('test_data/insertion_test.json', '16_89280829_89280829_C_CA'),
('test_data/missense_test.json', '16_89279135_89279135_G_C'),
('test_data/duplication_test.json', '16_89279850_89279851_G_GC'),
('test_data/missense_hgvs_test.json', '16_89279135_89279135_G_C'),
('test_data/duplication_test.json', '16_89279850_89279850_G_GC'),
('test_data/delinsert_test.json', '16_89284601_89284602_GG_A'),
('test_data/CVDup_test.json', '16_89284524_89373231_DUP'),
('test_data/CVDel_test.json', '16_89217282_89506042_DEL')
('test_data/CVDup_test.json', '16_89284523_89373231_DUP'),
('test_data/CVDel_test.json', '16_89217281_89506042_DEL')
])
def test_find_coordinates(pp_path, expected, pp_vc_finder):
fname = resource_filename(__name__, pp_path)
gi = read_genomic_interpretation_json(fname)

vc, gt = pp_vc_finder.find_coordinates(gi)

assert expected == vc.variant_key
assert vc.variant_key == expected


def read_genomic_interpretation_json(fpath: str) -> GenomicInterpretation:
Expand Down

0 comments on commit 58bf50a

Please sign in to comment.