From bf49389b2c8a3db8e18837895316d8782451c337 Mon Sep 17 00:00:00 2001 From: Lauren Rekerle Date: Wed, 7 Aug 2024 10:20:11 -0500 Subject: [PATCH 1/8] Fixed large variants in variant_key and added documentation and a test. --- src/genophenocorr/analysis/_gp_analysis.py | 4 +- src/genophenocorr/model/_variant.py | 9 ++- tests/analysis/test_examples.py | 4 +- tests/model/test_cohort.py | 1 + tests/test_data/SUOX.json | 72 ++++++++++++++++++++++ tests/view/test_formatter.py | 1 + 6 files changed, 84 insertions(+), 7 deletions(-) diff --git a/src/genophenocorr/analysis/_gp_analysis.py b/src/genophenocorr/analysis/_gp_analysis.py index 7b1acad3..871c9fc0 100644 --- a/src/genophenocorr/analysis/_gp_analysis.py +++ b/src/genophenocorr/analysis/_gp_analysis.py @@ -209,7 +209,7 @@ def __init__( # such as HP:0001611 Hypernasal speech, but it also contains "general" terms that # we skip according to this heuristic, e.g., HP:0030680 Abnormal cardiovascular system morphology for t in self._top_level_terms: - l2_terms = hpo.graph.get_children(t.tid, include_source=False) + l2_terms = hpo.graph.get_children(t.id, include_source=False) for t in l2_terms: tid = t label = hpo.get_term_name(tid) @@ -219,7 +219,7 @@ def __init__( # such as HP:0031109 Agalactia, but it also contains "general" terms that # we skip according to this heuristic, e.g., HP:0006500 Abnormal lower limb epiphysis morphology for t in self._second_level_terms: - l3_terms = hpo.graph.get_children(t.tid, include_source=False) + l3_terms = hpo.graph.get_children(t.id, include_source=False) for t in l3_terms: tid = t label = hpo.get_term_name(tid) diff --git a/src/genophenocorr/model/_variant.py b/src/genophenocorr/model/_variant.py index a71703c5..0fb6e95f 100644 --- a/src/genophenocorr/model/_variant.py +++ b/src/genophenocorr/model/_variant.py @@ -304,6 +304,7 @@ def variant_key(self) -> str: Get a readable representation of the variant's coordinates. For instance, ``X_12345_12345_C_G`` for a sequence variant or ``22_10001_20000_INV`` for a symbolic variant. + If the 'ref' or 'alt' sequences are over 50 bases, they are replaced with the number of bases. Example: ``16_78386858_78425054_--38197bp--_A`` .. note:: @@ -314,13 +315,15 @@ def variant_key(self) -> str: else: key = f'{self.chrom}_{self.start + 1}_{self.end}_{self.ref}_{self.alt}' if len(key) > 50: - ref = None - alt = None if len(self.ref) > 10: ref = f"--{len(self.ref)}bp--" + else: + ref = self.ref if len(self.alt) > 10: alt = f"--{len(self.alt)}bp--" - return f"{self.chrom}_{self.start + 1}_{self.end}_{ref if not None else self.ref}_{alt if not None else self.alt}" + else: + alt = self.alt + return f"{self.chrom}_{self.start + 1}_{self.end}_{ref}_{alt}" else: return key diff --git a/tests/analysis/test_examples.py b/tests/analysis/test_examples.py index 0eef340e..193cf68b 100644 --- a/tests/analysis/test_examples.py +++ b/tests/analysis/test_examples.py @@ -65,8 +65,8 @@ def test_get_count( all_counts = results.all_counts assert isinstance(all_counts, typing.Mapping) - # We tested 69 HPO terms - assert len(all_counts) == 69 + # We tested 74 HPO terms + assert len(all_counts) == 74 # The index of all_counts is a Tuple with (HPO TermId, BooleanPredicate # Let's test Seizure - we should have one row for each Patient Predicate diff --git a/tests/model/test_cohort.py b/tests/model/test_cohort.py index d0e397da..bb6685a2 100644 --- a/tests/model/test_cohort.py +++ b/tests/model/test_cohort.py @@ -16,6 +16,7 @@ def test_all_transcript_ids( "NM_001032387.2", "NM_001351089.2", "NM_000456.3", + "NM_016373.4", } def test_variant_effect_count_by_tx( diff --git a/tests/test_data/SUOX.json b/tests/test_data/SUOX.json index b24e58dd..3c13af15 100644 --- a/tests/test_data/SUOX.json +++ b/tests/test_data/SUOX.json @@ -7174,6 +7174,78 @@ } } ] + }, + { + "labels": { + "label": "Patient 15", + "meta_label": "PMID_30356099_Patient_15" + }, + "phenotypes": [ + { + "term_id": "HP:0000470", + "name": "Short Neck", + "is_present": true + } + ], + "diseases": [ + { + "term_id": "OMIM:616211", + "name": "Developmental and epileptic encephalopathy 28", + "is_observed": true + } + ], + "variants": [ + { + "variant_info": { + "variant_coordinates": { + "region": { + "start": 78386857, + "end": 78425054, + "contig": { + "name": "16", + "genbank_acc": "CM000674.2", + "refseq_name": "NC_000016.10", + "ucsc_name": "chr16", + "length": 133275309 + }, + "strand": "POSITIVE" + }, + "alt": "A", + "ref": "AGCATAAAGCCAAGGTAGAAGCAATGACCCTGGACCTCGCTCTGCTCCGTAGCGTGCAGCATTTTGCTGAAGCATTCAAGGCCAAGAATGTGTGAGTGTTCCAGTGGAGGGTTATAGATCATAATTTCTTGCTATTGTAATATCTTTATCAGATGAACACAATTGGGAGAATGCAAGGCTGTTGTGTTGTCTTGGCGTCCAAACAGGAGGCTCATTTATATTGGCCCTGTTAAGGTGAACCGTATTTTCTTGACTCACAGTCACCTTCATTATGAGATGTGTCATCAATCTAATAACAGCTTCCCACATACCAAAAGAGAAGACACTATTAAAGCACTAGTAAAAGTGGCTAATAAAAGCTTGGCAATAGTAAGATGCATCCTGATTATAAGATTTTTTGTAGTGCATTTCAGAATGGAGTAAGAGTATATTTAAATTGCATTCAGGAACAAGTAAACTCAGTTATCCAGTATGGCAGGGAGGTTGACAATCCAAGCACCCAAAAGACCTCTAGTTTCTAAAGCCTTCGATGATTTGATGTGGTACATGGATGTGGTTCCAAAAAACATGGACTCACATTCCTTTTATTTATTTTTTTTCATCCTTTTCAGTCTTTCAAAATTCCAGTTGGAGAAAGCCTTAGTTAGGGCCTAGCATATTTTGATCCTATCATATGCTAGCATCCCTTTCTAACAGAGAAGGTTGTAGGAGAAAGGGAGAGAAGCGGAAGGGGGTGGGGAGACAGAGAGACAGACAGGAGGCCTCAAACCCTGAAACACTGAGCTAAGGAAAGTGATCATGGCAAGCTACACTAATTACAATACTTTGTTTCCAAGTGTTTATTTTTACTCATATTTAGGGCAGGCAATCCTGGTTTCTCGTTGAACATAGAGGTTTGAATTTCATTAATAAATAACTTCATTTATTTTTTTTCAGTGACTTGATTCAAACATGAGGATTAAGTTAATAATAGCACAGGTTGTGCGAAGGATAAGATAATTACACAAGAGGCACCAGAACCACTGAATGTGGAGAGCTCTCATAAATGACAAGCTGCCTTTGGGTTAGGCTCTGTTGGGAACATTAGTTCTGCAGTGTTGCAAGCAGATGAAGGATGTGAGGGAAGGGATCTTAAACCAGATATTCAAATGGCCCTGTGGGGAGCTGACACCACACTGCTGTCTAGTGTCCAATTCTCCTTGCATGGCTGTGTCACCCAGGTTGGAACGTAGTGCACAATCTCGCCTTAATGCAACGTCCCACTGTGGGCCCAAGTGATTTTCCTGTCCCAGCCTCCTGAGTAGCTGGGACTACAGATGCCTGCCACCATGGCCTGCTAATTTTTGTATTTTTAGTAGACATAGGGTTTCACCATATTGGTCAGACTGGTCTCAAACTCCTGGCCTCAGGTGATCCACCCGCCTCAGCCTCTGAAAATGCTGGGATTACACACATGAACCACTGCGCCCAGCCGCTCTACTTTATTAGATTTAAAAAGTTTGCTCTCAGCTGGGTGCAGTGGCTTATGCCTGTAATCCCAGCAGTTTGGGAGGCCAAGGCGGGGAGGGTCATGAGGTCAAGAGATCAAGACCATCCTGGCCAACATTGTGAAACCCCATCTCTACTAAAGATACAAAAAATTAGCTGGGTGTAGTGGTGCACACCTCTAGTCCTAGCTACTTGGGAGGCTGAGGCAGGAAAATCGCTTGAACCTGGGAGGCAGAGGTTGCAGTGAGCCAAGATGGCACCACTGCACTCCAGCCTGGCGACAGATTGAGACTCCGTCTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGTTTGTACTCAGCCTGGCGTGGTGGCTCACGCCTGTAATCCCAGCACTTTGTGAGGCCGAGGTGGGCGGATCACCTGAAGTCAGGAGTTCGAGACCAGCCTGGGCAACAAGGTAAAACCCCGTCTCTACTAAAAATGCAAAATTAACTGGGCGGGGAGGCACATGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATGGCTTGAACCCGGGAGGTGGAGGTTGCAGTGAGCTGAGACTGAACCAGTGCACTCCAGCCCGGGTGACAAGAGCAAAACTCTGTCTCAAAAAAAAAAAAAAAAATTGACTCAGGCTCTTGCTGGAGTATGTCAGTGTCCCAAGTCACTGAGGTCTACATTAGAGTCAGTCTTAGTGAGAGTTGCAACATCGCCTAAGCGCAGACCCCAAGCTGGCTTGTAGGAACAGTGAGATAACATCCCCCAGCCAGCCAGATCTGAGAGAGCCCTCCTGTGTGTGTCCTTTGCAGCGCTTGGCTGTTAGTAGCTTCCACCCTTTGCCAGAACTATCAAGAGGCACCTCAACAGGCTGCAACACTAGCATTCAAGACATTTGTTCGCTGATTTGCTTCGTCATTCATTTATCCACCCGTTCCTTCACTTCACACACATTTCAGGAGCACCTCTAAAGCCCCAGACATTCTTCTGGGCTCTGAGAAGAGAGTGGTGGCCCTGGAAGCCACAGTAACCGCACACAAGCTGTCACCTCAGTTGTCAGCTATTATGGATGGCTGACTCCTCAAACTCATCTGTCTGTGGTTGTTTGGGAGGGAATGATAATGACAAGAACAGCAGCGGAGTCAGCAGCTCATTTACTGAGGGCTTATGGGCCATACGTGGAAAATATTTTGCATGTATGTTTTGATTCAATTCTCAAAGCAGTCCTTGGGAGTAGATGCTATTAATATCTCTACTATACAGATAAGGAAAGTGAGGCTAGAGAGGATATATAATTTATCCAAGGCATGTGGAATGTGGGTAGAGGGGCCGATTTAGCCTCTTCACGGCTCCAACTTGACCACCGTACTATACTGTGTTTCATTTTAGTTTATTTTTTATTTATTTTTTGAGACAGGGTCTCGCTGTGTTGCCCAGGCTGAAGTGCAGTGGTGCAATCTCTGTTCATTGCAACCTCTGGCCTTGGGTTCAAGCGATTCTCATGCCTCATCCTCCAGAGTAGCTGGGATTACAGGTGCAGGTTACCACACACAGCTAATTTTTGTATTTTCTGTAGAGATGAGGTTTCACCATGTTGGCCAGGCTAGTCAACTCCTGACCTCAAGTGATCTCTCCACCTTGGCCTCCCAAACTGCTGGAATTACAGGCGTGAGCCACCCTGCCCGACCCTTTCATTTTAAATGTGATAGAAACAGACTCAGAGAATGTGAGTGACTTTCGCAGGGTCACTTAGCAGGTTGGTGACGGGCATGGGGTTTCACTCCAGTCTGCCTCTTGTTCTTTTCCTCATAGCATGTGAGATCTCACAATTCACTGCCGCTGAATAACTTTGTCCAGTTCCTCTGTCAGAATGTGTTGGAAGTGAGCAGCTTTGCTGAAGAGAGCCAACTGTTACATAATAGGATCATCTTCCACGTCTTAACTGGACCATTAGTTTTTAACCATTAAGTAGTTGGGAAATCATCAAGTTCAAGTGCCTTTTGTAATCACCAAGCTCAAAACCTTGGAGGAAACTCTCACCCATTTTGCCCTAATCCCAGATTTTAGCACAGGACTTTGGAATTTTAGAAAGTACTTCTAGGTTGAGAAACAAGGGTGATTTTCCATATAGAGACCTCTGTAGCACAACCCTCTTTGTATGTGTCCCATGAGAGTACAGGATTTTACATGGCTTAGAGAAATACTGTGGTGGTGAAAATATGCATAAAAAAGCGAGAGATTTTACTTTTGAAATGCTGCCAAAGTGAAATCTCTCTTGGATAGGCAGATGACAGTTTAGTACTTGCAGGTCTTTCGATTCTGACAACTGTCAGAAGTGAATTTAAGTGCTGGTGACTAGGCTGCCAGAGAAACACATGTAACTCTATTCTTATCACCCCCCATCTTCAGACATTAAAATTTTCTTGTGTGTAAAACATAATGACAAATGAAAGAAAGGCTGCCAGTTCAGAACCAATCTCTTTTCACGCTGTTGCATTGAGGAGAGGAAAAAATCCCTTAAATTTGAATTCCATGTGGATGCTCTTATTTAATATGCCGTTTCAAAGCCTGTATGTGATCAAGCCAGAGTATTATCAGGCTCTGAGTTCCTGTGTGTATTCCCTTGGTCCATGCTCCATCTCAAGTGTTTCTCCACACTGGGATAGGCAGCATATTGTGTAGTCTTAGAGCAGAAAAGGTAGAAACAGATTCTTAGTACAAACACTGGTGAGAAAAGCAGCATATTGTACTGTGGAACCACCCGCCGTCTGCCTTCTGTGGGAGATGGGGATCAGCCTCTTCCTGGATCTCTTCTTTTTATTCGGGTGCTGGGAGGTGAGGCTGCAAAGGAGGCTGTTGTAGAGGGAGCCCTCCTAACCATGATTATGTAAACAATAGCTGCTAGGCAACTTGTTGTTTGTTTTATGCAAGGCAGTGTGCTAAGCACTTACATACATGTATTCTGTCTGTAGTTTGTTATAAGAGTCCTGCACACTTAACCATGATTATCCCTACTTTGCAAATTACAAGGTTGCAGTGCAGAGAGCCTAAGACACTTTCTCAACTTGAGCAAGTAGGAGATCCAGAATTTGAACCCAAATCTCTCTGAATCCAAAACGACTTTTTGTTACTTTGCACACTTCAGTCATAGGAATTTCAGTGTGGATTTCTGTATTGGTTATGTATTGCCGAGTAAGAAATGACTCTGGCCAGGCACGGTGGCTCACACATGTAATCCCAGCACCTGGGGAGGCCGCAGTGGGTGGATTGCTTGAGGCCAGGAGTTTGAGAACAGCCTGGCCAACATGGCAAAACCCCGTCTGTACTAAAAATACAAAAATTAGCCGGGCATGGTGATGCATACCTGAAGTCCCAGCTACTCAGGAGGCTGAGGCATGAGAATTGCTTGAGCTCTGGGGTGGGCAGAGGTTGCAGTGAGCCGAGATCATGCCTGGGGGACAGTGAGACTCTGTCTCAAACAAATTACTCCAAAATTTAGTGGCTAAAAAACAACGTAAAACAAGAAACATTTCTTTTGTGCCTCCTCTTTTCCTCCCTCCCTCTTTCTCATAGTTAATTTGTTTGAATCAGGATTCAAATAAGGTCGATGCATTTTTAATCTATAGGTTTTGTCTCCTTTTTTTTTTTTTCCTTAAAAAAATTTCTTTTAATGCAACAGATTTAAGGGACCAGGGTGTTCTCTAGAGCAGGGGTCCATGACCTCTAGGCCACAAACCAATAGCGGAGCCGTGGCCTGTTATAAATTGGGTGGCATGGCAGGGAGTGAGTGGTGGGCAAGCATAGGAAGCTGCGTCTGTGCTTAGAGCCGCTCTCTGTCACTCTCATTAGTGCCTCAGGTCCTCCCCCTGTCAGATCAGCAACAGCATTAGTCTCATAGGACCGTGATCCCTATTGTGAGCCACACATGCAAGGGATCTAGGCTGCATGCTCCTTACGAAAATCTAATGCCTGATGATCTGTCACTGCCTCCCATCACCTTCAAGATGGGATTGTCTAGTTCTAGGAAAACAAGCTCAGGGCTCCCACCGATTGTACATTATGGTGAGTTGGATAACTATTTCATTATATATTACAACGTAATAATAATAGAAATAAAGTGCACAATAAATGTCATCTGCTTGAATCGTCCCAAAGCCACCCCCACTCCTTTGAGTCTATGGAAAAATTGTCTTCCAGAAAACCACTTGCTGGTGCCAAAAAAATTTGGAAACCTCTGCTCTAGAGATTTCTGCAGTCTGTACTTTGGTGATTGCATCTCTGTGGTGTCTTTTAACATACTCTTCTGTCTCCTGAGTTTCCTGTGAAAGGGTAGATAACTCAAGAGACTCAATCAACTTCGTGTTTAATGTTTTCGTAAGAATACTTCAGAGGCAGTGGTGTAATCGTCCAAGAGTACACACAGGCCTGATTGCCAAAGCCAGTTTTCTTCAGGCCCAGCACGTCCTGACCTCCTCACTCCTCTGCCCACTCTCTGTTTACTCACTCCCATTTTCATTCATTGATTCTGTTATTCTCCACTAAGTTTTCTGGGTGGTCAGCCCAGGAAGCCTCAGCCTGCTTCCAGATAGTTCTCCTCAGAGTTAGCGGTAGCAGCGGTCGTCGGCCAGTGTGTCTCGGGTCAAATTATTTTGGGCATTACTGGAAAGTTCCAAACCAAAGCCTGTGGTAAGTTCCACCCAAACTGGCTTGTGAGGGATATTTTTGTTTAAACTGCATTCCTGGAGAGGAGCCAGACTTCCTACTGCCTGGCAGCAGTGGTGTCCATAGGAGTATGAAGAGCTGGGACCCTTTCTAATCACTCAGACCAAATATTGGGGTTTTCTGAAATGGACTGAGAAATAACAGTATGTTTTTATGAGGCTTTGCACATTTTCCTTCCTAGCAGTAGCTGCTTAGTCTACTGAAAAGTGCATATTTTGAACAGGGCCTAGAAGAGTTAACAGCTCCTAGAGAGAGGTGCTCTGTAATACTTTTTCTTCTTCAAAAAATGGTTTATGGCTGGGCGCAATGGTTCATGCCTATAATCCCAGCGCTTTTGGGAGGCCAAGGTGAGAGGATTGCTTAAGCCCAGAAACGTGAGACCAGCCTGGGCAACGTAGTGAGACGCTGTCTCTATATTACAAAATTTTTTAAAAAACGGTTTGTAGGTCCTTAAGTCCCTGATAAAATAGAGAACTGAATTGCAATCCTGGAACTTAAAAAGTTGGTGACGACACCTGAGATATTTATTACTTAGATTGCAGTTACTGGTCAGCTTGTATAATACTGACCAAGGGTTTTGATTCTTCCTGGAATTGATAGGAAATTCATATTAAAATAATTACCCAAGTCCAAACATTTTTAGAACTGCATTTTTGATCATGGATTTTTATGTCTCTTCTGAACTTTCTGTCACCGGTATAATTTAAAGAAATTATACTTAAGCTTTGTCTCACTTAGAAGATAATATAGAACAGTGGTGTTTTTTTAATTAAAAAAAAGGTTAAAATAACGGTTTTGTATCCTTGCTTTACTTCTTAAACATATGGGAGGAAAAAAAATCTTTAACAAGTTTATTTATTTTCATTTTCTGCTAAATTACTTTCAGAACTTGAATCTACTAATCCCAGATATAATATTCTTGGATTCATATTCCAAATTTTGCTGTCTCAAATCCATCTAGGGAAGTGGGTGGGCTATAAATTATAAATAAATTCCAAATTTTGTGGGATGAATTACCCTGAAGACCAACGTGTAAATTACATATTAATCTTTCTTTTTCTCCCTAGCTCGGTTTTAAGAATAATGTTTTAGCCAACATATCTGCATTACTCTTGGCTCAATATGAGAAATCCATTTTTGGTTTGCCTAACAGAAGATCATCTTGCTTTGCTTCTCTACACAGTATGAAAACCCAAAGAAAAGAAAAACAGAGGCAGTTTTTTGCTCTAATGAATGCTCTAAATCTAGCTCTTAATTATGATTTTTTAAGGAAAATTTTGAAAAGTCTACAAGTTAAATTTTTTTTTCTATCCCATACATTTTCCATCCTAAGGCATTGAAAAAGCACACTGTGAAATACTTAGTGTATCTAGAAACATCAGGGAAGAATGCTTCCCTCCTAAGCAAAATTTTGCCTTCTGAAACTTTTTCAGCATTCAGTCTTTTTATATAATACTTAGAAAAATATTTCTGAAATAGATCATACACTCTCTTCCCAAAAACATCAAAGTATGACCGTAAAGGGCAGAGGTAGGTAAACTTCTTGTAAGGGGCCAGAGAGTGAATATTTGAGAATTTTCAGACTATTAAGATCTCTGTTGCAACTGCTTGCTTTTGCCGTGGTAGCCTGAAAGCAGCCATAGATAGTATGGAAATGGATGATCATGGCAGTGTTGCAATAAAACTTTACAAAACAGACAATGGGCCAGATTGGCCAGGGGCCATAGTATGCTACCCCTGGGCAACAACCTGTATGCCCTGGAGTAGTGTAAAGAACGTGGGTGTTGGGGGTCAACTGACGCTTCCAGCTCTACCACTTACTGGCTGTGTGGCTTTGGGCAAACTACTGAAAATCTCTCAGCGTCACTTTCCAAGTGTGTGTAATGTGTATTTTCACAGTGCTTTGCAGGTTGTTGATTATTGAAAATAGCCATAATGCATGAAATTACCAGACACATCTCACTTTATGGAGCCTGGGGCTATTGGTAATATGCATTTCTTTCTCATCTTGATCGTAAAATGATCTTAGAAAGGTTTCTGAGAATATATAGAGTTTAAGACAGCAATAAGACAACTAATTAATTAAACAGGAAAAGGGGATGTTGTGCTCAGAGAGGAAGTGTGGGTCTCATAAGGGCTTTCACAATCGTTTGAGAGGACACGTGTGATGTCTCATGCCTGTTATCCCAGCACTTTGGGAGGCCAAGGCAGGCAGGTTGCTTGAGTTCCGGAGTTTGAGACCAGCCTCGGTAATTTGGCAAAACCTTGTCTCTACAAAAATTACAGAAATTAGTTGGGTGTGGTGGTGCACACCTGTAGTCCCAGCTGCTTGGGAGGCTGACGAGTAAGGATCACTTGAGCCAGCATGGTGGAGGCTGCCATGATCATGCTACTGCACTCCAGCCTGGGCAACAGAGCCAGAACCTGTCTTGTAAAGAAAAGGAAAAAGAGAGAGAAGGGCAGAAAGAAAGAAGGGAAGGAAGAAAGGAAAATTGGGCCCAGGAATGATCTTTACAATGCCTGACAACCAAGAGAAGAAGGGAAATGAGCTTCACATTGCCTGCAAGCTCTAGGGTGACAAGAGCCAAGAGAAATTATTGTTACTGTAGTGATGTTCCACTGAGGATCATAAAGTACTTTATTACTCTACTGAGTATGGTTTTTTGTTTTGTTTTTTCTTTTTCTTTTTCTTTTTCTTTTTTGCTATTCTTTTGTTATTCTTGATTTATGCTGATGGAAAGCCATGGACCCAAGGATGCTTCACAGTTTTCTTTCGGAGTAAATGCTTAGATTCCATTTTCTTTGACATGACTATGTCTGTTCCTCTTGATTTTAGGCATCTTTTTCAGATGAGTTGCCATAAAAGCAGCCTGCTCTGGATAAGTGAGGTACAGCAGGACACACTGCAAATACTAGGAATCCTTAAGTACAGTGGAACCCCAGAGCACTCTACCTGCTTTCTTTCTCACCTCCTTAAAAACTTTTTTTGCCCTCACCTCATCATTTATTCAGCAGTCACAACAGTGCCAAGAACTTGGCTAGAGATTGGAAATAAAGCTTATGCCTTCTCTCATATCTCCTGTACCTTATTTCTTTCTTACAGTAGTTGTGATGCTTAGCAGTTTTTTGATAACTTTTTATGATGCCAACCTTCAAAAACCTGCCCCTGGTGGAGAGAATAATTATTACATCAATTAGGGGTCACTTAGCATGACATTTGTCGGAAAAAAAAAAGTTAGTGAGCCTTTTTGCCATATTAAAAGTCATCACTGCCAAGACATAAATGAAAATGTGTTCGAATTAACCACACCAATGTTCACAAAATAAACATTTTTGATTTCCCAACAGAATCCTAGGTTTAACTATCACTATCATCTTTCATGAAATCAAAGTCATATATGTAAATTGAACACAACTTTCCCTTCCATAGAGAGTAAAAACCACGCTTTGGAGGGTAGATACAATTACCCCAGGGTTGTCTTTTCCCACTCCTCACAATCCCACCAGTGCACATGCAAGGTGATGTCCTTCCTTTAGCTATAGCAAATAATGTTAATTATTGTTGGTGTTAAATAATGATTATGTAAAGCACTAGACTAGACATTCGTCGGCAAAGTTTTTCTGCAAAGAATCAGATAGTAAATATTTTTGCTCTCATCAGCCAGACAGTCTCTGCGGCAACCATTCAAGCATTGTTGAATACATTGTTGAGTGTTACATGAGACTATTGTAATATAAAAGTAGCCTGGGACAACACATAAATAACTGGGTGTGGCTGTGTCCTAATAAAACTTTATTTACAAAGAACAGGAAGTGGCTTGGATTTGGTATCTGGCCTGGCAGCTGTGGTTTACTACTCCTTGGACGGTGGCCCAGAGACCCTTTAAATGAAATTCATTTTACTAGCACCCTTTTTCATCATGAGAAATATATTCTGTTTTTCTTAGAAAATGGGTTATGTTAGGTCTGGTCAAGGTAAATAAGTGTTGAGAGTCGATGTTGTGTGCATAGTTAATTTCAATTCTTTGAAGAAACTCCCCCATGATATTTCACGGCTGAGAGAAGAGGAAAGAGTTTAAGTGGAACAGTGTGCTTTGCTGAGCTTTGGAAATATTACCATATAGGGAAGCAGGTCAATAAGACAACTAAGTGCTGTTTCAATAACGAAGATACTGAAGCGCTAATTGGAGTATGGAACCATATAATGATGATAATAATTGCTAATATTTATCAGCTATTTATCATGTGTCATGTACAGCTAAGCACTTACATACCATCCCATTTTATCCTTATAATGACTCTAAGAGTGGGTTAAAAAATGGCAGAAGAATGGCAGTGTTTAATGGTTCAGCCTGGTGCAATGATTAACCACATTTTACAGACTAAGAAATTAAGAACCTCAACCAAGTTCATGCTAGCTGGTACGCAGTAAGGCTAGAACTTCGTCCAAAATCTCTTCTTCTGTTGAGCTCAGCTTGCATAGTAGCTTGGAGAATCAGAAAGATCTGTCTCATTTGGGAAATATACCATGTAAAAAACATTGTTTCTAAAGGAGATTTGTCCCATGAGTAAAATAGATGATGGACAGCCACTGCCTAGTGGGACAATTAGAAAGGTCAGTTCAAGGTTGGAGGAGATGCTTCTTTCAGCCAATTTTCCTTTTTCTCAGGATCACCTCAGGTGATCCGCCCACCTCAACCTCCCAAACTGCTGGGATTATAGGCGTGAGCCACCGCTCCTGGCCTTTCAGCCAATTTTCTATCACCAAAGGGAAATCGTTTTGCTGGAATATGTGTAAAGGAGGTTAAAGTCAGAAGAGATTCTCGTCCGCTCAGTTAAGGTACTCAGACTATTTTCCAACCAATCAAGAGGGTGCTGCTTCATGGAGTTCGTTTAAGCTGAAGCGGCAGCTGTTGACTGTCATTTGCATCATCTTTATACATTTACTGTGAATGTCACTGTCCATTTCCACTTTCTTTCTACTTGTCTTCAAATTGATGCTTATCAAGTAGACAGAAGAAGACCAAGGGGTCGTTTTGCTATTTATACCTCCAAATTGATGGCGTGATCACTCTCAAGTGCAAACCCAGCCCTGACACTGTCCTGTTTGGCAATGTCCTGCTATCTGACCTGCAAATAGCTACACTTCCTGCTGTGGCCCACCCAGGCCTCTGGGATCTGATCCCTTCTCCCATGTCACCTGTGGCCATGTCTTCCCCAACCCAGGCTTCTCTGCTCCGCTGCTTATGTTCTGGATCCTGACTCTGAGCCTTTGCTTGTGTGGCTCCTCTCCCACTTGTCCATCATCATGTCAGTTATGTAAGCATAGATTCCAAAGTGACAATGAGGGTAGTTAGGATGGCAAGAGGAGAAAAACCAAGCTGGATTCATGGATTGCTTGTGTCACGTCCTCTACAGAGCCTCCCTTCTACCTGCTCTTACCCGGGCACCACAGTTGATGAGTTATTTTTTGGACCATCAGCAACACACCCAATCATTGTACATGGAATACTTCGGGGATGCTTTTTTCTATATTGATTTTACTTCATTAAACTGAGCTCCAGAGGAGCAGAAACTTTGGCTAATACATCTTGGTCTTAGCTTGTAATATCTGTGCTACAACTTATTAAGATGGTGACGCTGGCAAATTCCTTAATCTTTCTAATTCTTAATTTCATCAGCAAAGATGGGAAAGGATACTAACACCACTCTGGGTTGTTGAGAGGATTCAATAACTGAATATTTATAAAGCGTAGTACCTGATACTTAATAAAAAAGTGAATTTTAACCTGTGTCATCATTGTCATCGTCTTTATCATCCTTTGCAATTATTACATTTACTGCCTTCTAGTACAAGGAAGGGGATGGGTGGCTGGCTGGCTAGGTGGATAGAAGGATGGAAGAGGTAAGTACAAGGAAGGGGATGAGTGGCTGGCTGGCTAGATGGATAGAAAGATGGAACAGGTAAGTATAAGGAAGGGGATAGGTGGCTGGCTGGCTAGATGGATGGAAGGGGTGGAAGGGGATGCAAGAGGTAAGTACAAGGAAGGGGATGGGTGGCTGGCTGGCTAGATGGATAGAAGGTTGGAAGACAGGAAGGATGGAACGGAAGAAGAGGGAAGAACAAAAATGGAAACACATAGTACTGTTAGGTGAACTGAACTTCTAAGGTGCCGATTCTCAGTGATAGAATCTTGAGTTGATACCTCCTTGGGTGGCATGGAGCCTATACCTTTGTAGATCTTGGGAAACAACTTCTAAAGTAATCATAGTTGTATGTAATCGTAGTTACAACAATAGTTATTTTAGGCACATAACCCAAAGGTTTTCTTACAAGGAATCTATGAACCTTAAGAGTGAGGGCTTCTGTTAAGAGTAAGGGCTTCCCTGGAGTGGACATGGATCATGGGACTGAGCCAGCTTGGCATTGTTGGGTTGAACAGGGAGCGACACCTCTCAGCCCAGTCTATCAAGCCTGCTCTTTGACCTGCAGTGAGACCACCCACGCAGACATCAATGCAGCAAATCCCCGGCGTCAGGGTTTCAACATTTGGTTACTCTCAGAGAACTCTCGATTTATATAAGACTTGGAAAAAGGGTTTGAGTTTCTGTGGTTTACAATTATATTTCCCAACTTGGCCCATGAATCCAGCTTGGTTTTTCTCCTCTTAGCATCCTAACCACCCTCATTGTCACTTTGGAATCTACACTTACATAATTGACATGTTGAGAAGCTGTGCTAAAACCACACTGAAATCACATTTTAATAACATGGGAACCATCTTTTCCCAGTAAATTGTTGAGAAGCTAATTCTTGTCAGCCTAAAAACTTGAATATACATTTGAATAAATCAGCGGTGCTACACCGTGGCAGCCTGCTGAAAATCCCCAAGGAAGATTATATTTTTAGTTGAGCTACTTGTCACTGCACTGTGTTTTTAATATTGTGAGTCCTTTCTTGTCTTCATTTTTGAAGAATCTATTGCATACCTTGTCATTCAGAAAAACATAAACGGGACCTCTCAATTAGCGGTAAAGTTCGTCAGTTTAACTTTTAAGCTTAAACTCCTGTTATAGTTCGGTCACTTACTCGTCATCAAAAAGATATTTGAGCTGATATTATGCAATAATTTATAACCAAAAACAGGAGGAAAAGGTCTCTGTTTGTCTCAGAAGTACAAGTTATGCATAAGGTGACAAATTACACAGCTTTGGGAAATGGGTCTTAATGGAATGCACCAGGCTAATAGAAAAGCAGTGCCTCGATTTCCCACCTCAGATCTGAAAACTCCTGAGAGACTGACAGGGCGGTTCCCCAGCTTGGCTGTGTGAGGAATCAGCTGGGAATCGAACACTGCAGTCTGGCTGTCACCCCCAGGATTCTAATGTAATTGTTCTGCATTGTGGATCGATGGATGGATGGATGAATGGCTGGACAGAAGGAAGACTTGGAAAAAATCTTTGAGTTTCTGTGGTTTACAATTTTATTTCCCAACTTGGCCCATGAATCCAGCTTGGTTTTTCTCCTCTTACCATCCTAAGGAGGGAGGGAGGGAGGAAAAGGGGGAAGGAAGCAGGTAGAAAGGGAACAGGGAAGGGAGGACTGGAGGAAGGAAAGCAGGGATCATTCTGGAGTGTACACTGGGCATTTGATTTTAAAGCAAATCCAAGAGATCATTAAATTTGTATCAATGAATACACTGTTACCCCTCAATCAACACTACAATTCATATATCAGAATAGCACTTTGTTACTTGTTTCTGAGATAGGGCTGGCTCTGTCACCCAGACTGGTGTGCAGTGGCACGATCACAGCTCACTATGGCCTCAGCCACCAGGGTTCAAGTGATCCTTCCACTCAGCCTCCTGAGTAGTTGGGACTACATGCCTGCACCATCATGCCTGGCTAATTTTTATTTTTGTAGAGACGAGGTCTCCTTGTGTTGTCCAGGCTGGTCTCAAACTTCTGGAATCAAGTGATCCTCCTGCCCCTGCCTCCCAAAATACTGGGATTACAGGCATGAGCCATGTCCAGCTGCCTTGTTTTTTTCTTTGAAGAGAATGATAGACCTTCCATAGGAAAAATGTTAAATATGTGTGTGCAGAAATAAAGATTTCAAATATTTTTGCAAGATATTTTTTCTAATACCACTTTTTTCTACATTTTCCATAATTTAGTGAAGATAGTAAATTAACAAAGTGGAAAAGACTGAATATTTTAAGAAAAGCCAAGTTTAAAAATTTTGAACCTAATATTTCTTAAAGTAGCTAAAATTCAGATATTGAGAATAAATTCAACTTGACATGGCAAAATTCTAATAGGCTGAAATAATGTTTTGGTCTAGAACTATGTAGCTTTGTGTAGCCCATCAATTGTCTAAAAAAAGAGTAACCTATTTTGATGAAACTCGCTGTATCTTGTAACCTGTATCCTGTCTTGGTATTGTGGGAGTATATATGATTTAGGGGAAAGAGTCTGGAGAGACCTTAAGTCTGCTTTAGGGAAAGGGTGAGGAACCCCACTGAAGGCTACTTAACGCATTTTGAGAATGTCAGTAAAGATTTCTCAGAGCCCAGGGATTTTTTTTTTAATTGAGACATAATTTACATACAATACAATTCCTTTTTTCTTTTTTTTGAGACGGAGTCTTGCTCTGTCGCCCAGGCTGGAGTGTGATGGTGTGATCTTGGCTCACTGCAACCTCTGCCTCCTGGGTTCAAGTGATTCTCCTGCCTCAGCCTACTGAGTAGCTGGGATTACAGGTGTGCGCCACCACGCTTGGCTAATTTTTGTATTTTTAGTAGACACGGGGTTTCACCATGTTGGTTAAGCTGGTCTCAAACTCCTGACCTCTAGATCCGCCTGCCTCTGTAATCCCAAAGTGCTGGGATTACAGGCGTGAGCCCCTATGCCCAGCCAAAATTCATTATTTTAAGCTGTACAATTGAGTGATTTTTATTACGTTCACAAAGTTACGTTACCATTACCACTTTTGAATTGCAGAACAATTTCATCACTCCAAAAAGAAATCTCATAACAATTAGCAGTTATTTTCCATCGCTACTTCCTCCAGTCCCAAGGCAATCAGCAGTCTGCTTTCTATCTCTACCTGTTTGCCTATTCTGGACATTTCATATGAGTGGAGTTAGATAATACATGGCCTTTTGTGACTGGCTTTCACTTAGCATAATGTTCTAGAGGTTCATTTATGTTATTGCAAAAATCAGTACTTCATTTCTTTTTATGGCTGAAAAAAATTCCATTATGTAGATGTGCCATATTTGTTTATCTGGTTGTCAGTCGGATATTTCTGTTGTTCCTACTTTTTGGCTACTGTAAAAGATGCTGCTATGAACATTATTGTGATTATTATACCTTATTTGTAAACATCATGGGTGGGGGGTTGCAGTAAACATGTTGGAAAGTAGGGTTGGAGGTCCGTAGAAATTGGGGGCTTCAGCACTTCCCCCAAGCTCAACACCAACCCCCTTTCTGAGCCCCTCTTGAAGGAGAGTTCCCTGGGACGTGCCTGGTATTGGTACAATCAGTCAGGAAGCATTTTTCCTGGGGAGAAACTTACAAGTCCACGATCAAAGCCAACAAGAGACAAGGTGTTACATGACTCATTTTCGGTTTAAGAAGTGACAGGCTGATTCTAAGTTGGGTTCAATTATTTTGTTAAAGCGTTTTGCTTATTTGACTTCTCCTGACCTCGGAAATAATTCTAACCAATCAGTGCTGGCTCCCATTGGCCCTGGGGTCTGGTTGCTTTACAGCTGGTGACAGGGGGACCACTCCACTACCACATGTGAATTAATCCTCAACTCCAGAGCCAAGTGCCATTCTCCAGCAAGGTTGTATTTCTTCATTAGCTATTCCCAGGGCCCAGAAAGTCCCAGAGGATGTCAGAGTACATTAATTTTTATCATAACATGGAATCTTTCAGGTCTGAATGGCAGCACACGGCTGTCAGGGGCTTCTGAACTCTATTACAGCTCCATATATCTCTAGGCAAAACAGAGGAAAGAGTCGTCATTGGCAAGGGAGATGTACAAAATGCATGAGATGTTTTATTTTTTGAGTGACTTGACCACGTGCTTAAGCACATTCCCCAAACAATTTTTTTCTTATTGTTTGTAAGTTGTAAGTTGTAAATTCACCTCTGCCACCACCTATTAAAGCCCACTCCCTGCATTAAAACTGTATAAAGTGTATTTAAATAAACTCTCTTTGCATGATGTGAATGAAATCGTCATCTGGTACTTAAAACTATTCTATAAAGTTATTAAAAAATTAATGTTCCCTTCCCATGATTTTTCTGCAGAATTTATGCATCCATGATACTGCAGAAGTTCATAAATAATGGCTTGTATTGCTGCTTTAGTATTGCTTTATGCCTACGAAATATAATGTTAATTTGTAGCAATGCTAATGTGTTTTCAGGAAGGCTCTTTGTTTATTGCCTTTATTTTCCCCACTTACCAAGTGGGTAAAATGCTTTGAGGGTTGCATTTTATGTATTCAGGAGGCCCAGGTATTATTTTAATAGAAGCACTATTGACAAATACCAGTCATCCCCCCTGTGCCAGGCCCTGGATGAGGCACTGCTTCGCATGGGGGCTCCCCAGATTGTCCCACAAGGAAAGCATAGTCAAAGACAAAGTTTTCAGTTGTAAGAGTAAATGTGTTCTGCCTAGGCATTGTCAAGTAATTTACTGCCAGCTCTAGCCCTTCACTCAAGTTTCCTGGATACTTTTGACTTCTTAGCCATGGATGTGTTTGAAGGCTGCATGGACCTTCACTTACTTGCACTGCAGGTCAGCCTAATTGCATGAGCTCTGTGGACCACAGAGCAGGGTTTTCCAAAGTTCACCAAGACAAATATTGTATTATCTTAACATATATTCATTTTTTAAAACTGAAAATCAGAAGAGCAACTCCACCTAGCAGAAGTCTTTTGCAAAGGGCGAGGCGAGGCTAAAAAGTATAGAAGAGTTCGTTTCCAGTGCAATTTTATAAACACAGATGGTCCTTAAATTAAGCAAATGGTACCTAAATGACTGTGTTGTGGATAATGGTAACAGAGGGAGGGACTCGGGGGTTTTTTAAAAAGTACTGATTGTATGCAGTGTTTTAAACAGATAACTGTGATCTTAGTGTGATGAAAGATGCTGGGAGATTTCACCAGTGGTATCTTATTATTTTTCGGGGATTTTGTAATTCAACAAAATTCTGTTGTATGCCAAGCATAACCCTAGGTGTGAGAGCACAAGGTGACTTCAGATACCACCTCTATCCTTCAGGGGTTTGGGGCCCATTATTATGACTTAATCCATTTTGGGCGTGAGAAGCTGAGGGTCACAGAAAGAACCAATTCCCTCTTTAAATAATGCCACCCCAACCCTCCTCATCTGCCAGGTCTTTCCCTTCTTCTATTTGTATGAATAATAGTCACTTTCTCTTGTGGAGTTCGCTAAATTCTACTTTGGCCTATCAAATTTCTTTCATATCACAACTAAATTTCTTAAGGACGGGACTATGGTTCATTTGTCAGACGAACAAATGGGAATTTGCCAAGAGACACTTGGGTTAATTTACGTCTTTTCCATCCAAGGGCACTATGTTGAAGTGAGGCTAGTAGGTCATGAGTGTGGTTGAAGTTACTTTTTCTTACTTTCCCGACCAGCCCCCATCCTTACTGCACTTAAAGTTGATTGTCCATTTTATTAAATGTCCCCAGGAAGCCAGAACACAGGGCAGTAAAGTGCTGAATGCAAAGGGCAGGAGAAAAATGGAAACAACCAGAACTGTAACACCAAGGAATGAGACCTGCATGTCAGATATCATGCCCATTGCACTAAGTGCCATTGGGGCACAATTATCAAATGGATGCATTTTCCCTAGAAAACCATCTTGGAGAGCATGTGGATGTACTTCTATTTTACATTTCCCCCTATTTACAATCAATGAGATTGAGATTTTGTTGCTGGGACTGCTGATGATGGGATGGGAAAATATAATCAAGGTAATGGACATGAGGCAAAAATTTAAGGAAATGACAAAAACAAGAGTATTTCCATTTTCAGTTAAGTGTATGTACTGATGTTCTGGAATTCACTATAAGAAGTTGCAAATGGTGCATGAAATGAAAAATTCCTGGTGGTCTCCAGGGGACACAGCCGGTGCTGTGCTCCACTCTGGGTAACTGTTTTGGATTATTTTCTCTATTCCAACTGAAATAAAAAAAAATTAATTAAATGTGGCTAGGTTATCTTGACAGCAGAATCCATTCCCAGTTAATTATTATTTTAATACTTGATGGTGTCTGTCAAATTGTCGACATGTGACGGTCCTTTCAAATTTAAAGGAATAGCTGATGGTCACTGGCCACCCAAGCTGATACTGATTTTATATGTTGATGTTTCTCATTTTATTTGCTCTTTCCTTGAATATTTATTCAGGACATTCTCTACCAGACATATTGAGTAAGGGCAACAGAAACAATACATAAGTATCTTATAAATGTGGAAAACAATGTATATGTGTTTTTTATCTCTCAATGATTGGTGGGTACCATATCCCCAAAGTAGAATGAGCATTTGAGAAAACAGGAAATATCCTCTTTTAGGCACCATCTCTGTCAAGGCTGATGCTGGGCTTTTTATATATTTTCTCTAATTCTTGTGGCTGTCAAACAAGGTGGGCATTATCATTCCCTTTATAGGGGACACAGCTGTGGCTCAGAGGGGTTTATTCACTTTCCTGAGGGCCACACACATAATGAGAGGCAGACACAGGTGACGAAGTGAGTTTTCCCTGTCACGCCATCTTATCTGTCACATACCTCTCTGACATGCTAAAATTGCACTAAACAAAAGAATTCTCTTATGCACATATCATGCAAAAGATATTCTTTAACTGGGGATCATGTTTCTCATTCCATCAATAGAATGACTAACATTTTCTGAGGGTGTCTCACGTGAAAGTAAATCGCTCATGTTTGTTCTTTTTAAAAGATGCCCTTCGTATTGTGTATCTTGCAGTCTTGCTTTCTCAAACTTAAGCCAACTATATCGTCATTTTTGCAAAATCACTGCGTCAGTTTACTATTATTTAATGTTTATTGCTACCAATTTTAAGAAATCCTTTATAGGACTATTTGTGAAATTGATTTTGTGAGGATGATGATATAATTTCCATTACATTACAGCATATAAATATAAATATATATATATATATATATATATATATATATATATATATATATATATATATTTTATTATTTTTTTTTGAGACGGAGTCTTGCTCTGTCACCAGGCTGGAGTGCAGTGGTGCAATCTCGGCTCACTGTAACCTCCGCCGCCCGGGTTCAAGCGATTCCCCTGCCTTAGCCTCCTGAGTAGCTGGGACTACAGGCATGTGCTACCACACCCAGCTAATTTTTTGTATTTTAGTAGAGATGGTTTCACCATGTTGGCGAGGATGGTCTCAGTCTCCTGACCTCGTGATCCGCCTGCCTTGGCCTTCCAAAGTGCTGGGATATACATTTTTTTTTTTTTTTTGAGAGATGGAGTGTCACTCTGTTGCCGAGGCTGGAGTGTAGTGGCGCAATCTCGGCTCACTGCAACCTCCTCCTCCGGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCCAGTAGCTGGGATTACAGTCGTGTGCCACCACGCCTGGTTAGTTTTTGTATTTTTAGTAGAGATGGGTTTCACTGTGTTGGCTAAGGTGGTCTCAAACTCATGACCTCAAGTGATCCGCCCGCTTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTCAGCCACTGTGCCCGGCCGGATAGAAATAATTTTTATAAACTCCTTGGATGCTACCTAAAATCATCTTGTTTTGCTAGTGGCACATGCTGCATTTTGGGCAGCTGTGGCCTTGGTGGATTGCTGAAGTAGATTTGACCTTACCTGGACTGAGGCAGCTGTTGAAGGGAATTGCTGTGTTCAGTGTATACTGCCATCCATGATTTCATGAAACCAGCTCTAGCTATTTAAGCAGGGGTCAAACTTAGAATTCTACATTATTTTTTTCCCTTTTCTGGGAGGAAAGACAGTTGAACACCAGCAAAGACTAAGAAGTTTCTTAGAAGACTGTGGGTCCTTGGGCCCTTTCTATTGAATTTCAGAGTATTTCCAAATACTATGAAGTCTTGCAGCTTAGTTGAGAAATGCCCCAGATGGTGTGACATTCTGCTTCCAGGAGGGATTGGAAAGTATTTCCTTTTACATAACATTCCACTCAGCTCATTCCTTTGCTGTGTCTGAAATTGAATCCCCCAAAGCCACAATTATCTTAACATTCAGAAGAGTGTTTATTTAATCTGCAAAATCTTGCCTCACTTTTGGGGAGCATGTTAACAATTTCACTTACAAATCTTCTGTGTAACTCAACCCCATGGTGGTGTCTACTGCTGCTCCTAGACTCTTTAAAGCACCTTTCTCATCTCAGGTTTGAAATGATATGTCTCATTCTTGGGTTCCTTGAGTCGTAATGGGTTTGTCTTGTCTCCACAGCATAAATGACTCTTTCTTGATCAACTAGAACCACATCAACTTCTTCCCTCCAGCTTCAGTGATATATTGTGAAACATGGCTATTCAACGTCCTGTAGACCAAATGCCATAAGAAAAATAGCATTGATTCAAACGTATCCATCCAGATACCTAAAAAAGTTTTACTTCTTACCACATCTTGAGTCTGGGCAAACACGCACTTCCTATGGACATTGATTACTGTCTACTGTAGAGATAACATTTGCACATACAGATTATGGCACATGGTAGAAAGTGTTAAGTAATGTAGGAATGGACATATCCCAAGCAAAATTGGAAGCCAAGTCCCCTGTCCCTGCTCAAGTTGGTATGACTGGTGTATGGTGCCTTAATGGGTACTTAAAGTCCAGGTGAGAGTGGCAGGAGGCAGCCAAATGCCTAGGTAGATAGGAGCCGGTCCCTGTTGAAACCCCACTTCCAAGTTGAAGACAGTTTAAAGACTGAAAGCCAAGCTACAAGTTAAATCCTCGGACCAGATTGAGAACTTGTCTTCTTACTTGGTGCACTCTTCTGATTGATCCCCACCTTTCACCTATTTTACATATTCCTGCCCTTCCCTAACTGGTTTCCTATGCTGTCATGCCCACCTTTGAGTGTTGCCTTCACTTTAACCTTCTGTGCATGCTCACAAAGTAATTAGCATGTACCCTCCATTCTGAGTTAATATAAGGCCCCAGACCCAGCCACATGGGGCAACTTTACTGCCTTCAGGTAGGGGAACCACCCCCACCACATTCCCTCTCCACTGAGAGTTTTCCTTTTAGTTAATAAATTCGGCTCCACTCACTCTCCATTGTCTGCATGCCTAATTCTTCCTGGTTGTGAGACAAGCAGTTGGACCTAGCTGAGCTAAGGAGCAGAAAGACTGTATCACAGGGAACTTGTGTAACAGCTTGATCTCCTGTCCTACGTAGCTATCTATTGGTAAGAAGTTGAAGGAACTTGTGTCATTCCGTTGTGCCTGTCGTCTTGACCTTGTAAAAGGTCTTGGGTAAGCATGCAAGAAGTTTTGAAGAGGGAGATACAGCTAATTTGCAGATAAAGAGCAAGGGAAGAATTCCTGGAGAAAGGAAGAGTTTCCTGAGTCACCTTTGGGAGGTAGGAAGGGTTTGACATGTAGGCTGGGCATCTGGGAACGAGTGAGGGATTCTGTGAGCCCCATCTCAGTGGACCACTCAAGGAAGGTGGGTAAGCCCTGGGTATAAGTGTGTAAGCAGGGAACAGAAAGTACTGTGATTTAAAATATGTTAATTTTTCTACTGTACAGATGAGAGCCAGCTTGGAGATGGGCTGTAGCTCAAGCATCTTACCTACCTCTGATTTCTTAATGCCACGTTATAAGGCTGCTGCTTATAGCTCTTGAAGTCACTCCAAAAACAGATGAGTGAGACCCTGTTGCTAAAGTCCCACTGGGTGTAGATTATTCACAGATGTATACACAGTGGCTCACTCCAGGTAGGATGTGATCAGTGCTTTTAGAAATACAGAAAGTCCTATTGGTTTAAAAAAAATTTTTTTTTGTAATGAATTGAGTTTTAAAGCTAGCACTGTACAATAAAAGGGTGAATTTCACTATGAATTATGACAAACACAGTCATAGAGCTGCCATCATCACTGTCAAGATACAGAACAGTGCCATCACCCCCCAAATGTCCTCTGTGCCTTACTGTAGTCATACCTGCTCCTGACACCTAGCCCCTGGGAACCATTGATCTTTTTTCTGTCCCTAGTTGTTTGTCTTCTCTGCAATGTTACATAAACAGAAATATTCTGGGTGTCGCTTTTGGAGTTTGGATTCCTTCCCATAGCATAATGCATGCAGTATTTGTCTGTTTCTGCTAAATTACCCCCAAGACCTAGCGACTTAAAACAGCAAACATACACTATCTCACTCTGTTAGTTTTCCATTGCTGTTGTAATAAATTACCACAAACTTATGTGCTTAAAACCCAAAGCTATTATCTGACAGCTCTGTAGGTTAAAGATTTGTCATGGTTCTCATTGGGCTAAAATTAAGGTGTCGGTATGGCTCTGCTGCTTTCTAGAGACTCCAGGGAGAATATTTTTCCTTGCCTTTTCCAGCTTCTAAAGGTCACCCACAACATCTTCAAAACCAGCACTGTTGCATCTCTGACCTTAGCCCTGTAGTCCCATTTCCCTCTAGCCACAATCGGGAAAGGATCTCAGGACTGTTGTGATGACACTGTGCTTACCTAGATTATCTAGCATGAGCTCCCTGTCTCAAGGTGATAGAGTTTGCATGTTTTTCTCCTGCACATGCCATGTTGAAACATAATCCCTAGTGTTGGCGGTGGGTGTGCTGGGAGGTATTTGGATCATGGGGGTGGAACCCTCATGCATGACTTACGGCCATCCCTTTGGTGATAAGTGAGTTCACATGATATCTGGCACCTTCCTTCCTCTGTTGCTCTTGCCCTCACCATGTTAGCTGCCTACTCCCCTTTTGCCTTCCGCCATGACTGTAAGCCTCCTGAGGCCTCACCGGAAGCCAAGCACATGCTTCTTGTAGAGTCTGCAGAATCGGAGCCAATTAAATCTCTTTTCATTATAAATTATCCAGCCTCAGTTACTTCTTTATTGCAGTGCAAGAATAGCTTAACACACAAGGTCTTACCCTTGATCACGGTCTGCAGCATGTCTTTTACCATGTAAGGTAATATGTTCAGTGGCTGTGGGGGTTAGGATGTGGACTTCTTTGGGGGACTTTTATTTTTCCCAGTTACTATTTTTGTGACTCAGGAATTTAGGGACAGTTTGGCTGGTTGTTTCTGGCTCAGGGTCTTTCTTGGGCTGCAATCAAGATGTCAGCTGGGGGCTGGGCATGGTGGCTCACTCCTGTTATCACAGCACTTTGGGAGGTCGAGGTGAGTGGACCATTTGAGGTTAGGAGTTTGAGACCAACCTGGCCAACATGGTGAAACGCCATACTAAAAGTACAAAAATTAGTTGGGCATGGTGGCACATGCATGTAATCCCAGTTAGTTGGGGGGCTAAGGCTGGAGAACAGCTTCAAAACAGGAGGCGGAGGTTGTGGTCAACTGAGATCACACCACTGCACTCCAGCCTGGGTGAAAGAGCAAGGCCCCACCTCAAAAAAAAAAAAAAAAAAAAAAAGTTAGCTGGGGCCAAGGTCATCTCAAGTCTTGACTATGAAAGGACCTAACATCATGCATGGCTGTTGGCAGAGGTCACTTCCTCATGAGCATTAAACTGAAAGCTATAATTCCTGACTATTGACCAGAGGCGAACCTCAGTTCCCTGCCATGTGGGCCTCTTCATGGGGCAGTTGATAACACAGCAGTTAGCTTCCATTGGATTGAGTAAGCAAGAGAGCAAGAACAGGAGTGATACAGAAGCCAGCATCTTTTTGTAAACCAATCTCAGAAGAATTGTCCATGATTTTTTCTATATTCTGTTAGTTAGAAACAAATTCCTAGGTCTCACCCACCCTTGAGGTGAGGGGATTACACAAAAGTATGAAAACCAGGAAGCAGGGAGCATTGGGAGTCATTTGGACCCTGCCTAGGACAGTGCGTTTGAGATCCATGTTATTCTATCCATTAGTAGTAGTTTGCTTCTTTTTAATGTTGAGTAATATGCCATTGTGTGTTTATATATTGCTAATGGATTTTAAAGAGGGCTAATCAACGTGTTGATTAGAGGGAAATTTTCTTCAGTGAAATAATATTTGAGCAAAACCTTGAAGAACAATTAGGAATTTGACAGAGGGAATGGCACGAATAAAGACCCAGACTTAATCAAGTGAGGGGCGTACTCATCCATGGGGCAGTGGATGATTCACCTGCCTGGAGCTTAAGGAGAAGGGGGTGTAGTTGTGGCTGTTGCTCAGAAGGACCTGAATGTCAGGATGCATTTGTGTTTAATTTAGCAGACATTTAGGAACCATTAAAAGTTTTTGAGAATGGGTAGGTAAGATTAGAATAGTATTTTAATAAGTTTAACTAGGCTTTAACCAGTATTCATAAAAAGTCAAGTGGGAGAATGTAGAGGTGACAAGACCAGGTGTGGGCTATTGCAGTGACTACACCTAAAAGGAACAGAGCTAGAAACATGGAGTGACATGAAGTCATTGTCTTAGGTTGGTGATCCTAGCAGCTGAGCCTGGGATGGGGATTCTTGTTCCTATGATCGATTAGGGGACTGTTCCCAGAGGAGAGCGGAGAGGGAAGCAGGCTTTGGTTTCTACTGGAAATCTGTTTCCGCCCGATTCCATGGGGAGCTCTGGAGAAGGAATTGTATCACCGAGTTGGTCCTCCTTGTCAACAGAGCATGTGTGTGGGGGTGATGACTTCCCAGATGTGAGGGTGCCCTCCAGCAATGGACAGTTATCTGGAGGAGTTCATGCTCCAGAGGTAGTGCCTACCTCTGTGGAGAAGTAGGGGATGGCACGGTATTTTAGGGGTTCCTAATGAGCCTAGAGATACTGACATTGACCAGTCATGGAGACTAAAGGAGAAGATGGGATAGCACATTGTTTTCCTAAGTATTTGTAATCAGGTGACTGGTTTGCAAATATCAGCCATTACAGAAATAAAGGGTATCAGGACGGGGAGGAGATGTGAGGAGAGAAGATGAAGAGTTTAGATTTAGGCATGTTGACCCTGAATTTGTGGGAAGGTATCCAGGAAGATAAGACTGACAGGTGGGAATGAGATTCTGGAACTTGGAAAGAATATAGTGGCTAGAGCTAAGGGTTTGAAGTCTTCATGGCGATTTTGTTCGGAGTCATGGATTTGGAAGAAACTGCTGCAGGGAAGATTTTAGAGATGGAATGAAAAGAGATTGGTGATGGAACAATGGGCAAAAGAGGAAGGGTTGGAGGAGACAAAAAATGGAGTGGTAGTGGCATTGGGAGGTCCAAGAGTGGATGGTATCACAGAAGCCAAGACAGGAAAGTGAGCCAGAAGTGAGGGAGGAGCAGGATCCTCTGCAACAGGGCAGTCTAGTCTTAGCAGAGAGGAGGTTGCTGAATTTATTGATTGAGAAGCCACTCCTCGCCAGGCAGTTTTGAGAGGTTCTGGATGGTGATAGAATGGAATTCATTCAGCTGATGATGAGAAGTAGATTTGGGTCAGGGAAGGAGACTGAATGGAGATGAATATTTCAAAACTATAGACCATAAATGGGAAGAGAGAGACGAGTGCAACCTGCCTAGGTCAAGTGAATGTCCTTTTTTCTCTTCCCAGCCCTTGTCTTTTTTACCCTCTTTGTTTTCTGCATTTCTTTCCTTCTGTTCTGATTTTTCCTTACCTGATGGTCAGAAAGATCTGATTGAGCTGACGCGTAGCAAGAATGAGCCAGTGGAGAGGAAAATGTAACCACCTAATGGGTTCTCCTTTCCTGCTAGCTGGACAGAGCTGATTTATCAAGACGGGGGTACTGCAAATAGAGAAAGAATTTAATCCACACAGAGCCAGCTGTATGGGAAACCAGAGTTGTACTACTCAAATCAATCTCCCCGAGAATTCAGGGATCGGAGTCTTTAAGGAGAAATGTGGTGGATAGGGAGCCAGTGAGTGATCTGGGAGTGCTGATTGTCTCACGTGTCCATGTGAAGAGACCACCAAACAGGGTTTGTGTGAACAACAAGGCTGTTTATTTCACCTGGGTGCAGGTGGGCTGAGTCTGAAAAAGGAGTCAGCAAAGGGTGATGGGATTGTCATTAGTTCTTAGAGGTTTTGGCATAGGCGGTAGAGTTAGGAGCAATGTTTTTGGGGCAGCGGGTGGATCTCACAAAGTACATTCTCAAGGGTGGGGAGAATTACAAAGAACCTTCTTAAAGGTGGTGGAGATTACAAAGTACGTTGATCAGTTAGGTAGGGACAGAAGCAAATCACAATGGGGGAATGTCATCAGTTAAGGCTATTTTCACTTCTTTTGTGGATCTTCAGTTGCTTCAGGCCATCTGGATGCATACGAGCAGGTCACTGGGGATATGATGGCTTAGCCTGGGCTCAGAGGCCTGACACTGATTTGTTGGCTTGAAGATGAAATCACAGGGAATCAAAGGTGCCCTCCTGTGCTGACTCAGTTCCTGGGTGGGGGCCAAAGGACTGGTTGGCAGGTCTGCATGGGGCCACCGTGTTGTCAGAAATGCAAAAATCTGGCTGGTGCAATGGCTTACCCCTGTAATCCCAGTACTTTGGGAGGCTGAGGTTGTCAGGCCTCTGAGCCCAAGCTGAGCCGTCATAACCCCTGTGACCTGCACATATACATCCAGATGGCCTGAAGCAACTGAAGAACTACAAAAGAAGTGAAATAGCCAGTTCTTGCCTTAACTGATGACATTCCATCAGTGTGATTTGTTCCTGCCCCACCCTAACTGATCAGTTGACCTTGTGACATTCCTCTTCCTGAGGCAGTGAGTCTCAGGAGCTCCCCCACCGAGCACCTTGTGACCCCCGCCCCTGCCTGCAAGACAAAACCCCCTTTAACTGTAATTTTCCACTACCCACCCAAATCCTGTAAAACTGCCCTACACCTATTTCCCTTTGCTGACCCCTTTTTCAAACTCCGTCTGCCTGCACTCAGGTGATTAAAAAGCTTTATTGCTCACACAAAGCCTGTTTGGTGGTCTCTTCACATGGACACGTGTAACAGAGGTGGGCAGATCACTTGAGGTCAGGAGTTCAAGACCAGCCTGGCCAACGTGGTGAAACCCTGCATCTACTAAAAATACAAAAATTAGCTGGGCATGGTGCTGTGCACCTGTAATCCCAGCTACTCAGGAGGTTGAGGCACGAGGATCACTTAAACCCAGGAGGAGGAGATTGCAGTGAGCCAATTGCGCCACTGCACTCCAGCCTGCAATGCAGAGTGAAACTGTCTTAAAACAAACAAACAAACAAAAAGCAAAAATCTGAAAAGACATCTCAAAAGCCAATCTTAGGTTCTACAACAGTGATGTTATCTGCATGAATAATTGAGAAAGTTGCAAATCTTGTGACCTGAGGAATAACGGCTGGTAATTATTTAAAACTCAAGCCCCTCTCATCCTAACGTGGTGGCCTTTCATTCATTTTACAGGAACAGTTTAGTTTTGGGGGAAAGCTGTTATTTAAACTATAAACGAAATTTCTCCCAAAGTTAGCTTGGCCCATGCCCAGGAATGACCAAAGACAGCTAACCAGTTACCGGAAACGGGTCCCAATCTAGACCCCAAGAGAGAGTTGTTGGATCTTGGGCAAGAAAGAATTTGGGGTGAGTCTATAAAATTAAAGTAACTTTATAAAGAAAGTAAAAGAATAAAAGAATGGCTACTCCATAGGCAGAGCAGCAACTTTGGCTGTTGGACTAAGGATATTTATAGTTAATTATTGATTAGCTATAATATATTTTAATATAATATATATAATTATAATACAATATTATATATAATATATATTTTATGTAACTATAAACAAGGGGTAGATTACTCATGAGTTTTCACGGAAAGGGGTAGGCAGTTCCCGGAAGTGAGGGTTCTTCCCCTTTTTAGACCATATAGGGTAACTTCTTGATGTTGCCATGGCATGTGTACACTGTCATGGTGCTGGTGGGAGTGTCTCTTAGCGTGCTAATGGATTCATATTAGGGTATAATGAGTAGTAAGGATGACCAAATGTCGTTGCCATCTTTGTTTTGGTGGGTTTTGGCCGGCTTCTTTACTGCAACCTATTTTATCAGCAAGGTCTTTATGACCTGTATCTTGTGCTGACCTCCTATGTTATCCTGTAACTTAGAATGCCTGACCTCGTGGGAATGCAGCCCAGTAGGTCTCAATCTTATTTTACCCAGCCCCTATTCAAAATGGAGTCACTCTGGTTTGAATGCTTCTGACAAACCTCTGAGACTAAGGACAAGATGGAGTCTGTTAGGTTAGTTTTAATTTCCTCAGTTATTATTTCACAAAGGGGGTTTCAGAAAGATAGAAAATGCTAGAAAGAGACGGGGTAGTCAGTGGTAGGGATCTTGAGGGGTTAAAAAAGGGAAGGAGCTGGTCCCTTTGCAGTAGGAAGTGCCACCTCACTCCACTCTTGCCAGAAAGCCTGGCTCTGTCATAAGGAATCTTGGCCCATTGTCACTCTCATTGATGACAACCCCCGCCCCCACCACTACCACCTTAACTACTGAGGGGATGTCCTCTGGCCCAGAAGGAATGCATTGGCTCTAGTTGAAGGGGTTGCTGGTTTGTAAATAGCATCCCCACAGTCAGCAGACACAGAGAATGCCATGGGGCCACTCATCAGAAAGGTGGAGGGGCTTTATCTGTGGGTAGATGAGAGCCATGTAGAACCTGCATGCTGCCTTCCATTGGAATCTCAGGCTCTAAACAAGGAGGACAACCTTTTCTGCCCTTAATCACCTCTCACAGATGAGAATCCACCCAAGAAGAAGAAAAAAAATCAGCCCCTTAAAATGCTGACTAGTAACATCAGTTTTTGGAAGGATACTATTTTTGTTTCCATTCTGTCTTTATAGTTTGAACTTTTCCTTTTCATAGTCTGTAAATATTCATCATCTCAGAGCCTTTTGAATTGCAATGGCTTTTAGTCAAAACTGAGCGCAATGTGCCATTTGAATAATAAGAACCTCATTAAAGTGAGGCAAAAGTAATTCTTTTCCTTTTGTACAAGCCAATTAAGTGTCTTTACAATTAAAACAAAGTCAAGCACTTTATAGTTTCATATTCCACAAGCCTGTTTTTAATTAATAGAGGTGACAAGTTTTAGGTACAGGATACATATTTTGCCTCTGGAGTTGTGCAATTAACATTAATGAAGGGTATTAAAATTTGGTGAGAGAGGAAAGGGTTTTCCCAGGTCCTCTGTGGGTTCTTGCCTCCAGGCCTGTCTTTGGTAGCAAGTTCCTTGGTTTGCTGAGGTTTGCATTTTCTTGAGAGAATGTACAGGCAAGAATAAAAGTAACTAGAAGAGACATTGTTGTTATTGAGAAGTATGAAGTTAAAATTCCCAATGTCCAGGCCTGTCTTAAAATTAGGTAAACACTGCAGAAACTATCTTTGAGGAATTTAATTTTGTCTTGTGGATAAAAGTGAAGGATAAAGAGGAGAAATAACAATTATCTAAGCTGGCAGGCCAGCAAGATACTGGGGCTTTACTTCTCACAACCATGGGAGGAGTATATTTAGTTTACAGGGGAGGACACAGATTCACAGGGGTGAACTAACTGGCCATGAGTCAGGTAAGGAAAATAACATGGCTGAGCTACAGTTTCTCTAAGAAGTTGGTTCTGTCCATTTCAGGGAGCTTATGTAAGACAGAAGGTATTGTGCAGTAGCCTTGAAGTCAGGTGACCCTTTGGGGGTGTGTGTGTGTTTGCTTCTTTATATTAGCTTTTTTTTCATTTTGAGACAGGGTGTTGCTCTTCCTCAGTTGCCCAGGCTGGAGATCAGTGGGGTGATTACAGCTCACTGCAGCCACAACCTCCCAGGTTCAAGAGATCCTCCCACCTCAGCTTCCCAAGTAGCTGAGACTACAGGTGCATGCCACTGGCTATTTTTATTTTTTTCTTTTTAGTTTTGAGTTTTTAATTTTTTGTAGAGATGAGGTCTTGCTATGTTGTCCAGGTTGGTCTTGAACCCCTGAGTCCAAAAGATGCTGCTGTCTTGGCCTCCCACTGTGCTGAGATTACAGGCATGAGCCACCATATCTGGCCTATGTTAGCTATTTCTGAAACTTCATTTCTGTCTGTAAATGGCAATCATAATATATGTCACGTAGGGTTCTTAAATACAAAAAGCTCTTAAAAAGTGCCTAACCCATAGTTAGTACTCAAAAAAGGAAAAAAAAGTATTAATAGCCCTGGAAAAGTGAGCAAAATATTTCAGATGCAACCTGAAAGGAGTTCAAATGTAAGTCTCTCTTCACTTTGCCCATCTTTCCCTCTTCCCCGCAAGGTTTTTAGCAGTGGTCCTGTCTTACTACAATATATGAATTATCCCTGCTACAAATTAGTAGATTCTGCTTTGATTTGAAGCAGAGTCTAATGAGTGTTACAGGTAGGCTCGGTGCTTCAAAGGATTTCTGCCTATCTCTTCACAGAGTAGCTGGGCTATTGAGGGACTGTGTATTGACAATTGTAGTGGGCACCCTATGTGGAAAACAGGTGTTGTCAAATAACTGCAACTGACCCCAACCACACCATGACTTTTCAGGTTTTAGGTAGGGGATGGGTGGGAAAACAGATGAGGCAACAGAATGAATTATTCCTTAATAAGATCCAGATTACCGAGTGATAATAAGATTTGCATGAATACTTCAAACAACAACAGTGATTAAATACGTTTTTCTAGTTAAAAAATTAATGAGTGCTTTCCAGCAAGCGCTTAAAGAATTAATACTGGCCGGGAACGGTGGCTCACGCCTGTAATCCCAACACTTTGGGAGGCTGAGGTGGGTGGATCACGAGGTCAGGAGGTCAAGACCATCCTGGCTAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAACAAGATTAGTGGGGCGTGGTGGCGGGCACCTGTAGGCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAGGTGGAGCTTGCAGTGAGCCGAGATTGCACCACTGCACTCCAGCCTGGGCAGGAGACAGAGTAAGACTCCATCTCAGAAAAAAAAAAAAAGAATTAATATCAATTTTCCACAAACTCTTTTAAAAAAACAGAAGAGGAGGAAACACTTTCCATCTGACACTGTGAAGCCACTATTACTCTGACAACAGAGGCAGACAAAGATATCACAAGAAAAGGAAACTTTAGGTCACTTTCACTTATAAATAGAGATGCAACAATTTTGTACAAAATGCTAGCAAGCCAAATGCAGCCACATATACAAAGAATTGTATATTATGACTAAGTGGAATTTATCCAAACAATTCACGGTTGGTTTTATATTAAAAAATCAGTCAATATAATATAACATAATAGTAGAATAAAAGATCCTCTCAATACATACATAAAAACATGTTTTAAAAAGGTCCAGCACGCTTTCTTGATGAAAACTTTTAGTAAACTTGGAACAAAGTGGCATTTTCTCAACTCAATAAAGGGCATCTGCAAAAAACCCACAAATGACATCATAATTAATGCTGAAAGATTGAGTACCGTCCCCTTAGGATCAAGAAATAAGACAAGGATATTCACTCTCATCACTTTAATATTCTACTAGAAATCCTAGCTAGGGAAATTGGGCAAAAAACAAAAAAGCAAAAGAATGGAAAAAAAAGATCCAGATTGAAAAGAAGTAAAACTATCTTTATTTGCAAATGAAATCACCTTATCTGTAGAAAATCCTAGAAAGGTACTTAAAAAATATTAAAATGAACAGGTTCAGCCAGGATAAAAGTAGGATAAAAGTTTAATATGCGTAAGTCAATTGTATTTCTATATACTAGCAATGAAGGCATCCCATGTTCATGTATCAGAAGATTTAATATTGTATAGCTGACCAAACTCTGCAAATTGTGTACAAATTTAGTGCAATCTCTATTAAAATTTCAGTTGACTTTTTTACAGAAATTGAAAAGCTGATCCTACATTTTGTATATGAATGCAAAGGAACCTGAATAACCAAAACAAGCTTGAAAAAGAAGAGTGTTGGAGGGCTTCCATTTCTCAATTTCAAAACTTTCTATAAATCCTTAGTAATAAAGACAGTGTCATATTGGCATAAGAATAGATCTATAGATAATAAAATAGACTTGAGAGTCCAGAAATGAATCCATGCATATTGTGAATTGATTTTCAGCAAGGATGCCAAGACAATTCAAAATAGTCTTTTCAACAGTGATGCTAGAACCACTGGATAGCCCCATGCAAAAGAATGAGATTAGATCACACTTGCATAAACTACACAGCAAAAAATAGCAAAAAAAAAAAAAAAAAAAAAAAAAGGGTCAGAGACTTAAATGTAAGAGCTAAAACTATACAACTCTTAGAAGCAAACATAGGGGTATATTTTTCTGATTTTGGCTTAGACAATGATTTCTTGGATACAACCCTGACAGCACAAGTGACAAAAGAAAAAAAGATATCCAGAATGATCCAGAATGAAAAGTGTTTGTGCTGCAAATGATACCATCACGAAAGTGTACAGATGACCTACAGAATGGGGGAAATACTTACAAATTAAATATTAAATAAGGGATTTGTATCTAGAATAAAGAACTTTTACAATTCAATAATGAAAACTCATTTAAAAAATGAGCATAAGATCTAAATAGATATTCCTCCAAATATGATACACAAATTGCCAATGAATACATAAAAGATGTTCAATCTCATTAGGGAAATGCAAATGAAAGCCACAATGAGATTCCACTTATTGCTAGGATGGCTAAAATAAAAAAGTCAGATAACAAATACTTACAAACACATGGAGAGATGAAAGTTCAAATACAGTCCTGGCGGAAATGTAAAATTGTATGGCCAAACTGGAAAACAGTTTGGCCATTATTCAAAAAGTTAAACATTGAATTACCATATGACCCAATAATCCCACTCCTAGATGTATCTTGAAGAGAAATGAAAACATGCATTTCCATAGCATCTTACAAATGAATGTACATAGTAGTGATATTCCTAATAGCCAATAAATGGAAACAATCCAAATATCCATCATTGGTGAATGGATAAACAAAGCGTGGTATATTCATATAATGGAATATTATTTGCCCATTAAAAAGAAATGGATTACTGATATATGCTACAACATGGATGAACCCTGCAAACATCATGCCAAGGGACAGAAGCCAGTCACAAAAGACTACGTATTGTATGGTTCCATTTAGTATATAAAATGTCCGGAATAGGCGAATCTATAGGACAGAAAGTACACTGATGGTGTTTTGGGAATAAGAGTGTAGGTAGATAGAGGGTGGCTAGTGATTGCTAATTTTTTTTTTTTTTTTTGAGTTAAAAAAGTGCTATAAAATTAGAATGTAGTTATGGTTGTACAATTATTTGAGTATACTAAAACAGCTGTACAGTATACATAGGTAGATTTTATGGTATGTGCATTATATTTCAGTAAAGTTCTTTAATAAATTAGCTAATGCTTATTGTAGAAAAGTTGGAAAATACCAAAAACCATAAACAGGAAATAGGAAAAAAAGTAATCCTGACTGTATATCTGGAAATCTTTTCGTGGGGTATGGGGTATGAATCACACATATTAATTTTAATTTCCTGCTTTTTCAATTAAATTGAGAAAGTTTGGGAACCTCTGGCTTAATCATTATTTATCCACTTCTCTGTTGTTGGATGTTAGATTGCTTCTAGTTCATATGTAATTTTATAATTTTAGTTGACATGTCATTATGGATTCTTGGGCATAAACGTTTATATGAATTTTGAATATTAGGAAATAATCTTGGAAGCTATATTAGTTTTCTAAAGCTATTATAACAAATTACCACAGATGGAGTGCCTCAAAACAACAGAATTTTACTCTCTTACAGTTTCAGAGACCAAACATCTGAAATCAAGTGGTTTGCAGCGTTGGCTCCTTCTGGAGGCTGGAAGGAGCATCTGTTCCATGCCTTTCTCCTAGCTTCCCATGACTGCCAGCCACCCTTGGAATTTCTTGGCTTTTATCTTAGTCAGTCTAATCTCTGCCTCTGTCTTTCCATCTCCTTCTCTGTGGGTGTTCCTTCTCCCCTTCTGTTTTGTAAGGATAGGCCTGTCATTGGATTTAAGGCCCACCTTAATCCAAGATGATCTCATTTTAAGATGTGGGACTTAATCACATTTGCGAAGACCCTTTTTCCAAATAAGGTCGCATTCACAGATCCCTACCATTCAACTCACTATGGAAGCAATGGGATGGGAGGTACCATTCAACTCACAGATTCCTACCATTCAACCCACTATAAAGCAGTGGCATATGAAAAGGATGGGAGGTTGCTATGGGAATGGTCTGCCCCAGATGCAGGTAAAATGGGGAAGCATTTTATATAGAATGGAAAAACAACAGTAAAGCTGGCTAAAACTTGATCTGCTTTTTACAGTCATGTGCTGACAATTCTAAACATTATCAATGATAAAATACTTCTCCCTCATGATATTAACTGTGCAATTTTTGCTGCCCCTGCTACATACATGCTTTATTGCTTGGAAACAGGATTACTTGGTGAAATAATGTGAATCTTTCAATTCTCATGATACATGTTTTCAACTTAATATCCAGGGTGCAGGAATATATATTCCCACCAACTTTATACAAGATAATACAAATATAATAGTATGAATAAATATGCTAATTTGATAGAAAATATTTTAATATTTACTACTAGTGTAGTTGGACGTGTTAAATATTGCAATGGTTATTTGTAATCTACCTTTATAAAATTTCTAATGTCTGCCACTGTTTCTGTTAGACTCTTAAGAAGATACGTGTTTGTCTCATAGATTTGTAAGAACTCTTTATATACTAAAAATATGGGGCTTCTATATGATTGCATATATTCGTCACTTTTGTTCCACTGTTTTAGAGAGCAGAAGATTTTAATTTCTATAAAGATTTTTATGTTTCCCTTTATTGCATGCCAATGTTTGTTTTTTAAAAACTTGTACGTCTTAGGCTGCGATGTGAAAAGTGTCTTTTTTTTGATTTGGAAACAGAGTCTTGCTCTGATGCCTGTAGTGGTATGATCATAGCTCATTGCAGCCCTTGAACTCCTGGGCTCAAGTAATCCTCCTATCTCAGCCTCCGAAGTAGCTGGGACTACAGCCACACACCACCACACCTGGCTCACTTTCGGGTGGTTGTTGTTGAGATGGGGTCTTGCTGTGTTGCTCAGGCTGGTTTTGAATACTGGGCTCAAGTGATTATCCTGCCTTAGCCTCCCAAAGTGCTGGAATTTCAGAGGTGAGCCACTGTGCCCAGCCTCCTGTTTTTTTTACATGTATATATATATATATATATATACACACACACACACACATATATATATACACACACACATATATATATACACACACATATATATACACATATATACACATATATATACACACATATATATATACACACACACATATATATACACACACATATATATACACACACATATATATATATACATATACACACACACACACACACACACACACACACACACACACACATATATTTTTTTTTTCTTTTAGATGGAGTGTCTCTCTGTTGCCCAGGCTGGGGTGCAGTGACATGATCTCTGCTCACTACAACCTCTGCCTCCTGGGTTCAAGTGATTCTCCTGCCTTAGCCTCCCAAGTAGCTGGGATTACAGGCGTGTGCCACCACGCTGGGCTAACTTTTGTATTTTTAGTAGAGATGGGGGTTTCACCATGTTGGTCAGGCTGGTCTTGAACTTCTGACCTCGTGATTTGCCTGCCTCAGCCTCCCAAAGTGCTGAGATTACAGGCGTGAGCCGCCATGCCTGCCCTTATTTTTATATATCTTCCATAACCTTTCTATCTCTTTCTTTCTCTTTCTCTGTTTCTGTGTGTGTCTGTGTCTGATAATAAAATACATTAAATTTTATTTTATAACATAAATGTTTTATAAGGTCGGTAGATTTGTTTATGATTTTAATGGCTGCATAGTATTCTATCATATGGATATATTCTAAATGTAATAATTCCTCAATGGTATATTTAGTTTACTTTATATTTGCTGCTTTAGTTTGTCACAAACACCGTGGATAGCCTTGTATATATTTATTTTGTTGTATAGTTCTTATTGGTACAGTATGATTTTTCACAAGTATACAATTACTTTCTGAGAATTTAGCTTTTTGATTCACGTGAAAGCCTTACTTGTATTGTTAACAATTTTGTTACGTTTGTGAGGCCTAGGCAGAAGGATTGCTTGAGACCAGGAACTCAAGACCACCTTGACCACCTTGGGCAACATAGGGGGAGCCCATCTCTACAAAAATAAAAAATAAGAAATAAAATAGCCATGCATGGTGGTGTGCCCCTGTAGTCCCAGTTACTCGGGAGGCTAAGGTGGAAGGATCGTCTGAGCCTGGGAGTTTGAGACTGCAGTGAGTTATGACCACACCACTGCACTCCAGCCTGGATGACAGAGAAAGATCTTGTCAAAAAAAAAAAAAAAATTAAATATATACAAAATAAGGGAGAATACTATGACAGGCTTCCACTTACCTATCTTTAGGCTTCTTTATAAAATGTTCATTTAATCTGATCTCTATAAGAAATTCTGTATCCTTAAACAACATGTGCCCTTCTCTTTACACAGAGCGATTACCCCCTTGCTATGTTTTATTAACAGCTAGCAAGTGCTGGGTCCTGTGTCAGTTGTTTTGTTTTTTCTTTTTCTTTGTTTTTCTTTTCTTTTCTTTTCTTTTTTTTTTTTGTTTTTTTTTTTTTTGGAGATAGAGTCTTGCTCTGTTGGTCAGGCTGGAGTGCAGTGGCATGATCTTAGCTCACTGCAACCTCTGCCTCCTGGGTTCAAGTGATTCTCCGGCCTCAGCCTCCTGAGTAACTGGGATTACAGACATATACCACCACACCTGGCTAATTTTTTTTGTGTTTTGACAGAGATGGGGTTTCACCATGTTGGTCAGGCTGGTCTCAAACTCCTGACCTAAAATGGTCCACCTGCCTTGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCCCCGCGCCTGGCCCTGTGTCAGCCTTTTTAACAGCATAATTTTTCCTTTATTAGTCATAACTACTTTAAGGGCTATTTTGATTTACAGACAAAGTGAGACTTGGATGAATTAAGAAAGAATTAATCAGGTGGTTGAACCTGAGTTCCAACTAGGGTGACAACTCTTCGCAGATTGCCTGCTGCTGTCCTGGTTTTCACACTGAAAGTTCCGTATCCTAATAAGCCTCTCATCCTCAGCAACGGAGGACAGTTGCCCACTCAAAGCCTTGTGACATTCTAGGGTATCCTATTTCTACATGGTGGGAATCTAGAAGACGGAGAAAGAATTTCTCATTCCCGAAGGAGCATGGATTATCCTTGGTTGTAGTGTTTATGTCCACATCACGTGGATTCCCGAAGGAGCATGGATTATCCTTGGTTGTAGTGTTTATGTCCACATCACATGGGATATTTTATTTTTCAGGCCTCTTCATGTGCTTGTGTGCAACGCAGCAACTTTTGCTCTACCCTGGAGTCTCACCAAAGATGGCCTGGAGACCACCTTTCAAGTGAATCATCTGGGGCACTTCTACCTTGTCCAGCTCCTCCAGGATGTTTTGTGCCGCTCAGCTCCTGCCCGTGTCATTGTGGTCTCCTCAGAGTCCCATC", + "change_length": 38197 + }, + "sv_info": null + }, + "tx_annotations": [ + { + "gene_id": "WWOX", + "transcript_id": "NM_016373.4", + "hgvs_cdna": "NM_016373.4:c.517_791del", + "is_preferred": true, + "variant_effects": [ + "INTRON_VARIANT" + ], + "overlapping_exons": [ + 6, + 7 + ], + "protein_id": "NP_057457.1", + "protein_effect_location": null + } + ], + "genotypes": { + "samples": [ + { + "label": "Patient 15", + "meta_label": "PMID_30356099_Patient_15" + } + ], + "genotypes": [ + "HOMOZYGOUS_ALTERNATE" + ] + } + } + ] } ], "excluded_patient_count": 0 diff --git a/tests/view/test_formatter.py b/tests/view/test_formatter.py index 45842a2c..0ba3876d 100644 --- a/tests/view/test_formatter.py +++ b/tests/view/test_formatter.py @@ -11,6 +11,7 @@ class TestFormatter: "variant, expected", [ ('12_56004525_56004525_A_G', "NM_001032386.2:c.1136A>G"), + ('16_78386858_78425054_--38197bp--_A', 'NM_016373.4:c.517_791del') ] ) def test_variant_formatter( From dd796cbd4c05927c516c876214053d99eec894e4 Mon Sep 17 00:00:00 2001 From: Lauren Rekerle Date: Fri, 9 Aug 2024 13:24:23 -0500 Subject: [PATCH 2/8] Fixed so ci runs --- src/genophenocorr/analysis/_gp_analysis.py | 4 ++-- tests/analysis/test_mtc_filter.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/genophenocorr/analysis/_gp_analysis.py b/src/genophenocorr/analysis/_gp_analysis.py index 871c9fc0..a3be7cad 100644 --- a/src/genophenocorr/analysis/_gp_analysis.py +++ b/src/genophenocorr/analysis/_gp_analysis.py @@ -209,7 +209,7 @@ def __init__( # such as HP:0001611 Hypernasal speech, but it also contains "general" terms that # we skip according to this heuristic, e.g., HP:0030680 Abnormal cardiovascular system morphology for t in self._top_level_terms: - l2_terms = hpo.graph.get_children(t.id, include_source=False) + l2_terms = hpo.graph.get_children(t, include_source=False) for t in l2_terms: tid = t label = hpo.get_term_name(tid) @@ -219,7 +219,7 @@ def __init__( # such as HP:0031109 Agalactia, but it also contains "general" terms that # we skip according to this heuristic, e.g., HP:0006500 Abnormal lower limb epiphysis morphology for t in self._second_level_terms: - l3_terms = hpo.graph.get_children(t.id, include_source=False) + l3_terms = hpo.graph.get_children(t, include_source=False) for t in l3_terms: tid = t label = hpo.get_term_name(tid) diff --git a/tests/analysis/test_mtc_filter.py b/tests/analysis/test_mtc_filter.py index 3f90ea61..f57c40ab 100644 --- a/tests/analysis/test_mtc_filter.py +++ b/tests/analysis/test_mtc_filter.py @@ -114,9 +114,10 @@ def test_filter_terms_to_test( filtered_n_usable, filtered_all_counts, reason_for_filtering_out = mtc_report + # After updating SUOX.json to include hgvsp, results changed. I (Lauren) don't understand why, could someone check on this test? assert reason_for_filtering_out['Skipping term because all genotypes have same HPO observed proportions'] == 1 - assert reason_for_filtering_out['Skipping non-target term'] == 14 - assert reason_for_filtering_out['Skipping top level term'] == 5 + assert reason_for_filtering_out['Skipping non-target term'] == 5 # Originally 14 + assert reason_for_filtering_out['Skipping top level term'] == 0 # Originally 5 assert len(filtered_n_usable) == 4 assert len(filtered_all_counts) == 4 From a72b1e377c3d8a4b672604384d5c7e46b35ce332 Mon Sep 17 00:00:00 2001 From: Lauren Rekerle Date: Mon, 12 Aug 2024 09:37:31 -0500 Subject: [PATCH 3/8] Added line in SUOX.json for tests. --- tests/test_data/SUOX.json | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_data/SUOX.json b/tests/test_data/SUOX.json index b49edc6a..444b5267 100644 --- a/tests/test_data/SUOX.json +++ b/tests/test_data/SUOX.json @@ -7470,6 +7470,7 @@ 7 ], "protein_id": "NP_057457.1", + "hgvsp": null, "protein_effect_location": null } ], From 29f0b45f51da9e61d515efead799a0912768cbda Mon Sep 17 00:00:00 2001 From: Lauren Rekerle Date: Wed, 14 Aug 2024 10:29:24 -0500 Subject: [PATCH 4/8] Fixed the documentation and added tests for large variants. --- src/genophenocorr/io.py | 10 ++ src/genophenocorr/model/_variant.py | 3 +- src/genophenocorr/preprocessing/_generic.py | 1 + tests/model/test_cohort.py | 1 + tests/model/test_variant.py | 23 ++++- tests/test_data/SUOX.json | 103 +++++++++++++++++++- 6 files changed, 138 insertions(+), 3 deletions(-) diff --git a/src/genophenocorr/io.py b/src/genophenocorr/io.py index ffa0bed5..5430b318 100644 --- a/src/genophenocorr/io.py +++ b/src/genophenocorr/io.py @@ -35,6 +35,11 @@ def default(self, o): 'alt': o.alt, 'change_length': o.change_length, } + elif isinstance(o, VariantClass): + return{ + 'name': o.name, + 'value': o.value + } elif isinstance(o, ImpreciseSvInfo): return { 'structural_type': o.structural_type.value, @@ -145,6 +150,7 @@ def default(self, o): _VARIANT_FIELDS = ('variant_info', 'tx_annotations', 'genotypes') _VARIANT_INFO_FIELDS = ('variant_coordinates', 'sv_info') _IMPRECISE_SV_INFO_FIELDS = ('structural_type', 'variant_class', 'gene_id', 'gene_symbol') +_VARIANT_CLASS_FIELDS = ('name', 'value') _VARIANT_COORDINATES_FIELDS = ('region', 'ref', 'alt', 'change_length') _REGION_FIELDS = ('start', 'end') _GENOMIC_REGION_FIELDS = ('contig', 'start', 'end', 'strand') @@ -217,6 +223,10 @@ def object_hook(obj: typing.Dict[typing.Any, typing.Any]) -> typing.Any: alt=obj['alt'], change_length=obj['change_length'], ) + elif GenophenocorrJSONDecoder._has_all_fields(obj, _VARIANT_CLASS_FIELDS): + return VariantClass( + value=obj['value'] + ) elif GenophenocorrJSONDecoder._has_all_fields(obj, _IMPRECISE_SV_INFO_FIELDS): return ImpreciseSvInfo( structural_type=hpotk.TermId.from_curie(obj['structural_type']), diff --git a/src/genophenocorr/model/_variant.py b/src/genophenocorr/model/_variant.py index 3f3cdd52..419599ce 100644 --- a/src/genophenocorr/model/_variant.py +++ b/src/genophenocorr/model/_variant.py @@ -461,7 +461,8 @@ def variant_key(self) -> str: Get a readable representation of the variant's coordinates. For instance, ``X_12345_12345_C_G`` for a sequence variant or ``22_10001_20000_INV`` for a symbolic variant. - If the 'ref' or 'alt' sequences are over 50 bases, they are replaced with the number of bases. Example: ``16_78386858_78425054_--38197bp--_A`` + If the key is larger than 50 characters, the 'ref' and/or 'alt' (if over 10 bps) are changed to just show number of bps. + Example: ``XX_123456789_123456816_AAAAAAAAAAAAAAAAAAAAAAAAAAA_T`` -> ``XX_123456789_123456816_--27bp--_T`` .. note:: diff --git a/src/genophenocorr/preprocessing/_generic.py b/src/genophenocorr/preprocessing/_generic.py index 81ceb449..abb82e64 100644 --- a/src/genophenocorr/preprocessing/_generic.py +++ b/src/genophenocorr/preprocessing/_generic.py @@ -32,6 +32,7 @@ def annotate(self, item: ImpreciseSvInfo) -> typing.Sequence[TranscriptAnnotatio variant_effects=variant_effects, affected_exons=affected_exons, protein_id=None, + hgvsp=None, protein_effect_coordinates=None, ) tx_annotations.append(annotation) diff --git a/tests/model/test_cohort.py b/tests/model/test_cohort.py index bb6685a2..532fb84f 100644 --- a/tests/model/test_cohort.py +++ b/tests/model/test_cohort.py @@ -17,6 +17,7 @@ def test_all_transcript_ids( "NM_001351089.2", "NM_000456.3", "NM_016373.4", + "NM_013275.6" } def test_variant_effect_count_by_tx( diff --git a/tests/model/test_variant.py b/tests/model/test_variant.py index 484bbd6a..f5c77526 100644 --- a/tests/model/test_variant.py +++ b/tests/model/test_variant.py @@ -27,4 +27,25 @@ def test_get_hgvs_cdna_by_tx( ): hgvs = some_variant.get_hgvs_cdna_by_tx_id(transcript_id=tx_id) - assert hgvs == expected \ No newline at end of file + assert hgvs == expected + + @pytest.mark.parametrize( + "search, expected", + [ + (56002673, "12_56002674_56002674_T_C"), + (78386857, "16_78386858_78425054_--38197bp--_A"), + ('SO:1000029', 'SO:1000037_HGNC:21316_ANKRD11') + ] # Couldn't find an example that produces a variant key similar to this: 22_10001_20000_INV + # That is an example in the documentation though, so is it possible? (specifically the "INV"/"DEL"/etc.) + ) + def test_variant_key(self, suox_cohort: Cohort, search, expected: typing.Optional[str]): + + for var in suox_cohort.all_variants(): + if var.variant_info.variant_coordinates is not None: + if var.variant_info.variant_coordinates.start == search: + assert var.variant_info.variant_key == expected + if var.variant_info.variant_coordinates is None: + if var.variant_info.sv_info.structural_type == search: + assert var.variant_info.variant_key == expected + + \ No newline at end of file diff --git a/tests/test_data/SUOX.json b/tests/test_data/SUOX.json index 444b5267..c6d8499c 100644 --- a/tests/test_data/SUOX.json +++ b/tests/test_data/SUOX.json @@ -7487,7 +7487,108 @@ } } ] - } + }, + { + "labels": { + "label": "Willemsen2010_P1", + "meta_label": "PMID_36446582_Willemsen2010_P1" + }, + "phenotypes": [ + { + "term_id": "HP:0001249", + "name": "Intellectual disability", + "is_present": true + }, + { + "term_id": "HP:0000729", + "name": "Autistic behavior", + "is_present": true + }, + { + "term_id": "HP:0004322", + "name": "Short stature", + "is_present": true + }, + { + "term_id": "HP:0000308", + "name": "Microretrognathia", + "is_present": true + }, + { + "term_id": "HP:0000486", + "name": "Strabismus", + "is_present": true + }, + { + "term_id": "HP:0000411", + "name": "Protruding ear", + "is_present": true + } + ], + "diseases": [ + { + "term_id": "OMIM:148050", + "name": "KBG syndrome", + "is_observed": true + } + ], + "variants": [ + { + "variant_info": { + "variant_coordinates": null, + "sv_info": { + "structural_type": "SO:1000029", + "variant_class": { + "name": "DEL", + "value": 0 + }, + "gene_id": "HGNC:21316", + "gene_symbol": "ANKRD11" + } + }, + "tx_annotations": [ + { + "gene_id": "HGNC:21316", + "transcript_id": "NM_013275.6", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "TRANSCRIPT_ABLATION" + ], + "overlapping_exons": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12 + ], + "protein_id": null, + "hgvsp": null, + "protein_effect_location": null + } + ], + "genotypes": { + "samples": [ + { + "label": "Willemsen2010_P1", + "meta_label": "PMID_36446582_Willemsen2010_P1" + } + ], + "genotypes": [ + "HETEROZYGOUS" + ] + } + } + ] + } ], "excluded_patient_count": 0 } \ No newline at end of file From 2ac1d71ecba10fc6eb36b9af0282adaf6cd7bcd1 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 15 Aug 2024 15:14:08 +0200 Subject: [PATCH 5/8] Simplify the documentation example. --- src/genophenocorr/model/_variant.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/genophenocorr/model/_variant.py b/src/genophenocorr/model/_variant.py index 419599ce..671c75ee 100644 --- a/src/genophenocorr/model/_variant.py +++ b/src/genophenocorr/model/_variant.py @@ -462,7 +462,7 @@ def variant_key(self) -> str: For instance, ``X_12345_12345_C_G`` for a sequence variant or ``22_10001_20000_INV`` for a symbolic variant. If the key is larger than 50 characters, the 'ref' and/or 'alt' (if over 10 bps) are changed to just show number of bps. - Example: ``XX_123456789_123456816_AAAAAAAAAAAAAAAAAAAAAAAAAAA_T`` -> ``XX_123456789_123456816_--27bp--_T`` + Example: ``X_1000001_1000027_TAAAAAAAAAAAAAAAAAAAAAAAAAA_T`` -> ``X_1000001_1000027_--27bp--_T`` .. note:: From ab360f30924dbe10a8ab1f53c3a6508aa0b3df97 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 15 Aug 2024 15:14:35 +0200 Subject: [PATCH 6/8] Update the serialization of `VariantClass` enum. --- src/genophenocorr/io.py | 15 +++------------ tests/test_data/SUOX.json | 5 +---- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/src/genophenocorr/io.py b/src/genophenocorr/io.py index 5430b318..fa860f57 100644 --- a/src/genophenocorr/io.py +++ b/src/genophenocorr/io.py @@ -35,11 +35,6 @@ def default(self, o): 'alt': o.alt, 'change_length': o.change_length, } - elif isinstance(o, VariantClass): - return{ - 'name': o.name, - 'value': o.value - } elif isinstance(o, ImpreciseSvInfo): return { 'structural_type': o.structural_type.value, @@ -92,7 +87,8 @@ def default(self, o): 'label': o.label, 'meta_label': o.meta_label, } - elif isinstance(o, (Genotype, VariantEffect, Strand)): + elif isinstance(o, (Genotype, VariantEffect, Strand, VariantClass)): + # enums return o.name elif isinstance(o, Phenotype): return { @@ -150,7 +146,6 @@ def default(self, o): _VARIANT_FIELDS = ('variant_info', 'tx_annotations', 'genotypes') _VARIANT_INFO_FIELDS = ('variant_coordinates', 'sv_info') _IMPRECISE_SV_INFO_FIELDS = ('structural_type', 'variant_class', 'gene_id', 'gene_symbol') -_VARIANT_CLASS_FIELDS = ('name', 'value') _VARIANT_COORDINATES_FIELDS = ('region', 'ref', 'alt', 'change_length') _REGION_FIELDS = ('start', 'end') _GENOMIC_REGION_FIELDS = ('contig', 'start', 'end', 'strand') @@ -223,14 +218,10 @@ def object_hook(obj: typing.Dict[typing.Any, typing.Any]) -> typing.Any: alt=obj['alt'], change_length=obj['change_length'], ) - elif GenophenocorrJSONDecoder._has_all_fields(obj, _VARIANT_CLASS_FIELDS): - return VariantClass( - value=obj['value'] - ) elif GenophenocorrJSONDecoder._has_all_fields(obj, _IMPRECISE_SV_INFO_FIELDS): return ImpreciseSvInfo( structural_type=hpotk.TermId.from_curie(obj['structural_type']), - variant_class=obj['variant_class'], + variant_class=VariantClass[obj['variant_class']], gene_id=obj['gene_id'], gene_symbol=obj['gene_symbol'], ) diff --git a/tests/test_data/SUOX.json b/tests/test_data/SUOX.json index c6d8499c..e0921009 100644 --- a/tests/test_data/SUOX.json +++ b/tests/test_data/SUOX.json @@ -7538,10 +7538,7 @@ "variant_coordinates": null, "sv_info": { "structural_type": "SO:1000029", - "variant_class": { - "name": "DEL", - "value": 0 - }, + "variant_class": "DEL", "gene_id": "HGNC:21316", "gene_symbol": "ANKRD11" } From 6a671006c78aecfa8eb3ddf8b11aa0b59a0b3671 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 15 Aug 2024 15:15:09 +0200 Subject: [PATCH 7/8] Add a test stub. --- tests/model/test_variant.py | 40 +++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/tests/model/test_variant.py b/tests/model/test_variant.py index f5c77526..3890dc11 100644 --- a/tests/model/test_variant.py +++ b/tests/model/test_variant.py @@ -2,7 +2,9 @@ import typing -from genophenocorr.model import Variant, Cohort +from genophenocorr.model import Variant, Cohort, VariantCoordinates +from genophenocorr.model.genome import GenomeBuild, GenomicRegion, Strand + class TestVariant: @@ -28,7 +30,7 @@ def test_get_hgvs_cdna_by_tx( hgvs = some_variant.get_hgvs_cdna_by_tx_id(transcript_id=tx_id) assert hgvs == expected - + @pytest.mark.parametrize( "search, expected", [ @@ -47,5 +49,35 @@ def test_variant_key(self, suox_cohort: Cohort, search, expected: typing.Optiona if var.variant_info.variant_coordinates is None: if var.variant_info.sv_info.structural_type == search: assert var.variant_info.variant_key == expected - - \ No newline at end of file + + +class TestVariantCoordinates: + + @pytest.mark.parametrize( + "contig_name, start, end, ref, alt, change_length, expected", + [ + ("chrX", 100, 101, "C", "T", 0, "X_101_101_C_T"), + # TODO(lnrekerle): add more tests + ] + ) + def test_variant_key( + self, + genome_build: GenomeBuild, + contig_name: str, + start: int, end: int, + ref: str, alt: str, + change_length: int, + expected: str, + ): + contig = genome_build.contig_by_name(contig_name) + assert contig is not None + + vc = VariantCoordinates( + region=GenomicRegion( + contig=contig, + start=start, end=end, strand=Strand.POSITIVE, + ), + ref=ref, alt=alt, change_length=change_length, + ) + + assert vc.variant_key == expected From c30a2c2df7b50e68db553c206a45189805ff0b9a Mon Sep 17 00:00:00 2001 From: Lauren Rekerle Date: Thu, 15 Aug 2024 11:46:21 -0500 Subject: [PATCH 8/8] Updated tests --- tests/model/test_variant.py | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/tests/model/test_variant.py b/tests/model/test_variant.py index 3890dc11..984aceb5 100644 --- a/tests/model/test_variant.py +++ b/tests/model/test_variant.py @@ -30,25 +30,6 @@ def test_get_hgvs_cdna_by_tx( hgvs = some_variant.get_hgvs_cdna_by_tx_id(transcript_id=tx_id) assert hgvs == expected - - @pytest.mark.parametrize( - "search, expected", - [ - (56002673, "12_56002674_56002674_T_C"), - (78386857, "16_78386858_78425054_--38197bp--_A"), - ('SO:1000029', 'SO:1000037_HGNC:21316_ANKRD11') - ] # Couldn't find an example that produces a variant key similar to this: 22_10001_20000_INV - # That is an example in the documentation though, so is it possible? (specifically the "INV"/"DEL"/etc.) - ) - def test_variant_key(self, suox_cohort: Cohort, search, expected: typing.Optional[str]): - - for var in suox_cohort.all_variants(): - if var.variant_info.variant_coordinates is not None: - if var.variant_info.variant_coordinates.start == search: - assert var.variant_info.variant_key == expected - if var.variant_info.variant_coordinates is None: - if var.variant_info.sv_info.structural_type == search: - assert var.variant_info.variant_key == expected class TestVariantCoordinates: @@ -57,7 +38,9 @@ class TestVariantCoordinates: "contig_name, start, end, ref, alt, change_length, expected", [ ("chrX", 100, 101, "C", "T", 0, "X_101_101_C_T"), - # TODO(lnrekerle): add more tests + ("chrY", 150, 152, "G", "GG", 1, "Y_151_152_G_GG"), + ("chr16", 1000, 1040, "A", "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG", 39, "16_1001_1040_A_--40bp--"), + ("chr2", 200, 301, "N", "", 100, "2_201_301_DEL") ] ) def test_variant_key(