diff --git a/docs/tutorial.rst b/docs/tutorial.rst index b506dab9..3d1d917a 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -315,10 +315,10 @@ and these are the tested HPO terms ordered by the p value corrected with the Ben :file: report/tbx5_frameshift_vs_missense.csv :header-rows: 2 - .. doctest:: tutorial - :hide: +.. doctest:: tutorial + :hide: - >>> summary_df.to_csv('docs/report/tbx5_frameshift_vs_missense.csv') # doctest: +SKIP + >>> summary_df.to_csv('docs/report/tbx5_frameshift_vs_missense.csv') # doctest: +SKIP We see that several HPO terms are significantly associated with presence of a frameshift variant in *TBX5*. diff --git a/docs/user-guide/analyses/phenotype-groups.rst b/docs/user-guide/analyses/phenotype-groups.rst index 60952536..f1f80a37 100644 --- a/docs/user-guide/analyses/phenotype-groups.rst +++ b/docs/user-guide/analyses/phenotype-groups.rst @@ -126,10 +126,10 @@ we expect the autosomal dominant mode of inheritance: >>> from gpsea.analysis.predicate.genotype import autosomal_dominant >>> gt_predicate = autosomal_dominant(is_frameshift) >>> gt_predicate.display_question() -'What is the genotype group: HOM_REF, HET' +'What is the genotype group: No allele, Monoallelic' -`gt_predicate` will assign the patients with no frameshift variant allele into `HOM_REF` group -and the patients with one frameshift allele will be assigned into `HET` group. +`gt_predicate` will assign the patients with no frameshift variant allele into `No allele` group +and the patients with one frameshift allele will be assigned into `Monoallelic` group. Note, any patient with 2 or more alleles will be *omitted* from the analysis. .. note:: @@ -239,13 +239,16 @@ We can learn more by showing the MTC filter report: >>> from gpsea.view import MtcStatsViewer >>> mtc_viewer = MtcStatsViewer() >>> mtc_report = mtc_viewer.process(result) ->>> with open('docs/user-guide/report/tbx5_frameshift.mtc_report.html', 'w') as fh: # doctest: +SKIP -... _ = fh.write(mtc_report) - +>>> mtc_report # doctest: +SKIP .. raw:: html :file: report/tbx5_frameshift.mtc_report.html +.. doctest:: phenotype-groups + :hide: + + >>> mtc_report.write('docs/user-guide/analyses/report/tbx5_frameshift.mtc_report.html') # doctest: +SKIP + Genotype phenotype associations =============================== @@ -255,12 +258,17 @@ ordered by the corrected p value (Benjamini-Hochberg FDR): >>> from gpsea.view import summarize_hpo_analysis >>> summary_df = summarize_hpo_analysis(hpo, result) ->>> summary_df.to_csv('docs/user-guide/report/tbx5_frameshift.csv') # doctest: +SKIP +>>> summary_df # doctest: +SKIP .. csv-table:: *TBX5* frameshift vs rest :file: report/tbx5_frameshift.csv :header-rows: 2 +.. doctest:: phenotype-groups + :hide: + + >>> summary_df.to_csv('docs/user-guide/analyses/report/tbx5_frameshift.csv') # doctest: +SKIP + The table shows that several HPO terms are significantly associated with presence of a heterozygous (`HET`) frameshift variant in *TBX5*. diff --git a/docs/user-guide/analyses/report/tbx5_frameshift.csv b/docs/user-guide/analyses/report/tbx5_frameshift.csv index 39c76f0b..8590bb9f 100644 --- a/docs/user-guide/analyses/report/tbx5_frameshift.csv +++ b/docs/user-guide/analyses/report/tbx5_frameshift.csv @@ -1,4 +1,4 @@ -What is the genotype group,HOM_REF,HOM_REF,HET,HET,, +What is the genotype group,No allele,No allele,Monoallelic,Monoallelic,, ,Count,Percent,Count,Percent,Corrected p values,p values Ventricular septal defect [HP:0001629],42/71,59%,19/19,100%,0.003870827221893892,0.00024192670136836825 Abnormal atrioventricular conduction [HP:0005150],1/23,4%,3/3,100%,0.01230769230769231,0.0015384615384615387 diff --git a/docs/user-guide/predicates/mode_of_inheritance_predicate.rst b/docs/user-guide/predicates/mode_of_inheritance_predicate.rst index 5762ab3a..49a3c487 100644 --- a/docs/user-guide/predicates/mode_of_inheritance_predicate.rst +++ b/docs/user-guide/predicates/mode_of_inheritance_predicate.rst @@ -29,15 +29,15 @@ to assign an individual into one of the following categories: .. table:: Autosomal dominant predicate categories - +------------------+-------------+ - | Allele count | Category | - +==================+=============+ - | 0 | `HOM_REF` | - +------------------+-------------+ - | 1 | `HET` | - +------------------+-------------+ - | :math:`\ge 2` | ``None`` | - +------------------+-------------+ + +------------------+----------------+ + | Allele count | Category | + +==================+================+ + | 0 | `No allele` | + +------------------+----------------+ + | 1 | `Monoallelic` | + +------------------+----------------+ + | :math:`\ge 2` | ``None`` | + +------------------+----------------+ Examples ======== @@ -50,7 +50,7 @@ We can create the predicate with the :func:`~gpsea.analysis.predicate.genotype.a >>> from gpsea.analysis.predicate.genotype import autosomal_dominant >>> gt_predicate = autosomal_dominant() >>> gt_predicate.display_question() -'What is the genotype group: HOM_REF, HET' +'What is the genotype group: No allele, Monoallelic' Use a variant subset @@ -80,18 +80,18 @@ an individual into one of the genotype categories: +------------------+-------------------+----------------+ | Allele count | Category | Category index | +==================+===================+================+ - | 0 | `HOM_REF` | 0 | + | 0 | `No allele` | 0 | +------------------+-------------------+----------------+ - | 1 | `HET` | 1 | + | 1 | `Monoallelic` | 1 | +------------------+-------------------+----------------+ - | 2 | `BIALLELIC_ALT` | 2 | + | 2 | `Biallelic` | 2 | +------------------+-------------------+----------------+ | :math:`\ge 3` | ``None`` | | +------------------+-------------------+----------------+ .. note:: - `BIALLELIC_ALT` includes both homozygous and compound heterozygous genotypes. + `Biallelic` includes both homozygous and compound heterozygous genotypes. Partitions @@ -100,7 +100,7 @@ Partitions Sometimes we are interested in lumping several genotype categories into a group or and then comparing the groups. For instance, to compare phenotype of the individuals with *at least one* frameshift allele with those with *no* frameshift allele. Alternatively, we may only want to analyze a subset of the genotype categories, -such as `HET` vs. `BIALLELIC_ALT`. +such as `Monoallelic` vs. `Biallelic`. The `partitions` option of the :func:`~gpsea.analysis.predicate.genotype.autosomal_recessive` function lets us do this. @@ -127,7 +127,7 @@ with the :func:`~gpsea.analysis.predicate.genotype.autosomal_recessive` function >>> from gpsea.analysis.predicate.genotype import autosomal_recessive >>> gt_predicate = autosomal_recessive() >>> gt_predicate.display_question() -'What is the genotype group: HOM_REF, HET, BIALLELIC_ALT' +'What is the genotype group: No allele, Monoallelic, Biallelic' Use a variant subset @@ -148,14 +148,14 @@ and then use it to create the autosomal recessive predicate: >>> gt_predicate = autosomal_recessive(is_missense) >>> gt_predicate.display_question() -'What is the genotype group: HOM_REF, HET, BIALLELIC_ALT' +'What is the genotype group: No allele, Monoallelic, Biallelic' This predicate will assign the individuals into one of the listed genotype categories based on the allele counts of the missense variants. -Compare `HET` vs. `BIALLELIC_ALT` ---------------------------------- +Compare `Monoallelic` vs. `Biallelic` +------------------------------------- We can provide ``partitions`` to only compare the heterozygotes with those carrying biallelic alt mutations (homozygous alternate or compound heterozygous): @@ -163,7 +163,7 @@ biallelic alt mutations (homozygous alternate or compound heterozygous): We consult the *Autosomal recessive predicate categories* table for the category indices and we create the genotype group partitions: ->>> # `1` for `HET` and `2` for `BIALLELIC_ALT` +>>> # `1` for `Monoallelic` and `2` for `Biallelic` >>> partitions = ({1,}, {2,}) which we use to create the autosomal recessive predicate: @@ -172,4 +172,4 @@ which we use to create the autosomal recessive predicate: ... partitions=partitions, ... ) >>> gt_predicate.display_question() -'What is the genotype group: HET, BIALLELIC_ALT' +'What is the genotype group: Monoallelic, Biallelic' diff --git a/src/gpsea/analysis/predicate/genotype/_test__gt_predicates.py b/src/gpsea/analysis/predicate/genotype/_test__gt_predicates.py index c363a39f..1bb7ff13 100644 --- a/src/gpsea/analysis/predicate/genotype/_test__gt_predicates.py +++ b/src/gpsea/analysis/predicate/genotype/_test__gt_predicates.py @@ -66,25 +66,25 @@ def test_build_count_to_cat( [(0,), (1,)], ModeOfInheritanceInfo.autosomal_dominant(), { - 0: "HOM_REF", - 1: "HET", + 0: "No allele", + 1: "Monoallelic", }, ), ( [(0,), (1,), (2,)], ModeOfInheritanceInfo.autosomal_recessive(), { - 0: "HOM_REF", - 1: "HET", - 2: "BIALLELIC_ALT", + 0: "No allele", + 1: "Monoallelic", + 2: "Biallelic", }, ), ( [(1,), (2,)], ModeOfInheritanceInfo.autosomal_recessive(), { - 1: "HET", - 2: "BIALLELIC_ALT", + 1: "Monoallelic", + 2: "Biallelic", }, ), ], diff --git a/src/gpsea/preprocessing/_config.py b/src/gpsea/preprocessing/_config.py index 27ada788..5ae53eac 100644 --- a/src/gpsea/preprocessing/_config.py +++ b/src/gpsea/preprocessing/_config.py @@ -430,7 +430,7 @@ def load_phenopacket_files( def load_phenopackets( - phenopackets: typing.Iterator[Phenopacket], + phenopackets: typing.Iterable[Phenopacket], cohort_creator: CohortCreator[Phenopacket], validation_policy: typing.Literal["permissive", "lenient", "strict"] = "permissive", ) -> typing.Tuple[Cohort, PreprocessingValidationResult]: diff --git a/src/gpsea/view/_report.py b/src/gpsea/view/_report.py index 387b9297..34f030ca 100644 --- a/src/gpsea/view/_report.py +++ b/src/gpsea/view/_report.py @@ -43,11 +43,12 @@ def html(self) -> str: def write(self, fh: typing.Union[io.IOBase, str]): should_close = isinstance(fh, str) + fout = None try: fout = open_text_io_handle_for_writing(fh) fout.write(self._html) except Exception: - if should_close: + if should_close and fout is not None: fout.close() def _repr_html_(self) -> str: diff --git a/tests/analysis/predicate/genotype/test_gt_predicates.py b/tests/analysis/predicate/genotype/test_gt_predicates.py index 03120a82..d799e22d 100644 --- a/tests/analysis/predicate/genotype/test_gt_predicates.py +++ b/tests/analysis/predicate/genotype/test_gt_predicates.py @@ -91,9 +91,9 @@ def variant_predicate(self) -> VariantPredicate: @pytest.mark.parametrize( "patient_name,name", [ - ("adam", "HOM_REF"), - ("eve", "HET"), - ("cain", "HET"), + ("adam", "No allele"), + ("eve", "Monoallelic"), + ("cain", "Monoallelic"), ], ) def test_autosomal_dominant( @@ -115,9 +115,9 @@ def test_autosomal_dominant( @pytest.mark.parametrize( "patient_name,name", [ - ("adam", "HET"), # 0/0 & 0/1 - ("eve", "HET"), # 0/1 & 0/0 - ("cain", "HET"), # 0/1 & 0/0 + ("adam", "Monoallelic"), # 0/0 & 0/1 + ("eve", "Monoallelic"), # 0/1 & 0/0 + ("cain", "Monoallelic"), # 0/1 & 0/0 ], ) def test_autosomal_dominant__with_default_predicate( @@ -138,10 +138,10 @@ def test_autosomal_dominant__with_default_predicate( @pytest.mark.parametrize( "patient_name,name", [ - ("walt", "HET"), - ("skyler", "HET"), - ("flynn", "BIALLELIC_ALT"), - ("holly", "HOM_REF"), + ("walt", "Monoallelic"), + ("skyler", "Monoallelic"), + ("flynn", "Biallelic"), + ("holly", "No allele"), ], ) def test_autosomal_recessive( @@ -164,10 +164,10 @@ def test_autosomal_recessive( "patient_name,name", [ # The White family has two variants: - ("walt", "BIALLELIC_ALT"), # 0/1 & 0/1 - ("skyler", "BIALLELIC_ALT"), # 0/1 & 0/1 - ("flynn", "BIALLELIC_ALT"), # 1/1 & 0/0 - ("holly", "BIALLELIC_ALT"), # 0/0 & 1/1 + ("walt", "Biallelic"), # 0/1 & 0/1 + ("skyler", "Biallelic"), # 0/1 & 0/1 + ("flynn", "Biallelic"), # 1/1 & 0/0 + ("holly", "Biallelic"), # 0/0 & 1/1 ], ) def test_autosomal_recessive__with_default_predicate(