Skip to content

Commit

Permalink
Merge pull request #280 from monarch-initiative/relax-handling-of-str…
Browse files Browse the repository at this point in the history
…ict-mtc-filtering

Relax handling of the MTC filtering
  • Loading branch information
ielis authored Sep 23, 2024
2 parents 4e9f7ab + 8a9dc2c commit 620378e
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 27 deletions.
75 changes: 59 additions & 16 deletions src/gpsea/analysis/pcats/_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,37 @@ def __init__(
self._corrected_pvals = (
None if corrected_pvals is None else tuple(corrected_pvals)
)
assert isinstance(gt_predicate, GenotypePolyPredicate)
self._gt_predicate = gt_predicate
errors = self._check_sanity()
if errors:
raise ValueError(os.linesep.join(errors))

def _check_sanity(self) -> typing.Sequence[str]:
errors = []
# All sequences must have the same lengths ...
for seq, name in (
(self._n_usable, 'n_usable'),
(self._all_counts, 'all_counts'),
(self._pvals, 'pvals'),
):
if len(self._pheno_predicates) != len(seq):
errors.append(
f"`len(pheno_predicates)` must be the same as `len({name})` but "
f"{len(self._pheno_predicates)}!={len(seq)}"
)

# ... including the optional corrected p values
if self._corrected_pvals is not None and len(self._pheno_predicates) != len(self._corrected_pvals):
errors.append(
f"`len(pheno_predicates)` must be the same as `len(corrected_pvals)` but "
f"{len(self._pheno_predicates)}!={len(self._corrected_pvals)}"
)

if not isinstance(self._gt_predicate, GenotypePolyPredicate):
errors.append(
"`gt_predicate` must be an instance of `GenotypePolyPredicate`"
)
return errors

@property
def gt_predicate(self) -> GenotypePolyPredicate:
Expand Down Expand Up @@ -428,6 +457,19 @@ def __init__(
self._mtc_filter_name = mtc_filter_name
self._mtc_filter_results = tuple(mtc_filter_results)
self._mtc_name = mtc_name

errors = self._check_hpo_result_sanity()
if errors:
raise ValueError(os.linesep.join(errors))

def _check_hpo_result_sanity(self) -> typing.Sequence[str]:
errors = []
if len(self._pheno_predicates) != len(self._mtc_filter_results):
errors.append(
f"`len(pheno_predicates)` must be the same as `len(mtc_filter_results)` but "
f"{len(self._pheno_predicates)}!={len(self._mtc_filter_results)}"
)
return errors

@property
def mtc_filter_name(self) -> str:
Expand Down Expand Up @@ -515,25 +557,26 @@ def _compute_result(
ph_predicates=pheno_predicates,
counts=all_counts,
)
mtc_mask = np.array([r.is_passed() for r in mtc_filter_results])
if not mtc_mask.any():
raise ValueError("No phenotypes are left for the analysis after MTC filtering step")

# 3 - Compute nominal p values
pvals = np.full(shape=(len(n_usable),), fill_value=np.nan)
pvals[mtc_mask] = self._compute_nominal_pvals(
n_usable=slice_list_in_numpy_style(n_usable, mtc_mask),
all_counts=slice_list_in_numpy_style(all_counts, mtc_mask),
)
corrected_pvals = None

# 4 - Apply Multiple Testing Correction
if self._mtc_correction is None:
corrected_pvals = None
else:
corrected_pvals = np.full(shape=pvals.shape, fill_value=np.nan)
# Do not test the p values that have been filtered out.
corrected_pvals[mtc_mask] = self._apply_mtc(pvals=pvals[mtc_mask])
mtc_mask = np.array([r.is_passed() for r in mtc_filter_results])
if mtc_mask.any():
# We have at least one HPO term to test.

# 3 - Compute nominal p values
pvals[mtc_mask] = self._compute_nominal_pvals(
n_usable=slice_list_in_numpy_style(n_usable, mtc_mask),
all_counts=slice_list_in_numpy_style(all_counts, mtc_mask),
)

# 4 - Apply Multiple Testing Correction
if self._mtc_correction is not None:
corrected_pvals = np.full(shape=pvals.shape, fill_value=np.nan)
# Do not test the p values that have been filtered out.
corrected_pvals[mtc_mask] = self._apply_mtc(pvals=pvals[mtc_mask])

return HpoTermAnalysisResult(
pheno_predicates=pheno_predicates,
n_usable=n_usable,
Expand Down
23 changes: 12 additions & 11 deletions tests/analysis/pcats/test_hpo_term_analysis.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import typing

import hpotk
import numpy as np
import pytest

from gpsea.model import Cohort
Expand Down Expand Up @@ -64,7 +65,7 @@ def test_compare_genotype_vs_phenotypes(
0.48571428571428565,
float("nan"),
0.1048951048951049,
1.,
1.0,
],
nan_ok=True,
)
Expand All @@ -80,21 +81,21 @@ def test_compare_genotype_vs_phenotypes(
nan_ok=True,
)

def test_compare_genotype_vs_phenotypes_explodes_if_no_phenotypes_are_left_after_mtc_filter(
def test_compare_genotype_vs_phenotypes_can_handle_if_no_phenotypes_are_left_after_mtc_filter(
self,
analysis: HpoTermAnalysis,
degenerated_cohort: Cohort,
suox_gt_predicate: GenotypePolyPredicate,
suox_pheno_predicates: typing.Sequence[PhenotypePolyPredicate[hpotk.TermId]],
):
with pytest.raises(ValueError) as e:
analysis.compare_genotype_vs_phenotypes(
cohort=degenerated_cohort,
gt_predicate=suox_gt_predicate,
pheno_predicates=suox_pheno_predicates,
)
result = analysis.compare_genotype_vs_phenotypes(
cohort=degenerated_cohort,
gt_predicate=suox_gt_predicate,
pheno_predicates=suox_pheno_predicates,
)

assert (
e.value.args[0]
== "No phenotypes are left for the analysis after MTC filtering step"
)
result.total_tests == 0
), "No tests should have been done due to MTC filtering"
assert np.all(np.isnan(result.pvals)), "All p values should be NaN"
assert result.corrected_pvals is None

0 comments on commit 620378e

Please sign in to comment.