From e11a44c9374dea931240c09533a19ddd9f174523 Mon Sep 17 00:00:00 2001 From: Brian Clarke <9725212+bfclarke@users.noreply.github.com> Date: Wed, 30 Oct 2024 12:18:29 +0100 Subject: [PATCH] check for constant gene impairments --- deeprvat/deeprvat/associate.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/deeprvat/deeprvat/associate.py b/deeprvat/deeprvat/associate.py index ad6560c7..daf0a908 100644 --- a/deeprvat/deeprvat/associate.py +++ b/deeprvat/deeprvat/associate.py @@ -449,9 +449,19 @@ def make_regenie_input_( samples=list(sample_ids.astype(str)), metadata="Pseudovariants containing DeepRVAT gene impairment scores. One pseudovariant per gene.", ) as f: + skipped_genes = 0 for i in trange(n_genes): varid = f"pseudovariant_gene_{ensgids[i]}" this_burdens = burdens[:, i] + if np.all(this_burdens == this_burdens[0]): + # burdens are constant, cannot perform association testing + logger.warning( + f"Gene impairment scores for gene {i} ({ensgids[i]}) " + "are all constant. Gene will not be written to BGEN " + "and will be skipped during association testing" + ) + skipped_genes += 1 + continue # Rescale scores to fill out range [0, 1] (making dosages in [0, 2]) min_burden = np.min(this_burdens) @@ -478,6 +488,11 @@ def make_regenie_input_( bit_depth=16, ) + if skipped_genes > 0: + logger.warning( + f"Skipped {skipped_genes} with constant gene impairment scores" + ) + @cli.command() @click.option("--debug", is_flag=True)