From 6e89bd8902dc1b5dabb6cb34cf0f8ee0c28f815e Mon Sep 17 00:00:00 2001 From: Brian Clarke <9725212+bfclarke@users.noreply.github.com> Date: Mon, 11 Nov 2024 10:57:25 +0100 Subject: [PATCH] drop scores lower than median and scale to fill [0, 1] --- deeprvat/deeprvat/associate.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/deeprvat/deeprvat/associate.py b/deeprvat/deeprvat/associate.py index e9b073db..a147a055 100644 --- a/deeprvat/deeprvat/associate.py +++ b/deeprvat/deeprvat/associate.py @@ -442,6 +442,7 @@ def make_regenie_input_( pseudovar_pos = (this_gene_pos.End - this_gene_pos.Start).to_numpy().astype(int) ensgids = this_gene_pos.index.to_numpy() + median_score = np.median(burdens[:, 0]) # TODO: This is a hack!!!! logger.info(f"Writing pseudovariants to {bgen}") with BgenWriter( bgen, @@ -468,8 +469,10 @@ def make_regenie_input_( # 1. Warn if burdens are censored to remain > 0 # 2. Offset/scale more intelligently to fill out [0, 1] better # 3. (maybe) Allow for setting offset/scale as parameter - offset = 0.251 + offset = median_score this_burdens = np.maximum(this_burdens - offset, 0) + max_burden = np.max(this_burdens) + this_burdens = this_burdens / max_burden # REGENIE assumes by default genotypes are stored alt-first genotypes = np.stack(