From 3d25aa05ed92c1bb5d04cdbab1e03b894a2d82d8 Mon Sep 17 00:00:00 2001 From: Thibault Date: Wed, 6 Dec 2023 12:09:46 +0100 Subject: [PATCH] Added code to deduplicate genes with common variants when grouping common variants per gene. --- deeprvat/data/dense_gt.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/deeprvat/data/dense_gt.py b/deeprvat/data/dense_gt.py index 7aca60c0..569dd22e 100644 --- a/deeprvat/data/dense_gt.py +++ b/deeprvat/data/dense_gt.py @@ -693,6 +693,8 @@ def setup_common_groups(self): # add grouping column (gene_id) here from self.annotations # same merge logic as in line 579 + # import pdb; pdb.set_trace() + variants_with_gene_ids = safe_merge( self.variants[["id", "matrix_index"]].reset_index(drop=True), self.annotation_df[["gene_ids"]].reset_index(), @@ -708,6 +710,9 @@ def setup_common_groups(self): common_variant_groups[self.grouping_column].notna() ] + # drop duplicates after explode + common_variant_groups = common_variant_groups.drop_duplicates(subset=["id", "gene_ids"]) + if self.return_sparse: logger.debug(" Computing group IDs") if not hasattr(self, "group_names"):