From 3d25aa05ed92c1bb5d04cdbab1e03b894a2d82d8 Mon Sep 17 00:00:00 2001
From: Thibault
Date: Wed, 6 Dec 2023 12:09:46 +0100
Subject: [PATCH] Added code to deduplicate genes with common variants when
grouping common variants per gene.
---
deeprvat/data/dense_gt.py | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/deeprvat/data/dense_gt.py b/deeprvat/data/dense_gt.py
index 7aca60c0..569dd22e 100644
--- a/deeprvat/data/dense_gt.py
+++ b/deeprvat/data/dense_gt.py
@@ -693,6 +693,8 @@ def setup_common_groups(self):
# add grouping column (gene_id) here from self.annotations
# same merge logic as in line 579
+ # import pdb; pdb.set_trace()
+
variants_with_gene_ids = safe_merge(
self.variants[["id", "matrix_index"]].reset_index(drop=True),
self.annotation_df[["gene_ids"]].reset_index(),
@@ -708,6 +710,9 @@ def setup_common_groups(self):
common_variant_groups[self.grouping_column].notna()
]
+ # drop duplicate (id, gene_ids) rows after explode
+ common_variant_groups = common_variant_groups.drop_duplicates(subset=["id", "gene_ids"])
+
if self.return_sparse:
logger.debug(" Computing group IDs")
if not hasattr(self, "group_names"):