Skip to content

Commit

Permalink
Added code to deduplicate genes with common variants when grouping co…
Browse files Browse the repository at this point in the history
…mmon variants per gene.
  • Loading branch information
ThibaultBechtler committed Dec 6, 2023
1 parent 3702039 commit 3d25aa0
Showing 1 changed file with 5 additions and 0 deletions.
5 changes: 5 additions & 0 deletions deeprvat/data/dense_gt.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,8 @@ def setup_common_groups(self):
# add grouping column (gene_id) here from self.annotations
# same merge logic as in line 579

# import pdb; pdb.set_trace()

variants_with_gene_ids = safe_merge(
self.variants[["id", "matrix_index"]].reset_index(drop=True),
self.annotation_df[["gene_ids"]].reset_index(),
Expand All @@ -708,6 +710,9 @@ def setup_common_groups(self):
common_variant_groups[self.grouping_column].notna()
]

# drop duplicates after explode
common_variant_groups = common_variant_groups.drop_duplicates(subset=["id", "gene_ids"])

if self.return_sparse:
logger.debug(" Computing group IDs")
if not hasattr(self, "group_names"):
Expand Down

0 comments on commit 3d25aa0

Please sign in to comment.