Skip to content

Commit

Permalink
use only variants present in annotation dataframe
Browse files (browse the repository at this point in the history)
  • Loading branch information
bfclarke committed Jan 20, 2024
1 parent db9a28e commit 0e3f9c3
Showing 1 changed file with 1 addition and 4 deletions.
5 changes: 1 addition & 4 deletions deeprvat/data/dense_gt.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -542,10 +542,6 @@ def setup_variants(
variants = dd.read_parquet(self.variant_filename, engine="pyarrow").compute()
variants = variants.set_index("id", drop=False)
variants = variants.drop(columns="matrix_index", errors="ignore")
self.variants_to_keep = variants['id']
if self.variants_to_keep is not None:
logger.info("Selecting subset of variants as defined by variants_to_keep")
variants = variants.loc[self.variants_to_keep]
logger.debug(" Filtering variants")
if min_common_variant_count is not None:
mask = (variants["count"] >= min_common_variant_count) & (
Expand Down Expand Up @@ -588,6 +584,7 @@ def setup_variants(
rare_variant_mask = ~mask
chromosome_mask = variants["chrom"].isin(self.chromosomes).to_numpy()
additional_mask = chromosome_mask
additional_mask &= variants["id"].isin(set(self.annotation_df.index))
if self.exons_to_keep is not None:
raise NotImplementedError("The variant dataframes have outdated exon_ids")
additional_mask &= (
Expand Down

0 comments on commit 0e3f9c3

Please sign in to comment.