Skip to content

Commit

Permalink
bug fix
Browse files Browse the repository at this point in the history
  • Loading branch information
bfclarke committed Jan 23, 2024
1 parent 42d9688 commit 4210bbf
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions deeprvat/data/dense_gt.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,8 +266,8 @@ def setup_phenotypes(
self.phenotype_df = pd.read_parquet(phenotype_file, engine="pyarrow")
gt_file = h5py.File(self.gt_filename, "r") #TODO change this to using with open
samples_gt = gt_file['samples'][:]
samples_gt = np.array([item.decode('utf-8') for item in samples_gt]).astype(int)
samples_phenotype_df = np.array(self.phenotype_df.index.astype(int))
samples_gt = np.array([item.decode('utf-8') for item in samples_gt])
samples_phenotype_df = np.array(self.phenotype_df.index)
assert all(samples_phenotype_df == samples_gt) #TODO allow this to be different,
# in principle this can be done by introducing self.index_map_geno and self.index_map_pheno (needs a sanity check),
# but phenotype_df first has to be sorted in the same order as samples_gt
Expand Down Expand Up @@ -299,7 +299,7 @@ def setup_phenotypes(
binary_cols = [
c for c in self.y_phenotypes if self.phenotype_df[c].dtype == bool
]
samples_to_keep_mask = [True if i in samples_to_keep else False for i in self.phenotype_df.index.astype(int)]
samples_to_keep_mask = [True if i in samples_to_keep else False for i in self.phenotype_df.index]
assert sum(samples_to_keep_mask) == len(samples_to_keep)
mask_cols = copy.deepcopy(self.x_phenotypes)
if skip_y_na:
Expand All @@ -311,7 +311,7 @@ def setup_phenotypes(
f"Number of samples with phenotype and covariates: {mask.sum()}"
)
mask &= samples_to_keep_mask
samples_to_keep = self.phenotype_df.index[mask].astype(int)
samples_to_keep = self.phenotype_df.index[mask]
self.n_samples = mask.sum()
logger.info(
f"Final number of kept samples: {self.n_samples}"
Expand Down

0 comments on commit 4210bbf

Please sign in to comment.