Merge branch 'feature-precomputed-burden-testing' of github.com:PMBio/deeprvat into feature-precomputed-burden-testing
PMBio · Mar 29, 2024 · 67fa91c · 67fa91c
2 parents 7925630 + 1f8c424
commit 67fa91c
Showing 1 changed file with 4 additions and 2 deletions.
diff --git a/deeprvat/deeprvat/associate.py b/deeprvat/deeprvat/associate.py
@@ -509,6 +509,7 @@ def make_regenie_input_(
     dataset_files = [p[1] for p in phenotype]
     xy_dirs = [p[2] for p in phenotype]
 
+    # Load sample_ids (and covariates) only from the first xy_dir; the ys are loaded from every xy_dir
     sample_ids = zarr.load(xy_dirs[0] / "sample_ids.zarr")
     covariates = zarr.load(xy_dirs[0] / "x.zarr")
     ys = [zarr.load(b / "y.zarr") for b in xy_dirs]
@@ -520,6 +521,7 @@ def make_regenie_input_(
 
     n_samples = sample_ids.shape[0]
     assert covariates.shape[0] == n_samples
+    # Assert that ALL y.zarr arrays have the same length as the single sample_ids zarr loaded above
     assert all([y.shape[0] == n_samples for y in ys])
 
     # Sanity check: sample_ids and covariates should be consistent for all phenotypes
@@ -566,13 +568,13 @@ def make_regenie_input_(
         pheno_df.to_csv(phenotype_file, sep=" ", index=False, na_rep="NA")
 
     if not skip_burdens:
-        burden_file, gene_file, sample_file = burdens_genes_samples
+        burden_file, gene_file, b_sample_file = burdens_genes_samples
 
         genes = np.load(gene_file)
         n_genes = genes.shape[0]
 
         sample_ids = zarr.load(
-            sample_file
+            b_sample_file
         )  # Might be different from those for the phenotypes
         n_samples = sample_ids.shape[0]