bug fixes

PMBio · Mar 12, 2024 · 8deacd9 · 8deacd9
1 parent 742b09f
commit 8deacd9
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 29 deletions.
diff --git a/deeprvat/deeprvat/associate.py b/deeprvat/deeprvat/associate.py
@@ -474,11 +474,10 @@ def compute_burdens_(
 
 def make_regenie_input_(
     debug: bool,
-    skip_samples: bool,
     skip_covariates: bool,
     skip_phenotypes: bool,
     skip_burdens: bool,
-    burdens_genes_samples: Optional[Path],
+    burdens_genes_samples: Optional[Tuple[Path, Path, Path]],
     repeat: int,
     average_repeats: bool,
     phenotype: Tuple[Tuple[str, Path, Path, Path]],
@@ -494,8 +493,6 @@ def make_regenie_input_(
     ## Check options
     if not skip_burdens and burdens_genes_samples is None:
         raise ValueError("--burdens-genes must be specified if --skip-burdens is not")
-    if not skip_samples and sample_file is None:
-        raise ValueError("Either sample_file or skip_samples must be specified")
     if not skip_covariates and covariate_file is None:
         raise ValueError("Either covariate_file or skip_covariates must be specified")
     if not skip_phenotypes and phenotype_file is None:
@@ -547,22 +544,6 @@ def make_regenie_input_(
 
     sample_df = pd.DataFrame({"FID": sample_ids, "IID": sample_ids})
 
-    if not skip_samples:
-        ## Make sample file
-        logger.info(f"Creating sample file {sample_file}")
-        samples_out = pd.concat(
-            [
-                pd.DataFrame({"ID_1": 0, "ID_2": 0}, index=[0]),
-                sample_df.rename(
-                    columns={
-                        "FID": "ID_1",
-                        "IID": "ID_2",
-                    }
-                ),
-            ]
-        )
-        samples_out.to_csv(sample_file, sep=" ", index=False)
-
     if not skip_covariates:
         ## Make covariate file
         logger.info(f"Creating covariate file {covariate_file}")
@@ -595,9 +576,25 @@ def make_regenie_input_(
         )  # Might be different from those for the phenotypes
         n_samples = sample_ids.shape[0]
 
+        ## Make sample file
+        logger.info(f"Creating sample file {sample_file}")
+        sample_df = pd.DataFrame({"FID": sample_ids, "IID": sample_ids})
+        samples_out = pd.concat(
+            [
+                pd.DataFrame({"ID_1": 0, "ID_2": 0}, index=[0]),
+                sample_df.rename(
+                    columns={
+                        "FID": "ID_1",
+                        "IID": "ID_2",
+                    }
+                ),
+            ]
+        )
+        samples_out.to_csv(sample_file, sep=" ", index=False)
+
         burdens_zarr = zarr.open(burden_file)
         if not debug:
-            # assert burdens_zarr.shape[0] == n_samples
+            assert burdens_zarr.shape[0] == n_samples
             assert burdens_zarr.shape[1] == n_genes
 
         if average_repeats:
@@ -657,7 +654,6 @@ def make_regenie_input_(
 
 @cli.command()
 @click.option("--debug", is_flag=True)
-@click.option("--skip-samples", is_flag=True)
 @click.option("--skip-covariates", is_flag=True)
 @click.option("--skip-phenotypes", is_flag=True)
 @click.option("--skip-burdens", is_flag=True)
@@ -690,7 +686,6 @@ def make_regenie_input_(
 @click.argument("gtf", type=click.Path(exists=True, path_type=Path))
 def make_regenie_input(
     debug: bool,
-    skip_samples: bool,
     skip_covariates: bool,
     skip_phenotypes: bool,
     skip_burdens: bool,
@@ -707,7 +702,6 @@ def make_regenie_input(
 ):
     make_regenie_input_(
         debug=debug,
-        skip_samples=skip_samples,
         skip_covariates=skip_covariates,
         skip_phenotypes=skip_phenotypes,
         skip_burdens=skip_burdens,

diff --git a/pipelines/association_testing/plot.snakefile b/pipelines/association_testing/plot.snakefile
@@ -1,3 +1,11 @@
+configfile: 'config.yaml'
+
+phenotypes = config['phenotypes']
+phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes
+training_phenotypes = config["training"].get("phenotypes", phenotypes)
+
+n_repeats = config['n_repeats']
+
 #requires that comparison_results.pkl is linked to the experiment directory
 #requires deeprvat-analysis to be installed
 DEEPRVAT_ANALYSIS_DIR = os.environ['DEEPRVAT_ANALYSIS_DIR']
@@ -43,4 +51,4 @@ rule compute_replication:
         '--out-file {output} '
         '--n-repeats {params.n_repeats} '
         '{params.result_files} '
-        './ '
+        './ '
diff --git a/pipelines/association_testing/regress_eval_regenie.snakefile b/pipelines/association_testing/regress_eval_regenie.snakefile
@@ -239,20 +239,21 @@ rule make_regenie_burdens:
         phenotypes = " ".join([f"--phenotype {p} {p}/deeprvat/association_dataset.pkl {p}/deeprvat/xy"
                                for p in phenotypes]) + " "
     output:
+        sample_file = "regenie_input/deeprvat_pseudovariants.sample",
         bgen = "regenie_input/deeprvat_pseudovariants.bgen",
     threads: 8
     resources:
         mem_mb = 64000
     shell:
         "deeprvat_associate make-regenie-input "
         + debug +
-        "--skip-samples "
         "--skip-covariates "
         "--skip-phenotypes "
         "--average-repeats "
-        "{params.phenotypes}"
+        "{params.phenotypes} "
         # "{input.dataset} "
         # "{wildcards.phenotype}/deeprvat/burdens "
+        "--sample-file {output.sample_file} "
         "--bgen {output.bgen} "
         "--burdens-genes-samples {input.burdens} {input.genes} {input.samples} "
         "{input.gene_file} "
@@ -272,7 +273,6 @@ rule make_regenie_metadata:
                                f"{p}/deeprvat/xy"
                                for p in phenotypes]) + " "
     output:
-        sample_file = "regenie_input/deeprvat_pseudovariants.sample",
         covariate_file = "regenie_input/covariates.txt",
         phenotype_file = "regenie_input/phenotypes.txt",
     threads: 1
@@ -285,7 +285,6 @@ rule make_regenie_metadata:
         "{params.phenotypes}"
         # "{input.dataset} "
         # "{wildcards.phenotype}/deeprvat/burdens "
-        "--sample-file {output.sample_file} "
         "--covariate-file {output.covariate_file} "
         "--phenotype-file {output.phenotype_file} "
         "{input.gene_file} "