diff --git a/pipelines/cv_training/cv_burdens.snakefile b/pipelines/cv_training/cv_burdens.snakefile index 0cce2d35..6a5572f1 100644 --- a/pipelines/cv_training/cv_burdens.snakefile +++ b/pipelines/cv_training/cv_burdens.snakefile @@ -26,6 +26,9 @@ rule make_deeprvat_test_config: data_config="cv_split{cv_split}/deeprvat/{phenotype}/deeprvat/config.yaml", output: data_config_test="cv_split{cv_split}/deeprvat/{phenotype}/deeprvat/config_test.yaml", + log: + stdout="logs/make_deeprvat_test_config/cv_split{cv_split}_{phenotype}.stdout", + stderr="logs/make_deeprvat_test_config/cv_split{cv_split}_{phenotype}.stderr" shell: " && ".join( [ @@ -47,6 +50,9 @@ use rule association_dataset from deeprvat_workflow as deeprvat_association_data output: temp("cv_split{cv_split}/deeprvat/{phenotype}/deeprvat/association_dataset.pkl"), threads: 4 + log: + stdout="logs/association_dataset/cv_split{cv_split}_{phenotype}.stdout", + stderr="logs/association_dataset/cv_split{cv_split}_{phenotype}.stderr" use rule association_dataset_burdens from deeprvat_workflow as deeprvat_association_dataset_burdens with: input: @@ -54,6 +60,9 @@ use rule association_dataset_burdens from deeprvat_workflow as deeprvat_associat output: temp("cv_split{cv_split}/deeprvat/burdens/association_dataset.pkl"), threads: 4 + log: + stdout=f"logs/association_dataset_burdens/cv_split{{cv_split}}_{burden_phenotype}.stdout", + stderr=f"logs/association_dataset_burdens/cv_split{{cv_split}}_{burden_phenotype}.stderr" rule combine_test_burdens: @@ -94,6 +103,9 @@ rule combine_test_burdens: ), resources: mem_mb=lambda wildcards, attempt: 32000 + attempt * 4098 * 2, + log: + stdout="logs/combine_test_burdens/{phenotype}.stdout", + stderr="logs/combine_test_burdens/{phenotype}.stderr" shell: " && ".join( [ @@ -118,11 +130,17 @@ rule combine_test_burdens: use rule combine_burdens from deeprvat_workflow as deeprvat_combine_burdens with: params: prefix="cv_split{cv_split}/deeprvat", + log: + 
stdout="logs/combine_burdens/cv_split{cv_split}.stdout", + stderr="logs/combine_burdens/cv_split{cv_split}.stderr" use rule compute_burdens from deeprvat_workflow as deeprvat_compute_burdens with: params: prefix="cv_split{cv_split}/deeprvat", + log: + stdout="logs/compute_burdens/cv_split{cv_split}_burdens_{chunk}.stdout", + stderr="logs/compute_burdens/cv_split{cv_split}_burdens_{chunk}.stderr" use rule compute_xy from deeprvat_workflow as deeprvat_compute_xy with: @@ -133,6 +151,9 @@ use rule compute_xy from deeprvat_workflow as deeprvat_compute_xy with: samples = directory('cv_split{cv_split}/deeprvat/{phenotype}/deeprvat/xy/sample_ids.zarr'), x = directory('cv_split{cv_split}/deeprvat/{phenotype}/deeprvat/xy/x.zarr'), y = directory('cv_split{cv_split}/deeprvat/{phenotype}/deeprvat/xy/y.zarr'), + log: + stdout="logs/compute_xy/cv_split{cv_split}_{phenotype}.stdout", + stderr="logs/compute_xy/cv_split{cv_split}_{phenotype}.stderr" use rule reverse_models from deeprvat_workflow as deeprvat_reverse_models diff --git a/pipelines/cv_training/cv_training.snakefile b/pipelines/cv_training/cv_training.snakefile index aef4419b..35679b99 100644 --- a/pipelines/cv_training/cv_training.snakefile +++ b/pipelines/cv_training/cv_training.snakefile @@ -18,6 +18,9 @@ rule spread_config: threads: 1 resources: mem_mb = 1024, + log: + stdout="logs/spread_config/cv_split{cv_split}.stdout", + stderr="logs/spread_config/cv_split{cv_split}.stderr" shell: ' && '.join([ conda_check, @@ -48,7 +51,9 @@ use rule link_config from deeprvat_workflow as deeprvat_link_config use rule best_training_run from deeprvat_workflow as deeprvat_best_training_run with: params: prefix = 'cv_split{cv_split}/deeprvat' - + log: + stdout="logs/best_training_run/cv_split{cv_split}_repeat_{repeat}.stdout", + stderr="logs/best_training_run/cv_split{cv_split}_repeat_{repeat}.stderr" use rule train from deeprvat_workflow as deeprvat_train with: priority: 1000 @@ -85,9 +90,9 @@ use rule config from deeprvat_workflow 
as deeprvat_config with: baseline_out = lambda wildcards: f'--baseline-results-out cv_split{wildcards.cv_split}/deeprvat/{wildcards.phenotype}/deeprvat/baseline_results.parquet' if wildcards.phenotype in training_phenotypes else ' ', seed_genes_out = lambda wildcards: f'--seed-genes-out cv_split{wildcards.cv_split}/deeprvat/{wildcards.phenotype}/deeprvat/seed_genes.parquet' if wildcards.phenotype in training_phenotypes else ' ', association_only = lambda wildcards: f'--association-only' if wildcards.phenotype not in training_phenotypes else ' ' + log: + stdout="logs/config/cv_split{cv_split}_{phenotype}.stdout", + stderr="logs/config/cv_split{cv_split}_{phenotype}.stderr" use rule create_main_config from deeprvat_workflow as deeprvat_create_main_config - - - diff --git a/pipelines/cv_training/cv_training_association_testing.snakefile b/pipelines/cv_training/cv_training_association_testing.snakefile index 9f507b5b..be390cea 100644 --- a/pipelines/cv_training/cv_training_association_testing.snakefile +++ b/pipelines/cv_training/cv_training_association_testing.snakefile @@ -1,13 +1,17 @@ from pathlib import Path from deeprvat.deeprvat.config import create_main_config +import logging create_main_config("deeprvat_input_config.yaml") -configfile: "deeprvat_config.yaml" +# remove duplicate logging handlers installed by the imported deeprvat.config module +logging.root.handlers.clear() +configfile: "deeprvat_config.yaml" conda_check = 'conda info | grep "active environment"' +logging_redirct = "1> {log.stdout} 2> {log.stderr}" # for Linux-based systems; NOTE(review): identifier 'logging_redirct' is misspelled ('redirect') — rename only together with every usage site debug_flag = config.get("debug", False) phenotypes = config["phenotypes"] phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes