From 46d5cac5e4f4a10627c100a877b48da4ad32808b Mon Sep 17 00:00:00 2001 From: Kayla Meyer Date: Tue, 29 Oct 2024 11:51:52 +0100 Subject: [PATCH] add in logging redirct -remove logging from train rule (already performed with parallel) - remove params.prefix bug from log paths --- .../association_dataset.snakefile | 6 +-- .../association_testing/burdens.snakefile | 13 +++--- .../regress_eval_regenie.snakefile | 41 ++++++++++++++++--- ...ting_control_for_common_variants.snakefile | 2 +- ...ting_precomputed_burdens_regenie.snakefile | 2 +- .../association_testing_pretrained.snakefile | 2 +- ...ation_testing_pretrained_regenie.snakefile | 2 +- pipelines/run_training.snakefile | 2 +- pipelines/training/train.snakefile | 14 +------ pipelines/training/training_dataset.snakefile | 4 +- .../training_association_testing.snakefile | 2 +- ...ning_association_testing_regenie.snakefile | 2 +- 12 files changed, 56 insertions(+), 36 deletions(-) diff --git a/pipelines/association_testing/association_dataset.snakefile b/pipelines/association_testing/association_dataset.snakefile index 8e20407b..fc751a7d 100644 --- a/pipelines/association_testing/association_dataset.snakefile +++ b/pipelines/association_testing/association_dataset.snakefile @@ -18,7 +18,7 @@ rule association_dataset: priority: 30 log: stdout="logs/association_dataset/{phenotype}.stdout", - stderr="logs/association_dataset/{phenotype}stderr" + stderr="logs/association_dataset/{phenotype}.stderr" shell: 'deeprvat_associate make-dataset ' + debug + @@ -38,8 +38,8 @@ rule association_dataset_burdens: mem_mb = lambda wildcards, attempt: 32000 * (attempt + 1) priority: 30 log: - stdout="logs/association_dataset_burdens/{phenotypes[0]}.stdout", - stderr="logs/association_dataset_burdens/{phenotypes[0]}stderr" + stdout=f"logs/association_dataset_burdens/{phenotypes[0]}.stdout", + stderr=f"logs/association_dataset_burdens/{phenotypes[0]}.stderr" shell: 'deeprvat_associate make-dataset ' + debug + diff --git a/pipelines/association_testing/burdens.snakefile b/pipelines/association_testing/burdens.snakefile index b02f6acf..d0941f62 100644 --- a/pipelines/association_testing/burdens.snakefile +++ b/pipelines/association_testing/burdens.snakefile @@ -17,8 +17,8 @@ rule combine_burdens: resources: mem_mb = lambda wildcards, attempt: 4098 + (attempt - 1) * 4098, log: - stdout="logs/combine_burdens/{params.prefix}/combine_burdens.stdout", - stderr="logs/combine_burdens/{params.prefix}/combine_burdens.stderr" + stdout="logs/combine_burdens/combine_burdens.stdout", + stderr="logs/combine_burdens/combine_burdens.stderr" shell: ' '.join([ 'deeprvat_associate combine-burden-chunks', @@ -82,8 +82,8 @@ rule compute_burdens: mem_mb = 32000, gpus = 1 log: - stdout="logs/compute_burdens/{params.prefix}/compute_burdens_{chunk}.stdout", - stderr="logs/compute_burdens/{params.prefix}/compute_burdens_{chunk}.stderr" + stdout="logs/compute_burdens/compute_burdens_{chunk}.stdout", + stderr="logs/compute_burdens/compute_burdens_{chunk}.stderr" shell: ' '.join([ 'deeprvat_associate compute-burdens ' @@ -118,7 +118,8 @@ rule reverse_models: ("deeprvat_associate reverse-models " "{input.model_config} " "{input.data_config} " - "{input.checkpoints}"), + "{input.checkpoints} " + + logging_redirct), "touch {output} " - + logging_redirct + ]) diff --git a/pipelines/association_testing/regress_eval_regenie.snakefile b/pipelines/association_testing/regress_eval_regenie.snakefile index 03b80e3f..28860590 100644 --- a/pipelines/association_testing/regress_eval_regenie.snakefile +++ b/pipelines/association_testing/regress_eval_regenie.snakefile @@ -15,6 +15,9 @@ rule evaluate: mem_mb = 16000, params: use_baseline_results = '--use-baseline-results' if 'baseline_results' in config else '' + log: + stdout="logs/evaluate/{phenotype}.stdout", + stderr="logs/evaluate/{phenotype}.stderr" shell: 'deeprvat_evaluate ' + debug + @@ -22,7 +25,8 @@ rule evaluate: '--phenotype {wildcards.phenotype} ' '{input.associations} ' '{input.data_config} ' - '{wildcards.phenotype}/deeprvat/eval' + '{wildcards.phenotype}/deeprvat/eval ' + + logging_redirct rule all_regenie: input: @@ -45,10 +49,14 @@ rule convert_regenie_output: threads: 1 resources: mem_mb = 2048 + log: + stdout="logs/convert_regenie_output/convert_regenie_output.stdout", + stderr="logs/convert_regenie_output/convert_regenie_output.stderr" shell: "deeprvat_associate convert-regenie-output " "{params.pheno_options} " - "{params.gene_file}" + "{params.gene_file} " + + logging_redirct rule regenie_step2: input: @@ -68,6 +76,9 @@ rule regenie_step2: threads: 16 resources: mem_mb = 16384 + log: + stdout="logs/regenie_step2/regenie_step2.stdout", + stderr="logs/regenie_step2/regenie_step2.stderr", shell: "regenie " "--step 2 " @@ -80,7 +91,8 @@ rule regenie_step2: f"--bsize {regenie_step2_bsize} " "--threads 16 " + " ".join(regenie_config_step2.get("options", [])) + " " + - "--out regenie_output/step2/deeprvat" + "--out regenie_output/step2/deeprvat " + + logging_redirct rule regenie_step1: input: @@ -96,6 +108,9 @@ rule regenie_step1: threads: 24 resources: mem_mb = 16000 + log: + stdout="logs/regenie_step1/regenie_step1.stdout", + stderr="logs/regenie_step1/regenie_step1.stderr" shell: "mkdir -p regenie_step1_tmp && " "regenie " @@ -110,8 +125,10 @@ rule regenie_step1: "--lowmem " "--lowmem-prefix regenie_step1_tmp/deeprvat " + " ".join(regenie_config_step1.get("options", [])) + " " + - "--out regenie_output/step1/deeprvat ; " - "rm -rf regenie_step1_tmp" + "--out regenie_output/step1/deeprvat + + logging_redirct + " ; " + "rm -rf regenie_step1_tmp " + # rule regenie_step1_runl1: @@ -228,6 +245,9 @@ rule make_regenie_burdens: threads: 8 resources: mem_mb = 64000 + log: + stdout="logs/make_regenie_burdens/make_regenie_burdens.stdout", + stderr="logs/make_regenie_burdens/make_regenie_burdens.stderr" shell: "deeprvat_associate make-regenie-input " + debug + @@ -242,6 +262,7 @@ rule make_regenie_burdens: "--burdens-genes-samples {params.burdens} {params.genes} {params.samples} " "{input.gene_file} " "{input.gtf_file} " + + logging_redirct rule make_regenie_metadata: input: @@ -262,6 +283,9 @@ rule make_regenie_metadata: threads: 1 resources: mem_mb = 16000 + log: + stdout="logs/make_regenie_metadata/make_regenie_metadata.stdout", + stderr="logs/make_regenie_metadata/make_regenie_metadata.stderr", shell: "deeprvat_associate make-regenie-input " + debug + @@ -273,6 +297,7 @@ rule make_regenie_metadata: "--phenotype-file {output.phenotype_file} " "{input.gene_file} " "{input.gtf_file} " + + logging_redirct rule average_burdens: @@ -290,6 +315,9 @@ rule average_burdens: resources: mem_mb = lambda wildcards, attempt: 4098 + (attempt - 1) * 4098, priority: 10, + log: + stdout="logs/average_burdens/average_burdens_{chunk}.stdout", + stderr="logs/average_burdens/average_burdens_{chunk}.stderr" shell: ' && '.join([ ('deeprvat_associate average-burdens ' @@ -298,6 +326,7 @@ rule average_burdens: '{params.repeats} ' '--agg-fct mean ' #TODO remove this '{params.burdens_in} ' - '{params.burdens_out}'), + '{params.burdens_out} ' + + logging_redirct), 'touch {output}' ]) diff --git a/pipelines/association_testing_control_for_common_variants.snakefile b/pipelines/association_testing_control_for_common_variants.snakefile index 0f4cf75f..9ec50140 100644 --- a/pipelines/association_testing_control_for_common_variants.snakefile +++ b/pipelines/association_testing_control_for_common_variants.snakefile @@ -10,7 +10,7 @@ for handler in logging.root.handlers[:]: configfile: 'deeprvat_config.yaml' -logging_redirct = "> {log.stdout} 2> {log.stderr}" #for Linux-based systems +logging_redirct = "1> {log.stdout} 2> {log.stderr}" #for Linux-based systems debug_flag = config.get('debug', False) phenotypes = config['phenotypes'] phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes diff --git a/pipelines/association_testing_precomputed_burdens_regenie.snakefile b/pipelines/association_testing_precomputed_burdens_regenie.snakefile index 0ee0a993..913c4e32 100644 --- a/pipelines/association_testing_precomputed_burdens_regenie.snakefile +++ b/pipelines/association_testing_precomputed_burdens_regenie.snakefile @@ -2,7 +2,7 @@ from pathlib import Path configfile: 'config.yaml' -logging_redirct = "> {log.stdout} 2> {log.stderr}" #for Linux-based systems +logging_redirct = "1> {log.stdout} 2> {log.stderr}" #for Linux-based systems debug_flag = config.get('debug', False) phenotypes = config['phenotypes'] phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes diff --git a/pipelines/association_testing_pretrained.snakefile b/pipelines/association_testing_pretrained.snakefile index a57dad58..28ba9863 100644 --- a/pipelines/association_testing_pretrained.snakefile +++ b/pipelines/association_testing_pretrained.snakefile @@ -10,7 +10,7 @@ for handler in logging.root.handlers[:]: configfile: 'deeprvat_config.yaml' -logging_redirct = "> {log.stdout} 2> {log.stderr}" #for Linux-based systems +logging_redirct = "1> {log.stdout} 2> {log.stderr}" #for Linux-based systems debug_flag = config.get('debug', False) phenotypes = config['phenotypes'] phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes diff --git a/pipelines/association_testing_pretrained_regenie.snakefile b/pipelines/association_testing_pretrained_regenie.snakefile index 75cc80f9..0c883d63 100644 --- a/pipelines/association_testing_pretrained_regenie.snakefile +++ b/pipelines/association_testing_pretrained_regenie.snakefile @@ -10,7 +10,7 @@ for handler in logging.root.handlers[:]: configfile: 'deeprvat_config.yaml' -logging_redirct = "> {log.stdout} 2> {log.stderr}" #for Linux-based systems +logging_redirct = "1> {log.stdout} 2> {log.stderr}" #for Linux-based systems debug_flag = config.get('debug', False) phenotypes = config['phenotypes'] phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes diff --git a/pipelines/run_training.snakefile b/pipelines/run_training.snakefile index b3b2ce8a..78a2d5eb 100644 --- a/pipelines/run_training.snakefile +++ b/pipelines/run_training.snakefile @@ -10,7 +10,7 @@ for handler in logging.root.handlers[:]: configfile: 'deeprvat_config.yaml' -logging_redirct = "> {log.stdout} 2> {log.stderr}" #for Linux-based systems +logging_redirct = "1> {log.stdout} 2> {log.stderr}" #for Linux-based systems debug_flag = config.get('debug', False) debug = '--debug ' if debug_flag else '' deterministic_flag = config.get('deterministic', False) diff --git a/pipelines/training/train.snakefile b/pipelines/training/train.snakefile index 2a10d02e..1fa80b66 100644 --- a/pipelines/training/train.snakefile +++ b/pipelines/training/train.snakefile @@ -4,13 +4,9 @@ rule link_config: output: model_path / 'model_config.yaml' threads: 1 - log: - stdout="logs/link_config/link_config.stdout", - stderr="logs/link_config/link_config.stderr" shell: "ln -rfs {input} {output} " # "ln -s repeat_0/model_config.yaml {output}" - + logging_redirct rule best_training_run: input: @@ -28,8 +24,8 @@ rule best_training_run: resources: mem_mb = 2048, log: - stdout="logs/best_training_run/{params.prefix}/repeat_{repeat}.stdout", - stderr="logs/best_training_run/{params.prefix}/repeat_{repeat}.stderr" + stdout="logs/best_training_run/repeat_{repeat}.stdout", + stderr="logs/best_training_run/repeat_{repeat}.stderr" shell: ( 'deeprvat_train best-training-run ' @@ -68,11 +64,6 @@ rule train: resources: mem_mb = 20000, gpus = 1 - log: - stdout=expand("logs/train/{{params.prefix}}/train_repeat{repeat}_trial{trial_number}.stdout", - repeat=range(n_repeats), trial_number=range(n_trials)), - stderr=expand("logs/train/{{params.prefix}}/train_repeat{repeat}_trial{trial_number}.stderr", - repeat=range(n_repeats), trial_number=range(n_trials)), shell: f"parallel --jobs {n_parallel_training_jobs} --halt now,fail=1 --results {{params.prefix}}/train_repeat{{{{1}}}}_trial{{{{2}}}}/ " 'deeprvat_train train ' + @@ -84,6 +75,5 @@ rule train: '{params.prefix}/{model_path}/repeat_{{1}}/trial{{2}} ' "{params.prefix}/{model_path}/repeat_{{1}}/hyperparameter_optimization.db '&&' " "touch {params.prefix}/{model_path}/repeat_{{1}}/trial{{2}}/finished.tmp " - + logging_redirct + " " "::: " + " ".join(map(str, range(n_repeats))) + " " "::: " + " ".join(map(str, range(n_trials))) diff --git a/pipelines/training/training_dataset.snakefile b/pipelines/training/training_dataset.snakefile index aff90a01..a8965d29 100644 --- a/pipelines/training/training_dataset.snakefile +++ b/pipelines/training/training_dataset.snakefile @@ -13,7 +13,7 @@ rule training_dataset: priority: 5000 log: stdout="logs/training_dataset/{phenotype}.stdout", - stderr="logs/training_dataset/{phenotype}stderr" + stderr="logs/training_dataset/{phenotype}.stderr" shell: ( "deeprvat_train make-dataset " @@ -41,7 +41,7 @@ rule training_dataset_pickle: load=16000, log: stdout="logs/training_dataset_pickle/{phenotype}.stdout", - stderr="logs/training_dataset_pickle/{phenotype}stderr" + stderr="logs/training_dataset_pickle/{phenotype}.stderr" shell: ( "deeprvat_train make-dataset " diff --git a/pipelines/training_association_testing.snakefile b/pipelines/training_association_testing.snakefile index 96b420c4..4cba91f2 100644 --- a/pipelines/training_association_testing.snakefile +++ b/pipelines/training_association_testing.snakefile @@ -10,7 +10,7 @@ for handler in logging.root.handlers[:]: configfile: 'deeprvat_config.yaml' -logging_redirct = "> {log.stdout} 2> {log.stderr}" #for Linux-based systems +logging_redirct = "1> {log.stdout} 2> {log.stderr}" #for Linux-based systems debug_flag = config.get('debug', False) debug = '--debug ' if debug_flag else '' deterministic_flag = config.get('deterministic', False) diff --git a/pipelines/training_association_testing_regenie.snakefile b/pipelines/training_association_testing_regenie.snakefile index 107b128e..eeb3aaf1 100644 --- a/pipelines/training_association_testing_regenie.snakefile +++ b/pipelines/training_association_testing_regenie.snakefile @@ -10,7 +10,7 @@ for handler in logging.root.handlers[:]: configfile: 'deeprvat_config.yaml' -logging_redirct = "> {log.stdout} 2> {log.stderr}" #for Linux-based systems +logging_redirct = "1> {log.stdout} 2> {log.stderr}" #for Linux-based systems debug_flag = config.get('debug', False) deterministic_flag = config.get('deterministic', False) # TODO SHOULD THIS BE HERE? deterministic = '--deterministic ' if deterministic_flag else ''