diff --git a/pipelines/training_association_testing.snakefile b/pipelines/training_association_testing.snakefile
index 70429f0b..7f45f643 100644
--- a/pipelines/training_association_testing.snakefile
+++ b/pipelines/training_association_testing.snakefile
@@ -14,7 +14,7 @@ n_repeats = config['n_repeats']
 debug = '--debug ' if debug_flag else ''
 do_scoretest = '--do-scoretest ' if config.get('do_scoretest', False) else ''
 tensor_compression_level = config['training'].get('tensor_compression_level', 1)
-n_parallel_training_jobs = config["training"].get("n_parallel_jobs")
+n_parallel_training_jobs = config["training"].get("n_parallel_jobs", 1)
 
 wildcard_constraints:
     repeat="\d+",
@@ -239,7 +239,7 @@ rule train:
         expand('models/repeat_{repeat}/trial{trial_number}/config.yaml',
                repeat=range(n_repeats), trial_number=range(n_trials)),
         expand('models/repeat_{repeat}/trial{trial_number}/finished.tmp',
-               repeat=range(n_repeats), trial_number=range(n_trials)))
+               repeat=range(n_repeats), trial_number=range(n_trials))
     params:
         phenotypes = " ".join(
             [f"--phenotype {p} "
@@ -248,7 +248,7 @@
              f"{p}/deeprvat/y.zarr"
              for p in phenotypes])
     shell:
-        f"parallel --jobs {n_parallel_training_jobs} --results train_repeat{{1}}_trial{{2}}/ "
+        f"parallel --jobs {n_parallel_training_jobs} --results train_repeat{{{{1}}}}_trial{{{{2}}}}/ "
         'deeprvat_train train '
         + debug +
         '--trial-id {{2}} '
@@ -257,89 +257,89 @@
         'models/repeat_{{1}}/trial{{2}} '
         "models/repeat_{{1}}/hyperparameter_optimization.db '&&' "
         "touch models/repeat_{{1}}/trial{{2}}/finished.tmp "
-        "::: " + " ".join(range(n_repeats)) + " "
-        "::: " + " ".join(range(n_trials))
+        "::: " + " ".join(map(str, range(n_repeats))) + " "
+        "::: " + " ".join(map(str, range(n_trials)))
 
-rule all_training_dataset:
-    input:
-        input_tensor = expand('{phenotype}/deeprvat/input_tensor.zarr',
-                              phenotype=phenotypes, repeat=range(n_repeats)),
-        covariates = expand('{phenotype}/deeprvat/covariates.zarr',
-                            phenotype=phenotypes, repeat=range(n_repeats)),
-        y = expand('{phenotype}/deeprvat/y.zarr',
-                   phenotype=phenotypes, repeat=range(n_repeats))
+# rule all_training_dataset:
+#     input:
+#         input_tensor = expand('{phenotype}/deeprvat/input_tensor.zarr',
+#                               phenotype=phenotypes, repeat=range(n_repeats)),
+#         covariates = expand('{phenotype}/deeprvat/covariates.zarr',
+#                             phenotype=phenotypes, repeat=range(n_repeats)),
+#         y = expand('{phenotype}/deeprvat/y.zarr',
+#                    phenotype=phenotypes, repeat=range(n_repeats))
 
-rule training_dataset:
-    input:
-        config = '{phenotype}/deeprvat/hpopt_config.yaml',
-        training_dataset = '{phenotype}/deeprvat/training_dataset.pkl'
-    output:
-        input_tensor = directory('{phenotype}/deeprvat/input_tensor.zarr'),
-        covariates = directory('{phenotype}/deeprvat/covariates.zarr'),
-        y = directory('{phenotype}/deeprvat/y.zarr')
-    threads: 8
-    priority: 50
-    shell:
-        (
-            'deeprvat_train make-dataset '
-            + debug +
-            '--compression-level ' + str(tensor_compression_level) + ' '
-            '--training-dataset-file {input.training_dataset} '
-            '{input.config} '
-            '{output.input_tensor} '
-            '{output.covariates} '
-            '{output.y}'
-        )
+# rule training_dataset:
+#     input:
+#         config = '{phenotype}/deeprvat/hpopt_config.yaml',
+#         training_dataset = '{phenotype}/deeprvat/training_dataset.pkl'
+#     output:
+#         input_tensor = directory('{phenotype}/deeprvat/input_tensor.zarr'),
+#         covariates = directory('{phenotype}/deeprvat/covariates.zarr'),
+#         y = directory('{phenotype}/deeprvat/y.zarr')
+#     threads: 8
+#     priority: 50
+#     shell:
+#         (
+#             'deeprvat_train make-dataset '
+#             + debug +
+#             '--compression-level ' + str(tensor_compression_level) + ' '
+#             '--training-dataset-file {input.training_dataset} '
+#             '{input.config} '
+#             '{output.input_tensor} '
+#             '{output.covariates} '
+#             '{output.y}'
+#         )
 
-rule training_dataset_pickle:
-    input:
-        '{phenotype}/deeprvat/hpopt_config.yaml'
-    output:
-        '{phenotype}/deeprvat/training_dataset.pkl'
-    threads: 1
-    shell:
-        (
-            'deeprvat_train make-dataset '
-            '--pickle-only '
-            '--training-dataset-file {output} '
-            '{input} '
-            'dummy dummy dummy'
-        )
+# rule training_dataset_pickle:
+#     input:
+#         '{phenotype}/deeprvat/hpopt_config.yaml'
+#     output:
+#         '{phenotype}/deeprvat/training_dataset.pkl'
+#     threads: 1
+#     shell:
+#         (
+#             'deeprvat_train make-dataset '
+#             '--pickle-only '
+#             '--training-dataset-file {output} '
+#             '{input} '
+#             'dummy dummy dummy'
+#         )
 
-rule all_config:
-    input:
-        seed_genes = expand('{phenotype}/deeprvat/seed_genes.parquet',
-                            phenotype=phenotypes),
-        config = expand('{phenotype}/deeprvat/hpopt_config.yaml',
-                        phenotype=phenotypes),
-        baseline = expand('{phenotype}/deeprvat/baseline_results.parquet',
-                          phenotype=phenotypes),
+# rule all_config:
+#     input:
+#         seed_genes = expand('{phenotype}/deeprvat/seed_genes.parquet',
+#                             phenotype=phenotypes),
+#         config = expand('{phenotype}/deeprvat/hpopt_config.yaml',
+#                         phenotype=phenotypes),
+#         baseline = expand('{phenotype}/deeprvat/baseline_results.parquet',
+#                           phenotype=phenotypes),
 
-rule config:
-    input:
-        config = 'config.yaml',
-        baseline = lambda wildcards: [
-            str(Path(r['base']) / wildcards.phenotype / r['type'] /
-                'eval/burden_associations.parquet')
-            for r in config['baseline_results']
-        ]
-    output:
-        seed_genes = '{phenotype}/deeprvat/seed_genes.parquet',
-        config = '{phenotype}/deeprvat/hpopt_config.yaml',
-        baseline = '{phenotype}/deeprvat/baseline_results.parquet',
-    threads: 1
-    params:
-        baseline_results = lambda wildcards, input: ''.join([
-            f'--baseline-results {b} '
-            for b in input.baseline
-        ])
-    shell:
-        (
-            'deeprvat_config update-config '
-            '--phenotype {wildcards.phenotype} '
-            '{params.baseline_results}'
-            '--baseline-results-out {output.baseline} '
-            '--seed-genes-out {output.seed_genes} '
-            '{input.config} '
-            '{output.config}'
-        )
+# rule config:
+#     input:
+#         config = 'config.yaml',
+#         baseline = lambda wildcards: [
+#             str(Path(r['base']) / wildcards.phenotype / r['type'] /
+#                 'eval/burden_associations.parquet')
+#             for r in config['baseline_results']
+#         ]
+#     output:
+#         seed_genes = '{phenotype}/deeprvat/seed_genes.parquet',
+#         config = '{phenotype}/deeprvat/hpopt_config.yaml',
+#         baseline = '{phenotype}/deeprvat/baseline_results.parquet',
+#     threads: 1
+#     params:
+#         baseline_results = lambda wildcards, input: ''.join([
+#             f'--baseline-results {b} '
+#             for b in input.baseline
+#         ])
+#     shell:
+#         (
+#             'deeprvat_config update-config '
+#             '--phenotype {wildcards.phenotype} '
+#             '{params.baseline_results}'
+#             '--baseline-results-out {output.baseline} '
+#             '--seed-genes-out {output.seed_genes} '
+#             '{input.config} '
+#             '{output.config}'
+#         )
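
Notes on the three train-rule fixes in the diff, illustrated with a minimal plain-Python sketch (run outside Snakemake; the config dict and job count below are hypothetical stand-ins):

    # 1. The new default avoids "parallel --jobs None" when the config key is absent.
    training_config = {}  # stand-in for config["training"] lacking "n_parallel_jobs"
    n_parallel_training_jobs = training_config.get("n_parallel_jobs", 1)
    assert n_parallel_training_jobs == 1

    # 2. The quadrupled braces survive two formatting layers: the f-string
    #    collapses {{{{ to {{, and Snakemake's str.format()-style shell
    #    substitution collapses {{ to {, leaving the {1}/{2} placeholders
    #    that GNU parallel expects.
    cmd = f"parallel --jobs {n_parallel_training_jobs} --results train_repeat{{{{1}}}}_trial{{{{2}}}}/ "
    assert "train_repeat{{1}}_trial{{2}}/" in cmd        # after the f-string layer
    assert "train_repeat{1}_trial{2}/" in cmd.format()   # after the format layer

    # 3. str.join accepts only strings, while range() yields ints, so map(str, ...)
    #    is required to build parallel's ::: argument lists.
    assert " ".join(map(str, range(3))) == "0 1 2"
    # " ".join(range(3))  # TypeError: sequence item 0: expected str instance, int found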