Skip to content

Commit

Permalink
Feature snakemake modular (#41)
Browse files Browse the repository at this point in the history
* making snakemake runners modular
Adds separate Snakemake runners for three use cases: training only, association testing only, and the full training + association testing pipeline.

* bug-fix pretrained model path

* Adding additional snakemake pipeline run option to readthedocs

* update train snakefile pipeline from PR #42

* bug-fix model path for snakemake pipeline runners

* bug-fix f string syntax

* Update github-actions.yml

* Update github-actions.yml

* Update github-actions.yml

* Revert "Update github-actions.yml"

This reverts commit b8b82e5.

* Revert "Update github-actions.yml"

This reverts commit d27e6a8.

* Revert "Update github-actions.yml"

This reverts commit 6cde84f.

* Update github-actions.yml

* Update github-actions.yml

* fix-model path string variable in rules

---------

Co-authored-by: Magnus Wahlberg <endast@gmail.com>
  • Loading branch information
meyerkm and endast authored Dec 22, 2023
1 parent 2d4a387 commit fc14c51
Show file tree
Hide file tree
Showing 10 changed files with 363 additions and 459 deletions.
14 changes: 14 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,20 @@ Replace `[path_to_deeprvat]` with the path to your clone of the repository.
Note that the example data is randomly generated, and so is only suited for testing whether the `deeprvat` package has been correctly installed.


### Run the training pipeline on some example data

```shell
mkdir example
cd example
ln -s [path_to_deeprvat]/example/* .
snakemake -j 1 --snakefile [path_to_deeprvat]/pipelines/run_training.snakefile
```

Replace `[path_to_deeprvat]` with the path to your clone of the repository.

Note that the example data is randomly generated, and so is only suited for testing whether the `deeprvat` package has been correctly installed.


### Run the association testing pipeline with pretrained models

```shell
Expand Down
12 changes: 12 additions & 0 deletions pipelines/association_testing/association_dataset.snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@

# Runs `deeprvat_associate make-dataset` for one phenotype: reads the
# phenotype's hpopt_config.yaml and writes association_dataset.pkl next to it.
rule association_dataset:
    input:
        config = '{phenotype}/deeprvat/hpopt_config.yaml'
    output:
        '{phenotype}/deeprvat/association_dataset.pkl'
    threads: 4
    # `debug` is defined outside this file and spliced into the command as-is
    # (presumably an optional CLI flag with trailing space -- confirm).
    # The {input...}/{output} placeholders are filled in by Snakemake.
    shell:
        'deeprvat_associate make-dataset ' + debug + '{input.config} {output}'
74 changes: 74 additions & 0 deletions pipelines/association_testing/burdens.snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@

# Runs `deeprvat_associate compute-burdens` with --link-burdens for one chunk
# of a phenotype, pointing at the burdens.zarr of the first phenotype
# (phenotypes[0]), and then touches the chunk's `.linked` marker file.
rule link_burdens:
    priority: 1
    input:
        # One checkpoint per (repeat, bag) pair, repeat-major order. Built with
        # `model_path / ...` like model_config below and like the
        # compute_burdens rule, instead of interpolating model_path into a string.
        checkpoints = lambda wildcards: [
            model_path / f'repeat_{repeat}/best/bag_{bag}.ckpt'
            for repeat in range(n_repeats) for bag in range(n_bags)
        ],
        dataset = '{phenotype}/deeprvat/association_dataset.pkl',
        data_config = '{phenotype}/deeprvat/hpopt_config.yaml',
        model_config = model_path / 'config.yaml',
    output:
        '{phenotype}/deeprvat/burdens/chunk{chunk}.linked'
    threads: 8
    # `debug`, n_burden_chunks and phenotypes[0] are spliced in when the
    # snakefile is parsed; the remaining {...} placeholders are left for
    # Snakemake to fill in per job. Explicit `+` concatenation is used for all
    # parse-time values so that no f-string is mixed into the plain literals.
    shell:
        ' && '.join([
            ('deeprvat_associate compute-burdens '
             + debug +
             ' --n-chunks ' + str(n_burden_chunks) + ' '
             '--link-burdens ../../../' + phenotypes[0] + '/deeprvat/burdens/burdens.zarr '
             '--chunk {wildcards.chunk} '
             '--dataset-file {input.dataset} '
             '{input.data_config} '
             '{input.model_config} '
             '{input.checkpoints} '
             '{wildcards.phenotype}/deeprvat/burdens'),
            'touch {output}'
        ])

# Runs `deeprvat_associate compute-burdens` for one chunk of a phenotype and
# then touches the chunk's `.finished` marker file.
rule compute_burdens:
    priority: 10
    input:
        # Marker file produced by the reverse_models rule.
        reversed = model_path / "reverse_finished.tmp",
        # One checkpoint per (repeat, bag) pair, repeat-major order.
        checkpoints = lambda wildcards: [
            model_path / f'repeat_{r}/best/bag_{b}.ckpt'
            for r in range(n_repeats)
            for b in range(n_bags)
        ],
        dataset = '{phenotype}/deeprvat/association_dataset.pkl',
        data_config = '{phenotype}/deeprvat/hpopt_config.yaml',
        model_config = model_path / 'config.yaml',
    output:
        '{phenotype}/deeprvat/burdens/chunk{chunk}.finished'
    threads: 8
    # `debug` and n_burden_chunks are spliced in at parse time; the {...}
    # placeholders are filled in by Snakemake. The marker is only touched if
    # the burden computation exits successfully (`&&`).
    shell:
        'deeprvat_associate compute-burdens '
        + debug +
        ' --n-chunks ' + str(n_burden_chunks) + ' --chunk {wildcards.chunk} '
        '--dataset-file {input.dataset} '
        '{input.data_config} {input.model_config} {input.checkpoints} '
        '{wildcards.phenotype}/deeprvat/burdens'
        ' && touch {output}'

# Runs `deeprvat_associate reverse-models` on all model checkpoints and then
# touches a temporary marker file under model_path.
rule reverse_models:
    input:
        # Every (bag, repeat) checkpoint below model_path.
        checkpoints = expand(
            model_path / 'repeat_{repeat}/best/bag_{bag}.ckpt',
            bag=range(n_bags),
            repeat=range(n_repeats),
        ),
        model_config = model_path / 'config.yaml',
        # The data config of the first phenotype is the one passed to the command.
        data_config = Path(phenotypes[0]) / "deeprvat/hpopt_config.yaml",
    output:
        temp(model_path / "reverse_finished.tmp")
    threads: 4
    # The marker is only touched if reverse-models exits successfully (`&&`).
    shell:
        "deeprvat_associate reverse-models "
        "{input.model_config} {input.data_config} {input.checkpoints}"
        " && touch {output}"
63 changes: 63 additions & 0 deletions pipelines/association_testing/regress_eval.snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@

# Runs `deeprvat_evaluate` for one phenotype on the per-repeat burden
# association results, writing into {phenotype}/deeprvat/eval.
rule evaluate:
    input:
        # {{phenotype}} is escaped so that only {repeat} is expanded here;
        # the phenotype wildcard is resolved per job.
        associations = expand(
            '{{phenotype}}/deeprvat/repeat_{repeat}/results/burden_associations.parquet',
            repeat=range(n_repeats),
        ),
        config = '{phenotype}/deeprvat/hpopt_config.yaml',
    output:
        "{phenotype}/deeprvat/eval/significant.parquet",
        "{phenotype}/deeprvat/eval/all_results.parquet"
    threads: 1
    # `debug` is spliced in at parse time; {n_repeats} and the other
    # placeholders are left for Snakemake to fill in.
    shell:
        'deeprvat_evaluate ' + debug +
        '--use-seed-genes --n-repeats {n_repeats} --correction-method FDR '
        '{input.associations} {input.config} '
        '{wildcards.phenotype}/deeprvat/eval'

# Target rule: requests the combined burden association results for every
# phenotype in `phenotypes` and every repeat in range(n_repeats).
rule all_regression:
    input:
        # Only {phenotype} and {repeat} occur in the pattern, so these are the
        # only keywords passed to expand().
        expand('{phenotype}/deeprvat/repeat_{repeat}/results/burden_associations.parquet',
               phenotype=phenotypes, repeat=range(n_repeats)),

# Merges the per-chunk regression results of one phenotype/repeat into a
# single burden_associations.parquet via
# `deeprvat_associate combine-regression-results`.
rule combine_regression_chunks:
    input:
        # {{phenotype}} and {{repeat}} are escaped so that only {chunk} is
        # expanded here.
        expand(
            '{{phenotype}}/deeprvat/repeat_{{repeat}}/results/burden_associations_{chunk}.parquet',
            chunk=range(n_regression_chunks),
        ),
    output:
        '{phenotype}/deeprvat/repeat_{repeat}/results/burden_associations.parquet',
    threads: 1
    shell:
        'deeprvat_associate combine-regression-results '
        '--model-name repeat_{wildcards.repeat} {input} {output}'

# Runs `deeprvat_associate regress` for one regression chunk of one
# phenotype/repeat, producing a temporary per-chunk parquet file.
rule regress:
    input:
        config = "{phenotype}/deeprvat/hpopt_config.yaml",
        # Burden chunk markers of this phenotype: `.finished` for the first
        # phenotype (phenotypes[0]), `.linked` for every other phenotype.
        chunks = lambda wildcards: expand(
            '{{phenotype}}/deeprvat/burdens/chunk{chunk}.'
            + ("linked" if wildcards.phenotype != phenotypes[0] else "finished"),
            chunk=range(n_burden_chunks)
        ),
        # The first phenotype's `.finished` markers are required for every
        # phenotype (presumably because linked burdens refer to them -- confirm).
        phenotype_0_chunks = expand(
            phenotypes[0] + '/deeprvat/burdens/chunk{chunk}.finished',
            chunk=range(n_burden_chunks)
        ),
    output:
        temp('{phenotype}/deeprvat/repeat_{repeat}/results/burden_associations_{chunk}.parquet'),
    threads: 2
    # `debug`, n_regression_chunks and `do_scoretest` are spliced in at parse
    # time; the {...} placeholders are filled in by Snakemake per job.
    shell:
        'deeprvat_associate regress '
        + debug +
        '--chunk {wildcards.chunk} --n-chunks ' + str(n_regression_chunks) +
        ' --use-bias --repeat {wildcards.repeat} '
        + do_scoretest +
        '{input.config} '
        '{wildcards.phenotype}/deeprvat/burdens ' #TODO make this w/o repeats
        '{wildcards.phenotype}/deeprvat/repeat_{wildcards.repeat}/results'
Loading

0 comments on commit fc14c51

Please sign in to comment.