Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature snakemake modular #41

Merged
merged 17 commits into from
Dec 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 59 additions & 36 deletions .github/workflows/github-actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,33 @@ jobs:
steps:
- name: Check out repository code
uses: actions/checkout@v3
- name: Training Association Testing smoke test
uses: snakemake/snakemake-github-action@v1.24.0
- uses: mamba-org/setup-micromamba@v1.4.3
with:
directory: 'example'
snakefile: 'pipelines/training_association_testing.snakefile'
args: '-j 2 -n'
environment-name: deeprvat-gh-action
environment-file: ${{ github.workspace }}/deeprvat_env_no_gpu.yml
cache-environment: true
cache-downloads: true
- name: Smoketest training_association_testing pipeline
run: |
python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example \
--snakefile ${{ github.workspace }}/pipelines/training_association_testing.snakefile --show-failed-logs
shell: micromamba-shell {0}
- name: Link pretrained models
run: cd ${{ github.workspace }}/example && ln -s ../pretrained_models
- name: Association Testing Pretrained Smoke Test
uses: snakemake/snakemake-github-action@v1.24.0
with:
directory: 'example'
snakefile: 'pipelines/association_testing_pretrained.snakefile'
args: '-j 2 -n'
- name: Seed Gene Discovery Smoke Test
uses: snakemake/snakemake-github-action@v1.24.0
with:
directory: 'example'
snakefile: 'pipelines/seed_gene_discovery.snakefile'
args: '-j 2 -n'
shell: bash -el {0}
- name: Smoketest association_testing_pretrained pipeline
run: |
python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example \
--snakefile ${{ github.workspace }}/pipelines/association_testing_pretrained.snakefile --show-failed-logs
shell: micromamba-shell {0}
- name: Copy seed gene discovery snakemake config
run: cd ${{ github.workspace }}/example && cp ../deeprvat/seed_gene_discovery/config.yaml .
shell: bash -el {0}
- name: Smoketest seed_gene_discovery pipeline
run: |
python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example \
--snakefile ${{ github.workspace }}/pipelines/seed_gene_discovery.snakefile --show-failed-logs
shell: micromamba-shell {0}

DeepRVAT-Pipeline-Tests:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -76,41 +83,57 @@ jobs:
steps:
- name: Check out repository code
uses: actions/checkout@v3
- name: Preprocessing Smoke Test With QC
uses: snakemake/snakemake-github-action@v1.24.0
- uses: mamba-org/setup-micromamba@v1.4.3
with:
directory: 'example/preprocess'
snakefile: 'pipelines/preprocess_with_qc.snakefile'
args: '-j 2 -n --configfile pipelines/config/deeprvat_preprocess_config.yaml'
stagein: 'touch example/preprocess/workdir/reference/GRCh38.primary_assembly.genome.fa'
environment-name: deeprvat-preprocess-gh-action
environment-file: ${{ github.workspace }}/deeprvat_preprocessing_env.yml
cache-environment: true
cache-downloads: true

- name: Preprocessing Smoke Test No QC
uses: snakemake/snakemake-github-action@v1.24.0
with:
directory: 'example/preprocess'
snakefile: 'pipelines/preprocess_no_qc.snakefile'
args: '-j 2 -n --configfile pipelines/config/deeprvat_preprocess_config.yaml'
stagein: 'touch example/preprocess/workdir/reference/GRCh38.primary_assembly.genome.fa'
# Create an empty placeholder for the reference FASTA; the preprocessing smoke
# tests in this job invoke snakemake with `-n`.
# Runs unconditionally: this job defines no step with the id `cache-fasta`, so
# a cache-hit guard on it could never skip the step.
- name: Fake fasta data
  run: |
    cd ${{ github.workspace }}/example/preprocess && touch workdir/reference/GRCh38.primary_assembly.genome.fa

- name: Run preprocessing pipeline no qc Smoke Test
run: |
python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example/preprocess \
--snakefile ${{ github.workspace }}/pipelines/preprocess_no_qc.snakefile \
--configfile ${{ github.workspace }}/pipelines/config/deeprvat_preprocess_config.yaml --show-failed-logs
shell: micromamba-shell {0}


- name: Preprocessing pipeline with qc Smoke Test
run: |
python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example/preprocess \
--snakefile ${{ github.workspace }}/pipelines/preprocess_with_qc.snakefile \
--configfile ${{ github.workspace }}/pipelines/config/deeprvat_preprocess_config.yaml --show-failed-logs
shell: micromamba-shell {0}


DeepRVAT-Annotation-Pipeline-Smoke-Tests:
runs-on: ubuntu-latest
steps:
- name: Check out repository code
uses: actions/checkout@v3
- name: Annotations Smoke Test
uses: snakemake/snakemake-github-action@v1.25.1
- uses: mamba-org/setup-micromamba@v1.4.3
with:
directory: 'example/annotations'
snakefile: 'pipelines/annotations.snakefile'
args: '-j 2 -n --configfile pipelines/config/deeprvat_annotation_config.yaml'
environment-name: deeprvat-preprocess-gh-action
environment-file: ${{ github.workspace }}/deeprvat_preprocessing_env.yml
cache-environment: true
cache-downloads: true
- name: Annotations Smoke Test
run: |
python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example/annotations \
--snakefile ${{ github.workspace }}/pipelines/annotations.snakefile \
--configfile ${{ github.workspace }}/pipelines/config/deeprvat_annotation_config.yaml --show-failed-logs
shell: micromamba-shell {0}


DeepRVAT-Preprocessing-Pipeline-Tests-No-QC:
runs-on: ubuntu-latest
needs: DeepRVAT-Preprocessing-Pipeline-Smoke-Tests
steps:

- name: Check out repository code
uses: actions/checkout@v3
- uses: mamba-org/setup-micromamba@v1.4.3
Expand Down
14 changes: 14 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,20 @@ Replace `[path_to_deeprvat]` with the path to your clone of the repository.
Note that the example data is randomly generated, and so is only suited for testing whether the `deeprvat` package has been correctly installed.


### Run the training pipeline on some example data

```shell
mkdir example
cd example
ln -s [path_to_deeprvat]/example/* .
snakemake -j 1 --snakefile [path_to_deeprvat]/pipelines/run_training.snakefile
```

Replace `[path_to_deeprvat]` with the path to your clone of the repository.

Note that the example data is randomly generated, and so is only suited for testing whether the `deeprvat` package has been correctly installed.


### Run the association testing pipeline with pretrained models

```shell
Expand Down
12 changes: 12 additions & 0 deletions pipelines/association_testing/association_dataset.snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@

# Runs `deeprvat_associate make-dataset` on a phenotype's hpopt_config.yaml and
# writes `{phenotype}/deeprvat/association_dataset.pkl`.
# NOTE(review): `debug` is not defined in this file; presumably a CLI flag
# fragment (possibly empty) supplied by the including Snakefile -- confirm.
rule association_dataset:
    input:
        config = "{phenotype}/deeprvat/hpopt_config.yaml"
    output:
        "{phenotype}/deeprvat/association_dataset.pkl"
    threads: 4
    shell:
        "deeprvat_associate make-dataset " + debug + "{input.config} {output}"
74 changes: 74 additions & 0 deletions pipelines/association_testing/burdens.snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@

# Runs `deeprvat_associate compute-burdens` with `--link-burdens` pointing at
# the burdens.zarr of the first phenotype (`phenotypes[0]`), then touches the
# `chunk{chunk}.linked` marker if the command succeeded.
# NOTE(review): `debug`, `model_path`, `phenotypes`, `n_repeats`, `n_bags` and
# `n_burden_chunks` come from outside this file.
rule link_burdens:
    priority: 1
    input:
        # One checkpoint per (repeat, bag) combination, repeat-major order.
        checkpoints = lambda wildcards: [
            f"{model_path}/repeat_{r}/best/bag_{b}.ckpt"
            for r in range(n_repeats)
            for b in range(n_bags)
        ],
        dataset = "{phenotype}/deeprvat/association_dataset.pkl",
        data_config = "{phenotype}/deeprvat/hpopt_config.yaml",
        model_config = model_path / "config.yaml",
    output:
        "{phenotype}/deeprvat/burdens/chunk{chunk}.linked"
    threads: 8
    shell:
        (
            "deeprvat_associate compute-burdens "
            + debug
            + " --n-chunks " + str(n_burden_chunks) + " "
            + f"--link-burdens ../../../{phenotypes[0]}/deeprvat/burdens/burdens.zarr "
            + "--chunk {wildcards.chunk} "
            + "--dataset-file {input.dataset} "
            + "{input.data_config} "
            + "{input.model_config} "
            + "{input.checkpoints} "
            + "{wildcards.phenotype}/deeprvat/burdens"
            # `&&` so the marker is only written when the command succeeds.
            + " && touch {output}"
        )

# Runs `deeprvat_associate compute-burdens` for one chunk of a phenotype, then
# touches the `chunk{chunk}.finished` marker if the command succeeded.
# Requires `reverse_finished.tmp` (the output of rule `reverse_models`) in
# addition to the checkpoints, dataset and configs.
rule compute_burdens:
    priority: 10
    input:
        reversed = model_path / "reverse_finished.tmp",
        # One checkpoint per (repeat, bag) combination, repeat-major order.
        checkpoints = lambda wildcards: [
            model_path / f"repeat_{r}/best/bag_{b}.ckpt"
            for r in range(n_repeats)
            for b in range(n_bags)
        ],
        dataset = "{phenotype}/deeprvat/association_dataset.pkl",
        data_config = "{phenotype}/deeprvat/hpopt_config.yaml",
        model_config = model_path / "config.yaml",
    output:
        "{phenotype}/deeprvat/burdens/chunk{chunk}.finished"
    threads: 8
    shell:
        (
            "deeprvat_associate compute-burdens "
            + debug
            + " --n-chunks " + str(n_burden_chunks) + " "
            + "--chunk {wildcards.chunk} "
            + "--dataset-file {input.dataset} "
            + "{input.data_config} "
            + "{input.model_config} "
            + "{input.checkpoints} "
            + "{wildcards.phenotype}/deeprvat/burdens"
            # `&&` so the marker is only written when the command succeeds.
            + " && touch {output}"
        )

# Runs `deeprvat_associate reverse-models` over all model checkpoints using the
# model config and the first phenotype's hpopt_config.yaml, then touches a
# temporary marker file (`reverse_finished.tmp`) if the command succeeded.
rule reverse_models:
    input:
        checkpoints = expand(
            model_path / "repeat_{repeat}/best/bag_{bag}.ckpt",
            bag=range(n_bags),
            repeat=range(n_repeats),
        ),
        model_config = model_path / "config.yaml",
        data_config = Path(phenotypes[0]) / "deeprvat/hpopt_config.yaml",
    output:
        # temp(): Snakemake removes the marker once no remaining job needs it.
        temp(model_path / "reverse_finished.tmp")
    threads: 4
    shell:
        (
            "deeprvat_associate reverse-models "
            + "{input.model_config} "
            + "{input.data_config} "
            + "{input.checkpoints}"
            + " && touch {output}"
        )
63 changes: 63 additions & 0 deletions pipelines/association_testing/regress_eval.snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@

# Runs `deeprvat_evaluate` on the per-repeat burden association results of one
# phenotype and writes `significant.parquet` and `all_results.parquet` under
# `{phenotype}/deeprvat/eval`.
# NOTE(review): `debug` and `n_repeats` are defined outside this file.
rule evaluate:
    input:
        # `{{phenotype}}` is escaped so it stays a rule wildcard; only
        # `{repeat}` is expanded here.
        associations = expand(
            "{{phenotype}}/deeprvat/repeat_{repeat}/results/burden_associations.parquet",
            repeat=range(n_repeats),
        ),
        config = "{phenotype}/deeprvat/hpopt_config.yaml",
    output:
        "{phenotype}/deeprvat/eval/significant.parquet",
        "{phenotype}/deeprvat/eval/all_results.parquet"
    threads: 1
    shell:
        "deeprvat_evaluate "
        + debug
        + "--use-seed-genes "
        + "--n-repeats {n_repeats} "
        + "--correction-method FDR "
        + "{input.associations} "
        + "{input.config} "
        + "{wildcards.phenotype}/deeprvat/eval"

# Aggregate target: requests the combined burden association results for every
# phenotype in `phenotypes` and every repeat in `range(n_repeats)`.
# The pattern has no `{type}` placeholder, so no `type=` keyword is passed to
# expand(); only `phenotype` and `repeat` contribute to the generated paths.
rule all_regression:
    input:
        expand(
            '{phenotype}/deeprvat/repeat_{repeat}/results/burden_associations.parquet',
            phenotype=phenotypes,
            repeat=range(n_repeats),
        ),

# Merges the per-chunk regression results of one phenotype/repeat into a single
# `burden_associations.parquet` via
# `deeprvat_associate combine-regression-results`.
rule combine_regression_chunks:
    input:
        # Phenotype and repeat stay rule wildcards (escaped braces); one input
        # file per regression chunk.
        expand(
            "{{phenotype}}/deeprvat/repeat_{{repeat}}/results/burden_associations_{chunk}.parquet",
            chunk=range(n_regression_chunks),
        ),
    output:
        "{phenotype}/deeprvat/repeat_{repeat}/results/burden_associations.parquet",
    threads: 1
    shell:
        "deeprvat_associate combine-regression-results "
        + "--model-name repeat_{wildcards.repeat} "
        + "{input} "
        + "{output}"

# Runs `deeprvat_associate regress` for one regression chunk of one
# phenotype/repeat, writing a temporary per-chunk parquet file.
# NOTE(review): `debug`, `do_scoretest`, `phenotypes`, `n_burden_chunks` and
# `n_regression_chunks` are defined outside this file.
rule regress:
    input:
        config = '{phenotype}/deeprvat/hpopt_config.yaml',
        # Burden chunk markers for this phenotype: `.finished` for the first
        # phenotype, `.linked` for all others.
        chunks = lambda wildcards: expand(
            '{{phenotype}}/deeprvat/burdens/chunk{chunk}.'
            + ('linked' if wildcards.phenotype != phenotypes[0] else 'finished'),
            chunk=range(n_burden_chunks),
        ),
        # Always require the first phenotype's `.finished` markers as well.
        phenotype_0_chunks = expand(
            phenotypes[0] + '/deeprvat/burdens/chunk{chunk}.finished',
            chunk=range(n_burden_chunks),
        ),
    output:
        temp('{phenotype}/deeprvat/repeat_{repeat}/results/burden_associations_{chunk}.parquet'),
    threads: 2
    shell:
        'deeprvat_associate regress '
        + debug
        + '--chunk {wildcards.chunk} --n-chunks '
        + str(n_regression_chunks)
        + ' --use-bias --repeat {wildcards.repeat} '
        + do_scoretest
        + '{input.config} '
        + '{wildcards.phenotype}/deeprvat/burdens '  # TODO make this w/o repeats
        + '{wildcards.phenotype}/deeprvat/repeat_{wildcards.repeat}/results'
Loading