From 198335a485c6b181cb1a87b5d56c2de2e8c7b177 Mon Sep 17 00:00:00 2001 From: Kayla Meyer Date: Thu, 14 Dec 2023 16:21:07 +0100 Subject: [PATCH 01/15] making snakemake runners modular Additional snakemake runners for only running training, only association testing, and full train + association testing pipelines. --- .../association_dataset.snakefile | 12 + .../association_testing/burdens.snakefile | 74 +++++ .../regress_eval.snakefile | 63 ++++ .../association_testing_pretrained.snakefile | 187 +----------- pipelines/run_training.snakefile | 48 +++ pipelines/training/config.snakefile | 29 ++ pipelines/training/train.snakefile | 61 ++++ pipelines/training/training_dataset.snakefile | 37 +++ .../training_association_testing.snakefile | 286 +----------------- 9 files changed, 341 insertions(+), 456 deletions(-) create mode 100644 pipelines/association_testing/association_dataset.snakefile create mode 100644 pipelines/association_testing/burdens.snakefile create mode 100644 pipelines/association_testing/regress_eval.snakefile create mode 100644 pipelines/run_training.snakefile create mode 100644 pipelines/training/config.snakefile create mode 100644 pipelines/training/train.snakefile create mode 100644 pipelines/training/training_dataset.snakefile diff --git a/pipelines/association_testing/association_dataset.snakefile b/pipelines/association_testing/association_dataset.snakefile new file mode 100644 index 00000000..0e63e53f --- /dev/null +++ b/pipelines/association_testing/association_dataset.snakefile @@ -0,0 +1,12 @@ + +rule association_dataset: + input: + config = '{phenotype}/deeprvat/hpopt_config.yaml' + output: + '{phenotype}/deeprvat/association_dataset.pkl' + threads: 4 + shell: + 'deeprvat_associate make-dataset ' + + debug + + '{input.config} ' + '{output}' diff --git a/pipelines/association_testing/burdens.snakefile b/pipelines/association_testing/burdens.snakefile new file mode 100644 index 00000000..7e95372f --- /dev/null +++ b/pipelines/association_testing/burdens.snakefile @@ -0,0 +1,74 @@ + +rule link_burdens: + priority: 1 + input: + checkpoints = lambda wildcards: [ + f'{pretrained_model_path}/repeat_{repeat}/best/bag_{bag}.ckpt' + for repeat in range(n_repeats) for bag in range(n_bags) + ], + dataset = '{phenotype}/deeprvat/association_dataset.pkl', + data_config = '{phenotype}/deeprvat/hpopt_config.yaml', + model_config = pretrained_model_path / 'config.yaml', + output: + '{phenotype}/deeprvat/burdens/chunk{chunk}.linked' + threads: 8 + shell: + ' && '.join([ + ('deeprvat_associate compute-burdens ' + + debug + + ' --n-chunks '+ str(n_burden_chunks) + ' ' + f'--link-burdens ../../../{phenotypes[0]}/deeprvat/burdens/burdens.zarr ' + '--chunk {wildcards.chunk} ' + '--dataset-file {input.dataset} ' + '{input.data_config} ' + '{input.model_config} ' + '{input.checkpoints} ' + '{wildcards.phenotype}/deeprvat/burdens'), + 'touch {output}' + ]) + +rule compute_burdens: + priority: 10 + input: + reversed = pretrained_model_path / "reverse_finished.tmp", + checkpoints = lambda wildcards: [ + pretrained_model_path / f'repeat_{repeat}/best/bag_{bag}.ckpt' + for repeat in range(n_repeats) for bag in range(n_bags) + ], + dataset = '{phenotype}/deeprvat/association_dataset.pkl', + data_config = '{phenotype}/deeprvat/hpopt_config.yaml', + model_config = pretrained_model_path / 'config.yaml', + output: + '{phenotype}/deeprvat/burdens/chunk{chunk}.finished' + threads: 8 + shell: + ' && '.join([ + ('deeprvat_associate compute-burdens ' + + debug + + ' --n-chunks '+ str(n_burden_chunks) + ' ' 
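+            # unlike link_burdens above, no --link-burdens flag is passed:
+            # this rule writes the actual burdens.zarr that the .linked
+            # chunks of the other phenotypes point back to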
+ '--chunk {wildcards.chunk} ' + '--dataset-file {input.dataset} ' + '{input.data_config} ' + '{input.model_config} ' + '{input.checkpoints} ' + '{wildcards.phenotype}/deeprvat/burdens'), + 'touch {output}' + ]) + +rule reverse_models: + input: + checkpoints = expand(pretrained_model_path / 'repeat_{repeat}/best/bag_{bag}.ckpt', + bag=range(n_bags), repeat=range(n_repeats)), + model_config = pretrained_model_path / 'config.yaml', + data_config = Path(phenotypes[0]) / "deeprvat/hpopt_config.yaml", + output: + temp(pretrained_model_path / "reverse_finished.tmp") + threads: 4 + shell: + " && ".join([ + ("deeprvat_associate reverse-models " + "{input.model_config} " + "{input.data_config} " + "{input.checkpoints}"), + "touch {output}" + ]) \ No newline at end of file diff --git a/pipelines/association_testing/regress_eval.snakefile b/pipelines/association_testing/regress_eval.snakefile new file mode 100644 index 00000000..bcb3f369 --- /dev/null +++ b/pipelines/association_testing/regress_eval.snakefile @@ -0,0 +1,63 @@ + +rule evaluate: + input: + associations = expand('{{phenotype}}/deeprvat/repeat_{repeat}/results/burden_associations.parquet', + repeat=range(n_repeats)), + config = '{phenotype}/deeprvat/hpopt_config.yaml', + output: + "{phenotype}/deeprvat/eval/significant.parquet", + "{phenotype}/deeprvat/eval/all_results.parquet" + threads: 1 + shell: + 'deeprvat_evaluate ' + + debug + + '--use-seed-genes ' + '--n-repeats {n_repeats} ' + '--correction-method FDR ' + '{input.associations} ' + '{input.config} ' + '{wildcards.phenotype}/deeprvat/eval' + +rule all_regression: + input: + expand('{phenotype}/deeprvat/repeat_{repeat}/results/burden_associations.parquet', + phenotype=phenotypes, type=['deeprvat'], repeat=range(n_repeats)), + +rule combine_regression_chunks: + input: + expand('{{phenotype}}/deeprvat/repeat_{{repeat}}/results/burden_associations_{chunk}.parquet', chunk=range(n_regression_chunks)), + output: + '{phenotype}/deeprvat/repeat_{repeat}/results/burden_associations.parquet', + threads: 1 + shell: + 'deeprvat_associate combine-regression-results ' + '--model-name repeat_{wildcards.repeat} ' + '{input} ' + '{output}' + +rule regress: + input: + config = "{phenotype}/deeprvat/hpopt_config.yaml", + chunks = lambda wildcards: expand( + ('{{phenotype}}/deeprvat/burdens/chunk{chunk}.' 
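+             # phenotypes[0] must carry real burdens (.finished); all other
+             # phenotypes only link against them (.linked)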
+ + ("finished" if wildcards.phenotype == phenotypes[0] else "linked")), + chunk=range(n_burden_chunks) + ), + phenotype_0_chunks = expand( + phenotypes[0] + '/deeprvat/burdens/chunk{chunk}.finished', + chunk=range(n_burden_chunks) + ), + output: + temp('{phenotype}/deeprvat/repeat_{repeat}/results/burden_associations_{chunk}.parquet'), + threads: 2 + shell: + 'deeprvat_associate regress ' + + debug + + '--chunk {wildcards.chunk} ' + '--n-chunks ' + str(n_regression_chunks) + ' ' + '--use-bias ' + '--repeat {wildcards.repeat} ' + + do_scoretest + + '{input.config} ' + '{wildcards.phenotype}/deeprvat/burdens ' #TODO make this w/o repeats + '{wildcards.phenotype}/deeprvat/repeat_{wildcards.repeat}/results' \ No newline at end of file diff --git a/pipelines/association_testing_pretrained.snakefile b/pipelines/association_testing_pretrained.snakefile index 702302f0..379ba795 100644 --- a/pipelines/association_testing_pretrained.snakefile +++ b/pipelines/association_testing_pretrained.snakefile @@ -8,16 +8,23 @@ phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes n_burden_chunks = config.get('n_burden_chunks', 1) if not debug_flag else 2 n_regression_chunks = config.get('n_regression_chunks', 40) if not debug_flag else 2 +n_trials = config['hyperparameter_optimization']['n_trials'] n_bags = config['training']['n_bags'] if not debug_flag else 3 n_repeats = config['n_repeats'] debug = '--debug ' if debug_flag else '' do_scoretest = '--do-scoretest ' if config.get('do_scoretest', False) else '' -pretrained_model_path = Path(config.get("pretrained_model_path", "pretrained_models")) +tensor_compression_level = config['training'].get('tensor_compression_level', 1) +pretrained_model_path = Path('models') #Path(config.get("pretrained_model_path", "pretrained_models")) wildcard_constraints: repeat="\d+", trial="\d+", +include: "training/config.snakefile" +include: "association_testing/association_dataset.snakefile" +include: "association_testing/burdens.snakefile" +include: "association_testing/regress_eval.snakefile" + rule all: input: expand("{phenotype}/deeprvat/eval/significant.parquet", @@ -25,69 +32,6 @@ rule all: expand("{phenotype}/deeprvat/eval/all_results.parquet", phenotype=phenotypes) -rule evaluate: - input: - associations = expand('{{phenotype}}/deeprvat/repeat_{repeat}/results/burden_associations.parquet', - repeat=range(n_repeats)), - config = '{phenotype}/deeprvat/hpopt_config.yaml', - output: - "{phenotype}/deeprvat/eval/significant.parquet", - "{phenotype}/deeprvat/eval/all_results.parquet" - threads: 1 - shell: - 'deeprvat_evaluate ' - + debug + - '--use-seed-genes ' - '--n-repeats {n_repeats} ' - '--correction-method FDR ' - '{input.associations} ' - '{input.config} ' - '{wildcards.phenotype}/deeprvat/eval' - -rule all_regression: - input: - expand('{phenotype}/deeprvat/repeat_{repeat}/results/burden_associations.parquet', - phenotype=phenotypes, type=['deeprvat'], repeat=range(n_repeats)), - -rule combine_regression_chunks: - input: - expand('{{phenotype}}/deeprvat/repeat_{{repeat}}/results/burden_associations_{chunk}.parquet', chunk=range(n_regression_chunks)), - output: - '{phenotype}/deeprvat/repeat_{repeat}/results/burden_associations.parquet', - threads: 1 - shell: - 'deeprvat_associate combine-regression-results ' - '--model-name repeat_{wildcards.repeat} ' - '{input} ' - '{output}' - -rule regress: - input: - config = "{phenotype}/deeprvat/hpopt_config.yaml", - chunks = lambda wildcards: expand( - ('{{phenotype}}/deeprvat/burdens/chunk{chunk}.' 
+ - ("finished" if wildcards.phenotype == phenotypes[0] else "linked")), - chunk=range(n_burden_chunks) - ), - phenotype_0_chunks = expand( - phenotypes[0] + '/deeprvat/burdens/chunk{chunk}.finished', - chunk=range(n_burden_chunks) - ), - output: - temp('{phenotype}/deeprvat/repeat_{repeat}/results/burden_associations_{chunk}.parquet'), - threads: 2 - shell: - 'deeprvat_associate regress ' - + debug + - '--chunk {wildcards.chunk} ' - '--n-chunks ' + str(n_regression_chunks) + ' ' - '--use-bias ' - '--repeat {wildcards.repeat} ' - + do_scoretest + - '{input.config} ' - '{wildcards.phenotype}/deeprvat/burdens ' #TODO make this w/o repeats - '{wildcards.phenotype}/deeprvat/repeat_{wildcards.repeat}/results' - rule all_burdens: input: [ @@ -97,97 +41,11 @@ rule all_burdens: for c in range(n_burden_chunks) ] -rule link_burdens: - priority: 1 - input: - checkpoints = lambda wildcards: [ - f'{pretrained_model_path}/repeat_{repeat}/best/bag_{bag}.ckpt' - for repeat in range(n_repeats) for bag in range(n_bags) - ], - dataset = '{phenotype}/deeprvat/association_dataset.pkl', - data_config = '{phenotype}/deeprvat/hpopt_config.yaml', - model_config = pretrained_model_path / 'config.yaml', - output: - '{phenotype}/deeprvat/burdens/chunk{chunk}.linked' - threads: 8 - shell: - ' && '.join([ - ('deeprvat_associate compute-burdens ' - + debug + - ' --n-chunks '+ str(n_burden_chunks) + ' ' - f'--link-burdens ../../../{phenotypes[0]}/deeprvat/burdens/burdens.zarr ' - '--chunk {wildcards.chunk} ' - '--dataset-file {input.dataset} ' - '{input.data_config} ' - '{input.model_config} ' - '{input.checkpoints} ' - '{wildcards.phenotype}/deeprvat/burdens'), - 'touch {output}' - ]) - -rule compute_burdens: - priority: 10 - input: - reversed = pretrained_model_path / "reverse_finished.tmp", - checkpoints = lambda wildcards: [ - pretrained_model_path / f'repeat_{repeat}/best/bag_{bag}.ckpt' - for repeat in range(n_repeats) for bag in range(n_bags) - ], - dataset = '{phenotype}/deeprvat/association_dataset.pkl', - data_config = '{phenotype}/deeprvat/hpopt_config.yaml', - model_config = pretrained_model_path / 'config.yaml', - output: - '{phenotype}/deeprvat/burdens/chunk{chunk}.finished' - threads: 8 - shell: - ' && '.join([ - ('deeprvat_associate compute-burdens ' - + debug + - ' --n-chunks '+ str(n_burden_chunks) + ' ' - '--chunk {wildcards.chunk} ' - '--dataset-file {input.dataset} ' - '{input.data_config} ' - '{input.model_config} ' - '{input.checkpoints} ' - '{wildcards.phenotype}/deeprvat/burdens'), - 'touch {output}' - ]) - rule all_association_dataset: input: expand('{phenotype}/deeprvat/association_dataset.pkl', phenotype=phenotypes) -rule association_dataset: - input: - config = '{phenotype}/deeprvat/hpopt_config.yaml' - output: - '{phenotype}/deeprvat/association_dataset.pkl' - threads: 4 - shell: - 'deeprvat_associate make-dataset ' - + debug + - '{input.config} ' - '{output}' - -rule reverse_models: - input: - checkpoints = expand(pretrained_model_path / 'repeat_{repeat}/best/bag_{bag}.ckpt', - bag=range(n_bags), repeat=range(n_repeats)), - model_config = pretrained_model_path / 'config.yaml', - data_config = Path(phenotypes[0]) / "deeprvat/hpopt_config.yaml", - output: - temp(pretrained_model_path / "reverse_finished.tmp") - threads: 4 - shell: - " && ".join([ - ("deeprvat_associate reverse-models " - "{input.model_config} " - "{input.data_config} " - "{input.checkpoints}"), - "touch {output}" - ]) - rule all_config: input: seed_genes = expand('{phenotype}/deeprvat/seed_genes.parquet', @@ -196,32 +54,3 @@ 
rule all_config: phenotype=phenotypes), baseline = expand('{phenotype}/deeprvat/baseline_results.parquet', phenotype=phenotypes), - -rule config: - input: - config = 'config.yaml', - baseline = lambda wildcards: [ - str(Path(r['base']) / wildcards.phenotype / r['type'] / - 'eval/burden_associations.parquet') - for r in config['baseline_results'] - ] - output: - seed_genes = '{phenotype}/deeprvat/seed_genes.parquet', - config = '{phenotype}/deeprvat/hpopt_config.yaml', - baseline = '{phenotype}/deeprvat/baseline_results.parquet', - threads: 1 - params: - baseline_results = lambda wildcards, input: ''.join([ - f'--baseline-results {b} ' - for b in input.baseline - ]) - shell: - ( - 'deeprvat_config update-config ' - '--phenotype {wildcards.phenotype} ' - '{params.baseline_results}' - '--baseline-results-out {output.baseline} ' - '--seed-genes-out {output.seed_genes} ' - '{input.config} ' - '{output.config}' - ) diff --git a/pipelines/run_training.snakefile b/pipelines/run_training.snakefile new file mode 100644 index 00000000..dc5f0254 --- /dev/null +++ b/pipelines/run_training.snakefile @@ -0,0 +1,48 @@ +from pathlib import Path + +configfile: 'config.yaml' + +debug_flag = config.get('debug', False) +phenotypes = config['phenotypes'] +phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes + +n_burden_chunks = config.get('n_burden_chunks', 1) if not debug_flag else 2 +n_regression_chunks = config.get('n_regression_chunks', 40) if not debug_flag else 2 +n_trials = config['hyperparameter_optimization']['n_trials'] +n_bags = config['training']['n_bags'] if not debug_flag else 3 +n_repeats = config['n_repeats'] +debug = '--debug ' if debug_flag else '' +do_scoretest = '--do-scoretest ' if config.get('do_scoretest', False) else '' +tensor_compression_level = config['training'].get('tensor_compression_level', 1) + +wildcard_constraints: + repeat="\d+", + trial="\d+", + +include: "training/config.snakefile" +include: "training/training_dataset.snakefile" +include: "training/train.snakefile" + +rule all: + input: + expand('models/repeat_{repeat}/best/bag_{bag}.ckpt', + bag=range(n_bags), repeat=range(n_repeats)), + "models/config.yaml" + +rule all_training_dataset: + input: + input_tensor = expand('{phenotype}/deeprvat/input_tensor.zarr', + phenotype=phenotypes, repeat=range(n_repeats)), + covariates = expand('{phenotype}/deeprvat/covariates.zarr', + phenotype=phenotypes, repeat=range(n_repeats)), + y = expand('{phenotype}/deeprvat/y.zarr', + phenotype=phenotypes, repeat=range(n_repeats)) + +rule all_config: + input: + seed_genes = expand('{phenotype}/deeprvat/seed_genes.parquet', + phenotype=phenotypes), + config = expand('{phenotype}/deeprvat/hpopt_config.yaml', + phenotype=phenotypes), + baseline = expand('{phenotype}/deeprvat/baseline_results.parquet', + phenotype=phenotypes), \ No newline at end of file diff --git a/pipelines/training/config.snakefile b/pipelines/training/config.snakefile new file mode 100644 index 00000000..3c58a39d --- /dev/null +++ b/pipelines/training/config.snakefile @@ -0,0 +1,29 @@ + +rule config: + input: + config = 'config.yaml', + baseline = lambda wildcards: [ + str(Path(r['base']) / wildcards.phenotype / r['type'] / + 'eval/burden_associations.parquet') + for r in config['baseline_results'] + ] + output: + seed_genes = '{phenotype}/deeprvat/seed_genes.parquet', + config = '{phenotype}/deeprvat/hpopt_config.yaml', + baseline = '{phenotype}/deeprvat/baseline_results.parquet', + threads: 1 + params: + baseline_results = lambda wildcards, 
input: ''.join([ + f'--baseline-results {b} ' + for b in input.baseline + ]) + shell: + ( + 'deeprvat_config update-config ' + '--phenotype {wildcards.phenotype} ' + '{params.baseline_results}' + '--baseline-results-out {output.baseline} ' + '--seed-genes-out {output.seed_genes} ' + '{input.config} ' + '{output.config}' + ) \ No newline at end of file diff --git a/pipelines/training/train.snakefile b/pipelines/training/train.snakefile new file mode 100644 index 00000000..c904180d --- /dev/null +++ b/pipelines/training/train.snakefile @@ -0,0 +1,61 @@ + +rule link_config: + input: + 'models/repeat_0/config.yaml' + output: + "models/config.yaml" + threads: 1 + shell: + "ln -s repeat_0/config.yaml {output}" + + +rule best_training_run: + input: + expand('models/repeat_{{repeat}}/trial{trial_number}/config.yaml', + trial_number=range(n_trials)), + output: + checkpoints = expand('models/repeat_{{repeat}}/best/bag_{bag}.ckpt', + bag=range(n_bags)), + config = 'models/repeat_{repeat}/config.yaml' + threads: 1 + shell: + ( + 'deeprvat_train best-training-run ' + + debug + + 'models/repeat_{wildcards.repeat} ' + 'models/repeat_{wildcards.repeat}/best ' + 'models/repeat_{wildcards.repeat}/hyperparameter_optimization.db ' + '{output.config}' + ) + +rule train: + input: + config = expand('{phenotype}/deeprvat/hpopt_config.yaml', + phenotype=phenotypes), + input_tensor = expand('{phenotype}/deeprvat/input_tensor.zarr', + phenotype=phenotypes), + covariates = expand('{phenotype}/deeprvat/covariates.zarr', + phenotype=phenotypes), + y = expand('{phenotype}/deeprvat/y.zarr', + phenotype=phenotypes), + output: + config = 'models/repeat_{repeat}/trial{trial_number}/config.yaml', + finished = 'models/repeat_{repeat}/trial{trial_number}/finished.tmp' + params: + phenotypes = " ".join( + [f"--phenotype {p} " + f"{p}/deeprvat/input_tensor.zarr " + f"{p}/deeprvat/covariates.zarr " + f"{p}/deeprvat/y.zarr" + for p in phenotypes]) + shell: + ' && '.join([ + 'deeprvat_train train ' + + debug + + '--trial-id {wildcards.trial_number} ' + "{params.phenotypes} " + 'config.yaml ' + 'models/repeat_{wildcards.repeat}/trial{wildcards.trial_number} ' + 'models/repeat_{wildcards.repeat}/hyperparameter_optimization.db', + 'touch {output.finished}' + ]) diff --git a/pipelines/training/training_dataset.snakefile b/pipelines/training/training_dataset.snakefile new file mode 100644 index 00000000..66903b85 --- /dev/null +++ b/pipelines/training/training_dataset.snakefile @@ -0,0 +1,37 @@ + +rule training_dataset: + input: + config = '{phenotype}/deeprvat/hpopt_config.yaml', + training_dataset = '{phenotype}/deeprvat/training_dataset.pkl' + output: + input_tensor = directory('{phenotype}/deeprvat/input_tensor.zarr'), + covariates = directory('{phenotype}/deeprvat/covariates.zarr'), + y = directory('{phenotype}/deeprvat/y.zarr') + threads: 8 + priority: 50 + shell: + ( + 'deeprvat_train make-dataset ' + + debug + + '--compression-level ' + str(tensor_compression_level) + ' ' + '--training-dataset-file {input.training_dataset} ' + '{input.config} ' + '{output.input_tensor} ' + '{output.covariates} ' + '{output.y}' + ) + +rule training_dataset_pickle: + input: + '{phenotype}/deeprvat/hpopt_config.yaml' + output: + '{phenotype}/deeprvat/training_dataset.pkl' + threads: 1 + shell: + ( + 'deeprvat_train make-dataset ' + '--pickle-only ' + '--training-dataset-file {output} ' + '{input} ' + 'dummy dummy dummy' + ) \ No newline at end of file diff --git a/pipelines/training_association_testing.snakefile 
b/pipelines/training_association_testing.snakefile index 069602b6..f0f91134 100644 --- a/pipelines/training_association_testing.snakefile +++ b/pipelines/training_association_testing.snakefile @@ -14,11 +14,19 @@ n_repeats = config['n_repeats'] debug = '--debug ' if debug_flag else '' do_scoretest = '--do-scoretest ' if config.get('do_scoretest', False) else '' tensor_compression_level = config['training'].get('tensor_compression_level', 1) +pretrained_model_path = Path('models') wildcard_constraints: repeat="\d+", trial="\d+", +include: "training/config.snakefile" +include: "training/training_dataset.snakefile" +include: "training/train.snakefile" +include: "association_testing/association_dataset.snakefile" +include: "association_testing/burdens.snakefile" +include: "association_testing/regress_eval.snakefile" + rule all: input: expand("{phenotype}/deeprvat/eval/significant.parquet", @@ -26,69 +34,6 @@ rule all: expand("{phenotype}/deeprvat/eval/all_results.parquet", phenotype=phenotypes) -rule evaluate: - input: - associations = expand('{{phenotype}}/deeprvat/repeat_{repeat}/results/burden_associations.parquet', - repeat=range(n_repeats)), - config = '{phenotype}/deeprvat/hpopt_config.yaml', - output: - "{phenotype}/deeprvat/eval/significant.parquet", - "{phenotype}/deeprvat/eval/all_results.parquet" - threads: 1 - shell: - 'deeprvat_evaluate ' - + debug + - '--use-seed-genes ' - '--n-repeats {n_repeats} ' - '--correction-method FDR ' - '{input.associations} ' - '{input.config} ' - '{wildcards.phenotype}/deeprvat/eval' - -rule all_regression: - input: - expand('{phenotype}/deeprvat/repeat_{repeat}/results/burden_associations.parquet', - phenotype=phenotypes, type=['deeprvat'], repeat=range(n_repeats)), - -rule combine_regression_chunks: - input: - expand('{{phenotype}}/deeprvat/repeat_{{repeat}}/results/burden_associations_{chunk}.parquet', chunk=range(n_regression_chunks)), - output: - '{phenotype}/deeprvat/repeat_{repeat}/results/burden_associations.parquet', - threads: 1 - shell: - 'deeprvat_associate combine-regression-results ' - '--model-name repeat_{wildcards.repeat} ' - '{input} ' - '{output}' - -rule regress: - input: - config = "{phenotype}/deeprvat/hpopt_config.yaml", - chunks = lambda wildcards: expand( - ('{{phenotype}}/deeprvat/burdens/chunk{chunk}.' 
+ - ("finished" if wildcards.phenotype == phenotypes[0] else "linked")), - chunk=range(n_burden_chunks) - ), - phenotype_0_chunks = expand( - phenotypes[0] + '/deeprvat/burdens/chunk{chunk}.finished', - chunk=range(n_burden_chunks) - ), - output: - temp('{phenotype}/deeprvat/repeat_{repeat}/results/burden_associations_{chunk}.parquet'), - threads: 2 - shell: - 'deeprvat_associate regress ' - + debug + - '--chunk {wildcards.chunk} ' - '--n-chunks ' + str(n_regression_chunks) + ' ' - '--use-bias ' - '--repeat {wildcards.repeat} ' - + do_scoretest + - '{input.config} ' - '{wildcards.phenotype}/deeprvat/burdens ' #TODO make this w/o repeats - '{wildcards.phenotype}/deeprvat/repeat_{wildcards.repeat}/results' - rule all_burdens: input: [ @@ -98,164 +43,17 @@ rule all_burdens: for c in range(n_burden_chunks) ] -rule link_burdens: - priority: 1 - input: - checkpoints = lambda wildcards: [ - f'models/repeat_{repeat}/best/bag_{bag}.ckpt' - for repeat in range(n_repeats) for bag in range(n_bags) - ], - dataset = '{phenotype}/deeprvat/association_dataset.pkl', - data_config = '{phenotype}/deeprvat/hpopt_config.yaml', - model_config = 'models/config.yaml', - output: - '{phenotype}/deeprvat/burdens/chunk{chunk}.linked' - threads: 8 - shell: - ' && '.join([ - ('deeprvat_associate compute-burdens ' - + debug + - ' --n-chunks '+ str(n_burden_chunks) + ' ' - f'--link-burdens ../../../{phenotypes[0]}/deeprvat/burdens/burdens.zarr ' - '--chunk {wildcards.chunk} ' - '--dataset-file {input.dataset} ' - '{input.data_config} ' - '{input.model_config} ' - '{input.checkpoints} ' - '{wildcards.phenotype}/deeprvat/burdens'), - 'touch {output}' - ]) - -rule compute_burdens: - priority: 10 - input: - reversed = "models/reverse_finished.tmp", - checkpoints = lambda wildcards: [ - f'models/repeat_{repeat}/best/bag_{bag}.ckpt' - for repeat in range(n_repeats) for bag in range(n_bags) - ], - dataset = '{phenotype}/deeprvat/association_dataset.pkl', - data_config = '{phenotype}/deeprvat/hpopt_config.yaml', - model_config = 'models/config.yaml', - output: - '{phenotype}/deeprvat/burdens/chunk{chunk}.finished' - threads: 8 - shell: - ' && '.join([ - ('deeprvat_associate compute-burdens ' - + debug + - ' --n-chunks '+ str(n_burden_chunks) + ' ' - '--chunk {wildcards.chunk} ' - '--dataset-file {input.dataset} ' - '{input.data_config} ' - '{input.model_config} ' - '{input.checkpoints} ' - '{wildcards.phenotype}/deeprvat/burdens'), - 'touch {output}' - ]) - rule all_association_dataset: input: expand('{phenotype}/deeprvat/association_dataset.pkl', phenotype=phenotypes) -rule association_dataset: - input: - config = '{phenotype}/deeprvat/hpopt_config.yaml' - output: - '{phenotype}/deeprvat/association_dataset.pkl' - threads: 4 - shell: - 'deeprvat_associate make-dataset ' - + debug + - '{input.config} ' - '{output}' - -rule reverse_models: - input: - checkpoints = expand('models/repeat_{repeat}/best/bag_{bag}.ckpt', - bag=range(n_bags), repeat=range(n_repeats)), - model_config = 'models/config.yaml', - data_config = Path(phenotypes[0]) / "deeprvat/hpopt_config.yaml", - output: - "models/reverse_finished.tmp" - threads: 4 - shell: - " && ".join([ - ("deeprvat_associate reverse-models " - "{input.model_config} " - "{input.data_config} " - "{input.checkpoints}"), - "touch {output}" - ]) - rule all_training: input: expand('models/repeat_{repeat}/best/bag_{bag}.ckpt', bag=range(n_bags), repeat=range(n_repeats)), "models/config.yaml" -rule link_config: - input: - 'models/repeat_0/config.yaml' - output: - "models/config.yaml" - threads: 
1 - shell: - "ln -s repeat_0/config.yaml {output}" - - -rule best_training_run: - input: - expand('models/repeat_{{repeat}}/trial{trial_number}/config.yaml', - trial_number=range(n_trials)), - output: - checkpoints = expand('models/repeat_{{repeat}}/best/bag_{bag}.ckpt', - bag=range(n_bags)), - config = 'models/repeat_{repeat}/config.yaml' - threads: 1 - shell: - ( - 'deeprvat_train best-training-run ' - + debug + - 'models/repeat_{wildcards.repeat} ' - 'models/repeat_{wildcards.repeat}/best ' - 'models/repeat_{wildcards.repeat}/hyperparameter_optimization.db ' - '{output.config}' - ) - -rule train: - input: - config = expand('{phenotype}/deeprvat/hpopt_config.yaml', - phenotype=phenotypes), - input_tensor = expand('{phenotype}/deeprvat/input_tensor.zarr', - phenotype=phenotypes), - covariates = expand('{phenotype}/deeprvat/covariates.zarr', - phenotype=phenotypes), - y = expand('{phenotype}/deeprvat/y.zarr', - phenotype=phenotypes), - output: - config = 'models/repeat_{repeat}/trial{trial_number}/config.yaml', - finished = 'models/repeat_{repeat}/trial{trial_number}/finished.tmp' - params: - phenotypes = " ".join( - [f"--phenotype {p} " - f"{p}/deeprvat/input_tensor.zarr " - f"{p}/deeprvat/covariates.zarr " - f"{p}/deeprvat/y.zarr" - for p in phenotypes]) - shell: - ' && '.join([ - 'deeprvat_train train ' - + debug + - '--trial-id {wildcards.trial_number} ' - "{params.phenotypes} " - 'config.yaml ' - 'models/repeat_{wildcards.repeat}/trial{wildcards.trial_number} ' - 'models/repeat_{wildcards.repeat}/hyperparameter_optimization.db', - 'touch {output.finished}' - ]) - rule all_training_dataset: input: input_tensor = expand('{phenotype}/deeprvat/input_tensor.zarr', @@ -265,43 +63,6 @@ rule all_training_dataset: y = expand('{phenotype}/deeprvat/y.zarr', phenotype=phenotypes, repeat=range(n_repeats)) -rule training_dataset: - input: - config = '{phenotype}/deeprvat/hpopt_config.yaml', - training_dataset = '{phenotype}/deeprvat/training_dataset.pkl' - output: - input_tensor = directory('{phenotype}/deeprvat/input_tensor.zarr'), - covariates = directory('{phenotype}/deeprvat/covariates.zarr'), - y = directory('{phenotype}/deeprvat/y.zarr') - threads: 8 - priority: 50 - shell: - ( - 'deeprvat_train make-dataset ' - + debug + - '--compression-level ' + str(tensor_compression_level) + ' ' - '--training-dataset-file {input.training_dataset} ' - '{input.config} ' - '{output.input_tensor} ' - '{output.covariates} ' - '{output.y}' - ) - -rule training_dataset_pickle: - input: - '{phenotype}/deeprvat/hpopt_config.yaml' - output: - '{phenotype}/deeprvat/training_dataset.pkl' - threads: 1 - shell: - ( - 'deeprvat_train make-dataset ' - '--pickle-only ' - '--training-dataset-file {output} ' - '{input} ' - 'dummy dummy dummy' - ) - rule all_config: input: seed_genes = expand('{phenotype}/deeprvat/seed_genes.parquet', @@ -309,33 +70,4 @@ rule all_config: config = expand('{phenotype}/deeprvat/hpopt_config.yaml', phenotype=phenotypes), baseline = expand('{phenotype}/deeprvat/baseline_results.parquet', - phenotype=phenotypes), - -rule config: - input: - config = 'config.yaml', - baseline = lambda wildcards: [ - str(Path(r['base']) / wildcards.phenotype / r['type'] / - 'eval/burden_associations.parquet') - for r in config['baseline_results'] - ] - output: - seed_genes = '{phenotype}/deeprvat/seed_genes.parquet', - config = '{phenotype}/deeprvat/hpopt_config.yaml', - baseline = '{phenotype}/deeprvat/baseline_results.parquet', - threads: 1 - params: - baseline_results = lambda wildcards, input: ''.join([ - 
f'--baseline-results {b} ' - for b in input.baseline - ]) - shell: - ( - 'deeprvat_config update-config ' - '--phenotype {wildcards.phenotype} ' - '{params.baseline_results}' - '--baseline-results-out {output.baseline} ' - '--seed-genes-out {output.seed_genes} ' - '{input.config} ' - '{output.config}' - ) + phenotype=phenotypes), \ No newline at end of file From f827648567c3248a15c8e65d84d039873fe422a4 Mon Sep 17 00:00:00 2001 From: Kayla Meyer Date: Thu, 14 Dec 2023 16:26:50 +0100 Subject: [PATCH 02/15] bug-fix pretrained model path --- pipelines/association_testing_pretrained.snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/association_testing_pretrained.snakefile b/pipelines/association_testing_pretrained.snakefile index 379ba795..dca4fc7c 100644 --- a/pipelines/association_testing_pretrained.snakefile +++ b/pipelines/association_testing_pretrained.snakefile @@ -14,7 +14,7 @@ n_repeats = config['n_repeats'] debug = '--debug ' if debug_flag else '' do_scoretest = '--do-scoretest ' if config.get('do_scoretest', False) else '' tensor_compression_level = config['training'].get('tensor_compression_level', 1) -pretrained_model_path = Path('models') #Path(config.get("pretrained_model_path", "pretrained_models")) +pretrained_model_path = Path(config.get("pretrained_model_path", "pretrained_models")) wildcard_constraints: repeat="\d+", From 5a8696e2d8774291f00f58cfa41aac3cc0a50ecd Mon Sep 17 00:00:00 2001 From: Kayla Meyer Date: Fri, 15 Dec 2023 16:01:13 +0100 Subject: [PATCH 03/15] Adding additional snakemake pipeline run option to readthedocs --- docs/usage.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/usage.md b/docs/usage.md index 93361782..5d7c9170 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -56,6 +56,20 @@ Replace `[path_to_deeprvat]` with the path to your clone of the repository. Note that the example data is randomly generated, and so is only suited for testing whether the `deeprvat` package has been correctly installed. +### Run the training pipeline on some example data + +```shell +mkdir example +cd example +ln -s [path_to_deeprvat]/example/* . +snakemake -j 1 --snakefile [path_to_deeprvat]/pipelines/run_training.snakefile +``` + +Replace `[path_to_deeprvat]` with the path to your clone of the repository. + +Note that the example data is randomly generated, and so is only suited for testing whether the `deeprvat` package has been correctly installed. 
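+
+If the pipeline runs to completion, the trained models are written to
+`models/`: one checkpoint per bag and repeat (e.g.
+`models/repeat_0/best/bag_0.ckpt`) plus a shared `models/config.yaml`.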
+ + ### Run the association testing pipeline with pretrained models ```shell From 3f139db37f2a290f61274f3cafb67afeb118584b Mon Sep 17 00:00:00 2001 From: Kayla Meyer Date: Thu, 21 Dec 2023 12:08:34 +0100 Subject: [PATCH 04/15] update train snakefile pipeline from PR #42 --- pipelines/training/train.snakefile | 37 ++++++++++++++++-------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/pipelines/training/train.snakefile b/pipelines/training/train.snakefile index c904180d..c66b6858 100644 --- a/pipelines/training/train.snakefile +++ b/pipelines/training/train.snakefile @@ -31,31 +31,34 @@ rule best_training_run: rule train: input: config = expand('{phenotype}/deeprvat/hpopt_config.yaml', - phenotype=phenotypes), + phenotype=training_phenotypes), input_tensor = expand('{phenotype}/deeprvat/input_tensor.zarr', - phenotype=phenotypes), + phenotype=training_phenotypes), covariates = expand('{phenotype}/deeprvat/covariates.zarr', - phenotype=phenotypes), + phenotype=training_phenotypes), y = expand('{phenotype}/deeprvat/y.zarr', - phenotype=phenotypes), + phenotype=training_phenotypes), output: - config = 'models/repeat_{repeat}/trial{trial_number}/config.yaml', - finished = 'models/repeat_{repeat}/trial{trial_number}/finished.tmp' + expand('models/repeat_{repeat}/trial{trial_number}/config.yaml', + repeat=range(n_repeats), trial_number=range(n_trials)), + expand('models/repeat_{repeat}/trial{trial_number}/finished.tmp', + repeat=range(n_repeats), trial_number=range(n_trials)) params: phenotypes = " ".join( [f"--phenotype {p} " f"{p}/deeprvat/input_tensor.zarr " f"{p}/deeprvat/covariates.zarr " f"{p}/deeprvat/y.zarr" - for p in phenotypes]) + for p in training_phenotypes]) shell: - ' && '.join([ - 'deeprvat_train train ' - + debug + - '--trial-id {wildcards.trial_number} ' - "{params.phenotypes} " - 'config.yaml ' - 'models/repeat_{wildcards.repeat}/trial{wildcards.trial_number} ' - 'models/repeat_{wildcards.repeat}/hyperparameter_optimization.db', - 'touch {output.finished}' - ]) + f"parallel --jobs {n_parallel_training_jobs} --halt now,fail=1 --results train_repeat{{{{1}}}}_trial{{{{2}}}}/ " + 'deeprvat_train train ' + + debug + + '--trial-id {{2}} ' + "{params.phenotypes} " + 'config.yaml ' + 'models/repeat_{{1}}/trial{{2}} ' + "models/repeat_{{1}}/hyperparameter_optimization.db '&&' " + "touch models/repeat_{{1}}/trial{{2}}/finished.tmp " + "::: " + " ".join(map(str, range(n_repeats))) + " " + "::: " + " ".join(map(str, range(n_trials))) From 7382e2770a8870844582b4ffbfe965d88228641a Mon Sep 17 00:00:00 2001 From: Kayla Meyer Date: Thu, 21 Dec 2023 15:05:33 +0100 Subject: [PATCH 05/15] bug-fix model path for snakemake pipeline runners --- pipelines/association_testing/burdens.snakefile | 16 ++++++++-------- .../association_testing_pretrained.snakefile | 3 ++- pipelines/training_association_testing.snakefile | 6 +++--- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pipelines/association_testing/burdens.snakefile b/pipelines/association_testing/burdens.snakefile index 7e95372f..550390fa 100644 --- a/pipelines/association_testing/burdens.snakefile +++ b/pipelines/association_testing/burdens.snakefile @@ -3,12 +3,12 @@ rule link_burdens: priority: 1 input: checkpoints = lambda wildcards: [ - f'{pretrained_model_path}/repeat_{repeat}/best/bag_{bag}.ckpt' + f'{model_path}/repeat_{repeat}/best/bag_{bag}.ckpt' for repeat in range(n_repeats) for bag in range(n_bags) ], dataset = '{phenotype}/deeprvat/association_dataset.pkl', data_config = 
'{phenotype}/deeprvat/hpopt_config.yaml', - model_config = pretrained_model_path / 'config.yaml', + model_config = model_path / 'config.yaml', output: '{phenotype}/deeprvat/burdens/chunk{chunk}.linked' threads: 8 @@ -30,14 +30,14 @@ rule link_burdens: rule compute_burdens: priority: 10 input: - reversed = pretrained_model_path / "reverse_finished.tmp", + reversed = model_path / "reverse_finished.tmp", checkpoints = lambda wildcards: [ - pretrained_model_path / f'repeat_{repeat}/best/bag_{bag}.ckpt' + model_path / f'repeat_{repeat}/best/bag_{bag}.ckpt' for repeat in range(n_repeats) for bag in range(n_bags) ], dataset = '{phenotype}/deeprvat/association_dataset.pkl', data_config = '{phenotype}/deeprvat/hpopt_config.yaml', - model_config = pretrained_model_path / 'config.yaml', + model_config = model_path / 'config.yaml', output: '{phenotype}/deeprvat/burdens/chunk{chunk}.finished' threads: 8 @@ -57,12 +57,12 @@ rule compute_burdens: rule reverse_models: input: - checkpoints = expand(pretrained_model_path / 'repeat_{repeat}/best/bag_{bag}.ckpt', + checkpoints = expand(model_path / 'repeat_{repeat}/best/bag_{bag}.ckpt', bag=range(n_bags), repeat=range(n_repeats)), - model_config = pretrained_model_path / 'config.yaml', + model_config = model_path / 'config.yaml', data_config = Path(phenotypes[0]) / "deeprvat/hpopt_config.yaml", output: - temp(pretrained_model_path / "reverse_finished.tmp") + temp(model_path / "reverse_finished.tmp") threads: 4 shell: " && ".join([ diff --git a/pipelines/association_testing_pretrained.snakefile b/pipelines/association_testing_pretrained.snakefile index dca4fc7c..d7aaa006 100644 --- a/pipelines/association_testing_pretrained.snakefile +++ b/pipelines/association_testing_pretrained.snakefile @@ -5,6 +5,7 @@ configfile: 'config.yaml' debug_flag = config.get('debug', False) phenotypes = config['phenotypes'] phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes +training_phenotypes = config["training"].get("phenotypes", phenotypes) n_burden_chunks = config.get('n_burden_chunks', 1) if not debug_flag else 2 n_regression_chunks = config.get('n_regression_chunks', 40) if not debug_flag else 2 @@ -14,7 +15,7 @@ n_repeats = config['n_repeats'] debug = '--debug ' if debug_flag else '' do_scoretest = '--do-scoretest ' if config.get('do_scoretest', False) else '' tensor_compression_level = config['training'].get('tensor_compression_level', 1) -pretrained_model_path = Path(config.get("pretrained_model_path", "pretrained_models")) +model_path = Path(config.get("pretrained_model_path", "pretrained_models")) wildcard_constraints: repeat="\d+", diff --git a/pipelines/training_association_testing.snakefile b/pipelines/training_association_testing.snakefile index b7001bb9..8b28ce26 100644 --- a/pipelines/training_association_testing.snakefile +++ b/pipelines/training_association_testing.snakefile @@ -15,7 +15,7 @@ n_repeats = config['n_repeats'] debug = '--debug ' if debug_flag else '' do_scoretest = '--do-scoretest ' if config.get('do_scoretest', False) else '' tensor_compression_level = config['training'].get('tensor_compression_level', 1) -pretrained_model_path = Path('models') +model_path = Path('models') n_parallel_training_jobs = config["training"].get("n_parallel_jobs", 1) wildcard_constraints: @@ -52,9 +52,9 @@ rule all_association_dataset: rule all_training: input: - expand('models/repeat_{repeat}/best/bag_{bag}.ckpt', + expand('{model_path}/repeat_{repeat}/best/bag_{bag}.ckpt', bag=range(n_bags), repeat=range(n_repeats)), - 
"models/config.yaml" + "{model_path}/config.yaml" rule all_training_dataset: input: From aec735ffbdce1ae9422aea9124709b9f18f1e668 Mon Sep 17 00:00:00 2001 From: Kayla Meyer Date: Thu, 21 Dec 2023 15:32:21 +0100 Subject: [PATCH 06/15] bug-fix f string syntax --- pipelines/training/train.snakefile | 26 +++++++++---------- .../training_association_testing.snakefile | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/pipelines/training/train.snakefile b/pipelines/training/train.snakefile index c66b6858..bed870bf 100644 --- a/pipelines/training/train.snakefile +++ b/pipelines/training/train.snakefile @@ -1,9 +1,9 @@ rule link_config: input: - 'models/repeat_0/config.yaml' + '{model_path}/repeat_0/config.yaml' output: - "models/config.yaml" + '{model_path}/config.yaml' threads: 1 shell: "ln -s repeat_0/config.yaml {output}" @@ -11,20 +11,20 @@ rule link_config: rule best_training_run: input: - expand('models/repeat_{{repeat}}/trial{trial_number}/config.yaml', + expand('{model_path}/repeat_{{repeat}}/trial{trial_number}/config.yaml', trial_number=range(n_trials)), output: - checkpoints = expand('models/repeat_{{repeat}}/best/bag_{bag}.ckpt', + checkpoints = expand('{model_path}/repeat_{{repeat}}/best/bag_{bag}.ckpt', bag=range(n_bags)), - config = 'models/repeat_{repeat}/config.yaml' + config = '{model_path}/repeat_{repeat}/config.yaml' threads: 1 shell: ( 'deeprvat_train best-training-run ' + debug + - 'models/repeat_{wildcards.repeat} ' - 'models/repeat_{wildcards.repeat}/best ' - 'models/repeat_{wildcards.repeat}/hyperparameter_optimization.db ' + '{model_path}/repeat_{wildcards.repeat} ' + '{model_path}/repeat_{wildcards.repeat}/best ' + '{model_path}/repeat_{wildcards.repeat}/hyperparameter_optimization.db ' '{output.config}' ) @@ -39,9 +39,9 @@ rule train: y = expand('{phenotype}/deeprvat/y.zarr', phenotype=training_phenotypes), output: - expand('models/repeat_{repeat}/trial{trial_number}/config.yaml', + expand('{model_path}/repeat_{repeat}/trial{trial_number}/config.yaml', repeat=range(n_repeats), trial_number=range(n_trials)), - expand('models/repeat_{repeat}/trial{trial_number}/finished.tmp', + expand('{model_path}/repeat_{repeat}/trial{trial_number}/finished.tmp', repeat=range(n_repeats), trial_number=range(n_trials)) params: phenotypes = " ".join( @@ -57,8 +57,8 @@ rule train: '--trial-id {{2}} ' "{params.phenotypes} " 'config.yaml ' - 'models/repeat_{{1}}/trial{{2}} ' - "models/repeat_{{1}}/hyperparameter_optimization.db '&&' " - "touch models/repeat_{{1}}/trial{{2}}/finished.tmp " + '{model_path}/repeat_{{1}}/trial{{2}} ' + '{model_path}/repeat_{{1}}/hyperparameter_optimization.db "&&" ' + 'touch {model_path}/repeat_{{1}}/trial{{2}}/finished.tmp ' "::: " + " ".join(map(str, range(n_repeats))) + " " "::: " + " ".join(map(str, range(n_trials))) diff --git a/pipelines/training_association_testing.snakefile b/pipelines/training_association_testing.snakefile index 8b28ce26..320cb9fb 100644 --- a/pipelines/training_association_testing.snakefile +++ b/pipelines/training_association_testing.snakefile @@ -15,7 +15,7 @@ n_repeats = config['n_repeats'] debug = '--debug ' if debug_flag else '' do_scoretest = '--do-scoretest ' if config.get('do_scoretest', False) else '' tensor_compression_level = config['training'].get('tensor_compression_level', 1) -model_path = Path('models') +model_path = Path("models") n_parallel_training_jobs = config["training"].get("n_parallel_jobs", 1) wildcard_constraints: From 6cde84f1188a7f3aae910c72b1c53fbd9c80f219 Mon Sep 17 00:00:00 2001 From: 
Magnus Wahlberg Date: Thu, 21 Dec 2023 16:18:15 +0100 Subject: [PATCH 07/15] Update github-actions.yml --- .github/workflows/github-actions.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml index 2f686b17..2f2e534b 100644 --- a/.github/workflows/github-actions.yml +++ b/.github/workflows/github-actions.yml @@ -8,12 +8,31 @@ jobs: steps: - name: Check out repository code uses: actions/checkout@v3 + - uses: mamba-org/setup-micromamba@v1.4.3 + with: + environment-name: deeprvat-preprocess-gh-action + environment-file: ${{ github.workspace }}/deeprvat_preprocessing_env.yml + cache-environment: true + cache-downloads: true + + - name: Fake fasta data + run: touch workdir/reference/GRCh38.primary_assembly.genome.fa.gz + + - name: Run preprocessing pipeline dry run + run: | + python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example/preprocess \ + --snakefile ${{ github.workspace }}/pipelines/preprocess_no_qc.snakefile \ + --configfile ${{ github.workspace }}/pipelines/config/deeprvat_preprocess_config.yaml --show-failed-logs + shell: micromamba-shell {0} + - name: Training Association Testing smoke test uses: snakemake/snakemake-github-action@v1.24.0 with: directory: 'example' snakefile: 'pipelines/training_association_testing.snakefile' args: '-j 2 -n' + + - name: Link pretrained models run: cd ${{ github.workspace }}/example && ln -s ../pretrained_models - name: Association Testing Pretrained Smoke Test From d27e6a8b9d65561717536cd2111f8b91891cc558 Mon Sep 17 00:00:00 2001 From: Magnus Wahlberg Date: Thu, 21 Dec 2023 16:20:09 +0100 Subject: [PATCH 08/15] Update github-actions.yml --- .github/workflows/github-actions.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml index 2f2e534b..186e6102 100644 --- a/.github/workflows/github-actions.yml +++ b/.github/workflows/github-actions.yml @@ -16,7 +16,7 @@ jobs: cache-downloads: true - name: Fake fasta data - run: touch workdir/reference/GRCh38.primary_assembly.genome.fa.gz + run: touch ${{ github.workspace }}/example/preprocess/workdir/reference/GRCh38.primary_assembly.genome.fa.gz - name: Run preprocessing pipeline dry run run: | @@ -32,7 +32,7 @@ jobs: snakefile: 'pipelines/training_association_testing.snakefile' args: '-j 2 -n' - + - name: Link pretrained models run: cd ${{ github.workspace }}/example && ln -s ../pretrained_models - name: Association Testing Pretrained Smoke Test From b8b82e568f823fa8030e244c7b2515b7ad9d3813 Mon Sep 17 00:00:00 2001 From: Magnus Wahlberg Date: Thu, 21 Dec 2023 16:21:25 +0100 Subject: [PATCH 09/15] Update github-actions.yml --- .github/workflows/github-actions.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml index 186e6102..dbd58b87 100644 --- a/.github/workflows/github-actions.yml +++ b/.github/workflows/github-actions.yml @@ -16,7 +16,7 @@ jobs: cache-downloads: true - name: Fake fasta data - run: touch ${{ github.workspace }}/example/preprocess/workdir/reference/GRCh38.primary_assembly.genome.fa.gz + run: touch ${{ github.workspace }}/example/preprocess/workdir/reference/GRCh38.primary_assembly.genome.fa - name: Run preprocessing pipeline dry run run: | From 90edc4c3987217788a6f885e38d2661504002ad1 Mon Sep 17 00:00:00 2001 From: Magnus Wahlberg Date: Thu, 21 Dec 2023 16:23:31 +0100 Subject: [PATCH 10/15] 
Revert "Update github-actions.yml" This reverts commit b8b82e568f823fa8030e244c7b2515b7ad9d3813. --- .github/workflows/github-actions.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml index dbd58b87..186e6102 100644 --- a/.github/workflows/github-actions.yml +++ b/.github/workflows/github-actions.yml @@ -16,7 +16,7 @@ jobs: cache-downloads: true - name: Fake fasta data - run: touch ${{ github.workspace }}/example/preprocess/workdir/reference/GRCh38.primary_assembly.genome.fa + run: touch ${{ github.workspace }}/example/preprocess/workdir/reference/GRCh38.primary_assembly.genome.fa.gz - name: Run preprocessing pipeline dry run run: | From 12c30de79e79050a89379303edada5e58189993c Mon Sep 17 00:00:00 2001 From: Magnus Wahlberg Date: Thu, 21 Dec 2023 16:23:35 +0100 Subject: [PATCH 11/15] Revert "Update github-actions.yml" This reverts commit d27e6a8b9d65561717536cd2111f8b91891cc558. --- .github/workflows/github-actions.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml index 186e6102..2f2e534b 100644 --- a/.github/workflows/github-actions.yml +++ b/.github/workflows/github-actions.yml @@ -16,7 +16,7 @@ jobs: cache-downloads: true - name: Fake fasta data - run: touch ${{ github.workspace }}/example/preprocess/workdir/reference/GRCh38.primary_assembly.genome.fa.gz + run: touch workdir/reference/GRCh38.primary_assembly.genome.fa.gz - name: Run preprocessing pipeline dry run run: | @@ -32,7 +32,7 @@ jobs: snakefile: 'pipelines/training_association_testing.snakefile' args: '-j 2 -n' - + - name: Link pretrained models run: cd ${{ github.workspace }}/example && ln -s ../pretrained_models - name: Association Testing Pretrained Smoke Test From 289ee3efaf3912dcea8f30c0418fda8fe4f5199b Mon Sep 17 00:00:00 2001 From: Magnus Wahlberg Date: Thu, 21 Dec 2023 16:23:38 +0100 Subject: [PATCH 12/15] Revert "Update github-actions.yml" This reverts commit 6cde84f1188a7f3aae910c72b1c53fbd9c80f219. 
--- .github/workflows/github-actions.yml | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml index 2f2e534b..2f686b17 100644 --- a/.github/workflows/github-actions.yml +++ b/.github/workflows/github-actions.yml @@ -8,31 +8,12 @@ jobs: steps: - name: Check out repository code uses: actions/checkout@v3 - - uses: mamba-org/setup-micromamba@v1.4.3 - with: - environment-name: deeprvat-preprocess-gh-action - environment-file: ${{ github.workspace }}/deeprvat_preprocessing_env.yml - cache-environment: true - cache-downloads: true - - - name: Fake fasta data - run: touch workdir/reference/GRCh38.primary_assembly.genome.fa.gz - - - name: Run preprocessing pipeline dry run - run: | - python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example/preprocess \ - --snakefile ${{ github.workspace }}/pipelines/preprocess_no_qc.snakefile \ - --configfile ${{ github.workspace }}/pipelines/config/deeprvat_preprocess_config.yaml --show-failed-logs - shell: micromamba-shell {0} - - name: Training Association Testing smoke test uses: snakemake/snakemake-github-action@v1.24.0 with: directory: 'example' snakefile: 'pipelines/training_association_testing.snakefile' args: '-j 2 -n' - - - name: Link pretrained models run: cd ${{ github.workspace }}/example && ln -s ../pretrained_models - name: Association Testing Pretrained Smoke Test From e5b6a2d8dabdf47b64bf4535e940b8f1061fa418 Mon Sep 17 00:00:00 2001 From: Magnus Wahlberg Date: Thu, 21 Dec 2023 16:28:25 +0100 Subject: [PATCH 13/15] Update github-actions.yml --- .github/workflows/github-actions.yml | 99 ++++++++++++++++++---------- 1 file changed, 63 insertions(+), 36 deletions(-) diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml index 2f686b17..83a31730 100644 --- a/.github/workflows/github-actions.yml +++ b/.github/workflows/github-actions.yml @@ -8,26 +8,37 @@ jobs: steps: - name: Check out repository code uses: actions/checkout@v3 - - name: Training Association Testing smoke test - uses: snakemake/snakemake-github-action@v1.24.0 + - uses: mamba-org/setup-micromamba@v1.4.3 with: - directory: 'example' - snakefile: 'pipelines/training_association_testing.snakefile' - args: '-j 2 -n' + environment-name: deeprvat-gh-action + environment-file: ${{ github.workspace }}/deeprvat_env_no_gpu.yml + cache-environment: true + cache-downloads: true + - name: Link pretrained models run: cd ${{ github.workspace }}/example && ln -s ../pretrained_models - - name: Association Testing Pretrained Smoke Test - uses: snakemake/snakemake-github-action@v1.24.0 - with: - directory: 'example' - snakefile: 'pipelines/association_testing_pretrained.snakefile' - args: '-j 2 -n' - - name: Seed Gene Discovery Smoke Test - uses: snakemake/snakemake-github-action@v1.24.0 - with: - directory: 'example' - snakefile: 'pipelines/seed_gene_discovery.snakefile' - args: '-j 2 -n' + + - name: Smoketest training_association_testing pipeline + run: | + python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example \ + --snakefile ${{ github.workspace }}/pipelines/training_association_testing.snakefile --show-failed-logs + shell: micromamba-shell {0} + - name: Link pretrained models + run: cd ${{ github.workspace }}/example && ln -s ../pretrained_models + shell: bash -el {0} + - name: Smoketest association_testing_pretrained pipeline + run: | + python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example \ + --snakefile ${{ github.workspace 
}}/pipelines/association_testing_pretrained.snakefile --show-failed-logs + shell: micromamba-shell {0} + - name: Copy seed gene discovery snakemake config + run: cd ${{ github.workspace }}/example && cp ../deeprvat/seed_gene_discovery/config.yaml . + shell: bash -el {0} + - name: Smoketest seed_gene_discovery pipeline + run: | + python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example \ + --snakefile ${{ github.workspace }}/pipelines/seed_gene_discovery.snakefile --show-failed-logs + shell: micromamba-shell {0} DeepRVAT-Pipeline-Tests: runs-on: ubuntu-latest @@ -76,21 +87,32 @@ jobs: steps: - name: Check out repository code uses: actions/checkout@v3 - - name: Preprocessing Smoke Test With QC - uses: snakemake/snakemake-github-action@v1.24.0 + - uses: mamba-org/setup-micromamba@v1.4.3 with: - directory: 'example/preprocess' - snakefile: 'pipelines/preprocess_with_qc.snakefile' - args: '-j 2 -n --configfile pipelines/config/deeprvat_preprocess_config.yaml' - stagein: 'touch example/preprocess/workdir/reference/GRCh38.primary_assembly.genome.fa' + environment-name: deeprvat-preprocess-gh-action + environment-file: ${{ github.workspace }}/deeprvat_preprocessing_env.yml + cache-environment: true + cache-downloads: true - - name: Preprocessing Smoke Test No QC - uses: snakemake/snakemake-github-action@v1.24.0 - with: - directory: 'example/preprocess' - snakefile: 'pipelines/preprocess_no_qc.snakefile' - args: '-j 2 -n --configfile pipelines/config/deeprvat_preprocess_config.yaml' - stagein: 'touch example/preprocess/workdir/reference/GRCh38.primary_assembly.genome.fa' + - name: Fake fasta data + if: steps.cache-fasta.outputs.cache-hit != 'true' + run: | + cd ${{ github.workspace }}/example/preprocess && touch workdir/reference/GRCh38.primary_assembly.genome.fa + + - name: Run preprocessing pipeline no qc Smoke Test + run: | + python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example/preprocess \ + --snakefile ${{ github.workspace }}/pipelines/preprocess_no_qc.snakefile \ + --configfile ${{ github.workspace }}/pipelines/config/deeprvat_preprocess_config.yaml --show-failed-logs + shell: micromamba-shell {0} + + + - name: Preprocessing pipeline with qc Smoke Test + run: | + python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example/preprocess \ + --snakefile ${{ github.workspace }}/pipelines/preprocess_with_qc.snakefile \ + --configfile ${{ github.workspace }}/pipelines/config/deeprvat_preprocess_config.yaml --show-failed-logs + shell: micromamba-shell {0} DeepRVAT-Annotation-Pipeline-Smoke-Tests: @@ -98,19 +120,24 @@ jobs: steps: - name: Check out repository code uses: actions/checkout@v3 - - name: Annotations Smoke Test - uses: snakemake/snakemake-github-action@v1.25.1 + - uses: mamba-org/setup-micromamba@v1.4.3 with: - directory: 'example/annotations' - snakefile: 'pipelines/annotations.snakefile' - args: '-j 2 -n --configfile pipelines/config/deeprvat_annotation_config.yaml' + environment-name: deeprvat-preprocess-gh-action + environment-file: ${{ github.workspace }}/deeprvat_preprocessing_env.yml + cache-environment: true + cache-downloads: true + - name: Annotations Smoke Test + run: | + python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example/annotations \ + --snakefile ${{ github.workspace }}/pipelines/annotations.snakefile \ + --configfile ${{ github.workspace }}/pipelines/config/deeprvat_annotation_config.yaml --show-failed-logs + shell: micromamba-shell {0} DeepRVAT-Preprocessing-Pipeline-Tests-No-QC: runs-on: ubuntu-latest needs: 
DeepRVAT-Preprocessing-Pipeline-Smoke-Tests steps: - - name: Check out repository code uses: actions/checkout@v3 - uses: mamba-org/setup-micromamba@v1.4.3 From b79409dc5557dbdce53c30a7d352a365f2f00a1a Mon Sep 17 00:00:00 2001 From: Magnus Wahlberg Date: Thu, 21 Dec 2023 16:31:10 +0100 Subject: [PATCH 14/15] Update github-actions.yml --- .github/workflows/github-actions.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml index 83a31730..5b3ff8a6 100644 --- a/.github/workflows/github-actions.yml +++ b/.github/workflows/github-actions.yml @@ -14,10 +14,6 @@ jobs: environment-file: ${{ github.workspace }}/deeprvat_env_no_gpu.yml cache-environment: true cache-downloads: true - - - name: Link pretrained models - run: cd ${{ github.workspace }}/example && ln -s ../pretrained_models - - name: Smoketest training_association_testing pipeline run: | python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example \ From 03e04a6697139571dd3100b640c001930b08ea20 Mon Sep 17 00:00:00 2001 From: Kayla Meyer Date: Fri, 22 Dec 2023 11:34:42 +0100 Subject: [PATCH 15/15] fix-model path string variable in rules --- pipelines/run_training.snakefile | 13 ++++++++----- pipelines/training/train.snakefile | 14 +++++++------- pipelines/training_association_testing.snakefile | 4 ++-- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/pipelines/run_training.snakefile b/pipelines/run_training.snakefile index dc5f0254..0e10d79e 100644 --- a/pipelines/run_training.snakefile +++ b/pipelines/run_training.snakefile @@ -5,6 +5,7 @@ configfile: 'config.yaml' debug_flag = config.get('debug', False) phenotypes = config['phenotypes'] phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes +training_phenotypes = config["training"].get("phenotypes", phenotypes) n_burden_chunks = config.get('n_burden_chunks', 1) if not debug_flag else 2 n_regression_chunks = config.get('n_regression_chunks', 40) if not debug_flag else 2 @@ -14,6 +15,8 @@ n_repeats = config['n_repeats'] debug = '--debug ' if debug_flag else '' do_scoretest = '--do-scoretest ' if config.get('do_scoretest', False) else '' tensor_compression_level = config['training'].get('tensor_compression_level', 1) +model_path = Path("models") +n_parallel_training_jobs = config["training"].get("n_parallel_jobs", 1) wildcard_constraints: repeat="\d+", @@ -25,18 +28,18 @@ include: "training/train.snakefile" rule all: input: - expand('models/repeat_{repeat}/best/bag_{bag}.ckpt', + expand( model_path / 'repeat_{repeat}/best/bag_{bag}.ckpt', bag=range(n_bags), repeat=range(n_repeats)), - "models/config.yaml" + model_path / "config.yaml" rule all_training_dataset: input: input_tensor = expand('{phenotype}/deeprvat/input_tensor.zarr', - phenotype=phenotypes, repeat=range(n_repeats)), + phenotype=training_phenotypes, repeat=range(n_repeats)), covariates = expand('{phenotype}/deeprvat/covariates.zarr', - phenotype=phenotypes, repeat=range(n_repeats)), + phenotype=training_phenotypes, repeat=range(n_repeats)), y = expand('{phenotype}/deeprvat/y.zarr', - phenotype=phenotypes, repeat=range(n_repeats)) + phenotype=training_phenotypes, repeat=range(n_repeats)) rule all_config: input: diff --git a/pipelines/training/train.snakefile b/pipelines/training/train.snakefile index bed870bf..c747fd1f 100644 --- a/pipelines/training/train.snakefile +++ b/pipelines/training/train.snakefile @@ -1,9 +1,9 @@ rule link_config: input: - '{model_path}/repeat_0/config.yaml' + model_path / 
'repeat_0/config.yaml' output: - '{model_path}/config.yaml' + model_path / 'config.yaml' threads: 1 shell: "ln -s repeat_0/config.yaml {output}" @@ -11,12 +11,12 @@ rule link_config: rule best_training_run: input: - expand('{model_path}/repeat_{{repeat}}/trial{trial_number}/config.yaml', + expand(model_path / 'repeat_{{repeat}}/trial{trial_number}/config.yaml', trial_number=range(n_trials)), output: - checkpoints = expand('{model_path}/repeat_{{repeat}}/best/bag_{bag}.ckpt', + checkpoints = expand(model_path / 'repeat_{{repeat}}/best/bag_{bag}.ckpt', bag=range(n_bags)), - config = '{model_path}/repeat_{repeat}/config.yaml' + config = model_path / 'repeat_{repeat}/config.yaml' threads: 1 shell: ( @@ -39,9 +39,9 @@ rule train: y = expand('{phenotype}/deeprvat/y.zarr', phenotype=training_phenotypes), output: - expand('{model_path}/repeat_{repeat}/trial{trial_number}/config.yaml', + expand(model_path / 'repeat_{repeat}/trial{trial_number}/config.yaml', repeat=range(n_repeats), trial_number=range(n_trials)), - expand('{model_path}/repeat_{repeat}/trial{trial_number}/finished.tmp', + expand(model_path / 'repeat_{repeat}/trial{trial_number}/finished.tmp', repeat=range(n_repeats), trial_number=range(n_trials)) params: phenotypes = " ".join( diff --git a/pipelines/training_association_testing.snakefile b/pipelines/training_association_testing.snakefile index 320cb9fb..60384eaf 100644 --- a/pipelines/training_association_testing.snakefile +++ b/pipelines/training_association_testing.snakefile @@ -52,9 +52,9 @@ rule all_association_dataset: rule all_training: input: - expand('{model_path}/repeat_{repeat}/best/bag_{bag}.ckpt', + expand(model_path / 'repeat_{repeat}/best/bag_{bag}.ckpt', bag=range(n_bags), repeat=range(n_repeats)), - "{model_path}/config.yaml" + model_path / "config.yaml" rule all_training_dataset: input:
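
A closing note on the model-path changes: patches 05 and 06 replace hard-coded `models/` paths with a `model_path` variable, but splice it into rule patterns as a literal `'{model_path}'`. Inside `expand()`, every `{...}` placeholder is a Snakemake wildcard to be filled from the keyword arguments, so that placeholder is never substituted with the Python variable of the same name and instead trips a missing-wildcard error; patch 15 fixes this by joining the `Path` object into the pattern first, leaving only the real wildcards. A minimal sketch, assuming a recent Snakemake install (the same `snakemake.io.expand` helper and `pathlib` support these snakefiles rely on):

```python
from pathlib import Path

from snakemake.io import expand

model_path = Path("models")

# Broken form (introduced in PATCH 05): '{model_path}' looks like a wildcard
# that expand() must fill, but no value is supplied for it, so Snakemake
# raises a missing-wildcard error instead of substituting the Python variable.
try:
    expand("{model_path}/repeat_{repeat}/best/bag_{bag}.ckpt",
           bag=range(2), repeat=range(2))
except Exception as err:
    print(f"{type(err).__name__}: {err}")

# Fixed form (PATCH 15): resolve the Path first, so only the real wildcards
# {repeat} and {bag} remain for expand() to fill in.
print(expand(model_path / "repeat_{repeat}/best/bag_{bag}.ckpt",
             bag=range(2), repeat=range(2)))
# -> ['models/repeat_0/best/bag_0.ckpt', ..., 'models/repeat_1/best/bag_1.ckpt']
```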