diff --git a/.github/workflows/code-tests.yml b/.github/workflows/code-tests.yml
new file mode 100644
index 00000000..968b9b59
--- /dev/null
+++ b/.github/workflows/code-tests.yml
@@ -0,0 +1,24 @@
+name: DeepRVAT code test runner
+run-name: DeepRVAT Code Tests 🧑🏼‍💻✅
+on: [ push ]
+
+jobs:
+  DeepRVAT-Tests-Runner-Preprocessing:  # parallel_tests is not set, so the reusable workflow's default applies
+    uses: ./.github/workflows/run-pytest.yml
+    with:
+      test_path: ./tests/preprocessing
+      environment_file: ./deeprvat_preprocessing_env.yml
+
+  DeepRVAT-Tests-Runner-Annotations:
+    uses: ./.github/workflows/run-pytest.yml
+    with:
+      test_path: ./tests/annotations
+      environment_file: ./deeprvat_annotations.yml
+      parallel_tests: true
+
+  DeepRVAT-Tests-Runner:
+    uses: ./.github/workflows/run-pytest.yml
+    with:
+      test_path: ./tests/deeprvat
+      environment_file: ./deeprvat_env_no_gpu.yml
+      parallel_tests: true
diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml
deleted file mode 100644
index 3708d48b..00000000
--- a/.github/workflows/github-actions.yml
+++ /dev/null
@@ -1,211 +0,0 @@
-name: DeepRVAT
-run-name: DeepRVAT 🧬🧪💻🧑‍🔬
-on: [ push ]
-
-jobs:
-  DeepRVAT-Pipeline-Smoke-Tests:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check out repository code
-        uses: actions/checkout@v4
-      - uses: mamba-org/setup-micromamba@v1.8.1
-        with:
-          environment-name: deeprvat-gh-action
-          environment-file: ${{ github.workspace }}/deeprvat_env_no_gpu.yml
-          cache-environment: true
-          cache-downloads: true
-      - name: Smoketest training_association_testing pipeline
-        run: |
-          python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example \
-          --snakefile ${{ github.workspace }}/pipelines/training_association_testing.snakefile --show-failed-logs
-        shell: micromamba-shell {0}
-      - name: Link pretrained models
-        run: cd ${{ github.workspace }}/example && ln -s ../pretrained_models
-        shell: bash -el {0}
-      - name: Smoketest association_testing_pretrained pipeline
-        run: |
-          python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example \
-          --snakefile ${{ github.workspace }}/pipelines/association_testing_pretrained.snakefile --show-failed-logs
-        shell: micromamba-shell {0}
-      - name: Copy seed gene discovery snakemake config
-        run: cd ${{ github.workspace }}/example && cp ../deeprvat/seed_gene_discovery/config.yaml .
-        shell: bash -el {0}
-      - name: Smoketest seed_gene_discovery pipeline
-        run: |
-          python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example \
-          --snakefile ${{ github.workspace }}/pipelines/seed_gene_discovery.snakefile --show-failed-logs
-        shell: micromamba-shell {0}
-
-  DeepRVAT-Pipeline-Tests:
-    runs-on: ubuntu-latest
-    needs: DeepRVAT-Pipeline-Smoke-Tests
-    steps:
-      - name: Check out repository code
-        uses: actions/checkout@v4
-      - uses: mamba-org/setup-micromamba@v1.8.1
-        with:
-          environment-name: deeprvat-gh-action
-          environment-file: ${{ github.workspace }}/deeprvat_env_no_gpu.yml
-          cache-environment: true
-          cache-downloads: true
-      - name: Install DeepRVAT
-        run: pip install -e ${{ github.workspace }}
-        shell: micromamba-shell {0}
-      # There are no GPUs on the gh worker, so we disable it in the config
-      - name: Update config to use no gpus
-        run: "sed -i 's/gpus: 1/gpus: 0/' ${{ github.workspace }}/example/config.yaml"
-        shell: bash -el {0}
-      - name: Run training_association_testing pipeline
-        run: |
-          python -m snakemake -j 2 --directory ${{ github.workspace }}/example \
-          --snakefile ${{ github.workspace }}/pipelines/training_association_testing.snakefile --show-failed-logs
-        shell: micromamba-shell {0}
-      - name: Link pretrained models
-        run: cd ${{ github.workspace }}/example && ln -s ../pretrained_models
-        shell: bash -el {0}
-      - name: Run association_testing_pretrained pipeline
-        run: |
-          python -m snakemake -j 2 --directory ${{ github.workspace }}/example \
-          --snakefile ${{ github.workspace }}/pipelines/association_testing_pretrained.snakefile --show-failed-logs
-        shell: micromamba-shell {0}
-      - name: Copy seed gene discovery snakemake config
-        run: cd ${{ github.workspace }}/example && cp ../deeprvat/seed_gene_discovery/config.yaml .
-        shell: bash -el {0}
-      - name: Run seed_gene_discovery pipeline
-        run: |
-          python -m snakemake -j 2 --directory ${{ github.workspace }}/example \
-          --snakefile ${{ github.workspace }}/pipelines/seed_gene_discovery.snakefile --show-failed-logs
-        shell: micromamba-shell {0}
-
-
-  DeepRVAT-Preprocessing-Pipeline-Smoke-Tests:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check out repository code
-        uses: actions/checkout@v4
-      - uses: mamba-org/setup-micromamba@v1.8.1
-        with:
-          environment-name: deeprvat-preprocess-gh-action
-          environment-file: ${{ github.workspace }}/deeprvat_preprocessing_env.yml
-          cache-environment: true
-          cache-downloads: true
-
-      - name: Fake fasta data
-        if: steps.cache-fasta.outputs.cache-hit != 'true'
-        run: |
-          cd ${{ github.workspace }}/example/preprocess && touch workdir/reference/GRCh38.primary_assembly.genome.fa
-
-      - name: Run preprocessing pipeline no qc Smoke Test
-        run: |
-          python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example/preprocess \
-          --snakefile ${{ github.workspace }}/pipelines/preprocess_no_qc.snakefile \
-          --configfile ${{ github.workspace }}/pipelines/config/deeprvat_preprocess_config.yaml --show-failed-logs
-        shell: micromamba-shell {0}
-
-
-      - name: Preprocessing pipeline with qc Smoke Test
-        run: |
-          python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example/preprocess \
-          --snakefile ${{ github.workspace }}/pipelines/preprocess_with_qc.snakefile \
-          --configfile ${{ github.workspace }}/pipelines/config/deeprvat_preprocess_config.yaml --show-failed-logs
-        shell: micromamba-shell {0}
-
-
-  DeepRVAT-Annotation-Pipeline-Smoke-Tests:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check out repository code
-        uses: actions/checkout@v4
-      - uses: mamba-org/setup-micromamba@v1.8.1
-        with:
-          environment-name: deeprvat-preprocess-gh-action
-          environment-file: ${{ github.workspace }}/deeprvat_preprocessing_env.yml
-          cache-environment: true
-          cache-downloads: true
-      - name: Annotations Smoke Test
-        run: |
-          python -m snakemake -n -j 2 --directory ${{ github.workspace }}/example/annotations \
-          --snakefile ${{ github.workspace }}/pipelines/annotations.snakefile \
-          --configfile ${{ github.workspace }}/pipelines/config/deeprvat_annotation_config.yaml --show-failed-logs
-        shell: micromamba-shell {0}
-
-
-  DeepRVAT-Preprocessing-Pipeline-Tests-No-QC:
-    runs-on: ubuntu-latest
-    needs: DeepRVAT-Preprocessing-Pipeline-Smoke-Tests
-    steps:
-      - name: Check out repository code
-        uses: actions/checkout@v4
-      - uses: mamba-org/setup-micromamba@v1.8.1
-        with:
-          environment-name: deeprvat-preprocess-gh-action
-          environment-file: ${{ github.workspace }}/deeprvat_preprocessing_env.yml
-          cache-environment: true
-          cache-downloads: true
-
-      - name: Install DeepRVAT
-        run: pip install -e ${{ github.workspace }}
-        shell: micromamba-shell {0}
-
-      - name: Cache Fasta file
-        id: cache-fasta
-        uses: actions/cache@v4
-        with:
-          path: example/preprocess/workdir/reference
-          key: ${{ runner.os }}-reference-fasta
-
-      - name: Download and unpack fasta data
-        if: steps.cache-fasta.outputs.cache-hit != 'true'
-        run: |
-          cd ${{ github.workspace }}/example/preprocess && \
-          wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_44/GRCh38.primary_assembly.genome.fa.gz \
-          -O workdir/reference/GRCh38.primary_assembly.genome.fa.gz \
-          && gzip -d workdir/reference/GRCh38.primary_assembly.genome.fa.gz
-
-      - name: Run preprocessing pipeline
-        run: |
-          python -m snakemake -j 2 --directory ${{ github.workspace }}/example/preprocess \
-          --snakefile ${{ github.workspace }}/pipelines/preprocess_no_qc.snakefile \
-          --configfile ${{ github.workspace }}/pipelines/config/deeprvat_preprocess_config.yaml --show-failed-logs
-        shell: micromamba-shell {0}
-
-
-  DeepRVAT-Preprocessing-Pipeline-Tests-With-QC:
-    runs-on: ubuntu-latest
-    needs: DeepRVAT-Preprocessing-Pipeline-Smoke-Tests
-    steps:
-
-      - name: Check out repository code
-        uses: actions/checkout@v4
-      - uses: mamba-org/setup-micromamba@v1.8.1
-        with:
-          environment-name: deeprvat-preprocess-gh-action
-          environment-file: ${{ github.workspace }}/deeprvat_preprocessing_env.yml
-          cache-environment: true
-          cache-downloads: true
-
-      - name: Install DeepRVAT
-        run: pip install -e ${{ github.workspace }}
-        shell: micromamba-shell {0}
-
-      - name: Cache Fasta file
-        id: cache-fasta
-        uses: actions/cache@v4
-        with:
-          path: example/preprocess/workdir/reference
-          key: ${{ runner.os }}-reference-fasta
-
-      - name: Download and unpack fasta data
-        if: steps.cache-fasta.outputs.cache-hit != 'true'
-        run: |
-          cd ${{ github.workspace }}/example/preprocess && \
-          wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_44/GRCh38.primary_assembly.genome.fa.gz \
-          -O workdir/reference/GRCh38.primary_assembly.genome.fa.gz \
-          && gzip -d workdir/reference/GRCh38.primary_assembly.genome.fa.gz
-
-      - name: Run preprocessing pipeline
-        run: |
-          python -m snakemake -j 2 --directory ${{ github.workspace }}/example/preprocess \
-          --snakefile ${{ github.workspace }}/pipelines/preprocess_with_qc.snakefile \
-          --configfile ${{ github.workspace }}/pipelines/config/deeprvat_preprocess_config.yaml --show-failed-logs
-        shell: micromamba-shell {0}
diff --git a/.github/workflows/pipeline-tests.yml b/.github/workflows/pipeline-tests.yml
new file mode 100644
index 00000000..0cf7e64c
--- /dev/null
+++ b/.github/workflows/pipeline-tests.yml
@@ -0,0 +1,155 @@
+name: DeepRVAT Pipeline Tests
+run-name: DeepRVAT Pipeline Tests 🧬🧪💻🧑‍🔬
+on: [ push ]
+
+jobs:
+  # Training pipeline: the smoke job (dry_run left unset) gates the real run
+  Smoke-RunTraining:
+    uses: ./.github/workflows/run-pipeline.yml
+    with:
+      environment_file: ./deeprvat_env_no_gpu.yml
+      pipeline_file: ./pipelines/run_training.snakefile
+
+  Pipeline-Tests-RunTraining:
+    uses: ./.github/workflows/run-pipeline.yml
+    needs: Smoke-RunTraining
+    with:
+      environment_file: ./deeprvat_env_no_gpu.yml
+      pipeline_file: ./pipelines/run_training.snakefile
+      dry_run: false
+
+  # Association testing with pretrained models; prerun_cmd links ../pretrained_models into ./example
+  Smoke-Association-Testing-Pretrained:
+    uses: ./.github/workflows/run-pipeline.yml
+    with:
+      pipeline_file: ./pipelines/association_testing_pretrained.snakefile
+      environment_file: ./deeprvat_env_no_gpu.yml
+      prerun_cmd: cd ./example && ln -s ../pretrained_models
+
+  Pipeline-Tests-Association-Testing-Pretrained:  # job ID now names the pipeline it actually runs
+    needs: Smoke-Association-Testing-Pretrained
+    uses: ./.github/workflows/run-pipeline.yml
+    with:
+      pipeline_file: ./pipelines/association_testing_pretrained.snakefile
+      environment_file: ./deeprvat_env_no_gpu.yml
+      prerun_cmd: cd ./example && ln -s ../pretrained_models
+      dry_run: false
+
+  # Association testing with pretrained models (regenie variant): smoke job first, then the real run
+  Smoke-Association-Testing-Pretrained-Regenie:
+    uses: ./.github/workflows/run-pipeline.yml
+    with:
+      environment_file: ./deeprvat_env_no_gpu.yml
+      pipeline_file: ./pipelines/association_testing_pretrained_regenie.snakefile
+      prerun_cmd: cd ./example && ln -s ../pretrained_models
+
+  Pipeline-Tests-Association-Testing-Pretrained-Regenie:
+    uses: ./.github/workflows/run-pipeline.yml
+    needs: Smoke-Association-Testing-Pretrained-Regenie
+    with:
+      environment_file: ./deeprvat_env_no_gpu.yml
+      pipeline_file: ./pipelines/association_testing_pretrained_regenie.snakefile
+      prerun_cmd: cd ./example && ln -s ../pretrained_models
+      dry_run: false
+
+  # Training plus association testing: smoke job first, then the real run
+  Smoke-Association-Testing-Training:
+    uses: ./.github/workflows/run-pipeline.yml
+    with:
+      environment_file: ./deeprvat_env_no_gpu.yml
+      pipeline_file: ./pipelines/training_association_testing.snakefile
+
+  Pipeline-Tests-Association-Testing-Training:
+    uses: ./.github/workflows/run-pipeline.yml
+    needs: Smoke-Association-Testing-Training
+    with:
+      environment_file: ./deeprvat_env_no_gpu.yml
+      pipeline_file: ./pipelines/training_association_testing.snakefile
+      dry_run: false
+
+  # Training plus association testing (regenie variant): smoke job first, then the real run
+  Smoke-Association-Testing-Training-Regenie:
+    uses: ./.github/workflows/run-pipeline.yml
+    with:
+      pipeline_file: ./pipelines/training_association_testing_regenie.snakefile
+      environment_file: ./deeprvat_env_no_gpu.yml
+
+  Pipeline-Tests-Association-Testing-Training-Regenie:  # job ID now mirrors its smoke job's word order
+    needs: Smoke-Association-Testing-Training-Regenie
+    uses: ./.github/workflows/run-pipeline.yml
+    with:
+      pipeline_file: ./pipelines/training_association_testing_regenie.snakefile
+      environment_file: ./deeprvat_env_no_gpu.yml
+      dry_run: false
+
+  # Seed gene discovery; prerun_cmd copies the seed gene discovery config.yaml into ./example
+  Smoke-Seed-Gene-Discovery:
+    uses: ./.github/workflows/run-pipeline.yml
+    with:
+      environment_file: ./deeprvat_env_no_gpu.yml
+      pipeline_file: ./pipelines/seed_gene_discovery.snakefile
+      prerun_cmd: cd ./example && cp ../deeprvat/seed_gene_discovery/config.yaml .
+
+  Pipeline-Tests-Seed-Gene-Discovery:
+    uses: ./.github/workflows/run-pipeline.yml
+    needs: Smoke-Seed-Gene-Discovery
+    with:
+      environment_file: ./deeprvat_env_no_gpu.yml
+      pipeline_file: ./pipelines/seed_gene_discovery.snakefile
+      prerun_cmd: cd ./example && cp ../deeprvat/seed_gene_discovery/config.yaml .
+      dry_run: false
+
+  # Preprocessing with QC; both jobs request the reference fasta under the same download path
+  Smoke-Preprocessing-With-QC:
+    uses: ./.github/workflows/run-pipeline.yml
+    with:
+      environment_file: ./deeprvat_preprocessing_env.yml
+      pipeline_file: ./pipelines/preprocess_with_qc.snakefile
+      pipeline_config: ./pipelines/config/deeprvat_preprocess_config.yaml
+      pipeline_directory: ./example/preprocess
+      download_fasta_data: true
+      fasta_download_path: ./example/preprocess/workdir/reference
+
+  Pipeline-Tests-Preprocessing-With-QC:
+    uses: ./.github/workflows/run-pipeline.yml
+    needs: Smoke-Preprocessing-With-QC
+    with:
+      environment_file: ./deeprvat_preprocessing_env.yml
+      pipeline_file: ./pipelines/preprocess_with_qc.snakefile
+      pipeline_config: ./pipelines/config/deeprvat_preprocess_config.yaml
+      pipeline_directory: ./example/preprocess
+      download_fasta_data: true
+      fasta_download_path: ./example/preprocess/workdir/reference
+      dry_run: false
+
+  # Preprocessing without QC; both jobs request the reference fasta under the same download path
+  Smoke-Preprocessing-No-QC:
+    uses: ./.github/workflows/run-pipeline.yml
+    with:
+      environment_file: ./deeprvat_preprocessing_env.yml
+      pipeline_file: ./pipelines/preprocess_no_qc.snakefile
+      pipeline_config: ./pipelines/config/deeprvat_preprocess_config.yaml
+      pipeline_directory: ./example/preprocess
+      download_fasta_data: true
+      fasta_download_path: ./example/preprocess/workdir/reference
+
+  Pipeline-Tests-Preprocessing-No-QC:
+    uses: ./.github/workflows/run-pipeline.yml
+    needs: Smoke-Preprocessing-No-QC
+    with:
+      environment_file: ./deeprvat_preprocessing_env.yml
+      pipeline_file: ./pipelines/preprocess_no_qc.snakefile
+      pipeline_config: ./pipelines/config/deeprvat_preprocess_config.yaml
+      pipeline_directory: ./example/preprocess
+      download_fasta_data: true
+      fasta_download_path: ./example/preprocess/workdir/reference
+      dry_run: false
+
+  # Annotation pipeline: smoke job only (dry_run left unset); no real-run job is defined for it here
+  Smoke-Annotation-Pipeline:
+    uses: ./.github/workflows/run-pipeline.yml
+    with:
+      environment_file: ./deeprvat_annotations.yml
+      pipeline_file: ./pipelines/annotations.snakefile
+      pipeline_config: ./pipelines/config/deeprvat_annotation_config.yaml
+      pipeline_directory: ./example/annotations
diff --git a/.github/workflows/run-pipeline.yml b/.github/workflows/run-pipeline.yml
new file mode 100644
index 00000000..6971a7fd
--- /dev/null
+++ b/.github/workflows/run-pipeline.yml
@@ -0,0 +1,87 @@
+name: Run snakemake pipeline
+
+on:
+  workflow_call:
+    inputs:
+      environment_file:  # environment file passed to setup-micromamba
+        type: string
+        required: true
+      prerun_cmd:  # optional command run with bash before the pipeline
+        type: string
+        required: false
+      pipeline_file:  # passed to snakemake as --snakefile
+        type: string
+        required: true
+      no_gpu:  # when true, "gpus: 1" is rewritten to "gpus: 0" in ./example/config.yaml
+        type: boolean
+        required: false
+        default: true
+      pipeline_directory:  # passed to snakemake as --directory
+        type: string
+        required: false
+        default: ./example
+      pipeline_config:  # optional config file for snakemake
+        type: string
+        required: false
+      dry_run:  # when true, snakemake is invoked with -n
+        type: boolean
+        required: false
+        default: true
+      download_fasta_data:  # when true, the reference fasta is cached/downloaded
+        type: boolean
+        required: false
+        default: false
+      fasta_download_path:  # cache path and download target for the fasta
+        type: string
+        required: false
+      postrun_cmd:  # optional command run in the micromamba shell after the pipeline
+        type: string
+        required: false
+
+jobs:
+  Run-Pipeline:
+      runs-on: ubuntu-latest
+      steps:
+        - name: Check out repository code
+          uses: actions/checkout@v4
+        - uses: mamba-org/setup-micromamba@v1.8.1  # environment built from the caller-supplied file
+          with:
+            environment-file: ${{ inputs.environment_file }}
+            cache-downloads: true
+            cache-environment: true
+        - name: Install DeepRVAT  # editable install of the checked-out workspace
+          shell: micromamba-shell {0}
+          run: pip install -e ${{ github.workspace }}
+        - name: Cache Fasta file  # only when the caller asked for the reference fasta
+          if: inputs.download_fasta_data
+          id: cache-fasta
+          uses: actions/cache@v4
+          with:
+            path: ${{ inputs.fasta_download_path }}
+            key: cache-reference-fasta-${{ inputs.fasta_download_path }}
+        - name: Download and unpack fasta data  # skipped on a cache hit
+          if: inputs.download_fasta_data && steps.cache-fasta.outputs.cache-hit != 'true'
+          run: |  # mkdir -p so the download does not depend on the target directory already existing
+            mkdir -p "${{ inputs.fasta_download_path }}" && cd "${{ inputs.fasta_download_path }}" \
+            && wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_44/GRCh38.primary_assembly.genome.fa.gz -O GRCh38.primary_assembly.genome.fa.gz \
+            && gzip -d GRCh38.primary_assembly.genome.fa.gz
+        - name: Run pre pipeline cmd
+          if: inputs.prerun_cmd  # only runs when the caller supplied a command
+          run: ${{inputs.prerun_cmd}}
+          shell: bash -el {0}
+        - name: Set to 0 GPUs in config
+          if: inputs.no_gpu
+          # There are no GPUs on the GitHub worker, so disable them in the config. NOTE(review): the path is fixed to ./example/config.yaml and does not follow inputs.pipeline_directory - confirm this is intended.
+          run: "sed -i 's/gpus: 1/gpus: 0/' ./example/config.yaml"
+          shell: bash -el {0}
+        - name: "Running pipeline ${{ inputs.pipeline_file }}"  # github.jobs is not a github context property
+          run: |  # -n only for dry runs; --configfile only when a config file was passed
+            python -m snakemake ${{ (inputs.dry_run && '-n') || '' }} \
+            -j 2 --directory ${{ inputs.pipeline_directory }} \
+            ${{ (inputs.pipeline_config && format('--configfile {0}', inputs.pipeline_config)) || '' }} \
+            --snakefile ${{ inputs.pipeline_file }} --show-failed-logs -F
+          shell: micromamba-shell {0}
+        - name: Run post pipeline cmd
+          if: inputs.postrun_cmd  # only runs when the caller supplied a command
+          run: ${{inputs.postrun_cmd}}
+          shell: micromamba-shell {0}
diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml
new file mode 100644
index 00000000..09309904
--- /dev/null
+++ b/.github/workflows/run-pytest.yml
@@ -0,0 +1,33 @@
+name: Run pytest
+
+on:
+  workflow_call:
+    inputs:
+      environment_file:  # environment file passed to setup-micromamba
+        type: string
+        required: true
+      test_path:  # path handed to pytest
+        type: string
+        required: true
+      parallel_tests:  # when true, pytest is invoked with "-n auto"
+        type: boolean
+        required: false
+        default: false
+
+jobs:
+  Run-Pytest:
+      runs-on: ubuntu-latest
+      steps:
+        - name: Check out repository code
+          uses: actions/checkout@v4
+        - uses: mamba-org/setup-micromamba@v1.8.1
+          with:
+            environment-file: ${{ inputs.environment_file }}
+            cache-downloads: true
+            cache-environment: true
+        - name: Install DeepRVAT  # editable install of the checked-out workspace
+          shell: micromamba-shell {0}
+          run: pip install -e ${{ github.workspace }}
+        - name: Run pytest
+          shell: micromamba-shell {0}
+          run: pytest ${{ (inputs.parallel_tests && '-n auto') || '' }} -v ${{ inputs.test_path }}
diff --git a/.github/workflows/test-runner.yml b/.github/workflows/test-runner.yml
deleted file mode 100644
index 32e33474..00000000
--- a/.github/workflows/test-runner.yml
+++ /dev/null
@@ -1,65 +0,0 @@
-name: DeepRVAT test runner
-run-name: DeepRVAT Tests 🧑🏼‍💻✅
-on: [ push ]
-
-jobs:
-  DeepRVAT-Tests-Runner:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check out repository code
-        uses: actions/checkout@v4
-
-      - uses: mamba-org/setup-micromamba@v1.8.1
-        with:
-          environment-name: deeprvat-preprocess-gh-action
-          environment-file: ${{ github.workspace }}/deeprvat_env_no_gpu.yml
-          cache-environment: true
-          cache-downloads: true
-
-      - name: Install DeepRVAT
-        run: pip install -e ${{ github.workspace }}
-        shell: micromamba-shell {0}
-      - name: Run pytest deeprvat
-        run: pytest -n auto -v ${{ github.workspace }}/tests/deeprvat
-        shell: micromamba-shell {0}
-
-  DeepRVAT-Tests-Runner-Preprocessing:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check out repository code
-        uses: actions/checkout@v4
-
-      - uses: mamba-org/setup-micromamba@v1.8.1
-        with:
-          environment-name: deeprvat-preprocess-gh-action
-          environment-file: ${{ github.workspace }}/deeprvat_preprocessing_env.yml
-          cache-environment: true
-          cache-downloads: true
-
-      - name: Install DeepRVAT
-        run: pip install -e ${{ github.workspace }}
-        shell: micromamba-shell {0}
-
-      - name: Run pytest preprocessing
-        run: pytest -v ${{ github.workspace }}/tests/preprocessing
-        shell: micromamba-shell {0}
-
-  DeepRVAT-Tests-Runner-Annotations:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check out repository code
-        uses: actions/checkout@v4
-      - uses: mamba-org/setup-micromamba@v1.8.1
-        with:
-          environment-name: deeprvat-annotation-gh-action
-          environment-file: ${{ github.workspace }}/deeprvat_annotations.yml
-          cache-environment: true
-          cache-downloads: true
-
-      - name: Install DeepRVAT
-        run: pip install -e ${{ github.workspace }}
-        shell: micromamba-shell {0}
-
-      - name: Run pytest annotations
-        run: pytest -n auto -v ${{ github.workspace }}/tests/annotations
-        shell: micromamba-shell {0}
diff --git a/deeprvat/deeprvat/associate.py b/deeprvat/deeprvat/associate.py
index 5af2e770..f8d18c97 100644
--- a/deeprvat/deeprvat/associate.py
+++ b/deeprvat/deeprvat/associate.py
@@ -19,7 +19,7 @@
 import statsmodels.api as sm
 import yaml
 from bgen import BgenWriter
-from numcodecs import Blosc
+from numcodecs import Blosc, JSON
 from seak import scoretest
 from statsmodels.tools.tools import add_constant
 from torch.utils.data import DataLoader, Dataset, Subset
@@ -295,7 +295,7 @@ def compute_burdens_(
                     chunk_burden = np.zeros(shape=(n_samples,) + this_burdens.shape[1:])
                 chunk_y = np.zeros(shape=(n_samples,) + this_y.shape[1:])
                 chunk_x = np.zeros(shape=(n_samples,) + this_x.shape[1:])
-                chunk_sampleid = np.zeros(shape=(n_samples))
+                chunk_sampleid = [""] * n_samples
 
                 logger.info(f"Batch size: {batch['rare_variant_annotations'].shape}")
 
@@ -333,8 +333,8 @@ def compute_burdens_(
                     mode="a",
                     shape=(n_total_samples),
                     chunks=(None),
-                    dtype=np.float32,
-                    compressor=Blosc(clevel=compression_level),
+                    dtype=object,
+                    object_codec=JSON(),
                 )
             start_idx = i * batch_size
             end_idx = min(start_idx + batch_size, chunk_end)  # read from chunk shape
@@ -513,7 +513,7 @@ def make_regenie_input_(
         with BgenWriter(
             bgen,
             n_samples,
-            samples=list(sample_ids),
+            samples=list(sample_ids.astype(str)),
             metadata="Pseudovariants containing DeepRVAT gene impairment scores. One pseudovariant per gene.",
         ) as f:
             for i in trange(n_genes):
diff --git a/deeprvat/preprocessing/preprocess.py b/deeprvat/preprocessing/preprocess.py
index e979912b..9f5f22d1 100644
--- a/deeprvat/preprocessing/preprocess.py
+++ b/deeprvat/preprocessing/preprocess.py
@@ -277,6 +277,8 @@ def process_sparse_gt(
         variants = variants[~variants["id"].isin(variant_ids_to_exclude)]
         if not skip_sanity_checks:
             assert total_variants - len(variants) == len(variant_ids_to_exclude)
+            if variants.empty:
+                raise ValueError("All variants have been filtered out.")
 
     logging.info(f"Dropped {total_variants - len(variants)} variants")
     logging.info(f"...done ({time.time() - start_time} s)")
@@ -313,6 +315,9 @@ def process_sparse_gt(
 
     samples = sorted(list(samples))
 
+    if len(samples) == 0:
+        raise ValueError("All samples have been excluded.")
+
     logging.info("Processing sparse GT files by chromosome")
     total_calls_dropped = 0
     variant_groups = variants.groupby("chrom")
diff --git a/example/config.yaml b/example/config.yaml
index 33ffb03b..9eca68be 100644
--- a/example/config.yaml
+++ b/example/config.yaml
@@ -32,6 +32,21 @@ n_repeats: 2
 
 do_scoretest: True
 
+gtf_file: gencode.v38.basic.annotation.gtf.gz
+
+regenie:
+    step_1:
+        bgen: imputation.bgen
+        snplist: imputation.snplist
+        bsize: 1000
+        options:
+            - "--sample imputation.sample"
+            - "--qt"
+    step_2:
+        bsize: 400
+        options:
+            - "--qt"
+
 training:
     min_variant_count: 1
     n_bags: 1
diff --git a/example/gencode.v38.basic.annotation.gtf.gz b/example/gencode.v38.basic.annotation.gtf.gz
new file mode 100644
index 00000000..ab6059d5
Binary files /dev/null and b/example/gencode.v38.basic.annotation.gtf.gz differ
diff --git a/example/imputation.bgen b/example/imputation.bgen
new file mode 100644
index 00000000..7ae8d5ea
Binary files /dev/null and b/example/imputation.bgen differ
diff --git a/example/imputation.bgen.bgi b/example/imputation.bgen.bgi
new file mode 100644
index 00000000..2a537d70
Binary files /dev/null and b/example/imputation.bgen.bgi differ
diff --git a/example/imputation.sample b/example/imputation.sample
new file mode 100644
index 00000000..f3b2f396
--- /dev/null
+++ b/example/imputation.sample
@@ -0,0 +1,1002 @@
+ID_1 ID_2 missing sex
+0 0 0 D
+0 0 0 0
+1 1 0 1
+2 2 0 0
+3 3 0 1
+4 4 0 0
+5 5 0 1
+6 6 0 0
+7 7 0 1
+8 8 0 0
+9 9 0 1
+10 10 0 0
+11 11 0 1
+12 12 0 0
+13 13 0 1
+14 14 0 0
+15 15 0 1
+16 16 0 0
+17 17 0 1
+18 18 0 0
+19 19 0 1
+20 20 0 0
+21 21 0 1
+22 22 0 0
+23 23 0 1
+24 24 0 0
+25 25 0 1
+26 26 0 0
+27 27 0 1
+28 28 0 0
+29 29 0 1
+30 30 0 0
+31 31 0 1
+32 32 0 0
+33 33 0 1
+34 34 0 0
+35 35 0 1
+36 36 0 0
+37 37 0 1
+38 38 0 0
+39 39 0 1
+40 40 0 0
+41 41 0 1
+42 42 0 0
+43 43 0 1
+44 44 0 0
+45 45 0 1
+46 46 0 0
+47 47 0 1
+48 48 0 0
+49 49 0 1
+50 50 0 0
+51 51 0 1
+52 52 0 0
+53 53 0 1
+54 54 0 0
+55 55 0 1
+56 56 0 0
+57 57 0 1
+58 58 0 0
+59 59 0 1
+60 60 0 0
+61 61 0 1
+62 62 0 0
+63 63 0 1
+64 64 0 0
+65 65 0 1
+66 66 0 0
+67 67 0 1
+68 68 0 0
+69 69 0 1
+70 70 0 0
+71 71 0 1
+72 72 0 0
+73 73 0 1
+74 74 0 0
+75 75 0 1
+76 76 0 0
+77 77 0 1
+78 78 0 0
+79 79 0 1
+80 80 0 0
+81 81 0 1
+82 82 0 0
+83 83 0 1
+84 84 0 0
+85 85 0 1
+86 86 0 0
+87 87 0 1
+88 88 0 0
+89 89 0 1
+90 90 0 0
+91 91 0 1
+92 92 0 0
+93 93 0 1
+94 94 0 0
+95 95 0 1
+96 96 0 0
+97 97 0 1
+98 98 0 0
+99 99 0 1
+100 100 0 0
+101 101 0 1
+102 102 0 0
+103 103 0 1
+104 104 0 0
+105 105 0 1
+106 106 0 0
+107 107 0 1
+108 108 0 0
+109 109 0 1
+110 110 0 0
+111 111 0 1
+112 112 0 0
+113 113 0 1
+114 114 0 0
+115 115 0 1
+116 116 0 0
+117 117 0 1
+118 118 0 0
+119 119 0 1
+120 120 0 0
+121 121 0 1
+122 122 0 0
+123 123 0 1
+124 124 0 0
+125 125 0 1
+126 126 0 0
+127 127 0 1
+128 128 0 0
+129 129 0 1
+130 130 0 0
+131 131 0 1
+132 132 0 0
+133 133 0 1
+134 134 0 0
+135 135 0 1
+136 136 0 0
+137 137 0 1
+138 138 0 0
+139 139 0 1
+140 140 0 0
+141 141 0 1
+142 142 0 0
+143 143 0 1
+144 144 0 0
+145 145 0 1
+146 146 0 0
+147 147 0 1
+148 148 0 0
+149 149 0 1
+150 150 0 0
+151 151 0 1
+152 152 0 0
+153 153 0 1
+154 154 0 0
+155 155 0 1
+156 156 0 0
+157 157 0 1
+158 158 0 0
+159 159 0 1
+160 160 0 0
+161 161 0 1
+162 162 0 0
+163 163 0 1
+164 164 0 0
+165 165 0 1
+166 166 0 0
+167 167 0 1
+168 168 0 0
+169 169 0 1
+170 170 0 0
+171 171 0 1
+172 172 0 0
+173 173 0 1
+174 174 0 0
+175 175 0 1
+176 176 0 0
+177 177 0 1
+178 178 0 0
+179 179 0 1
+180 180 0 0
+181 181 0 1
+182 182 0 0
+183 183 0 1
+184 184 0 0
+185 185 0 1
+186 186 0 0
+187 187 0 1
+188 188 0 0
+189 189 0 1
+190 190 0 0
+191 191 0 1
+192 192 0 0
+193 193 0 1
+194 194 0 0
+195 195 0 1
+196 196 0 0
+197 197 0 1
+198 198 0 0
+199 199 0 1
+200 200 0 0
+201 201 0 1
+202 202 0 0
+203 203 0 1
+204 204 0 0
+205 205 0 1
+206 206 0 0
+207 207 0 1
+208 208 0 0
+209 209 0 1
+210 210 0 0
+211 211 0 1
+212 212 0 0
+213 213 0 1
+214 214 0 0
+215 215 0 1
+216 216 0 0
+217 217 0 1
+218 218 0 0
+219 219 0 1
+220 220 0 0
+221 221 0 1
+222 222 0 0
+223 223 0 1
+224 224 0 0
+225 225 0 1
+226 226 0 0
+227 227 0 1
+228 228 0 0
+229 229 0 1
+230 230 0 0
+231 231 0 1
+232 232 0 0
+233 233 0 1
+234 234 0 0
+235 235 0 1
+236 236 0 0
+237 237 0 1
+238 238 0 0
+239 239 0 1
+240 240 0 0
+241 241 0 1
+242 242 0 0
+243 243 0 1
+244 244 0 0
+245 245 0 1
+246 246 0 0
+247 247 0 1
+248 248 0 0
+249 249 0 1
+250 250 0 0
+251 251 0 1
+252 252 0 0
+253 253 0 1
+254 254 0 0
+255 255 0 1
+256 256 0 0
+257 257 0 1
+258 258 0 0
+259 259 0 1
+260 260 0 0
+261 261 0 1
+262 262 0 0
+263 263 0 1
+264 264 0 0
+265 265 0 1
+266 266 0 0
+267 267 0 1
+268 268 0 0
+269 269 0 1
+270 270 0 0
+271 271 0 1
+272 272 0 0
+273 273 0 1
+274 274 0 0
+275 275 0 1
+276 276 0 0
+277 277 0 1
+278 278 0 0
+279 279 0 1
+280 280 0 0
+281 281 0 1
+282 282 0 0
+283 283 0 1
+284 284 0 0
+285 285 0 1
+286 286 0 0
+287 287 0 1
+288 288 0 0
+289 289 0 1
+290 290 0 0
+291 291 0 1
+292 292 0 0
+293 293 0 1
+294 294 0 0
+295 295 0 1
+296 296 0 0
+297 297 0 1
+298 298 0 0
+299 299 0 1
+300 300 0 0
+301 301 0 1
+302 302 0 0
+303 303 0 1
+304 304 0 0
+305 305 0 1
+306 306 0 0
+307 307 0 1
+308 308 0 0
+309 309 0 1
+310 310 0 0
+311 311 0 1
+312 312 0 0
+313 313 0 1
+314 314 0 0
+315 315 0 1
+316 316 0 0
+317 317 0 1
+318 318 0 0
+319 319 0 1
+320 320 0 0
+321 321 0 1
+322 322 0 0
+323 323 0 1
+324 324 0 0
+325 325 0 1
+326 326 0 0
+327 327 0 1
+328 328 0 0
+329 329 0 1
+330 330 0 0
+331 331 0 1
+332 332 0 0
+333 333 0 1
+334 334 0 0
+335 335 0 1
+336 336 0 0
+337 337 0 1
+338 338 0 0
+339 339 0 1
+340 340 0 0
+341 341 0 1
+342 342 0 0
+343 343 0 1
+344 344 0 0
+345 345 0 1
+346 346 0 0
+347 347 0 1
+348 348 0 0
+349 349 0 1
+350 350 0 0
+351 351 0 1
+352 352 0 0
+353 353 0 1
+354 354 0 0
+355 355 0 1
+356 356 0 0
+357 357 0 1
+358 358 0 0
+359 359 0 1
+360 360 0 0
+361 361 0 1
+362 362 0 0
+363 363 0 1
+364 364 0 0
+365 365 0 1
+366 366 0 0
+367 367 0 1
+368 368 0 0
+369 369 0 1
+370 370 0 0
+371 371 0 1
+372 372 0 0
+373 373 0 1
+374 374 0 0
+375 375 0 1
+376 376 0 0
+377 377 0 1
+378 378 0 0
+379 379 0 1
+380 380 0 0
+381 381 0 1
+382 382 0 0
+383 383 0 1
+384 384 0 0
+385 385 0 1
+386 386 0 0
+387 387 0 1
+388 388 0 0
+389 389 0 1
+390 390 0 0
+391 391 0 1
+392 392 0 0
+393 393 0 1
+394 394 0 0
+395 395 0 1
+396 396 0 0
+397 397 0 1
+398 398 0 0
+399 399 0 1
+400 400 0 0
+401 401 0 1
+402 402 0 0
+403 403 0 1
+404 404 0 0
+405 405 0 1
+406 406 0 0
+407 407 0 1
+408 408 0 0
+409 409 0 1
+410 410 0 0
+411 411 0 1
+412 412 0 0
+413 413 0 1
+414 414 0 0
+415 415 0 1
+416 416 0 0
+417 417 0 1
+418 418 0 0
+419 419 0 1
+420 420 0 0
+421 421 0 1
+422 422 0 0
+423 423 0 1
+424 424 0 0
+425 425 0 1
+426 426 0 0
+427 427 0 1
+428 428 0 0
+429 429 0 1
+430 430 0 0
+431 431 0 1
+432 432 0 0
+433 433 0 1
+434 434 0 0
+435 435 0 1
+436 436 0 0
+437 437 0 1
+438 438 0 0
+439 439 0 1
+440 440 0 0
+441 441 0 1
+442 442 0 0
+443 443 0 1
+444 444 0 0
+445 445 0 1
+446 446 0 0
+447 447 0 1
+448 448 0 0
+449 449 0 1
+450 450 0 0
+451 451 0 1
+452 452 0 0
+453 453 0 1
+454 454 0 0
+455 455 0 1
+456 456 0 0
+457 457 0 1
+458 458 0 0
+459 459 0 1
+460 460 0 0
+461 461 0 1
+462 462 0 0
+463 463 0 1
+464 464 0 0
+465 465 0 1
+466 466 0 0
+467 467 0 1
+468 468 0 0
+469 469 0 1
+470 470 0 0
+471 471 0 1
+472 472 0 0
+473 473 0 1
+474 474 0 0
+475 475 0 1
+476 476 0 0
+477 477 0 1
+478 478 0 0
+479 479 0 1
+480 480 0 0
+481 481 0 1
+482 482 0 0
+483 483 0 1
+484 484 0 0
+485 485 0 1
+486 486 0 0
+487 487 0 1
+488 488 0 0
+489 489 0 1
+490 490 0 0
+491 491 0 1
+492 492 0 0
+493 493 0 1
+494 494 0 0
+495 495 0 1
+496 496 0 0
+497 497 0 1
+498 498 0 0
+499 499 0 1
+500 500 0 0
+501 501 0 1
+502 502 0 0
+503 503 0 1
+504 504 0 0
+505 505 0 1
+506 506 0 0
+507 507 0 1
+508 508 0 0
+509 509 0 1
+510 510 0 0
+511 511 0 1
+512 512 0 0
+513 513 0 1
+514 514 0 0
+515 515 0 1
+516 516 0 0
+517 517 0 1
+518 518 0 0
+519 519 0 1
+520 520 0 0
+521 521 0 1
+522 522 0 0
+523 523 0 1
+524 524 0 0
+525 525 0 1
+526 526 0 0
+527 527 0 1
+528 528 0 0
+529 529 0 1
+530 530 0 0
+531 531 0 1
+532 532 0 0
+533 533 0 1
+534 534 0 0
+535 535 0 1
+536 536 0 0
+537 537 0 1
+538 538 0 0
+539 539 0 1
+540 540 0 0
+541 541 0 1
+542 542 0 0
+543 543 0 1
+544 544 0 0
+545 545 0 1
+546 546 0 0
+547 547 0 1
+548 548 0 0
+549 549 0 1
+550 550 0 0
+551 551 0 1
+552 552 0 0
+553 553 0 1
+554 554 0 0
+555 555 0 1
+556 556 0 0
+557 557 0 1
+558 558 0 0
+559 559 0 1
+560 560 0 0
+561 561 0 1
+562 562 0 0
+563 563 0 1
+564 564 0 0
+565 565 0 1
+566 566 0 0
+567 567 0 1
+568 568 0 0
+569 569 0 1
+570 570 0 0
+571 571 0 1
+572 572 0 0
+573 573 0 1
+574 574 0 0
+575 575 0 1
+576 576 0 0
+577 577 0 1
+578 578 0 0
+579 579 0 1
+580 580 0 0
+581 581 0 1
+582 582 0 0
+583 583 0 1
+584 584 0 0
+585 585 0 1
+586 586 0 0
+587 587 0 1
+588 588 0 0
+589 589 0 1
+590 590 0 0
+591 591 0 1
+592 592 0 0
+593 593 0 1
+594 594 0 0
+595 595 0 1
+596 596 0 0
+597 597 0 1
+598 598 0 0
+599 599 0 1
+600 600 0 0
+601 601 0 1
+602 602 0 0
+603 603 0 1
+604 604 0 0
+605 605 0 1
+606 606 0 0
+607 607 0 1
+608 608 0 0
+609 609 0 1
+610 610 0 0
+611 611 0 1
+612 612 0 0
+613 613 0 1
+614 614 0 0
+615 615 0 1
+616 616 0 0
+617 617 0 1
+618 618 0 0
+619 619 0 1
+620 620 0 0
+621 621 0 1
+622 622 0 0
+623 623 0 1
+624 624 0 0
+625 625 0 1
+626 626 0 0
+627 627 0 1
+628 628 0 0
+629 629 0 1
+630 630 0 0
+631 631 0 1
+632 632 0 0
+633 633 0 1
+634 634 0 0
+635 635 0 1
+636 636 0 0
+637 637 0 1
+638 638 0 0
+639 639 0 1
+640 640 0 0
+641 641 0 1
+642 642 0 0
+643 643 0 1
+644 644 0 0
+645 645 0 1
+646 646 0 0
+647 647 0 1
+648 648 0 0
+649 649 0 1
+650 650 0 0
+651 651 0 1
+652 652 0 0
+653 653 0 1
+654 654 0 0
+655 655 0 1
+656 656 0 0
+657 657 0 1
+658 658 0 0
+659 659 0 1
+660 660 0 0
+661 661 0 1
+662 662 0 0
+663 663 0 1
+664 664 0 0
+665 665 0 1
+666 666 0 0
+667 667 0 1
+668 668 0 0
+669 669 0 1
+670 670 0 0
+671 671 0 1
+672 672 0 0
+673 673 0 1
+674 674 0 0
+675 675 0 1
+676 676 0 0
+677 677 0 1
+678 678 0 0
+679 679 0 1
+680 680 0 0
+681 681 0 1
+682 682 0 0
+683 683 0 1
+684 684 0 0
+685 685 0 1
+686 686 0 0
+687 687 0 1
+688 688 0 0
+689 689 0 1
+690 690 0 0
+691 691 0 1
+692 692 0 0
+693 693 0 1
+694 694 0 0
+695 695 0 1
+696 696 0 0
+697 697 0 1
+698 698 0 0
+699 699 0 1
+700 700 0 0
+701 701 0 1
+702 702 0 0
+703 703 0 1
+704 704 0 0
+705 705 0 1
+706 706 0 0
+707 707 0 1
+708 708 0 0
+709 709 0 1
+710 710 0 0
+711 711 0 1
+712 712 0 0
+713 713 0 1
+714 714 0 0
+715 715 0 1
+716 716 0 0
+717 717 0 1
+718 718 0 0
+719 719 0 1
+720 720 0 0
+721 721 0 1
+722 722 0 0
+723 723 0 1
+724 724 0 0
+725 725 0 1
+726 726 0 0
+727 727 0 1
+728 728 0 0
+729 729 0 1
+730 730 0 0
+731 731 0 1
+732 732 0 0
+733 733 0 1
+734 734 0 0
+735 735 0 1
+736 736 0 0
+737 737 0 1
+738 738 0 0
+739 739 0 1
+740 740 0 0
+741 741 0 1
+742 742 0 0
+743 743 0 1
+744 744 0 0
+745 745 0 1
+746 746 0 0
+747 747 0 1
+748 748 0 0
+749 749 0 1
+750 750 0 0
+751 751 0 1
+752 752 0 0
+753 753 0 1
+754 754 0 0
+755 755 0 1
+756 756 0 0
+757 757 0 1
+758 758 0 0
+759 759 0 1
+760 760 0 0
+761 761 0 1
+762 762 0 0
+763 763 0 1
+764 764 0 0
+765 765 0 1
+766 766 0 0
+767 767 0 1
+768 768 0 0
+769 769 0 1
+770 770 0 0
+771 771 0 1
+772 772 0 0
+773 773 0 1
+774 774 0 0
+775 775 0 1
+776 776 0 0
+777 777 0 1
+778 778 0 0
+779 779 0 1
+780 780 0 0
+781 781 0 1
+782 782 0 0
+783 783 0 1
+784 784 0 0
+785 785 0 1
+786 786 0 0
+787 787 0 1
+788 788 0 0
+789 789 0 1
+790 790 0 0
+791 791 0 1
+792 792 0 0
+793 793 0 1
+794 794 0 0
+795 795 0 1
+796 796 0 0
+797 797 0 1
+798 798 0 0
+799 799 0 1
+800 800 0 0
+801 801 0 1
+802 802 0 0
+803 803 0 1
+804 804 0 0
+805 805 0 1
+806 806 0 0
+807 807 0 1
+808 808 0 0
+809 809 0 1
+810 810 0 0
+811 811 0 1
+812 812 0 0
+813 813 0 1
+814 814 0 0
+815 815 0 1
+816 816 0 0
+817 817 0 1
+818 818 0 0
+819 819 0 1
+820 820 0 0
+821 821 0 1
+822 822 0 0
+823 823 0 1
+824 824 0 0
+825 825 0 1
+826 826 0 0
+827 827 0 1
+828 828 0 0
+829 829 0 1
+830 830 0 0
+831 831 0 1
+832 832 0 0
+833 833 0 1
+834 834 0 0
+835 835 0 1
+836 836 0 0
+837 837 0 1
+838 838 0 0
+839 839 0 1
+840 840 0 0
+841 841 0 1
+842 842 0 0
+843 843 0 1
+844 844 0 0
+845 845 0 1
+846 846 0 0
+847 847 0 1
+848 848 0 0
+849 849 0 1
+850 850 0 0
+851 851 0 1
+852 852 0 0
+853 853 0 1
+854 854 0 0
+855 855 0 1
+856 856 0 0
+857 857 0 1
+858 858 0 0
+859 859 0 1
+860 860 0 0
+861 861 0 1
+862 862 0 0
+863 863 0 1
+864 864 0 0
+865 865 0 1
+866 866 0 0
+867 867 0 1
+868 868 0 0
+869 869 0 1
+870 870 0 0
+871 871 0 1
+872 872 0 0
+873 873 0 1
+874 874 0 0
+875 875 0 1
+876 876 0 0
+877 877 0 1
+878 878 0 0
+879 879 0 1
+880 880 0 0
+881 881 0 1
+882 882 0 0
+883 883 0 1
+884 884 0 0
+885 885 0 1
+886 886 0 0
+887 887 0 1
+888 888 0 0
+889 889 0 1
+890 890 0 0
+891 891 0 1
+892 892 0 0
+893 893 0 1
+894 894 0 0
+895 895 0 1
+896 896 0 0
+897 897 0 1
+898 898 0 0
+899 899 0 1
+900 900 0 0
+901 901 0 1
+902 902 0 0
+903 903 0 1
+904 904 0 0
+905 905 0 1
+906 906 0 0
+907 907 0 1
+908 908 0 0
+909 909 0 1
+910 910 0 0
+911 911 0 1
+912 912 0 0
+913 913 0 1
+914 914 0 0
+915 915 0 1
+916 916 0 0
+917 917 0 1
+918 918 0 0
+919 919 0 1
+920 920 0 0
+921 921 0 1
+922 922 0 0
+923 923 0 1
+924 924 0 0
+925 925 0 1
+926 926 0 0
+927 927 0 1
+928 928 0 0
+929 929 0 1
+930 930 0 0
+931 931 0 1
+932 932 0 0
+933 933 0 1
+934 934 0 0
+935 935 0 1
+936 936 0 0
+937 937 0 1
+938 938 0 0
+939 939 0 1
+940 940 0 0
+941 941 0 1
+942 942 0 0
+943 943 0 1
+944 944 0 0
+945 945 0 1
+946 946 0 0
+947 947 0 1
+948 948 0 0
+949 949 0 1
+950 950 0 0
+951 951 0 1
+952 952 0 0
+953 953 0 1
+954 954 0 0
+955 955 0 1
+956 956 0 0
+957 957 0 1
+958 958 0 0
+959 959 0 1
+960 960 0 0
+961 961 0 1
+962 962 0 0
+963 963 0 1
+964 964 0 0
+965 965 0 1
+966 966 0 0
+967 967 0 1
+968 968 0 0
+969 969 0 1
+970 970 0 0
+971 971 0 1
+972 972 0 0
+973 973 0 1
+974 974 0 0
+975 975 0 1
+976 976 0 0
+977 977 0 1
+978 978 0 0
+979 979 0 1
+980 980 0 0
+981 981 0 1
+982 982 0 0
+983 983 0 1
+984 984 0 0
+985 985 0 1
+986 986 0 0
+987 987 0 1
+988 988 0 0
+989 989 0 1
+990 990 0 0
+991 991 0 1
+992 992 0 0
+993 993 0 1
+994 994 0 0
+995 995 0 1
+996 996 0 0
+997 997 0 1
+998 998 0 0
+999 999 0 1
diff --git a/example/imputation.snplist b/example/imputation.snplist
new file mode 100644
index 00000000..231104ce
--- /dev/null
+++ b/example/imputation.snplist
@@ -0,0 +1,100 @@
+var0
+var1
+var2
+var3
+var4
+var5
+var6
+var7
+var8
+var9
+var10
+var11
+var12
+var13
+var14
+var15
+var16
+var17
+var18
+var19
+var20
+var21
+var22
+var23
+var24
+var25
+var26
+var27
+var28
+var29
+var30
+var31
+var32
+var33
+var34
+var35
+var36
+var37
+var38
+var39
+var40
+var41
+var42
+var43
+var44
+var45
+var46
+var47
+var48
+var49
+var50
+var51
+var52
+var53
+var54
+var55
+var56
+var57
+var58
+var59
+var60
+var61
+var62
+var63
+var64
+var65
+var66
+var67
+var68
+var69
+var70
+var71
+var72
+var73
+var74
+var75
+var76
+var77
+var78
+var79
+var80
+var81
+var82
+var83
+var84
+var85
+var86
+var87
+var88
+var89
+var90
+var91
+var92
+var93
+var94
+var95
+var96
+var97
+var98
+var99
diff --git a/pipelines/association_testing/regress_eval_regenie.snakefile b/pipelines/association_testing/regress_eval_regenie.snakefile
index 7cad2da4..a37ffca1 100644
--- a/pipelines/association_testing/regress_eval_regenie.snakefile
+++ b/pipelines/association_testing/regress_eval_regenie.snakefile
@@ -17,33 +17,41 @@ regenie_step2_bsize = regenie_config_step2["bsize"]
 regenie_njobs = regenie_config_step1.get("njobs", 1)
 regenie_joblist = range(1, regenie_njobs)
 
+config_file_prefix = (
+    "cv_split0/deeprvat/" if cv_exp else ""
+)
+
 
 wildcard_constraints:
     job="\d+"
 
 
-# rule evaluate:
-#     input:
-#         associations = expand('{{phenotype}}/deeprvat/mean_agg_results/burden_associations.parquet',
-#                               repeat=range(n_repeats)),
-#         config = '{phenotype}/deeprvat/hpopt_config.yaml',
-#     output:
-#         "{phenotype}/deeprvat/eval/significant.parquet",
-#         "{phenotype}/deeprvat/eval/all_results.parquet"
-#     threads: 1
-#     shell:
-#         'deeprvat_evaluate '
-#         + debug +
-#         '--use-seed-genes '
-#         '--n-repeats {n_repeats} '
-#         '--correction-method FDR '
-#         '{input.associations} '
-#         '{input.config} '
-#         '{wildcards.phenotype}/deeprvat/eval'
+rule evaluate:
+    input:
+        associations ='{phenotype}/deeprvat/average_regression_results/burden_associations.parquet',
+        config = f"{config_file_prefix}{{phenotype}}/deeprvat/hpopt_config.yaml"
+    output:
+        "{phenotype}/deeprvat/eval/significant.parquet",
+        "{phenotype}/deeprvat/eval/all_results.parquet"
+    threads: 1
+    resources:
+        mem_mb = 16000,
+        load = 16000
+    params:
+        use_baseline_results = '--use-baseline-results'
+    shell:
+        'deeprvat_evaluate '
+        + debug +
+        '{params.use_baseline_results} '
+        '--correction-method Bonferroni '
+        '--phenotype {wildcards.phenotype} '
+        '{input.associations} '
+        '{input.config} '
+        '{wildcards.phenotype}/deeprvat/eval'
 
 rule all_regenie:
     input:
-        expand('{phenotype}/deeprvat/mean_agg_results/burden_associations.parquet',
+        expand('{phenotype}/deeprvat/average_regression_results/burden_associations.parquet',
                phenotype=phenotypes),
 
 rule convert_regenie_output:
@@ -51,12 +59,12 @@ rule convert_regenie_output:
         expand("regenie_output/step2/deeprvat_{phenotype}.regenie",
                phenotype=phenotypes)
     output:
-        expand('{phenotype}/deeprvat/mean_agg_results/burden_associations.parquet',
+        expand('{phenotype}/deeprvat/average_regression_results/burden_associations.parquet',
                phenotype=phenotypes)
     params:
         pheno_options = " ".join([
             f"--phenotype {phenotype} regenie_output/step2/deeprvat_{phenotype}.regenie "
-            f"{phenotype}/deeprvat/mean_agg_results/burden_associations.parquet"
+            f"{phenotype}/deeprvat/average_regression_results/burden_associations.parquet"
         for phenotype in phenotypes]),
         gene_file = config["data"]["dataset_config"]["rare_embedding"]["config"]["gene_file"]
     threads: 1
diff --git a/pipelines/association_testing_pretrained_regenie.snakefile b/pipelines/association_testing_pretrained_regenie.snakefile
index f3eb0b0e..87050d87 100644
--- a/pipelines/association_testing_pretrained_regenie.snakefile
+++ b/pipelines/association_testing_pretrained_regenie.snakefile
@@ -5,20 +5,27 @@ configfile: 'config.yaml'
 debug_flag = config.get('debug', False)
 phenotypes = config['phenotypes']
 phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes
+training_phenotypes = config["training"].get("phenotypes", phenotypes)
 
 n_burden_chunks = config.get('n_burden_chunks', 1) if not debug_flag else 2
 n_regression_chunks = config.get('n_regression_chunks', 40) if not debug_flag else 2
+n_avg_chunks = config.get('n_avg_chunks', 1)
 n_bags = config['training']['n_bags'] if not debug_flag else 3
 n_repeats = config['n_repeats']
 debug = '--debug ' if debug_flag else ''
 do_scoretest = '--do-scoretest ' if config.get('do_scoretest', False) else ''
 model_path = Path(config.get("pretrained_model_path", "pretrained_models"))
 
+cv_exp = False
+config_file_prefix = (
+    "cv_split0/deeprvat/" if cv_exp else ""
+)
+
 wildcard_constraints:
     repeat="\d+",
     trial="\d+",
 
-include: "association_testing/config.snakefile"
+include: "training/config.snakefile"
 include: "association_testing/association_dataset.snakefile"
 include: "association_testing/burdens.snakefile"
 include: "association_testing/regress_eval_regenie.snakefile"
diff --git a/pipelines/training_association_testing_regenie.snakefile b/pipelines/training_association_testing_regenie.snakefile
index 3f8a4e01..ce4dd990 100644
--- a/pipelines/training_association_testing_regenie.snakefile
+++ b/pipelines/training_association_testing_regenie.snakefile
@@ -9,6 +9,7 @@ training_phenotypes = config["training"].get("phenotypes", phenotypes)
 
 n_burden_chunks = config.get('n_burden_chunks', 1) if not debug_flag else 2
 n_regression_chunks = config.get('n_regression_chunks', 40) if not debug_flag else 2
+n_avg_chunks = config.get('n_avg_chunks', 1)
 n_trials = config['hyperparameter_optimization']['n_trials']
 n_bags = config['training']['n_bags'] if not debug_flag else 3
 n_repeats = config['n_repeats']
@@ -17,6 +18,7 @@ do_scoretest = '--do-scoretest ' if config.get('do_scoretest', False) else ''
 tensor_compression_level = config['training'].get('tensor_compression_level', 1)
 model_path = Path("models")
 n_parallel_training_jobs = config["training"].get("n_parallel_jobs", 1)
+cv_exp = False
 
 wildcard_constraints:
     repeat="\d+",
diff --git a/tests/preprocessing/test_data/process_sparse_gt/filter_samples_all/expected/expected_data.npz b/tests/preprocessing/test_data/process_sparse_gt/filter_samples_all/expected/expected_data.npz
new file mode 100644
index 00000000..607f68df
Binary files /dev/null and b/tests/preprocessing/test_data/process_sparse_gt/filter_samples_all/expected/expected_data.npz differ
diff --git a/tests/preprocessing/test_data/process_sparse_gt/filter_samples_all/input/qc/excluded_samples.csv b/tests/preprocessing/test_data/process_sparse_gt/filter_samples_all/input/qc/excluded_samples.csv
new file mode 100644
index 00000000..ea0a0082
--- /dev/null
+++ b/tests/preprocessing/test_data/process_sparse_gt/filter_samples_all/input/qc/excluded_samples.csv
@@ -0,0 +1,11 @@
+100096
+100097
+100099
+100100
+100101
+100102
+100103
+100104
+100105
+100106
+100107
diff --git a/tests/preprocessing/test_data/process_sparse_gt/filter_samples_all/input/samples_chr.csv b/tests/preprocessing/test_data/process_sparse_gt/filter_samples_all/input/samples_chr.csv
new file mode 100644
index 00000000..ea0a0082
--- /dev/null
+++ b/tests/preprocessing/test_data/process_sparse_gt/filter_samples_all/input/samples_chr.csv
@@ -0,0 +1,11 @@
+100096
+100097
+100099
+100100
+100101
+100102
+100103
+100104
+100105
+100106
+100107
diff --git a/tests/preprocessing/test_data/process_sparse_gt/filter_samples_all/input/sparse_gt/chr1/input_c1_b1.tsv.gz b/tests/preprocessing/test_data/process_sparse_gt/filter_samples_all/input/sparse_gt/chr1/input_c1_b1.tsv.gz
new file mode 100644
index 00000000..0fee2c66
Binary files /dev/null and b/tests/preprocessing/test_data/process_sparse_gt/filter_samples_all/input/sparse_gt/chr1/input_c1_b1.tsv.gz differ
diff --git a/tests/preprocessing/test_data/process_sparse_gt/filter_samples_all/input/variants.parquet b/tests/preprocessing/test_data/process_sparse_gt/filter_samples_all/input/variants.parquet
new file mode 100644
index 00000000..df779fb3
Binary files /dev/null and b/tests/preprocessing/test_data/process_sparse_gt/filter_samples_all/input/variants.parquet differ
diff --git a/tests/preprocessing/test_data/process_sparse_gt/filter_samples_all/input/variants.tsv.gz b/tests/preprocessing/test_data/process_sparse_gt/filter_samples_all/input/variants.tsv.gz
new file mode 100644
index 00000000..6da9e9cd
Binary files /dev/null and b/tests/preprocessing/test_data/process_sparse_gt/filter_samples_all/input/variants.tsv.gz differ
diff --git a/tests/preprocessing/test_data/process_sparse_gt/filter_variants_all/expected/expected_data.npz b/tests/preprocessing/test_data/process_sparse_gt/filter_variants_all/expected/expected_data.npz
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/preprocessing/test_data/process_sparse_gt/filter_variants_all/input/qc/input_c1_b1.tsv b/tests/preprocessing/test_data/process_sparse_gt/filter_variants_all/input/qc/input_c1_b1.tsv
new file mode 100644
index 00000000..0f863dc6
--- /dev/null
+++ b/tests/preprocessing/test_data/process_sparse_gt/filter_variants_all/input/qc/input_c1_b1.tsv
@@ -0,0 +1,20 @@
+chr1	16103	T	G
+chr1	51479	T	A
+chr1	51898	C	A
+chr1	51928	G	A
+chr1	51954	G	C
+chr1	54490	G	A
+chr1	54669	C	T
+chr1	54708	G	C
+chr1	54716	C	T
+chr1	54725	T	G
+chr1	54727	T	C
+chr1	54753	T	G
+chr1	55299	C	T
+chr1	55326	T	C
+chr1	55330	G	A
+chr1	55351	T	A
+chr1	55365	A	G
+chr1	55367	G	A
+chr1	55385	A	G
+chr1	55388	C	T
diff --git a/tests/preprocessing/test_data/process_sparse_gt/filter_variants_all/input/samples_chr.csv b/tests/preprocessing/test_data/process_sparse_gt/filter_variants_all/input/samples_chr.csv
new file mode 100644
index 00000000..ea0a0082
--- /dev/null
+++ b/tests/preprocessing/test_data/process_sparse_gt/filter_variants_all/input/samples_chr.csv
@@ -0,0 +1,11 @@
+100096
+100097
+100099
+100100
+100101
+100102
+100103
+100104
+100105
+100106
+100107
diff --git a/tests/preprocessing/test_data/process_sparse_gt/filter_variants_all/input/sparse_gt/chr1/input_c1_b1.tsv.gz b/tests/preprocessing/test_data/process_sparse_gt/filter_variants_all/input/sparse_gt/chr1/input_c1_b1.tsv.gz
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/preprocessing/test_data/process_sparse_gt/filter_variants_all/input/variants.parquet b/tests/preprocessing/test_data/process_sparse_gt/filter_variants_all/input/variants.parquet
new file mode 100644
index 00000000..df779fb3
Binary files /dev/null and b/tests/preprocessing/test_data/process_sparse_gt/filter_variants_all/input/variants.parquet differ
diff --git a/tests/preprocessing/test_data/process_sparse_gt/filter_variants_all/input/variants.tsv.gz b/tests/preprocessing/test_data/process_sparse_gt/filter_variants_all/input/variants.tsv.gz
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/preprocessing/test_preprocess.py b/tests/preprocessing/test_preprocess.py
index a8fc0415..cd5828b4 100644
--- a/tests/preprocessing/test_preprocess.py
+++ b/tests/preprocessing/test_preprocess.py
@@ -23,7 +23,7 @@ def load_h5_archive(h5_path):
 
 
 @pytest.mark.parametrize(
-    "test_data_name_dir, extra_cli_params, genotype_file_name",
+    "test_data_name_dir, extra_cli_params, genotype_file_name, should_fail",
     [
         (
             "no_filters_minimal",
@@ -32,6 +32,7 @@ def load_h5_archive(h5_path):
                 "1",
             ],
             "genotypes_chr1.h5",
+            False,
         ),
         (
             "no_filters_minimal_str_samples",
@@ -40,6 +41,7 @@ def load_h5_archive(h5_path):
                 "1",
             ],
             "genotypes_chr1.h5",
+            False,
         ),
         (
             "filter_variants_minimal",
@@ -50,6 +52,18 @@ def load_h5_archive(h5_path):
                 f"{(tests_data_dir / 'process_sparse_gt/filter_variants_minimal/input/qc').as_posix()}",
             ],
             "genotypes_chr1.h5",
+            False,
+        ),
+        (
+            "filter_variants_all",
+            [
+                "--chromosomes",
+                "1",
+                "--exclude-variants",
+                f"{(tests_data_dir / 'process_sparse_gt/filter_variants_all/input/qc').as_posix()}",
+            ],
+            "genotypes_chr1.h5",
+            True,
         ),
         (
             "filter_variants_multiple",
@@ -60,6 +74,7 @@ def load_h5_archive(h5_path):
                 f"{(tests_data_dir / 'process_sparse_gt/filter_variants_multiple/input/qc').as_posix()}",
             ],
             "genotypes_chr1.h5",
+            False,
         ),
         (
             "filter_samples_minimal",
@@ -70,6 +85,18 @@ def load_h5_archive(h5_path):
                 f"{(tests_data_dir / 'process_sparse_gt/filter_samples_minimal/input/qc').as_posix()}",
             ],
             "genotypes_chr1.h5",
+            False,
+        ),
+        (
+            "filter_samples_all",
+            [
+                "--chromosomes",
+                "1",
+                "--exclude-samples",
+                f"{(tests_data_dir / 'process_sparse_gt/filter_samples_all/input/qc').as_posix()}",
+            ],
+            "genotypes_chr1.h5",
+            True,
         ),
         (
             "filter_calls_minimal",
@@ -80,6 +107,7 @@ def load_h5_archive(h5_path):
                 f"{(tests_data_dir / 'process_sparse_gt/filter_calls_minimal/input/qc').as_posix()}",
             ],
             "genotypes_chr1.h5",
+            False,
         ),
         (
             "filter_calls_vars_samples_minimal",
@@ -94,11 +122,12 @@ def load_h5_archive(h5_path):
                 f"{(tests_data_dir / 'process_sparse_gt/filter_calls_vars_samples_minimal/input/qc/variants/').as_posix()}",
             ],
             "genotypes_chr1.h5",
+            False,
         ),
     ],
 )
 def test_process_sparse_gt_file(
-    test_data_name_dir, extra_cli_params, genotype_file_name, tmp_path
+    test_data_name_dir, extra_cli_params, genotype_file_name, should_fail, tmp_path
 ):
     cli_runner = CliRunner()
 
@@ -127,7 +156,14 @@ def test_process_sparse_gt_file(
         out_file_base.as_posix(),
     ]
 
-    result = cli_runner.invoke(preprocess_cli, cli_parameters, catch_exceptions=False)
+    result = cli_runner.invoke(preprocess_cli, cli_parameters, catch_exceptions=True)
+
+    if should_fail:
+        assert isinstance(result.exception, ValueError)
+        return
+    else:
+        assert result.exception is None
+
     assert result.exit_code == 0
 
     h5_file = out_file_base.as_posix().replace("genotypes", genotype_file_name)