# run-pipeline.yml
# Reusable workflow (triggered via `workflow_call`) that runs a snakemake
# pipeline and can optionally exchange artifacts and run result checks.
# Every optional feature is switched on by one of the inputs below.
name: 'Run snakemake pipeline'

on:
  workflow_call:
    inputs:
      # ---- Environment and checkout -------------------------------------
      # Environment file handed to the micromamba setup step.
      environment_file:
        type: string
        required: true
      # Git ref to check out; falls back to the ref that triggered the run.
      branch:
        type: string
        required: false
        default: "${{ github.ref_name }}"

      # ---- Pipeline invocation ------------------------------------------
      # Optional shell command executed before the pipeline.
      prerun_cmd:
        type: string
        required: false
      # Snakefile passed to snakemake via `--snakefile`.
      pipeline_file:
        type: string
        required: true
      # Extra text appended verbatim to the snakemake command line.
      pipeline_extra_flags:
        type: string
        required: false
      # NOTE(review): no step visible in this file reads this input - confirm
      # whether it is still needed.
      no_gpu:
        type: boolean
        required: false
        default: true
      # Working directory passed to snakemake via `--directory`.
      pipeline_directory:
        type: string
        required: false
        default: './example'
      # Config argument; `--configfile` is prepended when the value ends
      # in 'ml', otherwise the value is passed through as-is.
      pipeline_config:
        type: string
        required: false
      # When true, snakemake is invoked with `-n`.
      dry_run:
        type: boolean
        required: false
        default: true
      # Optional shell command executed after the pipeline.
      postrun_cmd:
        type: string
        required: false

      # ---- Reference fasta ----------------------------------------------
      # Enables the cached download of the reference fasta file.
      download_fasta_data:
        type: boolean
        required: false
        default: false
      # Directory used both as the cache path and as the download target.
      fasta_download_path:
        type: string
        required: false

      # ---- Artifact upload switches -------------------------------------
      upload_training_outputs:
        type: boolean
        required: false
        default: false
      upload_pretrained_outputs:
        type: boolean
        required: false
        default: false
      upload_regenie_outputs:
        type: boolean
        required: false
        default: false

      # ---- Artifact download switches -----------------------------------
      download_training_outputs:
        type: boolean
        required: false
        default: false
      download_pretrained_outputs:
        type: boolean
        required: false
        default: false
      download_regenie_outputs:
        type: boolean
        required: false
        default: false

      # ---- Result comparison switches -----------------------------------
      run_training_results_check:
        type: boolean
        required: false
        default: false
      run_burden_results_check:
        type: boolean
        required: false
        default: false
      run_association_results_check:
        type: boolean
        required: false
        default: false
      run_regenie_association_results_check:
        type: boolean
        required: false
        default: false
jobs:
  # Single job: set up the environment, run the snakemake pipeline, then
  # optionally upload/download artifacts and compare results against them.
  # Each optional step is gated by the workflow input named in its `if:`.
  Run-Pipeline:
    runs-on: ubuntu-latest
    env:
      # Quoted so the value stays a string rather than a YAML integer.
      # NOTE(review): this is set unconditionally; the `no_gpu` input is not
      # consulted anywhere in this job.
      CUDA_VISIBLE_DEVICES: "-1"
    steps:
      - name: Check out repository code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.branch }}

      # Creates the micromamba environment from the caller-supplied file and
      # provides the `micromamba-shell` wrapper used by later steps.
      - uses: mamba-org/setup-micromamba@v1.8.1
        with:
          environment-file: ${{ inputs.environment_file }}
          cache-environment: true
          cache-downloads: true

      # Editable install of the checked-out repository into the environment.
      - name: Install DeepRVAT
        run: pip install -e ${{ github.workspace }}
        shell: micromamba-shell {0}

      # The cache key is derived from the download path only.
      - name: Cache Fasta file
        if: inputs.download_fasta_data
        id: cache-fasta
        uses: actions/cache@v4
        with:
          path: ${{ inputs.fasta_download_path }}
          key: cache-reference-fasta-${{ inputs.fasta_download_path }}

      # Runs only on a cache miss. The target directory is created first
      # because `wget -O` does not create missing parent directories.
      - name: Download and unpack fasta data
        if: inputs.download_fasta_data && steps.cache-fasta.outputs.cache-hit != 'true'
        run: |
          mkdir -p ${{ inputs.fasta_download_path }}
          wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_44/GRCh38.primary_assembly.genome.fa.gz \
            -O ${{ inputs.fasta_download_path }}/GRCh38.primary_assembly.genome.fa.gz \
            && gzip -d ${{ inputs.fasta_download_path }}/GRCh38.primary_assembly.genome.fa.gz

      # NOTE(review): this step uses a bash login shell while the post-run
      # step uses micromamba-shell - confirm the difference is intended.
      - name: Run pre pipeline cmd
        if: inputs.prerun_cmd
        run: ${{ inputs.prerun_cmd }}
        shell: bash -el {0}

      # The step name shows the snakefile being run. The previous expression
      # (`github.jobs[github.job].name`) referenced a property that the
      # `github` context does not define, so it rendered as an empty string.
      - name: 'Running pipeline ${{ inputs.pipeline_file }}'
        # `-n` is added for dry runs; `--configfile` is prepended only when
        # pipeline_config ends in 'ml', otherwise the value is passed as-is.
        run: |
          python -m snakemake ${{ (inputs.dry_run && '-n') || '' }} \
            -j 2 --directory ${{ inputs.pipeline_directory }} \
            ${{ (endsWith(inputs.pipeline_config, 'ml') && '--configfile') || '' }} ${{ inputs.pipeline_config }} \
            --snakefile ${{ inputs.pipeline_file }} --show-failed-logs -F -p ${{ inputs.pipeline_extra_flags }}
        shell: micromamba-shell {0}

      - name: Run post pipeline cmd
        if: inputs.postrun_cmd
        run: ${{ inputs.postrun_cmd }}
        shell: micromamba-shell {0}

      # ---- Artifact uploads ---------------------------------------------
      # NOTE(review): the upload globs and the checks further down are
      # hard-coded to ./example (the default of `pipeline_directory`); they
      # do not follow a caller-supplied pipeline_directory.
      - name: Upload Training Outputs
        id: uploaded_training_outputs
        if: inputs.upload_training_outputs
        uses: actions/upload-artifact@v4
        with:
          name: completed_training_outputs
          path: |
            ./example/**/seed_genes.parquet
            ./example/**/covariates.zarr/
            ./example/**/y.zarr/
            ./example/**/input_tensor.zarr/
            ./example/**/models/
          include-hidden-files: true  # needed for the .zarr outputs
          retention-days: 1

      - name: Upload Pretrained Outputs
        id: uploaded_pretrained_outputs
        if: inputs.upload_pretrained_outputs
        uses: actions/upload-artifact@v4
        with:
          name: completed_pretrained_outputs
          path: |
            ./example/**/sample_ids.zarr/
            ./example/**/y.zarr/
            ./example/**/x.zarr/
            ./example/**/burdens.zarr/
            ./example/**/genes.npy
            ./example/**/all_results.parquet
          include-hidden-files: true  # needed for the .zarr outputs
          retention-days: 1

      - name: Upload Regenie Outputs
        id: uploaded_regenie_outputs
        if: inputs.upload_regenie_outputs
        uses: actions/upload-artifact@v4
        with:
          name: completed_regenie_outputs
          path: |
            ./example/**/all_results.parquet
          retention-days: 1

      # ---- Artifact downloads -------------------------------------------
      # Artifact names match the ones used by the upload steps above; each
      # artifact is unpacked into its own directory under ./tests.
      - name: Download Previous Training Outputs
        id: downloaded_training_outputs
        if: inputs.download_training_outputs
        uses: actions/download-artifact@v4
        with:
          name: completed_training_outputs
          path: ./tests/completed_training_outputs

      - name: Download Previous Pretrained Outputs
        id: downloaded_pretrained_outputs
        if: inputs.download_pretrained_outputs
        uses: actions/download-artifact@v4
        with:
          name: completed_pretrained_outputs
          path: ./tests/completed_pretrained_outputs

      - name: Download Previous Regenie Outputs
        id: downloaded_regenie_outputs
        if: inputs.download_regenie_outputs
        uses: actions/download-artifact@v4
        with:
          name: completed_regenie_outputs
          path: ./tests/completed_regenie_outputs

      # Disabled debugging step. NOTE(review): it refers to an input
      # (`download_outputs`) and a directory that this workflow does not
      # declare or create, so it needs updating before being re-enabled.
      # - name: Display structure of downloaded files
      #   if: inputs.download_outputs
      #   run: ls -R ./tests/completed_run_output

      # ---- Result checks ------------------------------------------------
      # Each check calls tests/deeprvat/compare_reference.py with a
      # subcommand, the ./example directory, a downloaded artifact directory
      # and the names "Cholesterol" and "Platelet_count".
      - name: Run Training Results Check
        if: inputs.run_training_results_check
        run: |
          python $GITHUB_WORKSPACE/tests/deeprvat/compare_reference.py compare-training \
            ./example/ ./tests/completed_training_outputs/ \
            "Cholesterol" "Platelet_count"
        shell: micromamba-shell {0}

      - name: Run Burden Score Results Check
        if: inputs.run_burden_results_check
        run: |
          python $GITHUB_WORKSPACE/tests/deeprvat/compare_reference.py compare-burdens \
            ./example/ ./tests/completed_pretrained_outputs/ \
            "Cholesterol" "Platelet_count"
        shell: micromamba-shell {0}

      - name: Run Association Results Check
        if: inputs.run_association_results_check
        run: |
          python $GITHUB_WORKSPACE/tests/deeprvat/compare_reference.py compare-association \
            ./example/ ./tests/completed_pretrained_outputs/ \
            "Cholesterol" "Platelet_count"
        shell: micromamba-shell {0}

      - name: Run REGENIE Association Results Check
        if: inputs.run_regenie_association_results_check
        run: |
          python $GITHUB_WORKSPACE/tests/deeprvat/compare_reference.py compare-association \
            ./example/ ./tests/completed_regenie_outputs/ \
            "Cholesterol" "Platelet_count"
        shell: micromamba-shell {0}