This repository has been archived by the owner on May 13, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
pathseq-build-host-reference.wdl
202 lines (183 loc) · 5.89 KB
/
pathseq-build-host-reference.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
###############################################################
##
## PathSeq Host Reference Build WDL
##
###############################################################
##
## Builds a host reference for use with PathSeq
##
## For further info see the GATK Documentation for the PathSeqPipelineSpark tool:
## https://software.broadinstitute.org/gatk/documentation/tooldocs/current/org_broadinstitute_hellbender_tools_spark_pathseq_PathSeqPipelineSpark.php
##
###############################################################
##
## Input requirements :
## - FASTA file containing host sequences
##
## Output:
## - FASTA index and dictionary files
## - GATK BWA-MEM index image
## - PathSeq host kmer file
##
###############################################################
# WORKFLOW DEFINITION
# Builds the PathSeq host reference files from a single host FASTA:
#   1. IndexFasta            -> FASTA index (.fai) and sequence dictionary (.dict)
#   2. BuildBwaMemIndexImage -> GATK BWA-MEM index image (.img)
#   3. BuildPathSeqKmerFile  -> PathSeq host k-mer file (.hss)
# Steps 2 and 3 both consume outputs of step 1, so they start only after it finishes.
workflow PathSeqBuildHostReferenceWorkflow {
  # Mandatory input
  File host_fasta

  # Optional input: when set, the tasks export it as GATK_LOCAL_JAR instead of /root/gatk.jar
  File? gatk4_jar_override

  # Runtime parameters
  String gatk_docker
  Int? preemptible_attempts

  call IndexFasta {
    input:
      fasta_file=host_fasta,
      gatk_docker=gatk_docker,
      gatk4_jar_override=gatk4_jar_override,
      preemptible_attempts=preemptible_attempts
  }

  call BuildBwaMemIndexImage {
    input:
      fasta_file=host_fasta,
      fai_file=IndexFasta.output_fai_file,
      gatk_docker=gatk_docker,
      gatk4_jar_override=gatk4_jar_override,
      preemptible_attempts=preemptible_attempts
  }

  call BuildPathSeqKmerFile {
    input:
      fasta_file=host_fasta,
      fai_file=IndexFasta.output_fai_file,
      dict_file=IndexFasta.output_dict_file,
      gatk_docker=gatk_docker,
      gatk4_jar_override=gatk4_jar_override,
      preemptible_attempts=preemptible_attempts
  }

  output {
    File output_fai_file = IndexFasta.output_fai_file
    File output_dict_file = IndexFasta.output_dict_file
    File output_img_file = BuildBwaMemIndexImage.output_img_file
    File output_kmer_file = BuildPathSeqKmerFile.output_kmer_file
    # Legacy alias: this has always carried the host k-mer file, not a taxonomy file.
    # Kept so existing consumers of the old output name keep working.
    File output_taxonomy_file = BuildPathSeqKmerFile.output_kmer_file
  }
}
# Task DEFINITIONS
# Builds Index files for Fasta
# Creates the FASTA index (samtools faidx) and the sequence dictionary
# (GATK CreateSequenceDictionary) for the given FASTA file.
# Outputs are written to the task working directory, next to the moved FASTA.
task IndexFasta {
  # Inputs for this task
  File fasta_file
  String fasta_filename = basename(fasta_file)
  String fai_path = fasta_filename + ".fai"
  # Strip a trailing .fasta/.fa, then always append .dict. Substituting the
  # extension directly left the name unchanged for any other extension, which
  # made the dictionary output path identical to the FASTA itself.
  String dict_path = sub(fasta_filename, "\\.fasta$|\\.fa$", "") + ".dict"
  File? gatk4_jar_override

  # Runtime parameters
  String gatk_docker
  Int? mem_gb
  Int? preemptible_attempts
  Int? disk_space_gb

  # Disk size: FASTA size rounded up to whole GB, plus 20 GB of headroom
  Int fasta_size_gb = ceil(size(fasta_file, "GB"))
  Int default_disk_space_gb = fasta_size_gb + 20

  # Mem is in units of GB but our command and memory runtime values are in MB
  Int default_mem_gb = 7
  Int machine_mem = select_first([mem_gb, default_mem_gb]) * 1000
  # Leave 1000 MB of the machine memory outside the Java heap
  Int command_mem = machine_mem - 1000

  command <<<
    set -e
    mv "${fasta_file}" .
    export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override}
    samtools faidx "${fasta_filename}"
    gatk --java-options "-Xmx${command_mem}m" CreateSequenceDictionary -R "${fasta_filename}" -O "${dict_path}"
  >>>

  runtime {
    docker: gatk_docker
    memory: machine_mem + " MB"
    # Note that the space before SSD and HDD should be included.
    disks: "local-disk " + select_first([disk_space_gb, default_disk_space_gb]) + " HDD"
    preemptible: select_first([preemptible_attempts, 3])
  }

  output {
    File output_fai_file = "${fai_path}"
    File output_dict_file = "${dict_path}"
  }
}
# Builds BWA index images
# Runs GATK BwaMemIndexImageCreator on the FASTA and returns the resulting
# index image, expected at "<fasta basename>.img" in the working directory.
task BuildBwaMemIndexImage {
  # Inputs for this task
  File fasta_file
  File fai_file
  String fasta_filename = basename(fasta_file)
  String img_path = fasta_filename + ".img"
  File? gatk4_jar_override

  # Runtime parameters
  String gatk_docker
  Int? mem_gb
  Int? preemptible_attempts
  Int? disk_space_gb

  # Disk size: three times the FASTA size (rounded up to whole GB) plus 20 GB
  Int fasta_size_gb = ceil(size(fasta_file, "GB"))
  Int default_disk_space_gb = (fasta_size_gb * 3) + 20

  # Memory inputs are in GB; the runtime and Java heap values below are in MB.
  # Default machine memory scales with the FASTA: 4 GB per GB of FASTA plus 8 GB.
  Int default_mem_gb = (fasta_size_gb * 4) + 8
  Int machine_mem = select_first([mem_gb, default_mem_gb]) * 1000
  # Java heap is capped 4000 MB below the machine memory
  Int command_mem = machine_mem - 4000

  # The FASTA and its index are moved into the working directory so the tool
  # is invoked on the bare filename.
  command <<<
    set -e
    mv ${fasta_file} .
    mv ${fai_file} .
    export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override}
    gatk --java-options "-Xmx${command_mem}m" BwaMemIndexImageCreator -I ${fasta_filename}
  >>>

  runtime {
    docker: gatk_docker
    memory: machine_mem + " MB"
    # Keep the leading space in " HDD" (or " SSD"): it separates size from disk type.
    disks: "local-disk " + select_first([disk_space_gb, default_disk_space_gb]) + " HDD"
    preemptible: select_first([preemptible_attempts, 3])
  }

  output {
    File output_img_file = "${img_path}"
  }
}
# Builds Kmer file
# Runs GATK PathSeqBuildKmers on the host FASTA and returns the host k-mer
# file, written to "<fasta basename>.host.hss" in the working directory.
task BuildPathSeqKmerFile {
  # Inputs for this task
  File fasta_file
  File fai_file
  File dict_file
  String fasta_filename = basename(fasta_file)
  String kmer_file = fasta_filename + ".host.hss"
  File? gatk4_jar_override

  # Runtime parameters
  String gatk_docker
  Int? mem_gb
  Int? preemptible_attempts
  Int? disk_space_gb

  # Disk size: FASTA size rounded up to whole GB, plus 20 GB of headroom
  Int fasta_size_gb = ceil(size(fasta_file, "GB"))
  Int default_disk_space_gb = fasta_size_gb + 20

  # Mem is in units of GB but our command and memory runtime values are in MB
  Int default_mem_gb = 100
  Int machine_mem = select_first([mem_gb, default_mem_gb]) * 1000
  # Java heap is capped 4000 MB below the machine memory
  Int command_mem = machine_mem - 4000

  # The FASTA, its index and its dictionary are moved into the working
  # directory so they sit side by side under the FASTA's bare filename.
  # The output flag uses the documented long form --output; the previous
  # spelling "--O" put a long-option prefix on the short name.
  command <<<
    set -e
    mv "${fasta_file}" .
    mv "${fai_file}" .
    mv "${dict_file}" .
    export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override}
    gatk --java-options "-Xmx${command_mem}m" \
      PathSeqBuildKmers \
      --reference "${fasta_filename}" \
      --output "${kmer_file}"
  >>>

  runtime {
    docker: gatk_docker
    memory: machine_mem + " MB"
    # Note that the space before SSD and HDD should be included.
    disks: "local-disk " + select_first([disk_space_gb, default_disk_space_gb]) + " HDD"
    preemptible: select_first([preemptible_attempts, 3])
  }

  output {
    File output_kmer_file = "${kmer_file}"
  }
}