-
Notifications
You must be signed in to change notification settings - Fork 17
/
pipeline-pe.cwl
303 lines (303 loc) · 12.2 KB
/
pipeline-pe.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
#!/usr/bin/env cwl-runner
# CWL workflow header for the paired-end (PE) ChIP-seq pipeline that processes
# treatment samples only (see `doc`).
cwlVersion: v1.0
class: Workflow
doc: 'ChIP-seq pipeline - reads: PE, samples: treatment.'
# Optional CWL features this document declares.
# NOTE(review): no step visible in this file uses `scatter` or `valueFrom`
# directly; presumably the Scatter/StepInputExpression requirements are kept
# for the documents referenced by `run` - confirm before removing any of them.
requirements:
  - class: ScatterFeatureRequirement
  - class: SubworkflowFeatureRequirement
  - class: StepInputExpressionRequirement
# Workflow-level inputs. Every parameter is nested under `inputs` so that its
# `doc`/`type` (and `default`/`secondaryFiles`, where present) belong to that
# parameter instead of colliding as duplicate top-level keys.
inputs:
  # --- Sequencing reads (paired-end; one array per mate) ---
  input_treatment_fastq_read1_files:
    doc: Input treatment fastq paired-end read 1 files
    type: File[]
  input_treatment_fastq_read2_files:
    doc: Input treatment fastq paired-end read 2 files
    type: File[]

  # --- Genome / reference resources ---
  genome_sizes_file:
    doc: Genome sizes tab-delimited file (used in samtools)
    type: File
  genome_effective_size:
    default: hs
    doc: >-
      Effective genome size used by MACS2. It can be numeric or a
      shortcuts:'hs' for human (2.7e9), 'mm' for mouse (1.87e9), 'ce' for
      C. elegans (9e7) and 'dm' for fruitfly (1.2e8), Default:hs
    type: string
  default_adapters_file:
    doc: Adapters file
    type: File
  ENCODE_blacklist_bedfile:
    doc: Bedfile containing ENCODE consensus blacklist regions to be excluded.
    type: File
  genome_ref_first_index_file:
    # The doc text previously carried literal quote marks, a stray backslash
    # and a trailing space left over from a folded-string conversion.
    doc: >-
      First index file of Bowtie reference genome with extension 1.ebwt.
      (Note: the rest of the index files MUST be in the same folder)
    type: File
    # Remaining Bowtie index files staged alongside the primary `.1.ebwt` file.
    # Per the CWL secondaryFiles pattern rules, each leading `^` removes one
    # extension from the primary file name before the suffix is appended.
    secondaryFiles:
      - '^^.2.ebwt'
      - '^^.3.ebwt'
      - '^^.4.ebwt'
      - '^^.rev.1.ebwt'
      - '^^.rev.2.ebwt'

  # --- AutoSql definitions used when converting peaks to bigBed ---
  as_narrowPeak_file:
    doc: Definition narrowPeak file in AutoSql format (used in bedToBigBed)
    type: File
  as_broadPeak_file:
    doc: Definition broadPeak file in AutoSql format (used in bedToBigBed)
    type: File

  # --- Java tool configuration (jar paths are plain strings, not File) ---
  trimmomatic_java_opts:
    doc: 'JVM arguments should be a quoted, space separated list (e.g. "-Xms128m -Xmx512m")'
    type: string?
  trimmomatic_jar_path:
    doc: Trimmomatic Java jar file
    type: string
  picard_java_opts:
    doc: 'JVM arguments should be a quoted, space separated list (e.g. "-Xms128m -Xmx512m")'
    type: string?
  picard_jar_path:
    doc: Picard Java jar file
    type: string

  # --- Per-stage thread counts (one per step under `steps`) ---
  nthreads_qc:
    doc: Number of threads required for the 01-qc step
    type: int
  nthreads_trimm:
    doc: Number of threads required for the 02-trim step
    type: int
  nthreads_map:
    doc: Number of threads required for the 03-map step
    type: int
  nthreads_peakcall:
    doc: Number of threads required for the 04-peakcall step
    type: int
  nthreads_quant:
    doc: Number of threads required for the 05-quantification step
    type: int
# Workflow outputs. Each entry re-exports one output of a step declared under
# `steps`, addressed as `outputSource: <step id>/<step output id>`.
outputs:
  # --- qc_treatment (01-qc-pe.cwl) ---
  qc_treatment_count_raw_reads_read1:
    doc: Raw read counts of fastq files for read 1 after QC for treatment
    type: File[]
    outputSource: qc_treatment/output_count_raw_reads_read1
  qc_treatment_count_raw_reads_read2:
    doc: Raw read counts of fastq files for read 2 after QC for treatment
    type: File[]
    outputSource: qc_treatment/output_count_raw_reads_read2
  qc_treatment_fastqc_data_files_read1:
    doc: FastQC data files for paired read 1
    type: File[]
    outputSource: qc_treatment/output_fastqc_data_files_read1
  qc_treatment_fastqc_data_files_read2:
    doc: FastQC data files for paired read 2
    type: File[]
    outputSource: qc_treatment/output_fastqc_data_files_read2
  qc_treatment_fastqc_report_files_read1:
    doc: FastQC report files for paired read 1
    type: File[]
    outputSource: qc_treatment/output_fastqc_report_files_read1
  qc_treatment_fastqc_report_files_read2:
    doc: FastQC report files for paired read 2
    type: File[]
    outputSource: qc_treatment/output_fastqc_report_files_read2
  qc_treatment_diff_counts_read1:
    doc: >-
      Diff file between number of raw reads and number of reads counted by
      FASTQC, read 1 for treatment
    type: File[]
    outputSource: qc_treatment/output_diff_counts_read1
  qc_treatment_diff_counts_read2:
    doc: >-
      Diff file between number of raw reads and number of reads counted by
      FASTQC, read 2 for treatment
    type: File[]
    outputSource: qc_treatment/output_diff_counts_read2

  # --- trimm_treatment (02-trim-pe.cwl) ---
  trimm_treatment_fastq_files_read1:
    doc: FASTQ files after trimming step for treatment
    type: File[]
    outputSource: trimm_treatment/output_data_fastq_read1_trimmed_files
  trimm_treatment_fastq_files_read2:
    doc: FASTQ files after trimming step for treatment
    type: File[]
    outputSource: trimm_treatment/output_data_fastq_read2_trimmed_files
  trimm_treatment_raw_counts_read1:
    doc: Raw read counts for R1 of fastq files after trimming for treatment
    type: File[]
    # R1 counts must come from the step's read1 counter; pointing this at the
    # read2 output would make both count outputs report R2.
    outputSource: trimm_treatment/output_trimmed_read1_fastq_read_count
  trimm_treatment_raw_counts_read2:
    doc: Raw read counts for R2 of fastq files after trimming for treatment
    type: File[]
    outputSource: trimm_treatment/output_trimmed_read2_fastq_read_count

  # --- map_treatment (03-map-pe.cwl) ---
  map_treatment_mark_duplicates_files:
    doc: >-
      Summary of duplicates removed with Picard tool MarkDuplicates (for
      multiple reads aligned to the same positions) for treatment
    type: File[]
    outputSource: map_treatment/output_picard_mark_duplicates_files
  map_treatment_dedup_bam_files:
    doc: Filtered BAM files (post-processing end point) for treatment
    type: File[]
    outputSource: map_treatment/output_data_sorted_dedup_bam_files
  map_treatment_dups_marked_bam_files:
    doc: >-
      Filtered BAM files with duplicates marked (post-processing end point)
      for treatment
    type: File[]
    outputSource: map_treatment/output_data_sorted_dups_marked_bam_files
  map_treatment_pbc_files:
    # This workflow only processes treatment samples, so the doc names
    # treatment (it used to say "control").
    doc: PCR Bottleneck Coefficient files (used to flag samples when pbc<0.5) for treatment
    type: File[]
    outputSource: map_treatment/output_pbc_files
  map_treatment_preseq_percentage_uniq_reads:
    doc: Preseq percentage of uniq reads
    type: File[]
    outputSource: map_treatment/output_percentage_uniq_reads
  map_treatment_read_count_mapped:
    doc: Read counts of the mapped BAM files
    type: File[]
    outputSource: map_treatment/output_read_count_mapped
  map_treatment_bowtie_log_files:
    doc: Bowtie log file with mapping stats for treatment
    type: File[]
    outputSource: map_treatment/output_bowtie_log
  map_treatment_preseq_c_curve_files:
    doc: Preseq c_curve output files for treatment
    type: File[]
    outputSource: map_treatment/output_preseq_c_curve_files

  # --- peak_call_treatment (04-peakcall.cwl) ---
  peak_call_treatment_spp_x_cross_corr:
    doc: SPP strand cross correlation summary
    type: File[]
    outputSource: peak_call_treatment/output_spp_x_cross_corr
  peak_call_treatment_spp_x_cross_corr_plot:
    doc: SPP strand cross correlation plot
    type: File[]
    outputSource: peak_call_treatment/output_spp_cross_corr_plot
  peak_call_treatment_filtered_read_count_file:
    doc: Filtered read count after peak calling
    type: File[]
    outputSource: peak_call_treatment/output_filtered_read_count_file
  peak_call_treatment_narrowpeak_peak_xls_file:
    doc: Peak calling report file
    type: File[]
    outputSource: peak_call_treatment/output_narrowpeak_xls_file
  peak_call_treatment_read_in_narrowpeak_count_within_replicate:
    doc: Peak counts within replicate
    type: File[]
    outputSource: peak_call_treatment/output_read_in_narrowpeak_count_within_replicate
  # NOTE(review): the four *_count outputs in this section share one doc text;
  # presumably copy/paste - confirm what each file actually counts.
  peak_call_treatment_narrowpeak_count:
    doc: Peak counts within replicate
    type: File[]
    outputSource: peak_call_treatment/output_narrowpeak_count
  peak_call_treatment_narrowpeak_file:
    doc: Peaks in narrowPeak file format
    type: File[]
    outputSource: peak_call_treatment/output_narrowpeak_file
  peak_call_treatment_narrowpeak_summits_file:
    doc: Peaks summits in bedfile format
    # Outer array whose items are each either null or an array of File.
    type:
      type: array
      items:
        - 'null'
        - type: array
          items: File
    outputSource: peak_call_treatment/output_narrowpeak_summits_file
  peak_call_treatment_narrowpeak_bigbed_file:
    doc: narrowPeaks in bigBed format
    type: File[]
    outputSource: peak_call_treatment/output_narrowpeak_bigbed_file
  peak_call_treatment_read_in_broadpeak_count_within_replicate:
    doc: Peak counts within replicate
    type: File[]
    outputSource: peak_call_treatment/output_read_in_broadpeak_count_within_replicate
  peak_call_treatment_broadpeak_count:
    doc: Peak counts within replicate
    type: File[]
    outputSource: peak_call_treatment/output_broadpeak_count
  peak_call_treatment_broadpeak_file:
    doc: Peaks in broadPeak file format
    type: File[]
    outputSource: peak_call_treatment/output_broadpeak_file
  peak_call_treatment_broadpeak_bigbed_file:
    doc: broadPeaks in bigBed format
    type: File[]
    outputSource: peak_call_treatment/output_broadpeak_bigbed_file

  # --- quant (05-quantification.cwl) ---
  quant_bigwig_raw_files:
    doc: Raw reads bigWig (signal) files
    type: File[]
    outputSource: quant/bigwig_raw_files
  quant_bigwig_rpkm_extended_files:
    doc: Fragment extended reads bigWig (signal) files
    type: File[]
    outputSource: quant/bigwig_rpkm_extended_files
# Pipeline stages. Data flows qc_treatment -> trimm_treatment -> map_treatment,
# and the deduplicated BAM files from map_treatment feed both
# peak_call_treatment and quant.
steps:
  # Stage 01: QC on the raw reads; also emits the per-read custom adapter
  # files that the trimming stage consumes.
  qc_treatment:
    run: 01-qc-pe.cwl
    in:
      input_read1_fastq_files: input_treatment_fastq_read1_files
      input_read2_fastq_files: input_treatment_fastq_read2_files
      default_adapters_file: default_adapters_file
      nthreads: nthreads_qc
    out:
      - output_count_raw_reads_read1
      - output_diff_counts_read1
      - output_fastqc_report_files_read1
      - output_fastqc_data_files_read1
      - output_custom_adapters_read1
      - output_count_raw_reads_read2
      - output_diff_counts_read2
      - output_fastqc_report_files_read2
      - output_fastqc_data_files_read2
      - output_custom_adapters_read2

  # Stage 02: trimming of the original fastq files, using the adapter files
  # produced by qc_treatment.
  trimm_treatment:
    run: 02-trim-pe.cwl
    in:
      input_read1_fastq_files: input_treatment_fastq_read1_files
      input_read2_fastq_files: input_treatment_fastq_read2_files
      input_read1_adapters_files: qc_treatment/output_custom_adapters_read1
      input_read2_adapters_files: qc_treatment/output_custom_adapters_read2
      trimmomatic_jar_path: trimmomatic_jar_path
      trimmomatic_java_opts: trimmomatic_java_opts
      nthreads: nthreads_trimm
    out:
      - output_data_fastq_read1_trimmed_files
      - output_trimmed_read1_fastq_read_count
      - output_data_fastq_read2_trimmed_files
      - output_trimmed_read2_fastq_read_count

  # Stage 03: mapping of the trimmed reads.
  map_treatment:
    run: 03-map-pe.cwl
    in:
      input_fastq_read1_files: trimm_treatment/output_data_fastq_read1_trimmed_files
      input_fastq_read2_files: trimm_treatment/output_data_fastq_read2_trimmed_files
      genome_ref_first_index_file: genome_ref_first_index_file
      genome_sizes_file: genome_sizes_file
      ENCODE_blacklist_bedfile: ENCODE_blacklist_bedfile
      picard_jar_path: picard_jar_path
      picard_java_opts: picard_java_opts
      nthreads: nthreads_map
    out:
      - output_data_sorted_dedup_bam_files
      - output_data_sorted_dups_marked_bam_files
      - output_picard_mark_duplicates_files
      - output_pbc_files
      - output_bowtie_log
      - output_preseq_c_curve_files
      - output_percentage_uniq_reads
      - output_read_count_mapped

  # Stage 04: peak calling on the deduplicated BAM files.
  peak_call_treatment:
    run: 04-peakcall.cwl
    in:
      input_bam_files: map_treatment/output_data_sorted_dedup_bam_files
      input_genome_sizes: genome_sizes_file
      genome_effective_size: genome_effective_size
      as_narrowPeak_file: as_narrowPeak_file
      as_broadPeak_file: as_broadPeak_file
      nthreads: nthreads_peakcall
    out:
      - output_spp_x_cross_corr
      - output_spp_cross_corr_plot
      - output_filtered_read_count_file
      - output_read_in_narrowpeak_count_within_replicate
      - output_narrowpeak_count
      - output_narrowpeak_file
      - output_narrowpeak_summits_file
      - output_narrowpeak_bigbed_file
      - output_narrowpeak_xls_file
      - output_read_in_broadpeak_count_within_replicate
      - output_broadpeak_count
      - output_broadpeak_file
      # NOTE(review): requested here but never re-exported under `outputs`;
      # confirm 04-peakcall.cwl really declares this output.
      - output_broadpeak_summits_file
      - output_broadpeak_bigbed_file

  # Stage 05: signal quantification (bigWig) from the deduplicated BAM files.
  quant:
    run: 05-quantification.cwl
    in:
      input_trt_bam_files: map_treatment/output_data_sorted_dedup_bam_files
      input_genome_sizes: genome_sizes_file
      nthreads: nthreads_quant
    out:
      - bigwig_raw_files
      - bigwig_rpkm_extended_files