# pipeline-pe-umis.cwl
# Top-level CWL document header: a v1.0 Workflow describing the paired-end
# (PE) STARR-seq pipeline.
cwlVersion: v1.0
class: Workflow
doc: 'STARR-seq pipeline - reads: PE'

# Optional CWL features this workflow declares to the runner.
requirements:
  - class: ScatterFeatureRequirement
  - class: SubworkflowFeatureRequirement
  - class: StepInputExpressionRequirement
# Workflow-level inputs. Each entry is a named parameter with a `type` and an
# optional `doc`/`default`; they are wired into the steps by name.
inputs:
  # Sequencing data: parallel arrays of read1, read2 and UMI FASTQ files.
  input_fastq_read1_files:
    doc: Input read1 fastq files
    type: File[]
  input_fastq_read2_files:
    doc: Input read2 fastq files
    type: File[]
  input_fastq_umi_files:
    doc: Input fastq with UMIs files
    type: File[]

  # The only input with a default value; all other inputs must be supplied
  # unless their type is optional (`string?`).
  fgbio_jar_path:
    doc: fgbio Java jar file
    type: string
    default: /opt/fgbio.jar

  # Reference and region files.
  genome_sizes_file:
    doc: Genome sizes tab-delimited file (used in samtools)
    type: File
  ENCODE_blacklist_bedfile:
    doc: Bedfile containing ENCODE consensus blacklist regions to be excluded.
    type: File
  regions_bed_file:
    doc: Regions bed file used to filter-in reads (used in samtools, for example chromosomes of interest)
    type: File
  genome_ref_first_index_file:
    # Folded scalar: the value contains ': ', which is not allowed in a plain
    # scalar, and the text is too long for a single line.
    doc: >-
      First index file of Bowtie2 reference genome with extension 1.bt2.
      (Note: the rest of the index files MUST be in the same folder)
    type: File
    # Each leading '^' strips one extension from the primary file name before
    # the suffix is appended, so '^^' drops the trailing '.1.bt2' and the
    # patterns below name the sibling index files.
    secondaryFiles:
      - ^^.2.bt2
      - ^^.3.bt2
      - ^^.4.bt2
      - ^^.rev.1.bt2
      - ^^.rev.2.bt2

  # Thread counts, one per pipeline stage.
  nthreads_qc:
    doc: Number of threads - qc.
    type: int
  nthreads_trimm:
    doc: Number of threads - trim.
    type: int
  nthreads_map:
    doc: Number of threads - map.
    type: int
  nthreads_quant:
    doc: Number of threads - quantification.
    type: int

  # Trimming configuration.
  default_adapters_file:
    doc: Adapters file
    type: File
  trimmomatic_jar_path:
    doc: Trimmomatic Java jar file
    type: string
  trimmomatic_java_opts:
    doc: JVM arguments should be a quoted, space separated list (e.g. "-Xms128m -Xmx512m")
    type: string?

  # Picard configuration.
  picard_java_opts:
    doc: JVM arguments should be a quoted, space separated list (e.g. "-Xms128m -Xmx512m")
    type: string?
  picard_jar_path:
    doc: Picard Java jar file
    type: string
# Workflow-level outputs. Each entry exposes one step output through
# `outputSource: <step>/<step output name>`; every output here is a File array.
outputs:
  # ---- Read 1: QC and trimming ----
  output_fastqc_report_files_read1:
    doc: FastQC reports in zip format for paired read 1
    type: File[]
    outputSource: qc/output_fastqc_report_files_read1
  output_fastqc_data_files_read1:
    doc: FastQC data files for paired read 1
    type: File[]
    outputSource: qc/output_fastqc_data_files_read1
  # The next three outputs carry no doc string; their content is defined by
  # the qc step (01-qc-pe.cwl) — TODO confirm and document.
  output_count_raw_reads_read1:
    type: File[]
    outputSource: qc/output_count_raw_reads_read1
  output_custom_adapters_read1:
    type: File[]
    outputSource: qc/output_custom_adapters_read1
  output_diff_counts_read1:
    type: File[]
    outputSource: qc/output_diff_counts_read1
  output_trimmed_read1_fastq_read_count:
    doc: Trimmed read counts of paired read 1 fastq files
    type: File[]
    outputSource: trim/output_trimmed_read1_fastq_read_count
  output_data_fastq_read1_trimmed_files:
    doc: Trimmed fastq files for paired read 1
    type: File[]
    outputSource: trim/output_data_fastq_read1_trimmed_files

  # ---- Read 2: QC and trimming (mirrors the read 1 outputs) ----
  output_fastqc_report_files_read2:
    doc: FastQC reports in zip format for paired read 2
    type: File[]
    outputSource: qc/output_fastqc_report_files_read2
  output_fastqc_data_files_read2:
    doc: FastQC data files for paired read 2
    type: File[]
    outputSource: qc/output_fastqc_data_files_read2
  # As for read 1, the next three outputs carry no doc string — TODO confirm
  # their content against the qc step and document.
  output_count_raw_reads_read2:
    type: File[]
    outputSource: qc/output_count_raw_reads_read2
  output_custom_adapters_read2:
    type: File[]
    outputSource: qc/output_custom_adapters_read2
  output_diff_counts_read2:
    type: File[]
    outputSource: qc/output_diff_counts_read2
  output_trimmed_read2_fastq_read_count:
    doc: Trimmed read counts of paired read 2 fastq files
    type: File[]
    outputSource: trim/output_trimmed_read2_fastq_read_count
  output_data_fastq_read2_trimmed_files:
    doc: Trimmed fastq files for paired read 2
    type: File[]
    outputSource: trim/output_data_fastq_read2_trimmed_files

  # ---- Mapping ----
  map_mark_duplicates_files:
    doc: Summary of duplicates removed with Picard tool MarkDuplicates (for multiple reads aligned to the same positions)
    type: File[]
    outputSource: map/output_picard_mark_duplicates_files
  map_dups_marked_bam_files:
    doc: Filtered BAM files with duplicates marked (post-processing end point)
    type: File[]
    outputSource: map/output_data_bam_files
  map_unmapped_fastq_files:
    doc: Gzip compressed FASTQ unmapped and unpaired sequences
    type: File[]
    outputSource: map/output_data_unmapped_fastq_files
  map_bowtie_log_files:
    doc: Bowtie log file with mapping stats
    type: File[]
    outputSource: map/output_bowtie_log
  map_genomic_template_files:
    doc: BEDPE files with fragment/template coordinates
    type: File[]
    outputSource: map/output_templates_files
  map_preseq_c_curve_files:
    doc: Preseq c_curve output files
    type: File[]
    outputSource: map/output_preseq_c_curve_files

  # ---- Quantification ----
  quant_bw_dedup_norm_files:
    doc: Signal files with RPKM normalization ignoring duplicates.
    type: File[]
    outputSource: quant/bw_dedup_norm_files
  quant_bw_dedup_raw_files:
    doc: Signal files with 1bp raw read pileup ignoring duplicates.
    type: File[]
    outputSource: quant/bw_dedup_raw_files
  quant_bw_with_dups_norm_files:
    doc: Signal files with RPKM normalization including duplicates.
    type: File[]
    outputSource: quant/bw_with_dups_norm_files
# Pipeline stages. Each step runs a separate CWL document (`run`), binds its
# inputs (`in`) to workflow inputs or to `<step>/<output>` of another step,
# and lists the outputs (`out`) it makes available downstream.
# Data flow visible here: qc -> trim -> map -> quant.
steps:
  # Stage 1: quality control on the raw read1/read2 FASTQ files.
  qc:
    run: 01-qc-pe.cwl
    in:
      input_fastq_read1_files: input_fastq_read1_files
      input_fastq_read2_files: input_fastq_read2_files
      default_adapters_file: default_adapters_file
      nthreads: nthreads_qc
    out:
      - output_fastqc_report_files_read1
      - output_fastqc_data_files_read1
      - output_custom_adapters_read1
      - output_count_raw_reads_read1
      - output_diff_counts_read1
      - output_fastqc_report_files_read2
      - output_fastqc_data_files_read2
      - output_custom_adapters_read2
      - output_count_raw_reads_read2
      - output_diff_counts_read2

  # Stage 2: trimming of the raw FASTQ files, using the per-read custom
  # adapter files emitted by the qc step.
  trim:
    run: 02-trim-pe.cwl
    in:
      input_read1_adapters_files: qc/output_custom_adapters_read1
      input_fastq_read1_files: input_fastq_read1_files
      input_read2_adapters_files: qc/output_custom_adapters_read2
      input_fastq_read2_files: input_fastq_read2_files
      nthreads: nthreads_trimm
      trimmomatic_java_opts: trimmomatic_java_opts
      trimmomatic_jar_path: trimmomatic_jar_path
    out:
      - output_data_fastq_read1_trimmed_files
      - output_trimmed_read1_fastq_read_count
      - output_data_fastq_read2_trimmed_files
      - output_trimmed_read2_fastq_read_count

  # Stage 3: mapping of the trimmed reads, together with the UMI FASTQ files
  # and the reference/region inputs.
  map:
    run: 03-map-pe-umis.cwl
    in:
      input_fastq_read1_files: trim/output_data_fastq_read1_trimmed_files
      input_fastq_read2_files: trim/output_data_fastq_read2_trimmed_files
      input_fastq_umi_files: input_fastq_umi_files
      genome_sizes_file: genome_sizes_file
      ENCODE_blacklist_bedfile: ENCODE_blacklist_bedfile
      regions_bed_file: regions_bed_file
      genome_ref_first_index_file: genome_ref_first_index_file
      picard_jar_path: picard_jar_path
      picard_java_opts: picard_java_opts
      nthreads: nthreads_map
      fgbio_jar_path: fgbio_jar_path
    out:
      - output_data_bam_files
      # Consumed by the quant step only; not exposed as a workflow output.
      - output_data_dedup_bam_files
      - output_picard_mark_duplicates_files
      - output_data_unmapped_fastq_files
      - output_bowtie_log
      - output_preseq_c_curve_files
      - output_templates_files

  # Stage 4: quantification from both BAM sets produced by the map step.
  quant:
    run: 04-quantification.cwl
    in:
      input_bam_files: map/output_data_bam_files
      input_dedup_bam_files: map/output_data_dedup_bam_files
      ENCODE_blacklist_bedfile: ENCODE_blacklist_bedfile
      nthreads: nthreads_quant
    out:
      - bw_dedup_norm_files
      - bw_with_dups_norm_files
      - bw_dedup_raw_files