-
Notifications
You must be signed in to change notification settings - Fork 3
/
ngs.nextflow
113 lines (87 loc) · 2.95 KB
/
ngs.nextflow
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/env nextflow
params.pair1 = "$HOME/Projects/nextflow-test/reads/*_R1.fastq.gz"
params.pair2 = "$HOME/Projects/nextflow-test/reads/*_R2.fastq.gz"
params.genome = "$HOME/Data/Resource/hg19/human_g1k_v37.fasta"
// For both left and right reads get triples (readID, runID, UnixFile).
reads1 = Channel
.fromPath( params.pair1 )
.map { path -> [ path.toString().replace("_R1", "_RX"), path ] }
reads2 = Channel
.fromPath( params.pair2 )
.map { path -> [ path.toString().replace("_R2", "_RX"), path ] }
// Join pairs on their key.
read_pairs = reads1
.phase(reads2)
.map { pair1, pair2 -> [ pathToDatasetID(pair1[1]), pair1[1], pair2[1] ] }
// Get the genome file.
genome_file = file(params.genome)
//
// Step 1. Map the reads using bwa.
//
process mapping {
echo true
cpus params.cpus.mapping
// module 'bwa/0.7.2'
input:
set dataset_id, file(read1), file(read2) from read_pairs
output:
set dataset_id, file('alignment.bam') into bam_files
"""
bwa mem -R '@RG\tID:${params.run_id}\tSM:${params.run_id}' -t ${params.cpus.mapping} ${genome_file} ${read1} ${read2} | samtools view -Sb - | samtools sort - alignment
"""
}
// Collect all alignments with the same dataset_id.
joint_bams = bam_files.groupTuple()
//
// Step 2. Merge the BAM files and index result.
//
process merging {
echo true
cpus params.cpus.merging
input:
set dataset_id, bams from joint_bams
output:
file finalAlignmentPath() into final_alignment
file finalAlignmentPath(".bai") into final_alignment_bai
"""
mkdir -p `dirname ${finalAlignmentPath()}`
samtools merge -f ${finalAlignmentPath()} ${bams.join(" ")}
samtools index ${finalAlignmentPath()}
"""
}
//
// Step 3. Perform Variant Calling.
//
process variant_calling {
echo true
cpus params.cpus.variant_calling
input:
file final_alignment
file final_alignment_bai
output:
file finalVariantsPath() into final_vcf
"""
mkdir -p `dirname ${finalVariantsPath()}`
samtools mpileup -uf ${params.genome} ${final_alignment} | bcftools view -vcg - > ${finalVariantsPath()}
"""
}
//java -Xmx32G -Djava.io.tmpdir=${params.tmpdir} -jar $HOME/local/GenomeAnalysisTK-3.3-0/GenomeAnalysisTK.jar -R ${params.genome} -T HaplotypeCaller --emitRefConfidence GVCF --variant_index_type LINEAR --variant_index_parameter 128000 --dbsnp ${params.db_snp} -I ${final_alignment} -o ${finalVariantsPath()}
//final_vcf.subscribe onNext: { println "FINAL VCF\t" + it }, onComplete { println 'Done.' }
/**
* @return path to final VCF file.
*/
def finalVariantsPath() {
return [params.target_dir, 'vcf', params.run_id + '.vcf'].join(File.separator)
}
/**
* @return path to final alignment directory
*/
def finalAlignmentPath(ext = "") {
return [params.target_dir, 'bam', params.run_id + '.bam' + ext].join(File.separator)
}
/**
* @return String with the path to the parent of <code>path</code>.
*/
def pathToDatasetID(path) {
return path.getParent().toString();
}