-
Notifications
You must be signed in to change notification settings - Fork 2
/
Snakefile
133 lines (122 loc) · 3.24 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
IDS, = glob_wildcards("{id}_R1.fastq.gz")
rule all:
input:
expand(["taxonomy/{id}.krona.html"], id=IDS),
"iqtree.log",
"results/",
"amr_output.tab",
"heatmap_output.html"
# Cleaning up fastq files
rule fastp:
input:
["{id}_R1.fastq.gz", "{id}_R2.fastq.gz"]
output:
["fastp/{id}_R1.fastq.gz.fastp", "fastp/{id}_R2.fastq.gz.fastp"]
message:
"Filtering fastQ files by trimming low quality reads using fastp"
shell:
"fastp -i {input[0]} -I {input[1]} -o {output[0]} -O {output[1]}"
# Taxonomic classification with Centrifuge
rule centrifuge:
input:
rules.fastp.output[0], rules.fastp.output[1]
output:
"taxonomy/{id}-report.txt",
"taxonomy/{id}-result.txt"
message:
"Taxonomic classification of processed reads using centrifuge"
shell:
"centrifuge -p 4 -x $CENTRIFUGE_DEFAULT_DB -1 {input[0]} -2 {input[1]} --report-file {output[0]} -S {output[1]}"
# Prepping centrifuge results
rule prep_centrifuge_results:
input:
"taxonomy/{id}-result.txt"
output:
"taxonomy/{id}-results.krona"
message:
"Re-formatting centrifuge results"
shell:
"cat {input} | cut -f 1,3 > {output}"
# Generating Krona plot
rule krona_plot:
input:
rules.prep_centrifuge_results.output
output:
touch("taxonomy/{id}.krona.html")
message:
"Rendering krona plot visualization"
params:
prefix="{id}"
shell:
"ktImportTaxonomy -o taxonomy/{params.prefix}.krona.html {input}"
# Vaiant call requires reference genome
rule snippy:
input:
["fastp/{id}_R1.fastq.gz.fastp", "fastp/{id}_R2.fastq.gz.fastp"]
params:
ref = config['ref']
output:
directory("{id}/"),
touch("{id}.snippy")
message:
"Calling variants using snippy"
shell:
"snippy --force --cleanup --outdir {output[0]} --ref {params.ref} --R1 {input[0]} --R2 {input[1]}"
# Alignment of whole/core genomes requires reference genome
rule snippy_core:
input:
expand("{id}.snippy", id=IDS)
output:
touch("core.aln"), touch("core.vcf"), touch("core.full.aln")
message:
"Aligning core and whole genomes into a multi fasta file"
params:
prefix="core", ref=config['ref']
run:
dirs=""
for x in input:
d=x.split(".")[0]
dirs=dirs+d+" "
shell("snippy-core --ref {params.ref} --prefix {params.prefix} "+dirs)
# Arranging files that were generated by snippy-core
rule move_files:
input:
rules.snippy_core.output
output:
directory("results/")
message:
"Sorting alignment files"
run:
shell("mv core* {output}")
shell("rm fastq/*.snippy")
shell("rm -r taxonomy/fastq/*.files")
# Building phylogeny
rule tree:
input:
"core.aln"
output:
"iqtree.log"
message:
"Builing phylogeny tree of whole genomes using IQ-Tree"
shell:
"iqtree -s results/{input} -bb 1000 > {output}"
# Screening genomes for antimicorbial resistance genes
rule abricate:
input:
rules.snippy_core.output[2]
output:
"amr_output.tab"
message:
"Screening genomes for antimicorbial resistance genes"
shell:
"abricate results/{input} > {output}"
# Generating SNP Heatmap
rule vcf_viewer:
input:
"core.vcf"
output:
touch("heatmap_output.html")
message:
"Generating SNP heatmap"
shell:
"Rscript data/vcf2heatmap.R results/{input}"