forked from mayankpahadia1993/ContigValidator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Snakefile
142 lines (116 loc) · 4.73 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
KMERSIZE = config["KMERSIZE"]
ABUNDANCE_MIN = config["ABUNDANCE_MIN"]
TMP_DIR = config["TMP_DIR"]
OUT_DIR = config["OUT_DIR"]
EXACT_ALIGN = "/home/pzm11/research/software/ContigValidator/src/exactalign"
rule all:
input:
kmer_results = expand("cvfiles/{x}.kmer_results", x=config["CONTIG_FILES"]),
bwa_results = expand("cvfiles/{x}.bwa_align_results", x=config["CONTIG_FILES"]),
exact_results = "cvfiles/all.exact_align_results"
output:
temp("cvfiles/temp0"),
temp("cvfiles/temp1"),
temp("cvfiles/temp2"),
temp("cvfiles/temp3"),
"cvfiles/results.txt"
shell:
"""
echo -e "kmer_recall%\tkmer_precision%\tbwa_align%\texact_align%" > cvfiles/temp0
cat {input.kmer_results} > cvfiles/temp1
cat {input.bwa_results} | sed s/\%//g > cvfiles/temp2
paste cvfiles/temp1 cvfiles/temp2 {input.exact_results} > cvfiles/temp3
cat cvfiles/temp0 cvfiles/temp3 > cvfiles/results.txt
"""
rule concat_refs:
input:
{config["PRIMARY_REF"]},
expand("{x}", x=config["SECONDARY_REFS"])
output:
"cvfiles/refs.fa"
shell:
"cat {input} > cvfiles/refs.fa"
rule kmc_ref:
input:
"cvfiles/refs.fa"
output:
"cvfiles/refs.fa.kmc_pre",
"cvfiles/refs.fa.kmc_suf"
threads:
4
shell:
"kmc -t{threads} -ci1 -k{KMERSIZE} -fm {input} cvfiles/refs.fa {TMP_DIR}"
rule kmc_contigs:
input:
"{contigs}"
output:
"cvfiles/all.{contigs}.kmc_pre",
"cvfiles/all.{contigs}.kmc_suf"
threads:
4
shell:
"kmc -t{threads} -ci{ABUNDANCE_MIN} -k{KMERSIZE} -fm {input} cvfiles/all.{wildcards.contigs} {TMP_DIR}"
rule intersect_kmers:
input:
"cvfiles/all.{contigs}.kmc_pre",
"cvfiles/all.{contigs}.kmc_suf",
"cvfiles/refs.fa.kmc_pre",
"cvfiles/refs.fa.kmc_suf"
output:
temp("cvfiles/tp.{contigs}.kmc_pre"),
temp("cvfiles/tp.{contigs}.kmc_suf"),
temp("cvfiles/fn.{contigs}.kmc_pre"),
temp("cvfiles/fn.{contigs}.kmc_suf"),
temp("cvfiles/fp.{contigs}.kmc_pre"),
temp("cvfiles/fp.{contigs}.kmc_suf"),
"cvfiles/{contigs}.kmer_results"
shell:
"""
kmc_tools simple cvfiles/refs.fa -ci1 cvfiles/all.{wildcards.contigs} -ci1 intersect cvfiles/tp.{wildcards.contigs} -ci1
kmc_tools simple cvfiles/refs.fa -ci1 cvfiles/all.{wildcards.contigs} -ci1 kmers_subtract cvfiles/fn.{wildcards.contigs} -ci1
kmc_tools simple cvfiles/refs.fa -ci1 cvfiles/all.{wildcards.contigs} -ci1 reverse_kmers_subtract cvfiles/fp.{wildcards.contigs} -ci1
kmc_dump cvfiles/tp.{wildcards.contigs} cvfiles/temp; tp=`less cvfiles/temp | wc -l`
kmc_dump cvfiles/fn.{wildcards.contigs} cvfiles/temp; fn=`less cvfiles/temp | wc -l`
kmc_dump cvfiles/fp.{wildcards.contigs} cvfiles/temp; fp=`less cvfiles/temp | wc -l`
precision=$(bc -l <<< "scale=4; $tp * 100 / ($tp + $fp)")
recall=$(bc -l <<< "scale=4; $tp * 100 / ($tp + $fn)")
echo -e "$recall\t$precision" >> cvfiles/{wildcards.contigs}.kmer_results
"""
# tp=$(/home/pzm11/research/software/ContigValidator/src/count_kmers_kmc cvfiles/tp.{wildcards.contigs})
# fn=$(/home/pzm11/research/software/ContigValidator/src/count_kmers_kmc cvfiles/fn.{wildcards.contigs})
# fp=$(/home/pzm11/research/software/ContigValidator/src/count_kmers_kmc cvfiles/fp.{wildcards.contigs})
rule bwa_index_ref:
input:
{config["PRIMARY_REF"]}
output:
"cvfiles/refindex.bwt",
"cvfiles/refindex.sa",
"cvfiles/refindex.pac",
"cvfiles/refindex.ann",
"cvfiles/refindex.amb"
shell:
"bwa index -p cvfiles/refindex {input}"
rule bwa_align:
input:
"cvfiles/refindex.bwt",
"{contigs}"
output:
"cvfiles/{contigs}.bwa_align_results",
temp("cvfiles/{contigs}.bwa.bam"),
temp("cvfiles/{contigs}.bwa.bam.bai")
threads: 8
shell:
"""
bwa mem -t {threads} cvfiles/refindex {wildcards.contigs} | samtools sort > cvfiles/{wildcards.contigs}.bwa.bam
samtools index cvfiles/{wildcards.contigs}.bwa.bam
samtools flagstat cvfiles/{wildcards.contigs}.bwa.bam | grep mapped | cut -f 5 -d " "| cut -f 2 -d "(" | head -1 >> cvfiles/{wildcards.contigs}.bwa_align_results
"""
rule exact_align:
input:
refs = "cvfiles/refs.fa",
contig_files = expand("{x}", x=config["CONTIG_FILES"])
output:
"cvfiles/all.exact_align_results",
"cvfiles/all.exact_align_results.exact"
shell:
"{EXACT_ALIGN} cvfiles/refs.fa cvfiles/all.exact_align_results {input.contig_files}"