-
Notifications
You must be signed in to change notification settings - Fork 0
/
RedfishMarkerCall
117 lines (99 loc) · 3.16 KB
/
RedfishMarkerCall
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""Redfish Amplicon Marker Calls via Snakemake pipeline
Returns:
vcf : a variant call format file defined in the config file. Default is `qual_filtered_redfishamplicon_calls.vcf`
"""
configfile: "config.json" #this file generated with `python redfish_snake_setup.py`
def get_fastp_input_fastqs(wildcards):
"""Get the list of sample names to build inputs from the specified json config file."""
return config["samples"][wildcards.sample]
rule all:
input:
config["vcf"]
rule fastp_cut:
"""
Run fastp for the list of input files,
right trimming the reads based on quality.
"""
input:
get_fastp_input_fastqs
output:
"data/trimmed/{sample}.fastq.gz"
log:
"logs/fastp/{sample}.log"
threads: 24
shell:
"fastp -i {input} --cut_right --cut_right_window_size 4 "
"--cut_right_mean_quality 20 -o {output} --thread {threads}"
rule bwa_index:
"""
Index the genom file for alignment
"""
input: config["genome"]
output:
"{input}.amb"
shell:
"bwa index {input}"
rule bwa_map:
"""
Take the specified input genome and the trimmed sample read files.
Align the latter to the former.
"""
input:
config["genome"],
"data/trimmed/{sample}.fastq.gz" #could specify with function
output:
"data/mapped_reads/{sample}.bam"
params:
rg=r"@RG\tID:{sample}\tSM:{sample}\tLB:library1"
log:
"logs/bwa_mem/{sample}.log" #logging info to a 'logs/' folder
threads: 24
shell:
"bwa mem -R '{params.rg}' -t {threads} {input} | samtools view -Sb - > {output}"
rule samtools_sort:
"""
Sort the bam file.
"""
input:
"data/mapped_reads/{sample}.bam"
output:
"data/sorted_reads/{sample}.bam"
shell:
"samtools sort -T data/sorted_reads/{wildcards.sample} "
"-O bam {input} > {output}"
rule samtools_index:
"""
Index the bam file.
"""
input:
"data/sorted_reads/{sample}.bam"
output:
"data/sorted_reads/{sample}.bam.bai"
shell:
"samtools index {input}"
rule bcftools_call:
"""
Use bcftools to create a pileup and call the genetic variants.
"""
input:
fa=config["genome"],
bam=expand("data/sorted_reads/{sample}.bam", sample=config["samples"]),
bai=expand("data/sorted_reads/{sample}.bam.bai", sample=config["samples"])
output:
bcf=config["bcf"]
threads: 24
shell:
" bcftools mpileup -Ou --max-depth 2000000 --threads {threads} -f {input.fa} {input.bam} | "
" bcftools call --threads {threads} -mv -Ob -V indels -o {output.bcf}"
#above specifies to make the snp calls, "-Ob" says make output a binary, "-V indels" says skip indel variants
rule variant_filter:
"""
Filter the bcf file based on quality and produce a .vcf file.
"""
input:
bcf=config["bcf"]
output:
vcf=config["vcf"]
threads: 24
shell:
' bcftools filter -i "DP>1800 && QUAL >= 30" {input.bcf} > {output.vcf}'