-
Notifications
You must be signed in to change notification settings - Fork 0
/
TRIMMOMATIC-AUTO.py
63 lines (51 loc) · 2.79 KB
/
TRIMMOMATIC-AUTO.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/python
#TRIMMOMATIC-AUTO.py#
#Author: Rachel Wiltshire, U. Notre Dame, January 2017
#Automated Trimmomatic usage for multiple samples - removes Illumina adapters and trims poor quality reads
#Usage: Navigate to indir; call python and TRIMMOMATIC-AUTO.py
#This script is specifically for the WGS gDNA PE libraries AFar April 2013 sequenced on Illumina HiSeq2000 v.1.9
#Broad Institute (16 Anopheles genomes project)
#Import Python packages
import os, gzip, subprocess
#Set variables
indir = '/afs/crc.nd.edu/user/r/rwiltshi/FARAUTI/SRA_downloads/'
outdir = '/afs/crc.nd.edu/user/r/rwiltshi/FARAUTI/TERMINAL/trimmed/'
trimmomatic = '/opt/crc/bio/Trimmomatic/0.32/bin/trimmomatic'
illclip = 'ILLUMINACLIP:/afs/crc.nd.edu/user/r/rwiltshi/GROUP_SOLOMON/rwiltshi/AGam_chromosomes/TruSeq3-PE.fa:2:30:10'
lead = 'LEADING:5'
trail = 'TRAILING:5'
slide = 'SLIDINGWINDOW:4:15'
minlen = 'MINLEN:50'
f1 = ""
f2 = ""
#Read through data files in input directory and execute shell command when conditions are met
#(i.e. find matching pair (SRX_AND_RUN_1.fastq.gz and SRX_AND_RUN_2.fastq.gz)
#indir MUST be sorted first as os.listdir() is in arbitrary order as an artifact of the file system
for filename in sorted(os.listdir(indir)):
#print filename
#continue
if filename.endswith("_1.fastq.gz"):
f1 = filename
elif filename.endswith("_2.fastq.gz"):
f2 = filename
else:
continue
#If you've found a matching file pair that both have content, unzip them and set variables
if ((f1 != "" and f2 != "") and (f1[0:-11] == f2[0:-11])):
with gzip.open(f1) as f, gzip.open(f2) as g:
f1parts = f.name.rsplit("_", 1)
f1paired = f1parts[0] + "_1.paired.fq"
f1unpaired = f1parts[0] + "_1.unpaired.fq"
f2parts = g.name.rsplit("_", 1)
f2paired = f2parts[0] + "_2.paired.fq"
f2unpaired = f2parts[0] + "_2.unpaired.fq"
#Define trimmomatic parameters in the command line
trimmomaticCMD = trimmomatic + " PE -threads 8 -phred33 " + " " + f1 + " " + f2 + " " + outdir + f1paired + \
" " + outdir + f1unpaired + " " + outdir + f2paired + " " + outdir + f2unpaired + \
" " + illclip + " " + lead + " " + trail + " " + slide + " " + minlen
print trimmomaticCMD
#Call the command in the Python script and set it to execute through the shell
subprocess.call(trimmomaticCMD, shell=True)
f1 = ""
f2 = ""
#END