forked from jpatsenker/cra
-
Notifications
You must be signed in to change notification settings - Fork 0
/
process_data_old.py
186 lines (110 loc) · 5.13 KB
/
process_data_old.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
from email.mime.multipart import MIMEMultipart
import sys
import os
import subprocess
import smtplib
from email.mime.application import MIMEApplication
from email.mime.text import MIMEText
"""DEPRECATED"""
def send_email(info, email, files):
    """Mail a text report, with optional file attachments, via the local SMTP server.

    Args:
        info: Plain-text body of the message.
        email: Recipient address (or list of addresses) handed to
            ``smtplib.SMTP.sendmail``.
        files: Iterable of paths to attach; ``None`` or empty means no
            attachments.

    SMTP-level failures are reported on stdout rather than raised.  Errors
    while reading an attachment propagate to the caller.
    """
    sender = 'noreply@kirschner.med.harvard.edu'
    receivers = email

    message = MIMEMultipart()
    # Keyword arguments to MIMEMultipart() become parameters of the
    # Content-Type header, not message headers, so From/Subject have to be
    # assigned explicitly for them to show up in the delivered mail.
    message['From'] = "CRAP DB <noreply@kirschner.med.harvard.edu>"
    message['Subject'] = "CRAP Score"

    message.attach(MIMEText(info))

    for path in files or []:
        with open(path, "rb") as handle:
            attachment = MIMEApplication(handle.read())
        # NOTE(review): every attachment is advertised under this one fixed
        # name, whatever the source path is; kept as-is because recipients
        # may rely on it.
        attachment.add_header('Content-Disposition', 'attachment',
                              filename="length_distribution.png")
        message.attach(attachment)

    try:
        smtp_obj = smtplib.SMTP('localhost')
        try:
            smtp_obj.sendmail(sender, receivers, message.as_string())
        finally:
            # Always release the connection, even when sendmail() fails.
            try:
                smtp_obj.quit()
            except smtplib.SMTPServerDisconnected:
                pass  # server already dropped the connection
        print("Successfully sent email")
    except smtplib.SMTPException:
        print("Error: unable to send email")
# launch from main dir
#
# Usage: python process_data_old.py <input_fasta> <mail_address>
#
# Pipeline: validate the FASTA file, run the length-analysis tools that live
# in ./mining, then e-mail the collected text reports plus the
# length-distribution plot to <mail_address>.
try:
    from shlex import quote as _shell_quote  # Python 3
except ImportError:
    from pipes import quote as _shell_quote  # Python 2


def _tagged(path, tag):
    """Return *path* with *tag* inserted between its root and its extension."""
    root, ext = os.path.splitext(path)
    return root + tag + ext


def _start_tool(tool, *args):
    """Start a mining tool through ../run_with_profile.sh and return its Popen.

    Arguments are shell-quoted because they are derived from the command
    line and the command is interpreted by /bin/sh.
    """
    quoted_args = ' '.join(_shell_quote(str(arg)) for arg in args)
    return subprocess.Popen(
        ['/bin/sh', '-c',
         '../run_with_profile.sh -q short -K -W 1 python ' + tool + ' ' + quoted_args])


if len(sys.argv) < 3:
    sys.exit('usage: %s <input_fasta> <mail_address>' % sys.argv[0])

fastaChecker = '/www/kirschner.med.harvard.edu/docroot/genomes/code/fasta_checker.pl'
input_file = sys.argv[1]
mail_address = sys.argv[2]

# splitext() keeps the name intact when the file has no extension; slicing at
# rfind('.') would cut at index -1 and split off the last character instead.
checked_file = _tagged(input_file, '_checked')

# Drop the error report of any previous run; a missing file is not an error.
try:
    os.remove('tmp/fasta_errors.txt')
except OSError:
    pass

# PERFORM A FASTA CHECK
# The checked FASTA goes to checked_file (-o) and the checker's complaints to
# tmp/fasta_errors.txt (-e); the wrapper's own stderr goes to tmp/errors.txt.
# NOTE(review): the -q/-K/-W/-o/-e flags are presumably forwarded to a batch
# scheduler by run_with_profile.sh -- confirm against that script.
fasta_check_command = ' '.join([
    './run_with_profile.sh -q short -K -W 1',
    '-o', _shell_quote(checked_file),
    '-e tmp/fasta_errors.txt',
    'perl', fastaChecker, _shell_quote(input_file), '0',
    '2>tmp/errors.txt',
])
process_fastaCheck = subprocess.Popen(['/bin/bash', '-c', fasta_check_command])
process_fastaCheck.wait()  # wait for fasta to finish before continuing

# CHECK IF ITS OK TO CONTINUE
with open('tmp/fasta_errors.txt', "r") as fastaErrors:
    errorStr = fastaErrors.read()
if errorStr:
    # Any output from the checker means the file is malformed: report and stop.
    send_email("Fasta file improperly formatted: \n" + errorStr, mail_address, [])
    sys.exit(0)

# CHANGE INTO MINING DIRECTORY
try:
    os.chdir('mining')
except OSError:
    print("Error, couldn't get into directory mining")
    # NOTE(review): exits with status 0 although this is a failure; left
    # unchanged because callers may depend on the exit status.
    sys.exit(0)

# THE OUTSTRING
outstr = "Fasta is in proper format \n"

# FILES TO BE ATTACHED
outfiles = []

# CONSTANTS
too_short = 30
too_long = 30000

# TOOLS
addLengths = 'add_lengths.py'
getLongShort = 'get_longest_and_shortest.py'
getLenDist = 'get_length_distribution.py'
getTooLongTooShort = 'get_too_long_too_short.py'
getSimpleStats = 'get_simple_stats.py'
graphMe = 'graph_ordered_pairs.py'

# ADD LENGTHS TO THE FILE
# Every later tool reads this file, so it must finish before they start.
file_with_lengths = _tagged(checked_file, '_lengths')
process_addLengths = _start_tool(addLengths,
                                 '../' + checked_file,
                                 '../' + file_with_lengths)
process_addLengths.wait()

# The four analyses below are independent of each other: start them all,
# then collect the results in a fixed order further down.

# GET LONG AND SHORT SEQS
long_short = _tagged(input_file, '_long_short')
process_longShort = _start_tool(getLongShort,
                                '../' + file_with_lengths,
                                '../' + long_short)

# GET LENGTH DISTRIBUTION
# basename() also handles an input path without any '/'; slicing at
# rfind('/') would then keep only the last character of the name.
# This path is relative to the mining directory, unlike the '../' paths.
len_dist = 'tmp/' + os.path.basename(input_file) + '.hist'
process_lenDistribution = _start_tool(getLenDist,
                                      '../' + file_with_lengths,
                                      len_dist,
                                      '100')

# GET SEQUENCES THAT ARE TOO SHORT AND TOO LONG
too_s_too_l = _tagged(input_file, '_bad_length')
process_badLength = _start_tool(getTooLongTooShort,
                                '../' + file_with_lengths,
                                '../' + too_s_too_l,
                                too_short,
                                too_long)

# JUST SOME SIMPLE STATS
simple_stats = input_file + '.stat'
process_simple = _start_tool(getSimpleStats,
                             '../' + file_with_lengths,
                             '../' + simple_stats)

# -------PULLING ANALYSIS-------
# Append each report to the mail body in this order:
# simple stats, longest/shortest, too long/too short.
for finished, report in ((process_simple, simple_stats),
                         (process_longShort, long_short),
                         (process_badLength, too_s_too_l)):
    finished.wait()
    with open('../' + report, "r") as report_stream:
        outstr += '\n' + report_stream.read()

# WAIT FOR LENGTH DIST. TO FINISH
process_lenDistribution.wait()

# GRAPH IT
# The with-block closes err.log once the plotting process has exited.
with open("err.log", "w") as graph_errors:
    process_graph = subprocess.Popen(['python', graphMe, len_dist], stderr=graph_errors)
    process_graph.wait()
outfiles.append(len_dist + '.png')

# SEND EMAIL WITH RESULTS
send_email(outstr, mail_address, outfiles)