-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy patheffDataAddOneScore.py
61 lines (52 loc) · 1.61 KB
/
effDataAddOneScore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# add a single score to the effData/*.scores.tab files
# this works by writing all new files to effData.tmp/
# you then have to move them over manually to effData/
from annotateOffs import *
import glob
import sys
from os.path import *
sys.path.insert(0, "../crispor")
from crisporEffScores import *
# crisporEffScores needs to find its binaries
setBinDir("../crispor/bin")
setCacheDir("./out/")

# Imported explicitly so that os.mkdir() below does not rely on one of the
# star-imports at the top of the file happening to re-export the os module.
import os

# All output is written to effData.tmp/; the files in effData/ are only read.
if not isdir("effData.tmp"):
    os.mkdir("effData.tmp")

# For debugging, replace the glob with a single file,
# e.g. ["effData/schoenig.scores.tab"]
fnames = glob.glob("effData/*.scores.tab")

for fname in fnames:
    # print(...) with a single argument prints the same text under Python 2
    print("reading %s" % fname)
    outFname = fname.replace("effData/", "effData.tmp/")
    print("Processing %s" % fname)

    # Load the whole table: every row is kept as a list, and the sequence
    # column is collected separately so all scores are computed in one call.
    headers = None
    seqs = []
    newRows = []
    for row in iterTsvRows(fname):
        if headers is None:
            headers = list(row._fields)
        seqs.append(row.longSeq100Bp)
        newRows.append(list(row))

    # Without at least one row there are no field names to extend, so there
    # is nothing sensible to write for this file.
    if headers is None:
        print("no rows in %s, skipping" % fname)
        continue

    # CHANGE HERE WHEN ADDING ANOTHER SCORE
    print("Getting scores for %d sequences" % len(seqs))
    # NOTE(review): presumably cuts the long context sequence down to the
    # window the scorer expects -- confirm against trimSeqs in crisporEffScores
    shortSeqs = trimSeqs(seqs, -20, 10)
    scoreList = calcWuCrisprScore(shortSeqs)
    headers.append("wuCrispr")
    # END CHANGE

    # The rows and scores are paired up positionally below; zip() would
    # silently drop data on a length mismatch, so fail loudly instead
    # (an assert would be stripped when running with -O).
    if len(scoreList) != len(newRows):
        raise ValueError("got %d scores for %d rows in %s" %
                         (len(scoreList), len(newRows), fname))

    # The with-block closes the output file even if writeRow raises.
    with open(outFname, "w") as ofh:
        writeRow(ofh, headers)
        for row, score in zip(newRows, scoreList):
            writeRow(ofh, row + [score])
    print("wrote %s" % outFname)