-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathmonteCarloSchoenig.py
45 lines (37 loc) · 1.36 KB
/
monteCarloSchoenig.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from annotateOffs import *
from collections import defaultdict
import random
# Group the rows of the score table by guide set. For every set we keep
#   realData:  the observed modFreq values, and
#   scoreData: (doench score, modFreq) pairs, so the guides of a set can be
#              ranked by score later on.
realData = defaultdict(list)
scoreData = defaultdict(list)
for row in iterTsvRows("effData/schoenig.scores.tab"):
    # Commented-out filters that skipped guides whose name starts with "cdk"
    # or "Hamid" were here; with them disabled every row is kept.
    # The set name is the part of the guide name before the first "_".
    guideSet = row.guide.split("_")[0]
    modFreq = int(row.modFreq)
    realData[guideSet].append(modFreq)
    scoreData[guideSet].append((float(row.doench), modFreq))
# Show the modFreq values collected for every guide set.
# print is called with one pre-formatted string so the line parses, and prints
# the same text, under both Python 2 and Python 3.
print("Sets of guides:")
for guideSet, modFreqs in realData.items():
    print("%s %s" % (guideSet, modFreqs))
#realData= {'snap25': [3, 2, 1], 'pTALRep': [1, 1, 3], 'pTAlReport1w': [3, 2, 1], 'cdk4int2': [3, 2, 1], 'cdk4int5': [3, 3, 1], 'Hamid': [2, 2, 3], 'SPARC': [3, 2, 2], 'OTR': [1, 1, 3]}
# Count the guide sets in which the guide with the highest score has a
# modFreq of 3 (presumably the best observed class - confirm against the
# data). This count is the threshold the random simulation has to reach.
minSuccCount = 0
for scoreList in scoreData.values():
    # Sorts in place, ascending by (score, modFreq): the last entry is the
    # top-scored guide. Equal scores are ordered by modFreq, so a tie at the
    # top resolves to the guide with the higher modFreq.
    scoreList.sort()
    topModFreq = scoreList[-1][1]
    if topModFreq == 3:
        minSuccCount += 1
# Single pre-formatted argument: identical output on Python 2 and Python 3.
print("Total number of times the score is correct: %s" % minSuccCount)
# Monte Carlo estimate of how often picking guides at random does at least as
# well as the score: in every round one guide is drawn uniformly from each
# set, and the round succeeds when at least minSuccCount of the drawn guides
# have modFreq 3.
totalSuccessCount = 0
n = 100000  # number of simulated rounds
# The sets do not change during the simulation, so look them up once.
modFreqLists = list(realData.values())
for _ in range(n):
    # random.choice draws one element uniformly, which is what shuffling the
    # list and taking its first element did, without reordering realData.
    successCount = sum(
        1 for modFreqs in modFreqLists if random.choice(modFreqs) == 3
    )
    if successCount >= minSuccCount:
        totalSuccessCount += 1
# Single pre-formatted arguments keep the output identical on Python 2 and 3.
print("%s %s" % (totalSuccessCount, n))
# Report the threshold that was actually used instead of a hard-coded number.
print("drawing a '3', >= %d times, when drawing one from each set: %s"
      % (minSuccCount, totalSuccessCount / float(n)))