-
Notifications
You must be signed in to change notification settings - Fork 6
/
clustering.py
94 lines (82 loc) · 2.76 KB
/
clustering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import csv
from numpy import *
import configparser
import os
def Two_Column_List(file):
with open(file) as input:
mycsv = csv.reader(input, delimiter='\t', skipinitialspace=True)
peaks = []
i=0
for cols in mycsv:
if len(cols)==2:
peaks.append([i,float(cols[0].strip()),float(cols[1].strip())])
i+=1
return peaks
def setofy(peaks):
yvalues=set()
for peak in peaks:
yvalues.add(peak[2])
return yvalues
def cluster2dspectrum(cp, project):
datapath=cp.get('datadir')
C_LIMIT=float(cp.get('tolerancec'))
H_LIMIT=float(cp.get('toleranceh'))
peaks = Two_Column_List(datapath+os.sep+project+os.sep+cp.get('spectruminput'))
#print(peaks)
xclusters=[]
yclusters=[]
for peak in peaks:
found=False
for xcluster in xclusters:
#print(str(peak[2])+' '+str(xcluster[0][2])+' '+str(peak[1])+' '+str(xcluster[0][1]))
if peak[1]>xcluster[0][1]-C_LIMIT and peak[1]<xcluster[0][1]+C_LIMIT:
xcluster.append(peak)
found=True
break;
if not found:
xclusters.append([peak])
found=False
for ycluster in yclusters:
if peak[2]>ycluster[0][2]-H_LIMIT and peak[2]<ycluster[0][2]+H_LIMIT:
ycluster.append(peak)
found=True
break;
if not found:
yclusters.append([peak])
#print(xclusters)
#print(yclusters)
found=True
while found:
for yindex, ycluster in enumerate(yclusters):
xclustersnew=[]
donexclusters=[]
found=False
for index1, xcluster1 in enumerate(xclusters):
for index2, xcluster2 in enumerate(xclusters):
if index2>index1 and setofy(xcluster1).intersection(setofy(ycluster)) and setofy(xcluster2).intersection(setofy(ycluster)):
xpeaks=xcluster1+xcluster2
xclustersnew.append(xpeaks)
donexclusters.append(index1)
donexclusters.append(index2)
found=True
break
if found:
break
for index, xcluster in enumerate(xclusters):
if index not in donexclusters:
xclustersnew.append(xcluster)
xclusters=xclustersnew
#print(xclusters)
#for cluster in xclusters:
#print('cluster')
# for peak1 in cluster:
# for peak2 in cluster:
# if(peak2[0]>peak1[0] and ((peak1[1]>peak2[1]-C_LIMIT and peak1[1]<peak2[1]+C_LIMIT) or (peak1[2]>peak2[2]-H_LIMIT and peak1[2]<peak2[2]+H_LIMIT))):
# print(str(peak1[0])+'->'+str(peak2[0]))
f=open(datapath+os.sep+project+os.sep+'result'+os.sep+cp.get('clusteringoutput'),'w')
for cluster in xclusters:
for peak1 in cluster:
for peak2 in cluster:
if(peak2[0]>=peak1[0] and ((peak1[1]>=peak2[1]-C_LIMIT and peak1[1]<=peak2[1]+C_LIMIT) or (peak1[2]>=peak2[2]-H_LIMIT and peak1[2]<=peak2[2]+H_LIMIT))):
f.write(str(peak1[0])+' '+str(peak2[0])+'\n')
f.close()