-
Notifications
You must be signed in to change notification settings - Fork 4
/
DCCM.py
244 lines (215 loc) · 8.09 KB
/
DCCM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
import pandas as pd
import numpy as np
import math
from MDAnalysis.coordinates.XTC import XTCReader
import matplotlib.pyplot as plt
import matplotlib
import sys
class Traj:
"Reads trajectory from .xtc file"
def __init__(self,fn):
'''
# Paramters \n
fn: traj filename
b: start time ps (default -1)
e: end time ps(default -1)
dt: skip time ps (default 2 only even numbers)
nc: cores of cpu to use (default 4)
'''
# dt即时间间隔只能为偶数
self.fn=fn
self.traj=self.readTraj()
self.coord=self.getCoord()
def readTraj(self):
return XTCReader(self.fn)
# def getBox(self):
# return [self.traj.box[i][i] for i in range(3)]
def getAtomNum(self):
return self.traj.n_atoms
def getCoord(self):
# 0对应于vmd中的第一帧
atomCoord={}
x=np.array([[0 for i in range(1,4)] for j in range(1,self.getAtomNum()+1)])
for t in self.traj.trajectory:
x=np.array(t)
atomCoord[str(t.time)]=x
n_series=pd.Series(atomCoord)
tmp=pd.concat([(pd.DataFrame(n_series[i],columns=[[str(n_series.keys()[i]),str(n_series.keys()[i]),str(n_series.keys()[i])],["x","y","z"]])) for i in range(n_series.size)],axis=1)
print("input file contains {} frames from {} ps to {} ps dt {}ps".format(int(tmp.shape[1]/3),n_series.keys()[0],n_series.keys()[-1],float(n_series.keys()[1])-float(n_series.keys()[0])))
# print(tmp)
return tmp
class gro:
"读取gro文件"
def __init__(self,fn):
self.fn=fn
self.atomInfo=self.readGro()
def readGro(self):
gro=open(self.fn,"r")
atomSet=[]
for line in gro.readlines():
tmp=[]
if len(line.split())>3:
# 1 resnum 2 resn 3 aton name 4 atom num 5x 6y 7z 8vx 9vy 10vz
tmp.append(line[0:5].replace(" ",""))
tmp.append(line[5:10].replace(" ",""))
tmp.append(line[10:15].replace(" ",""))
tmp.append(line[15:20].replace(" ",""))
tmp.append(line[20:28].replace(" ",""))
tmp.append(line[28:36].replace(" ",""))
tmp.append(line[36:44].replace(" ",""))
tmp.append(line[44:52].replace(" ",""))
tmp.append(line[52:60].replace(" ",""))
tmp.append(line[60:68].replace(" ",""))
if tmp!=[]:
atomSet.append(tmp)
return pd.DataFrame(atomSet,columns=["resNum","resName","atomName","atomNum","X","Y","Z","Vx","Vy","Vz"])
class combine():
"""
读取gro文件和xtc文件并整合
"""
def __init__(self,grofile,xtcfile):
'''
# Paramters \n
fn: traj filename
b: start frame (default -1)
e: end frame (default -1)
dt: skip frames (default 2 only even numbers)
nc: cores of cpu to use (default 4)
'''
self.grofile=grofile
self.xtcfile=xtcfile
def gro_xtc(self):
groFile=gro(self.grofile).readGro()
xtcFile=Traj(self.xtcfile).coord
xtcFile.insert(0,"atomNum",groFile["atomNum"])
xtcFile.insert(1,"atomName",groFile["atomName"])
xtcFile.insert(2,"resNum",groFile["resNum"])
xtcFile.insert(3,"resName",groFile["resName"])
return xtcFile
def resi_sele(sheet,sele):
"""
#Paramters
sele example: `resi 1:5`
`resi 1:5,10`
`resi 1,2,3`
"""
sele_name=list(filter(None,sele.split(" ")))
if sele_name[0].lower()=="resi":
tmp=[]
for i in sele_name[1].split(","):
if len(i.split(":"))==2:
tmp.append(list(map(str,range(int(i.split(":")[0]),int(i.split(":")[1])+1))))
else:
tmp.append([i])
return sheet[sheet["resNum"].isin(sum(tmp,[]))]
else:
print("please enter the correct format(resi 1:5 or resi 1,2) ")
def resn_sele(sheet,sele):
"""
#Paramters
only return existed resname
sele example: `resn xxx,xxx`
"""
sele_name=list(filter(None,sele.split(" ")))
if sele_name[0].lower()=="resn":
tmp=[]
for i in sele_name[1].split(","):
tmp.append([i])
return sheet[sheet["resName"].isin(sum(tmp,[]))]
else:
print("please enter the correct format(resn xxx) ")
def atom_sele(sheet,sele):
"""
#Paramters
only return existed atomname
sele example: `name xxx,xxx`
if use `name C*` return all C atom
"""
sele_name=list(filter(None,sele.split(" ")))
tmp=[]
if sele_name[0].lower()=="name":
for i in sele_name[1].split(","):
if i.endswith("*"):
tmp.append(sheet[sheet["atomName"].str.contains(i[0])]["atomName"].unique().tolist())
else:
tmp.append([i])
return sheet[sheet["atomName"].isin(sum(tmp,[]))]
else:
print("please enter the correct format(name C1,C2 or name C*) ")
m=combine(sys.argv[1],sys.argv[2]).gro_xtc()
CA=atom_sele(m,"name CA").reset_index()
tmp=[]
for i in range(int((CA.shape[1]-5)/3)):
tmp.append(np.array(CA.iloc[:,(5+i*3):(8+i*3)]))
allxyz=np.array(tmp)
# tmp储存格式帧,原子,x,y,z
# kabschalgorithm 点云算法修正坐标 fit
# fitxyz
for n in range(allxyz.shape[0]):
cercoord_A=np.mean(np.matrix(allxyz[0]),axis=0)
cercoord_B=np.mean(np.matrix(allxyz[n]),axis=0)
A_c=allxyz[0]-cercoord_A
B_c=allxyz[n]-cercoord_B
H=B_c.T*A_c
U,S,VT=np.linalg.svd(H)
d=np.sign(np.linalg.det(VT.T*U.T))
diag=np.diag([1,1,d])
R=VT.T * diag * U.T
# 坐标以A为主
T=-R*cercoord_B.T+cercoord_A.T
# 修正之后的B坐标
B_calc=(R*allxyz[n].T+T).T
allxyz[n]=B_calc
atomcoord_mean=[]
for i in range(allxyz.shape[1]):
x=[]
y=[]
z=[]
for j in range(allxyz.shape[0]):
x.append(allxyz[j][i][0])
y.append(allxyz[j][i][1])
z.append(allxyz[j][i][2])
atomcoord_mean.append([np.mean(x),np.mean(y),np.mean(z)])
delta0=np.array(atomcoord_mean)
# 计算差值
# 以平均原子坐标为基准
# np.set_printoptions(precision=5)
delta=allxyz-delta0
delta_r=np.empty((delta.shape[1],delta.shape[1],delta.shape[0],))
for n in range(delta.shape[0]):
for i in range(delta.shape[1]):
for j in range(delta.shape[1]):
delta_r[i,j,n]=np.dot(delta[n][i][:],delta[n][j][:])
cij=delta_r
# 先时间平均
deltai=np.empty((delta.shape[1]))
for i in range(delta.shape[1]):
deltai[i]=np.sqrt(np.mean(cij[i,i,:]))
Cij=np.empty((delta.shape[1],delta.shape[1]))
for i in range(delta.shape[1]):
for j in range(delta.shape[1]):
Cij[i,j]=np.mean(cij[i,j,:])/(deltai[i]*deltai[j])
Cout=Cij
# 使用contour+cp=ontour绘图
# 使用contour+cp=ontour绘图
plt.figure(figsize=(14,12))
cmap1 = matplotlib.colors.ListedColormap(['#ff80ff',"#ffabff","#ffd4ff","#fffcff","#d4ffff","#a8ffff","#80ffff"])
norm=matplotlib.colors.BoundaryNorm([-1.0,-0.75,-0.5,-0.25,0.25,0.5,0.75,1.0],cmap1.N)
x=np.array(range(Cij.shape[0]))
y=np.array(range(Cij.shape[1]))
z=Cij
plt.rcParams['font.size'] = 16
plt.contourf(x,y,z,cmap=cmap1,levels=[-1, -0.75, -0.5, -0.25, 0.25, 0.5, 0.75, 1],norm=norm)
plt.colorbar()
plt.contour(x,y,z,colors="grey",levels=[-1, -0.75, -0.5, -0.25, 0.25, 0.5, 0.75, 1],linewidths=0.8,linestyles="solid")
ax=plt.gca()
font1 = {'family' : 'Arial',
'weight' : 'normal',
'size' : 24,
}
ax.set_xlabel("Residue No.",font1,labelpad=15.0)
ax.set_ylabel("Residue No.",font1,labelpad=15.0),
ax.xaxis.set_major_locator(matplotlib.ticker.MultipleLocator(25))
ax.yaxis.set_major_locator(matplotlib.ticker.MultipleLocator(25))
ax.set_title("Residue Cross Correlation",fontweight="bold",fontsize=26,pad=20)
plt.savefig("DCCM.png")