-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdriver.py
125 lines (107 loc) · 3.51 KB
/
driver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 14 17:15:20 2020
@author: saint8312

Driver script for feature extraction on ZDOCK decoys of target 4AZU.

For every complex file found under the configured zdock/4AZU directory it
computes a per-complex feature vector in parallel (via a worker pool) and
appends the result to ``Data/data_4AZU.pkl``.  Runs are resumable: the
pickle file is a stream of dicts, and any ID already present is skipped.

NOTE(review): an earlier commented-out "dataset generator" section (the
PDBbind training-set builder using conf['index']['PP']) was removed as dead
code; recover it from version control if needed.
"""
import pdb_processor as pdbp
import feature_calculator as fc
import time
from multiprocessing import Pool
import pickle

if __name__ == "__main__":
    import json

    # Load directory layout / index locations from the project config.
    with open('config.json') as json_data_file:
        conf = json.load(json_data_file)
    x_path = conf['root']['PP']
    y_path = conf['index']['PP']  # kept for parity with the (removed) training-set generator
    complex_files = pdbp.list_files(x_path)

    # Feature-extraction parameters: heavy-atom element types considered
    # and the interaction distance cutoff (presumably Angstrom — TODO confirm
    # against feature_calculator).
    atom_types = ['C', 'N', 'O', 'S']
    cutoff = 12

    start_time = time.time()

    # Context manager guarantees the worker pool is terminated even when the
    # run is interrupted (the original code never closed/joined the pool).
    with Pool(6) as pool:
        # inference data: switch paths to the ZDOCK decoys for target 4AZU
        x_path = conf['root']['zdock']["4AZU"]
        complex_files = pdbp.list_files(x_path)
        print(complex_files)
        filename = "Data/data_4AZU.pkl"

        # Load every record already pickled so finished IDs can be skipped.
        # The file holds a concatenated stream of pickles, hence reading
        # until EOFError rather than a single pickle.load.
        data = []
        try:
            with open(filename, 'rb') as fr:
                print(filename, 'is found')
                try:
                    while True:
                        data.append(pickle.load(fr))
                except EOFError:
                    pass
        except FileNotFoundError:
            print('File is not found')
        saved_ids = [d['id'] for d in data]

        # Process each complex and append its feature vector to the pickle.
        # `i` counts complexes processed in THIS run; initialized outside the
        # try so the summary print below cannot raise NameError on an early
        # KeyboardInterrupt.
        i = 0
        try:
            for id_name in complex_files:
                if id_name in saved_ids:
                    continue  # already in the pickle from a previous run
                print("start of process for ID :", id_name)
                pathfile = x_path + "/" + id_name
                chains = pdbp.loader_pdbbind(pathfile)
                vector = fc.x_processor_mp([chains, id_name, atom_types, cutoff, pool])
                print("ID : ", id_name)
                print('value of x vector (R^N) = ', vector)
                # Append mode ('ab') so earlier records survive interruption.
                with open(filename, 'ab') as f:
                    pickle.dump(vector, f)
                i += 1
        except KeyboardInterrupt:
            # Best-effort stop: everything dumped so far is already on disk.
            print('interrupted !!')

    end_time = time.time()
    print("the number of protein processed in current run = ", i)
    print('time elapsed =', end_time - start_time, 'seconds')