-
Notifications
You must be signed in to change notification settings - Fork 0
/
ncatclf.py
333 lines (301 loc) · 23.7 KB
/
ncatclf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
import os
from collections import defaultdict
import itertools
import shutil
import sys
from models import tf_idf, w2v_w_tf_idf
def rem_acc(acc_train, acc_test, label):
    """Fold one run's accuracies into the running statistics of ``label``.

    The module-level ``stat`` mapping is updated in place: for both the
    ``'train'`` and ``'test'`` entries the stored ``max``/``min`` are widened
    to cover the new value, and the value is added to ``avg`` (which is
    therefore a running sum until the caller divides it).

    Args:
        acc_train: Accuracy obtained on the training split.
        acc_test: Accuracy obtained on the test split.
        label: Key of the experiment inside ``stat``.
    """
    train_stats = stat[label]['train']
    test_stats = stat[label]['test']

    # Extremes first, for both splits; the stored value is kept on ties.
    for bucket, accuracy in ((train_stats, acc_train), (test_stats, acc_test)):
        bucket['max'] = max(bucket['max'], accuracy)
        bucket['min'] = min(bucket['min'], accuracy)

    # Then accumulate the sums.
    train_stats['avg'] += acc_train
    test_stats['avg'] += acc_test
# Select the data directory: each sub-directory of ``root`` is one category.
root = os.path.join(os.curdir, 'n_categories')
# os.listdir() returns every entry in arbitrary order.  Keep directories only
# (a stray file such as .DS_Store would later make shutil.copytree fail) and
# sort the names so the generated combinations are reproducible across runs.
category_names = sorted(
    entry for entry in os.listdir(root)
    if os.path.isdir(os.path.join(root, entry))
)

# combinations[r] holds every r-element subset of the categories, r = 2..8.
combinations = defaultdict(list)
for i in range(2, 9):
    # itertools.combinations(iterable, r) yields the r-length combinations of
    # the iterable's elements without repeated elements.
    combinations[i] = list(itertools.combinations(category_names, i))
# Report how many category subsets will be evaluated for each size.
for i in range(2, 9):
    print(i, len(combinations[i]))
# Append the header row to the results file: the subset-size column followed,
# for each classifier/feature pair, by six columns (train max/avg/min and
# test max/avg/min).
header_columns = ['n_categories']
for header_classifier in ('svm_lin', 'svm_rbf', 'knn', 'rfc',
                          'mlp_relu', 'mlp_logistic', 'mlp_tanh'):
    for header_features in ('tfidf', 'tfidf_w2v', 'tfidf_w2v_c'):
        for header_split in ('train', 'test'):
            for header_aggregate in ('max', 'avg', 'min'):
                header_columns.append('{}_{}_{}_{}'.format(
                    header_classifier, header_features,
                    header_split, header_aggregate))

with open('accuracy_results.txt', 'a') as results:
    # Tab-separated columns, terminated by a single newline.
    results.write('\t'.join(header_columns) + '\n')
# Experiment grid: every classifier configuration below is evaluated with
# every feature representation.  The resulting labels and their order
# (classifier-major) define the column order of each row in the results file.
classifier_configs = [
    ('SVM_LIN', {'classifier': 'SVM', 'svm_kernel': 'linear'}),
    ('SVM_RBF', {'classifier': 'SVM', 'svm_kernel': 'rbf'}),
    ('KNN', {'classifier': 'KNN'}),
    ('RFC', {'classifier': 'RFC'}),
    ('MLP', {'classifier': 'MLP', 'mlp_activation': 'relu'}),
    ('MLP_LOGIST', {'classifier': 'MLP', 'mlp_activation': 'logistic'}),
    ('MLP_TANH', {'classifier': 'MLP', 'mlp_activation': 'tanh'}),
]
feature_variants = [
    ('TFIDF', tf_idf, {}),
    ('TFIDF_W2V', w2v_w_tf_idf, {'concatenate': False}),
    ('TFIDF_W2V_K', w2v_w_tf_idf, {'concatenate': True}),
]
# Flattened list of (label, model function, keyword arguments).
experiments = []
for clf_prefix, clf_kwargs in classifier_configs:
    for feature_suffix, model, feature_kwargs in feature_variants:
        run_kwargs = dict(clf_kwargs, data_dir_path='tmp', verbose=True)
        run_kwargs.update(feature_kwargs)
        experiments.append(
            ('{}_{}'.format(clf_prefix, feature_suffix), model, run_kwargs))

for key in sorted(combinations.keys()):
    if not combinations[key]:
        # There are fewer categories than ``key``, so there is nothing to
        # evaluate.  Skipping avoids dividing the sums by zero below; no row
        # is written for this subset size.
        continue

    # ``stat`` must stay a module-level name: rem_acc() updates it in place.
    # 'avg' accumulates a sum here and is turned into a mean after the loop.
    stat = {
        label: {split: {'max': 0, 'min': sys.maxsize, 'avg': 0}
                for split in ('train', 'test')}
        for label, _, _ in experiments
    }
    k = 0  # number of category subsets evaluated for this size
    for combination in combinations[key]:
        # Build a scratch data directory containing only the chosen
        # categories; remove any leftover from an interrupted earlier run.
        if os.path.exists('tmp'):
            shutil.rmtree('tmp')
        os.mkdir('tmp')
        try:
            for category_name in combination:
                shutil.copytree(os.path.join(root, category_name),
                                os.path.join('tmp', category_name))
            # Each model call's result is unpacked into rem_acc's
            # (acc_train, acc_test) positional parameters.
            for label, model, run_kwargs in experiments:
                rem_acc(*model(**run_kwargs), label=label)
        finally:
            # Always drop the scratch directory, even if a model call fails.
            shutil.rmtree('tmp')
        k += 1

    # Convert the accumulated sums into means.
    for label, _, _ in experiments:
        stat[label]['train']['avg'] /= k
        stat[label]['test']['avg'] /= k

    # One tab-separated row: the subset size, then for every experiment the
    # train max/avg/min followed by the test max/avg/min.
    row = [key]
    for label, _, _ in experiments:
        for split in ('train', 'test'):
            for aggregate in ('max', 'avg', 'min'):
                row.append(stat[label][split][aggregate])
    with open('accuracy_results.txt', 'a') as results:
        results.write('\t'.join('{}'.format(cell) for cell in row) + '\n')