-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
116 lines (75 loc) · 2.21 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# for data
import pandas as pd
# for deep learning
from tensorflow.keras import models
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import confusion_matrix
# Directory prefixes for the input data and the saved models, plus the
# fixed length every token sequence is padded to.
DATA_PATH = 'data/'
MODEL_PATH = 'models/'
MAX_LEN = 30
'''
Read prepared test dataset
'''
# Load the test split and print its first rows as a quick sanity check.
df_test = pd.read_csv(DATA_PATH + 'test.csv')
print(df_test.head())
# Target column.
y_test = df_test['y'].values
# Raw text column.
X_test = df_test['text'].values
# Every remaining column is passed on as an additional model feature.
feat_test = df_test.drop(columns=['text', 'y']).values
# Build the word index used to turn texts into integer sequences.
# NOTE(review): the tokenizer is fit on the *test* texts here. If the saved
# models were trained with a tokenizer fit on the training set, the word
# indices produced below will not match the trained embedding - confirm, and
# load the training-time tokenizer instead if one was persisted.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_test)
# Convert texts to index sequences and pad (or truncate) each to MAX_LEN.
X_test_indices = pad_sequences(
    tokenizer.texts_to_sequences(X_test),
    maxlen=MAX_LEN,
)
def _evaluate_saved_model(model_path, inputs, y_true, title):
    """Load a saved Keras model, evaluate it and print a short report.

    The model summary is printed first, then the model is evaluated and used
    to predict on ``inputs``; finally the accuracy and confusion matrix are
    printed between two separator lines.

    Args:
        model_path: Path of the saved model file passed to
            ``models.load_model``.
        inputs: Model input(s) - a single array, or a list of arrays for a
            multi-input model.
        y_true: Ground-truth labels for ``inputs``.
        title: Heading printed at the top of the report.

    Returns:
        A ``(model, scores, predictions, conf_matrix)`` tuple, where
        ``predictions`` is a flat list of 0/1 ints.
    """
    model = models.load_model(model_path)
    model.summary()

    scores = model.evaluate(inputs, y_true)

    # Threshold the predicted scores at 0.5 in one vectorized step.
    # NOTE(review): assumes one output value per sample - confirm against
    # the saved model's output layer.
    probabilities = model.predict(inputs)
    predictions = (probabilities > 0.5).astype(int).ravel().tolist()

    conf_matrix = confusion_matrix(y_true, predictions)

    separator = '\n-----------------------------------\n'
    print(separator)
    print(title)
    # NOTE(review): scores[1] is reported as accuracy, which presumes the
    # model was compiled with accuracy as its first metric - confirm.
    print('Accuracy: %.2f%%\n' % (scores[1]*100))
    print('Confusion matrix: ')
    print(conf_matrix)
    print(separator)

    return model, scores, predictions, conf_matrix


'''
Load saved model (text-only)
'''
bst_model_path = MODEL_PATH + 'bid_lstm.h5'
# load, evaluate and report; this model takes only the padded sequences
model, scores, predictions, conf_matrix = _evaluate_saved_model(
    bst_model_path,
    X_test_indices,
    y_test,
    'Model 1: text-only',
)
'''
Load saved model (text + additional features)
'''
bst_model_path = MODEL_PATH + 'bid_lstm_feats.h5'
# load, evaluate and report; this model takes the sequences plus the
# additional feature columns as a second input
model, scores, predictions, conf_matrix = _evaluate_saved_model(
    bst_model_path,
    [X_test_indices, feat_test],
    y_test,
    'Model 2: text + additional features',
)