-
Notifications
You must be signed in to change notification settings - Fork 1
/
training_testing.py
375 lines (277 loc) · 14.4 KB
/
training_testing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import numpy as np
import pandas as pd
class training_testing():
    """Data-loader construction plus training / evaluation loops for a recurrent model.

    The model passed to the training and test methods is expected to expose
    ``init_hidden(batch_size)`` and to be callable as ``model(inputs, hidden)``
    returning ``(output, hidden)``; this is how it is used below.
    """

    def loader_creation(self, training_features, training_labels, testing_features, testing_labels,
                        split_frac, batch_size, idx=1):
        '''
        Split the training data into five folds, hold one out for validation,
        and wrap train / validation / test data in torch ``DataLoader`` objects.

        Fold layout (no shuffling is done before splitting):
        fold 1 is the trailing 20% of the training data, folds 2-5 are the
        four consecutive quarters of the leading 80%.

        :param training_features: numpy array of training inputs (first axis = samples)
        :param training_labels: numpy array of training labels, aligned with the features
        :param testing_features: numpy array of test inputs
        :param testing_labels: numpy array of test labels
        :param split_frac: unused; kept so existing callers keep working
        :param batch_size: batch size used for all three loaders
        :param idx: which fold (1..5) is held out as the validation set
        :return: (train_loader, valid_loader, test_loader)
        :raises ValueError: if ``idx`` is not one of 1, 2, 3, 4, 5
        '''
        if idx not in (1, 2, 3, 4, 5):
            raise ValueError("idx must be one of 1, 2, 3, 4, 5; got {!r}".format(idx))

        # Fold 1: everything after the 80% mark.
        cut = int(len(training_features) * 0.8)
        head_x, fold1_x = training_features[:cut], training_features[cut:]
        head_y, fold1_y = training_labels[:cut], training_labels[cut:]

        # Halve the leading 80% ...
        cut = int(len(head_x) * 0.5)
        left_x, right_x = head_x[:cut], head_x[cut:]
        left_y, right_y = head_y[:cut], head_y[cut:]

        # ... and halve each half again to obtain folds 2-5.
        cut = int(len(left_y) * 0.5)
        fold2_x, fold3_x = left_x[:cut], left_x[cut:]
        fold2_y, fold3_y = left_y[:cut], left_y[cut:]

        cut = int(len(right_y) * 0.5)
        fold4_x, fold5_x = right_x[:cut], right_x[cut:]
        fold4_y, fold5_y = right_y[:cut], right_y[cut:]

        folds_x = [fold1_x, fold2_x, fold3_x, fold4_x, fold5_x]
        folds_y = [fold1_y, fold2_y, fold3_y, fold4_y, fold5_y]

        # The held-out fold is the validation set; the remaining four folds are
        # concatenated in ascending fold order to form the training set.
        held_out = int(idx) - 1
        val_x, val_y = folds_x[held_out], folds_y[held_out]
        train_x = np.concatenate([f for i, f in enumerate(folds_x) if i != held_out], axis=0)
        train_y = np.concatenate([f for i, f in enumerate(folds_y) if i != held_out], axis=0)

        # create Tensor datasets
        train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
        valid_data = TensorDataset(torch.from_numpy(val_x), torch.from_numpy(val_y))
        test_data = TensorDataset(torch.from_numpy(testing_features[:]), torch.from_numpy(testing_labels[:]))

        # drop_last=True keeps every batch at exactly batch_size, which the
        # training / evaluation loops rely on because they create the hidden
        # state once with init_hidden(batch_size).
        train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size, drop_last=True)
        valid_loader = DataLoader(valid_data, shuffle=True, batch_size=batch_size, drop_last=True)
        test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size, drop_last=True)
        return train_loader, valid_loader, test_loader

    @staticmethod
    def _evaluate(RNN_net, loader, batch_size, criterion, train_on_gpu):
        """Run ``RNN_net`` over ``loader`` without gradients.

        Returns ``(losses, num_correct, num_seen)``: the per-batch loss values,
        the number of rounded predictions equal to their label, and the number
        of predictions actually made (batches dropped by the loader are not
        counted). The caller is responsible for eval()/train() mode switching.
        """
        hidden = RNN_net.init_hidden(batch_size)
        losses = []
        num_correct = 0
        num_seen = 0
        with torch.no_grad():
            for inputs, labels in loader:
                # Detach the hidden state from the previous batch.
                hidden = tuple([each.data for each in hidden])
                if train_on_gpu:
                    inputs, labels = inputs.cuda(), labels.cuda()
                output, hidden = RNN_net(inputs, hidden)
                scores = output.squeeze()
                targets = labels.float()
                losses.append(criterion(scores, targets).item())
                # convert output probabilities to predicted class (0 or 1)
                pred = torch.round(scores)
                num_correct += int(pred.eq(targets.view_as(pred)).sum().item())
                num_seen += pred.numel()
        return losses, num_correct, num_seen

    def RNN_training(self, RNN_net, lr=0.001, epochs=5, train_on_gpu=False,
                     batch_size=50, train_loader=3000, valid_loader=3000, criterion=0, optimizer=0):
        """Train ``RNN_net`` and checkpoint it whenever the validation loss improves.

        Every 100 optimisation steps the model is evaluated on ``valid_loader``;
        loss and accuracy are printed, and if the mean validation loss is not
        worse than the best seen so far the state dict is saved to
        ``model_trained_RNN_not_pretrained2.pt`` in the working directory.

        :param RNN_net: model exposing ``init_hidden`` and ``__call__(inputs, hidden)``
        :param lr: unused here (the optimizer is passed in already configured)
        :param epochs: number of passes over ``train_loader``
        :param train_on_gpu: move the model and every batch to CUDA when true
        :param batch_size: batch size used to build the hidden state; every
            batch yielded by the loaders must have this size
        :param train_loader: iterable of (inputs, labels) training batches
        :param valid_loader: iterable of (inputs, labels) validation batches
        :param criterion: loss callable ``criterion(output, target)``
        :param optimizer: optimizer whose ``step()`` updates ``RNN_net``
        """
        print_every = 100
        clip = 5  # gradient clipping threshold
        counter = 0
        # float("inf") instead of np.Inf: the latter was removed in NumPy 2.0.
        valid_loss_min = float("inf")

        if train_on_gpu:
            RNN_net.cuda()
        RNN_net.train()

        for e in range(epochs):
            # fresh hidden state at the start of every epoch
            h = RNN_net.init_hidden(batch_size)

            for inputs, labels in train_loader:
                counter += 1
                if train_on_gpu:
                    inputs, labels = inputs.cuda(), labels.cuda()

                # Detach the hidden state, otherwise we'd backprop through
                # the entire training history.
                h = tuple([each.data for each in h])

                RNN_net.zero_grad()
                output, h = RNN_net(inputs, h)

                # Squeeze away singleton dimensions of the output and compare
                # against float labels.
                loss = criterion(output.squeeze(), labels.float())
                loss.backward()
                # clip_grad_norm_ helps prevent exploding gradients in RNNs / LSTMs.
                nn.utils.clip_grad_norm_(RNN_net.parameters(), clip)
                optimizer.step()

                if counter % print_every != 0:
                    continue

                # ---- periodic validation ----
                RNN_net.eval()
                val_losses, num_correct, num_seen = training_testing._evaluate(
                    RNN_net, valid_loader, batch_size, criterion, train_on_gpu)
                RNN_net.train()

                mean_val_loss = np.mean(val_losses) if val_losses else float("nan")
                # Accuracy over the samples that were actually evaluated; the
                # loader may drop a trailing partial batch.
                valid_acc = num_correct / num_seen if num_seen else float("nan")

                print("Epoch: {}/{}...".format(e + 1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.6f}...".format(loss.item()),
                      "Val Loss: {:.6f}".format(mean_val_loss))
                print("Validation accuracy: {:.3f}".format(valid_acc))

                if mean_val_loss <= valid_loss_min:
                    print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
                        valid_loss_min,
                        mean_val_loss))
                    torch.save(RNN_net.state_dict(), 'model_trained_RNN_not_pretrained2.pt')
                    valid_loss_min = mean_val_loss

    def RNN_test(self, RNN_net, lr=0.001, epochs=5, train_on_gpu=False,
                 batch_size=50, test_loader=3000, criterion=0, optimizer=0):
        """Evaluate ``RNN_net`` on ``test_loader`` and print loss and accuracy.

        :param RNN_net: model exposing ``init_hidden`` and ``__call__(inputs, hidden)``
        :param lr: unused; kept for signature compatibility
        :param epochs: unused; kept for signature compatibility
        :param train_on_gpu: move every batch to CUDA when true (the model is
            not moved here)
        :param batch_size: batch size used to build the hidden state
        :param test_loader: iterable of (inputs, labels) test batches
        :param criterion: loss callable ``criterion(output, target)``
        :param optimizer: unused; kept for signature compatibility
        :return: ``(mean_test_loss, test_accuracy)`` as floats (``nan`` when
            the loader yields no batch)
        """
        RNN_net.eval()
        test_losses, num_correct, num_seen = training_testing._evaluate(
            RNN_net, test_loader, batch_size, criterion, train_on_gpu)

        mean_test_loss = float(np.mean(test_losses)) if test_losses else float("nan")
        # Accuracy over the samples that were actually evaluated; the loader
        # may drop a trailing partial batch.
        test_acc = num_correct / num_seen if num_seen else float("nan")

        print("Test loss: {:.3f}".format(mean_test_loss))
        print("Test accuracy: {:.3f}".format(test_acc))
        return mean_test_loss, test_acc

    def gpu_check(self):
        '''
        Report whether CUDA is available.

        :return: True when ``torch.cuda.is_available()``, else False
        '''
        train_on_gpu = torch.cuda.is_available()
        if train_on_gpu:
            print('Training on GPU.')
        else:
            print('No GPU available, training on CPU.')
        return train_on_gpu
class SentimentRNN(nn.Module):
    """
    Embedding -> multi-layer LSTM -> linear -> sigmoid model used for sentiment analysis.
    """

    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, weights_matrix=0, drop_prob=0.5, train_on_gpu=False, pretrained=False):
        """
        Initialize the model by setting up the layers.

        :param vocab_size: number of embedding rows (ignored when ``pretrained``)
        :param output_size: number of output units of the final linear layer
        :param embedding_dim: embedding width (ignored when ``pretrained``)
        :param hidden_dim: LSTM hidden size
        :param n_layers: number of stacked LSTM layers
        :param weights_matrix: 2-D (num_embeddings, embedding_dim) tensor or
            array of embedding weights; only read when ``pretrained`` is true
        :param drop_prob: dropout probability passed to ``nn.LSTM``
        :param train_on_gpu: stored on the instance as ``self.train_on_gpu``
        :param pretrained: initialise the embedding from ``weights_matrix``
        :raises ValueError: if ``pretrained`` is true and ``weights_matrix`` is not 2-D
        """
        super(SentimentRNN, self).__init__()
        self.train_on_gpu = train_on_gpu
        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        # embedding layer
        if pretrained:
            # Accept numpy arrays as well as tensors; load_state_dict itself
            # only takes tensors.
            weights = torch.as_tensor(weights_matrix, dtype=torch.float)
            if weights.dim() != 2:
                raise ValueError(
                    "weights_matrix must be 2-D (num_embeddings, embedding_dim) when pretrained=True")
            # The matrix shape overrides vocab_size / embedding_dim.
            num_embeddings, embedding_dim = weights.shape
            self.embedding = nn.Embedding(num_embeddings, embedding_dim)
            self.embedding.load_state_dict({'weight': weights})
        else:
            self.embedding = nn.Embedding(vocab_size, embedding_dim)

        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, dropout=drop_prob, batch_first=True)

        # linear and sigmoid layers
        self.fc = nn.Linear(hidden_dim, output_size)
        self.sig = nn.Sigmoid()

    def forward(self, x, hidden):
        """
        Perform a forward pass of the model on some input and hidden state.

        :param x: batch of token indices, batch dimension first (the LSTM is
            built with ``batch_first=True``); cast to ``long`` before lookup
        :param hidden: ``(h, c)`` tuple as produced by :meth:`init_hidden`
        :return: ``(sig_out, hidden)`` where ``sig_out`` has one value per
            batch row (the last sigmoid output of that row) and ``hidden`` is
            the updated LSTM state
        """
        batch_size = x.size(0)

        # embeddings and lstm_out
        x = x.long()
        embeds = self.embedding(x)
        lstm_out, hidden = self.lstm(embeds, hidden)

        # flatten all time steps so the linear layer sees (N, hidden_dim)
        lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)

        out = self.fc(lstm_out)
        sig_out = self.sig(out)

        # reshape to be batch size first, then keep only the final value of
        # each row, i.e. the output for the last time step
        sig_out = sig_out.view(batch_size, -1)
        sig_out = sig_out[:, -1]

        return sig_out, hidden

    def init_hidden(self, batch_size):
        ''' Initializes hidden state

        Returns a ``(h, c)`` tuple of zero tensors with shape
        (n_layers, batch_size, hidden_dim), created with the dtype and on the
        device of the model's own parameters so it works on CPU and on
        whichever GPU the model currently lives on.
        '''
        weight = next(self.parameters())
        shape = (self.n_layers, batch_size, self.hidden_dim)
        hidden = (torch.zeros(shape, dtype=weight.dtype, device=weight.device),
                  torch.zeros(shape, dtype=weight.dtype, device=weight.device))
        return hidden