# -*- coding: utf-8 -*-
"""
Here, we build a simple convolutional neural network in PyTorch and train it to recognize handwritten digits using the MNIST dataset.
MNIST contains 70,000 images of handwritten digits: 60,000 for training and 10,000 for testing. The images are grayscale, 28x28 pixels, and centered, which reduces preprocessing and lets us get started quickly.
"""
from google.colab import drive
drive.mount('/content/drive')
# Setting up the PyTorch environment
import torch
import torchvision  # torchvision provides popular datasets, model architectures, and common image transformations for computer vision
# Defining the hyperparameters:
# n_epochs defines how many times we loop over the complete training dataset.
# learning_rate and momentum are hyperparameters for the optimizer.
# For repeatable experiments we have to set random seeds.
# cuDNN uses nondeterministic algorithms, which we disable with torch.backends.cudnn.enabled = False.
# With nondeterminism disabled, the same inputs and parameters always yield the same outputs.
n_epochs = 3
batch_size_train = 64
batch_size_test = 1000
learning_rate = 0.01
momentum = 0.5
log_interval = 10
random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)
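# A minimal alternative sketch (an addition, not part of the original setup): rather than
# disabling cuDNN entirely, one can keep it enabled and restrict it to deterministic kernels.
# These flags only matter when training on a GPU and are harmless here.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False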
# Load the MNIST dataset through DataLoaders, using TorchVision.
# TorchVision offers a lot of handy transformations, such as cropping or normalization.
# We use a batch size of 64 for training and 1000 for testing on this dataset.
# The values 0.1307 and 0.3081 used for the Normalize() transformation below are the global mean and standard deviation of the MNIST dataset.
train_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('/files/', train=True, download=True,
                               transform=torchvision.transforms.Compose([
                                   torchvision.transforms.ToTensor(),
                                   torchvision.transforms.Normalize(
                                       (0.1307,), (0.3081,))
                               ])),
    batch_size=batch_size_train, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('/files/', train=False, download=True,
                               transform=torchvision.transforms.Compose([
                                   torchvision.transforms.ToTensor(),
                                   torchvision.transforms.Normalize(
                                       (0.1307,), (0.3081,))
                               ])),
    batch_size=batch_size_test, shuffle=True)
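# Optional sanity check (a sketch, not part of the original pipeline): recompute the global
# mean and standard deviation that the Normalize() transform above assumes for MNIST.
# mnist_raw and raw_pixels are names introduced here only for this illustration.
mnist_raw = torchvision.datasets.MNIST('/files/', train=True, download=True,
                                       transform=torchvision.transforms.ToTensor())
raw_pixels = torch.stack([img for img, _ in mnist_raw])    # shape: (60000, 1, 28, 28)
print(raw_pixels.mean().item(), raw_pixels.std().item())   # roughly 0.1307 and 0.3081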
examples = enumerate(test_loader)  # enumerate() adds a counter to an iterable and returns an enumerate object
batch_idx, (example_data, example_targets) = next(examples)
print(type(examples))
# Converting the remaining batches to a list (note: this consumes the iterator)
print(list(examples)[0:1])
# One test data batch has the following dimensions:
# 1000 examples of 28x28 pixels in grayscale (a single channel)
print(example_data.shape)
import matplotlib.pyplot as plt
fig = plt.figure()
for i in range(6):
    plt.subplot(2, 3, i + 1)
    plt.tight_layout()
    plt.imshow(example_data[i][0], cmap='gray', interpolation='none')
    plt.title("Ground Truth: {}".format(example_targets[i]))
    plt.xticks([])
    plt.yticks([])
fig
"""**Buiding the Neural Network**
We build a CNN model with two 2-D convolutional layers followed by two fully-connected (or linear) layers. As activation function we'll choose rectified linear units (ReLUs in short) and as a means of regularization we'll use two dropout layers.
"""
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)  # -1 lets PyTorch infer the batch dimension when reshaping
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
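# Quick shape check (an optional sketch, not part of the original pipeline): a 28x28 input
# becomes 24x24 after conv1 (kernel size 5), 12x12 after 2x2 max pooling, 8x8 after conv2,
# and 4x4 after the second pooling, so the flattened size is 20 * 4 * 4 = 320.
# fork_rng() keeps this throwaway instantiation from disturbing the seeded RNG state.
with torch.random.fork_rng():
    _shape_check = Net()
    print(_shape_check(torch.zeros(1, 1, 28, 28)).shape)  # expected: torch.Size([1, 10])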
# Initialize the network and the optimizer.
network = Net()
optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
"""**Training the Model**"""
# To create a nice training curve later on, we also create two lists for saving training and testing losses.
# On the x-axis we want to display the number of training examples the network has seen during training.
train_losses = []
train_counter = []
test_losses = []
test_counter = [i*len(train_loader.dataset) for i in range(n_epochs + 1)]
# Defining train():
# We manually set the gradients to zero using optimizer.zero_grad(), since PyTorch accumulates gradients by default.
# We then produce the output of the network (forward pass) and compute the negative log-likelihood loss between the output and the ground-truth label.
# With the backward() call we collect a new set of gradients, which optimizer.step() propagates back into each of the network's parameters.
# Neural network modules as well as optimizers can save and load their internal state using .state_dict().
def train(epoch):
    network.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = network(data)
        loss = F.nll_loss(output, target)  # nll_loss() is the negative log-likelihood loss
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            train_losses.append(loss.item())
            train_counter.append(
                (batch_idx * 64) + ((epoch - 1) * len(train_loader.dataset)))
            torch.save(network.state_dict(), '/content/drive/MyDrive/ML Nov/model.pth')
            # Modules as well as optimizers can continue training from previously saved state dicts if needed - we'd just need to call .load_state_dict(state_dict).
            torch.save(optimizer.state_dict(), '/content/drive/MyDrive/ML Nov/optimizer.pth')
#network.state_dict()
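# A brief look at what gets checkpointed (an optional sketch, not needed for training):
# state_dict() maps each parameter and buffer name to its tensor.
for _name, _tensor in network.state_dict().items():
    print(_name, tuple(_tensor.shape))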
# Defining test():
# Here we sum up the test loss and keep track of correctly classified digits to compute the accuracy of the network.
def test():
    network.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = network(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum the per-example losses (size_average=False is deprecated)
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
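# A toy illustration (not part of the pipeline) of how .max(1, keepdim=True)[1] extracts the
# prediction inside test(): it returns the index of the largest log-probability along dimension 1.
_toy_scores = torch.tensor([[-2.3, -0.1, -4.0],
                            [-0.2, -3.0, -1.9]])
print(_toy_scores.max(1, keepdim=True)[1])  # tensor([[1], [0]])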
"""Using the context manager no_grad() we can avoid storing the computations done producing the output of our network in the computation graph.
Time to run the training! We'll manually add a test() call before we loop over n_epochs to evaluate our model with randomly initialized parameters.
"""
test()
for epoch in range(1, n_epochs + 1):
    train(epoch)
    test()
"""With just 3 epochs of training we already managed to achieve 97% accuracy on the test set!"""
print(train_losses)
print(test_losses)
print(test_counter)
"""**Evaluating the Model Performance**"""
# Plot our training curve.
fig = plt.figure()
plt.plot(train_counter, train_losses, color='blue')
plt.scatter(test_counter, test_losses[0:4], color='red')
plt.legend(['Train Loss', 'Test Loss'], loc='upper right')
plt.xlabel('number of training examples seen')
plt.ylabel('negative log likelihood loss')
fig
# Look at a few examples as we did earlier and compare the model's output.
with torch.no_grad():
    output = network(example_data)
fig = plt.figure()
for i in range(6):
    plt.subplot(2, 3, i + 1)
    plt.tight_layout()
    plt.imshow(example_data[i][0], cmap='gray', interpolation='none')
    plt.title("Prediction: {}".format(
        output.data.max(1, keepdim=True)[1][i].item()))
    plt.xticks([])
    plt.yticks([])
fig
"""**Continued Training from Checkpoints**
Let's continue training the network, or rather see how we can continue training from the state_dicts we saved during our first training run. We'll initialize a new set of network and optimizers.
"""
continued_network = Net()
continued_optimizer = optim.SGD(continued_network.parameters(), lr=learning_rate,
                                momentum=momentum)
# Using .load_state_dict() we can now load the internal state of the network and optimizer from when we last saved them.
network_state_dict = torch.load('/content/drive/MyDrive/ML Nov/model.pth')
continued_network.load_state_dict(network_state_dict)
optimizer_state_dict = torch.load('/content/drive/MyDrive/ML Nov/optimizer.pth')
continued_optimizer.load_state_dict(optimizer_state_dict)
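# Optional check (a sketch, not in the original run; restored_pred is a name introduced here
# for illustration): run the restored network on the example batch to confirm the loaded
# weights produce sensible predictions.
continued_network.eval()
with torch.no_grad():
    restored_pred = continued_network(example_data).argmax(dim=1)
print(restored_pred[:6])  # should agree with the ground-truth digits for most of the examples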
# Note: train() and test() defined above operate on the module-level `network` and `optimizer`,
# so this loop keeps training those objects while extending test_counter for the extra epochs.
for i in range(4, 9):
    test_counter.append(i * len(train_loader.dataset))
    train(i)
    test()