# -*- coding: utf-8 -*-
"""hw9_test.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1IQH3mTGPtxqjiq7unxU-hPhSyf3kHorr
"""
#Import the necessary modules for the job
from pycocotools.coco import COCO
import numpy as np
import matplotlib.pyplot as plt
import skimage.io as io
from PIL import Image
from torchvision import utils
from torch.utils.data import DataLoader
import os
import torch
from torchvision import transforms as tvt
import torch.nn as nn
from sklearn.metrics import confusion_matrix
import seaborn as sns
import torch.nn.functional as F
#Import the ViTHelper.py file
os.chdir("/content/drive/MyDrive/Purdue-First Year/BME 64600/hw9_RussellHo/")
from ViTHelper import MasterEncoder, SelfAttention, AttentionHead
#Custom Dataset class for the DataLoader (adapted from hw4)
class MyDataset(torch.utils.data.Dataset):
    def __init__(self, dataset):
        super().__init__()
        self.dataset = dataset
        labels = []
        for i in range(5):
            labels += [i for j in range(500)]  #Assign labels based on how the data was loaded: 500 images per class
        self.labels = labels

    def __len__(self):
        #Return the total number of images
        return len(self.dataset)

    def __getitem__(self, index):
        try:
            img = Image.open(self.dataset[index])
        except OSError as e:
            print(f"Error opening file: {self.dataset[index]}")
            print(f"Error message: {e}")
            raise
        #Resize to the 64x64 input the ViT below expects (assumed to match the training pipeline), then convert to a tensor (C x H x W)
        img_tensor = tvt.Compose([tvt.Resize((64, 64)), tvt.ToTensor()])(img)
        #Ensure all images possess 3 channels (expanding 1-channel images to 3)
        if img_tensor.size()[0] == 1:
            img_tensor = img_tensor.repeat(3, 1, 1)
        int_label = self.labels[index]
        #Return the tuple: (image tensor, integer label)
        return img_tensor, int_label
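
#A minimal illustrative check (an addition, not part of the original assignment): because labels are
#assigned in blocks of 500 in load order, indices 0-499 map to class 0 ('airplane') and index 500 is
#the first 'bus' image. The helper below is defined for reference only and is never called.
def _label_layout_check(my_dataset):
    assert my_dataset.labels[0] == 0 and my_dataset.labels[499] == 0  #First block: 'airplane'
    assert my_dataset.labels[500] == 1  #First image of the second category ('bus')
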
#Function to build the dataset list given a list of image ids
def dataset_appender(dataset, imgIds, coco):
    for entry in range(len(imgIds)):  #entry is an integer index
        # print(f"Round {entry+1} of {len(imgIds)}")
        img = coco.loadImgs(imgIds[entry])[0]  #img here is a dictionary of image metadata
        I = os.path.join('train2014', img['file_name'])  #Append the path to the image file (relative to the homework directory)
        dataset.append(I)
    return dataset
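
#Illustrative sketch (an addition, not in the original homework): dataset_appender only collects
#relative path strings; nothing is read from disk at this point. The tiny stand-in coco object
#below uses a hypothetical file-name pattern purely to show the expected output format.
def _dataset_appender_demo():
    class _FakeCoco:
        def loadImgs(self, img_id):
            return [{'file_name': f'COCO_train2014_{img_id:012d}.jpg'}]  #Hypothetical file name
    return dataset_appender([], [1, 2], _FakeCoco())
    #-> ['train2014/COCO_train2014_000000000001.jpg', 'train2014/COCO_train2014_000000000002.jpg']
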
#Data creator function (builds a test dataset of 5 classes)
def test_datacreator():
    #Initialize the COCO API for instance annotations
    os.chdir("/content/drive/MyDrive/Purdue-First Year/BME 64600/hw9_RussellHo")
    # print(os.listdir())
    dataType = 'train2014'
    annFile = 'annotations/instances_{}.json'.format(dataType)
    coco = COCO(annFile)
    # #Display COCO categories and supercategories
    # cats = coco.loadCats(coco.getCatIds())
    # nms = [cat['name'] for cat in cats]
    # print('COCO categories: \n{}\n'.format(' '.join(nms)))
    # nms = set([cat['supercategory'] for cat in cats])
    # print('COCO supercategories: \n{}'.format(' '.join(nms)))
    #Get test images containing the given categories
    categories_list = ['airplane', 'bus', 'cat', 'dog', 'pizza']
    dataset = []  #Initialize an empty dataset
    os.chdir('train2014/')
    for k in range(len(categories_list)):
        catIds = coco.getCatIds(catNms=categories_list[k])
        imgIds = coco.getImgIds(catIds=catIds)  #List of image ids for this category
        imgIds = imgIds[0:500]  #Keep 500 images for each category in the test set
        dataset = dataset_appender(dataset, imgIds, coco)
    # print(len(dataset))  #The dataset should contain 2500 images (5 categories x 500 images each)
    # #Display 3 images from the dataset chosen at random
    # list_images = []
    # for j in range(3):
    #     img = Image.open(dataset[np.random.randint(0, len(dataset))])  #Select an image belonging to one of the categories at random
    #     list_images.append(img)
    # f, axarr = plt.subplots(1, 3)
    # axarr[0].imshow(list_images[0])
    # plt.axis('off')
    # axarr[1].imshow(list_images[1])
    # plt.axis('off')
    # axarr[2].imshow(list_images[2])
    # plt.axis('off')
    # plt.show()
    # #Load a random image from the dataset
    # img = Image.open(dataset[np.random.randint(0, len(dataset))])
    # img.show()
    os.chdir("../")
    return dataset
class ViT(nn.Module):
    def __init__(self, image_size, patch_size, num_channels, num_classes, embedding_size, num_heads, num_layers):
        super().__init__()
        #Calculate the number of patches and the sequence length (patches + class token)
        num_patches = (image_size // patch_size) ** 2
        seq_length = num_patches + 1
        #Conv2D layer for embedding patches (kernel size and stride equal to the patch size, so the layer patchifies the image without explicitly dividing it)
        self.embedding_conv = nn.Conv2d(num_channels, embedding_size, kernel_size=patch_size, stride=patch_size)
        #Initialize the class token as a learnable parameter
        self.class_token = nn.Parameter(torch.randn(1, 1, embedding_size))
        #Position embeddings as learnable parameters with dimensions (1, seq_length, embedding_size)
        self.position_embeddings = nn.Parameter(torch.randn(1, seq_length, embedding_size))
        #Transformer encoder
        self.encoder = MasterEncoder(seq_length, embedding_size, num_layers, num_heads)
        #Final class prediction from the class token through a linear layer that outputs class logits
        self.mlp = nn.Linear(embedding_size, num_classes)

    def forward(self, x):
        #Create patch embeddings with the Conv2D layer: (batch_size, embedding_size, H/patch, W/patch)
        embeddings = self.embedding_conv(x)
        #Flatten the spatial dimensions and move the embedding dimension last: (batch_size, num_patches, embedding_size)
        embeddings = embeddings.flatten(2).transpose(1, 2)
        #Prepend the class token to the sequence
        batch_size = x.shape[0]
        class_tokens = self.class_token.expand(batch_size, -1, -1)  #Expand the class token to match the batch size
        embeddings_with_token = torch.cat([class_tokens, embeddings], dim=1)
        #Add position embeddings to the sequence of embeddings with the class token
        embeddings_with_token_and_positions = embeddings_with_token + self.position_embeddings
        #Pass through the Transformer encoder
        transformer_output = self.encoder(embeddings_with_token_and_positions)
        #Take the class token from the encoder's output sequence and feed it through the MLP head
        class_token_output = transformer_output[:, 0]
        logits = self.mlp(class_token_output)  #Final logits
        return logits
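
#A minimal shape-check sketch (added for reference, not part of the original script), assuming the
#same configuration used in __main__ below: 64x64 RGB inputs with 16x16 patches give (64//16)**2 = 16
#patches, so the encoder sees 17 tokens (patches + class token). The helper is defined only; nothing calls it.
def _vit_shape_check(batch_size=2):
    model = ViT(image_size=64, patch_size=16, num_channels=3, num_classes=5,
                embedding_size=256, num_heads=8, num_layers=6)
    x = torch.randn(batch_size, 3, 64, 64)  #Dummy batch of 64x64 RGB images
    logits = model(x)
    return logits.shape  #Expected: torch.Size([batch_size, 5])
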
#Function for evaluating the network and plotting the confusion matrix
def confusionmatrix(net, mydataloader, device, path):
    #Set the network to evaluation mode
    net = net.eval()
    correct = 0
    total = 0
    y_pred = []
    y = []
    with torch.no_grad():
        for i, data in enumerate(mydataloader):
            print(f"Processing batch {i+1}...")
            images, labels = data  #Image tensors and their respective labels from the dataloader
            images = images.to(device)
            labels = labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)  #Only the predicted class indices are needed
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            # print(predicted)
            # print(labels)
            for label, prediction in zip(labels, predicted):
                y_pred.append(prediction.cpu())  #List of predicted labels (moved to cpu)
                y.append(label.cpu())  #List of true labels (moved to cpu)
    # print(y)
    print('Accuracy of the network on the test images: %d %%' % (100 * correct / total))
    cf_matrix = confusion_matrix(y, y_pred)  #Create the confusion matrix
    sns_plot = sns.heatmap(cf_matrix, annot=True, fmt='g', cbar=False)  #Use a heatmap to display the confusion matrix
    fig = sns_plot.get_figure()
    #Save the figure
    fig.savefig(os.path.join(path, "confusion_matrix_ViT.jpg"))
    plt.show()
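
#Optional helper (an addition, not in the original homework): per-class accuracy can be read off the
#confusion matrix by dividing its diagonal by the row sums. Defined for reference only.
def per_class_accuracy(cf_matrix):
    cf = np.asarray(cf_matrix, dtype=np.float64)
    return cf.diagonal() / cf.sum(axis=1)  #One accuracy value per true (row) class
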
if __name__ == "__main__":
    #Create the test dataset (from validation images)
    test_dataset = test_datacreator()
    #Create an instance of the custom Dataset class for the test dataset
    test_my_dataset = MyDataset(test_dataset)
    #Wrap the test Dataset within the DataLoader class
    test_mydataloader = DataLoader(test_my_dataset, shuffle=True, batch_size=100, num_workers=1)
    #Load the trained model for evaluation on the test dataset
    vit_model = ViT(image_size=64, patch_size=16, num_channels=3, num_classes=5, embedding_size=256, num_heads=8, num_layers=6)
    vit_model.load_state_dict(torch.load("vit_model.pth", map_location="cpu"))  #map_location lets the checkpoint load even when CUDA is unavailable
    #Check whether CUDA is available
    use_cuda = torch.cuda.is_available()
    print("Is CUDA computing available? " + str(use_cuda))
    device = torch.device("cuda:0" if use_cuda else "cpu")
    #Move the model to the selected device
    vit_model = vit_model.to(device)
    #Evaluate the model on the test data using the confusion matrix function
    confusionmatrix(vit_model, test_mydataloader, device, "/content/drive/MyDrive/Purdue-First Year/BME 64600/hw9_RussellHo")