ffnn_old.py
import numpy as np
import torch
import torch.nn as nn
from torch.nn import init
import torch.optim as optim
import math
import random
import os
from pathlib import Path
import time
from tqdm import tqdm
from data_loader import fetch_data
unk = '<UNK>'
# Consult the PyTorch documentation for information on the functions used below:
# https://pytorch.org/docs/stable/torch.html
class FFNN(nn.Module):
    def __init__(self, input_dim, h):
        super(FFNN, self).__init__()
        self.h = h
        self.W1 = nn.Linear(input_dim, h)
        self.activation = nn.ReLU()  # The rectified linear unit; one valid choice of activation function
        self.W2 = nn.Linear(h, 5)
        # The two lines below are not a source of error
        self.softmax = nn.LogSoftmax()  # Converts a vector of scores into a probability distribution; log probabilities are computed for numerical benefits
        self.loss = nn.NLLLoss()  # The negative log likelihood (cross-entropy) loss taught in class

    def compute_Loss(self, predicted_vector, gold_label):
        return self.loss(predicted_vector, gold_label)

    def forward(self, input_vector):
        # The z_i just record the intermediate computations for clarity
        z1 = self.W1(input_vector)
        z2 = self.W2(self.activation(z1))  # error note: the vector should be put through two different layers (W1, then W2)
        # Note that the ReLU is applied to the output scores before the softmax, so the values fed to the softmax are non-negative
        predicted_vector = self.softmax(self.activation(z2))
        return predicted_vector
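# A minimal sanity-check sketch of the FFNN above: build a small model, push one
# bag-of-words vector through it, and compute the loss against a gold label.
# The sizes used here (vocabulary of 10, hidden size 4, gold label 2) are
# illustrative values only, not taken from the assignment data.
def _demo_forward_pass():
    model = FFNN(input_dim=10, h=4)
    bow_vector = torch.zeros(10)
    bow_vector[3] = 2.0  # pretend the word at index 3 appears twice in the document
    bow_vector[7] = 1.0  # ...and the word at index 7 appears once
    log_probs = model(bow_vector)  # a length-5 vector of log probabilities, one per label
    predicted_label = torch.argmax(log_probs)
    loss = model.compute_Loss(log_probs.view(1, -1), torch.tensor([2]))
    print(predicted_label.item(), loss.item())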
# Returns:
# vocab = A set of strings corresponding to the vocabulary
def make_vocab(data):
    vocab = set()
    for document, _ in data:
        for word in document:
            vocab.add(word)
    return vocab
# Returns:
# vocab = A set of strings corresponding to the vocabulary, including <UNK>
# word2index = A dictionary mapping a word/token to its index (a number in 0, ..., V - 1)
# index2word = A dictionary inverting the mapping of word2index
def make_indices(vocab):
    vocab_list = sorted(vocab)
    vocab_list.append(unk)
    word2index = {}
    index2word = {}
    for index, word in enumerate(vocab_list):
        word2index[word] = index
        index2word[index] = word
    vocab.add(unk)  # add the <UNK> token so that vocab stays consistent with word2index
    return vocab, word2index, index2word
# Returns:
# vectorized_data = A list of pairs (vector representation of the input, y)
def convert_to_vector_representation(data, word2index):
    vectorized_data = []
    for document, y in data:
        vector = torch.zeros(len(word2index))
        for word in document:
            index = word2index.get(word, word2index[unk])
            vector[index] += 1
        vectorized_data.append((vector, y))
    return vectorized_data
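# A small illustrative run of the preprocessing helpers above on a made-up toy
# corpus (the documents and labels are invented for demonstration): build the
# vocabulary, create the index mappings, and turn each document into a
# bag-of-words count vector in which unseen words fall back to the <UNK> index.
def _demo_preprocessing():
    toy_data = [(["the", "movie", "was", "great"], 4),
                (["the", "movie", "was", "bad"], 0)]
    vocab = make_vocab(toy_data)
    vocab, word2index, index2word = make_indices(vocab)
    vectorized = convert_to_vector_representation(toy_data, word2index)
    first_vector, first_label = vectorized[0]
    print(len(word2index), first_vector, first_label)
    # A word never seen in training maps to the <UNK> index:
    unseen = convert_to_vector_representation([(["terrific"], 3)], word2index)
    print(unseen[0][0][word2index[unk]])  # count accumulated at the <UNK> position, i.e. 1.0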
def main(hidden_dim, number_of_epochs):
    print("Fetching data")
    train_data, valid_data = fetch_data()  # Each is a list of pairs (document, y) with y in {0, 1, 2, 3, 4}
    vocab = make_vocab(train_data)
    vocab, word2index, index2word = make_indices(vocab)
    print("Fetched and indexed data")
    train_data = convert_to_vector_representation(train_data, word2index)
    valid_data = convert_to_vector_representation(valid_data, word2index)
    print("Vectorized data")
    model = FFNN(input_dim=len(vocab), h=hidden_dim)
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)  # Stochastic gradient descent with momentum; the parameters are updated after every training example
    print("Training for {} epochs".format(number_of_epochs))
    for epoch in range(number_of_epochs):
        model.train()
        loss = 0
        correct = 0
        total = 0
        start_time = time.time()
        print("Training started for epoch {}".format(epoch + 1))
        random.shuffle(train_data)  # Good practice to shuffle the order of the training data
        for input_vector, gold_label in tqdm(train_data):
            optimizer.zero_grad()  # Clear the gradients from the previous update before computing new ones
            predicted_vector = model(input_vector)
            predicted_label = torch.argmax(predicted_vector)
            correct += int(predicted_label == gold_label)
            total += 1
            loss = model.compute_Loss(predicted_vector.view(1, -1), torch.tensor([gold_label]))
            loss.backward()
            optimizer.step()
        print("Training completed for epoch {}".format(epoch + 1))
        print("Training accuracy for epoch {}: {}".format(epoch + 1, correct / total))
        print("Training time for this epoch: {}".format(time.time() - start_time))
        loss = 0
        correct = 0
        total = 0
        start_time = time.time()
        print("Validation started for epoch {}".format(epoch + 1))
        random.shuffle(valid_data)  # Good practice to shuffle the order of the validation data
        for input_vector, gold_label in valid_data:
            predicted_vector = model(input_vector)
            predicted_label = torch.argmax(predicted_vector)
            correct += int(predicted_label == gold_label)
            total += 1
            loss = model.compute_Loss(predicted_vector.view(1, -1), torch.tensor([gold_label]))
            # print('Predicted vector size:', predicted_vector.size())
            # print('Predicted label size:', predicted_label.size())
            # print('Gold label:', gold_label)
        print("Validation completed for epoch {}".format(epoch + 1))
        print("Validation accuracy for epoch {}: {}".format(epoch + 1, correct / total))
        print("Validation time for this epoch: {}".format(time.time() - start_time))