# comparator_B.py
import os

import numpy as np
import torch
from gensim.models import Word2Vec

from data_loader import fetch_data
from rnn import RNN
# from ffnn import FFNN, convert_to_vector_representation, make_indices, make_vocab
# Four trained checkpoints to compare. Judging by the file names, the final
# constructor flag appears to select the vanilla-RNN cell (True) vs. the LSTM
# cell (False), and each cell type was trained with both SGD and RMSprop.
directory = 'models_b/'
model_paths = ['rnn_sgd_base.pt', 'rnn_rmsprop_base.pt', 'lstm_sgd_base.pt', 'lstm_rmsprop_base.pt']

model1 = RNN(32, 1, 64, True)
model1.load_state_dict(torch.load(os.path.join(directory, model_paths[0])))
model2 = RNN(32, 1, 64, True)
model2.load_state_dict(torch.load(os.path.join(directory, model_paths[1])))
model3 = RNN(32, 1, 64, False)
model3.load_state_dict(torch.load(os.path.join(directory, model_paths[2])))
model4 = RNN(32, 1, 64, False)
model4.load_state_dict(torch.load(os.path.join(directory, model_paths[3])))
models = [model1, model2, model3, model4]
print('models successfully loaded')
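
# Hedged addition (not in the original script): switch every network to
# inference mode in case the RNN module uses dropout or other train-time
# behaviour; RNN inherits nn.Module semantics, so eval() is available.
for m in models:
    m.eval()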
# Load the train/validation splits and the trained Word2Vec embeddings.
train_data, valid_data = fetch_data()
wv_model = Word2Vec.load("word2vec.model")
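
# Hedged sanity check (assumption, not in the original script): the embedding
# lookup below raises KeyError for any validation token that Word2Vec dropped
# (e.g. via min_count), so count out-of-vocabulary tokens up front.
oov_tokens = sum(1 for v in valid_data for word in v[0] if word not in wv_model.wv)
print('out-of-vocabulary validation tokens: {}'.format(oov_tokens))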
# Convert each validation example into a (1, seq_len, embed_dim) tensor of
# Word2Vec vectors, paired with its gold label.
validation_samples = []
for v in valid_data:
    embedding_list = [wv_model.wv[word] for word in v[0]]
    stacked_embedding = np.stack(embedding_list, axis=0)
    expanded_embedding = np.expand_dims(stacked_embedding, axis=0)
    embedding_tensor = torch.from_numpy(expanded_embedding)
    valid_sample = (embedding_tensor, v[1])
    validation_samples.append(valid_sample)
N = len(validation_samples)
print('starting validation counts')

# Per-model bookkeeping: indices of correctly and incorrectly classified
# examples, plus per-rating correct counts and totals (5 rating classes).
correct_outputs = [[], [], [], []]
incorrect_outputs = [[], [], [], []]
rating_sums = [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]
rating_totals = [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]
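
# Hedged addition (not in the original script): no gradients are needed while
# scoring the validation set, so turn autograd off globally to save memory.
torch.set_grad_enabled(False)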
for i, model in enumerate(models):
    total = 0
    correct = 0
    for index in range(N):
        input_vector, gold_label = validation_samples[index]
        predicted_vector = model(input_vector)
        predicted_label = torch.argmax(predicted_vector)
        # Track per-rating totals and hits, and remember which examples each
        # model got right or wrong.
        rating_totals[i][gold_label] += 1
        if predicted_label == gold_label:
            rating_sums[i][gold_label] += 1
            correct_outputs[i].append(index)
        else:
            incorrect_outputs[i].append(index)
        correct += int(predicted_label == gold_label)
        total += 1
    print("Validation accuracy for model {}: {}".format(i, correct / total))

# Per-rating accuracy for each model (assumes every rating class appears at
# least once in the validation split; otherwise this divides by zero).
for i in range(4):
    for j in range(5):
        print('Model {} rating {} percent correct: {}'.format(i, j, rating_sums[i][j] / rating_totals[i][j]))
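
# Hedged follow-up sketch (not in the original script): macro-average the
# per-rating accuracies for each model, skipping ratings that never occur in
# the validation split so the division cannot fail for an empty class.
for i in range(4):
    per_rating = [rating_sums[i][j] / rating_totals[i][j]
                  for j in range(5) if rating_totals[i][j] > 0]
    if per_rating:
        print('Model {} macro-averaged rating accuracy: {}'.format(i, sum(per_rating) / len(per_rating)))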