forked from anantzoid/VQA-Keras-Visual-Question-Answering
-
Notifications
You must be signed in to change notification settings - Fork 0
/
models.py
37 lines (33 loc) · 1.59 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, LSTM, Flatten, Embedding, Merge
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
import h5py
def Word2VecModel(embedding_matrix, num_words, embedding_dim, seq_length, dropout_rate):
    """Build the (uncompiled) question-encoding branch.

    The network is an embedding lookup initialised from ``embedding_matrix``
    and frozen (``trainable=False``), followed by two stacked 512-unit LSTMs,
    each followed by dropout, and a 1024-unit ``tanh`` projection.

    Args:
        embedding_matrix: Initial weights handed to the ``Embedding`` layer.
            Presumably shaped ``(num_words, embedding_dim)`` -- TODO confirm
            against the caller.
        num_words: Vocabulary size passed to ``Embedding``.
        embedding_dim: Size of each embedding vector.
        seq_length: Length of the input token sequences.
        dropout_rate: Rate used by both ``Dropout`` layers.

    Returns:
        A ``Sequential`` model; it is not compiled here.
    """
    # Parenthesised form prints the same text on Python 2 and Python 3;
    # the bare ``print "..."`` statement is a SyntaxError on Python 3.
    print("Creating text model...")

    model = Sequential()
    model.add(Embedding(num_words, embedding_dim,
                        weights=[embedding_matrix],
                        input_length=seq_length,
                        trainable=False))
    # First LSTM returns the full sequence so the second LSTM can consume it.
    # NOTE(review): ``input_shape`` looks redundant here because this is not
    # the first layer of the model -- confirm before removing.
    model.add(LSTM(units=512, return_sequences=True,
                   input_shape=(seq_length, embedding_dim)))
    model.add(Dropout(dropout_rate))
    # Second LSTM returns only its final output.
    model.add(LSTM(units=512, return_sequences=False))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1024, activation='tanh'))
    return model
def img_model(dropout_rate):
    """Build the (uncompiled) image-feature branch.

    A single 1024-unit ``tanh`` dense layer applied to a 4096-dimensional
    input vector.

    Args:
        dropout_rate: Accepted for signature compatibility with callers; it is
            not used by this function.

    Returns:
        A ``Sequential`` model; it is not compiled here.
    """
    # Parenthesised form prints the same text on Python 2 and Python 3;
    # the bare ``print "..."`` statement is a SyntaxError on Python 3.
    print("Creating image model...")

    model = Sequential()
    # NOTE(review): the 4096-d input is presumably a pre-extracted CNN
    # feature vector (the caller names this branch ``vgg_model``) -- confirm.
    model.add(Dense(1024, input_dim=4096, activation='tanh'))
    return model
def vqa_model(embedding_matrix, num_words, embedding_dim, seq_length, dropout_rate, num_classes):
    """Build and compile the combined image + question classifier.

    The image branch (``img_model``) and the question branch
    (``Word2VecModel``) are merged with ``mode='mul'``, then passed through
    dropout, a 1000-unit ``tanh`` dense layer, dropout again, and a
    ``num_classes``-way softmax.

    Args:
        embedding_matrix: Forwarded to ``Word2VecModel``.
        num_words: Forwarded to ``Word2VecModel``.
        embedding_dim: Forwarded to ``Word2VecModel``.
        seq_length: Forwarded to ``Word2VecModel``.
        dropout_rate: Forwarded to both branch builders and used by the two
            ``Dropout`` layers of the classifier head.
        num_classes: Number of units in the final softmax layer.

    Returns:
        A ``Sequential`` model compiled with the ``rmsprop`` optimizer,
        ``categorical_crossentropy`` loss and the ``accuracy`` metric.
    """
    # Build order (image branch first, then text branch) is kept as in the
    # original so that progress messages appear in the same sequence.
    vgg_model = img_model(dropout_rate)
    lstm_model = Word2VecModel(embedding_matrix, num_words, embedding_dim,
                               seq_length, dropout_rate)

    # Parenthesised form prints the same text on Python 2 and Python 3;
    # the bare ``print "..."`` statement is a SyntaxError on Python 3.
    print("Merging final model...")

    fc_model = Sequential()
    # NOTE(review): ``Merge`` is the legacy Keras merge layer; newer Keras
    # releases no longer ship it -- confirm the pinned Keras version.
    # Presumably inputs must be supplied in [image, question] order to match
    # the branch order given here -- verify against the training script.
    fc_model.add(Merge([vgg_model, lstm_model], mode='mul'))
    fc_model.add(Dropout(dropout_rate))
    fc_model.add(Dense(1000, activation='tanh'))
    fc_model.add(Dropout(dropout_rate))
    fc_model.add(Dense(num_classes, activation='softmax'))
    fc_model.compile(optimizer='rmsprop',
                     loss='categorical_crossentropy',
                     metrics=['accuracy'])
    return fc_model