cnn_model.py
# -*- coding: utf-8 -*-
"""
CNN model for text classification implemented in TensorFlow 2.

The word-level architecture follows the paper by Yoon Kim [1];
in addition, a character-level input channel [2] can be enabled.

# References
- [1] [Convolutional Neural Networks for Sentence Classification](https://arxiv.org/abs/1408.5882)
- [2] [Character-level Convolutional Networks for Text Classification](https://arxiv.org/abs/1509.01626)

@author: Christopher Masch
"""
import tensorflow as tf
from tensorflow.keras import layers


class CNN:
    __version__ = '0.2.0'

    def __init__(self, embedding_layer=None, num_words=None, embedding_dim=None,
                 max_seq_length=100, kernel_sizes=(3, 4, 5), feature_maps=(100, 100, 100),
                 use_char=False, char_embedding_dim=50, char_max_length=200,
                 alphabet_size=None, char_kernel_sizes=(3, 10, 20),
                 char_feature_maps=(100, 100, 100), hidden_units=100,
                 dropout_rate=None, nb_classes=None):
"""
Arguments:
embedding_layer : If not defined with pre-trained embeddings it will be created from scratch (default: None)
num_words : Maximal amount of words in the vocabulary (default: None)
embedding_dim : Dimension of word representation (default: None)
max_seq_length : Max length of word sequence (default: 100)
filter_sizes : An array of filter sizes per channel (default: [3,4,5])
feature_maps : Defines the feature maps per channel (default: [100,100,100])
use_char : If True, char-based model will be added to word-based model
char_embedding_dim : Dimension of char representation (default: 50)
char_max_length : Max length of char sequence (default: 200)
alphabet_size : Amount of differnent chars used for creating embeddings (default: None)
hidden_units : Hidden units per convolution channel (default: 100)
dropout_rate : If defined, dropout will be added after embedding layer & concatenation (default: None)
nb_classes : Number of classes which can be predicted
"""
        # WORD-level
        self.embedding_layer = embedding_layer
        self.num_words = num_words
        self.max_seq_length = max_seq_length
        self.embedding_dim = embedding_dim
        self.kernel_sizes = kernel_sizes
        self.feature_maps = feature_maps

        # CHAR-level
        self.use_char = use_char
        self.char_embedding_dim = char_embedding_dim
        self.char_max_length = char_max_length
        self.alphabet_size = alphabet_size
        self.char_kernel_sizes = char_kernel_sizes
        self.char_feature_maps = char_feature_maps

        # General
        self.hidden_units = hidden_units
        self.dropout_rate = dropout_rate
        self.nb_classes = nb_classes

    def build_model(self):
        """
        Builds the model.

        Returns:
            Model : Keras model instance
        """
        # Sanity checks
        if len(self.kernel_sizes) != len(self.feature_maps):
            raise ValueError('Please define `kernel_sizes` and `feature_maps` with the same length.')
        if not self.embedding_layer and (not self.num_words or not self.embedding_dim):
            raise ValueError('Please define `num_words` and `embedding_dim` if you are not using a pre-trained embedding.')
        if self.use_char and (not self.char_max_length or not self.alphabet_size):
            raise ValueError('Please define `char_max_length` and `alphabet_size` if you are using char input.')

        # Build word embeddings from scratch if no pre-trained layer was given
        if self.embedding_layer is None:
            self.embedding_layer = layers.Embedding(
                input_dim=self.num_words,
                output_dim=self.embedding_dim,
                input_length=self.max_seq_length,
                weights=None,
                trainable=True,
                name="word_embedding"
            )

        # WORD-level channels
        word_input = layers.Input(shape=(self.max_seq_length,), dtype='int32', name='word_input')
        x = self.embedding_layer(word_input)
        if self.dropout_rate:
            x = layers.Dropout(self.dropout_rate)(x)
        x = self.building_block(x, self.kernel_sizes, self.feature_maps)
        x = layers.Activation('relu')(x)
        prediction = layers.Dense(self.nb_classes, activation='softmax')(x)

        # CHAR-level channels: a second tower whose softmax output is
        # averaged with the word-level prediction
        if self.use_char:
            char_input = layers.Input(shape=(self.char_max_length,), dtype='int32', name='char_input')
            x_char = layers.Embedding(
                input_dim=self.alphabet_size + 1,
                output_dim=self.char_embedding_dim,
                input_length=self.char_max_length,
                name='char_embedding'
            )(char_input)
            x_char = self.building_block(x_char, self.char_kernel_sizes, self.char_feature_maps)
            x_char = layers.Activation('relu')(x_char)
            x_char = layers.Dense(self.nb_classes, activation='softmax')(x_char)

            prediction = layers.Average()([prediction, x_char])
            return tf.keras.Model(inputs=[word_input, char_input], outputs=prediction, name='CNN_Word_Char')

        return tf.keras.Model(inputs=word_input, outputs=prediction, name='CNN_Word')

    def building_block(self, input_layer, kernel_sizes, feature_maps):
        """
        Creates several CNN channels in parallel and concatenates them.

        Arguments:
            input_layer : Layer that feeds all convolutional channels
            kernel_sizes: Array of kernel sizes (each acting as an n-gram filter)
            feature_maps: Array of feature maps, one entry per channel

        Returns:
            x : Building block with one or several channels
        """
        channels = []
        for ix in range(len(kernel_sizes)):
            x = self.create_channel(input_layer, kernel_sizes[ix], feature_maps[ix])
            channels.append(x)

        # A single channel needs no concatenation
        if len(channels) > 1:
            x = layers.concatenate(channels)
        return x

    def create_channel(self, x, kernel_size, feature_map):
        """
        Creates a single convolutional channel.

        Arguments:
            x           : Input of the convolutional channel
            kernel_size : Kernel size of the Conv1D
            feature_map : Number of feature maps

        Returns:
            x : Channel consisting of Conv1D + {GlobalMaxPooling & GlobalAveragePooling} + Dense [+ Dropout]
        """
        x = layers.SeparableConv1D(
            feature_map,
            kernel_size=kernel_size,
            activation='relu',
            strides=1,
            padding='valid',
            depth_multiplier=4
        )(x)

        # Combine max and average pooling to keep both the strongest and the mean activation
        x1 = layers.GlobalMaxPooling1D()(x)
        x2 = layers.GlobalAveragePooling1D()(x)
        x = layers.concatenate([x1, x2])

        x = layers.Dense(self.hidden_units)(x)
        if self.dropout_rate:
            x = layers.Dropout(self.dropout_rate)(x)
        return x
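

if __name__ == '__main__':
    # Minimal usage sketch. The vocabulary size, embedding dimension, and
    # class count below are hypothetical placeholders, not values from the
    # original repository.
    model = CNN(
        num_words=10000,       # hypothetical vocabulary size
        embedding_dim=300,     # hypothetical embedding dimension
        max_seq_length=100,
        nb_classes=5,          # hypothetical number of target classes
        dropout_rate=0.5
    ).build_model()
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',  # assumes one-hot labels
                  metrics=['accuracy'])
    model.summary()

    # With the char-level tower enabled, the model expects two inputs
    # (word indices and char indices); the sizes are again hypothetical.
    word_char_model = CNN(
        num_words=10000,
        embedding_dim=300,
        max_seq_length=100,
        use_char=True,
        alphabet_size=69,      # hypothetical alphabet size
        char_max_length=200,
        nb_classes=5
    ).build_model()
    word_char_model.summary()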