diff --git a/.circleci/config.yml b/.circleci/config.yml
new file mode 100644
index 0000000..bf2c5b2
--- /dev/null
+++ b/.circleci/config.yml
@@ -0,0 +1,61 @@
+version: 2
+jobs:
+  build:
+    docker:
+      - image: circleci/python:3.6.1
+
+    working_directory: ~/repo
+
+    steps:
+      - checkout
+
+      # Download and cache dependencies
+      - restore_cache:
+          keys:
+            - v1-dependencies-{{ checksum "requirements.txt" }}
+            # fallback to using the latest cache if no exact match is found
+            - v1-dependencies-
+
+      - run:
+          name: install dependencies
+          command: |
+            python3 -m venv venv
+            . venv/bin/activate
+            pip install -r requirements.txt
+      - save_cache:
+          paths:
+            - ./venv
+          key: v1-dependencies-{{ checksum "requirements.txt" }}
+
+  test_layer:
+    docker:
+      - image: circleci/python:3.6.1
+
+    working_directory: ~/repo
+
+    steps:
+      - checkout
+
+      - run:
+          name: test_layer
+          command: |
+            python3 -m venv venv
+            . venv/bin/activate
+            pip install -r requirements.txt
+            python3 -m unittest tests/test_layer.py
+      # NOTE: the cache key must checksum a file that exists at the repo root.
+      # The test file lives at tests/test_layer.py, so keying on "test_layer.py"
+      # would fail; key on requirements.txt like the build job does.
+      - save_cache:
+          paths:
+            - ./venv
+          key: v1-dependencies-{{ checksum "requirements.txt" }}
+
+
+
+workflows:
+  version: 2
+  build_and_test:
+    jobs:
+      - build
+      - test_layer
+
diff --git a/README.md b/README.md
index 3b7302d..6af888d 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,7 @@
 [![Gitter](https://badges.gitter.im/s-atmech/community.svg)](https://gitter.im/s-atmech/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
+
+[![CircleCI](https://circleci.com/gh/Samsomyajit/s-atmech/tree/master.svg?style=svg)](https://circleci.com/gh/Samsomyajit/s-atmech/tree/master)
+
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..0eecf7c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,11 @@
+numpy>=1.16.4
+pandas>=0.22.0
+tensorflow
+matplotlib>=3.1.2
+scikit-learn>=0.22
+jupyter>=1.0.0
+Pillow>=6.1.0
+nltk>=3.4.5
+pyYAML>=5.2
+scipy>=1.3.0
+s-atmech
diff --git a/s-atmech/__init__.py b/s-atmech/__init__.py
index a4a9e2b..d463e08 100644
--- a/s-atmech/__init__.py
+++ b/s-atmech/__init__.py
@@ -1 +1,2 @@
-from s-atmech.AttentionLayer import AttentionLayer
+# The package directory name "s-atmech" contains a hyphen and is not a valid
+# Python identifier, so use a relative import (valid inside the package).
+from .AttentionLayer import AttentionLayer
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+
diff --git a/tests/test_import.py b/tests/test_import.py
new file mode 100644
index 0000000..d463e08
--- /dev/null
+++ b/tests/test_import.py
@@ -0,0 +1,4 @@
+# "from s-atmech import ..." is a SyntaxError (hyphen in an import statement);
+# the import system accepts the name as a plain string via importlib.
+import importlib
+AttentionLayer = importlib.import_module("s-atmech").AttentionLayer
diff --git a/tests/test_layer.py b/tests/test_layer.py
new file mode 100644
index 0000000..dde841b
--- /dev/null
+++ b/tests/test_layer.py
@@ -0,0 +1,123 @@
+import tensorflow as tf
+import os
+from tensorflow.python.keras.layers import Layer
+from tensorflow.python.keras import backend as K
+
+
+class AttentionLayer(Layer):
+    """
+    There are three sets of weights introduced W_a, U_a, and V_a
+    """
+
+    def __init__(self, **kwargs):
+        super(AttentionLayer, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        assert isinstance(input_shape, list)
+        # Create a trainable weight variable for this layer.
+
+        self.W_a = self.add_weight(name='W_a',
+                                   shape=tf.TensorShape((input_shape[0][2], input_shape[0][2])),
+                                   initializer='uniform',
+                                   trainable=True)
+        self.U_a = self.add_weight(name='U_a',
+                                   shape=tf.TensorShape((input_shape[1][2], input_shape[0][2])),
+                                   initializer='uniform',
+                                   trainable=True)
+        self.V_a = self.add_weight(name='V_a',
+                                   shape=tf.TensorShape((input_shape[0][2], 1)),
+                                   initializer='uniform',
+                                   trainable=True)
+
+        super(AttentionLayer, self).build(input_shape)  # Be sure to call this at the end
+
+    def call(self, inputs, verbose=False):
+        """
+        inputs: [encoder_output_sequence, decoder_output_sequence]
+        """
+        assert type(inputs) == list
+        encoder_out_seq, decoder_out_seq = inputs
+        if verbose:
+            print('encoder_out_seq>', encoder_out_seq.shape)
+            print('decoder_out_seq>', decoder_out_seq.shape)
+
+        def energy_step(inputs, states):
+            """ Step function for computing energy for a single decoder state """
+
+            assert_msg = "States must be a list. However states {} is of type {}".format(states, type(states))
+            assert isinstance(states, list) or isinstance(states, tuple), assert_msg
+
+            """ Some parameters required for shaping tensors"""
+            en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]
+            de_hidden = inputs.shape[-1]
+
+            """ Computing S.Wa where S=[s0, s1, ..., si]"""
+            # <= batch_size*en_seq_len, latent_dim
+            reshaped_enc_outputs = K.reshape(encoder_out_seq, (-1, en_hidden))
+            # <= batch_size*en_seq_len, latent_dim
+            W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a), (-1, en_seq_len, en_hidden))
+            if verbose:
+                print('wa.s>',W_a_dot_s.shape)
+
+            """ Computing hj.Ua """
+            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1)  # <= batch_size, 1, latent_dim
+            if verbose:
+                print('Ua.h>',U_a_dot_h.shape)
+
+            """ tanh(S.Wa + hj.Ua) """
+            # <= batch_size*en_seq_len, latent_dim
+            reshaped_Ws_plus_Uh = K.tanh(K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
+            if verbose:
+                print('Ws+Uh>', reshaped_Ws_plus_Uh.shape)
+
+            """ softmax(va.tanh(S.Wa + hj.Ua)) """
+            # <= batch_size, en_seq_len
+            e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a), (-1, en_seq_len))
+            # <= batch_size, en_seq_len
+            e_i = K.softmax(e_i)
+
+            if verbose:
+                print('ei>', e_i.shape)
+
+            return e_i, [e_i]
+
+        def context_step(inputs, states):
+            """ Step function for computing ci using ei """
+            # <= batch_size, hidden_size
+            c_i = K.sum(encoder_out_seq * K.expand_dims(inputs, -1), axis=1)
+            if verbose:
+                print('ci>', c_i.shape)
+            return c_i, [c_i]
+
+        def create_inital_state(inputs, hidden_size):
+
+            fake_state = K.zeros_like(inputs)  # <= (batch_size, enc_seq_len, latent_dim
+            fake_state = K.sum(fake_state, axis=[1, 2])  # <= (batch_size)
+            fake_state = K.expand_dims(fake_state)  # <= (batch_size, 1)
+            fake_state = K.tile(fake_state, [1, hidden_size])  # <= (batch_size, latent_dim
+            return fake_state
+
+        fake_state_c = create_inital_state(encoder_out_seq, encoder_out_seq.shape[-1])
+        fake_state_e = create_inital_state(encoder_out_seq, encoder_out_seq.shape[1])  # <= (batch_size, enc_seq_len, latent_dim
+
+        """ Computing energy outputs """
+        # e_outputs => (batch_size, de_seq_len, en_seq_len)
+        last_out, e_outputs, _ = K.rnn(
+            energy_step, decoder_out_seq, [fake_state_e],
+        )
+
+        """ Computing context vectors """
+        last_out, c_outputs, _ = K.rnn(
+            context_step, e_outputs, [fake_state_c],
+        )
+
+        return c_outputs, e_outputs
+
+    def compute_output_shape(self, input_shape):
+        """ Outputs produced by the layer """
+        return [
+            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[1][2])),
+            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[0][1]))
+        ]
+