In [1]:
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM, Input, Embedding, Dense, TimeDistributed, Concatenate, Attention
import tensorflow_addons as tfa
from tensorflow_addons.seq2seq import AttentionWrapper
import keras
from tensorflow.keras.callbacks import EarlyStopping



## Load Preprocessed Data

In [6]:
data = np.load('../../preprocessed_oh.npz')
text_vec = data['text_word2vec']
summary_vec = data['summary_word2vec']
text_existence = data['text_existence']
text_count = data['text_count']
summary_existence = data['summary_existence']
summary_count = data['summary_count']
labels = data["labels"]
text_voc_size = data['text_voc_size']
sum_voc_size = data['sum_voc_size']

## Create an attention mechanism
https://www.analyticsvidhya.com/blog/2019/11/comprehensive-guide-attention-mechanism-deep-learning/

In [3]:
import keras.backend as K

In [4]:
class attention(keras.layers.Layer):
    def build(self, input_shape):
        self.W = self.add_weight(name = "att_weight", shape = (input_shape[-1],1))
        self.b=self.add_weight(name="att_bias",shape=(input_shape[1],1),initializer="zeros")        
        super(attention, self).build(input_shape)
    def call(self, x):
        """
        inputs: x = [encoder_output_sequence, decoder_output_sequence]
        """
        et = K.squeeze(K.tanh(K.dot(x,self.W)+ self.b), axis = -1)
        at = K.expand_dims(K.softmax(et), axis = -1)
        output = x*at
        return K.sum(output,axis =1)
    def compute_output_shape(self,input_shape):
        return (input_shape[0], input_shape[-1])
    def get_config(self):
        return super(attention,self).get_config()
        

In this notebook we build up the Encoder-Decoder architecture using LSTM model. 

In [5]:
hidden_size = 1000

#https://www.analyticsvidhya.com/blog/2019/06/comprehensive-guide-text-summarization-using-deep-learning-python/
def create_model(max_text_len,x_voc_size, hidden_size):
    #Encoder
    encoder_input = Input(shape=(max_text_len,))
    enc_emb = Embedding(x_voc_size, hidden_size,trainable=True)(encoder_input) 
    
    enc_lstm1 = LSTM(hidden_size, return_sequences = True, return_state = True)
    encoder_out1, state_h1, state_c1 = enc_lstm1(enc_emb)
    
    enc_lstm2 = LSTM(hidden_size, return_sequences = True, return_state = True)
    encoder_out2, state_h2, state_c2 = enc_lstm2(enc_emb)
    
    enc_lstm3 = LSTM(hidden_size, return_sequences = True, return_state = True)
    encoder_out3, state_h3, state_c3 = enc_lstm3(enc_emb)
    
    decoder_input = Input(shape = (None,))
    dec_emb_layer = Embedding(y_voc_size, hidden_size, trainable = True)
    dec_emb = dec_emb_layer(decoder_input) 
    
    dec_lstm = LSTM(hidden_size, return_sequences=True, return_state=True) 
    dec_outputs,decoder_fwd_state, decoder_back_state = dec_lstm(dec_emb,initial_state=[state_h3, state_c3]) 
    

    print(f"dec_outputs: {np.shape(dec_outputs)} encoder_out3: {np.shape(encoder_out3)}")
    attention_out = Attention()([encoder_out3,dec_outputs])
    decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([dec_outputs, attention_out])
    decoder_dense = TimeDistributed(Dense(y_voc_size, activation='softmax'))  
    decoder_outputs = decoder_dense(decoder_concat_input)   
    model =Model([encoder_input, decoder_input],decoder_outputs)
    model.compile(optimizer='Adam', loss='sparse_categorical_crossentropy')
    return model
#attention = AttentionWrapper()

In [6]:
model = create_model(max_text_len,x_voc_size, hidden_size)
model.summary()

dec_outputs: (None, None, 1000) encoder_out3: (None, 20000, 1000)
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 20000)]      0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 embedding (Embedding)          (None, 20000, 1000)  11866000    ['input_1[0][0]']                
                                                                                                  
 embedding_1 (Embedding)        (None, None, 1000)   11866000    ['input_2[0][0]']                
                            

In [19]:
print(x_train.shape, y_train.shape)

(1287, 20000) (1287, 200)


In [18]:
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)
history=model.fit([x_train,y_train[:,:-1]], y_train.reshape(y_train.shape[0],y_train.shape[1], 1)[:,1:] ,epochs=50,callbacks=[es],batch_size=512, validation_data=([x_test,y_test[:,:-1]], y_test.reshape(y_test.shape[0],y_test.shape[1], 1)[:,1:]))

Epoch 1/50


ValueError: in user code:

    File "/users/eleves-b/2020/yufei.liu/.local/lib/python3.8/site-packages/keras/engine/training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "/users/eleves-b/2020/yufei.liu/.local/lib/python3.8/site-packages/keras/engine/training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/users/eleves-b/2020/yufei.liu/.local/lib/python3.8/site-packages/keras/engine/training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "/users/eleves-b/2020/yufei.liu/.local/lib/python3.8/site-packages/keras/engine/training.py", line 859, in train_step
        y_pred = self(x, training=True)
    File "/users/eleves-b/2020/yufei.liu/.local/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/users/eleves-b/2020/yufei.liu/.local/lib/python3.8/site-packages/keras/backend.py", line 3313, in concatenate
        return tf.concat([to_dense(x) for x in tensors], axis)

    ValueError: Exception encountered when calling layer "concat_layer" (type Concatenate).
    
    Dimension 1 in both shapes must be equal, but are 199 and 20000. Shapes are [?,199] and [?,20000]. for '{{node model/concat_layer/concat}} = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32](model/lstm_3/PartitionedCall:1, model/attention/MatMul_1, model/concat_layer/concat/axis)' with input shapes: [?,199,1000], [?,20000,1000], [] and with computed input tensors: input[2] = <2>.
    
    Call arguments received:
      • inputs=['tf.Tensor(shape=(None, 199, 1000), dtype=float32)', 'tf.Tensor(shape=(None, 20000, 1000), dtype=float32)']
