In [1]:
from attention import AttentionLayer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import LSTM, Input, Dense,Embedding, Concatenate, TimeDistributed
from tensorflow.keras.models import Model,load_model, model_from_json
from tensorflow.keras.utils import plot_model
from tensorflow.keras.preprocessing.text import one_hot, Tokenizer
from tensorflow.keras.callbacks import EarlyStopping
import pickle as pkl
import numpy as np

In [3]:
# Read the raw training corpus. A usable line consists of two fields separated
# by "<|||>": the Burmese sentence first, its POS tag sequence second.
# The encoding is passed explicitly so the Burmese text decodes the same way on
# every platform instead of depending on the locale default
# (assumes the corpus file is UTF-8 encoded).
with open("pos-train", 'r', encoding='utf-8') as f:
  data_train = f.read()
uncleaned_data_list_train = data_train.split('\n')

# Training data: keep only the lines that split into exactly two fields.
burmese_train = []
pos_train = []
for line in uncleaned_data_list_train:
  fields = line.strip().split("<|||>")
  if len(fields) == 2:
      burmese_train.append(fields[0])
      pos_train.append(fields[1])

# Wrap every POS tag sequence in the "start" / "end" marker words.
pos_train = ["start " + x.strip() + " end" for x in pos_train]

# Source-side (Burmese) vocabulary.
burmeseTokenizer = Tokenizer(oov_token="OOV")
burmeseTokenizer.fit_on_texts(burmese_train)

Bword2index = burmeseTokenizer.word_index
# +1 because the Tokenizer's word indices start at 1 (0 is never assigned).
vocab_size_source = len(Bword2index) + 1

# Target-side (POS tag) vocabulary.
posTokenizer = Tokenizer(oov_token="OOV")

posTokenizer.fit_on_texts(pos_train)
Pword2index = posTokenizer.word_index
vocab_size_target = len(Pword2index) + 1

# Load the model architecture from JSON, then assign the trained weights.
# The context manager closes the file even if reading it fails.
with open('NMT_model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
# AttentionLayer is a custom layer, so it must be registered for deserialization.
model_loaded = model_from_json(loaded_model_json, custom_objects={'AttentionLayer': AttentionLayer})
# Load weights into the new model
model_loaded.load_weights("NMT_model_weight.h5")

# Restore the saved source-side (Burmese) vocabulary size, lookup tables and tokenizer.
# NOTE: pickle can execute arbitrary code on load; only use files from a trusted source.
with open('NMT_Btokenizer.pkl', 'rb') as source_pkl:
    source_bundle = pkl.load(source_pkl)
vocab_size_source, Bword2index, Bindex2word, burmeseTokenizer = source_bundle

# Restore the saved target-side (POS tag) vocabulary size, lookup tables and tokenizer.
with open('NMT_Ptokenizer.pkl', 'rb') as target_pkl:
    target_bundle = pkl.load(target_pkl)
vocab_size_target, Pword2index, Pindex2word, posTokenizer = target_bundle


In [5]:
# Build separate encoder and decoder inference models by reusing the layers of
# the loaded training model. Layers are picked by their position in
# model_loaded.layers, so this cell is only valid for the exact architecture
# stored in NMT_model.json.
# NOTE(review): confirm the layer indices against model_loaded.summary().

# Size of the state inputs declared below; matches the last dimension printed
# for encoder_outputs. Presumably the LSTM hidden size used in training — TODO confirm.
latent_dim=500
# Encoder inference model
encoder_inputs = model_loaded.input[0]  # first input of the loaded model, used as the encoder input
encoder_outputs, state_h, state_c = model_loaded.layers[6].output  # layer 6 yields three tensors; presumably the encoder LSTM — TODO confirm

print(encoder_outputs.shape)

# Maps the source sequence to the per-step encoder outputs plus the two state tensors.
encoder_model = Model(inputs=encoder_inputs,outputs=[encoder_outputs, state_h, state_c])

# Decoder inference model
# The tensors below hold the states of the previous time step
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
# Placeholder for the encoder outputs consumed by the attention layer.
# 150 matches the time dimension printed for encoder_outputs and the maxlen
# used when the input sentence is padded.
decoder_hidden_state_input = Input(shape=(150,latent_dim))

# Get the embeddings of the decoder sequence
# (layer 3's output is reused as the decoder input tensor; layer 5 is
# presumably the decoder Embedding layer — TODO confirm)
decoder_inputs = model_loaded.layers[3].output

print(decoder_inputs.shape)
dec_emb_layer = model_loaded.layers[5]

dec_emb2= dec_emb_layer(decoder_inputs)

# To predict the next word in the sequence, set the initial states to the states from the previous time step
decoder_lstm = model_loaded.layers[7]
decoder_outputs2, state_h2, state_c2 = decoder_lstm(dec_emb2, initial_state=[decoder_state_input_h, decoder_state_input_c])

# Attention inference: the layer receives the encoder outputs placeholder and
# the decoder outputs of the current step.
attn_layer = model_loaded.layers[8]
attn_out_inf, attn_states_inf = attn_layer([decoder_hidden_state_input, decoder_outputs2])

# Join the decoder outputs with the attention output (layer 9 is presumably
# the Concatenate layer — TODO confirm).
concate = model_loaded.layers[9]
decoder_inf_concat = concate([decoder_outputs2, attn_out_inf])

# A dense softmax layer to generate a probability distribution over the target vocabulary
# (presumably the training model's output layer — TODO confirm)
decoder_dense = model_loaded.layers[10]
decoder_outputs2 = decoder_dense(decoder_inf_concat)

# Final decoder model.
# Inputs:  [previous target token(s), encoder outputs, state h, state c]
# Outputs: [output of decoder_dense, new state h, new state c]
decoder_model = Model(
[decoder_inputs] + [decoder_hidden_state_input,decoder_state_input_h, decoder_state_input_c],
[decoder_outputs2] + [state_h2, state_c2])

(None, 150, 500)
(None, None)


In [None]:
def decode_sequence_attention(input_seq, max_target_len=26):
    """Greedily decode the tag sequence for one padded source sentence.

    The encoder runs once. The decoder is then called one step at a time;
    each step receives the previously predicted token together with the
    states returned by the previous step, and the most probable token is kept.

    Decoding stops as soon as one of the following happens:
      * the 'end' marker is predicted,
      * index 0 is predicted (no vocabulary entry exists for it),
      * ``max_target_len - 1`` tokens have been produced.

    Args:
        input_seq: Batch containing one padded source sequence; it is passed
            unchanged to ``encoder_model.predict``.
        max_target_len: Length budget for the decoded sequence. Defaults to
            26, the value that used to be hard-coded.

    Returns:
        str: The predicted tokens, each preceded by a single space (so a
        non-empty result starts with a space). The 'start' and 'end' markers
        are never part of the result.
    """
    # Encode the input once; e_out is handed to the decoder at every step.
    e_out, e_h, e_c = encoder_model.predict(input_seq)

    # Target sequence of length 1, seeded with the 'start' word.
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = Pword2index['start']

    decoded_sentence = ''
    while True:
        output_tokens, h, c = decoder_model.predict([target_seq, e_out, e_h, e_c])

        # Greedy choice: most probable token at the last time step.
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        if sampled_token_index == 0:
            break

        sampled_token = Pindex2word[sampled_token_index]
        # The stop check must run for the 'end' token itself. Previously it
        # was nested inside the "not 'end'" branch, so predicting 'end' never
        # terminated the loop.
        if sampled_token == 'end':
            break

        decoded_sentence += ' ' + sampled_token
        if len(decoded_sentence.split()) >= (max_target_len - 1):
            break

        # Feed the token just predicted as the next decoder input.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index

        # Carry the decoder states over to the next step.
        e_h, e_c = h, c

    return decoded_sentence

In [None]:
# Example sentence to tag (a batch with a single entry).
input_data = ["ကျွန်တော် ဖြိုးသူထက် ပါ ။"]
# Map the words to vocabulary indices, then pad on the right to 150 positions,
# the sequence length the encoder input is reshaped to when decoding.
test = burmeseTokenizer.texts_to_sequences(input_data)
test = pad_sequences(test, maxlen=150, padding='post')

In [None]:
# Show the raw input, then the sequence decoded by the attention model.
print("Input:", input_data)
predicted_tags = decode_sequence_attention(test.reshape(1, 150))
print("LSTM with attention:", predicted_tags)