## Attention models : 2

### data laoding

In [1]:
import pandas as pd
import numpy as np

In [2]:
path = 'fra.txt'
english_texts, french_texts = [], []
num_samples = 10000

In [3]:
with open(path, 'r', encoding = 'UTF-8') as f : 
    lines = f.read().split('\n')

    for line in lines[:num_samples] : 
        english_words, french_words = line.split('\t')[:2]

        french_words = '\t' + french_words + '\n'

        english_texts.append(english_words)
        french_texts.append(french_words)

In [4]:
max_eng_word = max([len(words) for words in english_texts])
max_fra_word = max([len(words) for words in french_texts])
print(f"{max_eng_word} :: {max_fra_word}")

14 :: 59


### data prep : tokenizer and pad_sequences

In [5]:
import tensorflow
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [6]:
eng_tokenizer = Tokenizer(filters = ' ', lower = True)
eng_tokenizer.fit_on_texts(english_texts)
english_texts = eng_tokenizer.texts_to_sequences(english_texts)

In [7]:
english_texts

[[30],
 [30],
 [30],
 [30],
 [492],
 [492],
 [445],
 [445],
 [445],
 [445],
 [445],
 [445],
 [445],
 [445],
 [144],
 [144],
 [144],
 [144],
 [144],
 [144],
 [144],
 [144],
 [1232],
 [961],
 [961],
 [961],
 [962],
 [962],
 [962],
 [1233],
 [1822],
 [643],
 [643],
 [963],
 [1234],
 [644],
 [644],
 [644],
 [964],
 [964],
 [964],
 [145],
 [145],
 [145],
 [145],
 [768],
 [768],
 [21, 114],
 [21, 114],
 [21, 114],
 [1235],
 [1235],
 [645],
 [645],
 [645],
 [645],
 [1, 210],
 [1, 210],
 [1, 153],
 [1, 769],
 [1, 769],
 [1, 118],
 [1823, 1824],
 [181],
 [181],
 [181],
 [181],
 [181],
 [181],
 [181],
 [181],
 [181],
 [181],
 [181],
 [181],
 [770],
 [770],
 [355],
 [355],
 [355],
 [1825],
 [771],
 [771],
 [771],
 [771],
 [194, 8],
 [194, 8],
 [194, 8],
 [194, 8],
 [772],
 [772],
 [772],
 [772],
 [159, 8],
 [159, 8],
 [1826],
 [16, 23],
 [16, 23],
 [16, 23],
 [21, 112],
 [21, 112],
 [21, 112],
 [36, 89],
 [36, 89],
 [36, 89],
 [36, 71],
 [36, 71],
 [36, 71],
 [1236, 72],
 [1236, 72],
 [493, 15],


In [8]:
fra_tokenizer = Tokenizer(filters = ' ', lower = True)
fra_tokenizer.fit_on_texts(french_texts)
french_texts = fra_tokenizer.texts_to_sequences(french_texts)
french_texts

[[92, 2],
 [991],
 [145, 334, 2],
 [335, 2],
 [753, 2],
 [1417],
 [1418],
 [1419],
 [100, 251, 617, 14, 251, 992, 2],
 [754, 2],
 [993, 2],
 [994, 2],
 [1420, 2],
 [1421, 2],
 [1418],
 [1419],
 [100, 251, 617, 14, 251, 992, 2],
 [754, 2],
 [993, 2],
 [994, 2],
 [1420, 2],
 [1421, 2],
 [41, 5],
 [33, 2547],
 [2548],
 [2549],
 [106, 2550],
 [2551],
 [2552],
 [301, 755, 2],
 [106, 1422, 2],
 [2553],
 [2554],
 [995],
 [995],
 [33, 1423],
 [2555],
 [618, 2],
 [192, 2],
 [237, 2],
 [619],
 [192, 2],
 [237, 2],
 [1424],
 [619],
 [1425],
 [1426],
 [2556],
 [2557],
 [2558],
 [996, 2],
 [753, 2],
 [996, 2],
 [753, 2],
 [1427],
 [1417],
 [1, 1428],
 [2559],
 [1429],
 [10, 302, 2],
 [1, 59, 513, 2],
 [86, 188],
 [2560, 336, 2],
 [997],
 [998],
 [999],
 [1430, 1431],
 [1432, 1433],
 [176, 756],
 [1000, 2],
 [620, 2],
 [1434],
 [757, 2],
 [176, 122],
 [252],
 [1435, 2],
 [1436, 2],
 [2561],
 [2562, 163, 17, 2563],
 [2564],
 [1001, 5],
 [2565, 2],
 [2566, 2],
 [106, 2567, 2],
 [106, 2568],
 [2569, 2]

In [16]:
encoder_input_data = pad_sequences(english_texts, maxlen = max_eng_word, padding = 'pre')
decoder_input_data = pad_sequences(french_texts, maxlen = max_fra_word, padding = 'pre')

In [17]:
decoder_target_data = np.zeros_like(decoder_input_data)
decoder_target_data[:, :-1] = decoder_input_data[: , 1:]

In [18]:
decoder_target_data

array([[   0,    0,    0, ...,   92,    2,    0],
       [   0,    0,    0, ...,    0,  991,    0],
       [   0,    0,    0, ...,  334,    2,    0],
       ...,
       [   0,    0,    0, ..., 5804,  380,    0],
       [   0,    0,    0, ..., 5805,  380,    0],
       [   0,    0,    0, ...,  893,  456,    0]])

In [19]:
decoder_target_data.shape

(10000, 59)

In [15]:
eng_vocab_size = len(eng_tokenizer.word_index) + 1
fra_vocab_size = len(fra_tokenizer.word_index) + 1
print(f"{eng_vocab_size} :: {fra_vocab_size}")

2707 :: 5806


### creating the encoder/decoders

In [21]:
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Concatenate
from tensorflow.keras.models import Model

In [22]:
latent_dim = 256

In [23]:
encoder_inputs = Input(shape = (max_eng_word, ))
encoder_emb = Embedding(eng_vocab_size, latent_dim)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state = True, return_sequences = True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_emb)

In [24]:
decoder_inputs = Input(shape = (max_fra_word, ))
decoder_emb_layers = Embedding(fra_vocab_size, latent_dim)
decoder_emb = decoder_emb_layers(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_state = True, return_sequences = True)

### creating the base badhnau attention class

In [25]:
from tensorflow.keras.layers import Dense, Layer

In [29]:
class BahdanauAttention(Layer) : 
    def __init__(self, units) : 
        super(BahdanauAttention, self). __init__()
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)

    def call(self, query, values) : 
        query_with_time_axis = tensorflow.expand_dims(query, 1)
        score = self.V(tensorflow.nn.tanh(self.W1(values)) + self.W2(query_with_time_axis))
        attention_weights = tensorflow.nn.softmax(score, axis = 1)
        context_vector = attention_weights * score
        context_vector = tensorflow.reduce_sum(context_vector, axis = 1)

        return context_vector, attention_weights

In [30]:
attention = BahdanauAttention(latent_dim)
all_outputs = []
decoder_state_h, decoder_state_c = state_h, state_c
for t in range(max_fra_word) : 
    context_vector, _ = attention(decoder_state_h, decoder_state_c)
    X = tensorflow.expand_dims(decoder_emb[:, t], 1)
    X = Concatenate(axis = -1)([context_vector[:, None, :], X])
    output, decoder_state_h, decoder_state_c = decoder_lstm(X, initial_state = [decoder_state_h, decoder_state_c])
    output = Dense(fra_vocab_size, activation = 'softmax')(output)

    all_outputs.append(output)

decoder_outputs = tensorflow.concat(all_outputs, axis = 1)

In [31]:
model = Model()

In [32]:
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

In [33]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 14)]         0           []                               
                                                                                                  
 embedding (Embedding)          (None, 14, 256)      692992      ['input_1[0][0]']                
                                                                                                  
 input_2 (InputLayer)           [(None, 59)]         0           []                               
                                                                                                  
 lstm (LSTM)                    [(None, 14, 256),    525312      ['embedding[0][0]']              
                                 (None, 256),                                               

In [34]:
model.fit(
    [encoder_input_data, decoder_input_data], 
    np.expand_dims(decoder_target_data, -1),
    batch_size = 64, 
    epochs = 10,
    validation_split = 0.2
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1fb06219e70>