## Attention models : 3

### data prep

In [1]:
import pandas as pd
import numpy as np

In [2]:
path = 'fra.txt'
eng_texts, fra_texts = [], []
num_samples = 10000

In [3]:
with open(path, 'r', encoding = 'UTF-8') as f : 
    lines = f.read().split('\n')

    for line in lines[:num_samples] : 
        eng_words, fra_words = line.split('\t')[:2]

        fra_words = '\t' + fra_words + '\n'

        eng_texts.append(eng_words)
        fra_texts.append(fra_words)

In [4]:
eng_texts

['Go.',
 'Go.',
 'Go.',
 'Go.',
 'Hi.',
 'Hi.',
 'Run!',
 'Run!',
 'Run!',
 'Run!',
 'Run!',
 'Run!',
 'Run!',
 'Run!',
 'Run.',
 'Run.',
 'Run.',
 'Run.',
 'Run.',
 'Run.',
 'Run.',
 'Run.',
 'Who?',
 'Wow!',
 'Wow!',
 'Wow!',
 'Duck!',
 'Duck!',
 'Duck!',
 'Fire!',
 'Help!',
 'Hide.',
 'Hide.',
 'Jump!',
 'Jump.',
 'Stop!',
 'Stop!',
 'Stop!',
 'Wait!',
 'Wait!',
 'Wait!',
 'Wait.',
 'Wait.',
 'Wait.',
 'Wait.',
 'Begin.',
 'Begin.',
 'Go on.',
 'Go on.',
 'Go on.',
 'Hello!',
 'Hello!',
 'Hello.',
 'Hello.',
 'Hello.',
 'Hello.',
 'I see.',
 'I see.',
 'I try.',
 'I won!',
 'I won!',
 'I won.',
 'Oh no!',
 'Relax.',
 'Relax.',
 'Relax.',
 'Relax.',
 'Relax.',
 'Relax.',
 'Relax.',
 'Relax.',
 'Relax.',
 'Relax.',
 'Relax.',
 'Relax.',
 'Shoot!',
 'Shoot!',
 'Smile.',
 'Smile.',
 'Smile.',
 'Sorry?',
 'Attack!',
 'Attack!',
 'Attack!',
 'Attack!',
 'Buy it.',
 'Buy it.',
 'Buy it.',
 'Buy it.',
 'Cheers!',
 'Cheers!',
 'Cheers!',
 'Cheers!',
 'Eat it.',
 'Eat it.',
 'Exhale.',
 'Get 

In [11]:
len(fra_texts)

10000

In [5]:
max_eng_words = max([len(words) for words in eng_texts])
max_fra_words = max([len(words) for words in fra_texts])
print(f"{max_eng_words} :: {max_fra_words}")

14 :: 59


### tokenizer and pad_sequences

In [6]:
import tensorflow
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [7]:
eng_tokenizer = Tokenizer(filters = ' ', lower = True)
eng_tokenizer.fit_on_texts(eng_texts)
encoder_input_data = eng_tokenizer.texts_to_sequences(eng_texts)

In [8]:
fra_tokenizer = Tokenizer(filters = ' ', lower = True)
fra_tokenizer.fit_on_texts(fra_texts)
decoder_input_data = fra_tokenizer.texts_to_sequences(fra_texts)

In [9]:
encoder_input_data = pad_sequences(encoder_input_data, maxlen = max_eng_words, padding = 'pre')

In [10]:
decoder_input_data = pad_sequences(decoder_input_data, maxlen = max_fra_words, padding = 'pre')

In [11]:
encoder_input_data.shape

(10000, 14)

In [12]:
decoder_input_data.shape

(10000, 59)

In [13]:
decoder_target_data = np.zeros_like(decoder_input_data)
decoder_target_data[:, :-1] = decoder_input_data[:, 1:]

In [14]:
decoder_target_data.shape

(10000, 59)

In [16]:
decoder_input_data

array([[   0,    0,    0, ...,    0,   92,    2],
       [   0,    0,    0, ...,    0,    0,  991],
       [   0,    0,    0, ...,  145,  334,    2],
       ...,
       [   0,    0,    0, ...,   20, 5804,  380],
       [   0,    0,    0, ...,   20, 5805,  380],
       [   0,    0,    0, ..., 2286,  893,  456]])

In [15]:
decoder_target_data

array([[   0,    0,    0, ...,   92,    2,    0],
       [   0,    0,    0, ...,    0,  991,    0],
       [   0,    0,    0, ...,  334,    2,    0],
       ...,
       [   0,    0,    0, ..., 5804,  380,    0],
       [   0,    0,    0, ..., 5805,  380,    0],
       [   0,    0,    0, ...,  893,  456,    0]])

In [54]:
eng_vocab = len(eng_tokenizer.word_index) + 1
fra_vocab = len(fra_tokenizer.word_index) + 1
print(f"{eng_vocab} :: {fra_vocab}")

2707 :: 5806


### building the encoders/decoders

In [55]:
from tensorflow.keras.layers import Input, Embedding, LSTM

In [57]:
latent_dim = 256

In [60]:
encoder_inputs = Input(shape = (max_eng_words, ))
encoder_emb = Embedding(eng_vocab, latent_dim)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state = True, return_sequences = True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_emb)

In [61]:
decoder_inputs = Input(shape = (max_fra_words, ))
decoder_emb_layer = Embedding(fra_vocab, latent_dim)
decoder_emb = decoder_emb_layer(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_state = True, return_sequences = True)

### building the Attention model

In [64]:
from tensorflow.keras.layers import Layer, Dense, Concatenate
from tensorflow.keras.models import Model

In [83]:
class BahdanauAttention(Layer) : 
    def __init__(self, units) : 
        super(BahdanauAttention, self).__init__()
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)

    def call(self, query, vlaues) : 
        query_with_time_axis = tensorflow.expand_dims(query, 1)
        score = self.V(tensorflow.nn.tanh(self.W1(vlaues)) + self.W2(query_with_time_axis))
        attention_weights = tensorflow.nn.softmax(score, axis = 1)
        context_vector = attention_weights * score
        context_vector = tensorflow.reduce_sum(context_vector, axis = 1)

        return context_vector, attention_weights

In [97]:
attention = BahdanauAttention(latent_dim)
all_outputs = []
decoder_state_h, decoder_state_c = state_h, state_c
for t in range(max_fra_words) : 
    context_vector, _ = attention(decoder_state_h, decoder_state_c)
    X = tensorflow.expand_dims(decoder_emb[:, t], 1)
    X = Concatenate(axis = -1)([context_vector[:, None, :], X])
    outputs, decoder_state_h, decoder_state_c = decoder_lstm(X, initial_state = [decoder_state_h, decoder_state_c])
    outputs = Dense(fra_vocab, activation = 'softmax')(outputs)

    all_outputs.append(outputs)

decoder_outputs = tensorflow.concat(all_outputs, axis = 1)

In [98]:
model = Model()

In [99]:
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

In [100]:
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 14)]         0           []                               
                                                                                                  
 embedding_2 (Embedding)        (None, 14, 256)      692992      ['input_3[0][0]']                
                                                                                                  
 input_4 (InputLayer)           [(None, 59)]         0           []                               
                                                                                                  
 lstm_2 (LSTM)                  [(None, 14, 256),    525312      ['embedding_2[0][0]']            
                                 (None, 256),                                               

In [101]:
model.fit(
    [encoder_input_data, decoder_input_data],
    np.expand_dims(decoder_target_data, -1),
    batch_size = 64,
    epochs = 3,
    validation_split = 0.2
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x21ac2f38250>