## Attention models : 1

### 1. Load and prepare the dataset

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Accumulators for the parallel corpus: source (English) and target (French)
# sentences are appended to these two lists by the loading cell.
eng_texts = []
fra_texts = []

# Upper bound on how many lines of the corpus file are consumed.
num_samples = 10_000

# Location of the tab-separated corpus file.
path = 'fra.txt'

In [3]:
# Read at most `num_samples` lines of the tab-separated corpus and collect the
# first two columns of every line into eng_texts / fra_texts.
# The file is iterated lazily so only the needed prefix is read.
with open(path, 'r', encoding = 'UTF-8') as f :
    for line_no, line in enumerate(f) :
        if line_no >= num_samples :
            break

        columns = line.rstrip('\n').split('\t')
        if len(columns) < 2 :
            # Blank or malformed line (for instance the empty tail of a file
            # shorter than num_samples): skip it instead of failing on the
            # two-value unpacking below.
            continue

        eng_text, fra_text = columns[:2]

        # Wrap the target sentence in '\t' ... '\n', presumably as
        # start / end-of-sequence markers.
        # NOTE(review): no space separates the markers from the sentence, so a
        # whitespace-splitting tokenizer will keep them attached to the first
        # and last word - confirm this is intended.
        fra_text = '\t' + fra_text + '\n'

        eng_texts.append(eng_text)
        fra_texts.append(fra_text)

In [4]:
# Sanity check: number of English sentences collected by the loading cell.
len(eng_texts)

10000

In [5]:
# Sanity check: number of French sentences collected; should match len(eng_texts).
len(fra_texts)

10000

In [6]:
# Longest sentence on each side, measured with len() on the raw strings,
# i.e. in characters (the French strings include their '\t' / '\n' wrappers).
max_eng_len = max(map(len, eng_texts))
max_fra_len = max(map(len, fra_texts))

In [7]:
# Show the maximum English / French sentence lengths side by side.
print(f"{max_eng_len} :: {max_fra_len}")

14 :: 59


### 2. Tokenization and pad_sequences

In [9]:
import tensorflow
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence  import pad_sequences

In [10]:
# Fit a tokenizer on the English sentences and convert each sentence to a
# list of integer ids. filters = '' disables character filtering, so
# punctuation is kept; lower = True lower-cases the text first.
eng_tokenizer = Tokenizer(filters = '', lower = True)
eng_tokenizer.fit_on_texts(eng_texts)
eng_sequences = eng_tokenizer.texts_to_sequences(eng_texts)

In [11]:
# Same procedure for the French sentences, with a separate tokenizer and
# therefore a separate vocabulary.
# NOTE(review): with filters = '' the '\t' / '\n' wrappers added at load time
# are not stripped; since they are not separated by spaces they presumably
# stay attached to the first / last word of each sentence - confirm.
fra_tokenizer = Tokenizer(filters = '', lower = True)
fra_tokenizer.fit_on_texts(fra_texts)
fra_sequences = fra_tokenizer.texts_to_sequences(fra_texts)

In [12]:
# Vocabulary sizes used to dimension the Embedding / output layers.
# One is added to the number of known words; presumably because tokenizer ids
# start at 1 and id 0 is left for padding - TODO confirm.
eng_vocab_size = len(eng_tokenizer.word_index) + 1
fra_vocab_size = len(fra_tokenizer.word_index) + 1
print(f"{eng_vocab_size} :: {fra_vocab_size}")

2707 :: 5806


In [13]:
# Left-pad ('pre') every id sequence to a fixed length so the data forms
# rectangular arrays.
# NOTE(review): max_eng_len / max_fra_len were computed with len() on the raw
# strings (characters), while these sequences hold tokenizer ids - confirm the
# resulting maxlen is what is wanted.
encoder_input_data = pad_sequences(eng_sequences, padding = 'pre', maxlen = max_eng_len)
decoder_input_data = pad_sequences(fra_sequences, padding = 'pre', maxlen = max_fra_len)

# Targets are the decoder inputs shifted one step to the left: position t of
# the target holds the id found at position t + 1 of the input, and the last
# position stays 0.
decoder_target_data = np.zeros(decoder_input_data.shape, dtype = decoder_input_data.dtype)
decoder_target_data[:, :-1] = decoder_input_data[:, 1:]

In [14]:
# Shape check of the padded encoder input array.
encoder_input_data.shape

(10000, 14)

In [15]:
# Shape check of the padded decoder input array.
decoder_input_data.shape

(10000, 59)

In [16]:
# Shape check of the shifted targets; same shape as decoder_input_data (built from its shape).
decoder_target_data.shape

(10000, 59)

### 3. Define the Bahdanau attention layer

In [17]:
from tensorflow.keras.layers import Layer, Dense

In [18]:
class BahdanauAttention(Layer) :
    """Additive attention layer.

    Scores every time step of `values` against `query` with
    ``V(tanh(W1(values) + W2(query)))``, normalises the scores with a softmax
    over axis 1 and returns the weighted sum of `values` over that axis.

    Args:
        units: width of the two projection layers W1 and W2.
        **kwargs: forwarded to the base `Layer` constructor (e.g. `name`).
    """

    def __init__(self, units, **kwargs) :
        super().__init__(**kwargs)
        # Kept so that get_config() can report how the layer was built.
        self.units = units
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)

    def call(self, query, values) :
        """Return `(context_vector, attention_weights)`.

        Assumes `query` is (batch, hidden) and `values` is
        (batch, time, hidden) - the code inserts an axis at position 1 of
        `query` and normalises / sums over axis 1 of `values`.
        """
        # Insert an axis at position 1 so the projected query broadcasts
        # against every position along axis 1 of `values`.
        query_with_time_axis = tensorflow.expand_dims(query, 1)
        score = self.V(tensorflow.nn.tanh(self.W1(values) + self.W2(query_with_time_axis)))
        # Normalise the scores along axis 1.
        attention_weights = tensorflow.nn.softmax(score, axis = 1)
        # Weighted sum of `values` along axis 1.
        context_vector = attention_weights * values
        context_vector = tensorflow.reduce_sum(context_vector, axis = 1)

        return context_vector, attention_weights

    def get_config(self) :
        """Return the constructor arguments needed to re-create the layer."""
        config = super().get_config()
        config.update({'units': self.units})
        return config

### 4. Building the encoder/decoder with attention

In [19]:
from tensorflow.keras.layers import Input, Embedding, LSTM, Concatenate, Dense
from tensorflow.keras.models import Model

In [20]:
# Shared width used for the embeddings, the LSTM state and the attention projections.
latent_dim = 256

In [21]:
# Encoder: integer ids -> embeddings -> LSTM.
# return_sequences = True keeps the output of every time step (encoder_outputs),
# return_state = True additionally returns the final hidden / cell states,
# which are used to initialise the decoder.
encoder_inputs = Input(shape = (max_eng_len, ))
encoder_emb = Embedding(eng_vocab_size, latent_dim)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state = True, return_sequences = True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_emb)

In [22]:
# Decoder input, embedding layer and LSTM cell.
# The shape is passed as a one-element tuple, matching the encoder input;
# `(max_fra_len)` without the trailing comma is just an int, which newer Keras
# versions reject as a shape.
decoder_inputs = Input(shape = (max_fra_len, ))
decoder_emb_layer = Embedding(fra_vocab_size, latent_dim)
decoder_emb = decoder_emb_layer(decoder_inputs)
# The same LSTM instance is called once per time step in the decoding loop,
# so it returns its states for them to be fed back in.
decoder_lstm = LSTM(latent_dim, return_state = True, return_sequences = True)

In [23]:
# Unrolled decoder: at every target position the current decoder hidden state
# queries the encoder outputs, the resulting context vector is concatenated
# with the embedded input token, and the pair is fed through the LSTM.
attention = BahdanauAttention(latent_dim)

# Layers shared by all time steps. Creating the Dense projection inside the
# loop would allocate an independent weight matrix per position.
merge_context = Concatenate(axis = -1)
output_projection = Dense(fra_vocab_size, activation = 'softmax')

all_outputs = []
decoder_state_h, decoder_state_c = state_h, state_c
for t in range(max_fra_len) :
    # Attend over the encoder sequence (the `values`), using the decoder
    # hidden state as the query.
    context_vector, _ = attention(decoder_state_h, encoder_outputs)
    # Embedded token at position t, keeping the time axis (length 1).
    X = decoder_emb[:, t:t + 1, :]
    X = merge_context([context_vector[:, None, :], X])
    output, decoder_state_h, decoder_state_c = decoder_lstm(X, initial_state = [decoder_state_h, decoder_state_c])
    all_outputs.append(output_projection(output))

# Stitch the per-step distributions back together along the time axis.
decoder_outputs = Concatenate(axis = 1)(all_outputs)

In [24]:
# Placeholder: `model` is rebound to the real encoder/decoder model in the
# following cell, so this empty instance is never used.
model = Model()

In [26]:
# Wire the two inputs to the concatenated per-step predictions, then set up
# training with integer (sparse) targets.
model = Model(inputs = [encoder_inputs, decoder_inputs], outputs = decoder_outputs)
model.compile(
    loss = 'sparse_categorical_crossentropy',
    optimizer = 'adam',
    metrics = ['accuracy'],
)

In [27]:
# Print the layer-by-layer structure and parameter counts of the model.
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 14)]         0           []                               
                                                                                                  
 embedding (Embedding)          (None, 14, 256)      692992      ['input_1[0][0]']                
                                                                                                  
 input_2 (InputLayer)           [(None, 59)]         0           []                               
                                                                                                  
 lstm (LSTM)                    [(None, 14, 256),    525312      ['embedding[0][0]']              
                                 (None, 256),                                               

In [32]:
# Train the model: the decoder receives decoder_input_data and is trained to
# predict decoder_target_data (the same ids shifted by one position).
# A trailing axis of size 1 is added to the integer targets.
model.fit(
    x = [encoder_input_data, decoder_input_data],
    y = decoder_target_data[..., np.newaxis],
    batch_size = 64,
    epochs = 5,
    validation_split = 0.2,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x16a19724a30>