In [47]:
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping

In [48]:
# sample data = english t french translation
english_sentences = ['hello', 'how are you', 'good morning', 'good night', 'thank you']
french_sentences = ['bonjour', 'comment ca va', 'bonjour', 'bonne nuit', 'merci']

In [49]:
# Add START and END tokens to the French (output) sentences
french_sentences = ['starttoken '+ sentence+' endtoken' for sentence in french_sentences]

# Hyperparameters
batch_size =2
epochs= 100
latent_dim = 256 # Dimensionality of the encoding space

In [50]:
french_sentences

['starttoken bonjour endtoken',
 'starttoken comment ca va endtoken',
 'starttoken bonjour endtoken',
 'starttoken bonne nuit endtoken',
 'starttoken merci endtoken']

In [51]:
# Initialize the Tokenizer for both source and target languages
eng_tokenizer = Tokenizer(char_level = False)
fra_tokenizer = Tokenizer(char_level = False)

# Fit the Tokenizer on the sentences
eng_tokenizer.fit_on_texts(english_sentences)
fra_tokenizer.fit_on_texts(french_sentences)

In [52]:
display(eng_tokenizer.word_index)
display(fra_tokenizer.word_index)

{'you': 1,
 'good': 2,
 'hello': 3,
 'how': 4,
 'are': 5,
 'morning': 6,
 'night': 7,
 'thank': 8}

{'starttoken': 1,
 'endtoken': 2,
 'bonjour': 3,
 'comment': 4,
 'ca': 5,
 'va': 6,
 'bonne': 7,
 'nuit': 8,
 'merci': 9}

In [53]:
# convert the sentences into sequences of integes
encoder_input_data = eng_tokenizer.texts_to_sequences(english_sentences)
decoder_input_data = fra_tokenizer.texts_to_sequences(french_sentences)

In [54]:
# Pad the sequences to ensure uniform length
max_encoder_seq_length = max([len(seq) for seq in encoder_input_data])
max_decoder_seq_length = max([len(seq) for seq in decoder_input_data])

In [55]:
max_encoder_seq_length,  max_decoder_seq_length

(3, 5)

In [56]:
encoder_input_data= pad_sequences(encoder_input_data, maxlen=max_encoder_seq_length, padding = 'post')
decoder_input_data= pad_sequences(decoder_input_data, maxlen=max_decoder_seq_length, padding = 'post')

In [57]:
encoder_input_data

array([[3, 0, 0],
       [4, 5, 1],
       [2, 6, 0],
       [2, 7, 0],
       [8, 1, 0]], dtype=int32)

In [58]:
decoder_input_data

array([[1, 3, 2, 0, 0],
       [1, 4, 5, 6, 2],
       [1, 3, 2, 0, 0],
       [1, 7, 8, 2, 0],
       [1, 9, 2, 0, 0]], dtype=int32)

In [59]:
# Prepare the target data (decoder output, shifted by one time step)
decoder_output_data = np.zeros_like(decoder_input_data)
decoder_output_data[:,0:-1] = decoder_input_data[:,1:]

In [60]:
decoder_output_data

array([[3, 2, 0, 0, 0],
       [4, 5, 6, 2, 0],
       [3, 2, 0, 0, 0],
       [7, 8, 2, 0, 0],
       [9, 2, 0, 0, 0]], dtype=int32)

In [61]:
decoder_input_data # time step differnce between the output data and input data

array([[1, 3, 2, 0, 0],
       [1, 4, 5, 6, 2],
       [1, 3, 2, 0, 0],
       [1, 7, 8, 2, 0],
       [1, 9, 2, 0, 0]], dtype=int32)

In [62]:
# Define the vocabulary size (total unique tokens)
num_encoder_tokens = len(eng_tokenizer.word_index) + 1
num_decoder_tokens = len(fra_tokenizer.word_index) + 1

In [63]:
num_encoder_tokens, num_decoder_tokens

(9, 10)

In [64]:
# Define the input sequence and output sequence for the Seq2Seq mode
encoder_inputs = Input(shape=(None,))
decoder_inputs = Input(shape=(None,))

In [65]:
encoder_inputs

<KerasTensor shape=(None, None), dtype=float32, sparse=False, name=keras_tensor_28>

In [66]:
# Encoder
encoder_embedding = Embedding(input_dim = num_encoder_tokens, output_dim = latent_dim, input_length = max_encoder_seq_length)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm (encoder_embedding)



In [67]:
encoder_outputs

<KerasTensor shape=(None, 256), dtype=float32, sparse=False, name=keras_tensor_31>

In [68]:
state_h

<KerasTensor shape=(None, 256), dtype=float32, sparse=False, name=keras_tensor_32>

In [69]:
state_c


<KerasTensor shape=(None, 256), dtype=float32, sparse=False, name=keras_tensor_33>

In [70]:
decoder_embedding = Embedding(input_dim = num_decoder_tokens, output_dim = latent_dim)(decoder_inputs)
decoder_lstm = LSTM(latent_dim,return_sequences= True, return_state=True)
decoder_outputs, _, _ = decoder_lstm (decoder_embedding, initial_state= [state_h, state_c])

In [71]:
# Dense layer for generating predictions
decoder_dense= Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

In [72]:
# Define the model
model = Model ([encoder_inputs, decoder_inputs], decoder_outputs)

In [73]:
model.summary()

In [74]:
# Compile the model
model.compile( optimizer ='adam',
              loss='sparse_categorical_crossentropy',
               metrics=['accuracy'])

In [75]:
# Train the model
model.fit([encoder_input_data, decoder_input_data], np.expand_dims(decoder_output_data, -1),
          batch_size=batch_size,
          epochs=epochs)

Epoch 1/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 36ms/step - accuracy: 0.2225 - loss: 2.2816
Epoch 2/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.4525 - loss: 2.1479
Epoch 3/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.5025 - loss: 1.9051
Epoch 4/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.5275 - loss: 1.4421
Epoch 5/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - accuracy: 0.4775 - loss: 1.2824
Epoch 6/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.4525 - loss: 1.3194
Epoch 7/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.5025 - loss: 1.0423
Epoch 8/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.7275 - loss: 0.8970
Epoch 9/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

<keras.src.callbacks.history.History at 0x79ddf524fca0>

In [78]:
# Create infernece models for the encoder and decocder for predictions
#Encoder model
encoder_model = Model(encoder_inputs, [encoder_outputs, state_h, state_c])

In [79]:
# Decoder model (for inference)
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_lstm_inf =LSTM (latent_dim, return_sequences= True, return_state=True)
decoder_output_inf, state_h_inf, state_c_inf = decoder_lstm_inf(
    decoder_embedding, initial_state=[decoder_state_input_h, decoder_state_input_c])
decoder_output_inf = decoder_dense(decoder_output_inf)
decoder_model = Model([decoder_inputs, decoder_state_input_h,
                       decoder_state_input_c],
                      [decoder_output_inf, state_h_inf, state_c_inf])

In [89]:
# Function to decode the sequence
def decode_sequence(input_seq):
  # Get the encoder states
  states_value = encoder_model.predict(input_seq)
  # Generate an initial target sequence (the start token)
  target_seq = np.zeros((1, 1))
  target_seq[0, 0] = fra_tokenizer.word_index['starttoken'] # start token index

  # Sample output tokens
  stop_condition = False
  decoded_sentence = ''
  while not stop_condition:
    # Correct the prediction input to match expected inputs
    output_tokens, h, c = decoder_model.predict([target_seq, states_value[1], states_value[2]])

    # Sample the next token
    sampled_token_index = np.argmax(output_tokens[0,
                                                  -1,:])
    sampled_token = fra_tokenizer.index_word[sampled_token_index]

    decoded_sentence += ' ' + sampled_token

    # Stop if we hit the end token or reach max length
    if sampled_token == 'endtoken' or len(decoded_sentence) > max_decoder_seq_length:
      stop_condition = True

    # Update the target sequence and states
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = sampled_token_index

    #update the states
    states_value = [h, c]

  return decoded_sentence

In [94]:
# test the decoder with a simple sentence
input_seq = encoder_input_data[2:3] # sample input sentance


In [95]:
input_seq

array([[2, 6, 0]], dtype=int32)

In [96]:
encoder_input_data

array([[3, 0, 0],
       [4, 5, 1],
       [2, 6, 0],
       [2, 7, 0],
       [8, 1, 0]], dtype=int32)

In [97]:
decoded_sentence = decode_sequence(input_seq)
print(decoded_sentence)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
 bonjour


In [98]:
new = 'good night'

In [99]:
input_seq = encoder_input_data[3:4]

In [100]:
decoded_sentence = decode_sequence(input_seq)
print(decoded_sentence)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
 bonne
