### Sequence to Sequence Models


In [1]:
def print_line(*args):
    """Overwrite the current console line with the given values.

    Every argument is converted with ``str`` and the pieces are joined by a
    single space. The text is written with a leading carriage return and
    without a trailing newline, so consecutive calls reuse the same line.
    """
    text = ' '.join(map(str, args))
    print('\r' + text, end='')

In [2]:
def load_data():
  """Load the IWSLT2017 English-French dataset.

  The dataset is requested through ``datasets.load_dataset`` with
  ``a4-data/dataset`` as cache directory and verification checks disabled.

  Returns:
      The object returned by ``load_dataset`` for config 'iwslt2017-en-fr'.
  """
  import os
  from datasets import load_dataset

  cache_location = os.path.join('a4-data', 'dataset')
  return load_dataset(
      'iwslt2017',
      'iwslt2017-en-fr',
      cache_dir=cache_location,
      verification_mode='no_checks',
  )

In [3]:
def prepros_sen(data,lang):
  """Extract and lower-case the sentences of one language.

  Args:
      data: Sequence of records; each record is indexed with `lang`.
      lang: Key selecting the sentence inside every record (e.g. 'en').

  Returns:
      List[str]: The lower-cased sentences, in the order of `data`.
  """
  return [record[lang].lower() for record in data]

In [4]:
def load_tokenizer(tokenizer_file):
  """Build a ``tokenizers.Tokenizer`` from a serialized tokenizer file.

  Args:
      tokenizer_file: Path handed to ``Tokenizer.from_file``.

  Returns:
      The loaded tokenizer instance.
  """
  from tokenizers import Tokenizer

  return Tokenizer.from_file(tokenizer_file)

In [5]:
def Encode(data,tokenizer):
  """Convert sentences into lists of token ids.

  Args:
      data: Sequence of sentences passed one by one to `tokenizer.encode`.
      tokenizer: Object whose `encode(sentence)` result exposes an `ids` attribute.

  Returns:
      List with one `ids` value per sentence, in input order.
  """
  return [tokenizer.encode(sentence).ids for sentence in data]

In [6]:
import tensorflow as tf
tf.config.list_physical_devices('GPU')
from tensorflow.keras.models import Model
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GRU, Embedding,Dropout,Dense,Attention
from typing import List


class Encoder(Model):
  """Encoder made of an embedding lookup followed by a single GRU layer."""

  def __init__(self, vocab_size, embedding_size, units):
    """
      Creates the layers of the encoder.

      Args:
          vocab_size (int): First argument of the Embedding layer (number of token ids).
          embedding_size (int): Second argument of the Embedding layer (vector size per token).
          units (int): Number of units of the GRU layer.

      """
    super().__init__()
    # Token ids -> dense vectors.
    self.embedding = Embedding(vocab_size, embedding_size)
    # The GRU is configured to hand back the per-step outputs and its final state.
    self.gru = GRU(units, return_sequences=True, return_state=True)

  def call(self, source_ids, source_mask):
    """
      Encodes a batch of source token ids.

      Args:
          source_ids (tf.Tensor): Tensor of source token IDs.
          source_mask (tf.Tensor): Mask forwarded to the GRU as its `mask` argument.

      Returns:
          tf.Tensor: Sequence outputs of the GRU layer.
          tf.Tensor: Final state of the GRU layer.

      """
    embedded_source = self.embedding(source_ids)
    sequence_outputs, last_state = self.gru(inputs=embedded_source, mask=source_mask)
    return sequence_outputs, last_state
    

In [7]:
class Decoder(Model):
  """Decoder: embedding -> GRU -> dropout -> dense projection onto the vocabulary."""

  def __init__(self, vocab_size, embedding_size, units, dropout_rate):
    """
    Creates the layers of the decoder.

    Args:
        vocab_size (int): Size of the embedding table and of the Dense output layer.
        embedding_size (int): Size of the embedding vector for each token.
        units (int): Number of units in the GRU layer.
        dropout_rate (float): Rate given to the Dropout layer.

    """
    super().__init__()
    self.embedding = Embedding(vocab_size, embedding_size)
    self.dropout = Dropout(dropout_rate)
    self.gru = GRU(units, return_sequences=True)
    # One output value per vocabulary entry; no activation is configured here.
    self.classifier = Dense(units=vocab_size)

  def call(self, target_ids, initial_state, target_mask):
    """
        Runs the decoder over whole target sequences.

        Args:
            target_ids (tf.Tensor): Tensor of target token IDs.
            initial_state (tf.Tensor): Initial state handed to the GRU layer.
            target_mask (tf.Tensor): Mask forwarded to the GRU as its `mask` argument.

        Returns:
            tf.Tensor: Output of the Dense classifier applied to the (dropped-out) GRU outputs.

        """
    hidden = self.gru(
        inputs=self.embedding(target_ids),
        mask=target_mask,
        initial_state=initial_state,
    )
    return self.classifier(self.dropout(hidden))

  def predict(self, target_ids, initial_state):
    """
        Advances the decoder by a single step using the GRU layer's cell.

        The dropout layer is not applied on this path and the cell is called
        with `training=False`.
        NOTE(review): this method shares its name with `tf.keras.Model.predict`
        but has a different signature.

        Args:
            target_ids (tf.Tensor): Tensor of target token IDs for the current step.
            initial_state (tf.Tensor): State passed to the GRU cell as `states`.

        Returns:
            tf.Tensor: Output of the Dense classifier for this step.
            tf.Tensor: State returned by the GRU cell.

        """
    step_input = self.embedding(target_ids)
    step_output, next_state = self.gru.cell(inputs=step_input, states=initial_state, training=False)
    return self.classifier(step_output), next_state


In [8]:
class Seq2seq(Model):
  """
    Seq2seq class that extends the Model class for a custom sequence-to-sequence model.

    Args:
        source_vocab_size (int): Vocabulary size handed to the encoder.
        target_vocab_size (int): Vocabulary size handed to the decoder.
        embedding_size (int): Size of the embedding vector for each token.
        units (int): Number of units in the encoder and decoder GRU layers.
        dropout_rate (float): Dropout rate handed to the decoder.
        pad_token_id (int, optional): Token id treated as padding when the masks are
            built. When left as None, the id is looked up on every call from the
            module-level `fr_tokenizer` (the original behaviour).

    """
  def __init__(self, source_vocab_size, target_vocab_size, embedding_size, units, dropout_rate, pad_token_id=None):

    super(Seq2seq, self).__init__()
    # Explicit pad id removes the dependency on a global tokenizer; None keeps the legacy lookup.
    self.pad_token_id = pad_token_id
    self.encoder = Encoder(vocab_size=source_vocab_size, embedding_size=embedding_size, units=units)
    self.decoder = Decoder(vocab_size=target_vocab_size, embedding_size=embedding_size, units=units, dropout_rate=dropout_rate)

  def _resolve_pad_token(self):
    """Returns the configured pad id, falling back to the global `fr_tokenizer` lookup."""
    if self.pad_token_id is not None:
      return self.pad_token_id
    return fr_tokenizer.token_to_id('<pad>')

  def call(self, source_ids, source_seq_lens, target_ids, target_seq_lens):
    """
        Encodes the source batch and decodes the target batch from the encoder's final state.

        Args:
            source_ids (tf.Tensor): Tensor of source token IDs.
            source_seq_lens (tf.Tensor): Source sequence lengths (accepted but not used here).
            target_ids (tf.Tensor): Tensor of target token IDs.
            target_seq_lens (tf.Tensor): Target sequence lengths (accepted but not used here).

        Returns:
            tf.Tensor: Output of the decoder.

        """
    pad_token = self._resolve_pad_token()
    # The same pad id masks both sides; this matches pad_batch, which pads source
    # and target with one shared pad value.
    _, enc_state = self.encoder(source_ids, source_ids != pad_token)
    return self.decoder(target_ids, enc_state, target_ids != pad_token)


In [10]:
def seq2seq_loss(outputs, target, seq_lens):
  """
    Cross-entropy sequence loss restricted to the valid (non-padded) positions.

    A float mask is built from `seq_lens` with `tf.sequence_mask` and passed as
    the weights of `tensorflow_addons.seq2seq.sequence_loss`, which is asked to
    average across both batch and timesteps.

    Args:
        outputs (tf.Tensor): Model outputs passed as the first argument of `sequence_loss`.
        target (tf.Tensor): Target token ids.
        seq_lens (tf.Tensor): Lengths of the target sequences.

    Returns:
        tf.Tensor: The value returned by `sequence_loss`.

    """
  from tensorflow_addons.seq2seq import sequence_loss

  weights = tf.sequence_mask(seq_lens, dtype=tf.dtypes.float32)
  return sequence_loss(
      outputs,
      target,
      weights,
      average_across_timesteps=True,
      average_across_batch=True,
      sum_over_timesteps=False,
      sum_over_batch=False,
  )

In [11]:
def source_batch_pad(source_batch, source_seq_lens, pad_val):
  """
    Right-pads every source sequence to the longest length listed in `source_seq_lens`.

    Args:
        source_batch (List[List[int]]): Source sequences of the batch.
        source_seq_lens (List[int]): Lengths of the source sequences.
        pad_val (int): Value appended to sequences shorter than the maximum.

    Returns:
        Tuple[tf.Tensor, tf.Tensor]: The padded batch and the sequence lengths, both as int64 tensors.

    """
  longest = max(source_seq_lens)
  padded_rows = [row + [pad_val] * (longest - len(row)) for row in source_batch]
  return (
      tf.convert_to_tensor(padded_rows, dtype=tf.int64),
      tf.convert_to_tensor(source_seq_lens, dtype=tf.int64),
  )

In [12]:
def target_batch_pad(target_batch, target_seq_lens, pad_val):
  """
    Builds the shifted decoder input/output pair for a target batch and pads both.

    The input drops the last token of every sentence, the output drops the
    first one, and every length is reduced by one accordingly.

    Args:
        target_batch (List[List[int]]): Target sequences of the batch.
        target_seq_lens (List[int]): Lengths of the target sequences.
        pad_val (int): Value appended to sequences shorter than the maximum.

    Returns:
        Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: Padded decoder inputs, padded decoder outputs and the reduced lengths, as int64 tensors.

    """
  # zip keeps sentences and lengths paired (and stops at the shorter of the two).
  pairs = list(zip(target_batch, target_seq_lens))
  decoder_inputs = [sent[:-1] for sent, _ in pairs]
  decoder_outputs = [sent[1:] for sent, _ in pairs]
  shifted_lens = [seq_len - 1 for _, seq_len in pairs]

  width = max(shifted_lens)

  def pad_to_width(seq):
      return seq + [pad_val] * (width - len(seq))

  padded_inputs = [pad_to_width(seq) for seq in decoder_inputs]
  padded_outputs = [pad_to_width(seq) for seq in decoder_outputs]
  return (
      tf.convert_to_tensor(padded_inputs, dtype=tf.int64),
      tf.convert_to_tensor(padded_outputs, dtype=tf.int64),
      tf.convert_to_tensor(shifted_lens, dtype=tf.int64),
  )

In [13]:
def pad_batch(source_batch, source_seq_lens, target_batch, target_seq_lens, pad_val):
  """
    Pads a source batch and a target batch with the same padding value.

    Delegates to `source_batch_pad` and `target_batch_pad` and concatenates their results.

    Args:
        source_batch (List[List[int]]): Source sequences of the batch.
        source_seq_lens (List[int]): Lengths of the source sequences.
        target_batch (List[List[int]]): Target sequences of the batch.
        target_seq_lens (List[int]): Lengths of the target sequences.
        pad_val (int): Padding value used for both sides.

    Returns:
        Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]: Padded source batch, source lengths, padded target inputs, padded target outputs and target lengths.

    """
  source_part = source_batch_pad(source_batch, source_seq_lens, pad_val)
  target_part = target_batch_pad(target_batch, target_seq_lens, pad_val)
  return (*source_part, *target_part)

In [14]:
from typing import List
import numpy as np 
class SeqLenBatchSampler:
  """
    Batch sampler that groups example indices of similar sequence length.

    Indices are ordered by sequence length and cut into consecutive chunks of
    `batch_size`; the order of the chunks is reshuffled every time iteration starts.
    The object is its own iterator, so only one iteration can be active at a time.

    Args:
        seq_lens (List[int]): List of sequence lengths.
        batch_size (int): Number of indices per batch (the last batch may be smaller).
        seed (int, optional): Value passed to `np.random.seed` on construction. Defaults to 6666.

    Attributes:
        seq_lens (List[int]): List of sequence lengths.
        batch_size (int): Batch size for generating batches.
        batches (List[List[int]]): List of batches, where each batch holds indices.
        n_batch (int): Total number of batches.
        counter (int): Position of the batch returned last during iteration (-1 before the first).

    """
  def __init__(self, seq_lens, batch_size, seed: int = 6666):
      # Seeds numpy's global random state, which __iter__ uses for shuffling.
      np.random.seed(seed)
      self.seq_lens = seq_lens
      self.batch_size = batch_size
      self.batches = self._make_batch_index()
      self.n_batch = len(self.batches)
      self.counter = -1

  def _make_batch_index(self) -> List[List[int]]:
      """Returns the length-sorted indices split into chunks of `batch_size`."""
      total = len(self.seq_lens)
      n_batch = int(np.ceil(total / self.batch_size))
      order = np.argsort(self.seq_lens)
      size = self.batch_size
      return [order[k * size:k * size + size] for k in range(n_batch)]

  def __len__(self):
      return self.n_batch

  def __getitem__(self, index):
      return self.batches[index]

  def __iter__(self):
      # New pass: reshuffle the batch order in place and rewind.
      np.random.shuffle(self.batches)
      self.counter = -1
      return self

  def __next__(self):
      self.counter += 1
      if self.counter >= self.n_batch:
          raise StopIteration
      return self.batches[self.counter]


In [15]:
def train_model(model, train_fr, train_seq_lens_fr, train_en, train_seq_lens_en, fr_val, valid_seq_lens_fr, eng_val, valid_seq_lens_en,
              num_epoch, batch_size, optimizer, pad_token_id):
  """
    Train a sequence-to-sequence model for machine translation.

    Training batches come from the module-level `train_batch_sampler`, which
    must exist before this function is called. Validation data is consumed in
    consecutive slices of `batch_size`.

    Args:
        model (tf.keras.Model): The sequence-to-sequence model to be trained.
        train_fr: The training source sequences; indexed with the index arrays
            yielded by the sampler (the caller passes a numpy object array).
        train_seq_lens_fr: The lengths of the training source sequences (indexed the same way).
        train_en: The training target sequences (indexed the same way).
        train_seq_lens_en: The lengths of the training target sequences (indexed the same way).
        fr_val (List[List[int]]): The validation source sequences as lists of token indices.
        valid_seq_lens_fr (List[int]): The lengths of the validation source sequences.
        eng_val (List[List[int]]): The validation target sequences as lists of token indices.
        valid_seq_lens_en (List[int]): The lengths of the validation target sequences.
        num_epoch (int): The number of epochs to train the model.
        batch_size (int): The batch size used to slice the validation data.
        optimizer (tf.keras.optimizers.Optimizer): The optimizer used for model optimization.
        pad_token_id (int): The token index for padding in the sequences.

    Returns:
        train_losses (list): One batch-size-weighted mean training loss per epoch.
        valid_losses (list): One batch-size-weighted mean validation loss per epoch.
        The entries are whatever `seq2seq_loss` arithmetic yields (scalar tensors, not Python floats).
    """
  import numpy as np

  n_valid_batch = int(np.ceil(len(fr_val) / batch_size))
  n_train_batch = len(train_batch_sampler)
  train_losses, valid_losses = [], []

  for epoch in range(num_epoch):
      epoch_loss = 0.0
      for batch_idx, data_index in enumerate(train_batch_sampler):
          source_batch, source_seq_lens = train_fr[data_index], train_seq_lens_fr[data_index]
          target_batch, target_seq_lens = train_en[data_index], train_seq_lens_en[data_index]
          (source_batch, source_seq_lens_batch,
            target_x_batch, target_y_batch, target_seq_lens_batch) = pad_batch(source_batch, source_seq_lens,
                                                                      target_batch, target_seq_lens,
                                                                      pad_val=pad_token_id)

          with tf.GradientTape() as tape:
              # training=True is required for layers such as Dropout to be active;
              # a bare model(...) call runs them in inference mode.
              output = model(source_batch, source_seq_lens_batch, target_x_batch, target_seq_lens_batch, training=True)
              loss = seq2seq_loss(output, target_y_batch, target_seq_lens_batch)

          print_line(f'Epoch {epoch + 1} / {num_epoch} - Step {batch_idx + 1} / {n_train_batch} - loss: {loss:.4f}')

          trainable_vars = model.trainable_variables
          gradients = tape.gradient(loss, trainable_vars)

          # Update weights
          optimizer.apply_gradients(zip(gradients, trainable_vars))
          # Weight the batch mean by the batch size so the epoch mean is per example.
          epoch_loss += loss * len(source_batch)

      valid_loss = 0.0
      for batch_idx in range(n_valid_batch):
          start = batch_idx * batch_size
          end = start + batch_size
          source_batch, source_seq_lens = fr_val[start:end], valid_seq_lens_fr[start:end]
          target_batch, target_seq_lens = eng_val[start:end], valid_seq_lens_en[start:end]
          (source_batch, source_seq_lens_batch,
            target_x_batch, target_y_batch, target_seq_lens_batch) = pad_batch(source_batch, source_seq_lens,
                                                                      target_batch, target_seq_lens,
                                                                      pad_val=pad_token_id)
          output = model(source_batch, source_seq_lens_batch, target_x_batch, target_seq_lens_batch, training=False)
          loss = seq2seq_loss(output, target_y_batch, target_seq_lens_batch)

          # Progress is reported for every validation batch.
          print_line(f'Epoch {epoch + 1} / {num_epoch} - Step {batch_idx + 1} / {n_valid_batch} - loss: {loss:.4f}')

          valid_loss += loss * len(source_batch)

      train_epoch_loss = epoch_loss / len(train_fr)
      valid_epoch_loss = valid_loss / len(eng_val)
      train_losses.append(train_epoch_loss)
      valid_losses.append(valid_epoch_loss)
      print(f'\rEpoch {epoch + 1} / {num_epoch} - Step {n_train_batch} / {n_train_batch} - train loss: {train_epoch_loss:.4f} - valid loss: {valid_epoch_loss:.4f}')
  return train_losses,valid_losses


In [16]:
def fr_to_eng(encoder,decoder,french_sentences):
  """
    Translates French sentences to English with greedy decoding.

    Every sentence is encoded once; the decoder is then advanced one token at a
    time, always feeding back the highest-scoring token, until the end token is
    produced or the prediction holds `max_pred_len` ids (start token included).

    Relies on these module-level names: `pad_token_id`, `start_os_token_id`,
    `end_os_token_id`, `max_pred_len` and `eng_tokenizer`.

    Parameters:
        encoder (tf.keras.Model): Called as `encoder(source_ids, mask)`; must return (outputs, final_state).
        decoder (tf.keras.Model): Its `predict(token, state)` must return (scores, new_state).
        french_sentences (List[List[int]]): French sentences, each a list of integer token IDs.

    Returns:
        List[str]: One decoded English sentence per input sentence, in input order.

    """
  pred_sentences = []
  for source_ids in french_sentences:
      # Add a batch dimension of 1 in front of the token ids.
      source_ids = tf.expand_dims(tf.convert_to_tensor(source_ids, dtype=tf.int64), axis=0)
      _, state = encoder(source_ids, source_ids != pad_token_id)

      # The prediction must live in a real list: it is both the loop condition
      # and the input of the next decoding step.
      pred_ids = [start_os_token_id]
      while pred_ids[-1] != end_os_token_id and len(pred_ids) < max_pred_len:
          token = tf.reshape(tf.convert_to_tensor(pred_ids[-1]), (1,))
          decoder_output, state = decoder.predict(token, state)
          # Greedy choice; cast to a plain int so the list holds uniform Python ints.
          pred_ids.append(int(tf.math.argmax(decoder_output[0]).numpy()))

      pred_sentences.append(eng_tokenizer.decode(pred_ids))
  return pred_sentences



In [17]:
def sacrebleu_score(eng_sentences_test,test_pred):
  """
  Compute the corpus BLEU score with the 'sacrebleu' metric of the `evaluate` library.

  The metric is loaded with 'a4-data/dataset' as cache directory. Each
  reference sentence is wrapped in a single-element list before scoring.

  Args:
    eng_sentences_test (list): Reference English sentences.
    test_pred (list): Predicted sentences, aligned by position with the references.

  Returns:
    The 'score' entry of the metric's result.
  """
  import os
  import evaluate

  cache_location = os.path.join('a4-data', 'dataset')
  metric = evaluate.load('sacrebleu', cache_dir=cache_location)

  n_examples = len(eng_sentences_test)
  references = [[eng_sentences_test[pos]] for pos in range(n_examples)]
  predictions = [test_pred[pos] for pos in range(n_examples)]

  return metric.compute(predictions=predictions, references=references)['score']

In [18]:
if __name__== '__main__':
    # Script entry point: loads and encodes the data, fixes the random seeds,
    # and builds the model, optimizer and batch sampler. The names defined here
    # (tokenizers, token ids, max_pred_len, train_batch_sampler, ...) are read
    # as module-level globals by functions defined earlier in the file.

    import random
    import numpy as np
    import tensorflow as tf

    # Splitting data into train, test and validation
    dataset= load_data()
    train_data= dataset['train']['translation']
    test_data= dataset['test']['translation']
    val_data= dataset['validation']['translation']
    print(len(train_data),len(test_data),len(val_data))

    # Using the prepros_sen function to convert all the text to lower case
    eng_sentences_train= prepros_sen(train_data,'en')
    eng_sentences_test= prepros_sen(test_data,'en')
    eng_sentences_val= prepros_sen(val_data,'en')

    fr_sentences_train= prepros_sen(train_data,'fr')
    fr_sentences_test= prepros_sen(test_data,'fr')
    fr_sentences_val= prepros_sen(val_data,'fr')

    # Loading the tokenizer files using the load_tokenizer function
    eng_tokenizer= load_tokenizer('en_tokenizer.json')
    fr_tokenizer= load_tokenizer('fr_tokenizer.json')

    # Encoding English and French sentences into token ids using the Encode function
    eng_train= Encode(eng_sentences_train,eng_tokenizer)
    eng_test= Encode(eng_sentences_test,eng_tokenizer)
    eng_val= Encode(eng_sentences_val,eng_tokenizer)

    fr_train= Encode(fr_sentences_train,fr_tokenizer)
    fr_test= Encode(fr_sentences_test,fr_tokenizer)
    fr_val= Encode(fr_sentences_val,fr_tokenizer)

    # Obtaining the sequence lengths (number of token ids per sentence)
    np.random.seed(6666)
    train_seq_lens_en = [len(en_sent) for en_sent in eng_train]
    train_seq_lens_fr = [len(fr_sent) for fr_sent in fr_train]
    valid_seq_lens_en = [len(en_sent) for en_sent in eng_val]
    valid_seq_lens_fr = [len(fr_sent) for fr_sent in fr_val]
    test_seq_lens_en = [len(en_sent) for en_sent in eng_test]
    test_seq_lens_fr = [len(fr_sent) for fr_sent in fr_test]

    # Only the training data is converted to numpy arrays: train_model indexes
    # it with the index arrays yielded by the batch sampler.
    train_en = np.array(eng_train, dtype=object)
    train_seq_lens_en = np.array(train_seq_lens_en)
    train_fr = np.array(fr_train, dtype=object)
    train_seq_lens_fr = np.array(train_seq_lens_fr)

    # Specifying the parameters for the seq2seq model
    seed = 6666
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    source_vocab_size = len(fr_tokenizer.get_vocab())
    target_vocab_size = len(eng_tokenizer.get_vocab())
    hidden_units = 256
    embedding_dim = 128
    dropout_rate = 0.0
    num_epoch = 15
    batch_size = 256
    learning_rate = 1e-3
    # The pad id is taken from the French tokenizer and used for both languages.
    pad_token_id= fr_tokenizer.token_to_id('<pad>')
    start_os_token_id = eng_tokenizer.token_to_id('<s>')
    end_os_token_id = eng_tokenizer.token_to_id('</s>')
    # Upper bound on the number of token ids in a predicted sentence (read by fr_to_eng).
    max_pred_len = 200

    # defining the model
    model = Seq2seq(source_vocab_size, target_vocab_size, embedding_dim, hidden_units, dropout_rate)
    # defining the optimizer
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    # defining the batch sampler (batches are grouped by French sentence length)
    train_batch_sampler = SeqLenBatchSampler(train_seq_lens_fr, batch_size)



Found cached dataset iwslt2017 (/Users/vishal./CS584/Assignment 4/a4-data/dataset/iwslt2017/iwslt2017-en-fr/1.0.0/03ce9110373117c6f6687719f49f269486a8cd49dcad2527993a316cd4b6ad49)


  0%|          | 0/3 [00:00<?, ?it/s]

232825 8597 890
Metal device set to: Apple M1 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2023-04-18 15:25:46.628326: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-04-18 15:25:46.628756: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [27]:
# Train the model; train_model returns one training loss and one validation loss per epoch.
train_losses,valid_losses= train_model(model, train_fr, train_seq_lens_fr, train_en, train_seq_lens_en, fr_val, valid_seq_lens_fr, eng_val, valid_seq_lens_en,
                num_epoch, batch_size, optimizer, pad_token_id)

Epoch 1 / 15 - Step 910 / 910 - train loss: 5.8705 - valid loss: 5.7153
Epoch 2 / 15 - Step 910 / 910 - train loss: 4.7454 - valid loss: 4.9459
Epoch 3 / 15 - Step 910 / 910 - train loss: 4.1118 - valid loss: 4.6393
Epoch 4 / 15 - Step 910 / 910 - train loss: 3.7550 - valid loss: 4.4587
Epoch 5 / 15 - Step 910 / 910 - train loss: 3.5060 - valid loss: 4.3174
Epoch 6 / 15 - Step 910 / 910 - train loss: 3.3088 - valid loss: 4.2379
Epoch 7 / 15 - Step 910 / 910 - train loss: 3.1509 - valid loss: 4.1491
Epoch 8 / 15 - Step 910 / 910 - train loss: 3.0229 - valid loss: 4.1121
Epoch 9 / 15 - Step 910 / 910 - train loss: 2.9158 - valid loss: 4.0595
Epoch 10 / 15 - Step 910 / 910 - train loss: 2.8252 - valid loss: 4.0495
Epoch 11 / 15 - Step 910 / 910 - train loss: 2.7489 - valid loss: 4.0382
Epoch 12 / 15 - Step 910 / 910 - train loss: 2.6808 - valid loss: 4.0145
Epoch 13 / 15 - Step 910 / 910 - train loss: 2.6213 - valid loss: 4.0032
Epoch 14 / 15 - Step 910 / 910 - train loss: 2.5694 - valid 

In [28]:
# Translate the encoded French test sentences into English with the trained encoder and decoder.
test_pred = fr_to_eng(model.encoder, model.decoder, fr_test)

In [38]:
# Compare 20 model predictions with the source and reference sentences.
# random.sample picks 20 distinct positions of the test set (no repeats).
indices = random.sample(range(len(fr_sentences_test)), 20)

# Print source, reference and prediction for every sampled position.
for sample_idx in indices:
    print(f"Fr: {fr_sentences_test[sample_idx]}")
    print(f"En: {eng_sentences_test[sample_idx]}")
    print(f"Pred_en: {test_pred[sample_idx]}")
    print("\n")

Fr: si vous pensez que celle de gauche est la jazz, et celle de droite est la swing, applaudissez.
En: if you think the one on the left is jazz and the one on the right is swing,  clap your hands.
Pred_en:  if you think the one hand, the one hand, the one hand, the left is a brown, and it's a secret.


Fr: ça poussera beaucoup de monde à regarder, parce que les gens veulent vivre cette expérience.
En: it will make tons of people watch,  because people want this experience.
Pred_en:  it's a big life on people, because people want to live in this kind of hard experience.


Fr: en 2003, mon frère issu d'une autre mère et d'un autre père, dean obeidallah et moi-même, avons créé le festival du rire arabo-américain de new york, qui en est à sa dixième année maintenant.
En: in 2003, my brother from another mother and father  dean obeidallah and i started  the new york arab-american comedy festival,  now in its 10th year.
Pred_en:  in 2003, my brother and another new generation of mine, and i 

In [54]:
# Score the predictions against the lower-cased English test sentences and display the result.
BLEU_score= sacrebleu_score(eng_sentences_test,test_pred)
BLEU_score

7.268485393056628

*   Both the encoder and decoder are built on GRU layers
*   A train loss of 2.52 and a validation loss of 4 are obtained after training the model for 15 epochs
*   BLEU score is used as an evaluation metric to assess the machine translation model
*   The BLEU score obtained by the model is 7.27
*   Regarding the model's performance, the model would benefit from further training: the 20 sample predictions show that the translations are not accurate
*   It is observed that predictions for shorter sentences are better than those for longer sentences