# Machine Translation

# Seq2Seq Machine Translation model for Nyanja-English
# UNZA
## Zambezi-Voice
### Supervisor: Mr. Clayton Sikasote

## Installing Required Libraries

In [1]:
import tensorflow as tf

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.model_selection import train_test_split

import unicodedata
import re
import numpy as np
import os
import io
import time

### Dataset Preparation:

In [2]:
file = open("nya.txt", 'r', encoding = "utf8")
raw_data = []

for line in file:
    pos = line.find("CC-BY")
    line = line[:pos-1]
    
    # Split the data into english and Italian
    eng, nya = line.split('\t')
    
    # form tuples of the data
    data = eng, nya
    raw_data.append(data)
    
file.close()

def convert(list): 
    return tuple(list) 

data = convert(raw_data)

In [3]:
data

(('Bere.', 'Breast.'),
 ('Bereka.', 'to bear.'),
 ('Biliwila.', 'to be green.'),
 ('Bingu.', 'roar of thunder.'),
 ('Birimankhwe.', 'Chameleon.'),
 ('Bisa.', 'To hide something.'),
 ('Bisala.', 'to hide oneself.'),
 ('Bodza.', 'a lie.'),
 ('Bondo.', 'Knee.'),
 ('Bongo-Bongo.', 'Brain.'),
 ('Boola.', 'to pierce.'),
 ('Bovu.', 'cheeks.'),
 ('Buluzi.', 'Lizard.'),
 ('Busa.', 'to herd.'),
 ('Busa.', 'place for flock to rest or eat.'),
 ('Buthu.', 'a little girl who has not yet reached puberty.'),
 ('Buula.', 'to groan.'),
 ('Bvaka.', 'to clothe someone.'),
 ('Bvala.', 'to put on clothes.'),
 ('Bveka.', 'to clothe someone.'),
 ('Bvika.', 'to thatch.'),
 ('Bviika.', 'to dip.'),
 ('Bvina.', 'to dance.'),
 ('Bvumbala.', 'to take out.'),
 ('Bvomera.', 'to agree.'),
 ('Bvula.', 'to take off clothes.'),
 ('Bvulala.', 'to be wounded.'),
 ('Bvulaza.', 'to hurt someone seriously.'),
 ('Bvumbwe.', 'wild cat.'),
 ('Bvumbwa.', 'to be wet with rain.'),
 ('Bvunda.', 'to be rotten.'),
 ('Bvundikila.', 'to

## Encodding Characters

In [4]:
def unicode_to_ascii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn')


def preprocess_sentence(s):
    s = unicode_to_ascii(s.lower())
    s = re.sub(r'([!.?])', r' \1', s)
    s = re.sub(r'[^a-zA-Z.!?]+', r' ', s)
    s = re.sub(r'\s+', r' ', s)

    s = s.strip()
    s = '<start>' +' '+ s +' '+' <end>'
    return s

## Splitting the Data Required to Train the Model

In [5]:
# Limiting the data and Splitting into seperate lists and add tokens

data = data[:150]

lang_eng = []
lang_nya = []

raw_data_eng, raw_data_nya = list(zip(*data))
raw_data_eng, raw_data_nya = list(raw_data_eng), list(raw_data_nya)

for i, j in zip(raw_data_eng, raw_data_nya):
  preprocessed_data_eng = preprocess_sentence(i)
  preprocessed_data_nya = preprocess_sentence(j)
  lang_eng.append(preprocessed_data_eng)
  lang_nya.append(preprocessed_data_nya)

def tokenize(lang):
  lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(
      filters='')
  lang_tokenizer.fit_on_texts(lang)

  tensor = lang_tokenizer.texts_to_sequences(lang)

  tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor,
                                                         padding='post')

  return tensor, lang_tokenizer

input_tensor, inp_lang = tokenize(lang_eng)
target_tensor, targ_lang = tokenize(lang_nya)

max_length_targ, max_length_inp = target_tensor.shape[1], input_tensor.shape[1]

In [6]:
# Creating training and validation sets using an 80-20 split
input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(input_tensor, target_tensor, test_size=0.2)

# Show length
print(len(input_tensor_train), len(target_tensor_train), len(input_tensor_val), len(target_tensor_val))

def convert(lang, tensor):
  for t in tensor:
    if t!=0:
      print ("%d ----> %s" % (t, lang.index_word[t]))

print ("Input Language; index to word mapping")
convert(inp_lang, input_tensor_train[0])
print ()
print ("Target Language; index to word mapping")
convert(targ_lang, target_tensor_train[0])



120 120 30 30
Input Language; index to word mapping
1 ----> <start>
12 ----> biliwila
3 ----> .
2 ----> <end>

Target Language; index to word mapping
1 ----> <start>
4 ----> to
6 ----> be
41 ----> green
3 ----> .
2 ----> <end>


## Creating a Buffer and a Batch Size

In [7]:
BUFFER_SIZE = len(input_tensor_train)
BATCH_SIZE = 64
steps_per_epoch = len(input_tensor_train)//BATCH_SIZE

vocab_inp_size = len(inp_lang.word_index)+1
vocab_tar_size = len(targ_lang.word_index)+1

dataset = tf.data.Dataset.from_tensor_slices((input_tensor_train, target_tensor_train)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

dataset

Metal device set to: Apple M1


2022-10-27 19:01:26.909955: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-10-27 19:01:26.910056: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


<BatchDataset element_spec=(TensorSpec(shape=(64, 5), dtype=tf.int32, name=None), TensorSpec(shape=(64, 12), dtype=tf.int32, name=None))>

### Encoder Architecture:

In [8]:
class Encoder(tf.keras.Model):

    def __init__(self, inp_vocab_size, embedding_size, lstm_size, input_length):
        super(Encoder, self).__init__()
        
        #Initialize Embedding layer
        #Intialize Encoder LSTM layer
        
        self.lstm_size = lstm_size
        self.embedding = tf.keras.layers.Embedding(inp_vocab_size, embedding_size)
        self.lstm = tf.keras.layers.LSTM(lstm_size, return_sequences=True, return_state=True)

    def call(self, input_sequence, states):
      
        embed = self.embedding(input_sequence)
        output, state_h, state_c = self.lstm(embed, initial_state=states)

        return output, state_h, state_c
    
    def initialize_states(self,batch_size):
    
        return (tf.zeros([batch_size, self.lstm_size]),
                tf.zeros([batch_size, self.lstm_size]))

### Dot Attention:

In [9]:
class Attention(tf.keras.layers.Layer):
    def __init__(self,scoring_function, att_units):
        super(Attention, self).__init__()
        
        self.scoring_function = scoring_function
        self.att_units = att_units

        if self.scoring_function=='dot':
            pass
            # For general, it would be self.wa = tf.keras.layers.Dense(att_units)


    def call(self,decoder_hidden_state,encoder_output):

        if self.scoring_function == 'dot':
            
            new_state = tf.expand_dims(decoder_hidden_state, -1)
            score = tf.matmul(encoder_output, new_state)
            weights = tf.nn.softmax(score, axis=1)
            context = weights * encoder_output
            context_vector = tf.reduce_sum(context, axis=1)
                                
            return context_vector, weights

### One Step Decoder

In [10]:
class One_Step_Decoder(tf.keras.Model):
    def __init__(self, tar_vocab_size, embedding_dim, input_length, dec_units, score_fun, att_units):
        super(One_Step_Decoder, self).__init__()
        # Initialize decoder embedding layer, LSTM and any other objects needed
        self.tar_vocab_size = tar_vocab_size
        self.embedding_dim = embedding_dim
        self.input_length = input_length
        self.dec_units = dec_units
        self.score_fun = score_fun
        self.att_units = att_units
        self.embedding = tf.keras.layers.Embedding(self.tar_vocab_size, self.embedding_dim, 
                                                   input_length=self.input_length)
        
        self.lstm = tf.keras.layers.LSTM(self.dec_units, return_sequences=True, 
                                         return_state=True)
        
        self.output_layer = tf.keras.layers.Dense(self.tar_vocab_size)
        
        self.attention = Attention(self.score_fun, self.att_units)

    def call(self, input_to_decoder, encoder_output, state_h, state_c):
        
        result = self.embedding(input_to_decoder)
        
        context_vector, weights = self.attention(state_h, encoder_output)
        
        concat = tf.concat([tf.expand_dims(context_vector, 1), result], axis=-1)
        
        decoder_output, hidden_state, cell_state = self.lstm(concat, initial_state=[state_h, state_c])
        
        final_output = tf.reshape(decoder_output, (-1, decoder_output.shape[2]))
        final_output = self.output_layer(final_output)
        
        return final_output, hidden_state, cell_state, weights, context_vector

### Decoder

In [11]:
class Decoder(tf.keras.Model):
    def __init__(self, out_vocab_size, embedding_dim, output_length, dec_units ,score_fun ,att_units):
        #Intialize necessary variables and create an object from the class onestepdecoder
        super(Decoder, self).__init__()
        self.out_vocab_size = out_vocab_size
        self.embedding_dim = embedding_dim
        self.output_length = output_length
        self.dec_units = dec_units
        self.score_fun = score_fun
        self.att_units = att_units
        self.onestepdecoder = One_Step_Decoder(self.out_vocab_size, self.embedding_dim, self.output_length,
                                               self.dec_units, self.score_fun, self.att_units)
        
    def call(self, input_to_decoder,encoder_output,decoder_hidden_state,decoder_cell_state):
        
        all_outputs= tf.TensorArray(tf.float32, size=input_to_decoder.shape[1], name="output_arrays")
        
        
        for timestep in range(input_to_decoder.shape[1]):
            output, decoder_hidden_state, decoder_cell_state, weights, context_vector = self.onestepdecoder(
                                                                                    input_to_decoder[:,timestep:timestep+1], 
                                                                                    encoder_output, 
                                                                                    decoder_hidden_state,
                                                                                    decoder_cell_state)
            
            all_outputs = all_outputs.write(timestep, output)
        
        all_outputs = tf.transpose(all_outputs.stack(), (1, 0, 2)) 

        return all_outputs

### Call The Encoder Decoder Architecture:

In [12]:
class encoder_decoder(tf.keras.Model):
    def __init__(self, inp_vocab_size, out_vocab_size, embedding_size, lstm_size, 
                 input_length, output_length, dec_units ,score_fun ,att_units, batch_size):
        
        super(encoder_decoder, self).__init__()
        
        self.encoder = Encoder(inp_vocab_size, embedding_size, lstm_size, input_length)
        self.decoder = Decoder(out_vocab_size, embedding_size, output_length, 
                               dec_units, score_fun, att_units)
    
    def call(self, data):
        
        input_sequence, input_to_decoder = data[0],data[1]
        initial_state = self.encoder.initialize_states(batch_size=64)
        encoder_output, state_h, state_c = self.encoder(input_sequence, initial_state)
        decoder_hidden_state = state_h
        decoder_cell_state = state_c
        decoder_output = self.decoder(input_to_decoder, encoder_output, decoder_hidden_state, decoder_cell_state)
        
        return decoder_output

### Custom Loss Function: 

In [13]:
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
  mask = tf.math.logical_not(tf.math.equal(real, 0))
  loss_ = loss_object(real, pred)

  mask = tf.cast(mask, dtype=loss_.dtype)
  loss_ *= mask

  return tf.reduce_mean(loss_)

optimizer = tf.keras.optimizers.Adam()

### Training:

In [14]:
!mkdir logs

from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import TensorBoard

checkpoint = ModelCheckpoint("dot.h5", monitor='val_loss', verbose=1, save_weights_only=True)

logdir='logs'
tensorboard_Visualization = TensorBoard(log_dir=logdir)

input_vocab_size = len(inp_lang.word_index)+1
output_vocab_size = len(targ_lang.word_index)+1

input_len = max_length_inp
output_len = max_length_targ

lstm_size = 128
att_units = 256
dec_units = 128
embedding_size = 300
embedding_dim = 300
score_fun = 'dot'
steps = len(input_tensor)//64
batch_size=64

model = encoder_decoder(input_vocab_size,output_vocab_size,embedding_size,lstm_size,input_len,output_len,dec_units,score_fun,att_units, batch_size)

checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=model.layers[0],
                                 decoder=model.layers[1])

mkdir: logs: File exists


In [15]:
@tf.function
def train_step(inp, targ, enc_hidden):
  loss = 0

  with tf.GradientTape() as tape:
    enc_output, enc_hidden,enc_state = model.layers[0](inp, enc_hidden)


    dec_input = tf.expand_dims([targ_lang.word_index['<start>']] * BATCH_SIZE, 1)

    for t in range(1, targ.shape[1]):
      predictions = model.layers[1](dec_input,enc_output,enc_hidden,enc_state)

      loss += loss_function(targ[:, t], predictions)

      dec_input = tf.expand_dims(targ[:, t], 1)

  batch_loss = (loss / int(targ.shape[1]))

  variables = model.layers[0].trainable_variables + model.layers[1].trainable_variables

  gradients = tape.gradient(loss, variables)

  optimizer.apply_gradients(zip(gradients, variables))

  return batch_loss

In [32]:
EPOCHS = 1000 # specifying the number of epochs or runs for training the model

for epoch in range(EPOCHS):
  start = time.time()

  enc_hidden = model.layers[0].initialize_states(64)
  total_loss = 0

  for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
    batch_loss = train_step(inp, targ, enc_hidden)
    total_loss += batch_loss

    if batch % 100 == 0:
      print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                   batch,
                                                   batch_loss.numpy()))
      
  if (epoch + 1) % 2 == 0:
    checkpoint.save(file_prefix = checkpoint_prefix)

  print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                      total_loss / steps_per_epoch))
  print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

Epoch 1 Batch 0 Loss 0.2881
Epoch 1 Loss 0.2881
Time taken for 1 epoch 0.20052194595336914 sec

Epoch 2 Batch 0 Loss 0.2891
Epoch 2 Loss 0.2891
Time taken for 1 epoch 0.1633610725402832 sec

Epoch 3 Batch 0 Loss 0.2939
Epoch 3 Loss 0.2939
Time taken for 1 epoch 0.10665607452392578 sec

Epoch 4 Batch 0 Loss 0.2868
Epoch 4 Loss 0.2868
Time taken for 1 epoch 0.15123605728149414 sec

Epoch 5 Batch 0 Loss 0.2783
Epoch 5 Loss 0.2783
Time taken for 1 epoch 0.08801102638244629 sec

Epoch 6 Batch 0 Loss 0.2872
Epoch 6 Loss 0.2872
Time taken for 1 epoch 0.16629600524902344 sec

Epoch 7 Batch 0 Loss 0.2770
Epoch 7 Loss 0.2770
Time taken for 1 epoch 0.14885687828063965 sec

Epoch 8 Batch 0 Loss 0.2816
Epoch 8 Loss 0.2816
Time taken for 1 epoch 0.3919680118560791 sec

Epoch 9 Batch 0 Loss 0.2847
Epoch 9 Loss 0.2847
Time taken for 1 epoch 0.11553215980529785 sec

Epoch 10 Batch 0 Loss 0.2729
Epoch 10 Loss 0.2729
Time taken for 1 epoch 0.2240908145904541 sec

Epoch 11 Batch 0 Loss 0.2812
Epoch 11 Los

Epoch 86 Loss 0.2140
Time taken for 1 epoch 0.15285468101501465 sec

Epoch 87 Batch 0 Loss 0.2156
Epoch 87 Loss 0.2156
Time taken for 1 epoch 0.07833290100097656 sec

Epoch 88 Batch 0 Loss 0.2087
Epoch 88 Loss 0.2087
Time taken for 1 epoch 0.15641188621520996 sec

Epoch 89 Batch 0 Loss 0.2105
Epoch 89 Loss 0.2105
Time taken for 1 epoch 0.08012628555297852 sec

Epoch 90 Batch 0 Loss 0.2081
Epoch 90 Loss 0.2081
Time taken for 1 epoch 0.16442394256591797 sec

Epoch 91 Batch 0 Loss 0.2123
Epoch 91 Loss 0.2123
Time taken for 1 epoch 0.08856081962585449 sec

Epoch 92 Batch 0 Loss 0.2115
Epoch 92 Loss 0.2115
Time taken for 1 epoch 0.25122618675231934 sec

Epoch 93 Batch 0 Loss 0.2159
Epoch 93 Loss 0.2159
Time taken for 1 epoch 0.10761022567749023 sec

Epoch 94 Batch 0 Loss 0.2061
Epoch 94 Loss 0.2061
Time taken for 1 epoch 0.6066148281097412 sec

Epoch 95 Batch 0 Loss 0.2125
Epoch 95 Loss 0.2125
Time taken for 1 epoch 0.07062816619873047 sec

Epoch 96 Batch 0 Loss 0.2003
Epoch 96 Loss 0.2003


Epoch 170 Loss 0.1609
Time taken for 1 epoch 0.14916586875915527 sec

Epoch 171 Batch 0 Loss 0.1543
Epoch 171 Loss 0.1543
Time taken for 1 epoch 0.07287788391113281 sec

Epoch 172 Batch 0 Loss 0.1640
Epoch 172 Loss 0.1640
Time taken for 1 epoch 0.1502211093902588 sec

Epoch 173 Batch 0 Loss 0.1567
Epoch 173 Loss 0.1567
Time taken for 1 epoch 0.07272195816040039 sec

Epoch 174 Batch 0 Loss 0.1614
Epoch 174 Loss 0.1614
Time taken for 1 epoch 0.15229320526123047 sec

Epoch 175 Batch 0 Loss 0.1513
Epoch 175 Loss 0.1513
Time taken for 1 epoch 0.0713191032409668 sec

Epoch 176 Batch 0 Loss 0.1568
Epoch 176 Loss 0.1568
Time taken for 1 epoch 0.15540409088134766 sec

Epoch 177 Batch 0 Loss 0.1583
Epoch 177 Loss 0.1583
Time taken for 1 epoch 0.08236479759216309 sec

Epoch 178 Batch 0 Loss 0.1589
Epoch 178 Loss 0.1589
Time taken for 1 epoch 0.15497398376464844 sec

Epoch 179 Batch 0 Loss 0.1601
Epoch 179 Loss 0.1601
Time taken for 1 epoch 0.07504534721374512 sec

Epoch 180 Batch 0 Loss 0.1505
Ep

Epoch 254 Loss 0.1107
Time taken for 1 epoch 0.14899396896362305 sec

Epoch 255 Batch 0 Loss 0.1088
Epoch 255 Loss 0.1088
Time taken for 1 epoch 0.07377099990844727 sec

Epoch 256 Batch 0 Loss 0.1080
Epoch 256 Loss 0.1080
Time taken for 1 epoch 0.15171575546264648 sec

Epoch 257 Batch 0 Loss 0.1069
Epoch 257 Loss 0.1069
Time taken for 1 epoch 0.0728607177734375 sec

Epoch 258 Batch 0 Loss 0.1128
Epoch 258 Loss 0.1128
Time taken for 1 epoch 0.15148305892944336 sec

Epoch 259 Batch 0 Loss 0.1114
Epoch 259 Loss 0.1114
Time taken for 1 epoch 0.07353615760803223 sec

Epoch 260 Batch 0 Loss 0.1119
Epoch 260 Loss 0.1119
Time taken for 1 epoch 0.14883995056152344 sec

Epoch 261 Batch 0 Loss 0.1092
Epoch 261 Loss 0.1092
Time taken for 1 epoch 0.07154273986816406 sec

Epoch 262 Batch 0 Loss 0.1092
Epoch 262 Loss 0.1092
Time taken for 1 epoch 0.14907193183898926 sec

Epoch 263 Batch 0 Loss 0.1068
Epoch 263 Loss 0.1068
Time taken for 1 epoch 0.07131719589233398 sec

Epoch 264 Batch 0 Loss 0.1116
E

Epoch 338 Loss 0.0784
Time taken for 1 epoch 0.15041494369506836 sec

Epoch 339 Batch 0 Loss 0.0711
Epoch 339 Loss 0.0711
Time taken for 1 epoch 0.07291793823242188 sec

Epoch 340 Batch 0 Loss 0.0700
Epoch 340 Loss 0.0700
Time taken for 1 epoch 0.14847278594970703 sec

Epoch 341 Batch 0 Loss 0.0796
Epoch 341 Loss 0.0796
Time taken for 1 epoch 0.0709681510925293 sec

Epoch 342 Batch 0 Loss 0.0834
Epoch 342 Loss 0.0834
Time taken for 1 epoch 0.15108895301818848 sec

Epoch 343 Batch 0 Loss 0.0752
Epoch 343 Loss 0.0752
Time taken for 1 epoch 0.07147598266601562 sec

Epoch 344 Batch 0 Loss 0.0738
Epoch 344 Loss 0.0738
Time taken for 1 epoch 0.15106797218322754 sec

Epoch 345 Batch 0 Loss 0.0722
Epoch 345 Loss 0.0722
Time taken for 1 epoch 0.07056713104248047 sec

Epoch 346 Batch 0 Loss 0.0752
Epoch 346 Loss 0.0752
Time taken for 1 epoch 0.1516869068145752 sec

Epoch 347 Batch 0 Loss 0.0748
Epoch 347 Loss 0.0748
Time taken for 1 epoch 0.07290196418762207 sec

Epoch 348 Batch 0 Loss 0.0697
Ep

Epoch 422 Loss 0.0504
Time taken for 1 epoch 0.15027499198913574 sec

Epoch 423 Batch 0 Loss 0.0535
Epoch 423 Loss 0.0535
Time taken for 1 epoch 0.07241582870483398 sec

Epoch 424 Batch 0 Loss 0.0489
Epoch 424 Loss 0.0489
Time taken for 1 epoch 0.14715003967285156 sec

Epoch 425 Batch 0 Loss 0.0491
Epoch 425 Loss 0.0491
Time taken for 1 epoch 0.07011103630065918 sec

Epoch 426 Batch 0 Loss 0.0453
Epoch 426 Loss 0.0453
Time taken for 1 epoch 0.14715290069580078 sec

Epoch 427 Batch 0 Loss 0.0514
Epoch 427 Loss 0.0514
Time taken for 1 epoch 0.07131409645080566 sec

Epoch 428 Batch 0 Loss 0.0450
Epoch 428 Loss 0.0450
Time taken for 1 epoch 0.15179705619812012 sec

Epoch 429 Batch 0 Loss 0.0491
Epoch 429 Loss 0.0491
Time taken for 1 epoch 0.07278323173522949 sec

Epoch 430 Batch 0 Loss 0.0462
Epoch 430 Loss 0.0462
Time taken for 1 epoch 0.15030312538146973 sec

Epoch 431 Batch 0 Loss 0.0488
Epoch 431 Loss 0.0488
Time taken for 1 epoch 0.07114696502685547 sec

Epoch 432 Batch 0 Loss 0.0545


Epoch 506 Loss 0.0384
Time taken for 1 epoch 0.15035724639892578 sec

Epoch 507 Batch 0 Loss 0.0368
Epoch 507 Loss 0.0368
Time taken for 1 epoch 0.07377886772155762 sec

Epoch 508 Batch 0 Loss 0.0330
Epoch 508 Loss 0.0330
Time taken for 1 epoch 0.14926910400390625 sec

Epoch 509 Batch 0 Loss 0.0335
Epoch 509 Loss 0.0335
Time taken for 1 epoch 0.07510995864868164 sec

Epoch 510 Batch 0 Loss 0.0366
Epoch 510 Loss 0.0366
Time taken for 1 epoch 0.15085792541503906 sec

Epoch 511 Batch 0 Loss 0.0299
Epoch 511 Loss 0.0299
Time taken for 1 epoch 0.07138180732727051 sec

Epoch 512 Batch 0 Loss 0.0336
Epoch 512 Loss 0.0336
Time taken for 1 epoch 0.1488661766052246 sec

Epoch 513 Batch 0 Loss 0.0370
Epoch 513 Loss 0.0370
Time taken for 1 epoch 0.08832812309265137 sec

Epoch 514 Batch 0 Loss 0.0349
Epoch 514 Loss 0.0349
Time taken for 1 epoch 0.1489732265472412 sec

Epoch 515 Batch 0 Loss 0.0342
Epoch 515 Loss 0.0342
Time taken for 1 epoch 0.06948399543762207 sec

Epoch 516 Batch 0 Loss 0.0326
Ep

Epoch 589 Batch 0 Loss 0.0251
Epoch 589 Loss 0.0251
Time taken for 1 epoch 0.07158827781677246 sec

Epoch 590 Batch 0 Loss 0.0246
Epoch 590 Loss 0.0246
Time taken for 1 epoch 0.1508631706237793 sec

Epoch 591 Batch 0 Loss 0.0236
Epoch 591 Loss 0.0236
Time taken for 1 epoch 0.07259392738342285 sec

Epoch 592 Batch 0 Loss 0.0229
Epoch 592 Loss 0.0229
Time taken for 1 epoch 0.15047979354858398 sec

Epoch 593 Batch 0 Loss 0.0257
Epoch 593 Loss 0.0257
Time taken for 1 epoch 0.07172107696533203 sec

Epoch 594 Batch 0 Loss 0.0227
Epoch 594 Loss 0.0227
Time taken for 1 epoch 0.15022611618041992 sec

Epoch 595 Batch 0 Loss 0.0240
Epoch 595 Loss 0.0240
Time taken for 1 epoch 0.07042098045349121 sec

Epoch 596 Batch 0 Loss 0.0229
Epoch 596 Loss 0.0229
Time taken for 1 epoch 0.14950084686279297 sec

Epoch 597 Batch 0 Loss 0.0244
Epoch 597 Loss 0.0244
Time taken for 1 epoch 0.07096600532531738 sec

Epoch 598 Batch 0 Loss 0.0213
Epoch 598 Loss 0.0213
Time taken for 1 epoch 0.1515810489654541 sec

Ep

Epoch 672 Loss 0.0153
Time taken for 1 epoch 0.15277695655822754 sec

Epoch 673 Batch 0 Loss 0.0166
Epoch 673 Loss 0.0166
Time taken for 1 epoch 0.07787513732910156 sec

Epoch 674 Batch 0 Loss 0.0176
Epoch 674 Loss 0.0176
Time taken for 1 epoch 0.14922213554382324 sec

Epoch 675 Batch 0 Loss 0.0173
Epoch 675 Loss 0.0173
Time taken for 1 epoch 0.07046794891357422 sec

Epoch 676 Batch 0 Loss 0.0157
Epoch 676 Loss 0.0157
Time taken for 1 epoch 0.15015220642089844 sec

Epoch 677 Batch 0 Loss 0.0154
Epoch 677 Loss 0.0154
Time taken for 1 epoch 0.07043099403381348 sec

Epoch 678 Batch 0 Loss 0.0152
Epoch 678 Loss 0.0152
Time taken for 1 epoch 0.14995908737182617 sec

Epoch 679 Batch 0 Loss 0.0194
Epoch 679 Loss 0.0194
Time taken for 1 epoch 0.07080602645874023 sec

Epoch 680 Batch 0 Loss 0.0202
Epoch 680 Loss 0.0202
Time taken for 1 epoch 0.15211987495422363 sec

Epoch 681 Batch 0 Loss 0.0205
Epoch 681 Loss 0.0205
Time taken for 1 epoch 0.07129979133605957 sec

Epoch 682 Batch 0 Loss 0.0166


Epoch 756 Loss 0.0155
Time taken for 1 epoch 0.15108299255371094 sec

Epoch 757 Batch 0 Loss 0.0150
Epoch 757 Loss 0.0150
Time taken for 1 epoch 0.07419204711914062 sec

Epoch 758 Batch 0 Loss 0.0109
Epoch 758 Loss 0.0109
Time taken for 1 epoch 0.15004587173461914 sec

Epoch 759 Batch 0 Loss 0.0146
Epoch 759 Loss 0.0146
Time taken for 1 epoch 0.0713498592376709 sec

Epoch 760 Batch 0 Loss 0.0112
Epoch 760 Loss 0.0112
Time taken for 1 epoch 0.15061593055725098 sec

Epoch 761 Batch 0 Loss 0.0123
Epoch 761 Loss 0.0123
Time taken for 1 epoch 0.07236981391906738 sec

Epoch 762 Batch 0 Loss 0.0120
Epoch 762 Loss 0.0120
Time taken for 1 epoch 0.15028905868530273 sec

Epoch 763 Batch 0 Loss 0.0133
Epoch 763 Loss 0.0133
Time taken for 1 epoch 0.07136416435241699 sec

Epoch 764 Batch 0 Loss 0.0118
Epoch 764 Loss 0.0118
Time taken for 1 epoch 0.15413594245910645 sec

Epoch 765 Batch 0 Loss 0.0140
Epoch 765 Loss 0.0140
Time taken for 1 epoch 0.0710608959197998 sec

Epoch 766 Batch 0 Loss 0.0118
Ep

Epoch 839 Batch 0 Loss 0.0105
Epoch 839 Loss 0.0105
Time taken for 1 epoch 0.07169389724731445 sec

Epoch 840 Batch 0 Loss 0.0122
Epoch 840 Loss 0.0122
Time taken for 1 epoch 0.15364408493041992 sec

Epoch 841 Batch 0 Loss 0.0091
Epoch 841 Loss 0.0091
Time taken for 1 epoch 0.07257795333862305 sec

Epoch 842 Batch 0 Loss 0.0115
Epoch 842 Loss 0.0115
Time taken for 1 epoch 0.15463805198669434 sec

Epoch 843 Batch 0 Loss 0.0138
Epoch 843 Loss 0.0138
Time taken for 1 epoch 0.07201385498046875 sec

Epoch 844 Batch 0 Loss 0.0126
Epoch 844 Loss 0.0126
Time taken for 1 epoch 0.15090370178222656 sec

Epoch 845 Batch 0 Loss 0.0112
Epoch 845 Loss 0.0112
Time taken for 1 epoch 0.07092714309692383 sec

Epoch 846 Batch 0 Loss 0.0112
Epoch 846 Loss 0.0112
Time taken for 1 epoch 0.1507279872894287 sec

Epoch 847 Batch 0 Loss 0.0125
Epoch 847 Loss 0.0125
Time taken for 1 epoch 0.07429885864257812 sec

Epoch 848 Batch 0 Loss 0.0115
Epoch 848 Loss 0.0115
Time taken for 1 epoch 0.1527407169342041 sec

Ep

Epoch 922 Loss 0.0115
Time taken for 1 epoch 0.15752315521240234 sec

Epoch 923 Batch 0 Loss 0.0107
Epoch 923 Loss 0.0107
Time taken for 1 epoch 0.07538199424743652 sec

Epoch 924 Batch 0 Loss 0.0085
Epoch 924 Loss 0.0085
Time taken for 1 epoch 0.1520380973815918 sec

Epoch 925 Batch 0 Loss 0.0078
Epoch 925 Loss 0.0078
Time taken for 1 epoch 0.07395386695861816 sec

Epoch 926 Batch 0 Loss 0.0113
Epoch 926 Loss 0.0113
Time taken for 1 epoch 0.1601710319519043 sec

Epoch 927 Batch 0 Loss 0.0092
Epoch 927 Loss 0.0092
Time taken for 1 epoch 0.07892894744873047 sec

Epoch 928 Batch 0 Loss 0.0135
Epoch 928 Loss 0.0135
Time taken for 1 epoch 0.1532139778137207 sec

Epoch 929 Batch 0 Loss 0.0083
Epoch 929 Loss 0.0083
Time taken for 1 epoch 0.07306814193725586 sec

Epoch 930 Batch 0 Loss 0.0111
Epoch 930 Loss 0.0111
Time taken for 1 epoch 0.1566789150238037 sec

Epoch 931 Batch 0 Loss 0.0100
Epoch 931 Loss 0.0100
Time taken for 1 epoch 0.07250714302062988 sec

Epoch 932 Batch 0 Loss 0.0123
Epoc

### Translate:

In [33]:
def predict(input_sentence):

  attention_plot = np.zeros((output_len, input_len))

  input_sentence = preprocess_sentence(input_sentence)

  inputs = [inp_lang.word_index[i] for i in input_sentence.split()]
  inputs = tf.keras.preprocessing.sequence.pad_sequences([inputs],
                                                         maxlen=input_len,
                                                         padding='post')
  inputs = tf.convert_to_tensor(inputs)

  result = ''
  
  encoder_output,state_h,state_c = model.layers[0](inputs,[tf.zeros((1, lstm_size)),tf.zeros((1, lstm_size))])

  dec_input = tf.expand_dims([targ_lang.word_index['<start>']], 0)

  for t in range(output_len):
   predictions,state_h,state_c,attention_weights,context_vector = model.layers[1].onestepdecoder(dec_input,
                                                                                                 encoder_output,
                                                                                                 state_h,
                                                                                                 state_c)

   attention_weights = tf.reshape(attention_weights, (-1, ))
   attention_plot[t] = attention_weights.numpy()

   predicted_id = tf.argmax(predictions[0]).numpy()

   result += targ_lang.index_word[predicted_id] + ' '

   if targ_lang.index_word[predicted_id] == '<end>':
     return result, input_sentence, attention_plot

   dec_input = tf.expand_dims([predicted_id], 0)

  return result, input_sentence, attention_plot

In [34]:
def translate(sentence):
  result, sent, attention_plot = predict(sentence)

  print('Input: %s' % (sent))
  print('Predicted translation: {}'.format(result))

In [39]:
translate('Lasa')

KeyError: 'lasa'

Traceback (most recent call last):
  File "/Users/cm15/miniforge3/envs/tensorflow-env/lib/python3.10/site-packages/gradio/routes.py", line 298, in run_predict
    output = await app.blocks.process_api(
  File "/Users/cm15/miniforge3/envs/tensorflow-env/lib/python3.10/site-packages/gradio/blocks.py", line 790, in process_api
    result = await self.call_function(fn_index, inputs, iterator)
  File "/Users/cm15/miniforge3/envs/tensorflow-env/lib/python3.10/site-packages/gradio/blocks.py", line 697, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "/Users/cm15/miniforge3/envs/tensorflow-env/lib/python3.10/site-packages/anyio/to_thread.py", line 31, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "/Users/cm15/miniforge3/envs/tensorflow-env/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 937, in run_sync_in_worker_thread
    return await future
  File "/Users/cm15/miniforge3/envs/tensorflow-env/lib/python3.10/site-packages

In [20]:
import gradio as gd

In [21]:
def translate(sentence):
  result, sent, attention_plot = predict(sentence)

  return result
UI = gd.Interface(translate, inputs='text', outputs='text')

In [22]:
UI.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


(<gradio.routes.App at 0x2e6588d90>, 'http://127.0.0.1:7860/', None)