# Evaluate the translator using the BLEU score

# Library

In [1]:
import tensorflow as tf
import tensorflow_text
from pickle import load, dump
from time import time 
import numpy as np
from nltk.translate.bleu_score import corpus_bleu, sentence_bleu
import gc

In [12]:
# Show the TensorFlow version this notebook was run with.
tf_version = tf.__version__
print(tf_version)

2.10.1


# Load Tokenizer

Tokenization is the process of breaking up text into "tokens". Depending on the tokenizer, these tokens can represent sentence-pieces, words, subwords, or characters. To learn more about tokenization, visit this guide.

This tutorial uses the tokenizers built in the subword tokenizer tutorial. That tutorial optimizes two text.BertTokenizer objects (one for English, one for Portuguese) for its dataset and exports them in a TensorFlow saved_model format. This notebook instead loads the tokenizers saved under `tokenizer_deu_eng_1` for the German–English (deu-eng) data.


In [8]:
# Restore the exported tokenizers from their SavedModel directory.
reloaded_tokenizers = tf.saved_model.load('./Bootcamp/Tranformer_TF/deu-eng/metadata/tokenizer_deu_eng_1')

# Sanity check: push one sentence through tokenize -> detokenize with the
# `eng` tokenizer and print what comes back.
sample_sentence = "When writing a sentence, generally you start with a capital letter and finish with a period (.), an exclamation mark (!), or a question mark (?)."
eng_tokenizer = reloaded_tokenizers.eng
sample_tokens = eng_tokenizer.tokenize([sample_sentence])
restored = eng_tokenizer.detokenize(sample_tokens)
restored_first = restored.numpy()[0]
print(restored_first.decode('utf-8'))


<class 'tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject'>
<class 'tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject'>


# Load dataset

In [15]:
# Read the pickled test set from disk.
# NOTE(review): unpickling can execute arbitrary code; only load files that
# come from a trusted source.
test_set_path = './Bootcamp/Tranformer_TF/deu-eng/data/deu-eng-test.pkl'
with open(test_set_path, 'rb') as pickle_file:
    test_data = load(pickle_file)
# Display the container type as the cell output.
type(test_data)

Tom zahlt ein Bußgeld.
b'Tom is paying a fine.'


Unter dem Bett ist eine Katze.
b'There is a cat under the bed.'




# Load Translator

In [16]:
# Restore the exported translator from its SavedModel directory.
translator_dir = './Bootcamp/Tranformer_TF/deu-eng/metadata/translator_1'
translator = tf.saved_model.load(translator_dir)

In [18]:
# Smoke-test the translator on one hand-written sentence. The call returns
# three values; only the first one is inspected here.
demo_sentence = 'Wir bekommen ein neues Auto nächsten Monat.'
translation, _, _ = translator(demo_sentence)
translation_bytes = translation.numpy()
print(translation_bytes)

In [None]:
# Translate a single entry of the test set and print the source, the stored
# target, and the model's prediction.
idx = 10
sample_pair = test_data[idx]
raw_source = sample_pair[0].decode('utf-8')
raw_target = sample_pair[1].decode('utf-8')
translation, _, _ = translator(raw_source)
print(f"src={raw_source}")
print(f"target={raw_target}")
predicted_text = translation.numpy().decode('utf-8')
print(f"predict={predicted_text}")

In [None]:
# Repeat the previous translation, this time passing the source sentence as a
# string tensor instead of a Python str.
sentence = tf.constant(raw_source)
assert isinstance(sentence, tf.Tensor)
print(sentence.shape)
print(sentence.numpy())

translation, _, _ = translator(sentence)
# Decode the tensor's bytes so this line shows the sentence itself rather than
# the Tensor repr, matching the output format of the previous cell.
print(f"src={sentence.numpy().decode('utf-8')}")
print(f"target={raw_target}")
print(f"predict={translation.numpy().decode('utf-8')}")

# BLEU score

In [None]:
def _normalize_english(text):
    """Return `text` after a tokenize/detokenize round trip through the `eng` tokenizer.

    The reference and the hypothesis both go through this same round trip
    before being split into words for BLEU.
    """
    tokens = reloaded_tokenizers.eng.tokenize([text])
    restored = reloaded_tokenizers.eng.detokenize(tokens)
    return restored.numpy()[0].decode('utf-8')


# BLEU sanity check: score a sentence against itself.
target = _normalize_english("When writing a sentence, generally you start with a capital letter and finish with a period (.), an exclamation mark (!), or a question mark (?).")
# corpus_bleu takes, for every hypothesis, a list of reference token lists;
# hence the two extra levels of nesting around the single reference.
actual = [[target.split()]]
print(actual)

predict = _normalize_english("When writing a sentence, generally you start with a capital letter and finish with a period (.), an exclamation mark (!), or a question mark (?).")
predicted = [predict.split()]
print(predicted)

print('BLEU-1    %f' % corpus_bleu(actual, predicted, weights=(1.0, 0.0, 0.0, 0.0)))
print('BLEU-2    %f' % corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0.0, 0.0)))
# Use exact thirds so the n-gram weights sum to 1 (0.33 * 3 = 0.99 does not).
print('BLEU-3    %f' % corpus_bleu(actual, predicted, weights=(1/3, 1/3, 1/3, 0.0)))
print('BLEU-4    %f' % corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25)))


In [None]:
# Size of the test set, and a rough estimate of how long translating all of it
# takes (the formula uses 0.07 per item, converted to minutes by / 60).
n_test_items = len(test_data)
print(n_test_items)
estimated_minutes = n_test_items*0.07/60
print(estimated_minutes, 'min to predict all the test data')

In [None]:
# Accumulators and counters for the evaluation loop further down.
actual, predicted = list(), list()
BLEU1, BLEU2, BLEU3, BLEU4, length = 0, 0, 0, 0, 0

# Read the processed English text, one sentence per line. The context manager
# closes the file even if reading raises.
with open("./Bootcamp/Tranformer_TF/deu-eng/metadata/eng_processed.txt", 'rt') as f:
    texts = f.read()
texts = texts.strip().split('\n')

In [None]:
# Translate every test sentence and collect (reference, hypothesis) token
# lists for corpus-level BLEU.

# Start from empty accumulators so that re-running this cell does not append a
# second copy of every sentence to the BLEU inputs.
actual, predicted = [], []
length = 0

# Fail before the slow translation loop starts if a reference line is missing,
# instead of hitting an IndexError part-way through.
if len(texts) < len(test_data):
    raise ValueError(
        f"only {len(texts)} reference lines for {len(test_data)} test sentences"
    )

time0 = time()

for i, source in enumerate(test_data):
    raw_src = source[0].decode('utf-8')
    # The reference is line i of the processed text file, not the target
    # stored alongside the source in the pickled pair.
    raw_target = texts[i]

    translation, _, _ = translator(raw_src)
    translation = translation.numpy().decode('utf-8')
    if i < 3:
        # Print the first few examples as a visual sanity check.
        print(f"src = {raw_src}")
        print(f"target = {raw_target}")
        print(f"predict = {translation}")
        print("\n")

    # corpus_bleu takes a list of references per hypothesis, hence the extra
    # list around the single reference.
    actual.append([raw_target.split()])
    predicted.append(translation.split())

    length += 1
    if length % 200 == 0:
        # Progress report: sentences done so far and elapsed seconds.
        print(length, time()-time0)


In [None]:
# Report corpus-level BLEU-1..4 for the collected references and hypotheses.

# Seconds elapsed since the translation loop started (measured when this cell runs).
print(f'Predict time = {time()-time0}')

# Time the BLEU computation on its own clock, so "BLEU time" does not also
# include the translation loop.
bleu_start = time()
print('BLEU-1 %f' % corpus_bleu(actual, predicted, weights=(1.0, 0.0, 0.0, 0.0)))
print('BLEU-2 %f' % corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0.0, 0.0)))
# Use exact thirds so the n-gram weights sum to 1 (0.33 * 3 = 0.99 does not).
print('BLEU-3 %f' % corpus_bleu(actual, predicted, weights=(1/3, 1/3, 1/3, 0.0)))
print('BLEU-4 %f' % corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25)))
print(f'BLEU time = {time()-bleu_start}')