# Evluate the translator using BLEU score

# Library

In [1]:
import tensorflow as tf
import tensorflow_text
from pickle import load, dump
from time import time 
import numpy as np
from nltk.translate.bleu_score import corpus_bleu, sentence_bleu
import gc

In [2]:
print(tf.__version__)

2.10.1


# Load Tokenizer

Tokenization is the process of breaking up text, into "tokens". Depending on the tokenizer, these tokens can represent sentence-pieces, words, subwords, or characters. To learn more about tokenization, visit this guide.

This tutorial uses the tokenizers built in the subword tokenizer tutorial. That tutorial optimizes two text.BertTokenizer objects (one for English, one for Portuguese) for this dataset and exports them in a TensorFlow saved_model format.


In [5]:
reloaded_tokenizers = tf.saved_model.load('./metadata/tokenizer_deu_eng')
string = "When writing a sentence, generally you start with a capital letter and finish with a period (.), an exclamation mark (!), or a question mark (?)."
tokens = reloaded_tokenizers.eng.tokenize([string])
round_trip = reloaded_tokenizers.eng.detokenize(tokens)
print(round_trip.numpy()[0].decode('utf-8'))


when writing a sentence , generally you start with a capital letter and finish with a period ( . ) , an exclamation mark ( ! ) , or a question mark ( ? ) .


# Load dataset

In [6]:
filename = '../data/deu-eng-test.pkl'
with open(filename, 'rb') as file:
    test_data = load(file)
type(test_data)    

numpy.ndarray

# Preprocess the test dataset 

Tokenize-and-detokenize the target sentence for later BLEU score

In [14]:
# f = open( "../data/eng_processed.txt", 'a' )
# length = 0
# time0 = time()
# for i, source in enumerate(test_data):
#     # if i < 11980: continue
#     # if i == 400: break
#     raw_target = source[1].decode('utf-8')
#     #
#     raw_target = reloaded_tokenizers.eng.tokenize([raw_target])
#     raw_target = reloaded_tokenizers.eng.detokenize(raw_target)
#     raw_target = raw_target.numpy()[0].decode('utf-8')
#     f.write(raw_target + '\n')
#     #
#     length += 1
#     if length % 200 ==0:
#         del reloaded_tokenizers
#         gc.collect()
#         reloaded_tokenizers = tf.saved_model.load('./metadata/tokenizer_deu_eng')
#         f.close()
#         f = open( "../data/eng_processed.txt", 'a' )
#         print(length, time()-time0)
# f.close()

In [17]:
# actual, predicted = list(), list()
# BLEU1, BLEU2, BLEU3, BLEU4, length = 0, 0, 0, 0, 0
f = open( "../data/eng_processed.txt", 'rt')
texts = f.read()
f.close()
texts = texts.strip().split('\n')
texts

['try not to fall .',
 "i ' ve never seen you laugh .",
 'they sat on a bench in the park .',
 'i know that , tom .',
 'tom offered mary and john some potato chips .',
 'i have an older brother and an older sister .',
 "they ' re all guilty .",
 'the meeting is adjourned .',
 "don ' t make fun of others .",
 "tom wasn ' t happy with the results .",
 'is eating less meat a good idea ?',
 "it ' s a tiny country that most people have never heard of .",
 'the protesters threw stones at the police .',
 'umbrellas sell well .',
 "we ' re going to the station .",
 "that ' s one of the things that i like about tom .",
 'now we need to wait a while .',
 'tom often complains about mosquitoes .',
 'he was hurt in the accident .',
 'if you want , you can go .',
 "we ' re busy .",
 'i have less than you .',
 'we can still get there on time .',
 'no one can do everything .',
 'tom hid behind the door .',
 'put yourself in my place .',
 "is this tom ' s book ?",
 'i only use cane sugar .',
 'he under

# Load Translator

In [18]:
translator = tf.saved_model.load('./metadata/translator_1')

In [19]:
# Die Demonstranten warfen Steine auf die Polizei.	The protesters threw stones at the police.
translation, _, _ = translator('Die Demonstranten warfen Steine auf die Polizei.')
print(translation.numpy())

b'the demonstrations threw stones to the police .'


In [20]:
idx = 10
raw_source, raw_target = test_data[idx][0].decode('utf-8'), test_data[idx][1].decode('utf-8')
translation, _, _ = translator(raw_source)
print(f"src={raw_source}")
print(f"target={raw_target}")
print(f"predict={translation.numpy().decode('utf-8')}")

src=Ist es gut, weniger Fleisch zu essen?
target=Is eating less meat a good idea?
predict=is it good to eat less than less meat ?


In [21]:
sentence = tf.constant(raw_source)
assert isinstance(sentence, tf.Tensor)
print(sentence.shape)
# sentence = sentence[tf.newaxis]
print(sentence.shape)
print(sentence.numpy())
#
translation, _, _ = translator(sentence)
print(f"src={sentence}")
print(f"target={raw_target}")
print(f"predict={translation.numpy().decode('utf-8')}")

()
()
b'Ist es gut, weniger Fleisch zu essen?'
src=b'Ist es gut, weniger Fleisch zu essen?'
target=Is eating less meat a good idea?
predict=is it good to eat less than less meat ?


# BLEU score

In [22]:
target = "When writing a sentence, generally you start with a capital letter and finish with a period (.), an exclamation mark (!), or a question mark (?)."
target = reloaded_tokenizers.eng.tokenize([target])
target = reloaded_tokenizers.eng.detokenize(target)
target = target.numpy()[0].decode('utf-8')
# print(target)
actual = [[target.split()]]
print(actual)

predict = "When writing a sentence, generally you start with a capital letter and finish with a period (.), an exclamation mark (!), or a question mark (?)."
predict = reloaded_tokenizers.eng.tokenize([predict])
predict = reloaded_tokenizers.eng.detokenize(predict)
predict = predict.numpy()[0].decode('utf-8')
# print(predict)
predicted = [predict.split()]
print(predicted)

print('BLEU-1    %f' % corpus_bleu(actual, predicted, weights=(1.0, 0.0, 0.0, 0.0)))
print('BLEU-2    %f' % corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0.0, 0.0)))    
print('BLEU-3    %f' % corpus_bleu(actual, predicted, weights=(0.33, 0.33, 0.33, 0.0))) 
print('BLEU-4    %f' % corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25)))  


[[['when', 'writing', 'a', 'sentence', ',', 'generally', 'you', 'start', 'with', 'a', 'capital', 'letter', 'and', 'finish', 'with', 'a', 'period', '(', '.', ')', ',', 'an', 'exclamation', 'mark', '(', '!', ')', ',', 'or', 'a', 'question', 'mark', '(', '?', ')', '.']]]
[['when', 'writing', 'a', 'sentence', ',', 'generally', 'you', 'start', 'with', 'a', 'capital', 'letter', 'and', 'finish', 'with', 'a', 'period', '(', '.', ')', ',', 'an', 'exclamation', 'mark', '(', '!', ')', ',', 'or', 'a', 'question', 'mark', '(', '?', ')', '.']]
BLEU-1    1.000000
BLEU-2    1.000000
BLEU-3    1.000000
BLEU-4    1.000000


In [23]:
print(len(test_data))
print(len(test_data)*0.07/60, 'min to predict all the test data')

26150
30.508333333333336 min to predict all the test data


In [32]:
time0 = time()
length = 0
for i, source in enumerate(test_data):
    raw_src = source[0].decode('utf-8')
    raw_target = source[1].decode('utf-8')
    # raw_target = texts[i]
    if i == 10: break
    #
    translation, _, _ = translator(raw_src)
    translation = translation.numpy().decode('utf-8')
    if i < 3: 
        print(f"src = {raw_src}")
        print(f"target = {raw_target}")
        print(f"predict = {translation}")
        print("\n")
    #
    actual.append([raw_target.split()])
    predicted.append(translation.split())
    
    length += 1
    if length % 200 ==0:
        print(length, time()-time0)


src = Versuche, nicht zu fallen.
target = Try not to fall.
predict = try not to fall .


src = Ich habe Sie noch nie lachen gesehen.
target = I've never seen you laugh.
predict = i ' ve never seen her laugh .


src = Sie setzten sich auf eine Parkbank.
target = They sat on a bench in the park.
predict = they sat down on a park bench .




In [29]:
print(f'Predict time = {time()-time0}')
print('BLEU-1 %f' % corpus_bleu(actual, predicted, weights=(1.0, 0.0, 0.0, 0.0)))
print('BLEU-2 %f' % corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0.0, 0.0)))    
print('BLEU-3 %f' % corpus_bleu(actual, predicted, weights=(0.33, 0.33, 0.33, 0.0))) 
print('BLEU-4 %f' % corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25)))  
print(f'BLEU time = {time()-time0}')

Predict time = 3131.0587108135223
BLEU-1 0.557656
BLEU-2 0.469400
BLEU-3 0.408180
BLEU-4 0.352869
BLEU time = 3141.146869659424
