In [4]:
#From scratch: https://stackoverflow.com/questions/56968434/bleu-score-in-python-from-scratch

import numpy as np
from collections import Counter
import math

def n_gram_generator(sentence,n= 2,n_gram= False):
    sentence = sentence.lower()  # converting to lower case
    sent_arr = np.array(sentence.split())  # split to string arrays
    length = len(sent_arr)

    word_list = []
    for i in range(length+1):
        if i < n:
            continue
        word_range = list(range(i-n,i))
        s_list = sent_arr[word_range]
        string = ' '.join(s_list)  # converting list to strings
        word_list.append(string) # append to word_list
        if n_gram:
            word_list = list(set(word_list))
    return word_list

def from_scratch_bleu_score(original, reference):
    '''
    Bleu score function given a orginal and a reference ot target sentences
    '''
    rf_length = len(reference.split())
    o_length  = len(original.split())

    # Brevity Penalty
    if rf_length > o_length:
        BP=1
    else:
        penality=1-(rf_length/o_length)
        BP = np.exp(penality)

    # Clipped precision
    clipped_precision_score = []
    for ngram_level in range(1, 4):  # 1-gram to 4-gram
        
        
        original_ngram_list = n_gram_generator(original, ngram_level)
        original_n_gram = Counter(original_ngram_list)
        
        reference_ngram_list = n_gram_generator(reference, ngram_level)
        reference_n_gram = Counter(reference_ngram_list)
        
        
        num_ngrams_in_translation = sum(reference_n_gram.values())  # number of ngrams in translation
        
        # iterate the unique ngrams in translation (candidate)
        for j in reference_n_gram:
            
            if j in original_n_gram:  # if found in reference
                
                if reference_n_gram[j] > original_n_gram[j]:  # CLIPPING - if found in translation more than in source, clip
                    reference_n_gram[j] = original_n_gram[j]
                    
            else:
                reference_n_gram[j] = 0

        #print (sum(machine_n_gram.values()), c)
        clipped_precision_score.append(float(sum(reference_n_gram.values())) / num_ngrams_in_translation)

    #print (clipped_precision_score)

    weights = [0.25]*4

    s = (w_i * math.log(p_i) for w_i, p_i in zip(weights, clipped_precision_score))
    s = BP * math.exp(math.fsum(s))
    return s

In [8]:
#For Anything goes
#Anything goes ref: https://machinelearningmastery.com/calculate-bleu-score-for-text-python/

from nltk.translate.bleu_score import sentence_bleu
import random

In [12]:
def evaluate(Source, Target):

    # opening the train_source file in read mode
    my_file = open(Source, "r", encoding='UTF-8')
    data = my_file.read()
    train_source_list = data.replace('\n'," ").split('<s>')
    my_file.close()
    for item in range(len(train_source_list)):
        train_source_list[item] = train_source_list[item].replace('</s>', '')

    # opening the train_target file in read mode
    my_file = open(Target, "r", encoding='UTF-8')
    data = my_file.read()
    train_target_list = data.replace('\n'," ").split('<s>')
    my_file.close()
    for item in range(len(train_target_list)):
        train_target_list[item] = train_target_list[item].replace('</s>', '')

    # From Scratch
    candidate = random.randint(0,len(train_target_list))
    original_train = train_source_list[candidate]
    reference_train = train_target_list[candidate]

    From_Scratch_Score = from_scratch_bleu_score(original_train, reference_train)
    print ("BLEU Score from scratch : ", From_Scratch_Score)
    
    #Anything goes
    
    #Selecting a random sentence from target list
    candidate = random.choice(train_target_list)
    print('NLTK Sentence BLUE 1-gram: %f' % sentence_bleu(train_source_list, candidate, weights=(1, 0, 0, 0)))
    print('NLTK Sentence BLUE 2-gram: %f' % sentence_bleu(train_source_list, candidate, weights=(0.5, 0.5, 0, 0)))
    print('NLTK Sentence BLUE 3-gram: %f' % sentence_bleu(train_source_list, candidate, weights=(0.33, 0.33, 0.33, 0)))
    print('NLTK Sentence BLUE 4-gram: %f' % sentence_bleu(train_source_list, candidate, weights=(0.25, 0.25, 0.25, 0.25)))

    return True
    

In [13]:
evaluate("train-source.txt","train-target.txt")

BLEU Score from scratch :  0.4583034067124109
NLTK Sentence BLUE 1-gram: 1.000000
NLTK Sentence BLUE 2-gram: 1.000000
NLTK Sentence BLUE 3-gram: 1.000000
NLTK Sentence BLUE 4-gram: 1.000000


True

In [11]:
evaluate("test-source.txt","test-target.txt")

BLEU Score from scratch :  0.7183093189481372
NLTK Sentence BLUE 1-gram: 1.000000
NLTK Sentence BLUE 2-gram: 1.000000
NLTK Sentence BLUE 3-gram: 0.996400
NLTK Sentence BLUE 4-gram: 0.983281


True