# Loading data function

In [33]:
def load_data(path):
    with open(path,"r",encoding = "utf-8") as file:
        return [line.strip() for line in file.readlines()]





## Loading original 50 sentence

In [34]:
en_true = load_data("./data/test.en")
hi_true = load_data("./data/test.hi")
mar_true = load_data("./data/test.mr")

In [35]:
import random
random.seed(27)
sample_indices = random.sample(range(len(hi_true)),50)
def random_sentences(language):
    random_sentences = [language[i] for i in sample_indices]
    return random_sentences


In [36]:
eng_50_sentences = random_sentences(en_true)
hi_50_sentences =  random_sentences(hi_true)
mar_50_sentences = random_sentences(mar_true)


In [37]:
print(len(eng_50_sentences))
print(len(hi_50_sentences))
print(len(mar_50_sentences))

50
50
50


## Loading chatgpt Translated sentence

In [38]:
eng_to_hindi = load_data("./Translation/chatgpt_eng_to_hindi.txt")
hindi_to_eng = load_data("./Translation/chatgpt_hindi_to_eng.txt")
hindi_to_mar = load_data("./Translation/chatgpt_hindi_to_mar.txt")
mar_to_hindi = load_data("./Translation/chatgpt_mar_to_hindi.txt")


In [39]:
print(len(eng_to_hindi))
print(len(hindi_to_eng))
print(len(hindi_to_mar))
print(len(mar_to_hindi))

50
50
50
50


# Blue Scores

In [40]:
from nltk.translate.bleu_score import corpus_bleu, sentence_bleu,SmoothingFunction
import string

### Corpus-level BLEU Score

In [41]:
from nltk.translate.bleu_score import corpus_bleu, sentence_bleu

def corpus_level_bleu_score(original,translated):
      bleuScore =corpus_bleu([[ref] for ref in original], translated)
      return bleuScore


In [42]:
eng_to_hindi_blue = corpus_level_bleu_score(hi_50_sentences,eng_to_hindi)
hindi_to_eng_blue = corpus_level_bleu_score(eng_50_sentences,hindi_to_eng)
hindi_to_mar_blue = corpus_level_bleu_score(mar_50_sentences,hindi_to_mar)
mar_to_hindi_blue = corpus_level_bleu_score(hi_50_sentences,mar_to_hindi)

print(f"corpus_level_bleu_score:\nEnglish to hindi: {eng_to_hindi_blue}\nHindi to English: {hindi_to_eng_blue}\nHindi to Marathi: {hindi_to_mar_blue}\nMarathi to Hindi: {mar_to_hindi_blue}")

corpus_level_bleu_score:
English to hindi: 0.5571403063814033
Hindi to English: 0.6759954593557138
Hindi to Marathi: 0.45905760771010506
Marathi to Hindi: 0.5002641419433387


### Sentence-level BLEU Score

In [43]:
def clean_sentence(sentence):
    # Convert to lowercase
    cleaned_sentence = sentence.lower()
    
    # Remove punctuation
    cleaned_sentence = cleaned_sentence.translate(str.maketrans('', '', string.punctuation))
    
    # Remove extra whitespace
    cleaned_sentence = ' '.join(cleaned_sentence.split())
    
    return cleaned_sentence

In [44]:
def sentence_level_bleu_score(original, translation):
    bleu_scores = []
    smoothing_function = SmoothingFunction()
    for original_sentence, translation_sentence in zip(original, translation):
        original_sentence = clean_sentence(original_sentence)
        translation_sentence = clean_sentence(translation_sentence)
        original_tokens = original_sentence.split()
        translation_tokens = translation_sentence.split()

        # Calculate BLEU score
        bleu_score = sentence_bleu([original_tokens], translation_tokens,smoothing_function=smoothing_function.method2)
        bleu_scores.append(bleu_score)
    
    # Calculate average BLEU score
    overall_bleu_score = sum(bleu_scores) / len(bleu_scores)
    return overall_bleu_score




In [45]:
eng_to_hindi_blue = sentence_level_bleu_score(hi_50_sentences,eng_to_hindi)
hindi_to_eng_blue = sentence_level_bleu_score(eng_50_sentences,hindi_to_eng)
hindi_to_mar_blue = sentence_level_bleu_score(mar_50_sentences,hindi_to_mar)
mar_to_hindi_blue = sentence_level_bleu_score(hi_50_sentences,mar_to_hindi)

print(f"sentence_level_bleu_score:\nEnglish to hindi: {eng_to_hindi_blue}\nHindi to English: {hindi_to_eng_blue}\nHindi to Marathi: {hindi_to_mar_blue}\nMarathi to Hindi: {mar_to_hindi_blue}")

sentence_level_bleu_score:
English to hindi: 0.2352689404776619
Hindi to English: 0.38535910617991326
Hindi to Marathi: 0.12364932614313781
Marathi to Hindi: 0.1891186551083842


# Rouge Scores

In [46]:
from rouge import Rouge
def calculate_rouge_scores(original, translated):
    rouge = Rouge()
    scores = rouge.get_scores(translated, original, avg=True)
    return scores

### English to Hindi

In [47]:
rouge_eng_to_hin=calculate_rouge_scores(hi_50_sentences,eng_to_hindi)
print("ROUGE Score English to Hindi:")
print("ROUGE_1:",rouge_eng_to_hin['rouge-1'])
print("ROUGE_2:",rouge_eng_to_hin['rouge-2'])
print("ROUGE_L:",rouge_eng_to_hin['rouge-l'])

ROUGE Score English to Hindi:
ROUGE_1: {'r': 0.46664925471651636, 'p': 0.4774867543499078, 'f': 0.46892200167874676}
ROUGE_2: {'r': 0.24435760851261928, 'p': 0.24962376462546743, 'f': 0.24512652237922683}
ROUGE_L: {'r': 0.4375362768992974, 'p': 0.44736179396291303, 'f': 0.43952126948726256}


### Hindi to English

In [48]:
rouge_hin_to_eng=calculate_rouge_scores(eng_50_sentences,hindi_to_eng)
print("ROUGE Score Hindi to English:")
print("ROUGE_1:",rouge_hin_to_eng['rouge-1'])
print("ROUGE_2:",rouge_hin_to_eng['rouge-2'])
print("ROUGE_L:",rouge_hin_to_eng['rouge-l'])

ROUGE Score Hindi to English:
ROUGE_1: {'r': 0.6002958094238213, 'p': 0.6003428024658658, 'f': 0.5956924723219316}
ROUGE_2: {'r': 0.39434417148039436, 'p': 0.39918128346056053, 'f': 0.3940595129073588}
ROUGE_L: {'r': 0.5684085799477443, 'p': 0.5680184328640194, 'f': 0.5640323267855879}


### Marathi to Hindi

In [49]:
rouge_mar_to_hin=calculate_rouge_scores(hi_50_sentences,mar_to_hindi)
print("ROUGE Score Marathi to Hindi:")
print("ROUGE_1:",rouge_mar_to_hin['rouge-1'])
print("ROUGE_2:",rouge_mar_to_hin['rouge-2'])
print("ROUGE_L:",rouge_mar_to_hin['rouge-l'])

ROUGE Score Marathi to Hindi:
ROUGE_1: {'r': 0.41943437009311885, 'p': 0.4392298157108293, 'f': 0.42421859247999805}
ROUGE_2: {'r': 0.1937645936409801, 'p': 0.2035800031168385, 'f': 0.19628101822124502}
ROUGE_L: {'r': 0.3820058452399461, 'p': 0.40064674680548956, 'f': 0.387004991954881}


### Hindi to Marathi

In [50]:
rouge_hin_to_mar=calculate_rouge_scores(mar_50_sentences,hindi_to_mar)
print("ROUGE Score Hindi to Marathi:")
print("ROUGE_1:",rouge_hin_to_mar['rouge-1'])
print("ROUGE_2:",rouge_hin_to_mar['rouge-2'])
print("ROUGE_L:",rouge_hin_to_mar['rouge-l'])

ROUGE Score Hindi to Marathi:
ROUGE_1: {'r': 0.24206635048817712, 'p': 0.23586376875045598, 'f': 0.23574168046239743}
ROUGE_2: {'r': 0.06298102714279186, 'p': 0.06418563364739835, 'f': 0.0630364994229708}
ROUGE_L: {'r': 0.23265553177472684, 'p': 0.22696298738527837, 'f': 0.22669029762240825}
