# NLLB Metrics

In [None]:
import pandas as pd
# Read the NLLB translation metrics CSV into a DataFrame and show it as the cell output.
nllb_df = pd.read_csv(filepath_or_buffer='nllb_translation_metrics.csv')
nllb_df

In [None]:
# Whitespace-separated word counts for the reference and predicted text columns.
# All word-count columns are created first so the resulting column order is
# <col>_word_count (x4) followed by <col>_char_count (x4).
for text_col in ('en', 'es', 'predicted_es', 'predicted_en'):
    nllb_df[f'{text_col}_word_count'] = nllb_df[text_col].str.split().str.len()

# Character counts (string length) for the same four columns.
for text_col in ('en', 'es', 'predicted_es', 'predicted_en'):
    nllb_df[f'{text_col}_char_count'] = nllb_df[text_col].str.len()

nllb_df.head()

In [None]:
# Column-wise mean of the four word-count columns; the result is a Series keyed by
# column name and is reused by the ratio cell that follows.
avg_word_count = nllb_df.loc[
    :,
    ['en_word_count', 'es_word_count', 'predicted_es_word_count', 'predicted_en_word_count'],
].mean()
print(avg_word_count)


In [None]:
# Ratio of the mean predicted word count to the mean reference word count,
# printed for Spanish first and English second.
for lang in ('es', 'en'):
    print(avg_word_count[f'predicted_{lang}_word_count'] / avg_word_count[f'{lang}_word_count'])

In [None]:
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

# Smoothing function handed to every sentence_bleu call below
# (method7 of NLTK's SmoothingFunction, constructed with k=1).
smoothie = SmoothingFunction(k=1).method7  # other options: method1 through method7

# Sentence-level BLEU per row: the reference is the whitespace-tokenised ground truth
# (wrapped in a list because sentence_bleu takes a list of references) and the
# hypothesis is the whitespace-tokenised prediction.
nllb_df['es_bleu'] = [
    sentence_bleu([reference.split()], hypothesis.split(), smoothing_function=smoothie)
    for reference, hypothesis in zip(nllb_df['es'], nllb_df['predicted_es'])
]
nllb_df['en_bleu'] = [
    sentence_bleu([reference.split()], hypothesis.split(), smoothing_function=smoothie)
    for reference, hypothesis in zip(nllb_df['en'], nllb_df['predicted_en'])
]


In [None]:
# Mean of the per-sentence BLEU scores in each direction (Series keyed by column name).
avg_bleu = nllb_df.loc[:, ['es_bleu', 'en_bleu']].mean()
print(f'Average BLEU score for predicted_es: {avg_bleu["es_bleu"]}')
print(f'Average BLEU score for predicted_en: {avg_bleu["en_bleu"]}')

In [None]:
# Mean translation time in each direction.
# This notebook labels prediction_time_es as the en -> es timing and prediction_time_en
# as the es -> en timing, so each average must read from its own column.
avg_es_time = nllb_df['prediction_time_es'].mean()
avg_en_time = nllb_df['prediction_time_en'].mean()
print(f'Average time to translate en to es: {avg_es_time}')
print(f'Average time to translate es to en: {avg_en_time}')

In [None]:
# Slowest (max) and fastest (min) observed translation time per direction.

# en -> es
upper_es_time = nllb_df.loc[:, 'prediction_time_es'].max()
lower_es_time = nllb_df.loc[:, 'prediction_time_es'].min()
print(f'Upper bound time to translate en to es: {upper_es_time}')
print(f'Lower bound time to translate en to es: {lower_es_time}')

# es -> en
upper_en_time = nllb_df.loc[:, 'prediction_time_en'].max()
lower_en_time = nllb_df.loc[:, 'prediction_time_en'].min()
print(f'Upper bound time to translate es to en: {upper_en_time}')
print(f'Lower bound time to translate es to en: {lower_en_time}')

In [None]:
# Translation time normalised by the number of words in the *source* sentence:
# en -> es is divided by the English word count, es -> en by the Spanish word count.
nllb_df['prediction_time_es_per_word'] = nllb_df['prediction_time_es'].div(nllb_df['en_word_count'])
avg_es_time_per_word = nllb_df['prediction_time_es_per_word'].mean()
print(f'Average time to translate en to es per word: {avg_es_time_per_word}')

nllb_df['prediction_time_en_per_word'] = nllb_df['prediction_time_en'].div(nllb_df['es_word_count'])
avg_en_time_per_word = nllb_df['prediction_time_en_per_word'].mean()
print(f'Average time to translate es to en per word: {avg_en_time_per_word}')


In [None]:
# Translation time normalised by the number of characters in the *source* sentence:
# en -> es is divided by the English char count, es -> en by the Spanish char count.
nllb_df['prediction_time_es_per_char'] = nllb_df['prediction_time_es'].div(nllb_df['en_char_count'])
avg_es_time_per_char = nllb_df['prediction_time_es_per_char'].mean()
print(f'Average time to translate en to es per char: {avg_es_time_per_char}')

nllb_df['prediction_time_en_per_char'] = nllb_df['prediction_time_en'].div(nllb_df['es_char_count'])
avg_en_time_per_char = nllb_df['prediction_time_en_per_char'].mean()
print(f'Average time to translate es to en per char: {avg_en_time_per_char}')

In [None]:
# Mean of the precomputed WER_en / WER_es columns, English reported first.
avg_wer_en = nllb_df.loc[:, 'WER_en'].mean()
print(f'Average WER for predicted_en: {avg_wer_en}')

avg_wer_es = nllb_df.loc[:, 'WER_es'].mean()
print(f'Average WER for predicted_es: {avg_wer_es}')

In [None]:
# WER divided by the reference word count of the same language, then averaged.
# NOTE(review): if the WER_* columns are already normalised by reference length this
# divides by the word count a second time — confirm how those columns were produced.
nllb_df['WER_en_per_word'] = nllb_df['WER_en'].div(nllb_df['en_word_count'])
nllb_df['WER_es_per_word'] = nllb_df['WER_es'].div(nllb_df['es_word_count'])

avg_wer_en_per_word = nllb_df['WER_en_per_word'].mean()
print(f'Average WER for predicted_en per word: {avg_wer_en_per_word}')

avg_wer_es_per_word = nllb_df['WER_es_per_word'].mean()
print(f'Average WER for predicted_es per word: {avg_wer_es_per_word}')

In [None]:
# Mean of the precomputed CER_en / CER_es columns, English reported first.
avg_cer_en = nllb_df.loc[:, 'CER_en'].mean()
print(f'Average CER for predicted_en: {avg_cer_en}')

avg_cer_es = nllb_df.loc[:, 'CER_es'].mean()
print(f'Average CER for predicted_es: {avg_cer_es}')


In [None]:
# Mean of the precomputed MER_en / MER_es columns, English reported first.
avg_mer_en = nllb_df.loc[:, 'MER_en'].mean()
print(f'Average MER for predicted_en: {avg_mer_en}')

avg_mer_es = nllb_df.loc[:, 'MER_es'].mean()
print(f'Average MER for predicted_es: {avg_mer_es}')


In [None]:
# Compare the sentiment label of each English reference sentence with the label of its
# predicted English counterpart, using the Hugging Face sentiment-analysis pipeline.
from transformers import pipeline
# NOTE(review): no model is passed, so the library picks its default checkpoint for this
# task; pin an explicit model if the numbers must be reproducible — TODO confirm.
sentiment_analysis = pipeline("sentiment-analysis")

# Classify every sentence exactly once and keep the full result dict, so the label and
# the score both come from the same inference call (model inference is the costly step).
en_sentiment_results = [sentiment_analysis(text)[0] for text in nllb_df['en']]
predicted_en_sentiment_results = [sentiment_analysis(text)[0] for text in nllb_df['predicted_en']]

nllb_df['en_sentiment'] = [result['label'] for result in en_sentiment_results]
nllb_df['predicted_en_sentiment'] = [result['label'] for result in predicted_en_sentiment_results]
nllb_df['en_sentiment_score'] = [result['score'] for result in en_sentiment_results]
nllb_df['predicted_en_sentiment_score'] = [result['score'] for result in predicted_en_sentiment_results]

# True where both texts received the same label; the mean of this boolean column is the
# fraction of rows whose labels agree.
nllb_df['sentiment_similarity_en'] = nllb_df['en_sentiment'] == nllb_df['predicted_en_sentiment']
sentiment_similarity_en = nllb_df['sentiment_similarity_en'].mean()
print(f'Sentiment similarity between en and predicted_en: {sentiment_similarity_en}')


In [None]:
# Compare the sentiment label of each Spanish reference sentence with the label of its
# predicted Spanish counterpart, reusing the `sentiment_analysis` pipeline created in the
# English sentiment cell.
# NOTE(review): that pipeline is created without an explicit model; if the library default
# is an English-only checkpoint, labels for Spanish text are unreliable — confirm, and
# consider a multilingual sentiment model for this direction.

# Classify every sentence exactly once and keep the full result dict, so the label and
# the score both come from the same inference call (model inference is the costly step).
es_sentiment_results = [sentiment_analysis(text)[0] for text in nllb_df['es']]
predicted_es_sentiment_results = [sentiment_analysis(text)[0] for text in nllb_df['predicted_es']]

nllb_df['es_sentiment'] = [result['label'] for result in es_sentiment_results]
nllb_df['predicted_es_sentiment'] = [result['label'] for result in predicted_es_sentiment_results]
nllb_df['es_sentiment_score'] = [result['score'] for result in es_sentiment_results]
nllb_df['predicted_es_sentiment_score'] = [result['score'] for result in predicted_es_sentiment_results]

# True where both texts received the same label; the mean of this boolean column is the
# fraction of rows whose labels agree.
nllb_df['sentiment_similarity_es'] = nllb_df['es_sentiment'] == nllb_df['predicted_es_sentiment']
sentiment_similarity_es = nllb_df['sentiment_similarity_es'].mean()
print(f'Sentiment similarity between es and predicted_es: {sentiment_similarity_es}')

# TTS EN

In [None]:
# Load the English TTS metrics and show the frame as the cell output.
# The tts_time column is used exactly as stored in the CSV (no rescaling is applied).
df_tts_en = pd.read_csv('tts_metrics_en.csv')
df_tts_en

In [None]:
# Duration of the audio file referenced by each row's "path" column, stored as "audio_length".
# NOTE(review): recent librosa releases rename the `filename=` keyword of get_duration to
# `path=` — confirm against the installed version before upgrading.
import librosa
df_tts_en['audio_length'] = [
    librosa.get_duration(filename=audio_file)
    for audio_file in df_tts_en['path']
]
df_tts_en.head()

In [None]:
# Measure the duration of originals_en/result_{i}.wav for every row position i
# (0 .. len(df_tts_en) - 1) and store the durations, in row order, in the
# "predicted_audio_length" column.
# NOTE(review): the folder is named "originals_en" while the column is called
# "predicted_audio_length" — confirm which audio these files actually contain.
import os
import numpy as np
audio_lengths = [
    librosa.get_duration(filename=f'originals_en/result_{i}.wav')
    for i in range(len(df_tts_en))
]
df_tts_en['predicted_audio_length'] = audio_lengths


In [None]:
# Mean duration of both audio-length columns (Series keyed by column name),
# displayed as the cell output.
avg_audio_length = df_tts_en.loc[:, ['audio_length', 'predicted_audio_length']].mean()
avg_audio_length

In [None]:
# Ratio of the mean "predicted_audio_length" to the mean "audio_length".
print(avg_audio_length.loc['predicted_audio_length'] / avg_audio_length.loc['audio_length'])

In [None]:
# Mean of the "similarity" column (values are expected to lie between 0 and 1).
avg_similarity = df_tts_en.loc[:, 'similarity'].mean()
print(f'Average similarity: {avg_similarity}')


In [None]:
# Size of each transcription: whitespace-separated word count, then raw character count.
df_tts_en['transcription_word_count'] = (
    df_tts_en['transcription']
    .str.split()  # split on runs of whitespace
    .str.len()    # number of resulting tokens
)
df_tts_en['transcription_char_count'] = (
    df_tts_en['transcription']
    .str.len()
)


In [None]:
# Mean transcription length in words, then in characters.
avg_transcription_word_count = df_tts_en.loc[:, 'transcription_word_count'].mean()
print(f'Average transcription word count: {avg_transcription_word_count}')

avg_transcription_char_count = df_tts_en.loc[:, 'transcription_char_count'].mean()
print(f'Average transcription char count: {avg_transcription_char_count}')

In [None]:
# Mean synthesis time across all rows of the "tts_time" column.
avg_tts_time = df_tts_en.loc[:, 'tts_time'].mean()
print(f'Average tts time: {avg_tts_time}')

In [None]:
# Synthesis time normalised by the transcription's word count, then averaged over rows.
df_tts_en['tts_time_per_word'] = df_tts_en['tts_time'].div(df_tts_en['transcription_word_count'])
avg_tts_time_per_word = df_tts_en.loc[:, 'tts_time_per_word'].mean()
print(f'Average tts time per word: {avg_tts_time_per_word}')

In [None]:
# Synthesis time normalised by the transcription's character count, then averaged over rows.
df_tts_en['tts_time_per_char'] = df_tts_en['tts_time'].div(df_tts_en['transcription_char_count'])
avg_tts_time_per_char = df_tts_en.loc[:, 'tts_time_per_char'].mean()
print(f'Average tts time per char: {avg_tts_time_per_char}')

In [None]:
# Slowest (max) and fastest (min) observed synthesis time.
upper_tts_time = df_tts_en.loc[:, 'tts_time'].max()
print(f'Upper bound tts time: {upper_tts_time}')

lower_tts_time = df_tts_en.loc[:, 'tts_time'].min()
print(f'Lower bound tts time: {lower_tts_time}')

# TTS ES

In [None]:
# Load the Spanish TTS metrics and show the frame as the cell output.
# The tts_time column is used exactly as stored in the CSV (no rescaling is applied).
df_tts_es = pd.read_csv('tts_metrics_es.csv')
df_tts_es

In [None]:
# Duration of the audio file referenced by each row's "path" column, stored as "audio_length".
# NOTE(review): recent librosa releases rename the `filename=` keyword of get_duration to
# `path=` — confirm against the installed version before upgrading.
df_tts_es['audio_length'] = [
    librosa.get_duration(filename=audio_file)
    for audio_file in df_tts_es['path']
]
df_tts_es.head()

In [None]:
# Measure the duration of originals_es/result_{i}.wav for every row position i
# (0 .. len(df_tts_es) - 1) and store the durations, in row order, in the
# "predicted_audio_length" column.
# NOTE(review): the folder is named "originals_es" while the column is called
# "predicted_audio_length" — confirm which audio these files actually contain.
audio_lengths = [
    librosa.get_duration(filename=f'originals_es/result_{i}.wav')
    for i in range(len(df_tts_es))
]
df_tts_es['predicted_audio_length'] = audio_lengths


In [None]:
# Mean duration of both audio-length columns (Series keyed by column name),
# displayed as the cell output.
avg_audio_length = df_tts_es.loc[:, ['audio_length', 'predicted_audio_length']].mean()
avg_audio_length

In [None]:
# Ratio of the mean "predicted_audio_length" to the mean "audio_length".
print(avg_audio_length.loc['predicted_audio_length'] / avg_audio_length.loc['audio_length'])

In [None]:
# Mean of the "similarity" column (values are expected to lie between 0 and 1).
avg_similarity = df_tts_es.loc[:, 'similarity'].mean()
print(f'Average similarity: {avg_similarity}')


In [None]:
# Size of each transcription: whitespace-separated word count, then raw character count.
df_tts_es['transcription_word_count'] = (
    df_tts_es['transcription']
    .str.split()  # split on runs of whitespace
    .str.len()    # number of resulting tokens
)
df_tts_es['transcription_char_count'] = (
    df_tts_es['transcription']
    .str.len()
)


In [None]:
# Mean transcription length in words, then in characters.
avg_transcription_word_count = df_tts_es.loc[:, 'transcription_word_count'].mean()
print(f'Average transcription word count: {avg_transcription_word_count}')

avg_transcription_char_count = df_tts_es.loc[:, 'transcription_char_count'].mean()
print(f'Average transcription char count: {avg_transcription_char_count}')


In [None]:
# Mean synthesis time across all rows of the "tts_time" column.
avg_tts_time = df_tts_es.loc[:, 'tts_time'].mean()
print(f'Average tts time: {avg_tts_time}')


In [None]:
# Synthesis time normalised by the transcription's word count, then averaged over rows.
df_tts_es['tts_time_per_word'] = df_tts_es['tts_time'].div(df_tts_es['transcription_word_count'])
avg_tts_time_per_word = df_tts_es.loc[:, 'tts_time_per_word'].mean()
print(f'Average tts time per word: {avg_tts_time_per_word}')

In [None]:
# Synthesis time normalised by the transcription's character count, then averaged over rows.
df_tts_es['tts_time_per_char'] = df_tts_es['tts_time'].div(df_tts_es['transcription_char_count'])
avg_tts_time_per_char = df_tts_es.loc[:, 'tts_time_per_char'].mean()
print(f'Average tts time per char: {avg_tts_time_per_char}')


In [None]:
# Slowest (max) and fastest (min) observed synthesis time.
upper_tts_time = df_tts_es.loc[:, 'tts_time'].max()
print(f'Upper bound tts time: {upper_tts_time}')

lower_tts_time = df_tts_es.loc[:, 'tts_time'].min()
print(f'Lower bound tts time: {lower_tts_time}')

# Transcription EN

In [None]:
# Read the English transcription metrics CSV and show the frame as the cell output.
df_trans_en = pd.read_csv(filepath_or_buffer="transcriptions_df_en.csv")
df_trans_en

In [None]:
# Word counts (whitespace-separated tokens) for the original and predicted transcriptions.
# Both word-count columns are created before the char-count columns so the column order
# stays word, word, char, char.
for prefix, source_col in (('original', 'Original Transcription'), ('predicted', 'Predicted Transcription')):
    df_trans_en[f'{prefix}_transcription_word_count'] = df_trans_en[source_col].str.split().str.len()

# Character counts (string length) for the same two columns.
for prefix, source_col in (('original', 'Original Transcription'), ('predicted', 'Predicted Transcription')):
    df_trans_en[f'{prefix}_transcription_char_count'] = df_trans_en[source_col].str.len()



In [None]:
# Mean of each precomputed metric column, reported in the order WER, CER, MER, RTF.
avg_wer = df_trans_en.loc[:, 'WER'].mean()
print(f'Average WER: {avg_wer}')

avg_cer = df_trans_en.loc[:, 'CER'].mean()
print(f'Average CER: {avg_cer}')

avg_mer = df_trans_en.loc[:, 'MER'].mean()
print(f'Average MER: {avg_mer}')

avg_rtf = df_trans_en.loc[:, 'RTF'].mean()
print(f'Average RTF: {avg_rtf}')

In [None]:
# Mean word counts (original, predicted) followed by mean char counts (original, predicted).
# The two word-count averages are reused by the ratio cell that follows.
avg_original_transcription_word_count = df_trans_en.loc[:, 'original_transcription_word_count'].mean()
print(f'Average original transcription word count: {avg_original_transcription_word_count}')

avg_predicted_transcription_word_count = df_trans_en.loc[:, 'predicted_transcription_word_count'].mean()
print(f'Average predicted transcription word count: {avg_predicted_transcription_word_count}')

avg_original_transcription_char_count = df_trans_en.loc[:, 'original_transcription_char_count'].mean()
print(f'Average original transcription char count: {avg_original_transcription_char_count}')

avg_predicted_transcription_char_count = df_trans_en.loc[:, 'predicted_transcription_char_count'].mean()
print(f'Average predicted transcription char count: {avg_predicted_transcription_char_count}')

In [None]:
# Ratio of the average predicted word count to the average original (actual) word count.
# Predicted is the numerator and original the denominator, matching the
# "predicted relative to actual" definition used for the translation word-count ratio.
print(avg_predicted_transcription_word_count/avg_original_transcription_word_count)


In [None]:
# Mean of the "RTF" column over all English transcription rows.
avg_rtf = df_trans_en.loc[:, 'RTF'].mean()
print(f'Average RTF: {avg_rtf}')


In [None]:
# Mean of the "Prediction Time (s)" column over all English transcription rows.
avg_prediction = df_trans_en.loc[:, 'Prediction Time (s)'].mean()
print(f'Average Prediction Time (s): {avg_prediction}')

In [None]:
# Slowest (max) and fastest (min) observed prediction time.
upper_prediction = df_trans_en.loc[:, 'Prediction Time (s)'].max()
print(f'Upper bound time to predict: {upper_prediction}')

lower_prediction = df_trans_en.loc[:, 'Prediction Time (s)'].min()
print(f'Lower bound time to predict: {lower_prediction}')

# Transcription ES

In [None]:
# Read the Spanish transcription metrics CSV and show the frame as the cell output.
df_trans_es = pd.read_csv(filepath_or_buffer="transcriptions_df_es.csv")
df_trans_es

In [None]:
# Word counts (whitespace-separated tokens) for the original and predicted transcriptions.
# Both word-count columns are created before the char-count columns so the column order
# stays word, word, char, char.
for prefix, source_col in (('original', 'Original Transcription'), ('predicted', 'Predicted Transcription')):
    df_trans_es[f'{prefix}_transcription_word_count'] = df_trans_es[source_col].str.split().str.len()

# Character counts (string length) for the same two columns.
for prefix, source_col in (('original', 'Original Transcription'), ('predicted', 'Predicted Transcription')):
    df_trans_es[f'{prefix}_transcription_char_count'] = df_trans_es[source_col].str.len()



In [None]:
# Mean of each precomputed metric column, reported in the order WER, CER, MER, RTF.
avg_wer = df_trans_es.loc[:, 'WER'].mean()
print(f'Average WER: {avg_wer}')

avg_cer = df_trans_es.loc[:, 'CER'].mean()
print(f'Average CER: {avg_cer}')

avg_mer = df_trans_es.loc[:, 'MER'].mean()
print(f'Average MER: {avg_mer}')

avg_rtf = df_trans_es.loc[:, 'RTF'].mean()
print(f'Average RTF: {avg_rtf}')

In [None]:
# Mean word counts (original, predicted) followed by mean char counts (original, predicted).
# The two word-count averages are reused by the ratio cell that follows.
avg_original_transcription_word_count = df_trans_es.loc[:, 'original_transcription_word_count'].mean()
print(f'Average original transcription word count: {avg_original_transcription_word_count}')

avg_predicted_transcription_word_count = df_trans_es.loc[:, 'predicted_transcription_word_count'].mean()
print(f'Average predicted transcription word count: {avg_predicted_transcription_word_count}')

avg_original_transcription_char_count = df_trans_es.loc[:, 'original_transcription_char_count'].mean()
print(f'Average original transcription char count: {avg_original_transcription_char_count}')

avg_predicted_transcription_char_count = df_trans_es.loc[:, 'predicted_transcription_char_count'].mean()
print(f'Average predicted transcription char count: {avg_predicted_transcription_char_count}')

In [None]:
# Ratio of the average predicted word count to the average original (actual) word count.
# Predicted is the numerator and original the denominator, matching the
# "predicted relative to actual" definition used for the translation word-count ratio.
print(avg_predicted_transcription_word_count/avg_original_transcription_word_count)


In [None]:
# Mean of the "RTF" column over all Spanish transcription rows.
avg_rtf = df_trans_es.loc[:, 'RTF'].mean()
print(f'Average RTF: {avg_rtf}')

In [None]:
# Mean of the "Prediction Time (s)" column over all Spanish transcription rows.
avg_prediction = df_trans_es.loc[:, 'Prediction Time (s)'].mean()
print(f'Average Prediction Time (s): {avg_prediction}')

In [None]:
# Slowest (max) and fastest (min) observed prediction time.
upper_prediction = df_trans_es.loc[:, 'Prediction Time (s)'].max()
print(f'Upper bound time to predict: {upper_prediction}')

lower_prediction = df_trans_es.loc[:, 'Prediction Time (s)'].min()
print(f'Lower bound time to predict: {lower_prediction}')