## Section 1 Load the grammar csv

In [None]:
import openai
import pandas as pd

In [None]:
# In our application, we provide the option for users to translate the grammar feedback and standard chatbot messages together.
# Thus, we want to reuse the grammar evaluation dataset,
# focuses soely on AI's messgae and the grammar feedback to test our application's transalation performance
# Specifically, we will compute the similarities between GPT's translation with the translation we get from DeepL API using BLEU score

# Merge the "Message" column and "GPT Output" into one "English Text" column for further translation
translation_df = pd.read_csv('GrammarEvaluation.csv')
df = translation_df.copy()
df.rename(columns={"Message": "English Text"}, inplace=True)
translation_df.rename(columns={"GPT Output": "English Text"}, inplace=True)
df = pd.concat([translation_df, df], ignore_index=True)
df = df[["English Text"]]
df

Unnamed: 0,English Text
0,Your response is well-constructed and free fro...
1,Your response is grammatically correct and eff...
2,"Your sentence is grammatically correct, and yo..."
3,"Your response is excellent! It's detailed, cle..."
4,"Your sentence is grammatically correct, well d..."
...,...
155,Nicely done! 😊 Hermione is indeed an iconic ch...
156,"Well done!⚽️ Now, let's shift gears and practi..."
157,"Well done! 📚 Jolly good! Now, let's use a bit ..."
158,"Fantastic response! 💫 Now, let's focus on desc..."


## Section 2 Use openai API and deepl API for the translation

In [None]:
from openai import OpenAI
import os
import time
client = OpenAI(
    # This is the default and can be omitted
    api_key='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'
)

In [None]:
TRANSLATE_PROMPT = """
        Translate the following paragraph to {native_language}:

        """

def translate_openai(native_language, message):

  prompt = TRANSLATE_PROMPT.format(native_language=native_language, message=message)
  time.sleep(2)
  api_response = client.chat.completions.create(
      model="gpt-4",
      messages=[
          {
              "role": "system",
              "content": prompt
          },
          {
              "role": "user",
              "content": message
          }
      ]
  )
  result = api_response.choices[0].message.content
  print(result)
  return(result)



In [None]:
print(translate_openai('Chinese', 'Hello'))

你好
你好


In [None]:
import deepl

# Replace with your actual DeepL API key
deepl_api_key = 'a173f71b-52b0-8ecf-c420-5d031ce980b5:fx'

# Create a Translator object
translator = deepl.Translator(deepl_api_key)


def translate_deepl(native_language, message):
  # convert the target language
  if native_language == 'Chinese':
    native_language = 'ZH'
  elif native_language == 'German':
    native_language = 'DE'
  elif native_language == 'French':
    native_language = 'FR'
  elif native_language == 'Spanish':
    native_language = 'ES'

  try:
      result = translator.translate_text(message, target_lang=native_language)
      print(result.text)
      return (result.text)
  except deepl.DeepLException as error:
      print(f"An error occurred: {error}")


In [None]:
print( translate_deepl('Chinese', 'Hello'))

您好
您好


In [None]:
# Define languages to test
languages = ['Chinese', 'German', 'French', 'Spanish']

# Add new columns for each translation method and language
for lang in languages:
    df[f'Deepl_Translation_{lang}'] = ''
    df[f'OpenAI_Translation_{lang}'] = ''

# Perform translations and update DataFrame
for index, row in df.iterrows():
    for lang in languages:
        deepl_result = translate_deepl(lang, row['English Text'])
        openai_result = translate_openai(lang, row['English Text'])
        df.at[index, f'Deepl_Translation_{lang}'] = deepl_result
        df.at[index, f'OpenAI_Translation_{lang}'] = openai_result

# Optionally save to new CSV
df.to_csv('translated_sentences.csv', index=False)

[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
Erreurs identifiées et explications :
1. "Je suis heureux" - Il manque le verbe "être". Le verbe correct est "je suis heureux". Nous utilisons "am" avec le pronom "I" au présent du verbe "be" pour exprimer des états, tels que des sentiments ou qui ou ce que sont les gens.
Exemples : Je suis fatigué. Je suis professeur.

2. "It is fun book" - L'article indéfini "a" est manquant. L'usage correct est "C'est un livre amusant". L'article "A" s'utilise devant les noms singuliers, dénombrables et non spécifiques.
Exemple : "J'ai vu un chien dans le parc : J'ai vu un chien dans le parc.

3. "J'aime" - Le présent simple doit être utilisé ici. La version correcte est "J'aime".
Exemple : J'aime la pizza.

4. "dans le livre" - L'article défini "le" est manquant. L'usage correct est "dans le livre". Le terme "le" est utilisé pour désigner quelque chose de spécifique.
Exemple : Pouvez-vous ouvrir la porte ?

5. Ce livre fait" - Pour les sujets singuliers à la

## Section3 Calculate the BLEU Score

In [None]:
import pandas as pd
import nltk
from nltk.translate.bleu_score import sentence_bleu
from nltk.tokenize import word_tokenize
import numpy as np

# Downloading the Punkt tokenizer models (used for nltk word tokenization)
nltk.download('punkt')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

#### note: since the tokenization for chinese is different, BLEU score calculations rely heavily on correct tokenization of both the reference and the candidate text. Chinese, unlike languages using the Latin alphabet, does not use spaces to separate words. This means standard tokenization methods (like the one used in the nltk library) can not work correctly. So we use the jieba as our tokenizer.

In [None]:
import jieba

In [None]:
def tokenize_chinese(text):
    return list(jieba.cut(text))

# Example usage
chinese_text = "你好世界"
tokens = tokenize_chinese(chinese_text)
print(tokens)  # ['你好', '世界']

Building prefix dict from the default dictionary ...
DEBUG:jieba:Building prefix dict from the default dictionary ...
Dumping model to file cache /tmp/jieba.cache
DEBUG:jieba:Dumping model to file cache /tmp/jieba.cache
Loading model cost 1.297 seconds.
DEBUG:jieba:Loading model cost 1.297 seconds.
Prefix dict has been built successfully.
DEBUG:jieba:Prefix dict has been built successfully.


['你好', '世界']


In [None]:


# Add columns for BLEU scores
for lang in languages:
    df[f'BLEU_Score_{lang}'] = np.nan

for index, row in df.iterrows():
    for lang in languages:
        if lang == 'Chinese':
            # Use jieba for Chinese tokenization
            reference = tokenize_chinese(row[f'Deepl_Translation_{lang}'])
            candidate = tokenize_chinese(row[f'OpenAI_Translation_{lang}'])
        else:
            # Use NLTK tokenizer for other languages
            reference = word_tokenize(row[f'Deepl_Translation_{lang}'].lower())
            candidate = word_tokenize(row[f'OpenAI_Translation_{lang}'].lower())

        # Calculating BLEU score
        bleu_score = sentence_bleu([reference], candidate, weights=(0.25, 0.25, 0.25, 0.25))
        df.at[index, f'BLEU_Score_{lang}'] = bleu_score


The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


In [None]:


# Optionally, save to a new CSV
df.to_csv('bleu_scores.csv', index=False)

### Calculate the average BLEU score

In [None]:
# Continue from the previous script

languages = ['Chinese', 'German', 'French', 'Spanish']

# Dictionary to store the average BLEU scores
average_bleu_scores = {}

for lang in languages:
    # Calculate the average BLEU score for each language
    average_score = df[f'BLEU_Score_{lang}'].mean()
    average_bleu_scores[lang] = average_score

# Convert the dictionary to a DataFrame for display
average_bleu_df = pd.DataFrame(list(average_bleu_scores.items()), columns=['Language', 'Average_BLEU_Score'])

# Display the average BLEU score table
print(average_bleu_df)

  Language  Average_BLEU_Score
0  Chinese            0.380189
1   German            0.529708
2   French            0.617482
3  Spanish            0.584771
