In [3]:
from openai import OpenAI
import pandas as pd
# Load the verb table that every later cell works from.
df = pd.read_csv("verbs.csv")

# Single API client shared by all the *_translate_ helpers below.
# No arguments are passed here, so any credentials must come from the client's own defaults.
client = OpenAI()

def p5_translate_(verb):
    """Translate one verb using the Arabic-script, numbered-conditions prompt.

    Sends ``verb`` as the user message of a two-message chat (system + user)
    to ``gpt-3.5-turbo`` at temperature 0 through the module-level ``client``.
    The system prompt appears to be the Arabic-script counterpart of the
    English prompt used by ``eng_p5_translate_``.

    Args:
        verb: The verb to translate; it is formatted into the user message.

    Returns:
        The ``content`` of the first choice's message in the API response.
    """
    # NOTE(review): there is no space between the second and third fragments
    # (":" is followed directly by "1-"); kept as-is so the prompt is unchanged.
    system_prompt = (
        "ترجم الفعل المعطي بالدارجة المغربية لفعل انجليزي مجرد المصدر."
        " تبع هاد الشروط ونتا كترجم:"
        "1- متعطيش تفسيرات، غير الترجمة صافي. 2- الترجمة خص يكون فيها غير جذر الفعل المترجم"
    )
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"{verb}"},
    ]
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        temperature=0,
    )
    first_choice = response.choices[0]
    return first_choice.message.content

def eng_p5_translate_(verb):
    """Translate one verb using the English, numbered-conditions prompt.

    Sends ``verb`` as the user message of a two-message chat (system + user)
    to ``gpt-3.5-turbo`` at temperature 0 through the module-level ``client``.

    Args:
        verb: The verb to translate; it is formatted into the user message.

    Returns:
        The ``content`` of the first choice's message in the API response.
    """
    # The three fragments are joined without a separator; each continuation
    # fragment carries its own leading space.
    instruction_parts = [
        "Translate the provided Moroccan Darija dialect verb into a bare-infinitive English verb.",
        " Please fulfill the following conditions when translating:",
        " 1- Do not give explanations in the output, only the translation. 2- The translation must contain only the lemma of the translated verb.",
    ]
    system_message = {"role": "system", "content": "".join(instruction_parts)}
    user_message = {"role": "user", "content": f"{verb}"}

    reply = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
        temperature=0,
    )
    return reply.choices[0].message.content

def p4_translate_(verb):
    """Translate one verb using the Arabic-script, two-sentence prompt.

    Sends ``verb`` as the user message of a two-message chat (system + user)
    to ``gpt-3.5-turbo`` at temperature 0 through the module-level ``client``.
    The system prompt appears to be the Arabic-script counterpart of the
    English prompt used by ``eng_p4_translate_``.

    Args:
        verb: The verb to translate; it is formatted into the user message.

    Returns:
        The ``content`` of the first choice's message in the API response.
    """
    # NOTE(review): the two sentences are joined with no space in between;
    # kept as-is so the prompt sent to the model is unchanged.
    first_sentence = "ترجم الفعل المعطي بالدارجة المغربية لفعل انجليزي مجرد المصدر."
    second_sentence = "الترجمة خص يكون فيها غير جذر الفعل المترجم بلا تفسيرات."

    request = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {"role": "system", "content": first_sentence + second_sentence},
            {"role": "user", "content": f"{verb}"},
        ],
        "temperature": 0,
    }
    completion = client.chat.completions.create(**request)
    return completion.choices[0].message.content

def eng_p4_translate_(verb):
    """Translate one verb using the English, two-sentence prompt.

    Sends ``verb`` as the user message of a two-message chat (system + user)
    to ``gpt-3.5-turbo`` at temperature 0 through the module-level ``client``.

    Args:
        verb: The verb to translate; it is formatted into the user message.

    Returns:
        The ``content`` of the first choice's message in the API response.
    """
    system_prompt = (
        "Translate the provided Moroccan Darija dialect verb into a bare-infinitive English verb. "
        "The translation must contain only the lemma of the translated verb with no explanations."
    )
    chat = []
    chat.append({"role": "system", "content": system_prompt})
    chat.append({"role": "user", "content": f"{verb}"})

    answer = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=chat,
        temperature=0,
    )
    message = answer.choices[0].message
    return message.content
 

In [4]:

# Build the working frame: a new DataFrame without the n2..n6 columns.
# drop() without inplace returns a new object, so `df` itself is left untouched.
columns_to_delete = ['n2', 'n3', 'n4', 'n5', 'n6']
dataset = df.drop(columns=columns_to_delete)

# One output column per prompt variant. Every variant translates the same
# 'darija_ar' column, one API call per row, in the order listed here.
translators = {
    'p5_transl': p5_translate_,
    'eng_p5_transl': eng_p5_translate_,
    'p4_transl': p4_translate_,
    'eng_p4_transl': eng_p4_translate_,
}
for column_name, translate in translators.items():
    dataset[column_name] = dataset['darija_ar'].apply(translate)

In [None]:
import matplotlib.pyplot as plt
from nltk.corpus import wordnet as wn


# Turn each verb column into a plain list with spaces replaced by underscores
# (presumably so multi-word verbs match WordNet's underscore-joined lemma names).
# NOTE(review): values are otherwise used as-is -- no trimming or other cleanup
# happens here, and every value must be a string for .replace to work.
p5_transl = [v.replace(" ", "_") for v in dataset['p5_transl']]
eng_p5_transl = [v.replace(" ", "_") for v in dataset['eng_p5_transl']]

p4_transl = [v.replace(" ", "_") for v in dataset['p4_transl']]
eng_p4_transl = [v.replace(" ", "_") for v in dataset['eng_p4_transl']]

# Reference English verbs; evaluate_pairs reads this list by position.
eng = [v.replace(" ", "_") for v in dataset['eng']]


def max_path_similarity(word1, word2):
    """Return the best path similarity over all verb-synset pairings of two words.

    Both words are looked up as verbs in WordNet and every synset of the first
    is scored against every synset of the second. Pairings with no score or a
    zero score are ignored.

    Args:
        word1: First word to look up.
        word2: Second word to look up.

    Returns:
        The highest positive score found, or 0 when there is none (including
        when either word has no verb synsets).
    """
    first_senses = wn.synsets(word1, pos=wn.VERB)
    second_senses = wn.synsets(word2, pos=wn.VERB)

    pair_scores = (
        sense_a.path_similarity(sense_b)
        for sense_a in first_senses
        for sense_b in second_senses
    )
    # Keep only scores that would beat the initial 0; `s and` guards against None.
    positive_scores = (s for s in pair_scores if s and s > 0)
    return max(positive_scores, default=0)

def evaluate_pairs(transl, threshold=0.5, references=None):
    """Score each translation against its reference verb with WordNet path similarity.

    Args:
        transl: Sequence of candidate verbs, position-aligned with the references.
        threshold: Minimum similarity for a pair to be flagged as a match.
        references: Sequence of reference verbs to compare against. When omitted,
            the notebook-level ``eng`` list is used, so existing calls that pass
            only ``transl`` keep working unchanged.

    Returns:
        A list with one ``(candidate, reference, similarity, is_match)`` tuple
        per item of ``transl``, where ``is_match`` is ``similarity >= threshold``.

    Raises:
        IndexError: If a list of references is shorter than ``transl``.
    """
    if references is None:
        # Fall back to the global reference list defined in the notebook.
        references = eng

    results = []
    for position, candidate in enumerate(transl):
        # Index (rather than zip) so a too-short reference list fails loudly
        # instead of silently truncating the evaluation.
        reference = references[position]
        similarity = max_path_similarity(candidate, reference)
        results.append((candidate, reference, similarity, similarity >= threshold))
    return results

# Score every prompt variant against the reference English verbs.
results_p5 = evaluate_pairs(p5_transl)
results_eng_p5 = evaluate_pairs(eng_p5_transl)

results_p4 = evaluate_pairs(p4_transl)
results_eng_p4 = evaluate_pairs(eng_p4_transl)

# Each result tuple is (translation, reference, similarity, passed_threshold);
# only the similarity (index 2) is stored on the frame.
# The p5 column was previously spelled 'similariy_p5'; it now follows the same
# 'similarity_*' naming as the other three columns.
dataset['similarity_p5'] = [item[2] for item in results_p5]
dataset['similarity_eng_p5'] = [item[2] for item in results_eng_p5]

dataset['similarity_p4'] = [item[2] for item in results_p4]
dataset['similarity_eng_p4'] = [item[2] for item in results_eng_p4]

# Per-variant similarity Series, reused by later cells.
p5_s = dataset['similarity_p5']
ep5_s = dataset['similarity_eng_p5']

p4_s = dataset['similarity_p4']
ep4_s = dataset['similarity_eng_p4']

# Mean similarity per variant.
p5 = p5_s.mean()
ep5 = ep5_s.mean()

p4 = p4_s.mean()
ep4 = ep4_s.mean()

# The p5 messages used to say "first"; they now match the p5/p4 numbering
# already used by the "fourth" messages.
print(f'Mean value of similarities in the fifth darija prompt is: {p5}')
print(f'Mean value of similarities in the fifth english prompt is: {ep5}')

print(f'Mean value of similarities in the fourth darija prompt is: {p4}')
print(f'Mean value of similarities in the fourth english prompt is: {ep4}')
dataset.describe()



In [None]:
import matplotlib.pyplot as plt

# Histogram of each similarity column.
# DataFrame.hist draws one subplot per column, so axis labels and the grid are
# set on every Axes; pyplot-level calls (plt.xlabel, plt.title, ...) would only
# touch the last subplot. The overall title goes on the figure instead.
# filter(like='simil') keeps only the similarity score columns, so the
# "Similarity" labels are accurate for everything that is plotted.
similarity_axes = dataset.filter(like='simil').hist(bins=50, figsize=(10, 5))
similarity_fig = similarity_axes.flat[0].get_figure()

for ax in similarity_axes.flat:
    # Scores are compared against 0.5 / 0.2 elsewhere in this notebook, i.e. they
    # are on a 0-1 scale rather than percentages.
    ax.set_xlabel('Similarity (0-1)')
    ax.set_ylabel('Count of pair verbs')
    ax.grid(True)

similarity_fig.suptitle('Similarity histogram')
# Leave a little headroom so the figure title does not overlap the subplots.
similarity_fig.tight_layout(rect=(0, 0, 1, 0.95))
plt.show()


In [None]:
from scipy.stats import f_oneway

# One-way ANOVA over all four similarity samples (p4 and p5, Darija and English).
# NOTE(review): if only the fourth prompt's English-vs-Darija comparison is
# intended, pass just p4_s and ep4_s here and adjust the messages accordingly.
f_stat, p_value = f_oneway(p4_s, ep4_s, ep5_s, p5_s)


print(f"F-statistic: {f_stat}")
print(f"P-value: {p_value}")

# The test compares four groups at once, so the verdict is worded for the whole
# set rather than for a single English/Darija pair.
if p_value < 0.05:
    print("Significant difference among the four prompt variants (p4/p5, English/Darija)")
else:
    print("No significant difference among the four prompt variants (p4/p5, English/Darija)")

In [None]:
# List the (translation, reference, similarity, passed) rows whose similarity
# is below 0.2, first for the Darija p4 prompt and then for the English p4 prompt.
# NOTE(review): despite its name, `filtered_p5` holds rows from the p4 results,
# and the second assignment overwrites the first.
print('filtered Darija prompt of less than 0.2 similarity')
filtered_p5 = list(filter(lambda row: row[2] < 0.2, results_p4))
print(filtered_p5)

print('filtered English prompt of less than 0.2 similarity')
filtered_p5 = list(filter(lambda row: row[2] < 0.2, results_eng_p4))
print(filtered_p5)