In [1]:
from openai import OpenAI
import csv
import pandas as pd

In [9]:
# System prompt for the simulated "teacher" side of each generated conversation.
# It was slightly changed from the regular conversation prompt for testing purposes.
# The triple-quoted text (including its leading indentation) is passed as-is as the
# "system" message content when a conversation is started.
CONVERSATION_PROMPT = """
                This written conversation is between you, a skilled English teacher, and a user eager to learn English. Your role is to facilitate a dynamic and educational dialogue, focusing on improving the user's English proficiency. Make sure to keep each interaction very short, and ask only one question in each of your response.

                Introduce new topics or aspects of the English language in each interaction. If the user don't know what to talk about, start the conversation with a clear, achievable learning objective. For example, starting a random topic to discuss about, engaging them in practicing fundamental English scenarios like greetings, or focus on applying a specific grammatical structure or tense.

                Try to structure the conversation so that the user speaks more than the teacher. Ask open-ended questions that require more than yes/no answers. Don't correct any errors in user's response. If possible, include simple images or emojis to support the learning process and make it more engaging. Additionally, please include interactive elements such as role-playing scenarios, describing pictures, or reacting to simple short stories or situations.

                Additionally, keep track of the topics covered and the user's progress, so you can gradually increase the complexity of conversations and introduce new topics based on what has already been learned.
                """

# System prompt for the grammar-feedback step. It is sent as the "system" message,
# followed by a user message containing one Message/Response pair to review.
#
# The few-shot examples must be internally consistent, because the model imitates them:
#   * Example 3's input contains the misspelling ('peice') that its output corrects.
#     Previously the input was spelled correctly, so the example demonstrated
#     "correcting" an error that was not there, contradicting guideline 2.
#   * Example 1's explanation says the missing "to" belongs after "go", and its own
#     wording no longer contains grammar/spelling slips ("a adverb", "county's").
GRAMMAR_PROMPT = """
        You are an AI English language teacher. Review a Message and Response pair between you and a student, focus only on the student's response. Provide feedback that is encouraging, make sure to follow these guidelines:
        
        1. If Mistakes Found in Response
        - If a student's response contains grammar or spelling errors, provide the correct version for sentences with the mistake.
        - List the grammar and spelling errors identified. If you find spelling errors, simply point them out without giving an explanation. On the other hand, explain the identified grammar errors in an encouraging tone. For each mistake (e.g., using 'is' instead of 'am' in 'I is hungry'), You should:
                - Explain the contexts in which each word should be appropriately used. This includes when to use the incorrect word and when to use the appropriate word, such as when to use 'is' and when to use 'am'. 
                - Provide additional, varied examples that correctly use both the misused word ('is') and the appropriate word ('am'). If the misused word is not applicable, only provide examples for the appropriate word.

        2. No Errors Found:
        - If the response is free from grammar and spelling errors, acknowledge this positively.
        - Do not fabricate errors or force corrections where none are needed.

        3. Assess Response Effectiveness:
        - Evaluate if the student has effectively answered the question (message). If the response needs to be improved, suggest a better response.

        Please keep your responses concise, focusing exclusively on checking grammar and spelling in the student's response. Avoid interpreting or expanding upon the student's answer with additional dialogue, question or information.

        Here are some examples:
        Example Input 1: Question: Where do you want to travel to and why? Response: I want to go US.
        Example Output 1:  
        Your sentence has a minor grammatical issue. It should be: "I want to go to the US."
        Errors identified and explanations:
        1. Missing of preposition "to" after "go" to indicate direction.
        Preposition 'to' should be used to indicate direction or destination after "go" and before a noun, such an example is 'I want to go to the park.'
        Another example is 'I want to go home.' In this case, to is not needed since home here is not a noun, but an adverb. 
        
       2. Missing of article "the" before a specific noun referring to a country (US).
         Article 'the' should be used before 'US' because it's a specific country name (the United States).
        You should add "the" before the country name if the country's name includes a common noun or "of". 
        Example: The United Kingdom, The Republic of China

        You only answered the first part of the question. Here's the suggested version of your sentence: I want to go to the US because of its diverse culture and famous landmarks.

        Example Input 2: Question: What is one thing you like to do? Response: I really like singing.
        Example Output 2: Your sentence is grammatically correct, keep up your good work!

        Example Input 3: Question: What do you want to eat? Response: I would like a peice of pie.
        Example Output 3: You did a good job structuring your sentence! However, there's a spelling error.
        1. Spelling error: 'peice' should be corrected to 'piece.' 
        Here's the corrected version of your sentence: 'I would like a piece of pie.'
        """

# Prompts used only by the test-data generation loop (not by the tutoring flow itself).

# First user turn that seeds every simulated conversation.
INITIAL_USER_PROMPT = "Hi, I am a beginner in learning English. Can you suggest some topics for me?"

# Template for the simulated learner's grammatically correct answer.
# `{question}` is filled in with str.format() using the teacher's latest message.
ANSWER_QUESTION_PROMPT = """
Please answer the following question concisely as an English learner at the beginner level, make sure its gramatically correct:
{question}
"""
# Same template, but asks for 1 to 3 deliberate grammar or spelling errors so the
# answer can serve as a "contains mistakes" sample.
ANSWER_QUESTION_PROMPT_INCORRECT = """
Please answer the following question concisely as an English learner at the beginner level, be sure to delibrately introduce some (1 to 3) grammar or spelling errors.
{question}
"""

In [14]:
# Shared client used by every request in this notebook. It is constructed with no
# arguments, so credentials must come from the SDK's default configuration
# (presumably the OPENAI_API_KEY environment variable -- confirm for your setup).
client = OpenAI()


def send_openai_request(messages, max_tokens=400, model="gpt-4", temperature=1):
    """Send one chat-completion request and return the text of the reply.

    Args:
        messages: Chat history as a list of {"role": ..., "content": ...} dicts.
        max_tokens: Upper bound on the number of tokens generated for the reply.
        model: Name of the chat model to call. Defaults to "gpt-4", the value that
            was previously hard-coded, so existing callers are unaffected.
        temperature: Sampling temperature. Defaults to 1, the previously
            hard-coded value.

    Returns:
        The `content` of the first choice's message in the API response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    return response.choices[0].message.content

In [17]:
# Automatically generate (teacher message, learner response) pairs.
# Every teacher message is answered twice by a simulated learner: once correctly
# (label 0) and once with deliberately introduced mistakes (label 1).
messages = []
responses = []
# label = 0: response with no grammar mistake; label = 1: response with deliberate mistakes
labels = []

# Simulate 5 conversations of 8 teacher/learner interactions each.
N_CONVERSATIONS = 5
N_TURNS_PER_CONVERSATION = 8

for _conversation_idx in range(N_CONVERSATIONS):
    conversation = [{"role": "system", "content": CONVERSATION_PROMPT},
                    {"role": "user", "content": INITIAL_USER_PROMPT}]
    for _turn_idx in range(N_TURNS_PER_CONVERSATION):
        # Teacher's next message given the conversation so far.
        message = send_openai_request(conversation, max_tokens=200)

        # Two independent learner answers to the same teacher message.
        correct_response = send_openai_request(
            [{"role": "user", "content": ANSWER_QUESTION_PROMPT.format(question=message)}],
            max_tokens=100,
        )
        incorrect_response = send_openai_request(
            [{"role": "user", "content": ANSWER_QUESTION_PROMPT_INCORRECT.format(question=message)}],
            max_tokens=100,
        )

        # Record both rows only after all requests for this turn succeeded, so the
        # three lists can never get out of step if a request raises.
        messages.extend([message, message])
        responses.extend([correct_response, incorrect_response])
        labels.extend([0, 1])

        # The teacher's message is the model's own previous output, so it goes back
        # into the history as an "assistant" turn. (It used to be appended with role
        # "system", which presents past replies as extra instructions.)
        conversation.append({"role": "assistant", "content": message})
        # The conversation continues from the grammatically correct answer only.
        conversation.append({"role": "user", "content": correct_response})

df = pd.DataFrame({
    "Message": messages,
    "Response": responses,
    "Grammar Mistake": labels
})

# Save the DataFrame to a CSV file.
# NOTE(review): a later cell reads 'MessageResponse.csv' back after it was manually
# annotated, so re-running this cell would presumably overwrite those annotations.
csv_file_path = 'MessageResponse.csv'
df.to_csv(csv_file_path, index=False)
df

Unnamed: 0,Message,Response,Grammar Mistake
0,"Hello! Absolutely, I'd be happy to help. As a ...",Hello! My name is John. I am from Mexico. In f...,0
1,"Hello! Absolutely, I'd be happy to help. As a ...",Hello! Me is Lee. I from Korea. I like playing...,1
2,Hello John! It's nice to meet you. Reading is ...,"Hello! My favorite book is ""Harry Potter"". It ...",0
3,Hello John! It's nice to meet you. Reading is ...,Hello! I happy to meet you. My favorite book i...,1
4,"Great job, John! I can see you're a big fan of...","Yes, I have re-read Harry Potter and the Priso...",0
...,...,...,...
75,"Well done! 📚 Jolly good! Now, let's use a bit ...","If I could meet JK Rowling, I was very happy. ...",1
76,"Fantastic response! 💫 Now, let's focus on desc...",Japan is beautiful and clean. It is very busy ...,0
77,"Fantastic response! 💫 Now, let's focus on desc...",Japan very beutiful country. It quiet with muc...,1
78,Well done! 🇯🇵 That's a really nice description...,Sure. Let's start at the front door. When you ...,0


In [19]:
# After generating the message/response pairs, we manually annotated the number and
# types of grammar mistakes and saved the annotations in the same CSV file.
# We noticed that GPT does not always produce grammatically correct sentences for
# label 0, so we manually checked those sentences and fixed their grammar errors.
# The annotated CSV file looks like the following:
Annotated_df = pd.read_csv('MessageResponse.csv')
Annotated_df

Unnamed: 0,Message,Response,Numbers of Mistakes,Grammatical Errors
0,"Hello! Absolutely, I'd be happy to help. As a ...",Hello! My name is John. I am from Mexico. In f...,0,
1,Hello John! It's nice to meet you. Reading is ...,"Hello! My favorite book is ""Harry Potter"". It ...",0,
2,"Great job, John! I can see you're a big fan of...","Yes, I have re-read Harry Potter and the Priso...",0,
3,"That's interesting, John! Sirius Black is inde...",Sirius Black is brave and loyal. He loves Harr...,0,
4,"Your description of Sirius Black is fantastic,...","Hogwarts School is a big, old castle. It has m...",0,
...,...,...,...,...
75,Nicely done! 😊 Hermione is indeed an iconic ch...,I watched a match yesterday. My favorite team ...,2,"Past tense: ""My favorite team losing"" should b..."
76,"Well done!⚽️ Now, let's shift gears and practi...",Could you lend me a English book?,1,"Article Use: ""a English book"" should be ""an En..."
77,"Well done! 📚 Jolly good! Now, let's use a bit ...","If I could meet JK Rowling, I was very happy. ...",1,"Verb Tense Consistency: ""I was very happy."" sh..."
78,"Fantastic response! 💫 Now, let's focus on desc...",Japan is a very beutiful country. The food is ...,2,"Spelling Error: ""beutiful"" should be ""beautifu..."


In [25]:
# Send each message/response pair, together with the grammar prompt, to the model
# to test its grammar-correction capabilities.
# Doing this for the whole dataset at once exceeds GPT-4's tokens-per-minute limit,
# so the rows are processed in 8 separate batches (one cell per batch).

# Feedback strings collected across all batches, in the order they were requested.
Outputs = []

# User-message template holding the pair under review.
question_response = """
                Message: {question}

                Response: {response} 
                """


def runPart(df):
    """Request grammar feedback for every row of `df` and append it to `Outputs`.

    Args:
        df: DataFrame with "Message" and "Response" columns; rows are processed
            in iteration order.

    Returns:
        None. Each row's feedback is appended to the module-level `Outputs` list,
        so calling this twice on the same rows stores their feedback twice.
    """
    for _, pair in df.iterrows():
        # System turn carries the grading instructions; user turn carries the pair.
        system_turn = {"role": "system", "content": GRAMMAR_PROMPT}
        user_turn = {
            "role": "user",
            "content": question_response.format(
                question=pair["Message"],
                response=pair["Response"],
            ),
        }
        Outputs.append(
            send_openai_request(messages=[system_turn, user_turn], max_tokens=350)
        )

In [26]:
# Batch 1 of 8: rows 0-9. The printed length of Outputs is a sanity check that
# the batch added one feedback entry per row.
first_df = Annotated_df.iloc[:10]
runPart(first_df)
print(len(Outputs))

10


In [27]:
# Batch 2 of 8: rows 10-19. Outputs should now hold 20 entries.
second_df = Annotated_df.iloc[10:20]
runPart(second_df)
print(len(Outputs))

20


In [28]:
# Batch 3 of 8: rows 20-29. runPart appends to the global Outputs list, so run
# this cell exactly once.
third_df = Annotated_df.iloc[20:30]
runPart(third_df)

In [30]:
# Batch 4 of 8: rows 30-39. runPart appends to the global Outputs list, so run
# this cell exactly once.
fourth_df = Annotated_df.iloc[30:40]
runPart(fourth_df)

In [31]:
# Batch 5 of 8: rows 40-49. runPart appends to the global Outputs list, so run
# this cell exactly once.
fifth_df = Annotated_df.iloc[40:50]
runPart(fifth_df)

In [50]:
# Batch 6 of 8: rows 50-59. runPart appends to the global Outputs list, so run
# this cell exactly once.
sixth_df = Annotated_df.iloc[50:60]
runPart(sixth_df)

In [51]:
# Batch 7 of 8: rows 60-69. runPart appends to the global Outputs list, so run
# this cell exactly once.
seventh_df = Annotated_df.iloc[60:70]
runPart(seventh_df)

In [53]:
# Batch 8 of 8: row 70 through the last row (open-ended slice).
eighth_df = Annotated_df.iloc[70:]
runPart(eighth_df)

In [54]:
# Attach the collected feedback as a new column. Outputs is a plain list, so the
# pairing with rows is purely positional: this is only correct if every batch cell
# was run exactly once and in row order.
Annotated_df['GPT Output'] = Outputs

# Save the DataFrame to a CSV file
# NOTE(review): a later cell reads 'GrammarEvaluation.csv' back with manually added
# scoring columns, so re-running this cell would presumably overwrite that manual
# work -- confirm before re-running.
Annotated_df.to_csv('GrammarEvaluation.csv', index=False)
Annotated_df

Unnamed: 0,Message,Response,Numbers of Mistakes,Grammatical Errors,GPT Output
0,"Hello! Absolutely, I'd be happy to help. As a ...",Hello! My name is John. I am from Mexico. In f...,0,,"Your message is mostly correct, but you made a..."
1,Hello John! It's nice to meet you. Reading is ...,"Hello! My favorite book is ""Harry Potter"". It ...",0,,"Your response is almost perfect, there's just ..."
2,"Great job, John! I can see you're a big fan of...","Yes, I have re-read Harry Potter and the Priso...",0,,"Your sentence is grammatically correct, and yo..."
3,"That's interesting, John! Sirius Black is inde...",Sirius Black is brave and loyal. He loves Harr...,0,,"Your response is excellent! It's detailed, cle..."
4,"Your description of Sirius Black is fantastic,...","Hogwarts School is a big, old castle. It has m...",0,,Your response is clear and captures the essenc...
...,...,...,...,...,...
75,Nicely done! 😊 Hermione is indeed an iconic ch...,I watched a match yesterday. My favorite team ...,2,"Past tense: ""My favorite team losing"" should b...",Your sentence has some grammatical issues. The...
76,"Well done!⚽️ Now, let's shift gears and practi...",Could you lend me a English book?,1,"Article Use: ""a English book"" should be ""an En...","Your sentence is almost correct, but it has a ..."
77,"Well done! 📚 Jolly good! Now, let's use a bit ...","If I could meet JK Rowling, I was very happy. ...",1,"Verb Tense Consistency: ""I was very happy."" sh...","Your response is almost perfect, though there'..."
78,"Fantastic response! 💫 Now, let's focus on desc...",Japan is a very beutiful country. The food is ...,2,"Spelling Error: ""beutiful"" should be ""beautifu...",Your sentence has a minor spelling mistake and...


In [2]:
# We manually checked each GPT output and assigned it a binary accuracy and a
# partial accuracy.
# For responses that contain grammar errors, we also assigned a qualitative score
# (the "Human Evaluation Score" column) based on our own criteria.
# The scores were added to 'GrammarEvaluation.csv', which is loaded here.
scored_df = pd.read_csv('GrammarEvaluation.csv')
scored_df

Unnamed: 0,Message,Response,Numbers of Mistakes,Grammatical Errors,GPT Output,Binary Accuracy,Partial Accuracy,Human Evaluation Score
0,"Hello! Absolutely, I'd be happy to help. As a ...",Hello! My name is John. I am from Mexico. In m...,0,,Your response is well-constructed and free fro...,1,1.0,
1,Hello John! It's nice to meet you. Reading is ...,"Hello! My favorite book is ""Harry Potter"". It ...",0,,Your response is grammatically correct and eff...,1,1.0,
2,"Great job, John! I can see you're a big fan of...","Yes, I have re-read Harry Potter and the Priso...",0,,"Your sentence is grammatically correct, and yo...",1,1.0,
3,"That's interesting, John! Sirius Black is inde...",Sirius Black is brave and loyal. He loves Harr...,0,,"Your response is excellent! It's detailed, cle...",1,1.0,
4,"Your description of Sirius Black is fantastic,...","Hogwarts School is a big, old castle. It has m...",0,,"Your sentence is grammatically correct, well d...",1,1.0,
...,...,...,...,...,...,...,...,...
75,Nicely done! 😊 Hermione is indeed an iconic ch...,I watched a match yesterday. My favorite team ...,2,"Past tense: ""My favorite team losing"" should b...",Your sentence has some grammatical issues. The...,1,1.0,8.0
76,"Well done!⚽️ Now, let's shift gears and practi...",Could you lend me a English book?,1,"Article Use: ""a English book"" should be ""an En...","Your sentence is almost correct, but it has a ...",1,1.0,8.0
77,"Well done! 📚 Jolly good! Now, let's use a bit ...","If I could meet JK Rowling, I was very happy. ...",1,"Verb Tense Consistency: ""I was very happy."" sh...","Your response is almost perfect, though there'...",1,1.0,8.0
78,"Fantastic response! 💫 Now, let's focus on desc...",Japan is a very beutiful country. The food is ...,2,"Spelling Error: ""beutiful"" should be ""beautifu...",Your sentence has a minor spelling mistake and...,1,1.0,8.0


In [4]:
# Divisor used to rescale the mean "Human Evaluation Score" to the 0-1 range
# (presumably the maximum score on the rubric -- confirm against the scoring criteria).
MAX_HUMAN_EVALUATION_SCORE = 8


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float("nan")


# Calculate the overall "Binary Accuracy"
binary_accuracy = scored_df["Binary Accuracy"].mean()

# Calculate the overall "Partial Accuracy"
partial_accuracy = scored_df["Partial Accuracy"].mean()

# Row-level masks shared by the counts below.
gpt_correct = scored_df["Binary Accuracy"] == 1
gpt_wrong = scored_df["Binary Accuracy"] == 0
has_mistakes = scored_df["Numbers of Mistakes"] > 0
no_mistakes = scored_df["Numbers of Mistakes"] == 0

# Calculate Precision and Recall based on Binary Accuracy:
#   TP: response has mistakes and GPT's output was judged correct
#   FP: response has no mistakes but GPT's output was judged incorrect
#   FN: response has mistakes and GPT's output was judged incorrect
TP = int((gpt_correct & has_mistakes).sum())
FP = int((gpt_wrong & no_mistakes).sum())
FN = int((gpt_wrong & has_mistakes).sum())
# A zero denominator (e.g. a subset without any erroneous responses) yields NaN
# instead of raising ZeroDivisionError.
precision = _safe_ratio(TP, TP + FP)
recall = _safe_ratio(TP, TP + FN)

print("Overall Binary Accuracy:", binary_accuracy)
print("Precision:", precision)
print("Recall:", recall, '\n')
print("Overall Partial Accuracy:", partial_accuracy, '\n')

# Calculate the average of "Human Evaluation Score" for rows where "Numbers of Mistakes" > 0
average_human_evaluation_score = scored_df.loc[has_mistakes, "Human Evaluation Score"].mean()

# Calculate the overall score out of 1
overall_score = average_human_evaluation_score / MAX_HUMAN_EVALUATION_SCORE

print("Overall Grammar Explanations Score (out of 1):", overall_score)

Overall Binary Accuracy: 0.9625
Precision: 1.0
Recall: 0.925 

Overall Partial Accuracy: 0.9812916666625 

Overall Grammar Explanations Score (out of 1): 0.8690625000000001
