In [None]:
!pip install langchain openai

In [3]:
import pandas as pd
import os
import google.generativeai as genai
import random
import time
import json
from openai import OpenAI
from langchain.prompts import PromptTemplate
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [4]:
# Show every column and the full text of each cell when a frame is displayed.
for _display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(_display_option, None)

In [5]:
# Define root_path

In [6]:
# Load test_df
# NOTE(review): no statement in the visible notebook assigns `test_df`; it has to be
# loaded here, before the cast below, or this cell raises NameError on a fresh kernel.
# Cast every comment to str; this column is later substituted into the Gemini
# prompt as the {question} value.
test_df['comment_text'] = test_df['comment_text'].astype(str)

In [7]:
# Few-shot classification prompt for Gemini. It contains four worked examples
# (two "No", two "Yes") and a single {question} placeholder that is filled in
# through gemini_prompt_template_lc.format(question=...).
# NOTE(review): the example questions appear to be verbatim user wording
# (misspellings included) — presumably intentional; confirm before "correcting" them.
gemini_prompt_template = """
You will be presented with a series of questions. Your task is to determine whether each question explicitly asks for an explanation of a math concept. After making your determination, you must also provide the reasoning behind your judgment. For each question, respond with "Yes, this is asking for an explanation of a math concept" or "No, this is not asking for an explanation of a math concept," followed by a brief explanation of your reasoning.

Remember, a question that explicitly asks for an explanation of a math concept might include phrases that indicate a request for understanding, clarification, or detailed information about how or why something works in mathematics.

Example Question: If h(x) is the inverse of f(x), what is the value of h(f(x))?

Example Response: No, this is not asking for an explanation of a math concept. The question is requesting to solve a math problem, not an explanation of the concept or how it is applied.

Example Question: What would i have to do to find the slope of the line that pases through the points?

Example Response: No, this is not asking for an explanation of a math concept. The question is requesting to solve a math problem, not an explanation of the concept or how it is applied.

Example Question: Special monomials.....can someone explain me better?

Example Response: Yes, this is asking for an explanation of a math concept. The question seeks understanding of special monomials and their applications in mathematics.

Example Question: I'm confused about the lowest common denominator can someone explain it to me?

Example Response: Yes, this is asking for an explanation of a math concept. The question seeks understanding of the lowest common denominator and its applications in mathematics.

[Now, proceed with classifying and reasoning the following question:]

Question: {question}

Response:
"""

In [8]:
# LangChain template around the Gemini prompt; "question" is its only variable.
gemini_prompt_template_lc = PromptTemplate(
    template=gemini_prompt_template,
    input_variables=["question"],
)

In [9]:
# Define GOOGLE_API_KEY here, e.g. GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]; do not commit the literal key.

In [10]:
# Hand the API key to the google.generativeai SDK. GOOGLE_API_KEY is expected to be
# set by the preceding cell; it is not assigned anywhere in the visible notebook.
genai.configure(api_key=GOOGLE_API_KEY)

In [11]:
# Harm categories for which the block threshold is set to BLOCK_NONE.
# NOTE(review): HARM_CATEGORY_DANGEROUS and HARM_CATEGORY_DANGEROUS_CONTENT are
# both listed; they look like two spellings of the same category — confirm
# against the SDK before removing either.
_HARM_CATEGORIES = (
    "HARM_CATEGORY_DANGEROUS",
    "HARM_CATEGORY_HARASSMENT",
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
)

# One {"category", "threshold"} entry per category, in the order listed above.
safety_settings = [
    {"category": harm_category, "threshold": "BLOCK_NONE"}
    for harm_category in _HARM_CATEGORIES
]

# Gemini model handle used for every generate_content() call in this notebook.
model = genai.GenerativeModel('gemini-pro', safety_settings=safety_settings)

In [12]:
# Test.
# Run the Gemini prompt once on a single sample question and print the raw reply.
sample_question = "What would i have to do to find the slope of the line that pases through the points "
prompt = gemini_prompt_template_lc.format(question=sample_question)

response = model.generate_content(prompt)
print(response.text)

No, this is not asking for an explanation of a math concept. The question is requesting to solve a math problem, not an explanation of the concept or how it is applied.


In [13]:
# Define OPENAI_API_KEY here, e.g. OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]; do not commit the literal key.

In [14]:
# OpenAI client used by generate_response(). OPENAI_API_KEY is expected to be set
# by the preceding cell; it is not assigned anywhere in the visible notebook.
client = OpenAI(api_key=OPENAI_API_KEY)

In [15]:
def generate_response(prompt: str, model_name: str = "gpt-4-turbo"):
    """Send ``prompt`` as a single user message and return the reply text.

    Uses the module-level ``client`` to create one chat completion.

    Args:
        prompt: Text placed in the ``content`` field of the only (user) message.
        model_name: Chat model to query. Defaults to ``"gpt-4-turbo"``, the value
            that used to be hard-coded, so existing one-argument calls behave
            exactly as before.

    Returns:
        The ``message.content`` of the first choice of the completion.
    """
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model_name,
    )
    # Only the first choice is inspected; no other choices are requested.
    return chat_completion.choices[0].message.content

In [16]:
# Majority-vote prompt for the OpenAI model. The three placeholders
# {response_1}, {response_2}, {response_3} receive the Gemini reply texts, and the
# model is instructed to answer with a bare "1" or "0" (parsed with int() downstream).
openai_prompt_template = """
You will be given responses from a previous task where you classified questions as either explicitly asking for an explanation of a math concept or not, along with your reasoning. Based on these responses, your task now is to perform a majority vote to determine the final classification of the question.

If the majority of responses classify the question as asking for an explanation of a math concept, you will return "1". If the majority classify the question as not asking for an explanation of a math concept, you will return "0". Your response should strictly be "1" or "0". DO NOT return anything other than "1" or "0" in your response.

Please analyze the following responses and conduct a majority vote:

Response 1: {response_1}

Response 2: {response_2}

Response 3: {response_3}

Majority Vote Result:
"""

In [17]:
# LangChain template around the majority-vote prompt; one variable per Gemini reply.
openai_prompt_template_lc = PromptTemplate(
    template=openai_prompt_template,
    input_variables=["response_1", "response_2", "response_3"],
)

In [None]:
# Gemini samples per question; the majority-vote prompt has exactly three
# response slots (response_1..response_3).
N_GEMINI_SAMPLES = 3
# Seconds to pause after each Gemini call (presumably rate-limit spacing).
GEMINI_PAUSE_SECONDS = 1.5
# Attempts at obtaining a clean 0/1 vote from OpenAI before giving up on a row.
MAX_VOTE_ATTEMPTS = 5

openai_judgement = []

# enumerate() supplies a positional counter: the label yielded by iterrows() is
# the frame's index value, which need not be a 0-based integer.
for row_num, (_, row) in enumerate(test_df.iterrows()):
    if row_num % 10 == 0:
        print(f"Processed {row_num} rows")

    gemini_prompt = gemini_prompt_template_lc.format(question=row['comment_text'])

    # Sample Gemini several times with the same prompt so there is something to vote on.
    gemini_answers = []
    for _ in range(N_GEMINI_SAMPLES):
        gemini_answers.append(model.generate_content(gemini_prompt).text)
        time.sleep(GEMINI_PAUSE_SECONDS)

    openai_prompt = openai_prompt_template_lc.format(
        response_1=gemini_answers[0],
        response_2=gemini_answers[1],
        response_3=gemini_answers[2],
    )

    # Ask OpenAI for the majority vote. Retries are bounded and failures are
    # reported, so a permanent error (bad key, quota, malformed reply) surfaces
    # instead of looping forever; catching Exception rather than using a bare
    # except keeps the cell interruptible (KeyboardInterrupt is not swallowed).
    verdict = None
    last_problem = None
    for attempt in range(1, MAX_VOTE_ATTEMPTS + 1):
        try:
            candidate = int(generate_response(openai_prompt))
        except Exception as exc:
            last_problem = exc
            print(f"Row {row_num}: vote attempt {attempt}/{MAX_VOTE_ATTEMPTS} failed: {exc!r}")
        else:
            if candidate in (0, 1):
                verdict = candidate
                break
            last_problem = ValueError(f"expected 0 or 1, got {candidate}")
            print(f"Row {row_num}: vote attempt {attempt}/{MAX_VOTE_ATTEMPTS} returned {candidate}")
        if attempt < MAX_VOTE_ATTEMPTS:
            # Exponential back-off between attempts: 2, 4, 8, ... seconds.
            time.sleep(2 ** attempt)

    if verdict is None:
        raise RuntimeError(
            f"No valid 0/1 majority vote for row {row_num} "
            f"after {MAX_VOTE_ATTEMPTS} attempts"
        ) from last_problem

    openai_judgement.append(verdict)

test_df['openai_judgement'] = openai_judgement

# Save test_df

In [None]:
# Cast the ground-truth labels to int, matching the integer votes stored in
# the openai_judgement column.
test_df['Conceptual'] = test_df['Conceptual'].astype(int)

y_true = test_df['Conceptual']
y_pred = test_df['openai_judgement']

# Compute all four scores first, then report them.
accuracy, precision, recall, f1 = [
    score_fn(y_true, y_pred)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
]

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")