In [None]:
!pip install langchain openai

In [2]:
import pandas as pd
import os
import google.generativeai as genai
import random
import time
import json
from openai import OpenAI
from langchain.prompts import PromptTemplate
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [3]:
# Lift pandas' display limits: no cap on the number of columns shown and no
# truncation of long cell contents.
for _display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(_display_option, None)

In [4]:
# Define root_path

In [5]:
# Load test_df
# Coerce every comment to str before it is interpolated into the prompt.
# NOTE(review): astype(str) presumably turns missing values into the literal
# string 'nan' — confirm that is acceptable for classification.
test_df['comment_text'] = test_df['comment_text'].astype(str)

In [None]:
# Preview the first rows of test_df as a sanity check.
test_df.head()

In [7]:
# Define sampled_data_path

In [8]:
# Load sampled_data_df

In [9]:
# Partition the sampled data by its 'Conceptual' label:
# sub_df_0 holds rows labelled 0.0, sub_df_1 holds rows labelled 1.0.
_conceptual_label = sampled_data_df['Conceptual']
sub_df_0 = sampled_data_df[_conceptual_label == 0.0]
sub_df_1 = sampled_data_df[_conceptual_label == 1.0]

In [10]:
# Few-shot examples for the prompt: the first five label-1 comments and the
# sixth-to-tenth label-0 comments (by position).
# NOTE(review): the negative slice starts at position 5, not 0 — confirm the
# offset is intentional.
positive_examples = sub_df_1['comment_text'].iloc[:5].tolist()
negative_examples = sub_df_0['comment_text'].iloc[5:10].tolist()

In [11]:
def generate_prompt(text, positives=None, negatives=None):
    """Build the few-shot classification prompt for a single text.

    The prompt consists of the fixed instructions, one worked example per
    entry in ``positives`` (answered "1") followed by one per entry in
    ``negatives`` (answered "0"), and finally ``text`` with an open
    ``Answer:`` slot for the model to fill in.

    Args:
        text: The text to classify; interpolated with ``str`` formatting.
        positives: Example texts labelled "1". Defaults to the notebook-level
            ``positive_examples`` list.
        negatives: Example texts labelled "0". Defaults to the notebook-level
            ``negative_examples`` list.

    Returns:
        The complete prompt string. With five positives and five negatives the
        result is character-for-character the same as the previous hand-written
        template.

    Note:
        Every supplied example is used, so lists shorter than five no longer
        raise ``IndexError``; they simply produce fewer shots.
    """
    # Resolve the defaults lazily so the globals are read at call time.
    if positives is None:
        positives = positive_examples
    if negatives is None:
        negatives = negative_examples

    # The four-space indent on every line is part of the original prompt text
    # and is preserved so the model sees exactly the same input as before.
    instructions = (
        '\n'
        '    Classify each text provided below. Return "1" only if the text is explicitly a question asking for an explanation of a math concept. For all other texts, including math-related questions that do not specifically ask for a concept explanation, return "0". Do not provide any additional information, solutions, or explanations. Your response should strictly be "1" or "0". DO NOT return anything other than "1" or "0" in your response.\n'
        '    If the text contains multiple questions and if any of the questions is a question asking for an explanation of a math concept, return "1", otherwise, return "0".\n'
        '    ONLY return one "1" or "0", DO NOT return multiple.\n'
        '\n'
        '    Example Classifications:\n'
        '\n'
    )

    # Positives first, then negatives, matching the original ordering.
    shots = "".join(
        f"    Text to Classify:\n    {example}\n    Answer: {label}\n\n"
        for label, examples in ((1, positives), (0, negatives))
        for example in examples
    )

    query = f"    Text to Classify:\n    {text}\n\n    Answer:\n    "
    return instructions + shots + query

In [12]:
# Define GOOGLE_API_KEY

In [13]:
# Register the API key with the google.generativeai client.
# NOTE(review): make sure GOOGLE_API_KEY is read from the environment or a
# secrets store rather than hardcoded in the notebook.
genai.configure(api_key=GOOGLE_API_KEY)

In [14]:
# Harm categories whose blocking threshold is set to BLOCK_NONE, so that
# comments are classified rather than refused by the safety filter.
# NOTE(review): both HARM_CATEGORY_DANGEROUS and HARM_CATEGORY_DANGEROUS_CONTENT
# are listed — confirm whether the first entry is still needed for this model.
_UNBLOCKED_HARM_CATEGORIES = (
    "HARM_CATEGORY_DANGEROUS",
    "HARM_CATEGORY_HARASSMENT",
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
)

safety_settings = [
    {"category": harm_category, "threshold": "BLOCK_NONE"}
    for harm_category in _UNBLOCKED_HARM_CATEGORIES
]


# Gemini Pro model handle, constructed with the safety_settings list.
model = genai.GenerativeModel('gemini-pro', safety_settings=safety_settings)

In [None]:
# Upper bound on requests per row; replaces the previous unbounded retry loop.
MAX_ATTEMPTS = 10
# Pause after every request (successful or not) to stay under the rate limit.
REQUEST_INTERVAL_S = 1.5


def _parse_judgement(raw_text):
    """Return 0 or 1 if ``raw_text`` parses to exactly that label, else None."""
    try:
        label = int(raw_text)
    except (TypeError, ValueError):
        return None
    return label if label in (0, 1) else None


gemini_judgement = []

# Count by position rather than index label so the progress message is
# correct even when test_df does not have a 0..n-1 index.
for position, (row_label, row) in enumerate(test_df.iterrows()):
    if position % 10 == 0:
        print(f"Processed {position} rows")

    prompt = generate_prompt(row['comment_text'])

    judgement = None
    last_error = None
    for _attempt in range(MAX_ATTEMPTS):
        try:
            response = model.generate_content(prompt)
            # Accessing .text can itself raise (e.g. when no text was returned),
            # so it stays inside the try block.
            judgement = _parse_judgement(response.text)
        except Exception as err:
            # Deliberately not a bare `except`: KeyboardInterrupt must still be
            # able to stop the run.
            last_error = err
        # Sleep on failures too, so a failing API is not hit in a tight loop.
        time.sleep(REQUEST_INTERVAL_S)
        if judgement is not None:
            break
    else:
        # Fail loudly instead of hanging forever or recording a made-up label.
        raise RuntimeError(
            f"No valid 0/1 judgement for row {row_label!r} "
            f"after {MAX_ATTEMPTS} attempts"
        ) from last_error

    gemini_judgement.append(judgement)

test_df['gemini_judgement'] = gemini_judgement

# Save test_df

In [None]:
# Cast the gold labels to int so they are compared against the integer
# predictions stored in 'gemini_judgement'.
test_df['Conceptual'] = test_df['Conceptual'].astype(int)

y_true = test_df['Conceptual']
y_pred = test_df['gemini_judgement']

# Binary classification metrics of the Gemini predictions against the labels.
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

for metric_name, metric_value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1),
):
    print(f"{metric_name}: {metric_value}")