<a href="https://colab.research.google.com/github/zmarkofsky/DSML_capstone/blob/main/colabs/LLM_Output.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install --upgrade openai



In [47]:
import pandas as pd
import os
import openai
from google.colab import userdata

# OpenAI client used for every completion request in this notebook. The API key
# is fetched through google.colab's `userdata.get` instead of being hard-coded.
client = openai.OpenAI(api_key=userdata.get('OPENAI_API_KEY'))
# Fine-tuned model identifiers, one per benchmark task.
empathy_model = "ft:gpt-3.5-turbo-0125:personal:empathy-tuned-v1:96quPZFn"
interview_model = "ft:gpt-3.5-turbo-0125:personal:interview-tuned-v1:98u7HS8r"
clarity_model = "ft:gpt-3.5-turbo-0125:personal:clarity-tuned-v1:98vb9W4i"
fairness_model = "ft:gpt-3.5-turbo-0125:personal:fairness-tuned-v1:98viUgEA"

In [34]:
# Root of the public SIOP ML 2024 data folder; every task CSV lives below it.
DATA_ROOT_URL = "https://raw.githubusercontent.com/UrologyUnbound/SIOP_ML_2024_Discord/main/data"

# File-name tag per split: dev files sit in `dev/` but are named `*_val_public.csv`,
# test files sit in `test/` and are named `*_test_public.csv`.
_SPLIT_FILE_TAG = {"dev": "val", "test": "test"}


def load_public_split(task, split):
    """Read the public CSV for one task and split into a DataFrame.

    Args:
        task: Task name used as the file-name prefix (e.g. "empathy").
        split: Either "dev" or "test".

    Returns:
        The DataFrame produced by `pd.read_csv` for that file.

    Raises:
        KeyError: If `split` is not "dev" or "test".
    """
    url = f"{DATA_ROOT_URL}/{split}/{task}_{_SPLIT_FILE_TAG[split]}_public.csv"
    return pd.read_csv(url)


empathy_dev_data = load_public_split("empathy", "dev")
empathy_test_data = load_public_split("empathy", "test")
interview_dev_data = load_public_split("interview", "dev")
interview_test_data = load_public_split("interview", "test")
clarity_dev_data = load_public_split("clarity", "dev")
clarity_test_data = load_public_split("clarity", "test")
fairness_dev_data = load_public_split("fairness", "dev")
fairness_test_data = load_public_split("fairness", "test")

In [10]:
# Preview the first rows of the empathy dev split to check the loaded columns
# (the rendered output shows `_id` and `text`).
empathy_dev_data.head()

Unnamed: 0,_id,text
0,95,"Hi Jonathan, I wanted to reach out to thank yo..."
1,198,"Good Morning Jonathan, I wanted to get with yo..."
2,23,"Hi Jonathan, I learnt that you are working on ..."
3,81,"Hi Jonathan, I hope you are doing well! I want..."
4,97,"Hi Jonathan, Thank you for you recent contribu..."


In [100]:
#Output Function

def generate_responses(df, fine_tuned_model_id, system_message, create_interview_user_message, task,
                       temperature=0, max_tokens=500, seed=42, api_client=None):
    """Run a chat model over every row of `df` and collect the replies.

    One chat-completion request is issued per row, sequentially. Each request
    consists of the shared system message plus a user message built from the row.

    Args:
        df: DataFrame to iterate over; every row must provide an `_id` value.
        fine_tuned_model_id: Model identifier passed to the completions API.
        system_message: System prompt sent with every request.
        create_interview_user_message: Callable taking a row and returning the
            user message text. Despite the name it is used for every task, not
            only the interview one.
        task: Benchmark label recorded alongside each reply.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        seed: Seed forwarded to the API.
        api_client: Optional client exposing `chat.completions.create`. When
            omitted, the notebook-level `client` is used.

    Returns:
        A dict with three parallel lists in row order: "benchmark" (the task
        label repeated), "_id" (row identifiers) and "output" (content of the
        first choice of each response).

    Raises:
        KeyError: If a row has no `_id` entry.
        Any exception raised by the client or by the message builder is
        propagated unchanged.
    """
    # Fall back to the notebook-level client only when none was injected, so the
    # function can also be driven by a stub or a differently configured client.
    chat_client = client if api_client is None else api_client

    benchmarks = []
    row_ids = []  # named to avoid shadowing the builtin `id`
    outputs = []

    for _, row in df.iterrows():
        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": create_interview_user_message(row)},
        ]

        response = chat_client.chat.completions.create(
            model=fine_tuned_model_id,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            seed=seed,
        )

        benchmarks.append(task)
        row_ids.append(row['_id'])
        outputs.append(response.choices[0].message.content)

    return {"benchmark": benchmarks, "_id": row_ids, "output": outputs}

In [25]:
# Empathy Functions

# Empty dict; it is not referenced by any other cell visible in this notebook.
empathy_output = {}

# System prompt for the empathy task: asks for a binary label (1 = empathetic,
# 0 = non-empathetic) and describes both classes.
# NOTE(review): "empatheric" below is a typo, but the string is sent verbatim to
# the model; presumably the fine-tuned model was trained with this exact prompt,
# so confirm before correcting it.
empathy_system_message = """
Your task is to classify the provided text as either "Empathetic" (empatheric = 1) or "Non-empathetic" (non-empathetic = 0).
Empathetic Responses involve understanding, supportiveness, and active engagement. Understanding is demonstrated by showing comprehension
of the individual's feelings and perspective. Supportiveness entails offering genuine support, guidance, or constructive feedback while
respecting the individual's contributions and feelings. Active engagement is displayed through asking questions or suggesting actions that
actively engage with the individual's situation. Non-Empathetic Responses lack empathetic qualities. They may lack personalization, offering
generic advice or feedback without addressing the individual's specific feelings or situation. Dismissiveness occurs when the individual's
feelings, concerns, or contributions are downplayed or ignored. Superficiality refers to appearing empathetic on the surface but lacking depth
in understanding or supporting the individual's actual needs.
"""

def empathy_create_user_message(row):
    """Build the user message for the empathy task from a row's `text` field."""
    template = "Message to Classify: {}"
    return template.format(row['text'])

In [37]:
# Interview Functions

# System prompt for the interview task: instructs the model to produce an answer
# to the fourth interview question based on the answers already provided.
interview_system_message = """
Your task is to analyze a dataset of interview questions and responses. Based on the content, tone, and details of the provided answers, your task is to
 generate a plausible answer for the fourth interview question.
"""

def extract_interview_questions_answers(text):
    """Parse "Question: ... Response: ..." text into question/answer records.

    Returns a list of dicts with "question" and "answer" keys, in order of
    appearance. A question with no "Response:" marker gets an empty answer.
    """
    def _to_record(segment):
        # Only the text between the first and second "Response:" marker (if any)
        # counts as the answer.
        pieces = segment.split("Response:")
        reply = pieces[1].strip() if len(pieces) > 1 else ""
        return {"question": pieces[0].strip(), "answer": reply}

    # Whatever precedes the first "Question:" marker is discarded.
    segments = text.split("Question:")[1:]
    return [_to_record(segment) for segment in segments]


def interview_create_user_message(row):
    """Build the interview user message from a row's `questions_answers` text.

    Every parsed question is emitted as a "Question:" line; an "Answer:" line
    follows only when that question has a non-empty answer.
    """
    lines = []
    for pair in extract_interview_questions_answers(row['questions_answers']):
        lines.append(f"Question: {pair['question']}\n")
        # Unanswered questions (presumably the final one the model must answer)
        # are left without an "Answer:" line.
        if pair['answer']:
            lines.append(f"Answer: {pair['answer']}\n")

    return "".join(lines)

In [48]:
# Fairness Functions

# System prompt for the fairness task: the model must pick which of two options
# received the majority vote as fairer, and output only that answer.
fairness_system_message = """
Given the first and second options, your task is to predict which option received the majority vote as the fairer option. The output should not make up information and not reference these given instructions or context; only output the answer.
"""

def fairness_create_user_message(row):
    """Build the fairness user message listing the row's two options on separate lines."""
    first_line = "First Option: {}".format(row['first_option'])
    second_line = "Second Option: {}".format(row['second_option'])
    return first_line + "\n" + second_line

In [54]:
# Clarity Functions

# System prompt for the clarity task: the model must predict the average clarity
# rating of a personality item on the 1-7 scale described in the prompt, and
# output only that answer.
clarity_system_message = """
Your task is to predict the average clarity rating for each item based on the responses. Respondents rated the clarity of personality test items using a 7-point scale from 1 = extremely unclear to 7 = extremely clear. The output should not make up information and not reference these given instructions or context; only output the answer.
"""

def clarity_create_user_message(row):
    """Return the row's `personality_item` value unchanged as the user message."""
    item_text = row['personality_item']
    return item_text

## Outputs

### Dev Set

In [74]:
# Run the empathy fine-tuned model over the empathy dev split, using the
# function's default temperature.
empathy_dev_output_dict = generate_responses(
    df=empathy_dev_data,
    fine_tuned_model_id=empathy_model,
    system_message=empathy_system_message,
    create_interview_user_message=empathy_create_user_message,
    task="empathy",
)
print(empathy_dev_output_dict)

{'benchmark': ['empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy', 'empathy'], '_id': [95, 198, 23, 81, 97, 39, 200, 47, 129, 177, 144, 113, 182, 109, 194, 197, 83, 67, 36, 46, 70, 120, 15, 55, 2, 104, 40, 115, 135, 69, 107, 123, 22, 158, 186, 9, 49, 87, 4, 160, 124, 119, 164, 188, 50, 27, 17

In [76]:
# Run the interview fine-tuned model over the interview dev split. This is the
# only task that overrides the default temperature (0.2 here).
interview_dev_output_dict = generate_responses(
    df=interview_dev_data,
    fine_tuned_model_id=interview_model,
    system_message=interview_system_message,
    create_interview_user_message=interview_create_user_message,
    task="interview",
    temperature=0.2,
)
print(interview_dev_output_dict)

{'benchmark': ['interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview', 'interview'], '_id': ['R_1SONLQw1Nbpff7r', 'R_1pzbRz5KyoceciS', 'R_3Rlo4gz2kUdVEiz', 'R_2wcPrwGq4Ml8xh1', 'R_3ERsFFBLBsjejrU', 'R_YRhz1h1Djrv9Txv', 'R_1nSJ3qEqXDJPvLV', 'R_2VEpb7m

In [77]:
# Run the clarity fine-tuned model over the clarity dev split, using the
# function's default temperature.
clarity_dev_output_dict = generate_responses(
    df=clarity_dev_data,
    fine_tuned_model_id=clarity_model,
    system_message=clarity_system_message,
    create_interview_user_message=clarity_create_user_message,
    task="clarity",
)
print(clarity_dev_output_dict)

{'benchmark': ['clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clarity', 'clari

In [78]:
# Run the fairness fine-tuned model over the fairness dev split, using the
# function's default temperature.
fairness_dev_output_dict = generate_responses(
    df=fairness_dev_data,
    fine_tuned_model_id=fairness_model,
    system_message=fairness_system_message,
    create_interview_user_message=fairness_create_user_message,
    task="fairness",
)
print(fairness_dev_output_dict)

{'benchmark': ['fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness', 'fairness'], '_id': [24, 

In [97]:
# Stack the four per-task result dicts into one frame, in the order
# empathy, interview, clarity, fairness.
# `DataFrame.append` was deprecated and later removed in pandas 2.0, so the
# frames are combined with `pd.concat`; `ignore_index=True` yields the same
# fresh 0..n-1 index that the old reset_index/drop step produced.
df_dev_full_output = pd.concat(
    [
        pd.DataFrame(empathy_dev_output_dict),
        pd.DataFrame(interview_dev_output_dict),
        pd.DataFrame(clarity_dev_output_dict),
        pd.DataFrame(fairness_dev_output_dict),
    ],
    ignore_index=True,
)
df_dev_full_output

  df_dev_full_output = df_dev_full_output.append(pd.DataFrame(interview_dev_output_dict))
  df_dev_full_output = df_dev_full_output.append(pd.DataFrame(clarity_dev_output_dict))
  df_dev_full_output = df_dev_full_output.append(pd.DataFrame(fairness_dev_output_dict))


Unnamed: 0,benchmark,_id,output
0,empathy,95,1
1,empathy,198,1
2,empathy,23,1
3,empathy,81,1
4,empathy,97,1
...,...,...,...
309,fairness,100,second
310,fairness,101,second
311,fairness,102,first
312,fairness,103,second


In [99]:
# Write the combined dev-set outputs to `dev_full_output.csv` in the current
# working directory, omitting the DataFrame index column.
df_dev_full_output.to_csv("dev_full_output.csv",index=False)