# Fine Tuning GPT-3.5-turbo

In [1]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from dotenv import dotenv_values

from openai import OpenAI

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
# Load the raw emotion dataset.
# A forward slash is used as the separator: it works on Windows and POSIX alike,
# and avoids the invalid "\E" escape sequence that the backslash form produced.
df = pd.read_csv('data/Emotion_classify_Data.csv')
df.head()

Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear


In [3]:
# Quick sanity check: row count and the set of distinct labels.
print(f'Number of reviews : {len(df)}')
print(f'Unique emotions : {df["Emotion"].unique()}')

Number of reviews : 5937
Unique emotions : ['fear' 'anger' 'joy']


In [4]:
# Fixed seed so the train/validation/test membership is identical on every run;
# without it the reported metrics cannot be reproduced.
SEED = 42
# Only the first SUBSET_SIZE rows of the dataset are used for this experiment.
SUBSET_SIZE = 200

subset_x = df.Comment.values[:SUBSET_SIZE]
subset_y = df.Emotion.values[:SUBSET_SIZE]

# First split: hold out 20% of the subset as the test set, stratified by label.
train_x, test_x, train_y, test_y = train_test_split(
    subset_x, subset_y, test_size=0.2, stratify=subset_y, random_state=SEED
)
print(train_x.shape, train_y.shape, test_x.shape,  test_y.shape)

# Second split: carve a validation set (20%) out of the remaining training data.
train_x, val_x, train_y, val_y = train_test_split(
    train_x, train_y, test_size=0.2, stratify=train_y, random_state=SEED
)
print(train_x.shape, train_y.shape, val_x.shape, val_y.shape)

(160,) (160,) (40,) (40,)
(128,) (128,) (32,) (32,)


In [5]:
# Wrap each split's arrays in a two-column frame ('Comments', 'Emotion').
train_df, val_df, test_df = (
    pd.DataFrame({'Comments': comments, 'Emotion': emotions})
    for comments, emotions in (
        (train_x, train_y),
        (val_x, val_y),
        (test_x, test_y),
    )
)

In [6]:
### utility functions
def structure_training_data(df, x, y):
    """Convert a DataFrame into chat-format fine-tuning records.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding one example per row.
    x : str
        Column whose value becomes the "user" message content.
    y : str
        Column whose value becomes the "assistant" message content.

    Returns
    -------
    list of dict
        One ``{"messages": [user_msg, assistant_msg]}`` entry per row,
        in row order. Empty list for an empty frame.
    """
    # Iterate by position instead of ``df.loc[i, ...]`` over ``range(len(df))``:
    # the label-based lookup only works when the frame has a default 0..n-1
    # index and raises KeyError (or reads the wrong rows) otherwise.
    return [
        {
            "messages": [
                {"role": "user", "content": x_val},
                {"role": "assistant", "content": y_val},
            ]
        }
        for x_val, y_val in zip(df[x], df[y])
    ]

def write_to_jsonl(data, file_path):
    """Write each item of ``data`` as one JSON document per line.

    The file at ``file_path`` is opened in text write mode, so any existing
    content is replaced. Every entry is followed by a newline.
    """
    with open(file_path, 'w') as out:
        for record in data:
            out.write(json.dumps(record) + '\n')

def format_test(row):
    """Build the single-message chat prompt for one row.

    ``row`` must support ``row['Comments']``; that value becomes the content
    of a lone "user" message, returned inside a one-element list.
    """
    user_message = {"role": "user", "content": row['Comments']}
    return [user_message]

def predict(client, test_messages, fine_tuned_model_id):
    """Request one chat completion and return the text of its first choice.

    Calls ``client.chat.completions.create`` with the given model id and
    messages, using ``temperature=0`` and ``max_tokens=50``.
    """
    completion = client.chat.completions.create(
        model=fine_tuned_model_id,
        messages=test_messages,
        temperature=0,
        max_tokens=50,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

def store_predictions(client, test_df, fine_tuned_model_id, output_path="predictions.csv"):
    """Predict a label for every row and persist the results.

    Parameters
    ----------
    client
        Client object passed through to ``predict``.
    test_df : pandas.DataFrame
        Frame with a 'Comments' column. It is modified in place: a
        'Prediction' column is added (or overwritten).
    fine_tuned_model_id : str
        Model identifier passed through to ``predict``.
    output_path : str, optional
        Where the frame (including predictions) is written as CSV. Defaults
        to "predictions.csv"; pass a distinct path per split so that a later
        call does not overwrite an earlier call's file.

    Returns
    -------
    None
    """
    # One request per row, collected in row order.
    predictions = [
        predict(client, format_test(row), fine_tuned_model_id)
        for _, row in test_df.iterrows()
    ]
    # Positional assignment on the caller's frame; unlike per-label ``.at``
    # writes this stays correct even if the index contains duplicate labels.
    test_df['Prediction'] = predictions

    test_df.to_csv(output_path)

        

In [7]:
# Convert every split into chat-format records (user comment -> assistant emotion).
train_messages, val_messages, test_messages = (
    structure_training_data(df=split_df, x='Comments', y='Emotion')
    for split_df in (train_df, val_df, test_df)
)

In [8]:
# Persist each split as a JSONL file.
for split_messages, split_path in (
    (train_messages, 'data\\train.jsonl'),
    (val_messages, 'data\\val.jsonl'),
    (test_messages, 'data\\test.jsonl'),
):
    write_to_jsonl(data=split_messages, file_path=split_path)

In [9]:
# Read the API key from the local .env file rather than hard-coding it.
env = dotenv_values('.env')
client = OpenAI(api_key=env['OPENAI_API_KEY'])

# Upload the training and validation files. The files are opened with context
# managers so the handles are closed once each upload call returns (the
# previous bare open(...) calls left them open).
with open('data\\train.jsonl', "rb") as train_fh:
    training_file = client.files.create(file=train_fh, purpose="fine-tune")
with open('data\\val.jsonl', "rb") as val_fh:
    validation_file = client.files.create(file=val_fh, purpose="fine-tune")


# Create Fine-Tuning Job
suffix_name = "emotion-detection"
response = client.fine_tuning.jobs.create(
    training_file=training_file.id,
    validation_file=validation_file.id,
    model="gpt-3.5-turbo",
    suffix=suffix_name,
)

In [20]:
# while True:
#     model_data = client.fine_tuning.jobs.list(limit=10).model_dump()['data'][0]
#     if model_data['status'] == 'succeeded':
#         print(f"Hyper params: {model_data['hyperparameters']}\nResult_files: {model_data['result_files']}")
#         break
#     else:
#         continue
    

In [24]:
# Fetch the job created above by its id. Indexing into jobs.list() by position
# picks whichever job happens to sit at that slot on the account, which changes
# as soon as another job is created.
# NOTE(review): 'fine_tuned_model' is only usable once 'status' is 'succeeded'
# — re-run this cell until the job has finished.
model_data = client.fine_tuning.jobs.retrieve(response.id).model_dump()
model_data

{'id': 'ftjob-X4HBAeZzlH1eQlDTD8fREpKf',
 'created_at': 1700041376,
 'error': None,
 'fine_tuned_model': 'ft:gpt-3.5-turbo-0613:og-corp:emotion-detection:8L8DQ87A',
 'finished_at': 1700047159,
 'hyperparameters': {'n_epochs': 3,
  'batch_size': 1,
  'learning_rate_multiplier': 2},
 'model': 'gpt-3.5-turbo-0613',
 'object': 'fine_tuning.job',
 'organization_id': 'org-KWCEUzl17wjWyyFGdoBASheI',
 'result_files': ['file-NvIUOtzvNkw44VwUTuSlqIit'],
 'status': 'succeeded',
 'trained_tokens': 11556,
 'training_file': 'file-RUAq2FNXkEtKID8qsCLj6F5z',
 'validation_file': 'file-1phGaV5GXZF72C8iaCl7omVN'}

In [23]:
# Run the fine-tuned model over the validation split; results are stored in
# val_df['Prediction'].
store_predictions(
    client=client,
    test_df=val_df,
    fine_tuned_model_id=model_data['fine_tuned_model'],
)

In [44]:
# Run the fine-tuned model over the test split; results are stored in
# test_df['Prediction']. `client` is a required argument of store_predictions
# and was missing from this call, which raised a TypeError.
store_predictions(
    client=client,
    test_df=test_df,
    fine_tuned_model_id=model_data['fine_tuned_model'],
)

In [55]:
# Compute each metric for the validation and test splits in one pass.
# F1 and recall are averaged across classes weighted by class support.
val_acc, test_acc = (
    accuracy_score(y_true=split_df['Emotion'], y_pred=split_df['Prediction'])
    for split_df in (val_df, test_df)
)

val_f1, test_f1 = (
    f1_score(y_true=split_df['Emotion'], y_pred=split_df['Prediction'], average='weighted')
    for split_df in (val_df, test_df)
)

val_recall, test_recall = (
    recall_score(y_true=split_df['Emotion'], y_pred=split_df['Prediction'], average='weighted')
    for split_df in (val_df, test_df)
)

In [58]:
# Summary table: one row per split, one column per metric.
pd.DataFrame(
    [
        [val_acc, val_f1, val_recall],
        [test_acc, test_f1, test_recall],
    ],
    index=['Validation', 'Test'],
    columns=['Acc', 'F1_Score', 'Recall'],
)

Unnamed: 0,Acc,F1_Score,Recall
Validation,0.8125,0.810714,0.8125
Test,0.775,0.776489,0.775
