# GPT-4o mini for sentiment analysis

In [1]:
from sklearn.metrics import classification_report
from concurrent.futures import ThreadPoolExecutor
from collections import Counter
from pydantic import BaseModel
from typing import Optional
from openai import OpenAI
from enum import Enum

In [2]:
# Shared OpenAI API client; the classifier functions in this notebook call it
# through the module-level name `client`.
# NOTE(review): no arguments are passed, so credentials are presumably picked
# up from the environment (e.g. OPENAI_API_KEY) — confirm for your setup.
client = OpenAI()

In [3]:
# Closed set of sentiment labels a reply may carry.
# The `str` mix-in makes each member compare equal to its plain string value,
# which the classifier functions rely on (e.g. `category == "negative"`).
class Category(str, Enum):
    negative = "negative"
    neutral = "neutral"
    positive = "positive"

# Structured-output schema handed to the chat completion calls as
# `response_format`; the parsed reply exposes the predicted label as `.category`.
class ContentCompliance(BaseModel):
    # Predicted sentiment label; annotated Optional, so None is an accepted value.
    category: Optional[Category]

In [4]:

def classify_message_4o_mini(message):
    """Classify one message with the base GPT-4o mini model.

    Sends ``message`` to ``gpt-4o-mini-2024-07-18`` together with a fixed
    system prompt and requests a reply structured as ``ContentCompliance``.

    Args:
        message: Text to classify.

    Returns:
        0 for negative, 1 for neutral, 2 for positive, or None when the reply
        has no parsed payload or no recognised category.
    """
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-mini-2024-07-18",
        messages=[
            {"role": "system", "content": "Classify the following message as negative, neutral or positive."},
            {"role": "user", "content": message}
        ],
        response_format=ContentCompliance,
    )
    parsed = completion.choices[0].message.parsed

    # The SDK leaves `parsed` unset when the reply could not be turned into the
    # schema (for example a refusal). Report that as "unknown" instead of
    # raising AttributeError, which would abort a whole parallel batch.
    if parsed is None:
        return None

    # Category members compare (and hash) like their string values, so a plain
    # string-keyed table resolves them; a None category falls through to None.
    label_ids = {"negative": 0, "neutral": 1, "positive": 2}
    return label_ids.get(parsed.category)

def classify_messag_4o_mine_fine_tune(message):
    """Classify one message with the fine-tuned GPT-4o mini model.

    Identical request to the base-model classifier except for the model id,
    which points at the ``sentiment-analysis`` fine-tune. The (misspelled)
    function name is kept as-is because callers reference it.

    Args:
        message: Text to classify.

    Returns:
        0 for negative, 1 for neutral, 2 for positive, or None when the reply
        has no parsed payload or no recognised category.
    """
    completion = client.beta.chat.completions.parse(
        model="ft:gpt-4o-mini-2024-07-18:unravel-technologies:sentiment-analysis:Aa1JpFBO",
        messages=[
            {"role": "system", "content": "Classify the following message as negative, neutral or positive."},
            {"role": "user", "content": message}
        ],
        response_format=ContentCompliance,
    )
    parsed = completion.choices[0].message.parsed

    # The SDK leaves `parsed` unset when the reply could not be turned into the
    # schema (for example a refusal). Report that as "unknown" instead of
    # raising AttributeError, which would abort a whole parallel batch.
    if parsed is None:
        return None

    # Category members compare (and hash) like their string values, so a plain
    # string-keyed table resolves them; a None category falls through to None.
    label_ids = {"negative": 0, "neutral": 1, "positive": 2}
    return label_ids.get(parsed.category)
    
    
def classify_messages_parallel(messages, classifier, max_workers=10):
    """Apply ``classifier`` to every message using a pool of worker threads.

    Args:
        messages: Iterable of inputs, each passed to ``classifier`` on its own.
        classifier: Callable taking a single message.
        max_workers: Upper bound on concurrently running threads.

    Returns:
        List of classifier results in the same order as ``messages``.
    """
    pool = ThreadPoolExecutor(max_workers=max_workers)
    with pool:
        # Materialise inside the `with` so every call has finished (and any
        # exception has surfaced) before the pool is shut down.
        return list(pool.map(classifier, messages))


def process_classification_results(df, predictions, fallback_label=0):
    """Print a scikit-learn classification report for a batch of predictions.

    Args:
        df: Table whose ``label`` column holds the ground-truth class ids.
        predictions: Predicted class ids, one per row of ``df``; None marks a
            message for which no class could be determined.
        fallback_label: Class id substituted for None predictions. The default
            of 0 keeps the previous behaviour, but note that it counts every
            unclassified message as a class-0 prediction (the id the
            classifiers in this file use for "negative"), which can flatter
            that class's recall. Pass an id outside the real classes (e.g. -1)
            to have such messages scored as misses instead.

    Returns:
        None. The report is written to stdout.
    """
    resolved = [fallback_label if p is None else p for p in predictions]

    print(classification_report(df['label'].tolist(), resolved))

In [5]:
import pandas as pd

# Load the evaluation set. A forward slash is used as the path separator so
# the relative path resolves on Windows, Linux and macOS alike (a backslash is
# only treated as a separator on Windows).
df = pd.read_csv('data/test_set_twitter.csv')

### GPT-4o mini (base model)

In [6]:
# Run the base-model classifier over every entry of the `content` column
# (in parallel, with the helper's default of 10 worker threads) ...
predictions = classify_messages_parallel(df['content'].tolist(), classify_message_4o_mini)

# ... and print the per-class report against the `label` column.
process_classification_results(df, predictions)

              precision    recall  f1-score   support

         0.0       0.44      0.82      0.58       667
         1.0       0.51      0.37      0.43       667
         2.0       0.56      0.24      0.33       667

    accuracy                           0.48      2001
   macro avg       0.50      0.48      0.45      2001
weighted avg       0.50      0.48      0.45      2001



### GPT-4o mini (fine-tuned)

In [7]:
# Same evaluation as the base-model cell, but with the fine-tuned classifier;
# `predictions` is rebound, so the base-model results are no longer available.
predictions = classify_messages_parallel(df['content'].tolist(), classify_messag_4o_mine_fine_tune)

# Print the per-class report against the `label` column.
process_classification_results(df, predictions)

              precision    recall  f1-score   support

         0.0       0.66      0.73      0.69       667
         1.0       0.70      0.76      0.73       667
         2.0       0.75      0.59      0.66       667

    accuracy                           0.70      2001
   macro avg       0.70      0.70      0.70      2001
weighted avg       0.70      0.70      0.70      2001

