In [2]:
import openai
import os
import json
import pandas as pd
import nltk
import time

from nltk.stem import WordNetLemmatizer
from sklearn.metrics import accuracy_score, classification_report

nltk.download('punkt')
nltk.download('wordnet')

from dotenv import load_dotenv

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [3]:
# Load variables from a local .env file into the process environment so the
# API key below can be read without hard-coding it in the notebook.
load_dotenv()

# Lyrics dataset; the path is relative to the notebook's working directory.
data = pd.read_csv("Datasets/SingleLabel.csv")

# OPEN_AI_KEY is expected to be defined in the environment / .env file.
openai.api_key = os.environ.get("OPEN_AI_KEY")

# Closed set of mood labels the model is asked to choose from.
possible_moods = ["Sadness", "Tension", "Tenderness"]

In [7]:
# Tunable settings for the preprocessing and sampling steps below.
max_lyrics_length = 100  # lyrics are cut to this many characters
sample_size = 232        # number of rows drawn for evaluation
random_state = 5         # seed passed to DataFrame.sample

# Single shared WordNet lemmatizer instance, used by lemmatize_word.
lemmatizer = WordNetLemmatizer()

def lemmatize_word(word):
    """Lemmatize every whitespace-separated token of a text.

    Despite the name, ``word`` may be a whole lyric: it is split on
    whitespace, each token is passed through the module-level
    ``lemmatizer``, and the results are joined back with single spaces.

    Args:
        word: The text to normalise (a string).

    Returns:
        The lemmatized text as a single space-separated string.
    """
    return ' '.join(map(lemmatizer.lemmatize, word.split()))

# Lemmatize each lyric first, then keep only its first `max_lyrics_length`
# characters (the cut is by character count, not by token).
data["lyrics"] = (
    data["lyrics"]
    .apply(lemmatize_word)
    .apply(lambda text: text[:max_lyrics_length])
)

In [8]:
# Evaluation rows are drawn only from position 101 onwards, which keeps them
# disjoint from the first 101 rows that generate_mood_suggestion uses as
# in-prompt examples.
sample = data.iloc[101:].sample(n=sample_size, random_state=random_state)

In [10]:
# One {"input", "output"} record per dataset row: the prompt text that wraps
# the lyric, and the row's label as the expected answer.
training_data = [
    {
        "input": f"Predict the mood for the following lyric: '{row['lyrics']}'",
        "output": row["label"],
    }
    for _, row in data.iterrows()
]

In [11]:
def generate_mood_suggestion(input_text, model="gpt-3.5-turbo-16k"):
    """Ask the chat model to pick one of ``possible_moods`` for a text.

    The first 101 entries of ``training_data`` are sent as few-shot
    demonstrations: each example's prompt goes in as a ``user`` turn and its
    label as the matching ``assistant`` turn, so the model actually sees the
    expected answers. The final ``user`` turn carries the real question.

    Args:
        input_text: The lyric/text whose mood should be predicted.
        model: Name of the OpenAI chat model to query.

    Returns:
        The first entry of ``possible_moods`` (in list order) that occurs in
        the model's reply, compared case-insensitively, or ``None`` when the
        reply mentions none of them.

    Raises:
        Whatever ``openai.ChatCompletion.create`` raises (rate limits,
        timeouts, ...); errors are left to the caller to handle.
    """
    prompt = f"Given the following list of possible moods: {', '.join(possible_moods)}, please suggest a suitable mood for the following text: '{input_text}'."

    # Pair every example question with its answer; without the assistant
    # turns the examples carry no label information at all.
    conversation = []
    for example in training_data[:101]:
        conversation.append({"role": "user", "content": example["input"]})
        conversation.append({"role": "assistant", "content": str(example["output"])})
    conversation.append({"role": "user", "content": prompt})

    response = openai.ChatCompletion.create(
        model=model,
        messages=conversation,
        n=1,  # only the first choice is ever read, so request just one
        max_tokens=1000,
        temperature=0,
    )
    suggested_mood = response['choices'][0]['message']['content']

    # Case-insensitive containment so replies like "sadness." still match.
    reply = suggested_mood.lower()
    for mood in possible_moods:
        if mood.lower() in reply:
            return mood
    return None

In [12]:
def string_to_json(text: str):
    """Parse JSON that may be wrapped in a Markdown code fence.

    Only the wrapper is removed: leading/trailing backticks and an optional
    leading ``json`` language tag. The payload itself is left untouched, so
    keys or values that contain the substring "json" or a backtick survive
    intact.

    Args:
        text: Raw text, e.g. a model reply such as a fenced ``json`` block.

    Returns:
        The decoded Python object.

    Raises:
        json.JSONDecodeError: If the remaining text is not valid JSON.
    """
    cleaned = text.strip().strip("`").strip()
    # No valid JSON document starts with the letters "json", so a leading
    # occurrence can only be the fence's language tag.
    if cleaned[:4].lower() == "json":
        cleaned = cleaned[4:]
    return json.loads(cleaned)

In [13]:
# Predictions collected by evaluate_mood_suggestions, in sample order. Kept at
# module level so partial results survive an interrupted run.
predicted_labels=[]

def evaluate_mood_suggestions(sample, start_index=0, max_retries=5):
    """Predict a mood for each row of ``sample`` and score the predictions.

    Predictions are stored in the module-level ``predicted_labels`` list.
    Entries at or after ``start_index`` are discarded first, so calling with
    the default ``start_index=0`` always starts from a clean list, while a
    positive ``start_index`` resumes after that many existing predictions.

    A failed API call is retried in place after a pause (60 s for rate
    limits, 120 s for other errors) instead of recursing, so a long run with
    many transient errors cannot exhaust the call stack.

    Args:
        sample: DataFrame with ``lyrics`` and ``label`` columns.
        start_index: Position in ``sample`` at which to start/resume.
        max_retries: Number of retries allowed for a single row before the
            last error is re-raised.

    Returns:
        Tuple ``(accuracy, report)`` from ``accuracy_score`` and
        ``classification_report`` over the whole sample.

    Raises:
        ValueError: If ``start_index`` is larger than the number of
            predictions already collected.
        Exception: The last API error once ``max_retries`` is exceeded.
    """
    actual_labels = sample["label"]
    # Stand-in for replies that name no known mood; keeps every prediction a
    # string so the metrics never see None mixed with string labels.
    unmatched_label = "Unknown"

    if start_index > len(predicted_labels):
        raise ValueError(
            f"Cannot resume at index {start_index}: only "
            f"{len(predicted_labels)} predictions have been collected."
        )
    # Drop stale entries so predictions stay aligned with the sample rows.
    del predicted_labels[start_index:]

    for offset, row in enumerate(sample.iloc[start_index:].itertuples()):
        actual_index = start_index + offset
        attempts = 0
        while True:
            try:
                suggested_mood = generate_mood_suggestion(row.lyrics)
                break
            except Exception as e:
                attempts += 1
                if attempts > max_retries:
                    raise
                if "Rate limit reached" in str(e):
                    print(f"Rate limit reached at index {actual_index}. Sleeping for a while...")
                    time.sleep(60)
                else:
                    print(f"Error at index {actual_index}: {e}")
                    time.sleep(120)

        predicted_labels.append(
            suggested_mood if suggested_mood is not None else unmatched_label
        )

        # Throttle: pause after every fifth row to stay under the rate limit.
        if (actual_index > 0 and actual_index % 5 == 0):
            print(f"Index {actual_index}: Resting for a while...")
            time.sleep(60)

    accuracy = accuracy_score(actual_labels, predicted_labels)
    report = classification_report(actual_labels, predicted_labels)

    return accuracy, report

In [14]:
if __name__ == "__main__":
    # For a quick smoke test, call generate_mood_suggestion on a single
    # sentence instead, e.g. "I got f grade for my exammmm".
    # Full run: score the model on the held-out sample and print both metrics.
    accuracy, report = evaluate_mood_suggestions(sample)
    print(f"Accuracy: {accuracy}", report, sep="\n")

Error at index 2: The server is overloaded or not ready yet.
Index 5: Resting for a while...
Error at index 7: The server is overloaded or not ready yet.
Index 10: Resting for a while...
Index 15: Resting for a while...
Error at index 17: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Index 20: Resting for a while...
Index 25: Resting for a while...
Error at index 29: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Index 30: Resting for a while...
Index 35: Resting for a while...
Index 40: Resting for a while...
Index 45: Resting for a while...
Index 50: Resting for a while...
Index 55: Resting for a while...
Index 60: Resting for a while...
Index 65: Resting for a while...
Index 70: Resting for a while...
Index 75: Resting for a while...
Index 80: Resting for a while...
Index 85: Resting for a while...
Index 90: Resting for a while...
Index 95: Resting for a while..