In [4]:
import openai
import pandas as pd
import os
from dotenv import load_dotenv

# Pull settings from a local .env file into the process environment.
load_dotenv()

# Hand the API key (read from the OPENAI_API_KEY variable) to the openai module.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Read the input CSV and show which columns it provides.
df = pd.read_csv("D1.csv")
print(df.columns)
# Annotate dataset
def annotate_text(text, text_id, *, model="gpt-4o", max_tokens=512):
    """Ask an OpenAI chat model to label each sentence of *text* with an emotion.

    The prompt lists 17 emotion categories (plus "Ambiguous" as a fallback)
    and asks for a single-line reply of the form
    ``ID: <text_id> | Label: [Emotion1][Emotion2]...`` with one label per
    sentence, in order.

    Args:
        text: The text to annotate.
        text_id: Identifier echoed in the prompt's response format and in
            diagnostic messages.
        model: Chat model name passed to the API.
        max_tokens: Upper bound on the reply length. The reply grows with the
            number of sentences, so a small cap cuts the label list short.

    Returns:
        The raw reply text from the model, or the string ``"Error"`` if the
        text is missing/blank or the API call fails.
    """
    # Skip the API call when there is nothing to annotate: None, a NaN from an
    # empty CSV cell, or a blank string would otherwise be sent as a prompt.
    is_missing = text is None or (isinstance(text, float) and text != text)
    is_blank = isinstance(text, str) and not text.strip()
    if is_missing or is_blank:
        print(f"Error processing ID {text_id}: text is empty or missing")
        return "Error"

    prompt = f"""
    You are an expert in emotion detection. Analyze the following text sentence by sentence and assign an emotion from the given categories to each sentence.

    Emotion categories:
    1. Aesthetic Experience
    2. Anger
    3. Anxiety
    4. Compassion
    5. Depression
    6. Envy
    7. Fright
    8. Gratitude
    9. Guilt
    10. Happiness
    11. Hope
    12. Jealousy
    13. Love
    14. Pride
    15. Relief
    16. Sadness
    17. Shame

    For the given text, respond in the format:
    ID: {text_id} | Label: [Emotion1][Emotion2][Emotion3]...

    Each emotion in the response should correspond to a sentence in the text in the same order the sentences appear. If the number of sentences is unclear, deduce them based on logical segmentation of the text. If a sentence is ambiguous or does not clearly express an emotion, assign "Ambiguous" as the label. Ensure that the number of emotions matches the number of sentences.

    Text:
    {text}
    """

    try:
        response = openai.ChatCompletion.create(
            model=model,
            messages=[{"role": "system", "content": "You are a helpful assistant."},
                      {"role": "user", "content": prompt}],
            max_tokens=max_tokens,
        )
        choice = response['choices'][0]
        # A "length" finish reason means the reply hit max_tokens, so the
        # label list is probably shorter than the number of sentences.
        if choice.get('finish_reason') == "length":
            print(f"Warning: response for ID {text_id} was truncated at {max_tokens} tokens")
        return choice['message']['content']
    except Exception as e:
        # Best-effort: report the failure and return a sentinel so that one
        # bad row does not abort a whole batch run.
        print(f"Error processing ID {text_id}: {e}")
        return "Error"

# Apply annotation
# Identify each row by its DataFrame index. 'is_depression' appears to be a
# label column rather than a unique identifier, so passing it as the ID would
# make the "ID:" field in the prompt and the error messages ambiguous.
df['Emotion'] = [
    annotate_text(text, row_id)
    for row_id, text in df['clean_text'].items()
]

# Save the annotated dataset
df.to_csv("annotated_dataset.csv", index=False)
print("Annotation complete. Results saved to 'annotated_dataset.csv'.")


Index(['clean_text', 'is_depression', 'Emotion'], dtype='object')
Annotation complete. Results saved to 'annotated_dataset.csv'.
