In [None]:
import os
import pandas as pd

# Path to the train folder.
# The dataset can be obtained here:
# https://www.kaggle.com/datasets/samaneheslamifar/facial-emotion-expressions
train_dir = 'images/train'

# Rows of [image_path, emotion], one per .jpg file found under train_dir.
data = []

# Every sub-folder of train_dir is treated as one emotion class; the folder
# name is used as the label. os.listdir() returns entries in arbitrary order,
# so both listings are sorted to make the row order (and everything derived
# from it later on) identical on every run and every machine.
for emotion in sorted(os.listdir(train_dir)):
    emotion_dir = os.path.join(train_dir, emotion)
    if not os.path.isdir(emotion_dir):
        # Ignore stray files sitting next to the emotion folders.
        continue
    for image_name in sorted(os.listdir(emotion_dir)):
        # Only .jpg files are collected (the suffix check is case-sensitive).
        if image_name.endswith('.jpg'):
            image_path = os.path.join(emotion_dir, image_name)
            data.append([image_path, emotion])

# One row per image: where it lives and which emotion folder it came from.
df = pd.DataFrame(data, columns=['image_path', 'emotion'])

In [None]:
def convert_to_adjective(emotion):
    """Return the adjective form of an emotion label.

    "disgust", "surprise" and "fear" are mapped to "disgusted", "surprised"
    and "fearful". Any other value is returned unchanged.
    """
    noun_adjective_pairs = (
        ("disgust", "disgusted"),
        ("surprise", "surprised"),
        ("fear", "fearful"),
    )
    for noun, adjective in noun_adjective_pairs:
        if emotion == noun:
            return adjective
    # Not one of the noun-form labels: pass it through untouched.
    return emotion

# Rewrite the labels in adjective form (e.g. "fear" -> "fearful"); labels
# that convert_to_adjective does not recognise are kept as they are.
df['emotion'] = (
    df['emotion']
    .apply(convert_to_adjective)
)


In [None]:
# Distinct emotion labels present in the DataFrame, as a plain Python list.
emotions = (
    df['emotion']
    .unique()
    .tolist()
)
emotions

In [None]:
import random
def create_prompt(correct_emotion, candidates=None):
    """Build a two-option question about which emotion a person is feeling.

    Args:
        correct_emotion: The emotion label that is the right answer.
        candidates: Optional iterable of emotion labels from which the wrong
            option is drawn. When omitted, the notebook-level ``emotions``
            list is used.

    Returns:
        A string of the form ``"Is this person feeling A or B?"`` where one of
        A/B is ``correct_emotion`` and the other is a randomly chosen label
        different from it. Which of the two comes first is also random.

    Raises:
        IndexError: If there is no label other than ``correct_emotion`` to
            choose from.
    """
    pool = emotions if candidates is None else candidates
    distractors = [label for label in pool if label != correct_emotion]
    incorrect_emotion = random.choice(distractors)
    # Randomise which option is mentioned first so that position in the
    # question carries no information about the right answer.
    if random.choice([True, False]):
        first, second = correct_emotion, incorrect_emotion
    else:
        first, second = incorrect_emotion, correct_emotion
    return f"Is this person feeling {first} or {second}?"

# Seed Python's random module so the randomly built prompts (and any later
# random draws in the same run) come out the same on every
# Restart Kernel -> Run All.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Adding prompt column: one two-option question per image, built from the
# image's own emotion label.
df['clean_prompt'] = df['emotion'].apply(create_prompt)

In [None]:
# Give the columns their final names: the emotion label is the correct answer
# to the prompt, and the image path is the "clean" image.
df = df.rename(
    columns={
        "image_path": "clean_image_path",
        "emotion": "correct_answer",
    }
)

In [None]:
def extract_incorrect_answer(prompt, correct_answer):
    """Return the option in ``prompt`` that is not ``correct_answer``.

    Args:
        prompt: A question of the form ``"... feeling A or B?"``.
        correct_answer: The option (A or B) that is the right answer.

    Returns:
        The other option, without the question's trailing "?".

    Raises:
        IndexError: If ``prompt`` does not contain ``"feeling "`` followed by
            two options separated by ``" or "``.
    """
    # Drop the closing "?" before splitting; otherwise it stays glued to the
    # second option and a value such as "sad?" is returned when the correct
    # answer is listed first.
    options = prompt.split("feeling ")[1].rstrip("?").split(" or ")
    return options[1] if options[0] == correct_answer else options[0]

# For every row, pull out the option of the prompt that is not the correct
# answer.
df['incorrect_answer'] = df.apply(
    lambda record: extract_incorrect_answer(
        record['clean_prompt'],
        record['correct_answer'],
    ),
    axis=1,
)

In [None]:
# Remove any "?" left over from the prompt text. "?" is a regex
# metacharacter and the default of `regex` has changed between pandas
# releases, so a literal (non-regex) replacement is requested explicitly.
df['incorrect_answer'] = df['incorrect_answer'].str.replace("?", "", regex=False)

In [None]:
# Map each emotion label to the list of image paths carrying that label.
# The grouping uses the label column rather than searching for the emotion
# name inside the path string: a substring test also matches any path whose
# directory or file name merely happens to contain e.g. "sad" or "fear".
# It also covers whatever labels are present instead of a hard-coded set and
# needs a single pass over the frame.
image_paths_by_emotion = {}
for emotion_label, image_path in zip(df['correct_answer'], df['clean_image_path']):
    image_paths_by_emotion.setdefault(emotion_label, []).append(image_path)

# Display the number of images per label instead of dumping every path.
{label: len(paths) for label, paths in image_paths_by_emotion.items()}

In [None]:
def get_random_incorrect_image_path(incorrect_answer):
    """Pick a random image path for the given emotion label.

    Looks ``incorrect_answer`` up in the notebook-level
    ``image_paths_by_emotion`` mapping and returns one of its paths chosen at
    random. Returns ``None`` when the label is unknown or has no paths.
    """
    candidate_paths = image_paths_by_emotion.get(incorrect_answer)
    if not candidate_paths:
        # Unknown label or empty list: nothing to sample from.
        return None
    return random.choice(candidate_paths)

# Pair every row with a randomly chosen image whose label is the row's
# incorrect answer (None where no such image is available).
df['corrupt_image_path'] = (
    df['incorrect_answer']
    .apply(get_random_incorrect_image_path)
)


In [None]:
# Write the finished table to disk, leaving out the DataFrame index.
df.to_csv(
    path_or_buf="facial_expressions_cleaned.csv",
    index=False,
)