In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from tensorflow.keras.optimizers import Adam
from keras.models import Sequential
from keras.utils import to_categorical
from tensorflow import convert_to_tensor

from keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, BatchNormalization, ZeroPadding2D, Dropout
from keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau

from keras.layers import MaxPooling2D
from tensorflow.keras import Model

from keras.utils import plot_model
import tensorflow_datasets as tfd


In [None]:
# Mount Google Drive at /content/drive; the dataset and model paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Base directory on the mounted Drive: the dataset CSV is read from here and the model file is saved here.
path = '/content/drive/MyDrive/Mood-Detection-Project/challenges-in-representation-learning-facial-expression-recognition-challenge/'

In [None]:
# Load the full dataset. Later cells access the columns 'emotion', ' pixels' and ' Usage'
# (the last two are addressed with a leading space in their names).
data = pd.read_csv(path + 'icml_face_data.csv')

In [None]:
# Preview the first rows of the raw dataframe.
data.head()

In [None]:
# Oversample class 1 ('Disgust'; presumably because it is under-represented),
# but only inside the training split: duplicating validation/test rows would
# count the same evaluation images several times and distort the metrics.
is_train_class_1 = (data['emotion'] == 1) & (data[' Usage'] == 'Training')
emotion_1 = data[is_train_class_1]

# append 6 extra copies, so every selected row occurs 7 times in total
repeated_rows = pd.concat([emotion_1] * 6, ignore_index=True)

# concatenate the original data with the duplicated rows into a new dataframe
data2 = pd.concat([data, repeated_rows], ignore_index=True)

In [None]:
# Summarize data2 (columns, dtypes, non-null counts) after the duplicated rows were appended.
data2.info()

In [None]:
# Per-column non-null counts for the rows labelled emotion == 1 in the augmented dataframe.
data2[data2['emotion'] == 1].count()

In [None]:
# Same data2 summary again (this cell repeats the data2.info() call made two cells earlier).
data2.info()

In [None]:
def prepare_data(data):
    """Convert a dataframe of labelled faces into image and label arrays.

    Args:
        data: DataFrame with an 'emotion' column (integer-like class ids) and
            a ' pixels' column (note the leading space) whose entries are
            strings of 48*48 space-separated pixel values.

    Returns:
        (image_array, image_label): a float array of shape (len(data), 48, 48)
        and a 1-D integer array holding one class id per row, in row order.

    Raises:
        ValueError: if a row does not contain exactly 48*48 pixel values.
    """
    image_array = np.zeros(shape=(len(data), 48, 48))
    image_label = data['emotion'].to_numpy(dtype=int)

    # Walk the column values positionally instead of looking rows up by index
    # label, so a non-unique or non-default index cannot break the parsing.
    for i, pixels in enumerate(data[' pixels']):
        image = np.fromstring(pixels, dtype=int, sep=' ')
        image_array[i] = np.reshape(image, (48, 48))

    return image_array, image_label


def plot_examples(label=0):
    """Show up to five training images that carry the class id `label`.

    Reads the notebook globals `train_images` (indexed as [sample][:, :, 0]),
    `train_labels` (one-hot rows) and `emotions` (class id -> display name).

    Args:
        label: class id of the emotion whose examples are shown.
    """
    fig, axs = plt.subplots(1, 5, figsize=(25, 12))
    fig.subplots_adjust(hspace = .2, wspace=.2)
    axs = axs.ravel()

    # Select positions inside the training arrays themselves. Index labels of
    # the raw `data` frame only coincide with positions in `train_images` when
    # the training rows happen to come first in the file.
    class_ids = train_labels.argmax(axis=1)
    matches = np.flatnonzero(class_ids == label)[:5]

    for ax in axs:
        ax.set_xticklabels([])
        ax.set_yticklabels([])
    # zip stops early when fewer than five samples exist; spare panels stay empty.
    for ax, idx in zip(axs, matches):
        ax.imshow(train_images[idx][:,:,0], cmap='gray')
        ax.set_title(emotions[class_ids[idx]])

def plot_all_emotions():
    """Show one training image for each of the seven emotion classes.

    Reads the notebook globals `train_images` (indexed as [sample][:, :, 0]),
    `train_labels` (one-hot rows) and `emotions` (class id -> display name).
    """
    fig, axs = plt.subplots(1, 7, figsize=(30, 12))
    fig.subplots_adjust(hspace = .2, wspace=.2)
    axs = axs.ravel()

    # Search the training labels directly rather than the raw `data` frame,
    # whose index labels are not guaranteed to match positions in `train_images`.
    class_ids = train_labels.argmax(axis=1)

    for i in range(7):
        axs[i].set_xticklabels([])
        axs[i].set_yticklabels([])
        matches = np.flatnonzero(class_ids == i)
        if matches.size == 0:
            continue  # no training sample of this class; leave the panel empty
        # Take the i-th sample of class i, falling back to the last available one.
        idx = matches[min(i, matches.size - 1)]
        axs[i].imshow(train_images[idx][:,:,0], cmap='gray')
        axs[i].set_title(emotions[class_ids[idx]])

def plot_image_and_emotion(test_image_array, test_image_label, pred_test_labels, image_number):
    """Plot one test image next to the model's per-class scores for it.

    Args:
        test_image_array: images, indexed by `image_number`.
        test_image_label: class ids, indexed by `image_number`; the id is
            looked up in the global `emotions` mapping for the title.
        pred_test_labels: per-image sequences of scores, one score per entry
            of `emotions`.
        image_number: position of the sample to display.
    """
    fig, (image_ax, score_ax) = plt.subplots(1, 2, figsize=(12, 6), sharey=False)

    # Left panel: the image, titled with its true emotion.
    true_emotion = emotions[test_image_label[image_number]]
    image_ax.imshow(test_image_array[image_number], cmap='gray')
    image_ax.set_title(true_emotion)

    # Right panel: one bar per emotion with the predicted score.
    score_ax.bar(emotions.values(), pred_test_labels[image_number], color='orange', alpha=0.7)
    score_ax.grid()

    plt.show()

def _plot_emotion_counts(ax, one_hot_labels, title, **bar_kwargs):
    """Draw one bar per emotion class showing how many rows of `one_hot_labels` fall in it."""
    class_ids = sorted(emotions)
    counts = (
        pd.Series(one_hot_labels.argmax(axis=1))
        .value_counts()
        .reindex(class_ids, fill_value=0)  # classes without samples get a zero-height bar
    )
    # Pair names and counts by class id so the bars cannot be mislabelled if
    # the `emotions` dict is ever defined in a different key order.
    ax.bar([emotions[class_id] for class_id in class_ids], counts.to_numpy(), **bar_kwargs)
    ax.set_title(title)
    ax.grid()


def plot_compare_distributions(array1, array2, title1='', title2=''):
    """Plot the class distributions of two one-hot label arrays side by side.

    Args:
        array1: 2-D array of one-hot (or score) rows; the argmax of each row
            is taken as its class id. Drawn in the left panel (orange).
        array2: same layout as `array1`; drawn in the right panel.
        title1: title of the left panel.
        title2: title of the right panel.
    """
    fig, axs = plt.subplots(1, 2, figsize=(12, 6), sharey=False)

    _plot_emotion_counts(axs[0], array1, title1, color='orange')
    _plot_emotion_counts(axs[1], array2, title2)

    plt.show()

In [None]:
# Maps each integer class id (0-6) to its display name.
emotions = dict(enumerate(['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']))

In [None]:
# Build (images, labels) for each split of the augmented dataframe:
# 'Training' -> train, 'PrivateTest' -> validation, 'PublicTest' -> test.
train_image_array, train_image_label = prepare_data(data2.loc[data2[' Usage'].eq('Training')])
val_image_array, val_image_label = prepare_data(data2.loc[data2[' Usage'].eq('PrivateTest')])
test_image_array, test_image_label = prepare_data(data2.loc[data2[' Usage'].eq('PublicTest')])

In [None]:
# Add a trailing channel axis -> (n, 48, 48, 1), cast to float32 and scale pixels from 0-255 to 0-1.
train_images = train_image_array.reshape(-1, 48, 48, 1).astype('float32') / 255
val_images = val_image_array.reshape(-1, 48, 48, 1).astype('float32') / 255
test_images = test_image_array.reshape(-1, 48, 48, 1).astype('float32') / 255

In [None]:
# One-hot encode the class ids. The width is fixed to the number of known
# emotions so every split gets the same shape even if it happens to lack the
# highest class id (to_categorical would otherwise infer the width per array).
train_labels = to_categorical(train_image_label, num_classes=len(emotions))
val_labels = to_categorical(val_image_label, num_classes=len(emotions))
test_labels = to_categorical(test_image_label, num_classes=len(emotions))

In [None]:
# Show one training image for each of the seven emotion classes.
plot_all_emotions()

In [None]:
# Show five training images of class 1 ('Disgust').
plot_examples(label=1)

In [None]:
# Compare the class distribution of the training labels with that of the test labels.
plot_compare_distributions(train_labels, test_labels, title1='train labels', title2='test labels')

In [None]:
# Per-class loss weights for training, inversely proportional to class frequency
# ("balanced" heuristic: n_samples / (n_classes * class_count)). Using the raw
# class frequency as the weight would up-weight the most common classes, which
# is the opposite of what class weighting is meant to achieve.
train_emotions = data2.loc[data2[' Usage'] == 'Training', 'emotion']
class_counts = train_emotions.value_counts().sort_index()
class_weight = {
    int(label): len(train_emotions) / (len(class_counts) * count)
    for label, count in class_counts.items()
}

In [None]:
# Display the class id -> weight mapping that is passed to model.fit.
class_weight

In [None]:
from keras import models
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Flatten, Dense

# Baseline CNN for 48x48 single-channel images: three conv -> max-pool ->
# batch-norm stages (64, 128, 256 filters), then a dense head ending in a
# 7-way softmax.
model = models.Sequential([
    Conv2D(64, (3, 3), activation='relu', input_shape=(48, 48, 1)),
    MaxPooling2D((2, 2)),
    BatchNormalization(),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    BatchNormalization(),

    Conv2D(256, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    BatchNormalization(),

    Flatten(),

    Dense(256, activation='relu'),
    Dense(7, activation='softmax'),
])

The convolutional layers are responsible for extracting features from input images, and the dense layers make predictions based on these features. Batch normalization helps stabilize and accelerate the training process. The softmax activation in the output layer is used for multi-class classification, providing probabilities for each class.

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy for the one-hot labels, accuracy as metric.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [None]:
# Print the layer-by-layer summary of the baseline CNN.
model.summary()

The model has a total of 1,422,087 parameters (weights and biases).
1,421,191 parameters are trainable during the training process.
896 parameters are non-trainable, likely related to batch normalization.

In [None]:
# Train the baseline CNN for 10 epochs with batches of 25 images, reporting
# metrics on the validation split and weighting the loss per class via class_weight.
history = model.fit(train_images, train_labels,
                    validation_data = (val_images, val_labels),
                    class_weight = class_weight,
                    epochs=10,
                    batch_size=25)

 The loss values are decreasing, and accuracy is increasing, which is generally a positive sign, indicating that the model is learning from the data.

In [None]:
# Evaluate the baseline CNN on the test split (the 'PublicTest' rows) and report its accuracy.
test_loss, test_acc = model.evaluate(test_images, test_labels)
print('test accuracy:', test_acc)

Evaluation process: The trained model is tested on a set of data that it has not seen during training or validation; this set is called the test dataset. The model achieved an accuracy of approximately 56.1% on the test dataset, meaning it correctly predicted the labels for about 56.1% of the test samples.

In [None]:
# Model outputs (one softmax score per class) for every test image.
pred_test_labels = model.predict(test_images)

In [None]:
# Collapse the per-class scores to predicted class ids. The ndim guard keeps
# this cell re-runnable: once the array is 1-D, argmax over axis=1 would fail.
if pred_test_labels.ndim > 1:
    pred_test_labels = np.argmax(pred_test_labels, axis=1)

In [None]:
# Display the predicted class ids for the test images.
pred_test_labels

In [None]:
# Convert the one-hot test labels to class ids for the confusion matrix. The
# ndim guard keeps this cell re-runnable after the first conversion.
# NOTE: from here on `test_labels` is 1-D; cells that need the one-hot form
# (model.evaluate, plot_compare_distributions) must run before this one.
if test_labels.ndim > 1:
    test_labels = np.argmax(test_labels, axis=1)

In [None]:
# Display the true class ids of the test images.
test_labels

In [None]:
# Training vs. validation loss per epoch, read from the most recent Keras History object.
loss = history.history['loss']
loss_val = history.history['val_loss']
epochs = range(1, len(loss)+1)

fig, ax = plt.subplots()
ax.plot(epochs, loss, 'bo', label='loss_train')
ax.plot(epochs, loss_val, 'b', label='loss_val')
ax.set_title('value of the loss function')
ax.set_xlabel('epochs')
ax.set_ylabel('value of the loss function')
ax.legend()
ax.grid()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch, read from the most recent Keras History object.
acc = history.history['accuracy']
acc_val = history.history['val_accuracy']
# Derive the epoch axis from this cell's own data instead of the `loss`
# variable of another cell, so the cell does not depend on that cell having run.
epochs = range(1, len(acc)+1)
plt.plot(epochs, acc, 'bo', label='accuracy_train')
plt.plot(epochs, acc_val, 'b', label='accuracy_val')
plt.title('accuracy')
plt.xlabel('epochs')
plt.ylabel('value of accuracy')
plt.legend()
plt.grid()
plt.show()

In [None]:
# Confusion matrix on the test set: rows are true class ids, columns are predicted class ids.
cm = confusion_matrix(test_labels, pred_test_labels)

In [None]:
# Display the confusion matrix.
cm

Rows represent the actual (true) classes.
Columns represent the predicted classes.
                     Predicted
                 |  0  |  1  |  2  |  3  |  4  |  5  |  6  |
                 ------------------------------------------
           True 0 | 210 |  3  |  62 |  31 |  89 |  19 |  53 |
                 ------------------------------------------
           True 1 | 105 | 182 |  35 |  14 |  42 |  0  |  14 |
                 ------------------------------------------
           True 2 | 55  |  2  | 191 |  28 | 107 |  54 |  59 |
                 ------------------------------------------
           True 3 | 43  |  2  |  46 | 610 |  75 |  37 |  82 |
                 ------------------------------------------
           True 4 | 69  |  4  |  88 |  39 | 315 |  31 | 107 |
                 ------------------------------------------
           True 5 | 17  |  0  |  47 |  20 |  11 | 308 |  12 |
                 ------------------------------------------
           True 6 | 57  |  1  |  66 |  54 | 118 |  25 | 286 |
                 ------------------------------------------
True Positives (TP): The diagonal elements (e.g., 210, 182, 191, 610, 315, 308, 286) represent the number of correct predictions for each class.
False Positives (FP): The sum of values in each column (excluding the diagonal) gives the count of incorrect predictions for each predicted class.
False Negatives (FN): The sum of values in each row (excluding the diagonal) gives the count of instances where the true class was not predicted.
True Negatives (TN): For a given class, the sum of all values that lie in neither that class's row nor its column, i.e. the instances of other classes that were also not predicted as that class.

In [None]:
# AlexNet-style network for 48x48 single-channel images: five tanh convolution
# stages with batch normalization, two 4096-unit dense layers with dropout,
# and a 7-way softmax output.
alexnet = models.Sequential()

# Layer 1
alexnet.add(Conv2D(96, (11, 11), input_shape=(48, 48, 1), padding='same', activation='tanh'))
alexnet.add(BatchNormalization())
alexnet.add(MaxPooling2D(pool_size=(2, 2)))

# Layer 2
alexnet.add(Conv2D(256, (5, 5), padding='same', activation='tanh'))
alexnet.add(BatchNormalization())
alexnet.add(MaxPooling2D(pool_size=(2, 2)))

# Layer 3
alexnet.add(ZeroPadding2D((1, 1)))
alexnet.add(Conv2D(512, (3, 3), padding='same', activation='tanh'))
alexnet.add(BatchNormalization())
alexnet.add(MaxPooling2D(pool_size=(2, 2)))

# Layer 4
alexnet.add(ZeroPadding2D((1, 1)))
alexnet.add(Conv2D(1024, (3, 3), padding='same', activation='tanh'))
alexnet.add(BatchNormalization())

# Layer 5
alexnet.add(ZeroPadding2D((1, 1)))
alexnet.add(Conv2D(1024, (3, 3), padding='same', activation='tanh'))
alexnet.add(BatchNormalization())
alexnet.add(MaxPooling2D(pool_size=(2, 2)))

# Layer 6
alexnet.add(Flatten())
alexnet.add(Dense(4096, activation='tanh'))
alexnet.add(BatchNormalization())
alexnet.add(Dropout(0.5))

# Layer 7
alexnet.add(Dense(4096, activation='tanh'))
alexnet.add(BatchNormalization())
alexnet.add(Dropout(0.5))

# Layer 8 (output)
# The softmax must be the last layer: normalizing its output afterwards would
# rescale the class probabilities so they no longer sum to 1 (and can become
# negative), which categorical cross-entropy is not defined for.
alexnet.add(Dense(7, activation='softmax'))

# print model summary
alexnet.summary()

In [None]:
# Configure the AlexNet-style network: Adam with learning rate 1e-4, categorical cross-entropy, accuracy metric.
alexnet.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train the AlexNet-style network for 25 epochs with batches of 50 images.
# NOTE: this rebinds `history`, replacing the training history of the baseline model.
history = alexnet.fit(train_images, train_labels,
                    validation_data = (val_images, val_labels),
                    class_weight = class_weight,
                    epochs=25,
                    batch_size=50)

In [None]:
 # Persist the baseline CNN next to the dataset. The return value of save() is
 # deliberately not assigned: it is None, and binding it to `model` would
 # discard the trained model object.
 model.save(path + 'model.h5')


In [None]:
import tensorflow as tf

# Reload the model file written by the save cell to check that it can be read back.
loaded_model = tf.keras.models.load_model('/content/drive/MyDrive/Mood-Detection-Project/challenges-in-representation-learning-facial-expression-recognition-challenge/model.h5')

In [None]:
import cv2
import numpy as np
import tensorflow as tf
from IPython.display import display, Javascript
from google.colab.output import eval_js
from base64 import b64decode
import PIL.Image
from google.colab.patches import cv2_imshow
import requests

# Load the frontal-face Haar cascade that ships with OpenCV; used to locate faces in the captured photo
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# Load the saved emotion model from Drive for mood prediction (this rebinds the global `model`)
model = tf.keras.models.load_model('/content/drive/MyDrive/Mood-Detection-Project/challenges-in-representation-learning-facial-expression-recognition-challenge/model.h5')

# Map predicted class indices to lower-case mood labels (same order as the `emotions` dict used for training)
mood_labels = {0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy', 4: 'sad', 5: 'surprise', 6: 'neutral'}

# Function to capture a photo using the webcam
def take_photo(filename='photo.jpg', quality=0.8):
    """Capture one webcam frame in the browser and store it as an image file.

    Injects a JavaScript helper into the notebook output that shows a live
    video preview with a 'Capture' button, waits for the click and returns
    the frame as a JPEG data URL; the base64 payload is then decoded and
    written to `filename`.

    Args:
        filename: destination path of the image file.
        quality: quality value forwarded to `canvas.toDataURL`.

    Returns:
        The `filename` that was written.
    """
    capture_script = Javascript('''
        async function takePhoto(quality) {
            const div = document.createElement('div');
            const capture = document.createElement('button');
            capture.textContent = 'Capture';
            div.appendChild(capture);

            const video = document.createElement('video');
            video.style.display = 'block';
            const stream = await navigator.mediaDevices.getUserMedia({ 'video': true });

            document.body.appendChild(div);
            div.appendChild(video);
            video.srcObject = stream;
            await video.play();

            // Resize the output to fit the video element.
            google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

            // Wait for the Capture to be clicked.
            await new Promise((resolve) => capture.onclick = resolve);

            const canvas = document.createElement('canvas');
            canvas.width = video.videoWidth;
            canvas.height = video.videoHeight;
            canvas.getContext('2d').drawImage(video, 0, 0);
            stream.getVideoTracks()[0].stop();
            div.remove();
            return canvas.toDataURL('image/jpeg', quality);
        }
    ''')
    display(capture_script)

    # The call resolves to a data URL; the base64 payload follows the first comma.
    data_url = eval_js(f'takePhoto({quality})')
    jpeg_bytes = b64decode(data_url.split(',')[1])

    with open(filename, 'wb') as image_file:
        image_file.write(jpeg_bytes)
    return filename

# Function to detect emotion from the image using a pre-trained emotion detection model
def detect_emotion(image_path):
     """Return the emotion label for the current face.

     NOTE(review): `image_path` is never read. The function returns the
     module-level variable `mood_label`, which is only assigned inside the
     webcam loop further down; calling this before that assignment raises
     NameError. Presumably a placeholder for a real detector — confirm.

     Args:
         image_path: path of the captured image (currently unused).

     Returns:
         The current value of the global `mood_label`.
     """
     emotion = mood_label

     return emotion

# Function to search for songs on YouTube based on the detected emotion
def search_songs(emotion):
    """Open a YouTube search for '<emotion> songs' in a new browser tab.

    Args:
        emotion: text placed in front of ' songs' to form the search query.
    """
    # Percent-encode every reserved character (safe='') so the query can be embedded in the URL.
    encoded_query = requests.utils.quote(f'{emotion} songs', safe='')
    youtube_search_url = f'https://www.youtube.com/results?search_query={encoded_query}'

    # Ask the browser to open the results page through injected JavaScript.
    open_tab = Javascript(f'window.open("{youtube_search_url}","_blank");')
    display(open_tab)

# Loop over frames from the webcam
while True:
    # Capture a photo using the webcam
    image_path = take_photo()

    # Read the captured image; cv2.imread returns None (it does not raise)
    # when the file is missing or cannot be decoded
    frame = cv2.imread(image_path)

    if frame is None:
        print(f'Could not read the captured image at {image_path!r}; skipping detection.')
    else:
        # Convert frame to grayscale
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Detect faces in frame using Haar Cascade classifier
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)

        # Loop over detected faces
        for (x, y, w, h) in faces:
            # Extract face ROI (Region of Interest)
            face_roi = gray[y:y+h, x:x+w]
            # Resize face ROI to fit model input size
            face_roi = cv2.resize(face_roi, (48, 48))
            # Normalize pixel values to range [0, 1]
            face_roi = face_roi / 255.0
            # Reshape face ROI to match model input shape
            face_roi = np.reshape(face_roi, (1, 48, 48, 1))
            # Make mood prediction using pre-trained model
            prediction = model.predict(face_roi)
            # Get predicted mood label
            mood_label = mood_labels[np.argmax(prediction)]

            # detect_emotion currently returns the mood_label assigned just above
            emotion = detect_emotion(image_path)

            # Draw bounding box around detected face
            cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
            # Write predicted mood label and detected emotion above the box, or
            # below it when the face is too close to the top edge for the text to fit
            text = f'{mood_label}, Emotion: {emotion}'
            text_y = y - 10 if y - 10 > 20 else y + h + 30
            cv2.putText(frame, text, (x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

            # Search for songs on YouTube based on the detected emotion
            search_songs(emotion)

        # Display output frame
        cv2_imshow(frame)

    # Ask the user if they want to continue or exit (surrounding whitespace is ignored)
    user_input = input("Want to continue? Enter 'c' to continue, 'q' to exit: ")
    if user_input.strip().lower() == 'q':
        break

# Close all windows
cv2.destroyAllWindows()