Test a ResNet-50 emotion recognition model trained on the FER2013 dataset using custom uploaded images. Intended to be run on Google Colab.

In [None]:
from google.colab import files
import cv2
import numpy as np
import torch
import torchvision.transforms as T
from matplotlib import pyplot as plt
from torchvision.models import resnet50, ResNet50_Weights
import torch.nn as nn
import gdown

Download the trained models for face detection and emotion recognition.

In [None]:
# Face detection model
!wget 'https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml'

# ResNet-50 FER2013 trained weights
!gdown --fuzzy 'https://drive.google.com/file/d/1xCsWCExMXrsahiHQPm2hlu8Fm8bVtiUy/view?usp=sharing'

--2024-08-22 07:59:49--  https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 930127 (908K) [text/plain]
Saving to: ‘haarcascade_frontalface_default.xml’


2024-08-22 07:59:49 (107 MB/s) - ‘haarcascade_frontalface_default.xml’ saved [930127/930127]

Downloading...
From (original): https://drive.google.com/uc?id=1xCsWCExMXrsahiHQPm2hlu8Fm8bVtiUy
From (redirected): https://drive.google.com/uc?id=1xCsWCExMXrsahiHQPm2hlu8Fm8bVtiUy&confirm=t&uuid=3fef9f7f-21ae-40c5-a1e9-cc241ab72a9f
To: /content/model.pth
100% 94.4M/94.4M [00:00<00:00, 95.0MB/s]


Upload a picture. After running the cell, click on the "Choose Files" button to upload an image.

In [None]:
# Open Colab's file picker; `uploaded` maps each chosen file name to its content.
uploaded = files.upload()

# Stop here with a clear message when nothing was chosen. Otherwise the loop below
# never runs, `filename` stays undefined, and the failure only surfaces later as a
# NameError in the cell that reads the image.
if not uploaded:
    raise RuntimeError('No file was uploaded. Re-run this cell and choose an image.')

for filename in uploaded.keys():
    print(f'User uploaded file "{filename}" with length {len(uploaded[filename])} bytes')

# NOTE: `filename` is intentionally left bound after the loop; the inference cell
# reads it. When several files are uploaded it refers to the last one.

Define the modified ResNet-50 model, and load in the downloaded weights.

In [None]:
class ModifiedResNet50(nn.Module):
    """ResNet-50 whose final fully connected layer outputs `num_classes` values.

    The backbone is created with `weights=None`, i.e. without loading any
    pretrained weights here; only the last layer is swapped out.

    Args:
        num_classes: Number of outputs of the replacement final layer (default 7).
    """

    def __init__(self, num_classes=7):
        super().__init__()

        backbone = resnet50(weights=None)

        # Swap the classification head for one sized to `num_classes`,
        # keeping the input width of the layer it replaces.
        backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)

        self.resnet = backbone

    def forward(self, x):
        """Pass `x` through the wrapped network and return its output."""
        return self.resnet(x)

def predict_emotion(face_tensor, model):
    """Run `model` on `face_tensor` with gradient tracking disabled.

    Args:
        face_tensor: Input passed unchanged to `model`.
        model: Callable network to evaluate.

    Returns:
        Whatever `model(face_tensor)` returns; no post-processing is applied.
    """
    # Inference only, so no autograd graph is recorded for the forward pass.
    with torch.no_grad():
        return model(face_tensor)


# Pick the device first so the checkpoint can be mapped onto it while loading
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Create an instance of the modified ResNet-50
model = ModifiedResNet50(num_classes=7)

# Load the trained weights. map_location remaps tensors that were stored on another
# device (e.g. a checkpoint holding CUDA tensors) so loading also works on a
# CPU-only runtime instead of failing during deserialization.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code.
# Only load checkpoints from a trusted source; consider passing weights_only=True
# if the installed PyTorch version supports it.
model.load_state_dict(torch.load('model.pth', map_location=device))

# Move the model to the selected device (GPU when available, otherwise CPU)
model = model.to(device)

# Set the model to evaluation mode
model.eval()

# Load the face detector model
face_detector = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

# CascadeClassifier does not raise when the file is missing or unreadable; it just
# yields an empty classifier. Check explicitly so the problem is reported here.
if face_detector.empty():
    raise RuntimeError(
        'Could not load haarcascade_frontalface_default.xml. '
        'Run the download cell first.'
    )

# Order of the classes used during training
emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

Detect faces in the uploaded image and run the emotion recognition model on each face. Using the bounding box of each detected face, the face is cropped and resized so that its longer side is 224 pixels while preserving the aspect ratio, then centered on a black 224x224 canvas (padding the shorter side). The padded face is converted to grayscale and replicated across three channels to match the network's expected input. It is then converted to a tensor and normalized using the same normalization used during training. The processed face is passed through the emotion recognition model to get the predicted emotion. The bounding box of the face is drawn on the image along with the predicted emotion label. The probabilities of each class predicted by the network are also displayed.

In [None]:
# Side length, in pixels, of the square image fed to the network
INPUT_SIZE = 224

# Transform the image with the same normalization used during training
preprocess = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


def _letterbox_face(face_roi, size=INPUT_SIZE):
    """Resize a face crop to fit a `size` x `size` square and pad it with black.

    The crop is scaled so that its longer side equals `size` while keeping the
    aspect ratio, then centered on a black canvas.

    Args:
        face_roi: H x W x 3 uint8 image array (the cropped face).
        size: Side length of the square output.

    Returns:
        A `size` x `size` x 3 uint8 array containing the centered, resized crop.
    """
    roi_h, roi_w = face_roi.shape[:2]
    aspect_ratio = roi_w / roi_h

    if aspect_ratio > 1:
        # Width is larger
        new_w = size
        new_h = int(size / aspect_ratio)
    else:
        # Height is larger
        new_h = size
        new_w = int(size * aspect_ratio)

    # Guard against a zero-sized target for extremely thin crops; cv2.resize rejects it
    new_w = max(1, new_w)
    new_h = max(1, new_h)

    resized_face = cv2.resize(face_roi, (new_w, new_h))

    # Place the resized face in the center of a black square image
    padded_face = np.zeros((size, size, 3), dtype=np.uint8)
    pad_top = (size - new_h) // 2
    pad_left = (size - new_w) // 2
    padded_face[pad_top:pad_top + new_h, pad_left:pad_left + new_w] = resized_face

    return padded_face


# `filename` is set by the upload cell; report its absence clearly instead of
# surfacing a bare NameError.
try:
    img = filename
except NameError:
    raise RuntimeError(
        'No uploaded image found. Run the upload cell and choose a file first.'
    ) from None

# cv2.imread returns None (it does not raise) when the file cannot be decoded
img_bgr = cv2.imread(img, cv2.IMREAD_COLOR)
if img_bgr is None:
    raise ValueError(f'Could not read "{img}" as an image.')

img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)

# Detect faces
results = face_detector.detectMultiScale(gray, 1.3, 5)

# Crop faces from an untouched copy so boxes/labels drawn for earlier faces
# never end up inside a later face's crop.
img_clean = img_rgb.copy()

# Faces are numbered on the image and in the chart titles only when there are several
multiple_faces = len(results) > 1

# One (chart title, class probabilities) entry per detected face
face_predictions = []

# x, y, w, h is the bounding box of the detected face (if any)
for face_idx, (x, y, w, h) in enumerate(results, start=1):
    # Plain Python ints for the OpenCV drawing calls
    x, y, w, h = int(x), int(y), int(w), int(h)

    # Extract face ROI and fit it into the network's square input
    face_roi = img_clean[y:y + h, x:x + w]
    padded_face = _letterbox_face(face_roi)

    # Convert to grayscale, then replicate it across three channels
    gray_face = cv2.cvtColor(padded_face, cv2.COLOR_RGB2GRAY)
    stacked_face = np.stack([gray_face] * 3, axis=-1)
    processed_face = preprocess(stacked_face).to(device)

    # Add batch dimension and run the image through the network
    prediction = predict_emotion(processed_face.unsqueeze(0), model)

    predicted_class = torch.argmax(prediction, dim=1)
    probabilities = torch.softmax(prediction, dim=1).squeeze().cpu().numpy()

    # Get the predicted emotion label
    predicted_emotion = emotion_labels[predicted_class.item()]

    if multiple_faces:
        label = f'{face_idx}: {predicted_emotion}'
        chart_title = f'Predicted Emotion Probabilities (face {face_idx}: {predicted_emotion})'
    else:
        label = predicted_emotion
        chart_title = 'Predicted Emotion Probabilities'
    face_predictions.append((chart_title, probabilities))

    # Draw bounding box and emotion label; clamp the text baseline so the label
    # stays visible when the face touches the top edge of the image.
    cv2.rectangle(img_rgb, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(img_rgb, label, (x, max(y - 10, 25)), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

# Display the image with bounding boxes and emotion labels
plt.imshow(img_rgb)
plt.axis('off')
plt.show()

if not face_predictions:
    # Without a detection there are no probabilities to plot
    print('No faces were detected in the image, so there are no emotion probabilities to display.')

# Display the predictions of each class by the network, one chart per detected face
for chart_title, face_probabilities in face_predictions:
    plt.figure()
    plt.bar(emotion_labels, face_probabilities)
    plt.xlabel('Emotions')
    plt.ylabel('Probability')
    plt.title(chart_title)
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

NameError: name 'filename' is not defined