# A User Perception sub-system.

The aim of this sub-system is to acquire input from a webcam in real time, of a human user or a group of users interacting with Furhat, and to automatically detect their affective states. The basic requirements for this sub-system are to: (i) automatically extract facial features (e.g. Action Units) from the video input using the tools you have learned in the course (e.g., Py-Feat), and (ii) use Machine Learning (ML) techniques to process the features into a high-level representation of the user’s behaviour (i.e., affective states: valence or arousal, or both) that can be used by the second sub-system. You are free to choose which facial features to extract, which ML techniques to apply, which affective state to model, and whether to automatically detect the affective state of one user or a group of users. In order to train the ML model for the automatic detection of affective states, you may use any dataset that you find but we will only provide the MultiEmoVA dataset (if you choose to automatically detect the affective state of a group of users in your scenario) or the DiffusionFER dataset (if you choose to automatically detect the affective state of one single user in your scenario). Regardless of your choice of dataset, you will need to extract your own features. Images and labels for these datasets will be made available in Studium.

In [1]:
import cv2
from feat import Detector
import opencv_jupyter_ui as jcv2
import torch
import os
import pandas as pd

import logging, sys
logging.disable(sys.maxsize)

  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)


In [2]:
torch.xpu.is_available()

False

# 1. Load and analyze data from DiffusionFER dataset

In [2]:
detector = Detector(device='cuda' if torch.cuda.is_available() else 'cpu')

  pretrained_dict = torch.load(
  checkpoint = torch.load(
  checkpoint = torch.load(model_path, map_location=self.device)
  torch.load(


In [3]:
emotion = {
    "angry": 0,
    "disgust": 1,
    "fear": 2,
    "happy": 3,
    "sad": 4,
    "surprise": 5,
    "neutral": 6
}

In [4]:
class image_data:
    def __init__(self, path:str, real_emotion:str):
        self.path = path
        self.name = os.path.basename(path)
        self.image = cv2.imread(self.path)
        self.face = detector.detect_faces(self.image)
        self.landmark = detector.detect_landmarks(self.image, self.face)
#        self.emotion = detector.detect_emotions(self.image, self.face, self.landmark)
        self.au = detector.detect_aus(self.image, self.landmark)
#        self.processed = self.image
        self.real_emotion = real_emotion

In [6]:
# read data
data = []
for em in emotion:
    path = os.path.join('./DiffusionFER/DiffusionEmotion_S/cropped/', em)

    files = os.listdir(path)
    files_abs = [os.path.abspath(os.path.join(path, file)) for file in files]
    data.append([image_data(name, em) for name in files_abs])
    print(f"Dataset {em} loaded!")

KeyboardInterrupt: 

In [188]:
# preparing the AUS data for storing
csv_file = []
csv_face = []
csv_emotion = []
csv_AUS = [
    [], [], [], [], [], [], [], [], [], [],
    [], [], [], [], [], [], [], [], [], []
]
au_numbers = [1, 2, 4, 5, 6, 7, 9, 10, 12, 14, 15, 17, 18, 20, 22, 23, 24, 25, 26, 27]

for em in data:
    for csv in em:
        for (face, aus) in enumerate(csv.au[0]):
            csv_file.append(csv.name)
            csv_emotion.append(csv.real_emotion)
            csv_face.append(face)
            for (i, au) in enumerate(aus):
                csv_AUS[i].append(au)

csv_data = {
    "file": csv_file,
    "emotion": csv_emotion,
    "face": csv_face,
}
aus_name = []
for (i, au_entry) in zip(au_numbers, csv_AUS):
    key = f"AUS{i:02d}"
    aus_name.append(key)
    csv_data[key] = au_entry

In [189]:
# writing the csv file
df = pd.DataFrame(csv_data)
df.to_csv('aus.csv', index=False, header=True, sep=',', encoding='utf-8')

Unnamed: 0,emotion,AUS01,AUS02,AUS04,AUS05,AUS06,AUS07,AUS09,AUS10,AUS12,...,AUS15,AUS17,AUS18,AUS20,AUS22,AUS23,AUS24,AUS25,AUS26,AUS27
0,angry,0.480870,0.221707,0.554721,0.341209,0.538130,1.0,0.537321,0.023429,0.0,...,0.406401,0.140524,0.218493,1.0,0.459626,0.023246,0.997896,0.326979,0.050575,0.323358
1,angry,0.368920,0.193828,0.238290,0.269536,0.075385,0.0,0.262338,0.004279,1.0,...,0.350709,0.188251,0.409465,0.0,0.312356,0.202418,0.110333,0.077836,0.065766,0.124806
2,angry,0.183939,0.150695,0.466302,0.274774,0.653131,1.0,0.630967,0.671980,1.0,...,0.368121,0.468825,0.375855,1.0,0.378847,0.471499,0.988207,0.550548,0.018353,0.548190
3,angry,0.537044,0.418116,0.481464,0.438458,0.144407,0.0,0.371219,0.012467,0.0,...,0.224992,0.260010,0.403900,0.0,0.596797,0.231718,0.688821,0.145960,0.571579,0.218896
4,angry,0.443417,0.432919,0.677055,0.334438,0.292933,0.0,0.417626,0.072513,0.0,...,0.233493,0.305763,0.370906,1.0,0.559862,0.285304,0.879467,0.089313,0.210681,0.375842
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1280,neutral,0.532191,0.356289,0.667120,0.268087,0.183363,0.0,0.383144,0.062852,1.0,...,0.431349,0.466445,0.368812,0.0,0.319030,0.372964,0.376775,0.208692,0.450813,0.511936
1281,neutral,0.391457,0.269737,0.255740,0.254209,0.467601,1.0,0.555540,0.948187,1.0,...,0.546964,0.207212,0.468196,0.0,0.634832,0.697788,0.006877,0.221563,0.530447,0.269997
1282,neutral,0.315095,0.133612,0.501751,0.290585,0.489786,0.0,0.586251,0.007189,1.0,...,0.310133,0.300780,0.443109,0.0,0.277499,0.456451,0.391197,0.034541,0.120995,0.151804
1283,neutral,0.311539,0.211948,0.174492,0.283762,0.273198,1.0,0.314977,0.722951,1.0,...,0.333568,0.575626,0.530533,0.0,0.313630,0.495963,0.245356,0.125659,0.125381,0.393924


# 2. Train model

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import pandas as pd

In [6]:
def train_and_eval(model, train_in, train_out, val_in, val_out):
    model.fit(train_in, train_out)
    predicted_val = model.predict(val_in)

    # Evaluate model
    return accuracy_score(val_out, predicted_val)

## 2.1 Read and Preprocess the dataset in a format that is appropriate for training

In [7]:
data = pd.read_csv("aus.csv")
data = data.drop("file", axis=1)
data = data.drop("face", axis=1)
#data = data[['emotion', 'AUS00', 'AUS01', 'AUS02', ]]
# get rid of AUS10, AUS18, AUS22, AUS24, AUS25
#data = data.drop("AUS10", axis=1)
#data = data.drop("AUS18", axis=1)
#data = data.drop("AUS22", axis=1)
#data = data.drop("AUS24", axis=1)
#data = data.drop("AUS25", axis=1)
print(data)

print("Unique classes", data["emotion"].unique(), "\n")


for class0 in data["emotion"].unique():
    print(f"Found {(data['emotion'] == class0).value_counts().iloc[1]} samples for class {class0}")

labels = data["emotion"]
inputs = data.drop("emotion", axis=1)

      emotion     AUS01     AUS02     AUS04     AUS05     AUS06  AUS07  \
0       angry  0.480870  0.221707  0.554721  0.341209  0.538130    1.0   
1       angry  0.368920  0.193828  0.238290  0.269536  0.075385    0.0   
2       angry  0.183939  0.150695  0.466302  0.274774  0.653131    1.0   
3       angry  0.537044  0.418116  0.481464  0.438458  0.144407    0.0   
4       angry  0.443417  0.432919  0.677055  0.334438  0.292933    0.0   
...       ...       ...       ...       ...       ...       ...    ...   
1280  neutral  0.532191  0.356289  0.667120  0.268087  0.183363    0.0   
1281  neutral  0.391457  0.269737  0.255740  0.254209  0.467601    1.0   
1282  neutral  0.315095  0.133612  0.501751  0.290585  0.489786    0.0   
1283  neutral  0.311539  0.211948  0.174492  0.283762  0.273198    1.0   
1284  neutral  0.417826  0.295563  0.422287  0.284784  0.207153    1.0   

         AUS09     AUS10  AUS12  ...     AUS15     AUS17     AUS18  AUS20  \
0     0.537321  0.023429    0.0  .

## 2.2 Balanced split of the dataset for train/val/test.

In [8]:
data_in, test_in, data_out, test_out = train_test_split(
    inputs,
    labels,
    test_size=0.1,
    random_state=42,
    stratify=labels  # balances labels across the sets
)
train_in, val_in, train_out, val_out = train_test_split(
    data_in,
    data_out,
    test_size=(0.2/0.9),  # 20% of the original data
    random_state=42,
    stratify=data_out
)

## 2.3 Hyperparameter tuning and model selection

### Using different kernels with Support Vector Machines (SVM)

In [9]:
kernels = ['linear', 'poly', 'rbf', 'sigmoid']

model_svc = []
best_model_svc = None
for (index, kernel) in enumerate(kernels):
    model_svc.append(SVC(kernel=kernel))
    accuracy = train_and_eval(model_svc[index], train_in, train_out, val_in, val_out)
    print("accuracy for index %2d, kernel = %10s: %.4f" % (index, kernel, accuracy))
    if best_model_svc is None or accuracy > best_model_svc[1]:
        best_model_svc = (model_svc[index], accuracy)

accuracy for index  0, kernel =     linear: 0.5759
accuracy for index  1, kernel =       poly: 0.6109
accuracy for index  2, kernel =        rbf: 0.6031
accuracy for index  3, kernel =    sigmoid: 0.1673


### Using different `k` values when using `k`-Nearest Neighbours

In [9]:
model_knn = []
best_model_knn = None
for (k, i) in enumerate([1, 2, 4, 8, 16, 32, 64, 128, 254]):
    model_knn.append(KNeighborsClassifier(n_neighbors=i))
    accuracy = train_and_eval(model_knn[k], train_in, train_out, val_in, val_out)
    print("accuracy for index %2d, k = %3d: %.4f" % (k, i, accuracy))
    if best_model_knn is None or accuracy > best_model_knn[1]:
        best_model_knn = (model_knn[k], accuracy)

accuracy for index  0, k =   1: 0.5292
accuracy for index  1, k =   2: 0.4903
accuracy for index  2, k =   4: 0.5409
accuracy for index  3, k =   8: 0.5486
accuracy for index  4, k =  16: 0.5759
accuracy for index  5, k =  32: 0.5875
accuracy for index  6, k =  64: 0.5447
accuracy for index  7, k = 128: 0.5486
accuracy for index  8, k = 254: 0.5331


### Changing the depth and breadth of a Multi-Layer Perceptron (MLP)

In [196]:
best_layer = 0
best_accuracy = 0
best_model_mlp = None
model_mlp = []
layers = []
for layer_index in range(4):
    layers.append(1)
    for (index, layer) in enumerate(layers):
        for i in range(2, 9):
            layers[index] = 2 ** i
            model_mlp.append(MLPClassifier(hidden_layer_sizes=tuple(layers), max_iter=5000))
            accuracy = train_and_eval(model_mlp[-1], train_in, train_out, val_in, val_out)

            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model_mlp = (model_mlp[-1], accuracy)

            print(f"accuracy for {layers}: %.4f" % (accuracy))

accuracy for [4]: 0.6148
accuracy for [8]: 0.5837
accuracy for [16]: 0.6226
accuracy for [32]: 0.5875
accuracy for [64]: 0.5370
accuracy for [128]: 0.5136
accuracy for [256]: 0.5409
accuracy for [4, 1]: 0.5720
accuracy for [8, 1]: 0.5875
accuracy for [16, 1]: 0.5214
accuracy for [32, 1]: 0.2646
accuracy for [64, 1]: 0.5253
accuracy for [128, 1]: 0.5642
accuracy for [256, 1]: 0.4591
accuracy for [256, 4]: 0.5136
accuracy for [256, 8]: 0.5409
accuracy for [256, 16]: 0.5486
accuracy for [256, 32]: 0.5409
accuracy for [256, 64]: 0.5564
accuracy for [256, 128]: 0.5370
accuracy for [256, 256]: 0.5331
accuracy for [4, 256, 1]: 0.2685
accuracy for [8, 256, 1]: 0.2685
accuracy for [16, 256, 1]: 0.4241
accuracy for [32, 256, 1]: 0.5253
accuracy for [64, 256, 1]: 0.2646
accuracy for [128, 256, 1]: 0.2685
accuracy for [256, 256, 1]: 0.2646
accuracy for [256, 4, 1]: 0.4708
accuracy for [256, 8, 1]: 0.5331
accuracy for [256, 16, 1]: 0.4475
accuracy for [256, 32, 1]: 0.5058
accuracy for [256, 64, 1]:

## 2.4 Analyzing

In [10]:
# Display the best trained models and their accuracy
print(f"Best SVM model: %10s -> accuracy: %10.2f %%" % (best_model_svc[0].kernel, best_model_svc[1]*100))
print(f"Best KNN model: %10s -> accuracy: %10.2f %%" % (best_model_knn[0].n_neighbors, best_model_knn[1]*100))
print(f"Best MLP model: %10s -> accuracy: %10.2f %%" % (best_model_mlp[0].hidden_layer_sizes, best_model_mlp[1]*100))

Best SVM model:       poly -> accuracy:      61.09 %


NameError: name 'best_model_knn' is not defined

### 2.4.1 Results for: ALL AUs
Best SVM model:       poly -> accuracy:      61.09 %

Best KNN model:         32 -> accuracy:      58.75 %

Best MLP model:      (32,) -> accuracy:      59.92 %

# 3. Testing

In [11]:
def feeling(image, enable_aus=False, enable_emotion=False):
    face = detector.detect_faces(image)

    aus = None
    emotion_str = None

    if enable_emotion or enable_aus:
        landmark = detector.detect_landmarks(image, face)
        if enable_aus:
            aus = detector.detect_aus(image, landmark)
        if enable_emotion:
            emotion_str = best_model_svc[0].predict(aus[0])[0]

    for (i, face) in enumerate(face[0]):
        f_hl = int(face[0])       # horizontal left
        f_vt = int(face[1])       # vertical top
        f_hr = int(face[2])       # horizontal right
        f_vb = int(face[3])       # vertical bottom

        if i == 0:
            color = (0, 255, 0)
        elif i == 1:
            color = (255, 0, 0)
        else:
            color = (0, 0, 255)

        thickness = 2
        cv2.line(image,(f_hl, f_vt), (f_hr, f_vt), color, thickness)   # top left to top right
        cv2.line(image,(f_hr, f_vt), (f_hr, f_vb), color, thickness)   # top right to bottom right
        cv2.line(image,(f_hr, f_vb), (f_hl, f_vb), color, thickness)   # bottom right to bottom left
        cv2.line(image,(f_hl, f_vb), (f_hl, f_vt), color, thickness)   # bottom left to top right

        if enable_emotion:
            cv2.putText(image, emotion_str, (f_hl, f_vt-10), cv2.FONT_HERSHEY_SIMPLEX, 1, color)

    return image, aus, emotion

In [14]:
cam = cv2.VideoCapture(0)
counter = 0

while True:
    # check = True means we managed to get a frame.
    # If check = False, the device is not available, and we should quit.
    check, frame = cam.read()
    if not check:
        break

    #new_frame, aus, emotion = feeling(frame, True, True)

    # OpenCV uses a separate window to display output.
    jcv2.imshow("video", frame)

    # Press ESC to exit.
    key = jcv2.waitKey(1) & 0xFF
    if key == 27:
        break

cam.release()
jcv2.destroyAllWindows()

HBox(children=(Button(button_style='danger', description='Stop', style=ButtonStyle()), HBox(children=(Label(va…

HBox(children=(Button(button_style='danger', description='Stop', style=ButtonStyle()), HBox(children=(Label(va…

VBox(children=(HTML(value='<center>video</center>'), Canvas()), layout=Layout(border_bottom='1.5px solid', bor…