    0 = neutral,                0 - нейтральність
    1 = anger,                  1 - злість
    2 = contempt,               2 - зневага
    3 = disgust,                3 - огида 
    4 = fear,                   4 - страх
    5 = happy,                  5 - радість 
    6 = sadness,                6 - сум 
    7 = surprise                7 - здивування 

In [1]:
import os
from shutil import copy, move

import numpy as np
import pandas as pd
import dlib
import cv2

In [2]:
# Emotion names; the position in this list is the numeric label (0 = neutral).
emotions = ["neutral", "anger", "contempt", "disgust", "fear", "happy", "sadness", "surprise"]

# Create one output folder per emotion when 'sorted_data' is missing or empty.
# The isdir() check keeps os.listdir() from raising on a fresh checkout, and
# exist_ok=True makes re-running the cell harmless.
if not os.path.isdir('sorted_data') or not os.listdir('sorted_data'):
    print('No emotions within sorted_data detected. Creating new')
    for em in emotions:
        os.makedirs(os.path.join('sorted_data', em), exist_ok=True)

In [3]:
# Numeric label -> emotion name, following the order of the `emotions` list.
encoded_emotions = dict(enumerate(emotions))

In [4]:
# Display the label -> emotion mapping built in the previous cell.
encoded_emotions

{0: 'neutral',
 1: 'anger',
 2: 'contempt',
 3: 'disgust',
 4: 'fear',
 5: 'happy',
 6: 'sadness',
 7: 'surprise'}

Get all participants of the dataset

In [5]:
img_path = 'src_images'
annotations_path = 'src_labels'
destination = 'sorted_data'

# Filled below with [label_file_name, raw_file_contents] pairs.
raw_collector = []

participants = os.listdir(img_path)
labels = os.listdir(annotations_path)
# Map emotions with folder names ('000', '001', ...). The codes are derived from
# the emotion list itself so that no emotion is silently dropped (the previous
# zip() against range(7) lost the eighth entry, "surprise").
emo_map = {em: f'{k:03}' for k, em in enumerate(emotions)}

for person in sorted(labels):
    person_path = os.path.join(annotations_path, person)
    for emotion in os.listdir(person_path):
        emotion_path = os.path.join(person_path, emotion)
        # List the folder once; an empty folder means the sequence is unlabeled.
        label_files = os.listdir(emotion_path)
        if not label_files:
            continue
        # Only the first listed file of the folder is read.
        first_label = label_files[0]
        with open(os.path.join(emotion_path, first_label), 'r') as f:
            raw_collector.append([first_label, f.read()])


In [6]:
# Convert every raw (label file name, file contents) pair into
# [file-name parts split on '_', label as float]. Spaces and a trailing
# 'e+00\n' exponent are stripped from the contents before the conversion.
collector = [
    [label_file.split('_'), float(raw_label.replace(' ', '').replace('e+00\n', ''))]
    for label_file, raw_label in raw_collector
]

##### Create a backup

In [7]:
# Dump the printed form of `collector` as rough CSV text: every '.0],' becomes
# a line break and all remaining square brackets are removed. The final row
# keeps its '.0' suffix because it is not followed by '],'.
with open('backup_0.csv', 'w') as f:
    backup_text = str(collector).replace('.0],', '\n')
    for bracket in ('[', ']'):
        backup_text = backup_text.replace(bracket, '')
    f.write(backup_text)

#### Copy images to the appropriate folders

In [8]:
# Copy the frames of every labeled sequence into the per-emotion folders.
for [person, collection, img, ll], label in collector:
    # A labeled sequence must carry a non-neutral emotion. Raise explicitly
    # instead of asserting, because asserts are stripped under `python -O`.
    if not label > 0:
        raise ValueError(f'{person, collection} has neutral ({label}) label')

    path_to_img_folder = os.path.join(img_path, person, collection)
    neutral_folder = os.path.join(destination, encoded_emotions[0])
    emotion_folder = os.path.join(destination, encoded_emotions[int(label)])

    images = sorted(os.listdir(path_to_img_folder))
    n = len(images)
    for i, image in enumerate(images, 1):
        img_with_path = os.path.join(path_to_img_folder, image)
        # Relative position of the frame within the sequence, in (0, 1].
        position = i / n
        if position <= 0.2:
            # The first 20% of the frames are treated as neutral.
            copy(img_with_path, os.path.join(neutral_folder, image))
        elif position >= 0.4:
            # Frames from the 40% mark onwards get the sequence's emotion.
            copy(img_with_path, os.path.join(emotion_folder, image))
        # Frames strictly between 20% and 40% are skipped.
    

Now go through the folders and manually remove unsuitable images (especially from the neutral folder).



After that, pass all the images through the dlib keypoint detector to extract the facial landmark points.

In [None]:
# Face detector used by get_face_landmarks() to locate faces in a grayscale image.
detector = dlib.get_frontal_face_detector()
# Landmark predictor loaded from a model file one directory above the notebook.
# NOTE(review): presumably the 68-point model, given shape_to_np's default - confirm.
predictor = dlib.shape_predictor('../face_landmarks.dat')


def shape_to_np(shape, dtype="int", landmarks_count=68):
    """Convert a landmark container into a ``(landmarks_count, 2)`` array.

    Args:
        shape: Object whose ``part(i)`` returns a point with ``x`` and ``y``.
        dtype: dtype of the returned array.
        landmarks_count: Number of points to read, starting from index 0.

    Returns:
        Array with one ``(x, y)`` row per landmark.
    """
    coords = [
        (shape.part(idx).x, shape.part(idx).y)
        for idx in range(landmarks_count)
    ]
    # The reshape keeps the (0, 2) shape when no landmarks are requested.
    return np.array(coords, dtype=dtype).reshape(landmarks_count, 2)


def get_normalized_coords(shape, w, h):
    """Scale landmark coordinates into the [0, 1] range of their bounding box.

    Each axis is mirrored and scaled: ``(max - value) / (max - min)``, so the
    largest coordinate maps to 0 and the smallest to 1.

    Args:
        shape: ``(N, 2)`` array-like of ``(x, y)`` landmark coordinates.
        w: Unused; kept for interface compatibility.
        h: Unused; kept for interface compatibility.

    Returns:
        Float array of the same shape as ``shape`` with normalized values.
    """
    shape = np.asarray(shape)
    face_landmarks = np.zeros(shape.shape)

    x_max = shape[:, 0].max()
    y_max = shape[:, 1].max()
    x_min = shape[:, 0].min()
    y_min = shape[:, 1].min()

    # A zero extent (all points share a coordinate) would divide by zero;
    # fall back to 1 so that axis normalizes to 0 instead of NaN.
    x_span = (x_max - x_min) or 1
    y_span = (y_max - y_min) or 1

    face_landmarks[:, 0] = (x_max - shape[:, 0]) / x_span
    # The subtraction must be parenthesized before dividing, exactly as for x.
    face_landmarks[:, 1] = (y_max - shape[:, 1]) / y_span

    return face_landmarks


def get_radius_vector(norm_shape, x_c, y_c):
    """Return the Euclidean distance of every point from ``(x_c, y_c)``.

    Args:
        norm_shape: ``(N, 2)`` array-like of ``(x, y)`` points.
        x_c: x coordinate of the reference point.
        y_c: y coordinate of the reference point.

    Returns:
        ``(N, 1)`` array of distances.
    """
    points = np.array(norm_shape)
    xs, ys = np.split(points, 2, axis=1)
    dx = xs - x_c
    dy = ys - y_c
    return np.sqrt(dx * dx + dy * dy)


def get_angle(norm_shape):
    """Return the angle of every point, measured from the positive y axis.

    Uses ``arctan2(x, y)``, which equals ``arctan(x / y)`` for ``y > 0`` but
    stays finite when ``y == 0`` (no division by zero, no NaN for the origin).

    Args:
        norm_shape: ``(N, 2)`` array-like of ``(x, y)`` points.

    Returns:
        Array of shape ``(1, N, 1)`` with angles in radians. The leading axis
        is kept for compatibility with the previous return shape.
    """
    x, y = np.split(np.array(norm_shape, dtype=float), 2, axis=1)
    return np.arctan2(x, y)[np.newaxis]


def get_face_landmarks(img_name, img_array, predictor):
    """Extract landmark features for the first face detected in an image.

    Args:
        img_name: Image name, used only in the diagnostic message.
        img_array: BGR image array as loaded by ``cv2.imread``.
        predictor: Landmark predictor called as ``predictor(gray, rect)``.

    Returns:
        ``(shape, radius_vector, angle)`` for the first detected face, or
        ``None`` when the module-level ``detector`` finds no face.
    """
    gray_image = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)
    h, w = img_array.shape[:2]
    rects = list(detector(gray_image, 1))
    if not rects:
        # Explicit "no face" result; callers must handle None.
        return None
    if len(rects) > 1:
        # The old per-iteration check could never fire because the loop
        # returned on its first pass; report ambiguous images up front.
        print(f'{img_name} seems to fail')

    face_detector = predictor(gray_image, rects[0])
    shape = shape_to_np(face_detector)
    shape = get_normalized_coords(shape, w, h)
    # NOTE(review): the radius is measured from (w, h), the image size in
    # pixels, while `shape` is already normalized - confirm this is intended.
    radius_vector = get_radius_vector(shape, w, h)
    angle = get_angle(shape)

    return shape, radius_vector, angle




In [None]:
#!pip install atpbar
from atpbar import atpbar

In [None]:
# One dict per image: file name, emotion and per-landmark features
# x_i, y_i (normalized coordinates), r_i (radius) and a_i (angle).
jsonlike = []
for emotion in atpbar(emotions, name='images'):
    emotion_folder = os.path.join(destination, emotion)
    for image in atpbar(os.listdir(emotion_folder), name=emotion):
        imgpath = os.path.join(emotion_folder, image)
        try:
            img_array = cv2.imread(imgpath)
            face_landmarks, rad_vectors, angles = get_face_landmarks(image, img_array, predictor)
        except Exception:
            # Unreadable image or no face found (None cannot be unpacked).
            # `Exception` rather than a bare except, so Ctrl-C still works.
            print(f'failed on {imgpath}')
            continue

        # Flatten to 1-D so that each feature is a scalar, whatever singleton
        # axes the helper functions add around their results.
        rad_vectors = np.ravel(rad_vectors)
        angles = np.ravel(angles)

        current_image_collector = {'image': image, 'emotion': emotion}
        for i, [x, y] in enumerate(face_landmarks):
            current_image_collector[f'x_{i}'] = x
            current_image_collector[f'y_{i}'] = y
            current_image_collector[f'r_{i}'] = rad_vectors[i]
            # Store the angle here; this column used to duplicate y.
            current_image_collector[f'a_{i}'] = angles[i]

        jsonlike.append(current_image_collector)

        
        


VBox()

In [None]:
# One row per processed image; the columns are the keys of the collected dicts.
df2 = pd.DataFrame(jsonlike)

In [None]:
# Preview the first rows of the feature table.
df2.head()

In [None]:
# Column dtypes and non-null counts.
df2.info()

In [None]:
# Summary statistics of the numeric columns.
df2.describe()

In [None]:
# Shuffle all rows (frac=1 keeps every row). No random_state is passed,
# so the order differs between runs.
df2 = df2.sample(frac=1)

In [None]:
# Preview the first rows after shuffling.
df2.head()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pickle

from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
import matplotlib.pyplot as plt

In [None]:
# Target column: the emotion name of every image.
label = df2['emotion']

In [None]:
# Encode emotion names as integers. LabelEncoder numbers the classes in sorted
# (alphabetical) order, which is NOT the order of the `emotions` list, so
# decode predictions with `le.inverse_transform`, not `encoded_emotions`.
le = LabelEncoder()
y = le.fit_transform(label)

In [None]:
# Keep only the numeric feature columns.
df2 = df2.drop(columns=['emotion', 'image'])

In [None]:
# Hold out 20% of the rows for testing. No random_state or stratify is passed,
# so the split changes between runs and class balance is not enforced.
X_train, X_test, y_train, y_test = train_test_split(df2, y, test_size=0.2)

In [None]:
# SVC is not among the names imported earlier in the notebook; import it here
# so the cell does not fail with a NameError.
from sklearn.svm import SVC

svm = SVC(kernel='linear')

In [None]:
# Fraction of test samples predicted correctly.
# NOTE(review): `y_pred` is only assigned in a later cell (the XGBoost
# prediction) - presumably an SVM fit/predict cell was removed; confirm.
np.mean(y_pred == y_test)

In [None]:
# Convert the feature frames to plain NumPy arrays. `DataFrame.as_matrix()` was
# removed in pandas 1.0; `.values` is available in every pandas version.
X_train = X_train.values
X_test = X_test.values

In [None]:
# Show (rows, features) of the train and test matrices.
X_train.shape, X_test.shape

In [None]:
# Train a gradient-boosted classifier with default hyper-parameters.
model = XGBClassifier()
model.fit(X_train, y_train)

In [None]:
# Print the fitted model's configuration.
print(model)

In [None]:
# Predict class codes for the held-out set and round each one to an integer.
y_pred = model.predict(X_test)
predictions = list(map(round, y_pred))

In [None]:
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
# Persist the trained model; the context manager closes the file even if
# pickling fails.
with open('xgb_model.pickle', 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
# Reload the model saved above. Only unpickle files you created yourself:
# pickle can execute arbitrary code from untrusted data.
with open("xgb_model.pickle", "rb") as model_file:
    restored = pickle.load(model_file)

In [None]:
#restored2 = pickle.load(open("xgb_with_rv.pickle", "rb"))

In [None]:
# Take the last test row as a single example for the cells below.
X_sample = X_test[-1]

### Restoring the original point coordinates

In [None]:
# Offsets (x_c, y_c) and scale factors (w, h) used by the restoring cells below.
x_c, y_c = 100, 100
w, h = 200, 200

In [None]:
%time

# Rebuild 2-D points from the flat sample, reading it in groups of three and
# using the second and third value of each group as x and y.
# NOTE(review): this assumes three features per landmark - confirm it matches
# the columns the model was actually trained on.
# The temporaries are named px/py so the module-level label array `y` is not
# overwritten by this loop.
shape = []
for k in range(0, len(X_sample), 3):
    px = x_c - w * X_sample[k+1]
    py = y_c - h * X_sample[k+2]
    shape.append([px, py])
x_s = [s[0] for s in shape]
y_s = [s[1] for s in shape]

plt.scatter(x_s, y_s)

In [None]:
%time

# Vectorized variant: view the sample as 68 rows of 3 values, drop the first
# column and rescale the remaining two.
# NOTE(review): the sign convention (w * v - x_c) is the opposite of the loop
# version in the previous cell (x_c - w * v) - confirm which one is intended.
rand_ar = np.array(X_sample).reshape(68, 3)[:, 1:].copy()

rand_ar[:, 0] = w * rand_ar[:, 0] - x_c
rand_ar[:, 1] = h * rand_ar[:, 1] - y_c

plt.scatter(rand_ar[:, 0], rand_ar[:, 1])

In [None]:
def restore_coordinates(norm_shape, x_c, y_c, w, h):
    """Rescale a flat 204-value sample back into 68 ``(x, y)`` points.

    The sample is viewed as 68 rows of 3 values; the first value of each row
    is dropped and the other two are mapped to ``w * v - x_c`` and
    ``h * v - y_c``.

    Args:
        norm_shape: Array-like with 204 values.
        x_c: Offset subtracted from the scaled x values.
        y_c: Offset subtracted from the scaled y values.
        w: Scale factor for x.
        h: Scale factor for y.

    Returns:
        Float array of shape ``(68, 2)``.
    """
    shape = np.array(norm_shape, dtype=float).reshape(68, 3)
    shape = np.delete(shape, obj=0, axis=1)

    # Work on the function's own data; the previous version read the
    # module-level `rand_ar` and ignored `norm_shape` entirely.
    shape[:, 0] = -x_c + w * shape[:, 0]
    shape[:, 1] = -y_c + h * shape[:, 1]
    return shape

In [None]:
# Predict for a single sample. reshape(1, -1) makes it one row regardless of
# the feature count, instead of hard-coding 204.
pred = model.predict(X_sample.reshape(1, -1))

In [None]:
# Decode with the LabelEncoder that produced the training targets. Its codes
# follow alphabetical class order, so looking the code up in `encoded_emotions`
# (which follows the `emotions` list order) would name the wrong emotion.
le.inverse_transform(np.ravel(pred).astype(int))[0]