In [17]:
import os
import os.path
import pandas as pd
import matplotlib.pyplot as plt
import math
import dlib
import cv2
import glob
import random
import numpy as np
import itertools
from sklearn.svm import SVC
import joblib 
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix  



In [4]:
# Landmark detector

detector = dlib.get_frontal_face_detector() #Face detector
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat") #Landmark identifier. Set the filename to whatever you named the downloaded file
def get_landmarks(image):
    data={}
    detections = detector(image, 1)
    for k,d in enumerate(detections): #For all detected face instances individually
        shape = predictor(image, d) #Draw Facial Landmarks with the predictor class
        xlist = []
        ylist = []
        for i in range(1,68): #Store X and Y coordinates in two lists
            xlist.append(float(shape.part(i).x))
            ylist.append(float(shape.part(i).y))
        xmean = np.mean(xlist) #Find both coordinates of centre of gravity
        ymean = np.mean(ylist)
        xcentral = [(x-xmean) for x in xlist] #Calculate distance centre <-> other points in both axes
        ycentral = [(y-ymean) for y in ylist]
        landmarks_vectorised = []
        for x, y, w, z in zip(xcentral, ycentral, xlist, ylist):
            landmarks_vectorised.append(w)
            landmarks_vectorised.append(z)
            meannp = np.asarray((ymean,xmean))
            coornp = np.asarray((z,w))
            dist = np.linalg.norm(coornp-meannp)
            landmarks_vectorised.append(dist)
            landmarks_vectorised.append((math.atan2(y, x)*360)/(2*math.pi))
        data['landmarks_vectorised'] = landmarks_vectorised
    if len(detections) < 1:
        data['landmarks_vestorised'] = "error"
    return data['landmarks_vectorised']



In [9]:
# Labels

files=[]
y=[]
direc=[]
for dirpath, dirnames, filenames in os.walk(r"D:\bk\vikram_\Projects\Facial_emotions\Emotion"):
    #for i in range(0,len(filenames)):
    for filename in [f for f in filenames]:
        direc.append(dirpath)
        y.append(os.path.join(dirpath, filename))
    if y!=[]:
        files.append(y)
    y=[]

# Images

images=[]
x=[]
for dirpath, dirnames, filenames in os.walk(r"D:\bk\vikram_\Projects\Facial_emotions\cohn-kanade-images"):
    for filename in [f for f in filenames]:
        a=dirpath.replace('cohn-kanade-images','Emotion')
        if filename!=".DS_Store" and a in direc:
            x.append(os.path.join(dirpath, filename))
    if (x!=[]):
        images.append(x[round(len(x)/2.7):])
    x=[]






In [11]:
label_files=[]
images_files=[]
emotions = ["neutral", "anger", "contempt","disgust", "fear", "happy", "sadness", "surprise"] #Emotion list
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
detector = dlib.get_frontal_face_detector()
for i in range(0,len(images)):
#     print(i)
    for j in images[i]:
        img_file = cv2.imread(j)
        gray = cv2.cvtColor(img_file, cv2.COLOR_BGR2GRAY) #convert to grayscale
        clahe_image = clahe.apply(gray)
        
        images_files.append(get_landmarks(clahe_image))
        x=str.strip(open(files[i][0], 'r').read().replace('\n','')).split()
        x=round(float(x[0]))
        label_files.append(emotions[x])




print('Number of Images',len(images_files))




Number of Images 3697


In [12]:
# Check distribution of classes

val,count=np.unique(label_files,return_counts=True)

print(count,val)

'''
[643 146 547 344 835 345 837] ['anger' 'contempt' 'disgust' 'fear' 'happy' 'sadness' 'surprise']

 '''




[643 146 547 344 835 345 837] ['anger' 'contempt' 'disgust' 'fear' 'happy' 'sadness' 'surprise']


"\n[643 146 547 344 835 345 837] ['anger' 'contempt' 'disgust' 'fear' 'happy' 'sadness' 'surprise']\n\n "

In [13]:
 # Store as Numbers

target=[emotions.index(x) for x in label_files]


 #Splitting data for training
X_train, X_test, y_train, y_test = train_test_split(images_files, target, test_size=0.10, random_state=42)




In [14]:
#Training Model
param_grid = {'C': [0.1,1, 10, 100], 'gamma': [1,0.1,0.01,0.001],'kernel': ['rbf', 'poly', 'sigmoid']}
#, verbose = True) #Set the classifier as a support vector machines with polynomial kernel
clf=GridSearchCV(SVC(kernel='linear', probability=True, tol=1e-3),param_grid,refit=True,verbose=2)
#Turn the training set into a numpy array for the classifier
npar_train = np.array(images_files) 
npar_trainlabs = np.array(target)
clf.fit(X_train,y_train)

Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   9.8s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=  13.4s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=  13.9s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=  14.8s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=  13.0s
[CV] END ........................C=0.1, gamma=1, kernel=poly; total time=   3.6s
[CV] END ........................C=0.1, gamma=1, kernel=poly; total time=   3.8s
[CV] END ........................C=0.1, gamma=1, kernel=poly; total time=   3.7s
[CV] END ........................C=0.1, gamma=1, kernel=poly; total time=   3.7s
[CV] END ........................C=0.1, gamma=1, kernel=poly; total time=   3.7s
[CV] END .....................C=0.1, gamma=1, kernel=sigmoid; total time=   5.5s
[CV] END .....................C=0.1, gamma=1, k

[CV] END ....................C=1, gamma=0.01, kernel=sigmoid; total time=   6.1s
[CV] END ....................C=1, gamma=0.01, kernel=sigmoid; total time=   4.9s
[CV] END ....................C=1, gamma=0.01, kernel=sigmoid; total time=   5.0s
[CV] END ....................C=1, gamma=0.01, kernel=sigmoid; total time=   4.6s
[CV] END .......................C=1, gamma=0.001, kernel=rbf; total time=  10.3s
[CV] END .......................C=1, gamma=0.001, kernel=rbf; total time=   9.9s
[CV] END .......................C=1, gamma=0.001, kernel=rbf; total time=  10.2s
[CV] END .......................C=1, gamma=0.001, kernel=rbf; total time=   9.5s
[CV] END .......................C=1, gamma=0.001, kernel=rbf; total time=   9.9s
[CV] END ......................C=1, gamma=0.001, kernel=poly; total time=   3.2s
[CV] END ......................C=1, gamma=0.001, kernel=poly; total time=   3.4s
[CV] END ......................C=1, gamma=0.001, kernel=poly; total time=   3.7s
[CV] END ...................

[CV] END ......................C=100, gamma=0.1, kernel=poly; total time=  11.0s
[CV] END ......................C=100, gamma=0.1, kernel=poly; total time=   6.1s
[CV] END ...................C=100, gamma=0.1, kernel=sigmoid; total time=   9.4s
[CV] END ...................C=100, gamma=0.1, kernel=sigmoid; total time=   9.6s
[CV] END ...................C=100, gamma=0.1, kernel=sigmoid; total time=   9.5s
[CV] END ...................C=100, gamma=0.1, kernel=sigmoid; total time=   9.6s
[CV] END ...................C=100, gamma=0.1, kernel=sigmoid; total time=   8.5s
[CV] END ......................C=100, gamma=0.01, kernel=rbf; total time=  25.6s
[CV] END ......................C=100, gamma=0.01, kernel=rbf; total time=  31.0s
[CV] END ......................C=100, gamma=0.01, kernel=rbf; total time=  24.3s
[CV] END ......................C=100, gamma=0.01, kernel=rbf; total time=  24.6s
[CV] END ......................C=100, gamma=0.01, kernel=rbf; total time=  23.2s
[CV] END ...................

GridSearchCV(estimator=SVC(kernel='linear', probability=True),
             param_grid={'C': [0.1, 1, 10, 100], 'gamma': [1, 0.1, 0.01, 0.001],
                         'kernel': ['rbf', 'poly', 'sigmoid']},
             verbose=2)

In [15]:
#Best Parameters 

print(clf.best_estimator_)


print('Test Data Score',clf.score(X_test, y_test))



SVC(C=0.1, gamma=0.1, kernel='poly', probability=True)
Test Data Score 0.9783783783783784


In [18]:
#create some predictions using the test set and then create classification reports and confusion matrices

grid_predictions = clf.predict(X_test)
print(confusion_matrix(y_test,grid_predictions))
print(classification_report(y_test,grid_predictions))


[[71  0  0  0  1  0  0]
 [ 0 16  0  0  0  1  0]
 [ 0  0 48  1  0  1  0]
 [ 0  0  0 25  0  0  0]
 [ 0  0  0  0 85  0  0]
 [ 0  0  0  2  0 35  0]
 [ 0  0  0  1  0  1 82]]
              precision    recall  f1-score   support

           1       1.00      0.99      0.99        72
           2       1.00      0.94      0.97        17
           3       1.00      0.96      0.98        50
           4       0.86      1.00      0.93        25
           5       0.99      1.00      0.99        85
           6       0.92      0.95      0.93        37
           7       1.00      0.98      0.99        84

    accuracy                           0.98       370
   macro avg       0.97      0.97      0.97       370
weighted avg       0.98      0.98      0.98       370



In [19]:

# Save the model as a pickle file

  
joblib.dump(clf, 'clf.pkl') 

print('Model Saved as clf.pkl')

Model Saved as clf.pkl
