In [1]:
import os 
import numpy as np 
import pandas as pd 
import pickle 
import cv2

## Caffe model (SSD) ResNet-10 Architecture

In [2]:
# Locations of the pre-trained network files inside the models/ directory
face_detection_proto = 'models/deploy.prototxt.txt'
face_detection_model = 'models/res10_300x300_ssd_iter_140000.caffemodel'
face_descriptor = 'models/openface.nn4.small2.v1.t7'

# Face detector: Caffe network built from the prototxt definition and the trained weights
detector_model = cv2.dnn.readNetFromCaffe(
    face_detection_proto,
    face_detection_model,
)
# Face descriptor: Torch network used later to turn a face crop into a feature vector
descriptor_model = cv2.dnn.readNetFromTorch(face_descriptor)

In [3]:
#function to display/close images
def display(winame,image):
    """Show `image` in a window titled `winame` and block until a key is pressed.

    Parameters
    ----------
    winame : window name handed to cv2.namedWindow / cv2.imshow / cv2.destroyWindow.
    image : image handed to cv2.imshow.

    The window teardown lives in a `finally` block so the window is destroyed
    even when cv2.imshow or cv2.waitKey raises (or the wait is interrupted),
    instead of being left open. Any such exception still propagates.
    """
    cv2.namedWindow(winame)
    try:
        cv2.imshow(winame,image)
        cv2.waitKey(0) #close window when key press is detected
    finally:
        cv2.destroyWindow(winame)
        # NOTE(review): presumably gives the GUI event loop one more tick to process the close — confirm
        cv2.waitKey(1)

In [7]:
# Load the sample image. cv2.imread signals a missing/unreadable file by returning
# None rather than raising, so fail here with a clear message instead of later on img.copy().
sample_image_path = 'images_2/Sachin Tendulkar/2200.jpg'
img = cv2.imread(sample_image_path)
if img is None:
    raise FileNotFoundError('Could not read image: {}'.format(sample_image_path))

In [9]:
# Preview the loaded sample image; blocks until a key is pressed in the window
display(winame='samp', image=img)

In [10]:
# Work on a copy so the loaded image itself is left untouched
image = img.copy()
# First two shape entries are height and width
h = image.shape[0]
w = image.shape[1]
# Build the 300x300 input blob for the detector, subtracting the per-channel mean values
img_blob = cv2.dnn.blobFromImage(
    image,
    scalefactor=1,
    size=(300,300),
    mean=(104,177,123),
    swapRB=False,
    crop=False,
)

## Face Detection

In [16]:
# Run the face detector on the sample image: build the 300x300 blob, feed it in, run a forward pass
img_blob = cv2.dnn.blobFromImage(image,1,(300,300),(104,177,123),swapRB=False,crop=False)
detector_model.setInput(img_blob)
detections = detector_model.forward()

# Start from a plain copy so img_draw always exists for the display call below,
# even when no face passes the confidence threshold
img_draw = image.copy()
face_found = False

# Candidates are indexed as detections[0,0,i,:], so the number of candidates is the size
# of the third axis; len(detections) would only measure the first axis
if detections.shape[2] > 0:
    i = np.argmax(detections[0,0,:,2]) #face with max confidence score
    confidence = detections[0,0,i,2]
    if confidence > 0.5:
        # columns 3:7 are scaled by (w,h,w,h) to get pixel corner coordinates
        box = detections[0,0,i,3:7] * np.array([w,h,w,h])
        (startx,starty,endx,endy) = box.astype('int')
        # clamp to the image so the crop taken from these coordinates later is a valid slice
        startx, starty = max(0, int(startx)), max(0, int(starty))
        endx, endy = min(w, int(endx)), min(h, int(endy))
        #reference: draw bounding for face
        cv2.rectangle(img_draw,(startx,starty),(endx,endy),(0,255,0))
        face_found = True

if not face_found:
    print('No face detected with confidence > 0.5')
display('sample',image)
display('draw_image',img_draw)


## Feature Extraction/Embedding

In [14]:
# Cut the detected face out of the image (rows starty:endy, columns startx:endx)
roi = image[starty:endy, startx:endx].copy()
# Scale pixels by 1/255, swap the R and B channels and resize/crop to the 96x96 descriptor input
faceblob = cv2.dnn.blobFromImage(
    roi,
    scalefactor=1/255,
    size=(96,96),
    mean=(0,0,0),
    swapRB=True,
    crop=True,
)
# Forward pass through the descriptor network yields the face feature vector
descriptor_model.setInput(faceblob)
vectors = descriptor_model.forward()


In [15]:
# Show the array returned by descriptor_model.forward() for the sample face
vectors

array([[-0.0484469 ,  0.07973776, -0.09963043,  0.05219211,  0.02248172,
         0.20198324,  0.13852596, -0.06266073, -0.06219366,  0.01123174,
         0.00230608,  0.07717835,  0.01544594, -0.03280811, -0.01987833,
        -0.10874771, -0.09319498, -0.01268344,  0.10149116, -0.03454341,
         0.03998282, -0.14238803, -0.11665413, -0.0164524 ,  0.07435052,
         0.00277314, -0.17686346, -0.14906731,  0.00644329,  0.10620255,
         0.08508859,  0.08288753, -0.08299872, -0.03750012,  0.04876548,
         0.05993147, -0.07223057, -0.06693354,  0.0433912 , -0.0713726 ,
         0.14999215, -0.0811222 ,  0.04310592,  0.04121755, -0.09240614,
        -0.03493492,  0.11330134, -0.07407253, -0.12562084, -0.14079288,
        -0.07809538, -0.01990954,  0.07394531, -0.0358038 ,  0.0848316 ,
         0.13799968, -0.115841  ,  0.19711758, -0.0861319 , -0.01615877,
        -0.10057884,  0.07192279,  0.20695479, -0.21651301,  0.16872855,
         0.14802304,  0.04371198, -0.0987719 , -0.1

In [20]:
#Function for feature vector extraction of detected faces
def face_vectorizer(image_path):
    """Return the descriptor vector for the most confident face found in an image.

    The image is read from `image_path`, passed through `detector_model`, and the
    candidate with the highest confidence is cropped and passed through
    `descriptor_model`.

    Parameters
    ----------
    image_path : path of the image file to read with cv2.imread.

    Returns
    -------
    The array returned by `descriptor_model.forward()` for the face crop, or
    None when the image cannot be read, no candidate has confidence > 0.5,
    or the clamped face box is empty.
    """
    image = cv2.imread(image_path)
    # cv2.imread returns None (it does not raise) for missing or unreadable files
    if image is None:
        return None
    h,w = image.shape[:2]
    img_blob = cv2.dnn.blobFromImage(image,1,(300,300),(104,177,123),swapRB=False,crop=False)
    detector_model.setInput(img_blob)
    detections = detector_model.forward()
    # candidates live on the third axis (detections[0,0,i,:]); len(detections)
    # would only measure the first axis
    if detections.shape[2] == 0:
        return None
    i = np.argmax(detections[0,0,:,2]) #face with max confidence score
    confidence = detections[0,0,i,2]
    if confidence <= 0.5:
        return None #if face is not detected return none
    box = detections[0,0,i,3:7] * np.array([w,h,w,h])
    (startx,starty,endx,endy) = box.astype('int')
    # clamp to the image: a negative start would be read as "from the end" by
    # the slice below and silently crop the wrong region
    startx, starty = max(0, int(startx)), max(0, int(starty))
    endx, endy = min(w, int(endx)), min(h, int(endy))
    if endx <= startx or endy <= starty:
        return None # empty crop, nothing to describe
    roi = image[starty:endy,startx:endx].copy() #region of interest
    faceblob = cv2.dnn.blobFromImage(roi,1/255,(96,96),(0,0,0),swapRB=True,crop=True)
    descriptor_model.setInput(faceblob)
    vectors = descriptor_model.forward() #extract vectors from faces
    return vectors
            
    

## Vectorize all images using face vectorizer function

In [45]:
# Parallel lists: one feature vector and one label per successfully processed image
data = {'data': [], 'label': []}

In [47]:
# Walk images_2/<label>/<file> and collect one feature vector per image in which a face is found.
# Listings are sorted because os.listdir returns entries in arbitrary order; sorting keeps
# the order of the collected samples reproducible between runs.
folders = sorted(os.listdir('images_2'))
for folder in folders:
    folder_path = 'images_2/{}'.format(folder)
    # skip stray files in the root directory; os.listdir on them would raise
    if not os.path.isdir(folder_path):
        continue
    filenames = sorted(os.listdir(folder_path))
    for filename in filenames:
        image_path = '{}/{}'.format(folder_path,filename)
        try:
            vector = face_vectorizer(image_path)
        except Exception as err:
            # best effort: keep going, but report what was skipped instead of hiding it;
            # KeyboardInterrupt is not an Exception subclass, so the loop can still be stopped
            print('Skipped {}: {}'.format(image_path, err))
            continue
        if vector is not None:
            data['data'].append(vector)
            data['label'].append(folder)
            print('Features Extracted')
            

Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Ext

Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Ext

Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Ext

Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Extracted
Features Ext

In [50]:
# Number of collected feature vectors per label
label_series = pd.Series(data['label'])
label_series.value_counts()

Robert Downey Jr      125
Barack Obama          122
Donald Trump          122
Scarlett Johansson    119
Elon Musk             117
Lionel Messi          114
Roger Federer         111
Cristiano Ronaldo     109
Salman Khan           107
Sachin Tendulkar      107
Leonardo DiCaprio     106
Aamir Khan            103
Angelina Jolie         98
Joe Biden              98
Tom Curise             96
Youssef Al-Yakoob      20
dtype: int64

In [51]:
#save data dictionary of image feature vectors
# The with-block closes the file handle (and flushes it) as soon as the dump finishes,
# rather than leaving an unclosed handle from a bare open() call.
with open('data_face_features.pickle',mode='wb') as feature_file:
    pickle.dump(data,feature_file)