# Face Recognition using OpenCV.

This notebook walks through building a face recognition system with OpenCV: a cascade classifier locates each face (as a bounding box) and an LBPH face recognizer identifies whose face it is.

In [1]:
import os

In [2]:
#This changes your working directory to the folder that has your training-data and test-data folders
#You can change this to the folder of your choice
#The path is relative, so only change directory when the data folders are not already
#in the current working directory; this keeps the cell safe to re-run in the same kernel
PROJECT_DIR = 'Desktop/DATA SCIENCE MASTERS/PROJ/'
if not (os.path.isdir('training-data') and os.path.isdir('test-data')):
    os.chdir(PROJECT_DIR)

In [3]:
#List the training-data folder; it should contain one sub-folder per person, named s1 and s2
os.listdir('training-data')

['s1', 's2']

In [4]:
#Check that the s1 folder holds at least 10 images, named 1.jpg, 2.jpg, etc
os.listdir('training-data/s1')

['1.jpg',
 '10.jpg',
 '2.jpg',
 '3.jpg',
 '4.jpg',
 '5.jpg',
 '6.jpg',
 '7.jpg',
 '8.jpg',
 '9.jpg']

In [5]:
#Check that the s2 folder also holds at least 10 images, named 1.jpg, 2.jpg, etc
os.listdir('training-data/s2')

['1.jpg',
 '10.jpg',
 '2.jpg',
 '3.jpg',
 '4.jpg',
 '5.jpg',
 '6.jpg',
 '7.jpg',
 '8.jpeg',
 '9.jpeg']

In [6]:
#List the test-data folder; the prediction cell reads test1.jpg and test2.jpg from it
os.listdir('test-data')

['test1.jpg', 'test2.jpg']

In [7]:
#Lets import some modules
import cv2
import os # for reading training data directories and paths
import numpy as np # to convert python lists to numpy arrays as it is needed by OpenCV face recognizers


#Prepare training data
#The training images should be in one folder per person, named s<label> (s1, s2, ...);
#the label number is what links a folder to a name in the subjects list below
#The more images used in the training, the better
#I'll use at least 10 images in each folder
#The test folder, however, should just contain images with no labels

#Labels are taken from the folder names (s1 -> 1, s2 -> 2), so there is no label 0 in our training data;
#index 0 holds an empty placeholder so that subjects[label] lines up with the label numbers
subjects = ["", "Vangelis Michael", "Jennifer Rex"]

In [8]:
#Now lets prepare the data
#Function to detect faces using openCV
def detect_face(img):
    """Detect a face in an image and return its grayscale crop and bounding box.

    Parameters
    ----------
    img : image array that is converted with cv2.COLOR_BGR2GRAY, i.e. a BGR
        image such as the ones returned by cv2.imread.

    Returns
    -------
    (face, rect) where face is the grayscale region of the first detected face
    and rect is its (x, y, w, h) bounding box, or (None, None) when no face
    is detected.
    """
    #Convert the image to gray scale as the OpenCV face detector expects gray images
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    #load OpenCV face detector, i'm using HAAR which is slower but works best in this notebook in detecting faces
    #you could also use another accurate and faster classifier: LBP
    #    face_cascade = cv2.CascadeClassifier('lbpcascade_frontalface.xml')
    #NOTE: the cascade path is relative, so the xml file must be in the current working directory
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_alt.xml') #for haar

    #Lets detect multiscale images
    #result should be a list of faces, each one an (x, y, w, h) rectangle
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5)

    #if no faces are detected there is nothing to crop, so signal that with (None, None)
    if len(faces) == 0:
        return None, None

    #under the assumption that there will be only one face,
    #extract the face area
    (x, y, w, h) = faces[0]

    #return only the face part of the image
    #rows are indexed by y (height) and columns by x (width); using w for rows and h for
    #columns only gives the right crop when the detected box happens to be square
    return gray[y:y+h, x:x+w], faces[0]

In [9]:
#MAIN WORK
def prepare_training_data(data_folder_path, show_images=True):
    """Collect the detected faces and their labels from a training folder.

    The folder is expected to hold one sub-directory per subject, named
    's<label>' (for example 's1', 's2'); the number after the 's' becomes the
    integer label of every face found in that sub-directory.

    Parameters
    ----------
    data_folder_path : path of the folder that holds the subject directories.
    show_images : when True (the default) each training image is briefly shown
        in an OpenCV window; pass False to run without any GUI windows.

    Returns
    -------
    (faces, labels): two lists of equal length. faces holds whatever
    detect_face returned as the face for each image, labels the matching
    integer label. Images that cannot be read, and images in which no face
    is detected, are skipped.
    """
    #lists to hold all subject faces and labels
    faces = []
    labels = []

    #STEP 1
    #Go through each entry of the data folder (one directory for each subject)
    for dir_name in os.listdir(data_folder_path):
        subject_dir_path = os.path.join(data_folder_path, dir_name)

        #STEP 2
        #format of dir name = s<label>; everything after the leading 's' must be the label number.
        #Skip anything else (other folders, stray files) instead of crashing on int() or listdir()
        label_text = dir_name[1:]
        if not (dir_name.startswith("s")
                and label_text.isdecimal()
                and os.path.isdir(subject_dir_path)):
            continue
        label = int(label_text)

        #STEP 3
        #go through each image name, read image, detect face and add face to list of faces
        for image_name in os.listdir(subject_dir_path):
            #ignore system files like .DS_Store
            if image_name.startswith("."):
                continue

            image_path = os.path.join(subject_dir_path, image_name)

            #cv2.imread gives back None instead of raising when a file cannot be decoded,
            #so check before handing the result to resize / the detector
            image = cv2.imread(image_path)
            if image is None:
                print("Skipping unreadable image:", image_path)
                continue

            if show_images:
                #Display an image window to show the image
                cv2.imshow("Training on image...", cv2.resize(image, (400, 500)))
                cv2.waitKey(100)

            #detect face (the bounding box is not needed for training)
            face, _ = detect_face(image)

            #STEP 4
            #For the purpose of this tutorial, we will ignore faces that are not detected
            if face is not None:
                faces.append(face)
                labels.append(label)

    if show_images:
        #same close / waitKey / close sequence as before to tear the preview window down
        cv2.destroyAllWindows()
        cv2.waitKey(1)
        cv2.destroyAllWindows()

    return faces, labels

In [10]:
#Lets prepare our training data
#The data comes back as two lists of the same size:
#one list contains all the detected faces
#and the other list contains the respective label for each face
print('Preparing data...')
faces, labels = prepare_training_data("training-data")
print('Data prepared')

#Print total faces and labels
#(images in which no face was detected are skipped, so the totals can be lower than the number of image files)
print('Total faces: ', len(faces))
print("Total labels: ", len(labels))

Preparing data...
Data prepared
Total faces:  13
Total labels:  13


In [11]:
#Lets train our face recognizer.
#Below are the 3 face recognizers OpenCV comes equipped with
#(factory names written in the same style as the call used below)
#EigenFaces: cv2.face.EigenFaceRecognizer_create()
#FisherFaces: cv2.face.FisherFaceRecognizer_create()
#Local Binary Patterns Histogram (LBPH): cv2.face.LBPHFaceRecognizer_create()

#i'll use the LBPH recognizer

#create our LBPH recognizer
face_recognizer = cv2.face.LBPHFaceRecognizer_create()
#Alternatives, kept for reference; both need all training images to have equal dimensions
#face_recognizer = cv2.face.EigenFaceRecognizer_create()
#face_recognizer = cv2.face.FisherFaceRecognizer_create() #original note mentions 1461681 pixels - presumably from an error message, TODO confirm


#Train our face recognizer on our training faces
#OpenCV expects the labels vector to be a numpy array, hence np.array(labels)
face_recognizer.train(faces, np.array(labels))

In [12]:
#Predictions
#Function to draw rectangle on image
#according to given (x, y) coordinates and 
#given width and height
def draw_rectangle(img, rect):
    """Outline rect on img in green with a 2-pixel border.

    rect is an (x, y, w, h) sequence: top-left corner plus width and height.
    The image is modified in place and nothing is returned.
    """
    left, top, width, height = rect
    top_left = (left, top)
    bottom_right = (left + width, top + height)
    cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 2)
    
#function to draw text on given image starting from passed (x, y) coordinates.
def draw_text(img, text, x, y):
    """Write text on img in green, anchored at the point (x, y).

    Uses the FONT_HERSHEY_PLAIN font at scale 1.5 with a thickness of 2.
    The image is modified in place and nothing is returned.
    """
    anchor = (x, y)
    green = (0, 255, 0)
    cv2.putText(img, text, anchor, cv2.FONT_HERSHEY_PLAIN, 1.5, green, 2)
    

#This function recognizes the person in image passed
#and draws a rectangle around detected face with name
#of the subject
def predict(test_img):
    """Recognize the person in test_img and return an annotated copy of it.

    The input image is left untouched. When a face is detected, the copy gets
    a rectangle around the face and the subject's name just above it. When no
    face is detected, the copy is returned without any annotation.

    Parameters
    ----------
    test_img : image to run the recognizer on; it must provide .copy() and be
        accepted by detect_face.

    Returns
    -------
    The (possibly annotated) copy of test_img.
    """
    #make a copy of the image as we dont want to change original image
    img = test_img.copy()

    #detect face from the image
    face, rect = detect_face(img)

    #detect_face returns (None, None) when it finds no face; the recognizer and the
    #drawing helpers cannot work without one, so hand back the unannotated copy
    if face is None:
        return img

    #predict the image using our face recognizer (only the label is used here)
    label, _ = face_recognizer.predict(face)

    #get name of respective label returned by face recognizer;
    #a label with no entry in subjects (too large, or negative) is shown as "Unknown"
    #instead of raising IndexError or silently wrapping around to another name
    if 0 <= label < len(subjects):
        label_text = subjects[label]
    else:
        label_text = "Unknown"

    #draw a rectangle around face detected
    draw_rectangle(img, rect)
    #draw name of predicted person just above the rectangle
    draw_text(img, label_text, rect[0], rect[1]-5)

    return img

In [13]:
print("Predicting images...")

#load test images
#NOTE(review): the cv2.imread results are passed to predict() without being checked
test_img1 = cv2.imread('test-data/test1.jpg')
test_img2 = cv2.imread('test-data/test2.jpg')

#perform a prediction
predicted_img1 = predict(test_img1)
predicted_img2 = predict(test_img2)
print('Prediction Complete')

#display the annotated images; only the second one is currently shown, the first is commented out
#the window title is hard-coded to a subjects entry, it does not depend on the prediction
#cv2.imshow(subjects[1], cv2.resize(predicted_img1, (400, 500)))
cv2.imshow(subjects[2], cv2.resize(predicted_img2, (400, 500)))
#waitKey(0) keeps the window open until a key is pressed
cv2.waitKey(0)
#the destroyAllWindows / waitKey(1) / destroyAllWindows sequence mirrors the one in prepare_training_data;
#presumably the repetition is needed for the window to really close on some GUI backends - TODO confirm
cv2.destroyAllWindows()
cv2.waitKey(1)
cv2.destroyAllWindows()

Predicting images...
Prediction Complete
