In [1]:
import cv2
import numpy as np
import os
import re
from datetime import datetime, timedelta

In [None]:
# Important! For the face detection to work, please save a copy of
# 'haarcascade_frontalface_default.xml' to your working directory.
# The file can be downloaded here:
# https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml

# Instructions for taking images:
# In the video window, a square box will show up when the program detects your face.
# Once your face is detected, press s to start acquiring images.
# Press q to quit acquisition mode at any time.
# The acquired images will be stored in your designated folder at full resolution and in color.

# Instructions for cropping out the face region for analysis:
# Choose a resolution for the final face image. Each captured image is converted to grayscale,
# the face region is cropped out at the original resolution of the captured image,
# and the crop is then resized to the resolution you chose.

# Personal experience:
# I tried resolutions of 100x100 and 50x50; 100x100 gives a numpy array that is too big for the
# eigenvector calculation. 50x50 works well for the calculation, and using the first 2 components
# is good enough to make a binary classification prediction in my trial. I used 100 images per class.

In [8]:
# take a series of images within video
def image_capture(image_path, n, start_id=0):
    """
    Capture a series of webcam frames that contain a face and save them as JPEG files.

    A preview window shows the grayscale video stream with a rectangle drawn
    around every detected face. Press 's' to start saving and 'q' to quit at
    any time. Once started, the full-resolution colour frame is saved at most
    once every 0.5 second, and only while at least one face is detected.

    Input:
    -image_path: path of image folder to store captured images (created if missing).
    -n: number of images to capture.
    -start_id: the start number of image used in filename ('img_<id>.jpg'). Default start from 0.

    Output:
    -up to n images (fewer if 'q' is pressed) stored in the designated image folder.

    Raises:
    -IOError: the Haar cascade file cannot be loaded, or an image cannot be written.
    -RuntimeError: the camera cannot be opened or stops delivering frames.
    """
    # Minimum number of seconds between two saved images.
    capture_interval = 0.5

    # Fail early with a clear message instead of an OpenCV assertion later on.
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    if face_cascade.empty():
        raise IOError("Could not load 'haarcascade_frontalface_default.xml' "
                      "from the working directory")

    # Create the output folder (including parents) if it does not exist yet.
    os.makedirs(image_path, exist_ok=True)

    img_id = start_id
    end_id = start_id + n

    start = False
    oldtime = datetime.now()

    cap = cv2.VideoCapture(0)
    try:
        if not cap.isOpened():
            raise RuntimeError('Could not open the default camera (device 0)')

        while img_id < end_id:
            # Capture frame-by-frame
            ret, frame = cap.read()
            if not ret:
                raise RuntimeError('Failed to read a frame from the camera')

            # Face tracking is done on the grayscale image; the preview is gray too.
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray, 1.3, 5)

            # Mark every detected face in the preview.
            for (x, y, w, h) in faces:
                cv2.rectangle(gray, (x, y), (x+w, y+h), (255, 0, 0), 2)

            # Poll the keyboard exactly once per frame: a second waitKey call
            # would consume a different key event and could miss the key press.
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            elif key == ord('s'):
                start = True
                oldtime = datetime.now()

            # total_seconds() keeps the fractional part, so the 0.5 s interval
            # is honoured (timedelta.seconds is a whole number of seconds).
            elapsed = (datetime.now() - oldtime).total_seconds()

            if start and len(faces) and elapsed > capture_interval:
                filename = 'img_%i.jpg' % img_id
                target = os.path.join(image_path, filename)
                # Save the original colour frame, not the annotated gray preview.
                if not cv2.imwrite(target, frame):
                    raise IOError('Could not write image file: %s' % target)
                img_id += 1
                oldtime = datetime.now()

            if start:
                cv2.putText(gray, "Capture image {0}; last image is {1}".format(img_id, end_id-1), (40, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
            else:
                cv2.putText(gray, "Press s to start, press q to quit!", (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
            cv2.imshow('frame', gray)
    finally:
        # Always release the camera and close the preview window, even on error.
        cap.release()
        cv2.destroyAllWindows()

In [10]:
# Capture settings: 100 face images, numbered from 0, saved in the 'weiya' folder.
image_path, n, start_id = 'weiya', 100, 0
image_capture(image_path=image_path, n=n, start_id=start_id)

In [2]:
# Crop the face region out of each image, resize it to the requested resolution and save the face images to the face folder
def face_crop(image_path, face_path, class_id, resolution):
    """
    Crop the face regions out of captured images and save them as small grayscale images.

    Every file in image_path named 'img_<Y>.jpg' is loaded and converted to
    grayscale; each detected face is cropped, resized to `resolution` and saved
    as 'c<X>_image<Y>_face<Z>.jpg', where X is the class number, Y is the image
    number taken from the source filename and Z is the 1-based index of the
    face detected in that image. Files with any other name are ignored.

    Input:
    -image_path: path of image folder to be processed, images from same class are stored in same folder.
    -face_path: path of the folder for processed images (created if missing).
    -class_id: class number of face images.
    -resolution: resize resolution (px, px), passed to cv2.resize as (width, height).

    Output:
    -cropped grayscale face images of size `resolution` stored in face_path.

    Raises:
    -IOError: the Haar cascade file cannot be loaded.
    """
    # Fail early with a clear message instead of an OpenCV assertion later on.
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    if face_cascade.empty():
        raise IOError("Could not load 'haarcascade_frontalface_default.xml' "
                      "from the working directory")

    # Create the face folder (including parents) if it does not exist yet.
    os.makedirs(face_path, exist_ok=True)

    # Only files produced by the capture step are processed; the dot is escaped
    # and the whole name must match, so other .jpg files are skipped safely.
    name_pattern = re.compile(r'img_(\d+)\.jpg')

    for file in os.listdir(image_path):
        match = name_pattern.fullmatch(file)
        if match is None:
            continue
        img_id = match.group(1)

        source = os.path.join(image_path, file)
        img = cv2.imread(source)
        if img is None:
            # cv2.imread returns None for unreadable or corrupt files.
            print('Skipping unreadable image: %s' % source)
            continue

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.3, 5)

        # Faces within one image are numbered starting from 1.
        for i, (x, y, w, h) in enumerate(faces, start=1):
            output = 'c{0}_image{1}_face{2}.jpg'.format(class_id, img_id, i)
            roi_gray = cv2.resize(gray[y:y+h, x:x+w], resolution)
            cv2.imwrite(os.path.join(face_path, output), roi_gray)

In [3]:
# Crop the faces out of the images in folder 'weiya' and store the processed
# face images in the 'faces' folder with class label 1.
# Output filenames look like c1_imageX_faceY.jpg.
resolution = (50, 50)
face_crop(image_path='weiya', face_path='faces', class_id=1, resolution=resolution)