# Set up the import path for project scripts

In [1]:
import pathlib
import sys
root = pathlib.Path.cwd().parent
sys.path.insert(1, str(root))

%load_ext autoreload
%autoreload 2  # Autoreload all modules

# Import required libraries

## Import basic things

In [2]:
import os
import shutil
import matplotlib.pyplot as plt
import numpy as np

from pathlib import Path
from PIL import Image

## Import third-party libraries

In [3]:
import cv2
import dlib
import face_recognition
import mtcnn
from imutils import face_utils
from tqdm import tqdm

Using TensorFlow backend.


# Load image path

In [4]:
from appconfig.config import IMAGE_DATA_DIR, RAW_DATA_DIR, CLEANED_DATA_DIR, PROCESSED_DATA_DIR
# Each *_DIR constant is a pathlib.Path object

# Initialize detector

In [5]:
# Location of the 68-point landmark model file, under ../resources relative to
# the current working directory.
p = str(Path.cwd().parent.joinpath('resources', 'shape_predictor_68_face_landmarks.dat'))

# dlib frontal face detector and the landmark predictor loaded from `p`.
dlib_detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(p)

# MTCNN detector, used for bounding boxes and eye keypoints.
mtcnn_detector = mtcnn.MTCNN()

# Create train, test, validation folder 

In [18]:
def make_directory(path, foldername):
    """
    Create the directory ``path/foldername`` if it does not exist yet.

    path: str or pathlib.Path - location in which the folder is created
    foldername: str - name of the folder to create inside ``path``

    Missing parent directories are created as well. If something already
    exists at the target location, nothing is done.
    """
    # os.path.join accepts both str and Path objects, so callers may pass
    # either (the previous ``path / foldername`` failed for plain strings).
    dir_ = os.path.join(path, foldername)
    if not os.path.exists(dir_):
        # makedirs also creates missing parents; exist_ok avoids a failure if
        # the directory appears between the check above and this call.
        os.makedirs(dir_, exist_ok=True)
    return

# Create one sub-folder per data split inside the processed-data directory.
for _split_name in ('train', 'test'):
    make_directory(PROCESSED_DATA_DIR, _split_name)

# Create data dictionary

In [12]:
def create_data_dictionary(DATA_DIR, limit_image=None):
    '''
    Build a mapping from label to image filenames.

    DATA_DIR is expected to contain one sub-folder per label; the sub-folder
    name is used as the label and the files inside it are its images.

    Args:
    DATA_DIR: Image directory. type pathlib.Path
    limit_image: Maximum number of images kept per label.
                 ``None`` (default) keeps every image.

    return:
    Dict {'label': [images's filename]}
    Filenames are sorted so that the result (and any later slicing of it)
    is the same on every run.
    '''
    data_dict = {}
    # sorted() because glob order depends on the file system
    for label_dir in sorted(DATA_DIR.glob('*')):
        # Only sub-folders are labels; skip stray files in the root directory.
        if not label_dir.is_dir():
            continue

        # Path.name is used instead of splitting on '/', which does not work
        # with Windows path separators.
        filenames = sorted(
            image_path.name
            for image_path in label_dir.glob('*')
            if image_path.is_file()
        )
        if limit_image is not None:
            filenames = filenames[:limit_image]
        data_dict[label_dir.name] = filenames

    return data_dict

# Full label -> filenames mapping for everything found under IMAGE_DATA_DIR.
data_dict_ = create_data_dictionary(IMAGE_DATA_DIR)

In [13]:
# Keep at most `limit` images for each person (label).
limit = 1000
# Built from data_dict_ without modifying it, so this cell can be re-run safely.
data_dict = {key: val[:limit] for key, val in data_dict_.items()}

# Extract Face

In [6]:
def handle_when_x_y_less_than_0(x, y):
    """
    Clamp both coordinates so that neither is below zero.

    Returns the pair (x, y) with every negative value replaced by 0.
    """
    return tuple(np.max([0, coordinate]) for coordinate in (x, y))

def draw_rectangle_around_faces(frame, top, right, bottom, left):
    """
    Draw a rectangle on `frame` and return the frame.

    The rectangle spans from (left, top) to (right, bottom) and is drawn with
    colour (0, 255, 0) and a line thickness of 2.
    """
    top_left_corner = (left, top)
    bottom_right_corner = (right, bottom)
    colour = (0, 255, 0)
    cv2.rectangle(frame, top_left_corner, bottom_right_corner, colour, 2)
    return frame

def calculate_angle(left_eye, right_eye):
    """
    Return the angle, in degrees, of the line going from `left_eye` to
    `right_eye`.

    Each argument is an (x, y) sequence; the angle is arctan2(dY, dX).
    """
    horizontal_delta = right_eye[0] - left_eye[0]
    vertical_delta = right_eye[1] - left_eye[1]
    return np.degrees(np.arctan2(vertical_delta, horizontal_delta))

def rotate_image(arr_img, angle):
    """
    Rotate an image array by `angle` using PIL and return the result as an
    array again.
    """
    rotated = Image.fromarray(arr_img).rotate(angle)
    return np.asarray(rotated)

def face_landmark(arr_img):
    """
    Return facial landmarks for the first face found in `arr_img`.

    The dlib detector is tried first. When it finds at least one face, the
    landmarks of the first detection are predicted and converted with
    face_utils.shape_to_np. When it finds none, the result of
    face_recognition.face_landmarks(arr_img) is returned instead, so the
    return type depends on which path was taken.
    """
    detections = dlib_detector(arr_img, 0)
    if len(detections) == 0:
        # dlib found nothing: fall back to the face_recognition package.
        return face_recognition.face_landmarks(arr_img)

    # Only the first detected face is used.
    first_detection = detections[0]
    predicted_shape = predictor(arr_img, first_detection)
    return face_utils.shape_to_np(predicted_shape)

def handle_face_recognition(obj):
    """
    Flatten the first element of `obj` into a single list of coordinates.

    `obj[0]` must be a dict whose values are iterables of coordinates; the
    values are concatenated in the dict's iteration order.
    """
    first_face = obj[0]
    return [point for points in first_face.values() for point in points]

def crop_with_landmark(arr_img):
    """
    Crop `arr_img` to the bounding box of its facial landmarks.

    Landmarks come from face_landmark(); when that returns a list (the
    face_recognition fallback) it is flattened with
    handle_face_recognition() first.

    Landmark coordinates are clamped to the image area, so points that fall
    outside the picture shrink the box to the image border instead of being
    discarded. If there are no landmarks at all, or the resulting box has no
    area, the input array is returned unchanged rather than raising or
    returning a zero-size crop.
    """
    face_landmarks = face_landmark(arr_img)
    if isinstance(face_landmarks, list):
        if not face_landmarks:
            # No face found by either detector: nothing to crop to.
            return arr_img
        face_landmarks = handle_face_recognition(face_landmarks)

    points = np.asarray(face_landmarks).reshape(-1, 2)
    if points.shape[0] == 0:
        return arr_img

    # Keep every coordinate inside the image; negative indices would wrap
    # around when slicing.
    img_height, img_width = arr_img.shape[:2]
    x_values = np.clip(points[:, 0], 0, img_width)
    y_values = np.clip(points[:, 1], 0, img_height)

    left, right = int(x_values.min()), int(x_values.max())
    top, bottom = int(y_values.min()), int(y_values.max())

    # A box without area would produce an empty array that later image
    # conversion cannot handle.
    if right <= left or bottom <= top:
        return arr_img

    return arr_img[top:bottom, left:right]

def crop_with_mtcnn(arr_img):
    """
    Crop `arr_img` to the bounding box of the first face reported by MTCNN.

    Returns the cropped array, or None when no face is detected or the
    detected box has no area inside the image.
    """
    result = mtcnn_detector.detect_faces(arr_img)
    if not result:
        return None

    x1, y1, width, height = result[0]['box']
    # The far corner must be computed from the original origin. Computing it
    # after clamping would shift the box instead of truncating it whenever
    # the detector reports a negative x or y.
    x2, y2 = x1 + width, y1 + height
    x1, y1 = handle_when_x_y_less_than_0(x1, y1)

    if x2 <= x1 or y2 <= y1:
        # Nothing of the box lies inside the image.
        return None

    # extract the face
    return arr_img[y1:y2, x1:x2]

def extract_face(filename, required_size=(160, 160)):
    """
    Load an image, align it on the eyes, crop the face and resize it.

    filename: path of the image file to read
    required_size: (width, height) passed to PIL's Image.resize

    Steps:
    1. Detect faces with MTCNN. If none is found, the whole image is resized
       and returned.
    2. Rotate the image by the angle of the line between the eye keypoints of
       the first detection.
    3. Crop the rotated image with landmarks when face_landmark() finds any,
       otherwise with the MTCNN bounding box.
    4. If no usable crop is obtained, fall back to the original (unrotated)
       image.

    Returns the resized image as a NumPy array.
    """
    # load image from file
    pixels = plt.imread(filename)
    # detect faces in the image
    results = mtcnn_detector.detect_faces(pixels)

    if results:
        # rotate image so that the eyes are level
        keypoints = results[0]['keypoints']
        angle = calculate_angle(keypoints.get('left_eye'), keypoints.get('right_eye'))
        rotated_image = rotate_image(pixels, angle)

        # extract the face from rotated image
        if len(face_landmark(rotated_image)) > 0:
            face = crop_with_landmark(rotated_image)
        else:
            # Run the MTCNN crop once only; each call is a full detection pass.
            face = crop_with_mtcnn(rotated_image)

        # No crop, or an empty one, cannot be converted to an image:
        # use the original picture instead.
        if face is None or face.size == 0:
            face = pixels
    else:
        face = pixels

    # resize pixels to the model size
    image = Image.fromarray(face).resize(required_size)
    return np.asarray(image)

# Align Face and Save 

In [16]:
def convert_array_to_image_and_save(image_array, label, filename):
    """
    Save `image_array` as an image file at CLEANED_DATA_DIR/label/filename.

    The label folder is created first when it does not exist.
    """
    make_directory(CLEANED_DATA_DIR, label)
    # Build the destination once and reuse it (it was previously computed but
    # never used, and the path was rebuilt for the save call).
    save_path = CLEANED_DATA_DIR / label / filename
    Image.fromarray(image_array).save(str(save_path))
    return
    
def align_face(data_dict):
    """
    Run face extraction on every image listed in `data_dict` and save the
    results.

    data_dict: mapping of label -> list of filenames; each image is read
    from IMAGE_DATA_DIR/label/filename, passed through extract_face() and
    written by convert_array_to_image_and_save() under the same label and
    filename. A tqdm progress bar is shown per label.
    """
    source_root = IMAGE_DATA_DIR
    for label, filenames in data_dict.items():
        for filename in tqdm(filenames):
            source_file = str(source_root / label / filename)
            aligned_face = extract_face(source_file)
            convert_array_to_image_and_save(aligned_face, label, filename)
    return

In [19]:
# Process every image in the limited dictionary; the captured output below
# shows about 6 minutes for 1000 images.
align_face(data_dict)

100%|██████████| 1000/1000 [05:52<00:00,  2.84it/s]
