# ELEC0134 - Applied Machine Learning Systems Assignment 22-23
## A1: Gender Detection

This notebook builds an SVM model to detect gender on the CelebA dataset. The model is created in the following steps:

1. Image preprocessing to extract facial landmarks.
2. Normalization of the features.
3. Training the SVM model 
4. Tuning hyperparameters of SVM model 

## 1) Preprocessing
### Facial landmark extraction



In [1]:
# Imports 

import os.path
import numpy as np
from keras.preprocessing import image
import cv2
import dlib
import imutils
from imutils import face_utils

Uses the dlib library's pretrained shape predictor to predict the locations of 68 landmarks on each detected face.

In [7]:
# Location of the pretrained 68-point landmark model, relative to the
# working directory (one level up).
LANDMARK_MODEL_PATH = '../shape_predictor_68_face_landmarks.dat'

# dlib's frontal face detector, shared by the cells below
detector = dlib.get_frontal_face_detector()

# Landmark predictor loaded from the model file above
predictor = dlib.shape_predictor(LANDMARK_MODEL_PATH)

In [8]:
def shape_to_np(shape, dtype="int"):
    """Convert a landmark detection into an (N, 2) NumPy array of (x, y) points.

    :param shape: object exposing ``num_parts`` and ``part(i)``, where each part
                  carries ``x`` and ``y`` attributes (in this notebook, the
                  result of calling ``predictor``).
    :param dtype: NumPy dtype of the returned array.
    :return: array of shape ``(shape.num_parts, 2)``; row ``i`` holds the
             (x, y) coordinates of landmark ``i``.
    """
    n_points = shape.num_parts
    landmarks = np.zeros((n_points, 2), dtype=dtype)

    # one row per landmark: column 0 is x, column 1 is y
    for idx in range(n_points):
        point = shape.part(idx)
        landmarks[idx, 0] = point.x
        landmarks[idx, 1] = point.y

    return landmarks

In [9]:
def rect_to_bb(rect):
    """Convert a rectangle with edge accessors into an ``(x, y, w, h)`` tuple.

    :param rect: object exposing ``left()``, ``top()``, ``right()`` and
                 ``bottom()`` (in this notebook, a detection returned by
                 ``detector``).
    :return: tuple ``(x, y, w, h)`` where ``x``/``y`` are the left/top edges,
             ``w = right - left`` and ``h = bottom - top``.
    """
    left, top = rect.left(), rect.top()
    width = rect.right() - left
    height = rect.bottom() - top
    return (left, top, width, height)

In [None]:
def run_dlib_shape(image):
    """Locate the facial landmarks of the largest face detected in ``image``.

    The image is cast to ``uint8``, converted to grayscale and passed to the
    module-level ``detector``; every detected face is then run through the
    module-level ``predictor``.

    :param image: colour image array accepted by ``cv2.cvtColor``.
    :return: ``(landmarks, image_uint8)`` where ``landmarks`` is a ``(68, 2)``
             ``int64`` array for the face with the largest bounding-box area,
             or ``None`` when no face was detected; ``image_uint8`` is the
             input cast to ``uint8``.
    """
    image_u8 = image.astype('uint8')

    # NOTE(review): the conversion assumes BGR channel order, while the caller
    # in this notebook loads images through keras -- presumably RGB; confirm.
    grey = cv2.cvtColor(image_u8, cv2.COLOR_BGR2GRAY).astype('uint8')

    detections = detector(grey, 1)
    if len(detections) == 0:
        return None, image_u8

    flat_landmarks = []
    areas = []
    for detection in detections:
        # 68 (x, y) landmarks for this face, flattened to 136 values
        points = shape_to_np(predictor(grey, detection))
        _, _, width, height = rect_to_bb(detection)
        flat_landmarks.append(np.reshape(points, [136]))
        areas.append(width * height)

    # keep only the face whose bounding box covers the largest area
    largest = np.argmax(np.asarray(areas, dtype=float))
    dlibout = np.reshape(np.asarray(flat_landmarks[largest], dtype=np.int64), [68, 2])

    return dlibout, image_u8

In [None]:
def extract_features_labels(images_dir, labels_dir, name):
    """
    Extract landmark features and gender labels for every image in ``images_dir``.

    Each image is loaded and passed to ``run_dlib_shape``; when a face is found
    its 68 (x, y) landmarks are flattened to 136 values. The resulting arrays
    are also saved to ``all_features_<name>.npy`` and ``gender_labels_<name>.npy``
    in the current working directory.

    :param images_dir: directory containing the image files.
    :param labels_dir: path of the tab-separated labels file (a file, despite
                       the parameter name). The first line is skipped as a
                       header; column 0 is matched against the image file name
                       without its extension and column 2 is read as the
                       integer gender label.
    :param name:       suffix used in the names of the saved ``.npy`` files.
    :return:
        landmark_features:  array of shape (n_detected, 136) with the landmark
                            coordinates of each image in which a face was detected
        gender_labels:      array with the gender label of each image in which
                            a face was detected, in the same order
        fails:              list of file names (without extension) of the images
                            in which no face was detected
    :raises KeyError: if an image with a detected face has no entry in the
                      labels file.
    """
    # Context manager so the labels file handle is always closed.
    with open(labels_dir, 'r') as labels_file:
        lines = labels_file.readlines()

    label_by_name = {}
    for line in lines[1:]:
        if not line.strip():
            continue  # tolerate blank lines, e.g. at the end of the file
        fields = line.rstrip('\n').split('\t')
        label_by_name[fields[0]] = int(fields[2])

    all_features = []
    all_labels = []
    fails = []

    # os.listdir order is arbitrary; sorting makes the row order of the saved
    # arrays reproducible between runs.
    entries = sorted(os.listdir(images_dir))
    total = len(entries)

    for counter, entry in enumerate(entries, start=1):
        img_path = os.path.join(images_dir, entry)
        # Strip only the extension of the file itself, so dots or separators in
        # the directory part of the path cannot corrupt the lookup key.
        file_name = os.path.splitext(entry)[0]

        # load image
        img = image.img_to_array(
            image.load_img(img_path,
                           target_size=None,
                           interpolation='bicubic'))
        features, _ = run_dlib_shape(img)
        if features is not None:
            all_features.append(features)
            all_labels.append(label_by_name[file_name])
        else:
            fails.append(file_name)

        # periodic progress report instead of one line per image
        if counter % 500 == 0 or counter == total:
            print('{}/{} images processed'.format(counter, total))

    landmark_features = np.array(all_features)
    landmark_features = landmark_features.reshape(len(all_features), 136)
    gender_labels = np.array(all_labels)

    np.save("all_features_{}.npy".format(name), landmark_features)
    np.save("gender_labels_{}.npy".format(name), gender_labels)

    return landmark_features, gender_labels, fails


In [None]:
# Training split: run the extraction once and keep the returned arrays,
# including the list of images in which no face was detected.
train_img = 'Dataset/dataset_AMLS_22-23/celeba/img'
train_label = 'Dataset/dataset_AMLS_22-23/celeba/labels.csv'
train_landmarks, train_genders, train_fails = extract_features_labels(train_img, train_label, 'train')

# Test split: same extraction on the held-out images.
test_img = 'Dataset/dataset_AMLS_22-23_test/celeba_test/img'
test_label = 'Dataset/dataset_AMLS_22-23_test/celeba_test/labels.csv'
test_landmarks, test_genders, test_fails = extract_features_labels(test_img, test_label, 'test')

# Short summary instead of echoing the full result tuple as the cell output.
print('train: {} faces found, {} images without a detected face'.format(
    len(train_landmarks), len(train_fails)))
print('test: {} faces found, {} images without a detected face'.format(
    len(test_landmarks), len(test_fails)))
