In [41]:
import os
import matplotlib.pyplot as plt
import cv2 
import numpy as np
from sklearn.decomposition import PCA
from tqdm import tqdm
from sklearn.model_selection import cross_validate, cross_val_predict, RandomizedSearchCV, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
import random
from PIL import Image,ImageEnhance 
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score
import codecs, json 

READ_FULL_DATA = False


In [42]:
def preprocess_image(img_file, sharpness_factor = 10, bordersize = 3):
        im = Image.open(img_file)
        
        bright= ImageEnhance.Brightness(im)
        if(np.average(np.array(im))<128):
            im=bright.enhance(2.5)  
              
        enhancer = ImageEnhance.Sharpness(im)  
        im_s_1 = enhancer.enhance(sharpness_factor)
        # plt.imshow(im_s_1, cmap='gray')
        
        #(width, height) = (im.width , im.height * 2)
        #im_s_1 = im_s_1.resize((width, height))
        image = np.array(im_s_1)
        image = cv2.copyMakeBorder(
            image,
            top=bordersize,
            bottom=bordersize,
            left=bordersize,
            right=bordersize,
            borderType=cv2.BORDER_CONSTANT,
            value=[255,255,255]
        )
        orig_image = image.copy()
        
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = cv2.GaussianBlur(image,(3,3),0)
        
        (thresh, bw_image) = cv2.threshold(image, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        return bw_image, orig_image
    

In [43]:
def get_contour_pixels(bw_image):
        contours, _= cv2.findContours(
            bw_image, cv2.RETR_TREE, 
            cv2.CHAIN_APPROX_NONE
            ) 
        # contours = imutils.grab_contours(contours)
        contours = sorted(contours, key=cv2.contourArea, reverse=True)[1:]
        
        img2 = bw_image.copy()[:,:,np.newaxis]
        img2 = np.concatenate([img2, img2, img2], axis = 2)
        return contours

In [44]:
def get_cold_features(contours,approx_poly_factor = 0.01):
     
        N_RHO_BINS = 7
        N_ANGLE_BINS = 12
        N_BINS = N_RHO_BINS * N_ANGLE_BINS
        BIN_SIZE = 360 // N_ANGLE_BINS
        R_INNER = 5.0
        R_OUTER = 35.0
        K_S = np.arange(3, 8)
        
        rho_bins_edges = np.log10(np.linspace(R_INNER, R_OUTER, N_RHO_BINS))
        feature_vectors = np.zeros((len(K_S), N_BINS))
        
        # print([len(cnt) for cnt in contours])
        for j, k in enumerate(K_S):
            hist = np.zeros((N_RHO_BINS, N_ANGLE_BINS))
            for cnt in contours:
                epsilon = approx_poly_factor * cv2.arcLength(cnt,True)
                cnt = cv2.approxPolyDP(cnt,epsilon,True)
                n_pixels = len(cnt)
                
                point_1s = np.array([point[0] for point in cnt])
                x1s, y1s = point_1s[:, 0], point_1s[:, 1]
                point_2s = np.array([cnt[(i + k) % n_pixels][0] for i in range(n_pixels)])
                x2s, y2s = point_2s[:, 0], point_2s[:, 1]
                
                thetas = np.degrees(np.arctan2(y2s - y1s, x2s - x1s) + np.pi)
                rhos = np.sqrt((y2s - y1s) ** 2 + (x2s - x1s) ** 2)
                rhos_log_space = np.log10(rhos)
                
                quantized_rhos = np.zeros(rhos.shape, dtype=int)
                for i in range(N_RHO_BINS):
                    quantized_rhos += (rhos_log_space < rho_bins_edges[i])
                    
                for i, r_bin in enumerate(quantized_rhos):
                    theta_bin = int(thetas[i] // BIN_SIZE) % N_ANGLE_BINS
                    hist[r_bin - 1, theta_bin] += 1
                
            normalised_hist = hist / hist.sum()
            feature_vectors[j] = normalised_hist.flatten()
            
        return feature_vectors.flatten()



In [45]:
def get_hinge_features(contours):

        N_ANGLE_BINS = 40
        BIN_SIZE = 360 // N_ANGLE_BINS
        LEG_LENGTH = 25
       
        hist = np.zeros((N_ANGLE_BINS, N_ANGLE_BINS))
            
        # print([len(cnt) for cnt in contours])
        for cnt in contours:
            n_pixels = len(cnt)
            if n_pixels <= LEG_LENGTH:
                continue
            
            points = np.array([point[0] for point in cnt])
            xs, ys = points[:, 0], points[:, 1]
            point_1s = np.array([cnt[(i + LEG_LENGTH) % n_pixels][0] for i in range(n_pixels)])
            point_2s = np.array([cnt[(i - LEG_LENGTH) % n_pixels][0] for i in range(n_pixels)])
            x1s, y1s = point_1s[:, 0], point_1s[:, 1]
            x2s, y2s = point_2s[:, 0], point_2s[:, 1]
            
            phi_1s = np.degrees(np.arctan2(y1s - ys, x1s - xs) + np.pi)
            phi_2s = np.degrees(np.arctan2(y2s - ys, x2s - xs) + np.pi)
            
            indices = np.where(phi_2s > phi_1s)[0]
            
            for i in indices:
                phi1 = int(phi_1s[i] // BIN_SIZE) % N_ANGLE_BINS
                phi2 = int(phi_2s[i] // BIN_SIZE) % N_ANGLE_BINS
                hist[phi1, phi2] += 1
                
        normalised_hist = hist / np.sum(hist)
        feature_vector = normalised_hist[np.triu_indices_from(normalised_hist, k = 1)]
        
        return feature_vector

In [46]:
# Read the data
if READ_FULL_DATA:
    img_contours = []

    files_list = os.listdir('./data/Females')
    for i in tqdm(range(len(files_list))):
        img_path = f"./data/Females/{files_list[i]}"
        bw_image, _ = preprocess_image(img_path)
        contours = get_contour_pixels(bw_image)
        img_contours.append(contours)

    files_list = os.listdir('./data/Males')
    for i in tqdm(range(132)):
        img_path = f"./data/Males/{files_list[i]}"
        bw_image, _ = preprocess_image(img_path)
        contours = get_contour_pixels(bw_image)
        img_contours.append(contours)


males = [0] * len(os.listdir('./data/Females'))
females = [1] * len(os.listdir('./data/Males'))
y = males + females
print("Females:", len(females), "\tMales:", len(males), "\ntotal:", len(y))
    


Females: 232 	Males: 131 
total: 363


In [47]:
# extract the features
if READ_FULL_DATA:
    X_COLD = []
    X_HINGE = []

    for i in tqdm(range(len(img_contours))):
        X_COLD.append(get_cold_features(img_contours[i]))
        X_HINGE.append(get_hinge_features(img_contours[i]))
    

In [48]:
# StandardScaler
from sklearn.preprocessing import StandardScaler
if READ_FULL_DATA:
    scaler = StandardScaler()
    X_COLD = scaler.fit_transform(X_COLD)
    X_HINGE = scaler.fit_transform(X_HINGE)

In [49]:
# save features in json format
if READ_FULL_DATA:

    json.dump(X_COLD.tolist(), codecs.open('X_COLD.json', 'w', encoding='utf-8'), 
            separators=(',', ':'), 
            sort_keys=True, 
            indent=4) 
    json.dump(X_HINGE.tolist(), codecs.open('X_HINGE.json', 'w', encoding='utf-8'), 
            separators=(',', ':'), 
            sort_keys=True, 
            indent=4) 


In [50]:
# read the features
if not READ_FULL_DATA:

    X_COLD = np.array(json.loads(codecs.open('X_COLD.json', 'r', encoding='utf-8').read()))
    X_HINGE = np.array(json.loads(codecs.open('X_HINGE.json', 'r', encoding='utf-8').read()))


# COLD Features

In [51]:
# calc the PCA for COLD features
pca_COLD = PCA(n_components=50)
X_train_COLD_PCA = pca_COLD.fit_transform(X_COLD)
np.sum(pca_COLD.explained_variance_ratio_)



0.9406110401358289

In [52]:
# train on the COLD features
temp = list(zip(X_train_COLD_PCA, y))
random.shuffle(temp)
res1, res2 = zip(*temp)

scoring = {'accuracy' : make_scorer(accuracy_score), 
           'precision' : make_scorer(precision_score),
           'recall' : make_scorer(recall_score), 
           'f1_score' : make_scorer(f1_score)}

clfR = RandomForestClassifier(n_estimators=10000) 
scores = cross_val_score(clfR, res1, res2, cv=5)


In [57]:
# print(np.average(scores['test_accuracy']))
# print(np.average(scores['test_f1_score']))
print(np.average(scores))

0.749056603773585


# HINGE Features

In [58]:
# calc the PCA for HINGE features
pca_HINGE = PCA(n_components=100)
X_train_HINGE_PCA = pca_HINGE.fit_transform(X_HINGE)
np.sum(pca_HINGE.explained_variance_ratio_)


0.9121056440996939

In [59]:
# train on the HINGE features
temp = list(zip(X_train_HINGE_PCA, y))
random.shuffle(temp)
res1, res2 = zip(*temp)

scoring = {'accuracy' : make_scorer(accuracy_score), 
           'precision' : make_scorer(precision_score),
           'recall' : make_scorer(recall_score), 
           'f1_score' : make_scorer(f1_score)}

clfR = RandomForestClassifier(n_estimators=10000) 
scores = cross_validate(clfR, res1, res2, cv=5, scoring=scoring)

In [60]:
print(np.average(scores['test_accuracy']))
print(np.average(scores['test_f1_score']))

0.7907837445573295
0.7918249157427849


# COLD & HINGE Features

In [61]:
# train on the COLD and HINGE features
X_train_PCA = np.concatenate([X_COLD, X_HINGE], axis=1)
temp = list(zip(X_train_PCA, y))
random.shuffle(temp)
res1, res2 = zip(*temp)

clfR = RandomForestClassifier(n_estimators=10000) 
scores = cross_val_score(clfR, res1, res2, cv=5)


In [63]:
# print(np.average(scores['test_accuracy']))
# print(np.average(scores['test_f1_score']))
print(np.average(scores))

0.7414368650217706
