# Importing libraries

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import os
import dlib
from mtcnn.mtcnn import MTCNN
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import pandas as pd
import face_recognition
from yellowbrick.cluster import KElbowVisualizer
from sklearn.cluster import MiniBatchKMeans

In [None]:
def rotate_image(image, angle):
    """Rotate ``image`` about its centre by ``angle`` degrees.

    The output canvas has the same width and height as the input, and the
    image is resampled with bilinear interpolation at scale 1.0.
    """
    height, width = image.shape[:2]
    # OpenCV takes points and sizes as (x, y) / (width, height), i.e. the
    # reverse of NumPy's (rows, cols) ordering.
    centre = tuple(np.array((width, height)) / 2)
    rotation = cv2.getRotationMatrix2D(centre, angle, 1.0)
    return cv2.warpAffine(image, rotation, (width, height), flags=cv2.INTER_LINEAR)

# Face detection and alignment

In [None]:
def face_align(image, direc = ''):
    """Detect the faces in an image file and return them cropped and aligned.

    Relies on the module-level ``detector`` (an ``MTCNN`` instance) and on
    ``rotate_image`` being defined before the call.

    Args:
        image: File name of the image, relative to ``direc``.
        direc: Sub-directory of the current working directory that holds the
            file, written with a trailing slash (e.g. ``'camera/'``).

    Returns:
        A list of 150x150 BGR face crops, one per detection whose confidence
        exceeds 0.985, each rotated so that the eyes lie on a horizontal line.
        The list is empty when the file cannot be read or no face qualifies.
    """
    img = cv2.imread(os.getcwd() + '/' + direc + image)
    if img is None:
        # cv2.imread reports an unreadable/unsupported file by returning None.
        return []

    # MTCNN expects RGB input while OpenCV loads BGR. Detect on a converted
    # copy, but keep cropping from the BGR original so that the crops can be
    # written back with cv2.imwrite unchanged.
    detections = detector.detect_faces(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    aligned_faces = []
    for detection in detections:
        if detection['confidence'] <= 0.985:
            continue

        x, y, w, h = detection['box']
        # The detector can report boxes that start outside the image. A
        # negative index would wrap around in the slice and produce a wrong
        # or empty crop, so clamp both ends at zero.
        x0, y0 = max(x, 0), max(y, 0)
        x1, y1 = max(x + w, 0), max(y + h, 0)
        face = img[y0:y1, x0:x1]
        if face.size == 0:
            # Nothing left to resize; cv2.resize would raise on an empty crop.
            continue

        left_eye = detection['keypoints']['left_eye']
        right_eye = detection['keypoints']['right_eye']
        # arctan2 handles eyes that share an x coordinate (the plain dy/dx
        # quotient divided by zero there) and np.degrees converts with the
        # exact value of pi instead of the 3.14 approximation.
        angle = float(np.degrees(np.arctan2(right_eye[1] - left_eye[1],
                                            right_eye[0] - left_eye[0])))
        aligned_faces.append(cv2.resize(rotate_image(face, angle), (150, 150)))
    return aligned_faces

# Creating/saving faces 

In [None]:
# Detect, align and save every face found in the images under ./camera.
# Crops are written to ./Faces, which is created on first use.
base_dir = os.getcwd()

if not os.path.exists('Faces'):
    print("New directory created")
    os.makedirs('Faces')

count = 0  # total number of face crops written
detector = MTCNN()  # module-level detector read by face_align

# List the directory once: re-listing it on every iteration was quadratic and
# could shift indices if the directory changed while the loop was running.
camera_files = os.listdir(base_dir + '/camera')
num = len(camera_files)
for k, file in enumerate(camera_files):
    file_name, file_extension = os.path.splitext(file)
    # Compare case-insensitively so files such as 'IMG_0001.JPG' are not skipped.
    if file_extension.lower() not in ('.png', '.jpg', '.jpeg'):
        continue
    faces = face_align(file, 'camera/')
    # Redraw the progress bar in place; (k + 1) lets it reach 100% on the
    # last file instead of stopping one step short.
    percent = ((k + 1) * 100) // num
    print("\r[%-100s] %d%%" % ('=' * percent, percent), end='', flush=True)
    for c, face in enumerate(faces):
        # The crop index is appended to the full source name ('a.jpg0.jpg').
        cv2.imwrite(base_dir+'/Faces/'+file+str(c)+'.jpg', face)
        count = count + 1
print ('\nfound '+str(count)+' faces')

# Face encoding array

In [None]:
# Build one 128-value face encoding per saved crop in ./Faces.
# Results: arrx (encodings), img_arr (pixel data) and img_names (file names),
# all indexed consistently so that row r of each describes the same face.
base_dir = os.getcwd()
images = os.listdir(base_dir + '/Faces')
num = len(images)

# Pre-allocate for the case where every file encodes; trimmed after the loop.
arrx = np.zeros((num,128))
img_arr = np.zeros((num,150,150,3))
i = 0  # number of rows filled so far
img_names = []
for img_file in images:
    image = cv2.imread(base_dir + '/Faces/'+img_file)
    # Skip anything that is not a readable 150x150 colour crop (stray files,
    # sub-directories, ...) explicitly rather than through a blanket except.
    if image is None or image.shape != (150, 150, 3):
        continue
    try:
        # face_recognition works on RGB data whereas OpenCV loads BGR. The crop
        # already is a face, so the whole frame is passed as the known face
        # location in (top, right, bottom, left) order.
        encodings = face_recognition.face_encodings(
            cv2.cvtColor(image, cv2.COLOR_BGR2RGB),
            known_face_locations=[(0, 150, 150, 0)],
        )
    except Exception as err:
        # Stay best-effort, but report the failure instead of hiding it, and
        # let KeyboardInterrupt/SystemExit propagate.
        print('skipping ' + img_file + ': ' + str(err))
        continue
    if not encodings:
        continue
    arrx[i,:] = np.array(encodings[0])
    img_arr[i,:,:,:] = image  # kept in BGR for later cv2.imwrite calls
    img_names.append(img_file)
    i = i + 1

# Drop the unfilled tail rows: left in place they would be all-zero vectors
# that the clustering steps treat as real faces.
arrx = arrx[:i]
img_arr = img_arr[:i]

In [None]:
# Display the dimensions of the encoding matrix; arrx is allocated with 128 columns per row.
arrx.shape

# Finding the optimal number of clusters

In [None]:
# Elbow analysis of K-means on the face encodings.
# Bounds of the range of cluster counts handed to the visualizer.
elbow_k_range = (5, 30)

model = KMeans()
visualizer = KElbowVisualizer(model, k=elbow_k_range, timings=True)

# Fit the encodings, then finalize and render the figure.
visualizer.fit(arrx)
visualizer.show()

In [None]:
# Silhouette analysis: score K-means for every candidate cluster count,
# report the best one and plot score against k.
range_n_clusters = list(range(5, 30))
kmeans_options = dict(init='k-means++', max_iter=300, n_init=50)

x = []  # candidate cluster counts, in evaluation order
y = []  # silhouette score for the matching entry of x
for n_clusters in range_n_clusters:
    clusterer = KMeans(n_clusters=n_clusters, **kmeans_options)
    preds = clusterer.fit_predict(arrx)
    score = silhouette_score(arrx, preds)
    x.append(n_clusters)
    y.append(score)

# Cluster count with the highest score; on a tie the earliest (smallest) wins.
n_clusters = max(zip(x, y), key=lambda pair: pair[1])[0]
print(n_clusters)

plt.plot(x, y)
plt.title('Silhouette score for Kmeans clustering')
plt.xlabel('number of clusters (k)')
plt.ylabel('Silhouette score')
# Save before show(): the figure is written with an opaque white background.
plt.savefig(
    'k-means_silhouetter_score.png',
    facecolor='w',
    edgecolor='w',
    transparent=False,
)
plt.show()

# K-means clustering

In [None]:
# Cluster the face encodings with K-means and export each cluster's images
# to its own 'Category<j>' folder in the current working directory.
k = 18  # number of clusters

# Only the first len(img_names) rows of arrx/img_arr were filled with real
# encodings. Using that count (instead of a hard-coded 277) keeps names,
# encodings and labels aligned for any number of faces and keeps unfilled
# zero rows out of the fit.
n_faces = len(img_names)
clusters = KMeans(k, random_state = 40)
clusters.fit(arrx[:n_faces])
label = clusters.labels_

# File name -> cluster id lookup table; evaluate `image_cluster` in a cell of
# its own to display it.
image_cluster = pd.DataFrame({'image': img_names, 'clusterid': label})

for j in range(k):
    category_dir = 'Category'+str(j)
    if not os.path.exists(category_dir):
        print("New directory created")
        os.makedirs(category_dir)
    # Row indices of the faces assigned to cluster j.
    for i in np.where(label == j)[0]:
        img = np.float32(img_arr[i])
        cv2.imwrite(os.getcwd()+'/'+category_dir+'/img'+str(j)+str(i)+'.png', img)

# Mini-batch K-means clustering

In [None]:
# Cluster the face encodings with mini-batch K-means and export each
# cluster's images to its own 'Category<j>' folder.
total_clusters = 11
# Initialize the K-Means model
kmeans = MiniBatchKMeans(n_clusters = total_clusters)
# Fit on the encoding matrix `arrx`; the previous `arx` was an undefined name
# and raised NameError.
kmeans.fit(arrx)
label = kmeans.labels_

# NOTE(review): these are the same 'Category<j>' folders the K-means cell
# writes to, so running both cells mixes their outputs - confirm intended.
for j in range(total_clusters):
    category_dir = 'Category'+str(j)
    if not os.path.exists(category_dir):
        print("New directory created")
        os.makedirs(category_dir)
    # Row indices of the faces assigned to cluster j.
    for i in np.where(label == j)[0]:
        img = np.float32(img_arr[i])
        cv2.imwrite(os.getcwd()+'/'+category_dir+'/img'+str(j)+str(i)+'.png', img)