References

https://github.com/ageitgey/face_recognition

https://www.pyimagesearch.com/2018/07/09/face-clustering-with-python/

In [1]:
# import the necessary packages
import face_recognition # by Adam Geitgey, a PyImageConf2018 speaker
import pickle
import cv2
import os

from itertools import chain 
from PIL import Image

import numpy as np

# Face Encoding

In [2]:
def load_directory_data(pwd):
    """Recursively collect the path of every file under ``pwd``.

    Args:
        pwd: Root directory to walk; all sub-directories are included.

    Returns:
        A sorted list of file paths. Each path is built from ``pwd`` and the
        walked sub-directory, so the paths are relative when ``pwd`` is
        relative. The list is empty when nothing is found.
    """
    file_path_list = []

    # os.walk yields (directory, sub-directory names, file names) per directory.
    for path, _dirs, files in os.walk(pwd):
        for f in files:
            # os.path.join avoids a doubled separator when `pwd` ends with one
            # and uses the platform's separator.
            file_path_list.append(os.path.join(path, f))

    # The walk order depends on the filesystem; sort so that list indices
    # (used later to name exported images) are reproducible between runs.
    file_path_list.sort()

    print("# of files: ", len(file_path_list))

    return file_path_list

# Collect the path of every input image under the sample-faces directory.
# The root below is relative, so the returned paths are relative as well.
print("[INFO] quantifying faces...")
imagePaths = load_directory_data(
    'Sample Data Collection From MELD/SampleFaces'
)

[INFO] quantifying faces...
# of files:  79


In [3]:
# Build one record per image: its path, the face bounding box and the
# encodings returned by face_recognition for that box.
data = []

for (i, imagePath) in enumerate(imagePaths):
    # Progress counter, rewritten in place through the carriage return.
    print("[INFO] processing image {}/{}".format(i + 1, len(imagePaths)), end='\r')

    # cv2.imread returns None instead of raising when a file cannot be decoded
    # (e.g. a stray non-image file picked up by the directory walk); skip it
    # rather than crash in cvtColor.
    image = cv2.imread(imagePath)
    if image is None:
        print("\n[WARN] skipping unreadable image: {}".format(imagePath))
        continue

    # OpenCV loads images as BGR; dlib / face_recognition expects RGB.
    rgb_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The inputs are already cropped to a single face, so the whole image is
    # used as the face box instead of running a face detector.
    # Boxes are (top, right, bottom, left) and image.shape is
    # (height, width, channels): right is the width, bottom is the height.
    height, width = image.shape[:2]
    boxes = [(0, width, height, 0)]

    # One encoding is returned per box, so `encodings` is a one-element list.
    encodings = face_recognition.face_encodings(rgb_img, boxes)

    # Record the image path, bounding box and encodings for the current image.
    d = {"imagePath": imagePath, "loc": boxes, "encoding": encodings}
    data.append(d)

    # If an image could contain several faces, emit one record per
    # (box, encoding) pair here instead of a single record per image.

# Terminate the in-place progress line so the count prints on its own line.
print()
print(len(data))

79NFO] processing image 79/79


Boxes가 없는 경우 encoding 오류 날 때 있다. 원래 Face 얼굴대로 Crop된 이미지들 이므로 box의 크기를 이미지 사이즈 그대로 설정하였다.

In [4]:
import pickle

# Write the per-image records to disk with the newest pickle protocol.
with open('face_encoding.pickle', 'wb') as pickle_file:
    pickle.dump(data, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

For each of the detected faces + encodings, we build a dictionary that includes:

1. The path to the input image
2. The location of the face in the image (i.e., the bounding box)
3. The 128-d encoding itself

# Face Clustering 

In [5]:
# import the necessary packages
from sklearn.cluster import DBSCAN
import numpy as np
import pickle
import cv2

In [6]:
# Read the pickled per-image records (path, box, encodings) back from disk,
# then pull out each record's encodings so they can be clustered.
with open('face_encoding.pickle', 'rb') as pickle_file:
    data = pickle.load(pickle_file)

encodings = [
    record["encoding"]
    for record in data
]

In [7]:
# Inspect the array shape produced by the stored encodings before clustering.
np.array(encodings).shape

(79, 1, 128)

이 shape 로 DBSCAN 에 넣지 못함

In [8]:
# Collapse each image's encodings into a single flat vector so the stacked
# array is 2-D: (79, 1, 128) --> (79, 128)
encoding_new = [np.array(per_image).flatten() for per_image in encodings]

np.array(encoding_new).shape

(79, 128)

In [9]:
# Cluster the flattened embeddings with DBSCAN (euclidean metric, 4 jobs).
print("[INFO] clustering...")
clt = DBSCAN(n_jobs=4, metric="euclidean")
clt.fit(encoding_new)

# Show the label assigned to every image, then count the distinct labels
# greater than -1 as the number of unique faces.
print(clt.labels_)
labelIDs = np.unique(clt.labels_)
numUniqueFaces = sum(1 for label in labelIDs if label > -1)
print("[INFO] # unique faces: {}".format(numUniqueFaces))

[INFO] clustering...
[ 0  3  0  1  0  1  0  1  1  2  2  1 -1  2  0  3  1  1  4  0  3  1  1  3
  4  0  2  4  4  1  1  2  2  3  1 -1  1  0  1  2  4  1  4  1  1  1  1  4
  3  4  0  0 -1  4  1  0  2  4  1  4  0 -1  3  3  1  1  2  3  2 -1 -1  3
  0  4 -1  2  4  1  2]
[INFO] # unique faces: 5


In [10]:
# Export every image assigned to one cluster into that cluster's own folder,
# named by the image's index in `data`.
target_label = 4
output_dir = os.path.join('Sample Data Collection From MELD', str(target_label))

# Image.save does not create missing directories, so make sure it exists.
os.makedirs(output_dir, exist_ok=True)

for i, (d, label) in enumerate(zip(data, clt.labels_)):
    # Only open images that belong to the target cluster.
    if label != target_label:
        continue
    # The context manager closes the underlying file once the copy is saved.
    with Image.open(d['imagePath']) as image:
        image.save(os.path.join(output_dir, str(i) + '.jpg'))

In [11]:
# Spot check: is the first image assigned to cluster 0?
clt.labels_[0] == 0

True