### 导入必备工具包

In [5]:
import face_recognition
import pickle
import cv2
import os
import numpy as np
from sklearn.cluster import DBSCAN

In [6]:
# Root folder that is scanned for input images.
dataset = "dataset"
# File name used when the face records are pickled to / loaded from disk.
encodings = "encodings.pickle"
# Detector model name handed to face_recognition.face_locations(model=...).
detection_method = "cnn"

### 读取到所有输入数据的路径

In [None]:
def list_files(basePath, validExts=None):
    """Lazily yield the path of every file found beneath ``basePath``.

    Args:
        basePath: Directory that is walked recursively with ``os.walk``.
        validExts: Optional extension (``".jpg"``) or iterable of extensions
            used to filter the results. Matching is case-insensitive. When
            ``None`` (the default) every file is yielded.

    Yields:
        ``os.path.join(rootDir, filename)`` for each accepted file.
    """
    if validExts is not None:
        # Accept a single extension as well as a collection of them.
        if isinstance(validExts, str):
            validExts = (validExts,)
        # str.endswith needs a tuple; lower-case once so ".JPG" matches ".jpg".
        validExts = tuple(ext.lower() for ext in validExts)

    for rootDir, _dirNames, filenames in os.walk(basePath):
        for filename in filenames:
            if validExts is not None and not filename.lower().endswith(validExts):
                continue
            yield os.path.join(rootDir, filename)

In [19]:
# Drain the path generator into a list of input file paths.
imagePaths = [*list_files(dataset)]

### 对图像进行编码，转换成128D的向量

In [None]:
# Build one record per detected face: the source image path, the face
# bounding box and its encoding vector.
data = []
for (i, imagePath) in enumerate(imagePaths):
    print('当前输入数据索引', i)
    # Load the image from disk.
    image = cv2.imread(imagePath)
    # cv2.imread returns None (it does not raise) when a file cannot be read
    # as an image; skip such files instead of crashing in cvtColor.
    if image is None:
        print('skipping unreadable image', imagePath)
        continue
    # Reorder the channels from BGR to RGB before running the face detector.
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Face detection: one bounding box per face found.
    boxes = face_recognition.face_locations(rgb, model=detection_method)
    # Vector encoding of each detected face.
    # Deliberately NOT stored in `encodings`: that name holds the pickle file
    # path and must survive this loop so the results can be saved.
    faceEncodings = face_recognition.face_encodings(rgb, boxes)
    # Combine box and encoding into one record per face.
    data.extend(
        {'imagePath': imagePath, 'loc': box, 'encoding': enc}
        for (box, enc) in zip(boxes, faceEncodings)
    )

当前输入数据索引 0
当前输入数据索引 1
当前输入数据索引 2


### 保存到本地

In [None]:
# Serialise the collected face records to the file named by `encodings`.
# pickle.dump requires the target file object as its second argument; the
# context manager closes the file even if pickling fails.
with open(encodings, 'wb') as f:
    pickle.dump(data, f)

### 读取保存好的向量

In [7]:
# Load the pickled face records back from disk; the context manager makes
# sure the file handle is closed once the data has been read.
# NOTE(review): unpickling executes arbitrary code - only load files that
# were produced locally by this notebook.
with open(encodings, 'rb') as f:
    data = pickle.load(f)
data = np.array(data)
# From here on `encodings` is the list of encoding vectors (one per face),
# no longer the pickle file name.
encodings = [d['encoding'] for d in data]

In [8]:
# Display the encoding vector of the first stored face.
encodings[0]

array([-0.13456686,  0.14403877,  0.07261265, -0.02214705, -0.13693842,
        0.00447783,  0.04457019, -0.10250483,  0.19053376, -0.01216701,
        0.27943072, -0.05122857, -0.22868238, -0.05441248, -0.05913119,
        0.10733175, -0.17733857, -0.12875885, -0.0175312 , -0.00771059,
        0.042868  ,  0.03457959,  0.0113402 ,  0.06657487, -0.12548007,
       -0.34815612, -0.04874776, -0.1136909 ,  0.08474787, -0.08733188,
       -0.03128369,  0.05843937, -0.2168265 , -0.08472218, -0.00858918,
        0.03199864, -0.00700156, -0.06248249,  0.20330036,  0.02258976,
       -0.15130012,  0.03175352, -0.01839004,  0.31603727,  0.09881824,
       -0.00549249,  0.05976491, -0.10961753,  0.07141068, -0.1757232 ,
        0.08242613,  0.2088591 ,  0.11992976,  0.02375047, -0.02019374,
       -0.13702856,  0.01252095,  0.12100402, -0.23836303,  0.13112594,
        0.13510659, -0.00267247,  0.03402919, -0.01406269,  0.17453504,
        0.09060631, -0.03872195, -0.15619071,  0.14142843, -0.10

In [9]:
# Shape of the stacked encodings: (number of faces, encoding length).
np.array(encodings).shape

(127, 128)

### 执行聚类操作

In [10]:
# Cluster the face encodings with DBSCAN using Euclidean distance; every
# other hyper-parameter is left at its scikit-learn default. fit() returns
# the estimator itself, so construction and fitting are chained.
dbscan = DBSCAN(n_jobs=-1, metric='euclidean').fit(encodings)
# Echo the fitted estimator as the cell result.
dbscan

DBSCAN(algorithm='auto', eps=0.5, leaf_size=30, metric='euclidean',
    metric_params=None, min_samples=5, n_jobs=-1, p=None)

In [11]:
# Cluster label assigned to each face; DBSCAN uses -1 for noise samples.
dbscan.labels_

array([ 0,  0,  1,  0,  2,  3,  1,  0,  2,  0,  3,  0,  1,  0,  0,  4,  0,
        2,  1,  4,  0,  2,  1,  0,  4,  3,  1,  4,  1,  4,  0,  4,  4,  1,
        1,  1,  1,  3,  3,  2,  0,  3,  4,  2,  1,  2,  2,  4,  1,  1,  1,
        0,  0,  4,  1,  1,  2,  0,  4,  0,  1,  2,  2,  3,  0,  4,  4,  1,
        2,  4,  2,  3,  2,  1,  0,  1,  4,  4,  2,  4,  2,  0,  0,  3,  0,
        4,  1,  3,  3,  0,  2,  3,  1,  3,  1,  4,  2,  3,  2,  0,  3,  1,
        0,  3,  0,  2,  0,  2, -1,  4,  4,  0,  2,  3,  4,  0,  2,  3,  4,
        3,  4,  1,  0,  4,  4,  2,  2], dtype=int64)

In [12]:
# Distinct cluster labels found by DBSCAN, in ascending order.
labelIDs = np.unique(dbscan.labels_)
# Display them as the cell result.
labelIDs

array([-1,  0,  1,  2,  3,  4], dtype=int64)

In [13]:
# Keep only the real clusters. DBSCAN marks noise samples with the label -1,
# so drop it. Deriving the ids from the fitted model (instead of hard-coding
# them) keeps this cell correct when the number of clusters changes.
labelIDs = np.unique(dbscan.labels_)
labelIDs = labelIDs[labelIDs != -1]

In [18]:
def build_montages(image_list, image_shape, montage_shape):
    """Tile images onto one or more fixed-size grid canvases.

    Args:
        image_list: Iterable of images; each one is resized to
            ``image_shape`` with ``cv2.resize`` before being placed.
            Presumably 3-channel images, since the canvas has 3 channels.
        image_shape: ``(width, height)`` of a single tile in pixels.
        montage_shape: ``(columns, rows)`` of tiles per montage.

    Returns:
        A list of ``uint8`` arrays of shape
        ``(height * rows, width * columns, 3)``. Tiles are placed left to
        right, top to bottom. A partially filled (or, for empty input,
        completely blank) final montage is included; no extra blank montage
        is added when the last image exactly fills a canvas.
    """
    tile_w, tile_h = image_shape[0], image_shape[1]
    canvas_w = tile_w * montage_shape[0]
    canvas_h = tile_h * montage_shape[1]

    def _blank_canvas():
        # Fresh black canvas large enough for the full grid.
        return np.zeros(shape=(canvas_h, canvas_w, 3), dtype=np.uint8)

    montages = []
    canvas = _blank_canvas()
    x, y = 0, 0
    # True only right after a full canvas was stored, so that an empty
    # follow-up canvas is not appended at the end.
    just_flushed = False

    for img in image_list:
        just_flushed = False
        tile = cv2.resize(img, image_shape)
        canvas[y:y + tile_h, x:x + tile_w] = tile

        # Advance to the next column; stay on this row while there is room.
        x += tile_w
        if x < canvas_w:
            continue

        # Row is full: wrap to the start of the next row.
        x = 0
        y += tile_h
        if y < canvas_h:
            continue

        # Canvas is full: store it and start a new one.
        y = 0
        montages.append(canvas)
        canvas = _blank_canvas()
        just_flushed = True

    if not just_flushed:
        montages.append(canvas)
    return montages

In [20]:
# Show one montage of sample faces for every cluster.
for labelID in labelIDs:
    # Indices of all faces that were assigned to this cluster.
    idxs = np.where(dbscan.labels_ == labelID)[0]
    print(idxs)
    # Cap the cluster at 25 faces so it fits the 5x5 montage. The sample must
    # be assigned back (the result was previously discarded), and
    # replace=False prevents the same face from being drawn twice.
    idxs = np.random.choice(idxs, size=min(25, len(idxs)), replace=False)

    faces = []
    for i in idxs:
        image = cv2.imread(data[i]['imagePath'])
        # The stored box is ordered (top, right, bottom, left).
        (top, right, bottom, left) = data[i]['loc']
        # Crop the face region and normalise it to the montage tile size.
        face = image[top:bottom, left:right]
        face = cv2.resize(face, (96, 96))
        faces.append(face)

    # At most 25 tiles were collected, so the first montage holds them all.
    montage = build_montages(faces, (96, 96), (5, 5))[0]
    cv2.imshow('res', montage)
    # Block until a key is pressed before moving on to the next cluster.
    cv2.waitKey(0)

[  0   1   3   7   9  11  13  14  16  20  23  30  40  51  52  57  59  64
  74  81  82  84  89  99 102 104 106 111 115 122]
[  2   6  12  18  22  26  28  33  34  35  36  44  48  49  50  54  55  60
  67  73  75  86  92  94 101 121]
[  4   8  17  21  39  43  45  46  56  61  62  68  70  72  78  80  90  96
  98 105 107 112 116 125 126]
[  5  10  25  37  38  41  63  71  83  87  88  91  93  97 100 103 113 117
 119]
[ 15  19  24  27  29  31  32  42  47  53  58  65  66  69  76  77  79  85
  95 109 110 114 118 120 123 124]
