In [1]:
import warnings
warnings.filterwarnings('ignore')
# Utilities库
import sys
import os
from tqdm import tqdm
import math
import numpy as np
import cv2
import tensorflow as tf
import facenet
import detect_face
import pickle

from sklearn.svm import SVC
from sklearn.svm import LinearSVC

In [3]:
# Compute face embeddings with the pre-trained Facenet model (TensorFlow graph/session API).
# This cell leaves `paths`, `labels`, `labels_dict` and `emb_array` bound at notebook level;
# later cells pickle them. IMG_OUT_PATH and FACENET_MODEL_PATH must already be defined.
with tf.Graph().as_default():
    with tf.Session() as sess:
        datadir = IMG_OUT_PATH  # directory of detected, aligned and cropped face images
        dataset = facenet.get_dataset(datadir)
        paths, labels, labels_dict = facenet.get_image_paths_and_labels(
            dataset)
        print('Origin: Number of classes: %d' % len(labels_dict))
        print('Origin: Number of images: %d' % len(paths))

        # Many identities in LFW have only a single image, so keep only the
        # identities with enough samples (filter_size=5).
        # NOTE(review): whether the threshold is inclusive (>= 5) or strict (> 5)
        # is decided inside facenet.get_image_paths_and_labels — confirm there.

        paths, labels, labels_dict = facenet.get_image_paths_and_labels(
            dataset, enable_filter=True, filter_size=5)
        print('Filtered: Number of classes: %d' % len(labels_dict))
        print('Filtered: Number of images: %d' % len(paths))

        # Load the Facenet model into the current default graph
        print('Loading feature extraction model')
        modeldir = FACENET_MODEL_PATH  # path to the pre-trained model, e.g. '.../20170512-110547/20170512-110547.pb'
        facenet.load_model(modeldir)

        # Look up the tensors needed for inference by their names in the loaded graph
        images_placeholder = tf.get_default_graph().get_tensor_by_name(
            "input:0")
        embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
        phase_train_placeholder = tf.get_default_graph().get_tensor_by_name(
            "phase_train:0")
        embedding_size = embeddings.get_shape()[1]
        # Print the length of the face embedding vector
        print("Face embedding size: ", embedding_size)

        # Compute the face embedding vectors. Their length is embedding_size
        # (128 in the recorded run) — a vector length, not a byte count.
        print('Calculating features for images')
        batch_size = 1000  # number of images fed to the network per sess.run call
        image_size = 160  # image size passed to facenet.load_data for the Facenet input

        nrof_images = len(paths)  # total number of face images to process
        # Number of batches needed to cover every image (the last batch may be smaller)
        nrof_batches_per_epoch = int(math.ceil(1.0 * nrof_images / batch_size))
        # Pre-allocate the array that receives one embedding row per image
        emb_array = np.zeros(
            (nrof_images, embedding_size))  # <-- face embeddings, one row per image

        for i in tqdm(range(nrof_batches_per_epoch)):
            start_index = i * batch_size
            # Clamp so the final batch stops at the last image
            end_index = min((i + 1) * batch_size, nrof_images)
            paths_batch = paths[start_index:end_index]
            # NOTE(review): the two False flags presumably disable random crop / random
            # flip — confirm against the signature of facenet.load_data.
            images = facenet.load_data(paths_batch, False, False, image_size)
            feed_dict = {
                images_placeholder: images,
                phase_train_placeholder: False
            }
            # Rows start_index:end_index line up with paths[start_index:end_index]
            emb_array[start_index:end_index, :] = sess.run(embeddings,
                                                           feed_dict=feed_dict)

Origin: Number of classes: 5750
Origin: Number of images: 13233
Filtered: Number of classes: 423
Filtered: Number of images: 5985
Loading feature extraction model
Model filename: D:\pythonworks\01_erhwen\real-time-deep-face-recognition\model\facenet\20170512-110547\20170512-110547.pb
Face embedding size:  128
Calculating features for images


100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:27<00:00,  4.59s/it]


In [4]:
# Serialize the reusable data so later cells (or later sessions) can reload it
# without recomputing the embeddings. Each file is written inside a `with` block
# so the handle is closed even if pickle.dump raises.

# Save the face embedding array
with open(os.path.join(DATA_PATH, 'lfw_emb_features.pkl'), 'wb') as emb_features_file:
    pickle.dump(emb_array, emb_features_file)

# Save the label that corresponds to each embedding row
with open(os.path.join(DATA_PATH, 'lfw_emb_labels.pkl'), 'wb') as emb_lables_file:
    pickle.dump(labels, emb_lables_file)

# Save the dictionary that maps each label to its face (person) name
with open(os.path.join(DATA_PATH, 'lfw_emb_labels_dict.pkl'), 'wb') as emb_lables_dict_file:
    pickle.dump(labels_dict, emb_lables_dict_file)

In [5]:
# Deserialize the reusable data written by the serialization cell


def _load_pickle(filename):
    """Return the object unpickled from `filename` inside DATA_PATH."""
    with open(os.path.join(DATA_PATH, filename), 'rb') as handle:
        return pickle.load(handle)


# Face embedding array
emb_features = _load_pickle('lfw_emb_features.pkl')

# Label corresponding to each face embedding
emb_labels = _load_pickle('lfw_emb_labels.pkl')

# Dictionary mapping each label to its face (person) name
emb_labels_dict = _load_pickle('lfw_emb_labels_dict.pkl')

In [6]:
# Summarize the reloaded data: element count, array shape and Python type.
print("人臉embedding featues: {}, shape: {}, type: {}".format(len(emb_features), emb_features.shape, type(emb_features)))
print("人臉embedding labels: {}, type: {}".format(len(emb_labels), type(emb_labels)))
# The values must go through .format(); passing them as extra print() arguments
# left the "{}" placeholders unfilled in the output.
print("人臉embedding labels dict: {}, type: {}".format(len(emb_labels_dict), type(emb_labels_dict)))

人臉embedding featues: 5985, shape: (5985, 128), type: <class 'numpy.ndarray'>
人臉embedding labels: 5985, type: <class 'list'>
人臉embedding labels dict: {}, type: {} 423 <class 'dict'>


In [7]:
# Containers for the two splits
X_train, y_train = [], []
X_test, y_test = [], []

# Labels that already contributed their hold-out sample
processed = set()

# Split: the first embedding seen for each label goes to the test set,
# every later embedding of that label goes to the training set.
for feature_vector, label in zip(emb_features, emb_labels):
    if label not in processed:
        X_test.append(feature_vector)
        y_test.append(label)
        processed.add(label)
        continue
    X_train.append(feature_vector)
    y_train.append(label)

# Report the size of each split
print('X_train: {}, y_train: {}'.format(len(X_train), len(y_train)))
print('X_test: {}, y_test: {}'.format(len(X_test), len(y_test)))


X_train: 5562, y_train: 5562
X_test: 423, y_test: 423


In [8]:
# Train a linear SVM (one-vs-rest) on the face embeddings
print('Training classifier')
# LinearSVC's solver uses a pseudo-random number generator; fixing random_state
# makes the fitted model and the reported score reproducible across runs.
linearsvc_classifier = LinearSVC(C=1, multi_class='ovr', random_state=42)

# Fit on the training split
linearsvc_classifier.fit(X_train, y_train)

# Mean accuracy on the held-out split (one sample per label)
score = linearsvc_classifier.score(X_test, y_test)

# Print the accuracy
print("Validation result: ", score)

Training classifier
Validation result:  0.978723404255


In [9]:
# Serialize the face classifier model
classifier_filename = SVM_MODEL_PATH

# Build the list of person names used at recognition time: one entry per label
# key, in ascending key order, with underscores replaced by spaces.
class_names = [
    emb_labels_dict[label_key].replace('_', ' ')
    for label_key in sorted(emb_labels_dict)
]

# Store the classifier together with its class names in a single pickle
with open(classifier_filename, 'wb') as outfile:
    pickle.dump((linearsvc_classifier, class_names), outfile)

print('Saved classifier model to file "%s"' % classifier_filename)

Saved classifier model to file "D:\pythonworks\01_erhwen\real-time-deep-face-recognition\model\svm\lfw_svm_classifier.pkl"


In [10]:
# Display how many class names were saved alongside the classifier
len(class_names)

423