In [15]:
from facenet_pytorch import MTCNN
import torch
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import torch.nn.functional as F
from torchvision import transforms, datasets
from PIL import Image
import cv2
from facenet_pytorch import InceptionResnetV1, MTCNN
from load_data import MyPatchApplier
from patch_attack import *
import time
import platform

In [3]:
# Compute device shared by the cells below: first CUDA GPU if present, else CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

In [4]:
def BinarySearch(list, l, r):
    """Return the largest element of ``list[l..r]`` (inclusive) and its index.

    Despite the name this is not a binary search: the input does not have to
    be sorted. The range is halved recursively and the larger of the two
    half-maxima is kept (a divide-and-conquer maximum), so every element in
    the range is visited exactly once.

    Args:
        list: Indexable sequence of mutually comparable values.
        l: Index of the first element to consider.
        r: Index of the last element to consider (inclusive).

    Returns:
        A ``(value, index)`` tuple. If the maximum occurs several times, the
        lowest such index is returned.

    Raises:
        ValueError: If the range is empty (``l > r``).
    """
    if l > r:
        raise ValueError("BinarySearch called on an empty range (l > r)")
    if l == r:
        return list[r], r
    # Floor division keeps the midpoint an exact integer (no float round trip).
    m = (l + r) // 2
    lmax, lmax_idx = BinarySearch(list, l, m)
    rmax, rmax_idx = BinarySearch(list, m + 1, r)
    # ">=" prefers the left half on ties, i.e. the first occurrence wins.
    if lmax >= rmax:
        return lmax, lmax_idx
    return rmax, rmax_idx

In [41]:
def get_id_result(emb, dataset):
    """Find the database identity whose embedding is closest to ``emb``.

    Closeness is the cosine similarity over the last dimension; each
    comparison must reduce to a single value.

    Args:
        emb: Query embedding tensor.
        dataset: Indexable pair ``(embedding_list, name_list)`` in which
            ``name_list[i]`` is the label of ``embedding_list[i]``.

    Returns:
        ``(name, similarity)`` of the best match, with ``similarity`` as a
        Python float. On ties the earliest entry wins.

    Raises:
        ValueError: If ``dataset`` holds no embeddings.
    """
    embedding_list = dataset[0]
    name_list = dataset[1]

    # Use the GPU when there is one, but keep working on CPU-only machines
    # (an unconditional .cuda() raises there).
    compute_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # The query does not change inside the loop, so move it only once.
    query = emb.to(compute_device)

    dist_list = [
        F.cosine_similarity(query, emb_db.to(compute_device), dim=-1).item()
        for emb_db in embedding_list
    ]
    if not dist_list:
        raise ValueError("dataset contains no embeddings to compare against")

    max_sim = max(dist_list)
    max_idx = dist_list.index(max_sim)
    return name_list[max_idx], max_sim

In [42]:
def get_id_result_using_batch(emb, dataset):
    """Batched variant of the nearest-identity lookup.

    All database embeddings are stacked into one tensor and compared with the
    query in chunks of 128 rows; the best match is then selected on the
    compute device with ``torch.argmax`` instead of a Python-level scan.

    Args:
        emb: Query embedding tensor; it must be expandable (``expand_as``) to
            the shape of a chunk of stacked database embeddings.
        dataset: Indexable pair ``(embedding_list, name_list)`` in which
            ``name_list[i]`` is the label of ``embedding_list[i]``. All
            embeddings must share one shape so they can be stacked.

    Returns:
        ``(name, similarity)`` of the best match. ``similarity`` is the
        matching entry of the similarity tensor (a single-element tensor on
        the compute device, not a Python float). On ties the earliest entry
        wins.

    Raises:
        ValueError: If ``dataset`` holds no embeddings, or if a comparison
            yields more than one similarity value per database entry.
    """
    batch_size = 128
    emb_list = dataset[0]
    name_list = dataset[1]
    total = len(emb_list)
    if total == 0:
        raise ValueError("dataset contains no embeddings to compare against")

    # Use the GPU when there is one, but keep working on CPU-only machines.
    compute_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    emb_db = torch.stack(emb_list).to(compute_device)
    query = emb.to(compute_device)

    # One cosine-similarity call per chunk; results are concatenated on-device.
    sims = torch.cat([
        F.cosine_similarity(query.expand_as(chunk), chunk, dim=-1)
        for chunk in emb_db.split(batch_size)
    ])

    per_entry = sims.reshape(total, -1)
    if per_entry.shape[1] != 1:
        raise ValueError("expected exactly one similarity value per database entry")
    # argmax reports the first maximal index, matching a leftmost-wins scan.
    max_idx = int(torch.argmax(per_entry[:, 0]))
    return name_list[max_idx], sims[max_idx]

In [43]:
def similarity(emb, target, origin, dataset):
    """Average cosine similarity of ``emb`` to two identities in the dataset.

    Args:
        emb: Query embedding tensor.
        target: Label whose database embeddings form the first average.
        origin: Label whose database embeddings form the second average.
        dataset: Indexable pair ``(embedding_list, name_list)`` in which
            ``name_list[i]`` is the label of ``embedding_list[i]``.

    Returns:
        ``(target_mean, origin_mean)`` as Python floats. If ``target`` and
        ``origin`` are the same label, both averages cover the same entries.

    Raises:
        ValueError: If the dataset has no entry for ``target`` or ``origin``
            (the average would otherwise be a division by zero).
    """
    embedding_list = dataset[0]
    name_list = dataset[1]

    # Use the GPU when there is one, but keep working on CPU-only machines.
    compute_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    query = emb.to(compute_device)

    target_cnt, origin_cnt = 0, 0
    target_sim, origin_sim = 0.0, 0.0
    for idx, emb_db in enumerate(embedding_list):
        name = name_list[idx]
        is_target = name == target
        is_origin = name == origin
        if not (is_target or is_origin):
            continue
        # Computed once even when the entry counts for both identities.
        dist = F.cosine_similarity(query, emb_db.to(compute_device), dim=-1).item()
        if is_target:
            target_cnt += 1
            target_sim += dist
        if is_origin:
            origin_cnt += 1
            origin_sim += dist

    if target_cnt == 0:
        raise ValueError("no embeddings labelled %r in dataset" % (target,))
    if origin_cnt == 0:
        raise ValueError("no embeddings labelled %r in dataset" % (origin,))
    return target_sim / target_cnt, origin_sim / origin_cnt

In [45]:
def test_similarity(target_name, origin_name):
    """Measure how similar patched live faces are to two stored identities.

    Frames are read from video capture device 0. For every frame in which
    MTCNN returns a face with probability above 0.80, the adversarial patch is
    applied to the cropped face, the result is embedded with
    InceptionResnetV1, and ``similarity`` gives the mean cosine similarity to
    ``target_name`` and to ``origin_name``. Both per-frame values are printed.
    The loop ends when ESC is pressed in the preview window or a frame cannot
    be read.

    Args:
        target_name: Dataset label of the identity the patch should imitate.
        origin_name: Dataset label of the person wearing the patch.

    Returns:
        ``(target_mean, origin_mean)`` averaged over all accepted frames, or
        ``(nan, nan)`` if no frame was accepted.
    """
    model = InceptionResnetV1(pretrained='vggface2').eval()
    mtcnn = MTCNN(
        image_size=160, margin=0,
        thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
        device=device
    )
    # NOTE: torch.load unpickles its input; only load files from a trusted source.
    dataset = torch.load('data_LFW_add_zmx.pt')
    # Loaded / built once up front instead of on every accepted frame.
    patch = torch.load('../train_result/with_mask/Yves_Brodeur_80_80/patch.pt') # change me
    patch_applier = MyPatchApplier().to(device)

    tot = 0
    target_sim = 0
    origin_sim = 0
    cam = cv2.VideoCapture(0)
    try:
        while True:
            ret, frame = cam.read()
            if not ret:
                print("fail to grab frame, try again")
                break
            # MTCNN is fed an RGB PIL image; the untouched BGR frame is what
            # gets displayed, so no conversion back is needed.
            img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            img_cropped, prob = mtcnn(img, return_prob=True)
            if img_cropped is not None and prob > 0.80:
                with torch.no_grad():  # inference only
                    img_adv = patch_applier(img_cropped.unsqueeze(0), patch) # tensor
                    emb = model(img_adv).detach()
                target_sim_delta, origin_sim_delta = similarity(emb, target_name, origin_name, dataset)
                target_sim += target_sim_delta
                origin_sim += origin_sim_delta
                tot += 1
                print(target_sim_delta)
                print(origin_sim_delta)
            cv2.imshow("IMG", frame)
            k = cv2.waitKey(1)
            if k % 256 == 27:  # ESC
                print('Esc pressed, closing...')
                break
    finally:
        # Always free the camera and the preview window, even on an exception.
        cam.release()
        cv2.destroyAllWindows()

    if tot == 0:
        # No frame passed the detection threshold; there is nothing to average.
        return float('nan'), float('nan')
    return target_sim / tot, origin_sim / tot

In [8]:
def identification_vid(target_name):
    """Run live identification on patched faces and summarise the hits.

    Frames are read from video capture device 0. For every frame in which
    MTCNN returns a face with probability above 0.80, the adversarial patch is
    applied to the cropped face, the result is embedded with
    InceptionResnetV1 and looked up with ``get_id_result``. The predicted name
    and similarity are drawn on the preview. The loop ends when ESC is pressed
    in the preview window or a frame cannot be read.

    Args:
        target_name: Dataset label that counts as a successful identification.

    Returns:
        ``(hit_rate, mean_similarity, max_sim, min_sim)`` where ``hit_rate``
        is the fraction of accepted frames identified as ``target_name`` and
        the other three values describe the similarity over those hits.
        ``hit_rate`` is ``nan`` if no frame was accepted and
        ``mean_similarity`` is ``nan`` if there was no hit; ``max_sim`` and
        ``min_sim`` then keep their initial values ``0`` and ``1``.
    """
    model = InceptionResnetV1(pretrained='vggface2').eval()
    mtcnn = MTCNN(
        image_size=160, margin=0,
        thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
        device=device
    )
    # NOTE: torch.load unpickles its input; only load files from a trusted source.
    dataset = torch.load('data_LFW_add_zmx.pt')
    # Loaded / built once up front instead of on every accepted frame.
    patch = torch.load('../train_result/with_mask/Yves_Brodeur_80_80/patch.pt') # change me
    patch_applier = MyPatchApplier().to(device)

    tot = 0
    cnt = 0
    sim_sum = 0  # named so it does not shadow the module-level similarity()
    max_sim = 0
    min_sim = 1
    cam = cv2.VideoCapture(0)
    try:
        while True:
            ret, frame = cam.read()
            if not ret:
                print("fail to grab frame, try again")
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(frame)
            img_cropped, prob = mtcnn(img, return_prob=True)
            if img_cropped is not None and prob > 0.80:
                with torch.no_grad():  # inference only
                    img_adv = patch_applier(img_cropped.unsqueeze(0), patch) # tensor
                    emb = model(img_adv).detach()
                name, max_similarity = get_id_result(emb, dataset)
                if name == target_name:
                    cnt += 1
                    sim_sum += max_similarity
                    max_sim = max(max_sim, max_similarity)
                    min_sim = min(min_sim, max_similarity)

                tot += 1
                # The box is only needed for drawing, so detect only on accepted frames.
                boxes, _ = mtcnn.detect(img) # type: ignore
                if boxes is not None and len(boxes) > 0:
                    box = boxes[0]
                    # Cosine similarity cannot exceed 1, so this gate is
                    # effectively always open; kept as in the original.
                    if max_similarity < 1.20:
                        frame = cv2.putText(frame, name + ' ' + str(max_similarity), (int(box[0]), int(box[1])),
                                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 1, cv2.LINE_AA)
                    frame = cv2.rectangle(frame, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])),
                                          (255, 0, 0), 2)
            # The annotations were drawn on the RGB frame; imshow expects BGR.
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            cv2.imshow("IMG", frame)
            k = cv2.waitKey(1)
            if k % 256 == 27:  # ESC
                print('Esc pressed, closing...')
                break
    finally:
        # Always free the camera and the preview window, even on an exception.
        cam.release()
        cv2.destroyAllWindows()

    hit_rate = float(cnt) / tot if tot else float('nan')
    mean_similarity = sim_sum / cnt if cnt else float('nan')
    return hit_rate, mean_similarity, max_sim, min_sim

In [46]:
def response_time(target_name, consecutive_frames=3):
    """Time how long the patched face needs to be accepted as ``target_name``.

    Frames are read from video capture device 0. Recognition starts only
    after SPACE is pressed in the preview window. From then on, every frame
    in which MTCNN returns a face with probability above 0.80 is patched,
    embedded with InceptionResnetV1 and looked up with
    ``get_id_result_using_batch``. The clock starts at the first frame
    identified as ``target_name`` and stops once ``consecutive_frames``
    matching frames have been seen in a row; any other identity resets the
    streak. ESC aborts.

    Args:
        target_name: Dataset label that counts as a successful identification.
        consecutive_frames: Number of consecutive matching frames required
            (default 3, the value previously hard-coded). Must be at least 1.

    Returns:
        Seconds elapsed between the first and the last frame of the
        successful streak, or ``None`` if ESC was pressed or a frame could
        not be read before that.

    Raises:
        ValueError: If ``consecutive_frames`` is smaller than 1.
    """
    if consecutive_frames < 1:
        raise ValueError("consecutive_frames must be at least 1")

    model = InceptionResnetV1(pretrained='vggface2').eval()
    mtcnn = MTCNN(
        image_size=160, margin=0,
        thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
        device=device
    )
    # NOTE: torch.load unpickles its input; only load files from a trusted source.
    dataset = torch.load('data_LFW_add_zmx.pt')
    # Loaded / built once up front: reloading per frame would inflate the
    # interval this function measures.
    patch = torch.load('../train_result/with_mask/Yves_Brodeur_80_80/patch.pt') # change me
    patch_applier = MyPatchApplier().to(device)

    start_record = False
    frame_cnt = 0
    et0 = 0
    cam = cv2.VideoCapture(0)
    try:
        while True:
            ret, frame = cam.read()
            if not ret:
                print("fail to grab frame, try again")
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(frame)
            img_cropped, prob = mtcnn(img, return_prob=True)
            if img_cropped is not None and prob > 0.80 and start_record:
                with torch.no_grad():  # inference only
                    img_adv = patch_applier(img_cropped.unsqueeze(0), patch)
                    emb = model(img_adv).detach()
                name, max_similarity = get_id_result_using_batch(emb, dataset)
                # The batched lookup returns a one-element tensor; use a plain
                # float for the threshold test and a readable label.
                max_similarity = float(max_similarity)

                if name == target_name:
                    if frame_cnt == 0:
                        # perf_counter is monotonic, unlike wall-clock time.time().
                        et0 = time.perf_counter()
                    frame_cnt += 1
                    if frame_cnt == consecutive_frames:
                        return time.perf_counter() - et0
                else:
                    frame_cnt = 0 # reset

                # The box is only needed for drawing, so detect only on accepted frames.
                boxes, _ = mtcnn.detect(img) # type: ignore
                if boxes is not None and len(boxes) > 0:
                    box = boxes[0]
                    # Cosine similarity cannot exceed 1, so this gate is
                    # effectively always open; kept as in the original.
                    if max_similarity < 1.20:
                        frame = cv2.putText(frame, name + ' ' + str(max_similarity), (int(box[0]), int(box[1])),
                                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 1, cv2.LINE_AA)
                    frame = cv2.rectangle(frame, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])),
                                          (255, 0, 0), 2)
            # The annotations were drawn on the RGB frame; imshow expects BGR.
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            cv2.imshow("IMG", frame)
            k = cv2.waitKey(1)
            if k % 256 == 27:  # ESC
                print('Esc pressed, closing...(failed to pass)')
                break
            elif k % 256 == 32: # space
                start_record = True
                print('Start recording...')
    finally:
        # Runs on every exit path (success, ESC, read failure, exception).
        cam.release()
        cv2.destroyAllWindows()
    return None

In [47]:
# Alternative experiments, disabled; uncomment one to run it instead.
#identification_vid('Yves_Brodeur')
# print(test_similarity('Yves_Brodeur', 'Zhang_Mingxuan')) 
# Time the identification as the target identity. In the preview window,
# press SPACE to start recording and ESC to abort.
response_time("Yves_Brodeur")

Start recording...


1.8721098899841309

Update Release 12.12:

对查找函数进行了修改:
1. 计算`dist_list`时采用`batch`
2. 改用递归二分(分治)的方式查找最大值(即 `BinarySearch`;输入无需有序,并非严格意义上的二分查找)
3. 将`dataset`的加载放在外部(最大的时间开销: 反复加载)