In [1]:
import os
import torch
from torch import nn
from torchvision import datasets
import torchvision.models as models
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torch.optim as optim
from PIL import Image
from facenet_pytorch import MTCNN
import cv2
import time
import embeddingModels
import nbimporter
import TripletFolderClass
import torch.optim as optim
from facenet_pytorch import InceptionResnetV1

In [2]:
# Face detector used when enrolling database photos (keep_all=False).
mtcnn0 = MTCNN(
    image_size=150,
    margin=0,
    keep_all=False,
    min_face_size=40,
)

# Face detector used on live frames, where several faces may appear (keep_all=True).
mtcnn = MTCNN(
    image_size=150,
    margin=0,
    keep_all=True,
    min_face_size=40,
)

In [3]:
class InceptionResnet(nn.Module):
    """Thin wrapper around facenet_pytorch's ``InceptionResnetV1``.

    Args:
        device: Device handed straight to ``InceptionResnetV1``.
        pool: When equal to ``'gem'``, the backbone's ``avgpool_1a`` layer is
            replaced by ``GeM(p_trainable=True)``; any other value leaves the
            backbone untouched.
        dropout: Forwarded as ``dropout_prob``.
        pretrain: When truthy the backbone is created with
            ``pretrained='vggface2'``; otherwise no ``pretrained`` argument is
            passed.

    Attributes:
        net: The wrapped ``InceptionResnetV1`` instance.
        out_features: ``in_features`` of the backbone's ``last_linear`` layer.
    """

    def __init__(self, device, pool=None, dropout=0.3, pretrain=True):
        super().__init__()

        # Assemble the constructor arguments once; only the pre-trained
        # variant passes `pretrained`.
        backbone_kwargs = {'dropout_prob': dropout, 'device': device}
        if pretrain:
            backbone_kwargs['pretrained'] = 'vggface2'
        self.net = InceptionResnetV1(**backbone_kwargs)

        # Input width of the backbone's final linear layer.
        self.out_features = self.net.last_linear.in_features

        # Optionally swap the backbone's average pooling for GeM pooling.
        # NOTE(review): `GeM` is not bound by any import visible in this
        # notebook (only `import embeddingModels`); confirm where it comes from.
        if pool == 'gem':
            self.net.avgpool_1a = GeM(p_trainable=True)

    def forward(self, x):
        """Return whatever the wrapped backbone produces for ``x``."""
        return self.net(x)

In [4]:
class FaceNet(nn.Module):
    """Face-embedding network with a selectable backbone.

    Args:
        model_name: ``'resnet'`` selects ``SEResNeXt101``, ``'effnet'`` selects
            ``EfficientNetEncoderHead``; every other value (including the
            default ``None``) selects ``InceptionResnet``.
        pool: ``'gem'`` uses ``GeM(p_trainable=True)`` as the global pooling
            layer, anything else uses ``nn.AdaptiveAvgPool2d(1)``. The value is
            also forwarded to ``InceptionResnet``.
        dropout: Probability for the ``nn.Dropout`` applied after pooling; also
            forwarded to ``InceptionResnet``.
        embedding_size: Output width of the neck's linear layer.
        device: Forwarded to ``InceptionResnet``.
        pretrain: Forwarded to the selected backbone.
    """

    # Backbones whose output still has to go through global pooling and the
    # neck. Every other model_name resolves to InceptionResnet in __init__,
    # and that backbone's output is returned directly by forward().
    _FEATURE_MAP_BACKBONES = ('resnet', 'effnet')

    def __init__(self, model_name=None, pool=None, dropout=0.0, embedding_size=512, device='cuda', pretrain=True):
        super().__init__()
        self.model_name = model_name

        # Backbone selection.
        if model_name == 'resnet':
            self.model = SEResNeXt101(pretrain)
        elif model_name == 'effnet':
            self.model = EfficientNetEncoderHead(depth=3, pretrain=pretrain)
        else:
            self.model = InceptionResnet(device, pool=pool, dropout=dropout, pretrain=pretrain)

        # Global pooling: generalized-mean pooling or plain average pooling.
        if pool == "gem":
            self.global_pool = GeM(p_trainable=True)
        else:
            self.global_pool = nn.AdaptiveAvgPool2d(1)

        # Neck: linear projection followed by batch norm. It is created for
        # every backbone, even when forward() bypasses it, so the set of
        # registered sub-modules (and therefore the state_dict keys) is the
        # same as before.
        self.neck = nn.Sequential(
            nn.Linear(self.model.out_features, embedding_size, bias=True),
            nn.BatchNorm1d(embedding_size, eps=0.001),
        )
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        """Compute embeddings for a batch ``x``.

        For the InceptionResnet backbone the backbone output is returned as-is.
        For ``'resnet'`` / ``'effnet'`` the backbone output is globally pooled,
        passed through dropout, indexed down to two dimensions and projected by
        the neck.
        """
        # Previously only `model_name == None` took this shortcut, although
        # __init__ builds InceptionResnet for *any* unrecognised name; such
        # names then crashed in the pooling path below.
        if self.model_name not in self._FEATURE_MAP_BACKBONES:
            return self.model(x)

        x = self.model(x)
        x = self.global_pool(x)
        x = self.dropout(x)
        # Keep the first two dims and take index 0 of the last two
        # (AdaptiveAvgPool2d(1) leaves them at size 1).
        x = x[:, :, 0, 0]
        return self.neck(x)

In [5]:
# `pretrain` is a boolean flag. The string 'True' only worked because every
# non-empty string is truthy (so would 'False'); pass the real boolean.
facenet = FaceNet(model_name=None, pool=None, embedding_size=512, dropout=0.3, device='cpu', pretrain=True)

In [None]:
# Read the training checkpoint, remapping all tensors to the CPU.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
checkpoint = torch.load(
    './Models/InceptionResNetV1_Triplet.pth',
    map_location='cpu',
)

In [None]:
# Restore the trained weights into `facenet` from the checkpoint's
# 'model_state_dict' entry.

# NOTE(review): MODEL_PATH repeats the path that is hard-coded in the
# torch.load call producing `checkpoint`, and nothing visible in this notebook
# reads it — confirm whether it is still needed.
MODEL_PATH = './Models/InceptionResNetV1_Triplet.pth'
facenet.load_state_dict(checkpoint['model_state_dict'])

In [None]:
# Switch to inference mode: the model contains Dropout and BatchNorm layers,
# which behave differently while training.
facenet.eval()

In [None]:
# Make sure every parameter and buffer of the model lives on the CPU.
facenet.to('cpu')

In [None]:
# Build the reference gallery: one embedding per usable photo in Database/<person>/.

# Minimum MTCNN detection probability for a photo to be enrolled.
MIN_ENROLL_PROB = 0.92

dataset = datasets.ImageFolder('Database') # photos folder path
idx_to_class = {i:c for c,i in dataset.class_to_idx.items()} # class index -> folder (person) name

def collate_fn(x):
    """Return the first (image, label) sample of the batch.

    The loader below uses the default batch size, so this hands the loop a
    single sample instead of a list.
    """
    return x[0]

loader = DataLoader(dataset, collate_fn=collate_fn)

name_list = [] # person name for each stored embedding
embedding_list = [] # embeddings produced by `facenet`, in the same order as name_list

# Make the cell self-sufficient: inference mode does not depend on an earlier
# cell having been run, and no autograd graph is built for pure inference.
facenet.eval()
with torch.no_grad():
    for img, idx in loader:
        face, prob = mtcnn0(img, return_prob=True)
        # Skip photos with no detected face or a low-confidence detection.
        if face is not None and prob > MIN_ENROLL_PROB:
            emb = facenet(face.unsqueeze(0))
            embedding_list.append(emb.detach())
            name_list.append(idx_to_class[idx])

# save data
data = [embedding_list, name_list]
torch.save(data, 'data.pt') # saving data.pt file

In [None]:
# Recognise faces from the webcam.
#   ESC   -> quit
#   SPACE -> save the current (undrawn) frame under Database/<name>/

# Minimum MTCNN detection probability for a face to be processed.
MIN_FACE_PROB = 0.90
# Maximum embedding distance for a face to be labelled with a known name.
MAX_MATCH_DIST = 0.90

# loading data.pt file
# NOTE: torch.load unpickles the file; only load a data.pt you created yourself.
load_data = torch.load('data.pt')
embedding_list = load_data[0]
name_list = load_data[1]

# Stack the gallery once, outside the frame loop, so that each face needs a
# single vectorised distance computation. An empty gallery is allowed: faces
# are then boxed but never labelled.
if len(embedding_list) > 0:
    gallery = torch.cat([e.reshape(1, -1) for e in embedding_list])
else:
    gallery = None

facenet.eval()
cam = cv2.VideoCapture(0)

try:
    while True:
        ret, frame = cam.read()
        if not ret:
            print("failed to grab frame, try again")
            break

        # Keep a clean copy before anything is drawn, so that SPACE always has
        # an image to save, even when no face has been detected yet.
        original_frame = frame.copy()

        # OpenCV delivers BGR frames, while PIL / MTCNN (and the gallery images
        # loaded through ImageFolder) are RGB, so convert before detection.
        img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        img_cropped_list, prob_list = mtcnn(img, return_prob=True)

        if img_cropped_list is not None:
            # NOTE(review): this runs detection a second time just to obtain
            # the boxes; consider reusing a single detection pass if the
            # installed facenet_pytorch version exposes one.
            boxes, _ = mtcnn.detect(img)

            for i, prob in enumerate(prob_list):
                if prob > MIN_FACE_PROB:
                    box = boxes[i]

                    if gallery is not None:
                        with torch.no_grad():
                            emb = facenet(img_cropped_list[i].unsqueeze(0))

                        # Euclidean distance to every gallery embedding; the
                        # smallest one identifies the person.
                        dist_list = torch.linalg.norm(gallery - emb.reshape(1, -1), dim=1).tolist()
                        min_dist = min(dist_list)
                        name = name_list[dist_list.index(min_dist)]

                        if min_dist < MAX_MATCH_DIST:
                            frame = cv2.putText(frame, name+' '+ str(min_dist), (int(box[0]),int(box[1])), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0),1, cv2.LINE_AA)

                    frame = cv2.rectangle(frame, (int(box[0]),int(box[1])) , (int(box[2]),int(box[3])), (255,0,0), 2)

        cv2.imshow("FaceRec", frame)

        k = cv2.waitKey(1)
        if k%256==27: # ESC
            print('Esc pressed, closing...')
            break

        elif k%256==32: # space to save image
            print('Enter your name :')
            new_name = input().strip()

            # The name becomes a folder name: refuse empty input and anything
            # that contains a path component.
            if not new_name or os.path.basename(new_name) != new_name:
                print('invalid name, image not saved')
                continue

            # create directory (and the Database folder itself) if not exists
            os.makedirs(os.path.join('Database', new_name), exist_ok=True)

            img_name = "Database/{}/{}.jpg".format(new_name, int(time.time()))
            if cv2.imwrite(img_name, original_frame):
                print(" saved: {}".format(img_name))
            else:
                print('could not write image: {}'.format(img_name))
finally:
    # Always free the camera and close the window, even after an error.
    cam.release()
    cv2.destroyAllWindows()