In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math
import cv2
import os
import tqdm
import glob
from statistics import mode
from sklearn.cluster import KMeans
from sklearn.cluster import MeanShift
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torch.utils.data.dataset import Dataset
from torchvision import datasets, models, transforms
import time
from PIL import Image
import copy
from torch.utils.data.sampler import SubsetRandomSampler

In [2]:
def eulidist(rectlist,center):
    """Sum of squared offsets between a box's two corners and a point.

    ``rectlist`` is read as ``(x1, y1, x2, y2)`` and ``center`` as
    ``(cx, cy)``. The result is the squared distance of corner ``(x1, y1)``
    to the point plus the squared distance of corner ``(x2, y2)`` to it;
    no square root is taken.
    """
    cx, cy = center[0], center[1]
    squared_offsets = (
        (rectlist[0] - cx) ** 2,
        (rectlist[2] - cx) ** 2,
        (rectlist[1] - cy) ** 2,
        (rectlist[3] - cy) ** 2,
    )
    # Accumulate left to right so the floating-point result is unchanged.
    total = squared_offsets[0]
    for term in squared_offsets[1:]:
        total = total + term
    return total
def selectFaces(faces,center):
    """Return the entry of ``faces`` whose box lies closest to ``center``.

    Each entry is unpacked as ``(x, y, w, h)``, converted to corner form
    ``(x, y, x+w, y+h)`` and scored with ``eulidist``; the entry with the
    smallest score is returned.
    """
    corner_scores = np.array([
        eulidist(np.array((bx, by, bx + bw, by + bh)), center)
        for (bx, by, bw, bh) in faces
    ])
    best = np.argmin(corner_scores)
    return faces[best]
def extractFrame(mp4Dir):
    """Read every frame of a video file into a single array.

    Parameters
    ----------
    mp4Dir : str
        Path passed to ``cv2.VideoCapture``.

    Returns
    -------
    numpy.ndarray
        All frames returned by ``VideoCapture.read`` in reading order,
        stacked with ``np.array``. The array is empty when the capture
        cannot be opened or yields no frame.
    """
    vid = cv2.VideoCapture(mp4Dir)
    allFrame = []
    try:
        while vid.isOpened():
            ret,frame = vid.read()
            if not ret:
                # read() reports failure once no further frame is available.
                break
            allFrame.append(frame)
    finally:
        # Always free the capture handle, even if decoding raises.
        vid.release()
    return np.array(allFrame)
def extractXYWH(allFrame):
    """Detect one face box per frame with a Haar cascade.

    The cascade is loaded from ``haarcascade_frontalface_default.xml`` in the
    current working directory. When a frame has several detections, the one
    chosen by ``selectFaces`` relative to the frame centre is kept.

    Parameters
    ----------
    allFrame : sequence of images
        Frames indexed as ``allFrame[i]``; each is converted with
        ``cv2.COLOR_BGR2GRAY`` before detection.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(len(allFrame), 4)`` holding ``(x, y, w, h)``
        per frame. Frames without any detection get the sentinel
        ``(0, 0, 0, 0)``. The result is always 2-D, including for zero or one
        frame.
    """
    face_cascade=cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    coords = []
    for i in tqdm.tqdm(range(len(allFrame))):
        # cvtColor returns a new image, so the frame can be read in place.
        img = allFrame[i]
        gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray)
        if not len(faces):
            face = (0,0,0,0)
        else:
            # Image shape is (rows, cols, ...); the centre is wanted as (x, y).
            img_h, img_w = img.shape[:2]
            center = np.array((img_w, img_h))/2
            (x,y,w,h) = selectFaces(faces,center)
            face = (x,y,w,h)
        coords.append(face)
    # Build the array once; reshape keeps it (n, 4) even for n == 0 or n == 1.
    return np.array(coords, dtype=int).reshape(-1, 4)
def meanShiftCluster(coorList, bandwidth=20):
    """Cluster per-frame box origins and pick the dominant cluster.

    Parameters
    ----------
    coorList : numpy.ndarray
        2-D array whose columns are ``(x, y, w, h)``; only ``x`` and ``y`` are
        clustered. Rows with ``w == 0`` or ``h == 0`` are treated as
        "no detection" placeholders.
    bandwidth : float, optional
        Bandwidth passed to ``MeanShift``. Defaults to 20, the value this
        function previously hard-coded.

    Returns
    -------
    labels : numpy.ndarray
        Cluster label of every row (``MeanShift.labels_``).
    remainIdx : int
        Most frequent label among rows that carry a real detection.
    w, h : int
        Mean width and height (truncated to ``int``) of the detected rows in
        the dominant cluster.
    """
    ms = MeanShift(bandwidth=bandwidth)
    ms.fit(coorList[:,:2])
    labels = ms.labels_

    # Placeholder rows must not vote: if they did, a video where most frames
    # have no detection would select the empty cluster and yield w == h == 0.
    detected = (coorList[:,2] > 0) & (coorList[:,3] > 0)
    if not detected.any():
        # Nothing was detected at all; fall back to using every row.
        detected = np.ones(len(labels), dtype=bool)

    remainIdx = mode(labels[detected])
    keep = detected & (labels == remainIdx)
    w = int(np.mean(coorList[keep,2]))
    h = int(np.mean(coorList[keep,3]))
    return labels,remainIdx,w,h
def extractFaces(allFrame,coorList,labels,remainIdx,w,h):
    """Crop a fixed-size face from every frame in the selected cluster.

    Parameters
    ----------
    allFrame : sequence of images
        Source frames; they are not modified (each frame is copied first).
    coorList : numpy.ndarray
        Per-frame boxes; columns 0 and 1 are used as the crop origin ``(x, y)``.
    labels : sequence
        Cluster label of each frame.
    remainIdx : int
        Label of the cluster to keep.
    w, h : int
        Crop width and height applied to every kept frame.

    Returns
    -------
    facesList : numpy.ndarray
        Crops of the kept frames, in frame order.
    newimgList : numpy.ndarray
        Copies of all frames; kept frames have a ``(255, 0, 0)`` rectangle of
        thickness 2 drawn around the crop.
    newimgBol : numpy.ndarray
        Boolean per frame, true where the frame was kept.
    """
    facesList,newimgList,newimgBol = [],[],[]
    for i in range(len(allFrame)):
        img = allFrame[i].copy()
        keep = labels[i] == remainIdx
        if keep:
            frame_h, frame_w = img.shape[:2]
            # Shift the origin so the w-by-h window stays inside the frame.
            # Otherwise slicing silently truncates the crop and the crops no
            # longer stack into one regular array.
            x = min(max(int(coorList[i,0]), 0), max(frame_w - w, 0))
            y = min(max(int(coorList[i,1]), 0), max(frame_h - h, 0))
            # Copy before drawing so the rectangle never appears in the crop.
            tmp = img[y:y+h,x:x+w].copy()
            facesList.append(tmp)
            img = cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2)
        newimgList.append(img)
        newimgBol.append(keep)
    facesList = np.array(facesList)
    newimgList = np.array(newimgList)
    newimgBol = np.array(newimgBol)
    return facesList,newimgList,newimgBol
def dir2newvidface(mp4Dir):
    """Run the face-extraction steps on one video file.

    Chains ``extractFrame`` -> ``extractXYWH`` -> ``meanShiftCluster`` ->
    ``extractFaces`` and returns the three arrays produced by
    ``extractFaces``: the face crops, the annotated frames and the per-frame
    boolean mask.
    """
    frames = extractFrame(mp4Dir)
    boxes = extractXYWH(frames)
    cluster_ids, dominant_id, crop_w, crop_h = meanShiftCluster(boxes)
    face_crops, boxed_frames, frame_mask = extractFaces(
        frames, boxes, cluster_ids, dominant_id, crop_w, crop_h)
    return face_crops, boxed_frames, frame_mask

In [3]:
def loadModel(model_path=None, label_path='labelModel.txt', device=None):
    """Load the pickled classifier and its class-name list.

    Parameters
    ----------
    model_path : str, optional
        File given to ``torch.load``. Defaults to ``model.pt`` in the current
        working directory.
    label_path : str, optional
        Text file with one class name per line. Defaults to
        ``labelModel.txt``.
    device : torch.device or str, optional
        Device the model is mapped onto while loading. Defaults to
        ``cuda:0`` when CUDA is available, otherwise ``cpu``.

    Returns
    -------
    model
        The loaded object with its ``avgpool`` attribute replaced by a
        ``7x7`` stride-1 ``nn.AvgPool2d``.
    class_names : list of str
        Lines of ``label_path`` with trailing whitespace stripped.
    """
    if model_path is None:
        model_path = os.path.join(os.getcwd(),'model.pt')
    if device is None:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # NOTE(review): torch.load unpickles arbitrary objects, so only load files
    # you trust. Newer PyTorch releases default to weights_only=True, which
    # presumably rejects a fully pickled model like this one -- confirm against
    # the installed version.
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    model = torch.load(model_path, map_location=device)
    model.avgpool = nn.AvgPool2d(kernel_size=7,stride=1,padding=0)

    # The context manager closes the label file once it has been read.
    with open(label_path,'r') as labelfile:
        class_names = [line.rstrip() for line in labelfile]
    return model,class_names

In [4]:
def runmodel(model,dataloaders,device,class_names):
    """Classify every batch and return the predicted class names.

    Parameters
    ----------
    model : torch.nn.Module
        Network to evaluate. It is moved to ``device`` and run in eval mode;
        its previous train/eval mode is restored before returning.
    dataloaders : iterable
        Yields input batches (tensors only, no targets).
    device : torch.device
        Device on which inference runs.
    class_names : sequence of str
        Maps the arg-max index along dimension 1 of the model output to a
        label.

    Returns
    -------
    numpy.ndarray
        One label per input sample, in iteration order.
    """
    was_training = model.training
    # Inputs are moved to `device` below, so the weights must live there too.
    model = model.to(device)
    model.eval()
    labels = []
    try:
        with torch.no_grad():
            for inputs in dataloaders:
                inputs = inputs.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                labels.extend(class_names[idx] for idx in preds.tolist())
    finally:
        # Put the model back in whatever mode the caller had it in.
        model.train(mode=was_training)
    return np.array(labels)

In [5]:
class CustomDatasetFrom(Dataset):
    """Dataset that serves images stored in a NumPy array.

    Items are returned without a target: each one is just the (optionally
    transformed) image at that index.
    """

    def __init__(self, nparray, transform=None):
        """Store the image array and the optional transform.

        Parameters
        ----------
        nparray : numpy.ndarray
            Images indexed along the first axis.
        transform : callable, optional
            Applied to the PIL image built from each item.
        """
        self.data = nparray
        self.transforms = transform

    def __getitem__(self, index):
        """Return item ``index`` as a PIL image, transformed if configured."""
        img_as_np = self.data[index]
        # Convert image from numpy array to PIL image
        img_as_img = Image.fromarray(img_as_np)
        if self.transforms is None:
            # No transform configured: hand back the PIL image itself rather
            # than falling through to an unassigned variable.
            return img_as_img
        return self.transforms(img_as_img)

    def __len__(self):
        """Number of images, i.e. the size of the first axis."""
        return self.data.shape[0]

In [6]:
def testplay(faces,bbimgs,bol,labels):
    """Play the annotated frames next to the face crop and its label.

    Must run inside IPython/Jupyter: it switches matplotlib to the
    ``notebook`` backend through ``get_ipython()``.

    Parameters
    ----------
    faces : sequence of images
        Face crops, one per frame for which ``bol`` is true, in frame order.
    bbimgs : sequence of images
        Frames to show, one per entry of ``bol``.
    bol : sequence of bool
        Whether a frame has a face crop / label.
    labels : sequence
        Label of each face crop, aligned with ``faces``.

    Notes
    -----
    Images have their last axis reversed before display. Frames without a
    face show an empty right-hand panel.
    """
    # Function-call form of the `%matplotlib inline` / `%matplotlib notebook`
    # line magics, which keeps this function plain Python. The unrelated
    # autoreload magics were dropped; they belong in a setup cell.
    ipython = get_ipython()
    ipython.run_line_magic('matplotlib', 'inline')
    ipython.run_line_magic('matplotlib', 'notebook')

    fig = plt.figure(figsize=(20,10))
    # Create both panels once and redraw into them, instead of stacking a new
    # image artist onto the figure for every frame.
    grid = plt.GridSpec(1,4,wspace=0,hspace=0)
    frame_ax = fig.add_subplot(grid[0,:3])
    face_ax = fig.add_subplot(grid[0,-1])
    frame_ax.axis('off')
    face_ax.axis('off')
    plt.ion()
    fig.show()
    fig.canvas.draw()

    j = 0  # index into faces/labels; advances only on frames that have a face
    for i in range(len(bol)):
        frame_ax.clear()
        face_ax.clear()
        # Use the arguments, not notebook globals of the same meaning.
        frame_ax.imshow(bbimgs[i][:,:,::-1])
        frame_ax.axis('off')
        if bol[i]:
            face_ax.imshow(faces[j][:,:,::-1])
            face_ax.set_title(labels[j],fontsize=20)
            j+=1
        face_ax.axis('off')
        fig.canvas.draw()

In [7]:
def vid2demo(PATH):
    """Load the model, extract faces from a video and label every face crop.

    Steps: ``loadModel`` -> ``dir2newvidface(PATH)`` -> wrap the crops in a
    ``CustomDatasetFrom`` / ``DataLoader`` (batch size 4, no shuffling) ->
    ``runmodel``. Playback is not started here; call ``testplay`` with the
    returned values to view the result.

    Returns the face crops, the annotated frames, the per-frame boolean mask
    and the array of predicted labels.
    """
    print('---------------LOAD MODEL---------------')
    net, label_names = loadModel()

    print('---------------EXTRACT VIDEO INFO-------------')
    face_crops, boxed_frames, frame_has_face = dir2newvidface(PATH)

    print('---------------CREATE DATASET---------------')
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]
    preprocess = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])
    # The crops' last (channel) axis is reversed before they reach the dataset.
    face_dataset = CustomDatasetFrom(face_crops[:,:,:,::-1], preprocess)
    face_loader = torch.utils.data.DataLoader(
        face_dataset, batch_size=4, shuffle=False, num_workers=0)
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    print('---------RUN MODEL---------')
    predicted = runmodel(net, face_loader, device, label_names)

    print('---------PLAY DEMO---------')
    return face_crops, boxed_frames, frame_has_face, predicted

In [8]:
# Choose the input video. Use one that is NOT from dealer IDs 480, 758 or 781.
# The number after each file name is presumably its dealer ID -- TODO confirm.
# inputDir = '21832529252352.mp4' #717
inputDir = '128623736946688.mp4' #818
# inputDir = '39528197169152.mp4' #231
# inputDir = '107091253911552.mp4' #634
# Run the pipeline; the four results stay as notebook globals for later cells.
facesArr,newimgArr,newimgBol,facelabels = vid2demo(inputDir)

---------------LOAD MODEL---------------




---------------EXTRACT VIDEO INFO-------------


100%|████████████████████████████████████████████████████████████████████████████████| 765/765 [01:26<00:00,  8.83it/s]


---------------CREATE DATASET---------------
---------RUN MODEL---------
---------PLAY DEMO---------


In [9]:
# Number of predicted labels; runmodel emits one label per face crop it is fed.
facelabels.shape

(724,)

In [10]:
# Play back the annotated frames alongside each face crop and its label.
testplay(facesArr,newimgArr,newimgBol,facelabels) 

<IPython.core.display.Javascript object>