# Face Detection and Calculation in Images

by Nan BAI

Adapted from the original code of the facenet-pytorch library.

In [1]:
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
from torch.autograd import Variable as V
from torchvision import transforms as trn
from torch.nn import functional as F
import numpy as np
import pandas as pd
import os
from PIL import Image, ImageDraw
from PIL.ImageOps import colorize
from IPython import display

# Worker count: 0 on Windows (os.name == 'nt'), 4 on every other platform.
# NOTE(review): presumably meant for DataLoader(num_workers=...); it is not
# referenced by any other cell visible here.
workers = 0 if os.name == 'nt' else 4

In [2]:
# Use the first CUDA device when CUDA is available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

Running on device: cuda:0


In [3]:
# Face detector used as the default `model` argument of the functions below.
# The keyword arguments are interpreted by facenet_pytorch.MTCNN; refer to
# its documentation for their exact meaning.
mtcnn = MTCNN(
    image_size=160, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
    device=device
)

In [4]:
# InceptionResnetV1 loaded with the 'vggface2' pretrained weights, switched to
# eval mode and moved to `device`.
# NOTE(review): no other cell visible here references `resnet`.
resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

In [5]:
def returnTF():
    """Build the preprocessing pipeline applied to every image.

    Returns:
        A torchvision ``Compose`` that resizes its input to 150x150 and then
        converts it to a tensor. No normalisation step is applied.
    """
    steps = [
        trn.Resize((150, 150)),
        trn.ToTensor(),
    ]
    return trn.Compose(steps)

In [6]:
def load_images(images_folder):
    """List the image files found directly inside *images_folder*.

    Args:
        images_folder: Directory to scan (not recursive).

    Returns:
        File names (without the directory part) whose extension, compared
        case-insensitively, is ``.jpg``, ``.jpeg`` or ``.png``. The order is
        whatever ``os.listdir`` yields.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    return [
        entry
        for entry in os.listdir(images_folder)
        if os.path.splitext(entry)[1].lower() in image_suffixes
    ]

In [7]:
def load_images_from_category_folders(images_folder):
    """Index images stored one level deep, in one sub-folder per category.

    Every sub-directory of *images_folder* is treated as a category and its
    name is printed as it is discovered. Only files ending (case-insensitively)
    in ``.jpg``, ``.jpeg`` or ``.png`` are kept.

    Args:
        images_folder: Directory whose sub-directories are the categories.

    Returns:
        A 4-tuple ``(all_images, all_imgs, categories, category_dict)``:

        * ``all_images`` -- dict mapping category name to its image file names.
        * ``all_imgs`` -- flat list of ``(file_name, category, label)`` tuples.
        * ``categories`` -- category names in discovery order.
        * ``category_dict`` -- dict mapping a 0-based position to the
          category name.

        NOTE(review): ``label`` in ``all_imgs`` is 1-based (it is the length
        of ``categories`` after the category was appended) whereas the keys of
        ``category_dict`` are 0-based -- confirm which one consumers expect.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    per_category = {}
    flat_listing = []
    category_names = []
    index_to_category = {}

    for entry in os.listdir(images_folder):
        category_path = images_folder + '/' + entry
        if not os.path.isdir(category_path):
            continue

        print(entry)
        category_names.append(entry)
        index_to_category[len(index_to_category)] = entry
        label = len(category_names)

        kept = [
            name
            for name in os.listdir(category_path)
            if os.path.splitext(name)[1].lower() in image_suffixes
        ]
        per_category[entry] = kept
        flat_listing.extend((name, entry, label) for name in kept)

    return per_category, flat_listing, category_names, index_to_category

In [11]:
# Venezia dataset: folder of images and the list of image file names in it.
# These two names are also the default arguments of the helper functions below.
images_folder = 'Venezia/data_storage/images/150/'
all_images = load_images(images_folder)

In [27]:
# Suzhou dataset: folder of images and the list of image file names in it.
images_folder_Suzhou = 'Suzhou/data_storage/images/150/'
all_images_Suzhou = load_images(images_folder_Suzhou)

In [8]:
# Amsterdam dataset: folder of images and the list of image file names in it.
images_folder_Amsterdam = 'Amsterdam/data_storage/images/150/'
all_images_Amsterdam = load_images(images_folder_Amsterdam)

In [35]:
# "Large" dataset stored under grid-150: folder and list of image file names.
images_folder_large = 'data_storage/images/grid-150/'
all_images_large = load_images(images_folder_large)

In [12]:
def get_one_image(ID = 0, all_images=all_images, images_folder=images_folder):
    """Load one image by position and prepare it for the face detector.

    Args:
        ID: Position of the image inside *all_images*.
        all_images: Either a flat list of file names (as returned by
            ``load_images``) or a list of ``(file_name, category, ...)`` tuples
            (as returned by ``load_images_from_category_folders``). The layout
            is decided from the type of the first element.
        images_folder: Root folder of the images. A trailing path separator is
            optional.

    Returns:
        ``(img_name, img, input_img)`` where ``img`` is the RGB PIL image
        resized to 150x150 and ``input_img`` is the transformed image tensor
        with the channel axis moved last, a leading batch axis added and its
        values multiplied by 255.
    """
    entry = all_images[ID]
    if isinstance(all_images[0], tuple):
        img_name, category = entry[0], entry[1]
        img_path = os.path.join(images_folder, category, img_name)
    else:
        img_name = entry
        # os.path.join works with or without a trailing separator on
        # images_folder; plain string concatenation silently built a wrong
        # path when the separator was missing.
        img_path = os.path.join(images_folder, img_name)

    img = Image.open(img_path)
    if img.mode == 'L':
        img = colorize(img, black=(0,0,0), white=(255,255,255))
    elif img.mode != 'RGB':
        # Covers CMYK, RGBA, P, ...: the detector needs three channels.
        img = img.convert('RGB')

    tf = returnTF()
    input_img = V(tf(img).permute(1,2,0).unsqueeze(0))
    img = img.resize((150,150))
    return img_name, img, input_img*255

In [13]:
def get_one_image_name(name, all_images=all_images, images_folder=images_folder):
    """Load one image by its base name and prepare it for the face detector.

    Args:
        name: File name without extension; ``'.jpg'`` is appended to it.
        all_images: Either a flat list of file names or a list of
            ``(file_name, category, ...)`` tuples. The layout is decided from
            the type of the first element.
        images_folder: Root folder of the images. A trailing path separator is
            optional.

    Returns:
        ``(img, input_img)`` where ``img`` is the RGB PIL image resized to
        150x150 and ``input_img`` is the transformed image tensor with the
        channel axis moved last, a leading batch axis added and its values
        multiplied by 255.

    Raises:
        AssertionError: If ``name + '.jpg'`` is not listed in *all_images*.
    """
    file_name = name + '.jpg'
    if isinstance(all_images[0], tuple):
        img_list = [a[0] for a in all_images]
        assert file_name in img_list
        category = all_images[img_list.index(file_name)][1]
        img_path = os.path.join(images_folder, category, file_name)
    else:
        assert file_name in all_images
        img_path = os.path.join(images_folder, file_name)

    img = Image.open(img_path)
    if img.mode == 'L':
        img = colorize(img, black=(0,0,0), white=(255,255,255))
    elif img.mode != 'RGB':
        # Same normalisation as get_one_image: without it CMYK/RGBA/P images
        # reach the detector with the wrong number of channels.
        img = img.convert('RGB')

    tf = returnTF()
    input_img = V(tf(img).permute(1,2,0).unsqueeze(0))
    img = img.resize((150,150))
    return img, input_img*255

In [14]:
def process_one_image(img, input_img, model=mtcnn):
    """Run face detection on a batch and normalise the "no face" entries.

    Args:
        img: Unused; kept so the signature matches the other helpers.
        input_img: Batch of images passed to ``model.detect``.
        model: Detector exposing ``detect(input_img) -> (boxes, prob)``.
            Previously this argument was ignored and the global ``mtcnn`` was
            always used.

    Returns:
        ``(boxes, prob)`` as returned by the detector, indexed per image,
        except that for every image whose single probability entry is falsy
        (e.g. ``None``) the boxes entry is replaced by ``[]`` and the
        probability entry by ``[0]``.
    """
    boxes, prob = model.detect(input_img)
    for i in range(len(boxes)):
        face_probs = prob[i]
        if len(face_probs) <= 1 and not face_probs[0]:
            boxes[i] = []
            prob[i] = [0]
    return boxes, prob

In [15]:
def draw_one_image(img, input_img, model=mtcnn):
    """Display *img* with a red rectangle around every detected face.

    Args:
        img: PIL image to draw on (a copy is drawn on; *img* is untouched).
        input_img: Detector input for the same image, with a batch axis.
        model: Detector forwarded to ``process_one_image``.

    Only the boxes of the first image in the batch are drawn. The annotated
    copy is resized to 150x150 and shown through IPython's display machinery;
    nothing is returned.
    """
    boxes, _ = process_one_image(img, input_img, model=model)

    canvas = img.copy()
    pen = ImageDraw.Draw(canvas)
    for face_box in boxes[0]:
        pen.rectangle(face_box.tolist(), outline=(255, 0, 0), width=3)

    preview = canvas.resize((150, 150), Image.BILINEAR)
    display.display(preview, display_id=True)

In [16]:
def get_areas(boxes):
    """Compute the area of each box and their sum.

    Args:
        boxes: Iterable of boxes; elements 0..3 of each box are read as
            ``(x0, y0, x1, y1)``.

    Returns:
        ``(total, areas)`` where ``areas`` is a list with
        ``(x1 - x0) * (y1 - y0)`` for each box and ``total`` is their sum
        computed with NumPy (``0.0`` when there are no boxes).
    """
    per_box = [(b[2] - b[0]) * (b[3] - b[1]) for b in boxes]
    total = np.array(per_box).sum()
    return total, per_box

In [17]:
def report_one_image(img, input_img, model=mtcnn):
    """Summarise the faces detected in a single image.

    Args:
        img: The image itself, used only for its pixel count. May be a PIL
            image or an array/tensor shaped ``(H, W, C)`` or ``(1, H, W, C)``.
        input_img: Detector input for that image, with a leading batch axis.
        model: Detector forwarded to ``process_one_image``.

    Returns:
        A dict with the keys ``prob`` (detector probabilities for the image),
        ``boxes`` (its face boxes), ``num_faces``, ``area`` (sum of box
        areas), ``areas`` (per-box areas) and ``ratio`` (``area`` divided by
        the number of pixels in the image).
    """
    boxes, prob = process_one_image(img, input_img, model=model)
    # The detector output is indexed per image; this function reports on the
    # first (only) image, so counting/iterating must use boxes[0], not boxes.
    face_boxes = boxes[0]
    area, areas = get_areas(face_boxes)

    shape = getattr(img, 'shape', None)
    if shape is None:
        # PIL image: `size` is a (width, height) tuple, not a method.
        width, height = img.size
        pixels = width * height
    else:
        dims = tuple(shape)
        if len(dims) == 4:
            # Drop the batch axis so dims starts with the two spatial axes.
            dims = dims[1:]
        pixels = dims[0] * dims[1]

    return {
        'prob': prob[0],
        'boxes': face_boxes,
        'num_faces': len(face_boxes),
        'area': area,
        'areas': areas,
        'ratio': area / pixels,
    }

In [18]:
def report_many_images(img, input_img, image_names, model=mtcnn):
    """Summarise the faces detected in each image of a batch.

    Args:
        img: Batch of images indexable as ``img[n]``, where ``img[n].size()``
            starts with the two spatial dimensions.
        input_img: Detector input for the same batch.
        image_names: File names of the images, in batch order.
        model: Detector forwarded to ``process_one_image``.

    Returns:
        Dict mapping each file name without its extension to a report dict
        with the keys ``Face_prob`` (first probability reported for the
        image), ``Face_boxes``, ``Face_num_faces``, ``Face_area``,
        ``Face_areas`` and ``Face_ratio`` (total box area divided by the
        image's pixel count).

        A batch of one image is reported in exactly the same shape; it used
        to be returned as a flat dict with different keys, which could not be
        combined with the reports of other batches.
    """
    boxes, prob = process_one_image(img, input_img, model)
    all_report = {}

    for n, image_name in enumerate(image_names):
        face_boxes = boxes[n]
        area, areas = get_areas(face_boxes)
        frame_size = img[n].size()

        # splitext instead of [:-4]: a fixed slice leaves a trailing dot on
        # five-character extensions such as '.jpeg'.
        key = os.path.splitext(image_name)[0]
        all_report[key] = {
            'Face_prob': prob[n][0],
            'Face_boxes': face_boxes,
            'Face_num_faces': len(face_boxes),
            'Face_area': area,
            'Face_areas': areas,
            'Face_ratio': area / (frame_size[0] * frame_size[1]),
        }

    return all_report

In [19]:
def report_images(IDs=[0,1], images_folder=images_folder, all_images=all_images, model=mtcnn):
    """Load the images at positions *IDs* and report the faces found in them.

    Args:
        IDs: Positions inside *all_images* of the images to process.
        images_folder: Root folder of the images.
        all_images: Image listing, as accepted by ``get_one_image``.
        model: Detector forwarded to ``report_many_images``.

    Returns:
        The report produced by ``report_many_images`` for the batch, or an
        empty dict when *IDs* is empty.
    """
    if len(IDs) == 0:
        # np.stack / torch.cat cannot build a batch out of nothing.
        return {}

    # Load every image exactly once; each load opens, converts and
    # transforms the file, so repeating it per output field is wasteful.
    loaded = [get_one_image(ID, all_images, images_folder) for ID in IDs]
    image_names = [item[0] for item in loaded]
    imgs = torch.as_tensor(np.stack([item[1] for item in loaded]))
    input_img = torch.cat(tuple(item[-1] for item in loaded))
    return report_many_images(imgs, input_img, image_names, model=model)

In [20]:
def images_to_df(IDs=[0,1], images_folder=images_folder, all_images=all_images, model=mtcnn):
    """Return the face report for the images at *IDs* as a DataFrame.

    Args:
        IDs: Positions inside *all_images* of the images to process.
        images_folder: Root folder of the images.
        all_images: Image listing, as accepted by ``report_images``.
        model: Detector forwarded to ``report_images``.

    Returns:
        ``pandas.DataFrame`` built directly from the report dict returned by
        ``report_images`` (callers transpose it to get one row per image).
    """
    report = report_images(IDs, images_folder=images_folder, all_images=all_images, model=model)
    return pd.DataFrame(report)

In [21]:
def batch_df(images_folder=images_folder, all_images=all_images, batch_size = 8, model=mtcnn, ls_df = None):
    """Run face detection over a whole image listing, batch by batch.

    Args:
        images_folder: Root folder of the images.
        all_images: Image listing, as accepted by ``images_to_df``.
        batch_size: Number of images submitted to the detector at once.
        model: Detector forwarded to ``images_to_df``.
        ls_df: Optional list the per-batch frames are appended to. When a
            list is supplied it is extended in place and its existing frames
            are part of the result. When omitted a new list is created for
            this call, so results of separate calls are never mixed.

    Returns:
        One ``pandas.DataFrame`` with one row per image (the transposed
        per-batch reports concatenated), or an empty DataFrame when there is
        nothing to concatenate.
    """
    if ls_df is None:
        # A literal [] default would be created once and shared by every
        # call, silently carrying frames over from previous datasets.
        ls_df = []

    size = len(all_images)
    batch_total = -(-size // batch_size)  # ceiling division

    # Stepping through the listing never yields an empty trailing batch,
    # which happened whenever size was a multiple of batch_size.
    for batch_idx, start in enumerate(range(0, size, batch_size), start=1):
        IDs = list(range(start, min(start + batch_size, size)))
        temp_df = images_to_df(IDs, images_folder=images_folder, all_images=all_images, model=model)
        ls_df.append(temp_df.T)
        print('{}/{} batches finished'.format(batch_idx, batch_total))

    if not ls_df:
        return pd.DataFrame()
    return pd.concat(ls_df, axis=0)

In [18]:
# Detect faces in the Venezia images, 256 images per batch.
# A fresh list is passed as ls_df so the result holds only this dataset's rows.
Face_pred = batch_df(images_folder, all_images, 256, ls_df=[])

  batch_boxes, batch_points = np.array(batch_boxes), np.array(batch_points)
  boxes = np.array(boxes)
  probs = np.array(probs)
  points = np.array(points)


1/12 batches finished
2/12 batches finished
3/12 batches finished
4/12 batches finished
5/12 batches finished
6/12 batches finished
7/12 batches finished
8/12 batches finished
9/12 batches finished
10/12 batches finished
11/12 batches finished


In [28]:
# Detect faces in the Suzhou images, 256 images per batch.
# A fresh list is passed as ls_df so the result holds only this dataset's rows.
Face_pred_Suzhou = batch_df(images_folder_Suzhou, all_images_Suzhou, 256, ls_df=[])

  batch_boxes, batch_points = np.array(batch_boxes), np.array(batch_points)
  boxes = np.array(boxes)
  probs = np.array(probs)
  points = np.array(points)


1/13 batches finished
2/13 batches finished
3/13 batches finished
4/13 batches finished
5/13 batches finished
6/13 batches finished
7/13 batches finished
8/13 batches finished
9/13 batches finished
10/13 batches finished
11/13 batches finished
12/13 batches finished


In [22]:
# Detect faces in the Amsterdam images, 256 images per batch.
# A fresh list is passed as ls_df so the result holds only this dataset's rows.
Face_pred_Amsterdam = batch_df(images_folder_Amsterdam, all_images_Amsterdam, 256, ls_df=[])

  batch_boxes, batch_points = np.array(batch_boxes), np.array(batch_points)
  boxes = np.array(boxes)
  probs = np.array(probs)
  points = np.array(points)


1/15 batches finished
2/15 batches finished
3/15 batches finished
4/15 batches finished
5/15 batches finished
6/15 batches finished
7/15 batches finished
8/15 batches finished
9/15 batches finished
10/15 batches finished
11/15 batches finished
12/15 batches finished
13/15 batches finished
14/15 batches finished


In [116]:
# Detect faces in the large (grid-150) image set, 256 images per batch.
# A fresh list is passed as ls_df so the result holds only this dataset's rows.
Face_pred_large = batch_df(images_folder_large, all_images_large, 256, ls_df=[])

1/317 batches finished
2/317 batches finished
3/317 batches finished
4/317 batches finished
5/317 batches finished
6/317 batches finished
7/317 batches finished
8/317 batches finished
9/317 batches finished
10/317 batches finished
11/317 batches finished
12/317 batches finished
13/317 batches finished
14/317 batches finished
15/317 batches finished
16/317 batches finished
17/317 batches finished
18/317 batches finished
19/317 batches finished
20/317 batches finished
21/317 batches finished
22/317 batches finished
23/317 batches finished
24/317 batches finished
25/317 batches finished
26/317 batches finished
27/317 batches finished
28/317 batches finished
29/317 batches finished
30/317 batches finished
31/317 batches finished
32/317 batches finished
33/317 batches finished
34/317 batches finished
35/317 batches finished
36/317 batches finished
37/317 batches finished
38/317 batches finished
39/317 batches finished
40/317 batches finished
41/317 batches finished
42/317 batches finished
4

In [130]:
# Drop rows whose index label repeats, keeping the first occurrence: the index
# is moved into a column named 'index', de-duplicated on it, and restored.
# As a side effect the index is now named 'index', which becomes the header of
# the first column when the frame is written to CSV.
Face_pred_large = Face_pred_large.reset_index().drop_duplicates(subset = 'index').set_index('index')

In [20]:
# Save the Venezia predictions as a tab-separated file.
Face_pred.to_csv('Venezia/data_storage/Face_preds.csv',sep='\t')

In [29]:
# Save the Suzhou predictions as a tab-separated file.
Face_pred_Suzhou.to_csv('Suzhou/data_storage/Face_preds.csv',sep='\t')

In [23]:
# Save the Amsterdam predictions as a tab-separated file.
Face_pred_Amsterdam.to_csv('Amsterdam/data_storage/Face_preds.csv',sep='\t')

In [131]:
# Save the de-duplicated large-set predictions as a tab-separated file.
Face_pred_large.to_csv('data_storage/images/Face_preds.csv',sep='\t')

In [6]:
# Reload the Venezia predictions; the column pandas labels 'Unnamed: 0' (a
# header-less first column) is used as the index.
Face_pred= pd.read_csv('Venezia/data_storage/Face_preds.csv',sep='\t',index_col='Unnamed: 0')

In [7]:
Face_pred

Unnamed: 0,Face_prob,Face_boxes,Face_num_faces,Face_area,Face_areas,Face_ratio,index
3,0.000000,[],0,0.00000,[],0.000000,51870743082
4,0.000000,[],0,0.00000,[],0.000000,51871789478
5,0.000000,[],0,0.00000,[],0.000000,51870742942
6,0.929874,[[ 65.960014 45.39857 112.72479 105.72964 ]],1,2821.36900,[2821.369],0.125394,51870380541
13,0.000000,[],0,0.00000,[],0.000000,51863124066
...,...,...,...,...,...,...,...
4981,0.000000,[],0,0.00000,[],0.000000,51233430558
4982,0.813669,[[67.875496 9.444743 83.15715 27.281273]],1,272.57166,[272.57166],0.012114,51234286630
4983,0.000000,[],0,0.00000,[],0.000000,51234294135
4984,0.000000,[],0,0.00000,[],0.000000,51234291635


In [8]:
Face_pred['Face_prob'][Face_pred['Face_prob']!=0].describe()

count    166.000000
mean       0.930268
std        0.098505
min        0.700429
25%        0.868059
50%        0.991205
75%        0.999570
max        1.000000
Name: Face_prob, dtype: float64

In [9]:
Face_pred['Face_ratio'][Face_pred['Face_prob']!=0].describe()

count    166.000000
mean       0.076534
std        0.184670
min        0.003644
25%        0.014804
50%        0.025501
75%        0.061156
max        1.838786
Name: Face_ratio, dtype: float64

In [14]:
Face_pred['Face_num_faces'][Face_pred['Face_prob']!=0].describe()

count    166.000000
mean       1.349398
std        0.785223
min        1.000000
25%        1.000000
50%        1.000000
75%        1.000000
max        6.000000
Name: Face_num_faces, dtype: float64

In [15]:
# Reload the Suzhou predictions into the same `Face_pred` name (replacing the
# previously loaded frame); the 'Unnamed: 0' column is used as the index.
Face_pred= pd.read_csv('Suzhou/data_storage/Face_preds.csv',sep='\t',index_col='Unnamed: 0')

In [16]:
Face_pred

Unnamed: 0,Face_prob,Face_boxes,Face_num_faces,Face_area,Face_areas,Face_ratio
23903381607,0.0,[],0,0.0,[],0.0
23903645987,0.0,[],0,0.0,[],0.0
23919231927,0.0,[],0,0.0,[],0.0
23940920237,0.0,[],0,0.0,[],0.0
23952125997,0.0,[],0,0.0,[],0.0
...,...,...,...,...,...,...
51859424958,0.0,[],0,0.0,[],0.0
51859673444,0.0,[],0,0.0,[],0.0
51859675789,0.0,[],0,0.0,[],0.0
51859999785,0.0,[],0,0.0,[],0.0


In [17]:
Face_pred['Face_prob'][Face_pred['Face_prob']!=0].describe()

count    303.000000
mean       0.956071
std        0.080793
min        0.701789
25%        0.964053
50%        0.998158
75%        0.999957
max        1.000000
Name: Face_prob, dtype: float64

In [18]:
Face_pred['Face_ratio'][Face_pred['Face_prob']!=0].describe()

count    303.000000
mean       0.057062
std        0.072769
min        0.003033
25%        0.012074
50%        0.025756
75%        0.083591
max        0.483004
Name: Face_ratio, dtype: float64

In [19]:
Face_pred['Face_num_faces'][Face_pred['Face_prob']!=0].describe()

count    303.000000
mean       1.402640
std        0.706821
min        1.000000
25%        1.000000
50%        1.000000
75%        2.000000
max        5.000000
Name: Face_num_faces, dtype: float64

In [20]:
# Reload the Amsterdam predictions into the same `Face_pred` name (replacing
# the previously loaded frame); the 'Unnamed: 0' column is used as the index.
Face_pred= pd.read_csv('Amsterdam/data_storage/Face_preds.csv',sep='\t',index_col='Unnamed: 0')

In [21]:
Face_pred

Unnamed: 0,Face_prob,Face_boxes,Face_num_faces,Face_area,Face_areas,Face_ratio
4074991040,0.000000,[],0,0.0000,[],0.000000
49286194036,0.000000,[],0,0.0000,[],0.000000
4995005091,0.000000,[],0,0.0000,[],0.000000
50996941904,0.000000,[],0,0.0000,[],0.000000
51042706771,0.000000,[],0,0.0000,[],0.000000
...,...,...,...,...,...,...
5335187703,0.000000,[],0,0.0000,[],0.000000
5335187791,0.000000,[],0,0.0000,[],0.000000
5683515939,0.999745,[[ 50.57038 40.098087 67.702614 61.921387]...,2,746.3048,"[373.88184, 372.42297]",0.033169
6526596205,0.000000,[],0,0.0000,[],0.000000


In [22]:
Face_pred['Face_prob'][Face_pred['Face_prob']!=0].describe()

count    667.000000
mean       0.954624
std        0.078805
min        0.700180
25%        0.953888
50%        0.995914
75%        0.999662
max        1.000000
Name: Face_prob, dtype: float64

In [23]:
Face_pred['Face_ratio'][Face_pred['Face_prob']!=0].describe()

count    667.000000
mean       0.049063
std        0.111617
min        0.002593
25%        0.009706
50%        0.019993
75%        0.039661
max        1.127565
Name: Face_ratio, dtype: float64

In [24]:
Face_pred['Face_num_faces'][Face_pred['Face_prob']!=0].describe()

count    667.000000
mean       1.547226
std        0.830411
min        1.000000
25%        1.000000
50%        1.000000
75%        2.000000
max        6.000000
Name: Face_num_faces, dtype: float64

In [26]:
# Reload the large-set predictions into `Face_pred`. No index_col is given
# here, so pandas assigns a default integer index and every column of the
# file (including the saved index) stays a regular column.
Face_pred= pd.read_csv('data_storage/images/Face_preds.csv',sep='\t')

In [27]:
Face_pred

Unnamed: 0,index,Face_prob,Face_boxes,Face_num_faces,Face_area,Face_areas,Face_ratio
0,10003463424,0.0,[],0,0.0,[],0.0
1,10003465315,0.0,[],0,0.0,[],0.0
2,10003466745,0.0,[],0,0.0,[],0.0
3,10003466824,0.0,[],0,0.0,[],0.0
4,10003470025,0.0,[],0,0.0,[],0.0
...,...,...,...,...,...,...,...
80959,9986056745,0.0,[],0,0.0,[],0.0
80960,9986199265,0.0,[],0,0.0,[],0.0
80961,9986220375,0.0,[],0,0.0,[],0.0
80962,9986314863,0.0,[],0,0.0,[],0.0


In [28]:
Face_pred['Face_prob'][Face_pred['Face_prob']!=0].describe()

count    9288.000000
mean        0.948147
std         0.080700
min         0.700076
25%         0.930316
50%         0.992544
75%         0.999603
max         1.000000
Name: Face_prob, dtype: float64

In [29]:
Face_pred['Face_ratio'][Face_pred['Face_prob']!=0].describe()

count    9288.000000
mean        0.075740
std         0.112140
min         0.002026
25%         0.017036
50%         0.036783
75%         0.084583
max         1.440277
Name: Face_ratio, dtype: float64

In [30]:
Face_pred['Face_num_faces'][Face_pred['Face_prob']!=0].describe()

count    9288.000000
mean        1.298019
std         0.650607
min         1.000000
25%         1.000000
50%         1.000000
75%         1.000000
max        10.000000
Name: Face_num_faces, dtype: float64