The file structure on Google Colab is as follows:

*   content/
  *   dataset/
      *  1/
          *  {48 photos from 5025 dataset}



This implementation is heavily influenced by Tim Esler's PyTorch implementation of MTCNN and its inference notebook:

https://github.com/timesler/facenet-pytorch#performance-comparison-of-face-detection-packages

In [None]:
pip install facenet-pytorch

In [None]:
#Import MTCNN detector and necessary packages
import json
from facenet_pytorch import MTCNN
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt
# Number of DataLoader worker processes: none on Windows (os.name == 'nt'), four elsewhere
workers = 4 if os.name != 'nt' else 0

In [None]:
# Use the first CUDA-enabled GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

Running on device: cpu


In [None]:
# Instantiate the MTCNN face detector on the selected device.
# NOTE(review): parameter meanings are defined by facenet-pytorch's MTCNN class;
# the values below are passed through unchanged.
mtcnn = MTCNN(
    image_size=300,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],  # one threshold per entry; presumably one per MTCNN stage
    factor=0.709,
    post_process=True,
    device=device,
)

In [None]:
# Collate helper handed to the DataLoader that feeds images to the MTCNN detector
def collate_fn(x):
    """Return the first element of the batch ``x``.

    Passed to the DataLoader as ``collate_fn`` so that iterating the loader
    yields the first sample of each batch as-is.
    """
    first_sample = x[0]
    return first_sample

# Build the image dataset from the Colab dataset directory
dataset = datasets.ImageFolder('/content/dataset/')
# Attach the inverse of class_to_idx: class index -> class name
dataset.idx_to_class = dict(
    (index, name) for name, index in dataset.class_to_idx.items()
)
# No batch_size is given, so the DataLoader default applies; collate_fn then
# passes the first sample of each batch through unchanged
loader = DataLoader(
    dataset,
    collate_fn=collate_fn,
    num_workers=workers,
)



In [None]:
# Maps the loader index of each image to the bounding box and landmarks of its first detected face
mtcnn_annotated = {}
# Labels for the five landmark points, in the order they are read from the detector output
landmark_names = ("left_eye", "right_eye", "nose", "left_mouth", "right_mouth")

# Iterate through every image within the loader object
for i, x in enumerate(loader):
    # Each loader item is indexed here as (image, ...); only the image is used
    image = x[0]
    # Run the MTCNN detector on the image and get bounding boxes, confidences and
    # landmarks (the confidences in `probs` are not used further)
    boxes, probs, landmarks = mtcnn.detect(image, landmarks=True)
    if boxes is None:
        # detect() returns None instead of arrays when no face is found; skip the
        # image rather than crash in zip()/indexing below. Its index is simply
        # absent from mtcnn_annotated.
        print('No face detected in image {}, skipping'.format(i))
        continue

    # Display landmarks and bounding boxes on the image
    fig, ax = plt.subplots(figsize=(16, 12))
    ax.imshow(image)
    ax.axis('off')
    for box, landmark in zip(boxes, landmarks):
        # box is read as [x1, y1, x2, y2]; draw its four edges in red
        ax.plot(box[[0, 2]], box[[1, 1]], 'r-')
        ax.plot(box[[0, 2]], box[[3, 3]], 'r-')
        ax.plot(box[[2, 2]], box[[1, 3]], 'r-')
        ax.plot(box[[0, 0]], box[[3, 1]], 'r-')
        ax.scatter(landmark[:, 0], landmark[:, 1], s=10)
    # Render the figure now and release it, so one open figure per image does not
    # accumulate over the whole dataset
    plt.show()
    plt.close(fig)

    # Only the first detected face is exported. Convert every coordinate to a plain
    # Python float so the result is JSON serializable.
    x1, y1, x2, y2 = (float(v) for v in boxes[0][:4])
    # Place x, y, w, h values for the bounding box in a dictionary
    bb = {"x": x1, "y": y1, "w": x2 - x1, "h": y2 - y1}
    # Place landmark coordinates in a dictionary; tolist() yields nested lists of Python floats
    lm = dict(zip(landmark_names, landmarks[0].tolist()))
    # Add bounding box and landmark coordinates to the dictionary for this face id
    mtcnn_annotated[i] = {"bounding_box": bb, "landmarks": lm}
  




In [None]:
# Export dictionary as JSON file for further analysis.
# json.dump writes straight to the file, so the `json` module name is never rebound
# to a string and this cell can be re-run; the `with` block closes the file even if
# serialization fails.
with open("mtcnn_annotated.json", "w") as f:
    json.dump(mtcnn_annotated, f)