# **Mask detection and social distancing:** Face Mask Detection

Through this notebook we implement a PyTorch model to detect whether a face has a mask on or not. 

___
___

## **1. INITIALIZATION**

### *1.1 IMPORTS*

In [1]:
from IPython.display import display
from xml.dom import minidom # to manipulate .xml files

import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import time

from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms

import torch
import torch.nn as nn

To fill the `requirement.txt` file we use the following line of code:

In [2]:
import session_info
session_info.show()

In [3]:
# setting device on GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("device: {}".format(device))

device: cuda


### *1.2. DATA LOADING*

In [4]:
root_dir = "data/FaceMaskDetection/"

In [5]:
def get_annotations(root_dir):

    """

    arg:
    - root_dir (string): 
        the directory with the "images" and "annotations" folders
    
    out:
    - df (dataframe):
        a dataframe matching an image id and a bounding box id to the size of
        the image and the position of the bounding box and its annotation:
        "without mask" => -1 | "with mask" => 1 |"mask_weared_incorrect" => 0  
    """

    files = os.listdir(root_dir+"annotations/")
    files.sort()
    
    # if the dataframe has not been created yet (saved in the "annotations.csv" file later)
    if "annotations.csv" not in os.listdir(root_dir+"annotations/"):

        data = []
        mask_value = {"without_mask": -1, "mask_weared_incorrect": 0, "with_mask": 1}

        for file in files:
        
            ann = minidom.parse(root_dir+"annotations/"+file)
            img_id = int(ann.getElementsByTagName("filename")[0].firstChild.data[12:-4])
            img_height = int(ann.getElementsByTagName("height")[0].firstChild.data)
            img_width = int(ann.getElementsByTagName("width")[0].firstChild.data)
        
            for i,object in enumerate(ann.getElementsByTagName("object")):

                box_id = i
                
                label = mask_value[object.getElementsByTagName("name")[0].firstChild.data]
                xmin = object.getElementsByTagName("xmin")[0].firstChild.data
                xmax = object.getElementsByTagName("xmax")[0].firstChild.data
                ymin = object.getElementsByTagName("ymin")[0].firstChild.data
                ymax = object.getElementsByTagName("ymax")[0].firstChild.data

                data.append((img_id, img_height, img_width, box_id, label, xmin, xmax, ymin, ymax))
        
        columns = ["img_id", "img_height", "img_width", "box_id", "label", "xmin", "xmax", "ymin", "ymax"]
        pd.DataFrame(data=data, columns=columns, index=None).to_csv(root_dir+"annotations/annotations.csv", index=None)
        
    return pd.read_csv(root_dir+"annotations/annotations.csv", index_col=False)

In [6]:
annotations = get_annotations(root_dir)

In [7]:
annotations

Unnamed: 0,img_id,img_height,img_width,box_id,label,xmin,xmax,ymin,ymax
0,0,366,512,0,-1,79,109,105,142
1,0,366,512,1,1,185,226,100,144
2,0,366,512,2,-1,325,360,90,141
3,1,156,400,0,1,321,354,34,69
4,1,156,400,1,1,224,261,38,73
...,...,...,...,...,...,...,...,...,...
4067,98,267,400,2,1,263,287,62,85
4068,98,267,400,3,1,344,377,80,106
4069,99,267,400,0,0,181,273,54,162
4070,99,267,400,1,1,99,176,87,165


In [43]:
class FaceMaskDataset(Dataset):

    def __init__(self, root_dir):
        
        self.annotations = get_annotations(root_dir)
        self.img_dir = root_dir+"images/"
    
    def __len__(self):
        return len(self.annotations)
    
    def __getitem__(self, idx):

        img = plt.imread(self.img_dir+"maksssksksss{}.png".format(int(self.annotations.iloc[idx]["img_id"])))
        if img.shape[-1] == 4:
            img = img[:,:,:3]
        xmin = self.annotations.iloc[idx]["xmin"]
        xmax = self.annotations.iloc[idx]["xmax"]
        ymin = self.annotations.iloc[idx]["ymin"]
        ymax = self.annotations.iloc[idx]["ymax"]
        img = transforms.Resize((64,64))(torch.Tensor(img[ymin:ymax,xmin:xmax]).permute(2,0,1))
        label = torch.Tensor([0,0,0])
        label[self.annotations.iloc[idx]["label"]+1] = 1 # one hot encoding
        return img, label

In [44]:
FMD = FaceMaskDataset(root_dir)
print(len(FMD))

4072


In [45]:
train_dataset, test_dataset = torch.utils.data.random_split(FMD, [3200,872])

In [57]:
batch_size = 64

train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)

___

## **2. THE MODEL**

### *2.1. DEFINING THE MODEL*

In [237]:
model = models.resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Linear(512,3)

model.to(device)
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

### *2.2. TRAINING*

In [238]:
def train_model(criterion, optimizer, nb_epochs):

    s = time.time()

    for epoch in range(nb_epochs):

        running_loss_train, running_loss_test = 0,0
        running_acc_train, running_acc_test = 0,0

        for train in [True, False]:

            if train:
                dataloader = train_dataloader
                model.train()
            else:
                dataloader = test_dataloader
                model.eval()

            for inputs,labels in dataloader:

                inputs = inputs.to(device)
                labels = labels.to(device)

                if train:
                    optimizer.zero_grad()

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                if train:
                    loss.backward()
                    optimizer.step()
                    running_loss_train += loss.item()
                    running_acc_train += int(torch.count_nonzero(torch.clamp_max(torch.abs(torch.argmax(outputs,dim=1)-torch.argmax(labels,dim=1)),1)-1))
                
                else:
                    running_loss_test += loss.item()
                    running_acc_test += int(torch.count_nonzero(torch.clamp_max(torch.abs(torch.argmax(outputs,dim=1)-torch.argmax(labels,dim=1)),1)-1))
        
        running_loss_train /= len(train_dataset)
        running_loss_test /= len(test_dataset)
        running_acc_train /= len(train_dataset)
        running_acc_test /= len(test_dataset)


        print("epochs {} ({} s) | train loss : {} | test loss : {} | train acc : {} | test acc : {}".format(
            epoch,
            int(time.time()-s),
            int(100000*running_loss_train)/100000,
            int(100000*running_loss_test)/100000,
            int(100000*running_acc_train)/100000,
            int(100000*running_acc_test)/100000)
        )

In [239]:
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
criterion = nn.MSELoss().cuda()
nb_epochs = 10

In [240]:
train_model(criterion, optimizer, nb_epochs)

epochs 0 (51 s) | train loss : 0.0088 | test loss : 0.00815 | train acc : 0.55031 | test acc : 0.57224
epochs 1 (105 s) | train loss : 0.00706 | test loss : 0.00767 | train acc : 0.59375 | test acc : 0.62385


### *2.3. SAVING THE MODEL*

In [None]:
torch.save(model.state_dict(), "./models/FaceMaskDetection.pt")

___

## **3. TESTING THE MODEL**

### *3.1. LOADING THE MODEL*

In [None]:
model = models.resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Linear(512,3)

model.to(device)
model.load_state_dict(torch.load("./models/FaceMaskDetection.pt"))
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

### *3.2. VISUALIZING PREDICTION*

In [233]:
def prediction(model, img):
    img = transforms.Resize((64,64))(torch.Tensor(img).permute(2,0,1))
    img = img.reshape((1,3,64,64))
    return int(torch.argmax(model(img)))

In [235]:
# reminder :
# - the model doesn't detect the face !
# - we provide the faces location and the model determines if the mask is on, off or misplaced

def show_random_results(nb_images):

    img_ids = np.random.choice(max(annotations["img_id"]), size=nb_images)

    for img_id in img_ids:

        a = annotations[annotations["img_id"] == img_id]

        plt.figure(figsize=(12,6))

        for i in range(len(a)):

            img = plt.imread(root_dir+"images/maksssksksss{}.png".format(img_id))
            xmin = a.iloc[i]["xmin"]
            xmax = a.iloc[i]["xmax"]
            ymin = a.iloc[i]["ymin"]
            ymax = a.iloc[i]["ymax"]
            pred = prediction(model, img[ymin:ymax,xmin:xmax])
            img 
            
            
        plt.imshow(img)
        plt.show()


In [236]:
show_random_results(2)

RuntimeError: shape '[1, 3, 64, 64]' is invalid for input of size 16384

<Figure size 864x432 with 0 Axes>