# **Task 1 | Mask Recognition**

***Goal:*** *Detect human faces in videos and check whether or not each of them is wearing a mask.*

In this notebook we implement two different models to perform the first task :
- [Faster-RCNN (ResNet50)](https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html)

___
___

## **1. INITIALIZATION**

### *1.1 IMPORTS*

In [36]:
from tqdm import tqdm
from xml.dom import minidom

import cv2
import os
import pandas as pd
import random as rd

from tools import engine, utils

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

import torch
import torch.nn as nn
import torchvision

In [3]:
# The report displayed by `session_info.show()` below is what we use to fill
# the `requirement.txt` file.
import session_info
session_info.show()

In [4]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("device: {}".format(device))

device: cuda


### *1.2. DATA LOADING*

In [5]:
# Locations of the dataset on disk (all paths keep their trailing slash because
# file names are appended to them by plain string concatenation).
data_path = "data/FaceMaskDetection/"
annotations_path = data_path + "annotations/"   # directory with the .xml annotation files
images_path = data_path + "images/"             # directory with the images

# Directory listings, sorted (lexicographically) so that both lists follow the same order.
annotation_files = sorted(os.listdir(annotations_path))
image_files = sorted(os.listdir(images_path))

In [6]:
# 1st check-point: same number of files and same file ids in the same order.
# The names are compared without their extension. The previous `[15:-4]` slice
# was empty for every "maksssksksss<id>.<ext>" name whose id has at most three
# digits, so it only ever compared the lengths of the two lists.
assert (
    [os.path.splitext(annotation_file)[0] for annotation_file in annotation_files]
    == [os.path.splitext(image_file)[0] for image_file in image_files]
), "annotation files and image files do not match one-to-one"

In [7]:
# Class names (as read from the <name> tag of the annotation files) -> integer label.
# NOTE(review): torchvision detection models reserve label 0 for the background
# class; confirm that mapping "without_mask" to 0 is intended.
name_to_label = {
    "without_mask": 0,
    "mask_weared_incorrect": 1,
    "with_mask": 2,
}
# Reverse lookup, derived from the mapping above so that the two always agree.
label_to_name = {label: name for name, label in name_to_label.items()}

In [8]:
# we use the following function to have all the annotation files in the same pandas dataframe (easier to manipulate)
def get_annotations(data_path, annotations_path, annotation_files):
    """Gather every bounding-box annotation into a single pandas DataFrame.

    The table is cached as ``annotations.csv`` inside ``data_path``: when that
    file already exists it is simply read back, otherwise the .xml files are
    parsed and the resulting table is written there before being returned.

    Parameters
    ----------
    data_path : str
        Directory holding (or receiving) the ``annotations.csv`` cache.
    annotations_path : str
        Directory containing the .xml annotation files.
    annotation_files : list of str
        Names of the .xml files to parse (only used when no cache exists).

    Returns
    -------
    pandas.DataFrame
        One row per annotated box, with the columns ``image_id``,
        ``image_height``, ``image_width``, ``box_id``, ``box_label``,
        ``xmin``, ``xmax``, ``ymin`` and ``ymax``.

    Raises
    ------
    KeyError
        If an annotation uses a class name missing from ``name_to_label``.
    """
    # os.path.join works whether or not the directories end with a separator
    cache_file = os.path.join(data_path, "annotations.csv")

    # if the dataframe has already been computed and saved as a .csv file
    if os.path.isfile(cache_file):
        return pd.read_csv(cache_file, index_col=None)

    def first_text(node, tag):
        # text content of the first <tag> element found below `node`
        return node.getElementsByTagName(tag)[0].firstChild.data

    # else, we compute it and then save it
    rows = []
    for annotation_file in tqdm(annotation_files):

        annotation = minidom.parse(os.path.join(annotations_path, annotation_file))

        # the <filename> text looks like "maksssksksss<id>.png": drop the
        # 12-character prefix and the 4-character extension to keep the id
        image_id = int(first_text(annotation, "filename")[12:-4])
        image_height = int(first_text(annotation, "height"))
        image_width = int(first_text(annotation, "width"))

        for box_id, box in enumerate(annotation.getElementsByTagName("object")):

            box_label = name_to_label[first_text(box, "name")]
            xmin = int(first_text(box, "xmin"))
            xmax = int(first_text(box, "xmax"))
            ymin = int(first_text(box, "ymin"))
            ymax = int(first_text(box, "ymax"))

            rows.append((image_id, image_height, image_width, box_id, box_label, xmin, xmax, ymin, ymax))

    columns = ["image_id", "image_height", "image_width", "box_id", "box_label", "xmin", "xmax", "ymin", "ymax"]
    annotations = pd.DataFrame(data=rows, columns=columns, index=None)
    annotations.to_csv(cache_file, index=None)

    return annotations

In [37]:
# Build the table with one row per annotated box (or reload it from the CSV cache).
annotations = get_annotations(
    data_path=data_path,
    annotations_path=annotations_path,
    annotation_files=annotation_files,
)
annotations

Unnamed: 0,image_id,image_height,image_width,box_id,box_label,xmin,xmax,ymin,ymax
0,0,366,512,0,0,79,109,105,142
1,0,366,512,1,2,185,226,100,144
2,0,366,512,2,0,325,360,90,141
3,1,156,400,0,2,321,354,34,69
4,1,156,400,1,2,224,261,38,73
...,...,...,...,...,...,...,...,...,...
4067,98,267,400,2,2,263,287,62,85
4068,98,267,400,3,2,344,377,80,106
4069,99,267,400,0,1,181,273,54,162
4070,99,267,400,1,2,99,176,87,165


In [113]:
class FaceMaskDataset(Dataset):
    """Face-mask detection dataset yielding ``(image, target)`` pairs.

    Every image is resized to ``image_size`` x ``image_size`` and its boxes are
    rescaled to the same coordinate system.

    Parameters
    ----------
    annotations : pandas.DataFrame
        One row per box with the columns ``image_id``, ``image_height``,
        ``image_width``, ``box_label``, ``xmin``, ``xmax``, ``ymin``, ``ymax``.
    images_dir : str
        Directory containing the image files.
    image_files : list of str
        Image file names, expected to look like ``maksssksksss<id>.png``.
    image_size : int, optional
        Side of the square images returned by the dataset (default 128).

    Notes
    -----
    NOTE(review): the image tensor keeps OpenCV's BGR channel order and the
    0-255 value range, whereas torchvision detection models document RGB
    inputs in the 0-1 range -- confirm this is intended. Changing it here
    alone would desynchronise training and inference preprocessing.
    NOTE(review): labels are passed through unchanged, so label 0 reaches the
    model, which torchvision treats as the background class -- confirm.
    """

    def __init__(self, annotations, images_dir, image_files, image_size=128):
        self.annotations = annotations
        self.images_dir = images_dir
        self.image_files = image_files
        self.image_size = image_size

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.image_files)

    def _build_target(self, image_id, rows, height, width):
        """Build the target dict of one image.

        ``rows`` holds the annotation rows of the image and ``height`` /
        ``width`` are the dimensions the box coordinates refer to. An image
        without any box yields empty tensors (``boxes`` has shape ``(0, 4)``).
        """
        size = self.image_size
        # rescale the boxes from the original image to the resized one
        boxes = [
            [size * xmin / width, size * ymin / height, size * xmax / width, size * ymax / height]
            for xmin, ymin, xmax, ymax in zip(rows["xmin"], rows["ymin"], rows["xmax"], rows["ymax"])
        ]
        areas = [(xmax - xmin) * (ymax - ymin) for xmin, ymin, xmax, ymax in boxes]
        return {
            # the explicit reshape keeps the (N, 4) layout even when N == 0
            "boxes": torch.as_tensor(boxes, dtype=torch.float32).reshape(len(boxes), 4),
            "labels": torch.as_tensor(list(rows["box_label"]), dtype=torch.int64),
            "image_id": torch.as_tensor([image_id]),
            "area": torch.as_tensor(areas, dtype=torch.float32),
            "iscrowd": torch.zeros((len(boxes),), dtype=torch.int64),
        }

    def __getitem__(self, idx):
        """Return the resized image tensor (C, H, W) and its target dict.

        Raises
        ------
        FileNotFoundError
            If the image file cannot be read.
        """
        image_file = self.image_files[idx]
        image_path = os.path.join(self.images_dir, image_file)

        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None
            raise FileNotFoundError("could not read image: {}".format(image_path))

        # "maksssksksss<id>.png" -> <id>
        image_id = int(image_file[12:-4])
        rows = self.annotations[self.annotations["image_id"] == image_id]

        if len(rows) > 0:
            height = int(rows["image_height"].iloc[0])
            width = int(rows["image_width"].iloc[0])
        else:
            # no annotation for this image: fall back to the actual image size
            height, width = img.shape[:2]

        target = self._build_target(image_id, rows, height, width)

        # HWC uint8 array -> CHW float tensor, then resize to a square
        img = torch.as_tensor(img, dtype=torch.float32).permute(2, 0, 1)
        img = transforms.Resize((self.image_size, self.image_size))(img)
        return img, target

90% of the dataset is used for training, and the remaining 10% is held out as a test set.

In [116]:
FMD = FaceMaskDataset(annotations, images_path, image_files)

train_ratio = 0.9
train_size = int(train_ratio * len(FMD))
test_size = len(FMD) - train_size

# A seeded generator makes the split reproducible: without it, every kernel
# restart draws a different test set, so a model reloaded from disk could be
# evaluated on images it was trained on.
split_generator = torch.Generator().manual_seed(0)
trainset, testset = torch.utils.data.random_split(FMD, [train_size, test_size], generator=split_generator)

batch_size = 2

# utils.collate_fn is needed because each image comes with a variable number of boxes
# (presumably it regroups the batch into tuples -- see tools/utils.py)
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, collate_fn=utils.collate_fn)
# the evaluation set does not need to be shuffled
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, collate_fn=utils.collate_fn)

___

## **2. THE MODELS**

### *2.1 Faster-RCNN (ResNet50)*

#### Defining the models

In [12]:
def get_rcnn_model(nb_classes):
    """Return a COCO-pretrained Faster R-CNN (ResNet50-FPN) with a fresh box head.

    Parameters
    ----------
    nb_classes : int
        Number of classes predicted by the new head.
        NOTE(review): torchvision counts the background as class 0 -- confirm
        that the value passed by the callers includes it.

    Returns
    -------
    The detection model, whose box predictor has been replaced by an
    untrained ``FastRCNNPredictor`` with ``nb_classes`` outputs.
    """
    # detector with weights pre-trained on COCO
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    roi_heads = detector.roi_heads
    # the new head must accept the same number of input features as the old one
    head_in_features = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(head_in_features, nb_classes)
    return detector

In [13]:
# Build the detector and place it on the selected device; the final `eval()`
# call returns the model itself, so the cell displays its architecture.
modelRCNN = get_rcnn_model(nb_classes=3).to(device)
modelRCNN.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

#### Training the model

In [13]:
num_epochs = 10

# only the parameters that still require gradients are handed to the optimizer
params = list(filter(lambda p: p.requires_grad, modelRCNN.parameters()))
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# NOTE(review): the scheduler is stepped once per epoch and `step_size` equals
# `num_epochs`, so the x0.1 decay only takes effect after the last epoch.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

In [34]:
for epoch in range(num_epochs):
    # train for one epoch, logging every len(trainloader)//3 iterations (about three times per epoch)
    engine.train_one_epoch(modelRCNN, optimizer, trainloader, device, epoch, print_freq=len(trainloader)//3)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    engine.evaluate(modelRCNN, testloader, device=device)

Epoch: [0]  [  0/384]  eta: 0:05:26  lr: 0.000018  loss: 2.8066 (2.8066)  loss_classifier: 1.3367 (1.3367)  loss_box_reg: 0.3101 (0.3101)  loss_objectness: 0.9680 (0.9680)  loss_rpn_box_reg: 0.1918 (0.1918)  time: 0.8510  data: 0.1350  max mem: 2269
Epoch: [0]  [128/384]  eta: 0:02:24  lr: 0.001687  loss: 0.2520 (0.6219)  loss_classifier: 0.0893 (0.2834)  loss_box_reg: 0.1667 (0.2331)  loss_objectness: 0.0137 (0.0777)  loss_rpn_box_reg: 0.0085 (0.0277)  time: 0.5816  data: 0.0932  max mem: 2537
Epoch: [0]  [256/384]  eta: 0:01:15  lr: 0.003357  loss: 0.2674 (0.4900)  loss_classifier: 0.0741 (0.1969)  loss_box_reg: 0.1721 (0.2136)  loss_objectness: 0.0087 (0.0556)  loss_rpn_box_reg: 0.0056 (0.0240)  time: 0.6189  data: 0.0794  max mem: 2709
Epoch: [0]  [383/384]  eta: 0:00:00  lr: 0.005000  loss: 0.2571 (0.4361)  loss_classifier: 0.1038 (0.1644)  loss_box_reg: 0.1398 (0.1949)  loss_objectness: 0.0261 (0.0539)  loss_rpn_box_reg: 0.0086 (0.0229)  time: 0.6328  data: 0.1100  max mem: 2709


#### Saving the model

In [20]:
# make sure the output directory exists, otherwise torch.save fails on a fresh checkout
os.makedirs("./models", exist_ok=True)
torch.save(modelRCNN.state_dict(), "./models/MaskRecognitionFasterRCNN.pt")

#### Testing the model

In [14]:
# Rebuild the architecture, then restore the fine-tuned weights.
# The checkpoint was saved from a model living on `device` (possibly a GPU),
# while this freshly built model stays on the CPU: `map_location="cpu"` lets
# the file be loaded on machines without CUDA as well.
modelRCNN = get_rcnn_model(nb_classes=3)
modelRCNN.load_state_dict(torch.load("./models/MaskRecognitionFasterRCNN.pt", map_location="cpu"))
modelRCNN.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [134]:
def show_random_results(model, nb_images, score_threshold=0.0, image_size=128):
    """Display the detections of ``model`` on randomly chosen images.

    For each selected image, the original picture is shown in an OpenCV
    window, then (after a key press) the same picture with the predicted
    boxes blended in. Box colours (BGR): red for label 0, (0, 127, 127) for
    label 1 and green for any other label.

    The function reads the module-level ``annotations`` table (to know which
    image ids exist) and ``images_path`` (to locate the files).

    Parameters
    ----------
    model : torch.nn.Module
        Detection model returning, for each input image, a dict with the keys
        ``boxes``, ``labels`` and ``scores``.
    nb_images : int
        Number of distinct images to show (capped at the number of available
        images).
    score_threshold : float, optional
        Detections scoring below this value are not drawn. The default (0.0)
        draws every detection returned by the model.
    image_size : int, optional
        Side of the square the image is resized to before being fed to the
        model (default 128); it should match the size used for training.

    Raises
    ------
    FileNotFoundError
        If one of the selected images cannot be read.
    """
    box_colors = {0: (0, 0, 255), 1: (0, 127, 127)}
    default_color = (0, 255, 0)

    # `annotations` holds one row per box: sample among the distinct ids so
    # that an image is neither favoured by its number of boxes nor shown twice
    available_ids = annotations["image_id"].unique().tolist()
    img_ids = rd.sample(available_ids, min(nb_images, len(available_ids)))

    # the input must live on the same device as the model's weights
    model_device = next(model.parameters()).device
    resize = transforms.Resize((image_size, image_size))

    # in training mode the detector expects targets: switch to inference mode
    # for the duration of the call and restore the previous mode afterwards
    was_training = model.training
    model.eval()

    try:
        for img_id in img_ids:

            file_name = "maksssksksss{}.png".format(img_id)
            img = cv2.imread(images_path + file_name)
            if img is None:
                # cv2.imread signals a missing/unreadable file by returning None
                raise FileNotFoundError("could not read image: {}".format(images_path + file_name))

            cv2.imshow("before | {}".format(file_name), img)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

            h, w = img.shape[:2]
            overlay = img.copy()

            # same preprocessing as the dataset: HWC array -> CHW float tensor, resized
            model_input = resize(torch.as_tensor(img, dtype=torch.float32).permute(2, 0, 1))
            model_input = model_input.unsqueeze(0).to(model_device)

            # inference only: no need to track gradients
            with torch.no_grad():
                prediction = model(model_input)[0]

            boxes = prediction["boxes"].tolist()
            labels = prediction["labels"].tolist()
            scores = prediction["scores"].tolist()

            for box, label, score in zip(boxes, labels, scores):
                if score < score_threshold:
                    continue

                # map the box from the resized image back to the original one
                xmin = int(w * box[0] / image_size)
                ymin = int(h * box[1] / image_size)
                xmax = int(w * box[2] / image_size)
                ymax = int(h * box[3] / image_size)

                color = box_colors.get(int(label), default_color)
                cv2.rectangle(overlay, (xmin, ymin), (xmax, ymax), color, 2)

            # blend the drawn boxes with the untouched image
            output = cv2.addWeighted(overlay, 0.5, img, 0.5, 0)
            cv2.imshow("after | {}".format(file_name), output)
            cv2.waitKey(0)
            cv2.destroyAllWindows()
    finally:
        model.train(was_training)

In [139]:
# visual sanity check of the reloaded model on 10 random images
show_random_results(model=modelRCNN, nb_images=10)