# **Task 1 | Mask Recognition**

***Goal:*** *Detect human faces in videos and check whether or not they are wearing a mask*. 

In this notebook we implement the following model to perform the task:
- [Faster-RCNN (ResNet50)](https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html)

___
___

## **1. INITIALIZATION**

### *1.1 IMPORTS*

In [1]:
from IPython.display import display, clear_output
import cv2
import os
import pandas as pd
import random as rd

from tools import engine, utils

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

import torch
import torchvision

In [2]:
# to fill the `requirement.txt` file we use the following line of code:
# (session_info.show() reports the versions of the packages loaded in this session)
import session_info
session_info.show()

In [3]:
# release the GPU memory that PyTorch keeps cached from earlier work in this kernel
torch.cuda.empty_cache()

# pick the compute device: CUDA when it is available, the CPU otherwise
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("device: {}".format(device))

device: cuda


### *1.3. DATA LOADING*

In [4]:
# data preprocessing: run the project's preprocessing script as a shell command
# (presumably it produces the processed dataset directory read in the next cell — confirm in DataPreprocessing.py)
!python ./DataPreprocessing.py

5187




In [5]:
data_dir_path = "data/FaceMaskDetection_Processed/" # path to the directory with the relevant data
images_dir_path = data_dir_path + "images/"         # path to the directory with the images
# os.listdir returns the entries in arbitrary order; sorting keeps the mapping
# "dataset index -> image file" identical from one run / machine to the next
images_files = sorted(os.listdir(images_dir_path))   # list of files in the image directory

annotations = pd.read_csv(data_dir_path + "annotations.csv", index_col=None) # dataframe with information about the images and their bounding boxes
display(annotations)

Unnamed: 0,image_id,image_height,image_width,box_id,box_label,xmin,xmax,ymin,ymax
0,0,366,512,0,1,79,109,105,142
1,0,366,512,1,3,185,226,100,144
2,0,366,512,2,1,325,360,90,141
3,1,366,512,0,1,402,432,105,142
4,2,111,90,0,1,30,60,37,74
...,...,...,...,...,...,...,...,...,...
8910,5182,266,276,0,2,92,184,88,177
8911,5183,266,276,0,2,92,184,88,177
8912,5184,266,276,0,2,92,184,88,177
8913,5185,266,276,0,2,92,184,88,177


In [7]:
# number of annotation rows (one row per bounding box) for each class label
annotations["box_label"].value_counts()

1    4228
3    3232
2    1455
Name: box_label, dtype: int64

In [8]:
class FaceMaskDataset(Dataset):
    """Detection dataset pairing every image file with its annotated boxes.

    Each item is an ``(image, target)`` pair: the image is resized to 256x256
    and the boxes read from ``annotations`` are rescaled to that size.

    Args:
        annotations: DataFrame with one row per box and (at least) the columns
            ``image_id``, ``image_height``, ``image_width``, ``box_label``,
            ``xmin``, ``xmax``, ``ymin`` and ``ymax``.
        images_dir_path: directory holding the images. It is concatenated with
            the file name as-is, so it must end with a path separator.
        images_files: names of the files in ``images_dir_path``; the part of
            each name before the extension must be the integer ``image_id``.
    """

    def __init__(self, annotations, images_dir_path, images_files):
        self.annotations = annotations
        self.images_dir_path = images_dir_path
        self.images_files = images_files

    def __len__(self):
        """Return the number of image files in the dataset."""
        return len(self.images_files)

    def __getitem__(self, idx):
        """Load image ``idx`` and build its detection target.

        Returns:
            A tuple ``(img, target)`` where ``img`` is a float tensor of shape
            (3, 256, 256) and ``target`` is a dict with the keys ``boxes``
            (N x 4, ``[xmin, ymin, xmax, ymax]`` in resized-image pixels),
            ``labels``, ``image_id``, ``area`` and ``iscrowd``.

        Raises:
            FileNotFoundError: if the image file cannot be read.
            ValueError: if ``annotations`` has no row for this image.
        """
        file_name = self.images_files[idx]
        img_path = self.images_dir_path + file_name

        # cv2.imread does not raise on a missing/corrupt file, it returns None
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError("could not read image file: {}".format(img_path))
        # HWC array -> CHW float tensor, then resize to the fixed input size
        # NOTE(review): pixels stay in OpenCV's BGR order and 0-255 range, while
        # torchvision documents 0-1 inputs for its detection models; left
        # unchanged so inputs match what existing checkpoints were trained on.
        img = transforms.Resize((256,256))(torch.Tensor(img).permute(2,0,1))

        # the file name without its extension is the image id (any extension length)
        img_id = int(os.path.splitext(file_name)[0])
        img_annotations = self.annotations[self.annotations["image_id"] == img_id]
        if img_annotations.empty:
            # ValueError rather than IndexError: an IndexError raised from
            # __getitem__ would silently end a plain `for item in dataset` loop
            raise ValueError("no annotation found for image_id {}".format(img_id))

        # original image size as recorded in the annotations (same for every row of an image)
        img_height = int(img_annotations["image_height"].iloc[0])
        img_width = int(img_annotations["image_width"].iloc[0])

        # rescale the box corners from the original resolution to 256x256
        coords = img_annotations[["xmin", "ymin", "xmax", "ymax"]].to_numpy(dtype="float64", copy=True)
        coords[:, [0, 2]] = 256 * coords[:, [0, 2]] / img_width
        coords[:, [1, 3]] = 256 * coords[:, [1, 3]] / img_height
        areas = (coords[:, 2] - coords[:, 0]) * (coords[:, 3] - coords[:, 1])

        target = {
            "boxes": torch.as_tensor(coords, dtype=torch.float32),
            "labels": torch.as_tensor(img_annotations["box_label"].tolist(), dtype=torch.int64),
            "image_id": torch.as_tensor([img_id]),
            "area": torch.as_tensor(areas, dtype=torch.float32),
            "iscrowd": torch.zeros((len(img_annotations),), dtype=torch.int64),
        }

        return img, target

90% of the whole dataset is dedicated to training and the remaining 10% is used as the test dataset.

In [9]:
FMD = FaceMaskDataset(annotations, images_dir_path, images_files)

train_ratio = 0.9
n_train = int(train_ratio*len(FMD))

# Seed the split: the checkpoint is reloaded and trained further across sessions,
# so an unseeded split would let images used for training in one session land in
# the test set of the next one. (Reproducible as long as the file order in FMD is stable.)
split_seed = 0
trainset, testset = torch.utils.data.random_split(
    FMD,
    [n_train, len(FMD)-n_train],
    generator=torch.Generator().manual_seed(split_seed),
)

batch_size = 2

trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, collate_fn=utils.collate_fn)
# evaluation does not depend on sample order, so the test loader is not shuffled
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, collate_fn=utils.collate_fn)

___

## **2. THE MODELS**

### *2.2 Faster-RCNN (ResNet50)*

#### Defining the model

In [14]:
def get_rcnn_model(nb_classes):
    """Build a Faster R-CNN (ResNet50-FPN) whose box head predicts ``nb_classes`` classes.

    The detector is created with ``pretrained=True`` and its box predictor is
    swapped for a fresh ``FastRCNNPredictor`` with the same input width.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # size of the feature vector fed to the existing classification layer
    old_head = detector.roi_heads.box_predictor
    head_width = old_head.cls_score.in_features

    # new, untrained head sized for our own classes
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_width, nb_classes)
    return detector

In [15]:
# 4 classes: the three box labels found in the annotations (1, 2, 3) plus index 0,
# which torchvision detection heads reserve for the background
modelRCNN = get_rcnn_model(nb_classes=4)
modelRCNN.to(device)

# resume from the saved checkpoint when there is one, otherwise keep the freshly built model
try:
    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine
    modelRCNN.load_state_dict(torch.load("./models/MaskRecognitionFasterRCNN.pt", map_location=device))
    print("model loaded")
except FileNotFoundError:
    # only a missing file means "no checkpoint yet"; a corrupt file or a shape
    # mismatch is raised instead of silently restarting from scratch
    print("new model")
modelRCNN.eval()

model loaded


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

#### Training the model

In [16]:
num_epochs = 5

# only the weights that still require gradients are handed to the optimizer
params = list(filter(lambda weight: weight.requires_grad, modelRCNN.parameters()))

sgd_settings = {"lr": 0.0005, "momentum": 0.9, "weight_decay": 0.0005}
optimizer = torch.optim.SGD(params, **sgd_settings)

# the learning rate is multiplied by 0.1 every 10 scheduler steps
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

In [17]:
# log about three times per epoch; clamp to 1 because a loader with fewer than 3
# batches would give print_freq=0 (presumably a modulo-by-zero in the logger of
# tools/engine — not visible here)
print_freq = max(1, len(trainloader)//3)

for epoch in range(num_epochs):
    # train for one epoch, printing every `print_freq` iterations
    engine.train_one_epoch(modelRCNN, optimizer, trainloader, device, epoch, print_freq=print_freq)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    engine.evaluate(modelRCNN, testloader, device=device)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Epoch: [0]  [   0/2334]  eta: 1:21:53  lr: 0.000001  loss: 2.6656 (2.6656)  loss_classifier: 2.4341 (2.4341)  loss_box_reg: 0.2302 (0.2302)  loss_objectness: 0.0001 (0.0001)  loss_rpn_box_reg: 0.0011 (0.0011)  time: 2.1050  data: 0.0240  max mem: 1779
Epoch: [0]  [ 778/2334]  eta: 0:14:10  lr: 0.000390  loss: 0.1533 (0.3253)  loss_classifier: 0.0623 (0.1446)  loss_box_reg: 0.0849 (0.1647)  loss_objectness: 0.0010 (0.0098)  loss_rpn_box_reg: 0.0006 (0.0062)  time: 0.5637  data: 0.0224  max mem: 2043


#### Saving the model

In [None]:
# torch.save does not create missing directories, so make sure the target exists
os.makedirs("./models", exist_ok=True)
torch.save(modelRCNN.state_dict(), "./models/MaskRecognitionFasterRCNN.pt")

#### Testing the model

In [None]:
# the head must have the same number of classes as the model that was trained and
# saved above (4); with any other value load_state_dict fails on the predictor shapes
modelRCNN = get_rcnn_model(nb_classes=4)
modelRCNN.to(device)
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine
modelRCNN.load_state_dict(torch.load("./models/MaskRecognitionFasterRCNN.pt", map_location=device))
modelRCNN.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [None]:
def show_random_results(model, nb_images, score_threshold=0.0):
    """Show random images before and after drawing the model's predicted boxes.

    For each sampled image two OpenCV windows are opened one after the other
    (the raw image, then the image with the predicted boxes blended in); each
    window waits for a key press before closing.

    Args:
        model: detection model in eval mode; called with a (1, 3, 256, 256)
            batch, it must return a list of dicts with ``boxes``, ``labels``
            and ``scores``.
        nb_images: number of distinct images to show (capped at the number of
            distinct image ids in ``annotations``).
        score_threshold: predictions with a lower score are not drawn. The
            default of 0.0 draws every prediction.
    """
    # One BGR colour per predicted label. The annotations use labels 1, 2 and 3;
    # index 0 is the background and is never returned as a prediction.
    # NOTE(review): which class each colour stands for depends on the label
    # encoding produced by the preprocessing script — confirm.
    box_colors = {
        1: (0,0,255),    # red
        2: (0,255,255),  # yellow
        3: (0,255,0),    # green
    }
    fallback_color = (0,255,0)

    # sample among distinct ids: `annotations` has one row per box, so sampling
    # its rows directly could pick the same image several times
    candidate_ids = annotations["image_id"].unique().tolist()
    img_ids = rd.sample(candidate_ids, min(nb_images, len(candidate_ids)))

    for img_id in img_ids:

        img = cv2.imread(images_dir_path+"{}.png".format(img_id))
        if img is None:
            # cv2.imread returns None instead of raising on a missing/corrupt file
            print("could not read image {}.png, skipped".format(img_id))
            continue

        cv2.imshow("before | {}.png".format(img_id), img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        h, w = img.shape[:2]
        overlay = img.copy()
        output = img.copy()

        # same preprocessing as the dataset: CHW float tensor resized to 256x256
        model_input = transforms.Resize((256,256))(torch.Tensor(img).permute(2,0,1))
        model_input = model_input.reshape((1,3,256,256)).to(device)
        with torch.no_grad():  # inference only, no autograd graph needed
            prediction = model(model_input)[0]

        detections = zip(
            prediction["boxes"].tolist(),
            prediction["labels"].tolist(),
            prediction["scores"].tolist(),
        )
        for box, label, score in detections:
            if score < score_threshold:
                continue

            # map the box from the 256x256 model input back to the original resolution
            xmin = int(w*box[0]/256)
            ymin = int(h*box[1]/256)
            xmax = int(w*box[2]/256)
            ymax = int(h*box[3]/256)

            color = box_colors.get(label, fallback_color)
            cv2.rectangle(overlay, (xmin,ymin), (xmax,ymax), color, 2)

        # blend the drawn boxes at 50% opacity over the original image
        output = cv2.addWeighted(overlay, 0.5, output, 0.5, 0, output)
        cv2.imshow("after | {}.png".format(img_id), output)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

In [None]:
# print the dataset indices for which the model predicts label 0
# (index 0 is the background in torchvision detection heads, so normally nothing is printed)
with torch.no_grad():  # inference only: avoid building an autograd graph for every image
    for i in range(len(FMD)):
        t = modelRCNN(FMD[i][0].reshape(1,3,256,256).to(device))[0]
        if 0 in t["labels"]:
            print(i)

In [None]:
# visual check: show 10 randomly chosen images before and after drawing the predicted boxes
show_random_results(modelRCNN, 10)

2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
1
2
2
2
2
1
1
1
1
1
1
1
1
2
1
2
1
2
1
1
1
2
2
1
1
2
2
1
2
1
1
1
2
2
2
1
2
1
1
1
1
2
2
1
1
1


___
___
