# **Task 1 | Mask Recognition**

***Goal :*** *Detect human faces on videos and check whether or not they have a mask on*. 

In this notebook we implement two models different model to perform the task :
- [YOLOv5](https://github.com/ultralytics/yolov5)

___
___

## **1. INITIALIZATION**

### *1.1 IMPORTS*

In [1]:
from IPython.display import display, Image, clear_output
from math import nan
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from tqdm import tqdm
from xml.dom import minidom

import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import random as rd
import time

from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms

import torch
import torch.nn as nn

In [2]:
# to fill the `requirement.txt` file we use the following line of code:
import session_info
session_info.show()

In [3]:
# setting device on GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("device: {}".format(device))

device: cuda


### *1.2. DATA LOADING*

In [4]:
data_root_dir = "data/FaceMaskDetection/"           # path to the directory with the relevant data
annotations_dir = data_root_dir + "annotations/"    # path to the directory with the .xml annotations files
images_dir = data_root_dir + "images/"              # path to the directory with the images

annotation_files = os.listdir(annotations_dir)      # list of the files in the annotation directory
annotation_files.sort()

image_files = os.listdir(images_dir)                # list of files in the image directory
image_files.sort()

In [5]:
# 1st check-point: same number of files
assert (len(annotation_files) == len(image_files))

# 2nd check-point: same file ids in the same order
assert ([annotation_file[15:-4] for annotation_file in annotation_files] == [image_file[15:-4] for image_file in image_files])

In [8]:
# to have all the annotations files in the same pandas dataframe (easier to manipulate)
def get_annotations(data_root_dir, annotations_dir, annotation_files):

    # if the dataframe has already been computed and saved as a .csv file
    if "annotations.csv" in os.listdir(data_root_dir):
        annotations = pd.read_csv(data_root_dir + "annotations.csv", index_col=None)

    # else, we compute and save it
    else:

        data = []
        mask_label = {"without_mask": 0, "mask_weared_incorrect": 1, "with_mask": 2}

        for i in tqdm(range(len(annotation_files))):
            
            file = annotation_files[i]
            ann = minidom.parse(annotations_dir + file)

            image_id = int(ann.getElementsByTagName("filename")[0].firstChild.data[12:-4])
            image_height = int(ann.getElementsByTagName("height")[0].firstChild.data)
            image_width = int(ann.getElementsByTagName("width")[0].firstChild.data)
        
            for box_id,object in enumerate(ann.getElementsByTagName("object")):

                box_label = mask_label[object.getElementsByTagName("name")[0].firstChild.data]
                xmin = int(object.getElementsByTagName("xmin")[0].firstChild.data)
                xmax = int(object.getElementsByTagName("xmax")[0].firstChild.data)
                ymin = int(object.getElementsByTagName("ymin")[0].firstChild.data)
                ymax = int(object.getElementsByTagName("ymax")[0].firstChild.data)

                data.append((image_id, image_height, image_width, box_id, box_label, xmin, xmax, ymin, ymax))
        
        columns = ["image_id", "image_height", "image_width", "box_id", "box_label", "xmin", "xmax", "ymin", "ymax"]
        annotations = pd.DataFrame(data=data, columns=columns, index=None)
        annotations.to_csv(data_root_dir + "annotations.csv", index=None)

    return annotations

In [11]:
annotations = get_annotations(data_root_dir, annotations_dir, annotation_files)
display(annotations)

Unnamed: 0,image_id,image_height,image_width,box_id,box_label,xmin,xmax,ymin,ymax
0,0,366,512,0,0,79,109,105,142
1,0,366,512,1,2,185,226,100,144
2,0,366,512,2,0,325,360,90,141
3,1,156,400,0,2,321,354,34,69
4,1,156,400,1,2,224,261,38,73
...,...,...,...,...,...,...,...,...,...
4067,98,267,400,2,2,263,287,62,85
4068,98,267,400,3,2,344,377,80,106
4069,99,267,400,0,1,181,273,54,162
4070,99,267,400,1,2,99,176,87,165


To [train the YOLO model](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data) we need to format our data as it follows:

In [8]:
def get_yolo_labels(annotations, data_root_dir):

    if "labels_yolo" not in os.listdir(data_root_dir):
        os.mkdir(data_root_dir + "labels_yolo")
    
    annotations_ids = list(set(annotations["image_id"]))
    annotations_ids.sort()

    for i in tqdm(range(1,len(annotations_ids))):

        image_id = annotations_ids[i]
        indexes = list(annotations[annotations["image_id"] == image_id].index)

        label_file_lines = []

        for index in indexes:

            box_label = annotations.iloc[index]["box_label"]

            image_height = annotations["image_height"].iloc[index]
            image_width = annotations["image_width"].iloc[index]
            xmin = annotations["xmin"].iloc[index]
            xmax = annotations["xmax"].iloc[index]
            ymin = annotations["ymin"].iloc[index]
            ymax = annotations["ymax"].iloc[index]

            box_xcenter = ((xmin + xmax)/2)/image_width
            box_ycenter = ((ymin + ymax)/2)/image_height
            box_width = (xmax - xmin)/image_width
            box_height = (ymax - ymin)/image_height

            label_file_lines.append(' '.join([str(box_label), str(box_xcenter), str(box_ycenter), str(box_width), str(box_height)]))
        
        with open(data_root_dir + "labels_yolo/maksssksksss{}.txt".format(image_id), 'w') as f:
            f.writelines('\n'.join(label_file_lines))
            f.close()


In [9]:
labels_yolo_dir = data_root_dir + "labels_yolo/"
get_yolo_labels(annotations, data_root_dir)

labels_yolo_files = os.listdir(labels_yolo_dir)
labels_yolo_files.sort()

100%|██████████| 852/852 [00:01<00:00, 785.66it/s]


In [10]:
# 3rd check-point: same number of files
assert (len(annotation_files) == len(labels_yolo_files))

# 4th check-point: same file ids in the same order
assert ([annotation_file[15:-4] for annotation_file in annotation_files] == [labels_yolo_file[15:-4] for labels_yolo_file in labels_yolo_files])

___

## **2. THE MODELS**

### *2.1. YOLOv5*

#### DEFINING THE MODEL

To use YOLOv5 we need to clone it from the [official GitHub repository](https://github.com/ultralytics/yolov5).

In [21]:
!git clone https://github.com/ultralytics/yolov5
%cd yolov5/
!pip install -r requirements.txt
%cd ..
clear_output()

d:\AIVC\INF634 - COMPUTER VISION\mask-detection-social-distancing\yolov5


fatal: destination path 'yolov5' already exists and is not an empty directory.


d:\AIVC\INF634 - COMPUTER VISION\mask-detection-social-distancing
