In [1]:
import time

import cv2
import imutils
import numpy as np
import torch
from PIL import Image as PImage
from matplotlib import pyplot as plt
from imutils.video import WebcamVideoStream
from fastai.vision import *
from torchvision.transforms import Compose, Resize, ToPILImage, ToTensor

Loading pre-trained models

In [2]:
# Filesystem locations used by the notebook, assembled from shared prefixes.
_HOME = '/home/vinaykudari'
_MASK_TRAINING_DIR = f'{_HOME}/datasets/face_mask/training'
_FACE_DETECTION_DIR = f'{_HOME}/Desktop/ML/models/face-detection'

# Mask classifier checkpoint read with torch.load.
model_pth = f'{_HOME}/models/mask-detection/export.pth'
# Directory handed to fastai's load_learner.
model_pkl = f'{_MASK_TRAINING_DIR}/models/'
# Training images directory (not referenced by the cells visible here).
sample_images = f'{_MASK_TRAINING_DIR}/train/'
# Caffe network definition and weights for the face detector.
PROTO_TXT_PATH = f'{_FACE_DETECTION_DIR}/deploy.prototxt.txt'
MODEL_PATH = f'{_FACE_DETECTION_DIR}/res10_300x300_ssd_iter_140000.caffemodel'

In [3]:
# Face detector: a Caffe network built from its prototxt definition and
# the matching .caffemodel weights file.
face_detector = cv2.dnn.readNetFromCaffe(
    prototxt=PROTO_TXT_PATH,
    caffeModel=MODEL_PATH,
)

In [5]:
# Loading mask detection model: Pytorch
# Choose the target device first so the checkpoint can be mapped onto it
# while it is being read.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# map_location lets a checkpoint that was saved from a GPU be loaded on a
# CPU-only machine; without it torch.load tries to restore the tensors onto
# the device they were saved from.
loc = torch.load(model_pth, map_location=device)

# Rebuild the architecture the checkpoint was trained with: a ResNet-34 body
# followed by a fastai classification head.
# pretrained=False: every parameter and buffer is overwritten by
# load_state_dict() below, so fetching ImageNet weights here is wasted work.
body = create_body(models.resnet34, False, None)
data_classes = 2
# The head receives twice the body's feature count -- presumably because the
# fastai head concatenates average- and max-pooled features (TODO confirm).
nf = callbacks.hooks.num_features_model(body) * 2
head = create_head(nf, data_classes, None, ps=0.5, bn_final=False)
model = nn.Sequential(body, head)

# The checkpoint is a dict whose 'model' entry holds the state dict.
model.load_state_dict(loc['model'])
model = model.to(device)
model.eval()  # inference mode: disables dropout, uses stored batch-norm stats

print('model loaded')

model loaded


In [4]:
# Mask classifier, fastai flavour: restore the exported Learner from the
# directory stored in `model_pkl`.
learn = load_learner(path=model_pkl)

Prediction on video frames

In [5]:
# Preprocessing pipeline for a face crop, applied in this order:
# array -> PIL image -> 224x224 resize -> tensor.
preprocessing_steps = [
    ToPILImage(),
    Resize((224, 224)),
    ToTensor(),
]
transformations = Compose(preprocessing_steps)

In [6]:
# Live mask detection: grab webcam frames, find faces with the SSD face
# detector, classify each face crop with the fastai learner and show the
# annotated frame until "q" is pressed.

# --- Tunables ---------------------------------------------------------------
FACE_CONF_THRESHOLD = 0.4    # minimum detector confidence to keep a face
MASK_PROB_THRESHOLD = 0.90   # minimum probability to label a face as masked
FACE_PADDING = 20            # extra pixels around the face box given to the classifier
# Per-channel mean published with the OpenCV res10 SSD face detector; the
# network expects it to be subtracted from the input.
FACE_BLOB_MEAN = (104.0, 177.0, 123.0)
FONT_SCALE = 0.5
FONT_THICKNESS = 1

vs = WebcamVideoStream(src=0).start()
time.sleep(1)  # give the camera a moment before the first read

font = cv2.FONT_HERSHEY_SIMPLEX

try:
    while True:
        frame = vs.read()
        if frame is None:
            # No frame available (camera missing or stream ended): stop instead
            # of crashing inside imutils.resize.
            break
        frame = imutils.resize(frame, width=600)
        frame_h, frame_w, _ = frame.shape

        # Face detection
        # blobFromImage takes size as (width, height), not (height, width).
        blob = cv2.dnn.blobFromImage(frame, 1.0, (frame_w, frame_h), FACE_BLOB_MEAN)
        face_detector.setInput(blob)
        predictions = face_detector.forward()

        faces = []
        for i in range(predictions.shape[2]):
            confidence = predictions[0, 0, i, 2]
            if confidence <= FACE_CONF_THRESHOLD:
                continue
            # Columns 3:7 are scaled by the frame size to get pixel corners.
            box = predictions[0, 0, i, 3:7] * np.array([frame_w, frame_h, frame_w, frame_h])
            x1, y1, x2, y2 = (int(v) for v in box)
            # Clip to the actual frame so partially visible faces are still
            # classified and a degenerate box never yields an empty crop.
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, frame_w), min(y2, frame_h)
            if x2 > x1 and y2 > y1:
                faces.append([x1, y1, x2 - x1, y2 - y1])

        total_faces = len(faces)
        masked_faces = 0

        # Draw on a copy so boxes/labels of earlier faces never end up inside
        # the crop that is classified for a later face.
        annotated = frame.copy()

        # Mask Detection for detected faces
        for x, y, w, h in faces:
            face_img = frame[max(y - FACE_PADDING, 0):y + h + FACE_PADDING,
                             max(x - FACE_PADDING, 0):x + w + FACE_PADDING]
            face_rgb = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            im = Image(transformations(face_rgb))
            pred_class, pred_idx, outputs = learn.predict(im)
            cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 2)
            # NOTE(review): outputs[0] is treated as the "mask" probability, i.e.
            # class index 0 is assumed to be the masked class -- confirm against
            # the learner's class order.
            if outputs[0] > MASK_PROB_THRESHOLD:
                text = f'Wearing mask, {int(outputs[0]*100)} accurate'
                masked_faces += 1
            else:
                text = f'Not wearing mask, {int(outputs[1]*100)} accuracy'
            # Measure the label with the same scale/thickness used to draw it,
            # otherwise the horizontal centring is off.
            text_size = cv2.getTextSize(text, font, FONT_SCALE, FONT_THICKNESS)[0]
            text_x = max(x + w // 2 - text_size[0] // 2, 0)
            # Keep the label on screen for faces near the top edge.
            text_y = max(y - 20, text_size[1])

            cv2.putText(annotated, text, (text_x, text_y), font, FONT_SCALE,
                        (0, 255, 0), FONT_THICKNESS)

        stats = f'{masked_faces}/{total_faces} wearing masks'
        cv2.putText(annotated, stats, (20, 30), font, FONT_SCALE, (255, 0, 0), FONT_THICKNESS)

        cv2.imshow('Mask Detection', annotated)
        key = cv2.waitKey(1) & 0xFF

        if key == ord("q"):
            break
finally:
    # Release the camera and close the window even when the loop is left via
    # an exception such as KeyboardInterrupt (kernel interrupt).
    vs.stop()
    cv2.destroyAllWindows()
    

KeyboardInterrupt: 

In [7]:
# Stop the webcam stream and close the preview window.
vs.stop()

cv2.destroyAllWindows()

# The stream is deliberately NOT restarted here: re-opening the webcam right
# after stopping it left the camera in use with nothing reading or stopping
# it. The capture cell creates its own WebcamVideoStream when it is run.