In [1]:
from ultralytics import YOLO
import torch
from ultralytics.nn.autobackend import AutoBackend
import time
import os
import cv2
import numpy as np
from torchvision import transforms
from torch.quantization import QuantStub, DeQuantStub, fuse_modules, prepare, convert
from torchvision import transforms
from PIL import Image
from torch.utils.data import DataLoader, Dataset
import os
os.environ["PYTORCH_SHOW_DISPATCH_TRACE"] = "1"
import warnings
warnings.filterwarnings("ignore", category=UserWarning)


In [2]:
# Load trained best custom model weights
model_weights = "trained_weights/exp1_yolov8n_trained.pt"
# Select test image for model evaluation and latency check
test_img = "datasets/test/images/maksssksksss25.png"

In [3]:
# Load the trained YOLOv8 model
model = YOLO(model_weights)

# Run inference on an image
results = model(test_img, save=True)
print(results)



image 1/1 e:\Face-mask-detection\datasets\test\images\maksssksksss25.png: 640x480 1 with_mask, 155.6ms
Speed: 6.0ms preprocess, 155.6ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 480)
Results saved to [1mruns\detect\predict13[0m
[ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'with_mask', 1: 'without_mask', 2: 'mask_weared_incorrect'}
obb: None
orig_img: array([[[175, 185, 183],
        [174, 184, 182],
        [175, 185, 182],
        ...,
        [175, 185, 177],
        [178, 188, 180],
        [181, 190, 183]],

       [[175, 185, 182],
        [174, 184, 181],
        [175, 184, 182],
        ...,
        [176, 186, 178],
        [178, 188, 180],
        [179, 188, 181]],

       [[174, 184, 182],
        [173, 183, 181],
        [174, 183, 181],
        ...,
        [178, 188, 180],
        [178, 187, 180],
        [177, 187, 179]],

       ...,

       [[129, 13

In [4]:
# Measure latency
start_time = time.time()
with torch.no_grad():
    results = model(test_img, save=False)
end_time = time.time()

latency = (end_time - start_time) * 1000  # Convert seconds to milliseconds
print(f"Inference Latency of custom trained model: {latency:.2f} ms")



image 1/1 e:\Face-mask-detection\datasets\test\images\maksssksksss25.png: 640x480 1 with_mask, 120.7ms
Speed: 3.0ms preprocess, 120.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 480)
Inference Latency of custom trained model: 153.59 ms


Post Quantisation of trained model weight

In [5]:
#Define path for saving quantized model weight
quantized_model_weights = "yolov8_mask_detection_quantized.pt"

In [6]:
# Load the custom model
model = AutoBackend(model_weights) 
model.eval()

# Dynamically quantize the model 
quantized_model = torch.quantization.quantize_dynamic(
    model.model, {torch.nn.Linear, torch.nn.Conv2d}, dtype=torch.qint8
)

# Saving both the quantized architecture and weights
quantized_checkpoint = {
    "model": quantized_model, 
    "metadata": {
        "classes": model.names,  
    }
}

torch.save(quantized_checkpoint, quantized_model_weights)
print("Quantized model saved.")


Model summary (fused): 168 layers, 3,006,233 parameters, 0 gradients, 8.1 GFLOPs
Quantized model saved.


In [7]:
qunatised_model = YOLO(quantized_model_weights)
# Measure latency
start_time = time.time()
with torch.no_grad():
    results = qunatised_model(test_img, save=False)
end_time = time.time()

latency = (end_time - start_time) * 1000  # Convert seconds to milliseconds
print(f"Quantized Inference Latency: {latency:.2f} ms")


image 1/1 e:\Face-mask-detection\datasets\test\images\maksssksksss25.png: 640x480 1 with_mask, 664.2ms
Speed: 6.0ms preprocess, 664.2ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)
Quantized Inference Latency: 713.10 ms


In [8]:
# Load the checkpoint
checkpoint = torch.load(quantized_model_weights, map_location="cpu")

# Check the keys to find the model
print("Checkpoint Keys:", checkpoint.keys())

# If the model state_dict is in a different key, replace 'model' with the correct one
if 'model' in checkpoint:
    quantized_model = checkpoint['model']
elif 'model_state_dict' in checkpoint:
    quantized_model = checkpoint['model_state_dict']
else:
    raise KeyError("Model not found in checkpoint")

# Ensure the model is in evaluation mode
quantized_model.eval()


Checkpoint Keys: dict_keys(['model', 'metadata'])


DetectionModel(
  (model): Sequential(
    (0): Conv(
      (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (act): SiLU(inplace=True)
    )
    (1): Conv(
      (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (act): SiLU(inplace=True)
    )
    (2): C2f(
      (cv1): Conv(
        (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))
        (act): SiLU(inplace=True)
      )
      (cv2): Conv(
        (conv): Conv2d(48, 32, kernel_size=(1, 1), stride=(1, 1))
        (act): SiLU(inplace=True)
      )
      (m): ModuleList(
        (0): Bottleneck(
          (cv1): Conv(
            (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            (act): SiLU(inplace=True)
          )
          (cv2): Conv(
            (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            (act): SiLU(inplace=True)
          )
        )
      )
    )
    (3): Conv(
      (conv): Conv2d(32

Check Latency and compare for both model

In [9]:
# Load test image and preprocess
test_img_path = "datasets/test/images/maksssksksss7.png"
image = cv2.imread(test_img_path)  
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  
image = cv2.resize(image, (640, 640))  
image = image / 255.0  
image = np.transpose(image, (2, 0, 1))  
image = np.expand_dims(image, axis=0)  
image_tensor = torch.tensor(image, dtype=torch.float32)  

# Measure latency
start_time = time.time()
with torch.no_grad():
    results = quantized_model(image_tensor)
end_time = time.time()

latency = (end_time - start_time) * 1000  
print(f"Inference Latency: {latency:.2f} ms")


Inference Latency: 192.49 ms


Latency for FPS (Frames per Second)

In [None]:
# Define the image transformation (resize and normalize)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((640, 640)),  
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  
])

# Function to load image and preprocess
def load_image(img_path):
    img = cv2.imread(img_path)  
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  
    return img

# List of test image paths 
test_img = [
    "datasets/test/images/maksssksksss7.png",
    "datasets/test/images/maksssksksss25.png",
    "datasets/test/images/maksssksksss36.png",
    # Add more image paths 
]

# Start FPS calculation
fps_start_time = time.time()
num_frames = len(test_img) 
for img_path in test_img:
    img = load_image(img_path)  
    img = transform(img).unsqueeze(0) 
    
    # Perform inference on each image
    with torch.no_grad():  
        quantized_model(img) 

fps_end_time = time.time()

# Calculate FPS
fps = num_frames / (fps_end_time - fps_start_time)
print(f"FPS (frames per second): {fps:.2f}")


FPS (frames per second): 1.72


In [11]:
# Check the model size
original_model_size = os.path.getsize(model_weights) / (1024 * 1024)
quantized_model_size = os.path.getsize(quantized_model_weights) / (1024 * 1024)

# Model size reduction ratio
size_reduction_ratio = (original_model_size - quantized_model_size) / original_model_size * 100

print(f"Original model size: {original_model_size:.2f} MB")
print(f"Quantized model size: {quantized_model_size:.2f} MB")
print(f"Model size reduction: {size_reduction_ratio:.2f}%")


Original model size: 5.96 MB
Quantized model size: 11.64 MB
Model size reduction: -95.13%


Model weights file has to reduct after quantisation, but here its contradicting. So we ll try static post quantisation technique

In [12]:
static_quantized_model_weights = "yolov8_mask_detection_static_quantized.pt"

In [None]:
# Load YOLOv8 model
model = AutoBackend(model_weights)
model.eval()

# Set quantization config
model.qconfig = torch.quantization.get_default_qconfig('fbgemm')

# Prepare model for static quantization with the model for calibration
torch.quantization.prepare(model, inplace=True)

# Simulate calibration with a dummy input calibration
dummy_input = torch.randn(1, 3, 640, 640)  
with torch.no_grad():
    for _ in range(100): 
        model(dummy_input)

# Apply static quantization after calibration
quantized_model = torch.quantization.convert(model, inplace=False)

# Save the quantized model checkpoint
quantized_checkpoint = {
    "model": quantized_model,
    "metadata": {
        "classes": model.names  
    }
}

torch.save(quantized_checkpoint, static_quantized_model_weights)

print("Static quantized model saved.")


Model summary (fused): 168 layers, 3,006,233 parameters, 0 gradients, 8.1 GFLOPs
Static quantized model saved.


In [14]:
# Check the model size (in MB)
original_model_size = os.path.getsize(model_weights) / (1024 * 1024) 
quantized_model_size = os.path.getsize(static_quantized_model_weights) / (1024 * 1024)

# Model size reduction ratio
size_reduction_ratio = (original_model_size - quantized_model_size) / original_model_size * 100

print(f"Original model size: {original_model_size:.2f} MB")
print(f"Static Quantized model size: {quantized_model_size:.2f} MB")
print(f"Model size reduction: {size_reduction_ratio:.2f}%")


Original model size: 5.96 MB
Static Quantized model size: 3.17 MB
Model size reduction: 46.80%


Now the weight has reduced too. This way quantisation technique improves the model and makes it better for smaller devices
