In [1]:
import logging
import datetime

import matplotlib.pyplot as plt

import numpy as np
import torch
import torch.nn as nn
from torch.nn.utils import parameters_to_vector
import torch.optim as optim
from torchinfo import summary

import config
import modules.dataloaders as data_loaders
import modules.utils as utils
import modules.models as models
import modules.models_brevitas_noComp_fixed_point as models_bnn_fxpoint_no_comp
import modules.loss as loss_module
import modules.metrics as metrics
import modules.train_epoch as train_epoch
import modules.val_epoch as val_epoch

from brevitas.export import export_onnx_qcdq

# Logger

In [2]:
log_path = config.LOGS_FOLDER

# Dedicated file-only logger for the run: messages are written verbatim
# ('%(message)s') and are not propagated to the root logger.
logger = logging.getLogger("GonLogger")
logger.propagate = False
logger.setLevel(logging.INFO)

# logging.getLogger returns the same object on every call, so re-running this
# cell would stack another FileHandler and write every message twice.
# Drop file handlers left over from a previous execution first.
for stale_handler in [h for h in logger.handlers if isinstance(h, logging.FileHandler)]:
    logger.removeHandler(stale_handler)
    stale_handler.close()

file_handler = logging.FileHandler(log_path + 'logfile.log')
formatter = logging.Formatter('%(message)s')
file_handler.setFormatter(formatter)

# add file handler to logger
logger.addHandler(file_handler)

logger.info('BED Detector.\n' + 
            '\tNo Sigmoid, No Softmax. Permute as a Layer.\n' +
            '\tDFire and FASDD UAV and CV.\n' +
            '\tFASDD: train and val datasets to train and test dataset to validate.\n' +
            '\tFASDD RS not included, as it only has smoke and it is too different to current pictures.\n' + 
            '\tBrevitas Quantization.\n')

# Hyperparameters Log

In [3]:
# ============================
#     Print Config Values
# ============================
def report_config(message, log_blank_before=False):
    """Print `message` and append the same text to the run log.

    Emitting both copies from one call keeps the notebook output and the log
    file from drifting apart. `log_blank_before` adds a leading newline to the
    logged copy only, which preserves the log file's section spacing.
    """
    print(message)
    logger.info(('\n' if log_blank_before else '') + message)


report_config('\nDatasets Length')
report_config(f'\tTrain: {"Full" if config.DS_LEN is None else config.DS_LEN}')
report_config(f'\tVal: {"Full" if config.VAL_DS_LEN is None else config.VAL_DS_LEN}')
report_config(f'\nLoad Model: {config.LOAD_MODEL}')
if config.LOAD_MODEL:
    report_config(f'\tModel: {config.LOAD_MODEL_FILE}')
report_config(f'Device: {config.DEVICE}', log_blank_before=True)
report_config('Optimizer:')
report_config(f'\tLearning Rate: {config.LEARNING_RATE}')
report_config(f'\tGradients Clip Norm: {config.GRADIENTS_CLIP_NORM}')
report_config(f'\tWeight Decay: {config.WEIGHT_DECAY}')
report_config('Scheduler:')
report_config(f'\tScheduler factor: {config.FACTOR}')
report_config(f'\tScheduler patience: {config.PATIENCE}')
report_config(f'\tScheduler threshold: {config.THRES}')
report_config(f'\tScheduler min learning rate: {config.MIN_LR}')
report_config(f'Batch Size: {config.BATCH_SIZE}', log_blank_before=True)
report_config(f'Num Workers: {config.NUM_WORKERS}')
report_config(f'Pin Memory: {config.PIN_MEMORY}')
report_config(f'Epochs: {config.EPOCHS}')
report_config('\nIMG DIMS:')
report_config(f'\tWidth: {config.IMG_W}\n\tHeight: {config.IMG_H}')
# The logged heading used to omit "Max Obj" although the value is listed below it.
report_config('\nGrid, Bounding Boxes, Classes, Max Obj and Thresholds:')
report_config(f'\tGrid: {config.S}')
report_config(f'\tNumber of Bounding Boxes per Cell: {config.B}')
report_config(f'\tNumber of Classes: {config.C}')
report_config(f'\tMaximum Number of Objects per Image: {config.MAX_OBJ}')
report_config(f'\tIOU Threshold: {config.IOU_THRESHOLD}')
report_config(f'\tScore Threshold: {config.SCORE_THRESHOLD}')
report_config('\nBrevitas Config:')
report_config(f'\tFixed Point: {config.FIXED_POINT}')
report_config(f'\tWeights Bit Width: {config.WEIGHTS_BIT_WIDTH}')
report_config(f'\tBig Layers Weights Bit Width: {config.BIG_LAYERS_WEIGHTS_BIT_WIDTH}')
report_config(f'\tHead Weights Bit Width: {config.HEAD_WEIGHTS_BIT_WIDTH}')
report_config(f'\tBias Bit Width: {config.BIAS_BIT_WIDTH}')
report_config(f'\tActivations Bit Width: {config.ACTIVATIONS_BIT_WIDTH}')


Datasets Length
	Train: 200
	Val: 200

Load Model: True
	Model: ./experiments/test_20_no_sigmoid_softmax_permute_out/weights/BED_detector__best_mAP=0.6405__epoch=144.pt
Device: cuda
Optimizer:
	Learning Rate: 0.001
	Gradients Clip Norm: 500
	Weight Decay: 0.001
Scheduler:
	Scheduler factor: 0.8
	Scheduler patience: 3
	Scheduler threshold: 0.01
	Scheduler min learning rate: 1e-06
Batch Size: 64
Num Workers: 8
Pin Memory: True
Epochs: 5

IMG DIMS:
	Width: 224
	Height: 224

Grid, Bounding Boxes, Classes, Max Obj and Thresholds:
	Grid: 7
	Number of Bounding Boxes per Cell: 2
	Number of Classes: 2
	Maximum Number of Objects per Image: 10
	IOU Threshold: 0.5
	Score Threshold: 0.2

Brevitas Config:
	Fixed Point: True
	Weights Bit Width: 4
	Big Layers Weights Bit Width: 2
	Head Weights Bit Width: 8
	Bias Bit Width: 4
	Activations Bit Width: 8


# Datasets and Dataloaders

In [4]:
# Build the training and validation DataLoaders. No arguments are passed, so
# dataset selection and filtering are decided inside modules.dataloaders.
train_loader = data_loaders.get_train_loader()
val_loader = data_loaders.get_val_loader()


TRAIN DFIRE dataset
DFire Removed wrong images: 0
DFire Removed due to overlapping: 11
DFire Removed due to more than 10: 2

Train DFire dataset len: 187

TRAIN FASDD UAV dataset
FASDD Removed wrong images: 0
FASDD Removed due to overlapping: 22
FASDD Removed due to more than 10: 5

Train FASDD UAV dataset len: 173

VAL FASDD UAV dataset
FASDD Removed wrong images: 0
FASDD Removed due to overlapping: 21
FASDD Removed due to more than 10: 7

Val FASDD UAV dataset len: 172

TRAIN FASDD CV dataset
FASDD Removed wrong images: 0
FASDD Removed due to overlapping: 11
FASDD Removed due to more than 10: 0

Train FASDD CV dataset len: 189

VAL FASDD CV dataset
FASDD Removed wrong images: 0
FASDD Removed due to overlapping: 14
FASDD Removed due to more than 10: 1

Val FASDD CV dataset len: 185

Concatenate Train DFire and Train FASDD UAV datasets
Train dataset len: 360
Concatenate with Val FASDD UAV dataset
Train dataset len: 532
Concatenate with Train FASDD CV dataset
Train dataset len: 721
Con

### Plot Some Train Pictures

In [5]:
# Save a 4x5 preview grid built from the first training batch.
for img, label in train_loader:
    print(f'Batch size equal to img.shape[0] = {img.shape[0]}')
    print(f'Batch images shape = {img.shape}')
    fig, axes = plt.subplots(4, 5, figsize=(10,8))
    # A batch may hold fewer images than the grid has cells; indexing past the
    # batch would raise IndexError, so only fill as many cells as there are images.
    n_pictures = min(axes.size, img.shape[0])
    for i in range(n_pictures):
        pic = utils.plot_dataset_img(img[i], label[i], grid=True)
        axes.flat[i].imshow(pic)
    fig.tight_layout()
    fig.savefig(config.RUN_FOLDER + 'train_pictures.png')
    plt.close(fig)
    # Only the first batch is needed.
    break

Batch size equal to img.shape[0] = 64
Batch images shape = torch.Size([64, 3, 224, 224])


### Plot Some Val Pictures

In [6]:
# Save a 4x5 preview grid built from the validation batch with index 3.
for batch_idx, (img, label) in enumerate(val_loader):

    #if batch_idx == 33:
    if batch_idx != 3:
        continue

    print(f'Batch size equal to img.shape[0] = {img.shape[0]}')
    print(f'Batch images shape = {img.shape}')
    fig, axes = plt.subplots(4, 5, figsize=(10,8))
    # A batch may hold fewer images than the grid has cells; indexing past the
    # batch would raise IndexError, so only fill as many cells as there are images.
    n_pictures = min(axes.size, img.shape[0])
    for i in range(n_pictures):
        pic = utils.plot_dataset_img(img[i], label[i], grid=True)
        axes.flat[i].imshow(pic)
    fig.tight_layout()
    fig.savefig(config.RUN_FOLDER + 'val_pictures.png')
    plt.close(fig)
    break

Batch size equal to img.shape[0] = 64
Batch images shape = torch.Size([64, 3, 224, 224])


# Loss Setup

In [7]:
# Only the YOLOv1 loss is wired in; any other setting aborts the run.
if config.LOSS_FN != "YOLOV1_LOSS":
    print("Wrong loss function")
    logger.info("Wrong loss function")
    raise SystemExit("Wrong loss function")

print('Loss Function: YOLOV1_LOSS')
logger.info('\nLoss Function: YOLOV1_LOSS')
loss_fn = loss_module.YoloLoss_2BBox()

# The L1 lambda is only reported here; it is handed to the training step later.
l1_lambda_message = f'Lambda for L1 regularization: {config.LAMBDA_L1_LOSS}'
print(l1_lambda_message)
logger.info(l1_lambda_message)

Loss Function: YOLOV1_LOSS
Lambda for L1 regularization: 0


# Model Setup

In [8]:
# Only the "BED" detector is supported; any other setting aborts the run.
if config.MODEL != "BED":
    print("Wrong Model")
    logger.info("Wrong Model")
    raise SystemExit("Wrong Model")

print("Using BED Detector")
logger.info("\nUsing BED Detector")
model = models.SIMPLE_BED_DETECTOR().to(config.DEVICE)


# MODEL PARAMETERS
# Trainable count: only parameters that receive gradients.
n_trainable = sum(param.numel() for param in model.parameters() if param.requires_grad)
trainable_message = f'\nTrainable parameters = {n_trainable}'
print(trainable_message)
logger.info(trainable_message)

# Total count: every parameter flattened into one vector.
n_params = parameters_to_vector(model.parameters()).numel()
print(f'Total parameters = {n_params}\n')
logger.info(f'Total parameters = {n_params}')

Using BED Detector

Trainable parameters = 287276
Total parameters = 287276



### Load Checkpoint

In [9]:
# Restore the pretrained float weights into `model`. Optimizer and scheduler
# are passed as None, so only the model is handed over for restoring.
# NOTE(review): this runs regardless of config.LOAD_MODEL — confirm that is intended.
epochs_trained = utils.load_checkpoint(config.LOAD_MODEL_FILE, 
                                       model, 
                                       optimizer=None, 
                                       scheduler=None, 
                                       device=config.DEVICE)

# The returned value is logged as the number of epochs the checkpoint was trained for.
logger.info(f"Loading Model. Trained during {epochs_trained} epochs")

Loading Model. Trained during 144 epochs


In [10]:
# Switch the float model to inference mode (affects its Dropout2d and
# BatchNorm2d layers). As the last expression of the cell, the returned module
# is displayed as the cell output.
model.eval()

SIMPLE_BED_DETECTOR(
  (model): Sequential(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU()
    (dropout1): Dropout2d(p=0.3, inplace=False)
    (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU()
    (dropout2): Dropout2d(p=0.3, inplace=False)
    (maxpool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv31): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn31): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu31): ReLU()
    (conv32): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=F

### Check Model Shape

In [11]:
# Push a random batch of 4 images through the model to confirm the output shape.
in_rand_np = np.random.rand(4, 3, config.IMG_H, config.IMG_W)
in_rand = torch.tensor(in_rand_np, dtype=torch.float32, device=config.DEVICE)
# Shape check only: run without autograd so the notebook-level `out_test` does
# not keep the whole activation graph alive in device memory.
with torch.no_grad():
    out_test = model(in_rand)

print(f'Input shape is {in_rand.shape}')
print(f'Model shape is {out_test.shape}')
print(f'BED Model Arquitecture\n{model}')
logger.info(f'\nInput shape is {in_rand.shape}')
logger.info(f'Model shape is {out_test.shape}\n')
logger.info(f'BED Model Arquitecture\n{model}')

Input shape is torch.Size([4, 3, 224, 224])
Model shape is torch.Size([4, 12, 7, 7])
BED Model Arquitecture
SIMPLE_BED_DETECTOR(
  (model): Sequential(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU()
    (dropout1): Dropout2d(p=0.3, inplace=False)
    (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU()
    (dropout2): Dropout2d(p=0.3, inplace=False)
    (maxpool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv31): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn31): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

### Torch Summary

In [12]:
# Build the summary once and reuse it: calling summary() a second time only to
# log the same text repeats the model pass for no benefit.
original_model_summary = summary(model, input_size=(1, 3, config.IMG_H, config.IMG_W))
print(original_model_summary)
logger.info("\nORIGINAL Model Summary")
logger.info(original_model_summary)

Layer (type:depth-idx)                   Output Shape              Param #
SIMPLE_BED_DETECTOR                      [1, 12, 7, 7]             --
├─Sequential: 1-1                        [1, 12, 7, 7]             --
│    └─Conv2d: 2-1                       [1, 32, 224, 224]         864
│    └─BatchNorm2d: 2-2                  [1, 32, 224, 224]         64
│    └─ReLU: 2-3                         [1, 32, 224, 224]         --
│    └─Dropout2d: 2-4                    [1, 32, 224, 224]         --
│    └─MaxPool2d: 2-5                    [1, 32, 112, 112]         --
│    └─Conv2d: 2-6                       [1, 16, 112, 112]         4,608
│    └─BatchNorm2d: 2-7                  [1, 16, 112, 112]         32
│    └─ReLU: 2-8                         [1, 16, 112, 112]         --
│    └─Dropout2d: 2-9                    [1, 16, 112, 112]         --
│    └─MaxPool2d: 2-10                   [1, 16, 56, 56]           --
│    └─Conv2d: 2-11                      [1, 16, 56, 56]           256
│    └─Bat

# Fuse Conv2d and Batch Norm to create Fused Model

### Modules to Fuse

In [13]:
# Numeric suffixes of every Conv2d/BatchNorm2d pair inside `model.model`,
# grouped by network stage.
fused_layer_ids = (
    1, 2,
    31, 32, 33, 34,
    41, 42, 43, 44, 45, 46,
    51, 52, 53, 54, 55, 56,
    61, 62,
    71, 72, 73,
)

# One [conv, bn] name pair per layer, in network order.
modules_to_fuse = [
    [f"model.conv{layer_id}", f"model.bn{layer_id}"]
    for layer_id in fused_layer_ids
]

### Code to Fuse

In [14]:
# Move the model to CPU before doing any other action
#model.to('cpu') # Is this really needed?

# Conv+BN folding is an inference-mode operation. Set eval mode here rather
# than relying on an earlier display-only cell having been executed.
model.eval()

# With the default inplace=False, fuse_modules works on a copy, so `model`
# keeps its BatchNorm layers and can still be compared against `fused_model`.
fused_model = torch.ao.quantization.fuse_modules(model, modules_to_fuse)

### Evaluate Fused Model vs Non Fused

In [15]:
# Make sure the fused copy is in inference mode before measuring it.
fused_model.eval()

logger.info('\n*********************** Baseline mAP evaluation of Fused and Original Models ***********************')

# (console banner, log label, model) for each of the two evaluations, in order.
baseline_runs = (
    ("____________________________ MODEL BEFORE FUSION ____________________________",
     "Non Fused Model", model),
    ("\n____________________________ MODEL AFTER FUSION ____________________________",
     "Fused Model", fused_model),
)

baseline_metrics = {}
with torch.no_grad():
    for banner, log_label, candidate in baseline_runs:
        print(banner)
        candidate_metrics = metrics.torchmetrics_mAP(
            loader=val_loader,
            model=candidate)
        print(candidate_metrics)
        logger.info(f'\n{log_label} mAP metrics:\n{candidate_metrics}')
        baseline_metrics[log_label] = candidate_metrics

# Keep the notebook-level names for both results.
non_fused_metrics = baseline_metrics["Non Fused Model"]
fused_metrics = baseline_metrics["Fused Model"]

____________________________ MODEL BEFORE FUSION ____________________________


Validating: 100%|█████████████████████████████████| 8/8 [00:02<00:00,  3.92it/s]


{'mAP': tensor(0.5567), 'AP': [0.5918485522270203, 0.5215780735015869], 'AR': [0.6433823704719543, 0.5520833134651184]}

____________________________ MODEL AFTER FUSION ____________________________


Validating: 100%|█████████████████████████████████| 8/8 [00:02<00:00,  3.98it/s]


{'mAP': tensor(0.5567), 'AP': [0.5919306874275208, 0.5215228796005249], 'AR': [0.6433823704719543, 0.5520833134651184]}


### Torch Summary

In [16]:
# Build the summary once and reuse it: calling summary() a second time only to
# log the same text repeats the model pass for no benefit.
fused_model_summary = summary(fused_model, input_size=(1, 3, config.IMG_H, config.IMG_W))
print(fused_model_summary)
logger.info("\nFUSED Model Summary")
logger.info(fused_model_summary)

Layer (type:depth-idx)                   Output Shape              Param #
SIMPLE_BED_DETECTOR                      [1, 12, 7, 7]             --
├─Sequential: 1-1                        [1, 12, 7, 7]             --
│    └─Conv2d: 2-1                       [1, 32, 224, 224]         896
│    └─Identity: 2-2                     [1, 32, 224, 224]         --
│    └─ReLU: 2-3                         [1, 32, 224, 224]         --
│    └─Dropout2d: 2-4                    [1, 32, 224, 224]         --
│    └─MaxPool2d: 2-5                    [1, 32, 112, 112]         --
│    └─Conv2d: 2-6                       [1, 16, 112, 112]         4,624
│    └─Identity: 2-7                     [1, 16, 112, 112]         --
│    └─ReLU: 2-8                         [1, 16, 112, 112]         --
│    └─Dropout2d: 2-9                    [1, 16, 112, 112]         --
│    └─MaxPool2d: 2-10                   [1, 16, 56, 56]           --
│    └─Conv2d: 2-11                      [1, 16, 56, 56]           272
│    └─Ide

In [17]:
# Keep the fused model in inference mode. As the last expression of the cell,
# the returned module is displayed, which shows the fused layer layout.
fused_model.eval()

SIMPLE_BED_DETECTOR(
  (model): Sequential(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn1): Identity()
    (relu1): ReLU()
    (dropout1): Dropout2d(p=0.3, inplace=False)
    (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): Identity()
    (relu2): ReLU()
    (dropout2): Dropout2d(p=0.3, inplace=False)
    (maxpool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv31): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1))
    (bn31): Identity()
    (relu31): ReLU()
    (conv32): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn32): Identity()
    (relu32): ReLU()
    (conv33): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))
    (bn33): Identity()
    (relu33): ReLU()
    (conv34): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn34): Ident

# Brevitas Model Setup

In [18]:
# Only the "BED" detector is supported; any other setting aborts the run.
if config.MODEL != "BED":
    print("Wrong Model")
    logger.info("Wrong Model")
    raise SystemExit("Wrong Model")

print("Using Brevitas BED Detector")
logger.info("\nUsing Brevitas BED Detector")

# Bit widths of the quantized detector, all taken from the run configuration.
quant_bit_widths = dict(
    weight_bw=config.WEIGHTS_BIT_WIDTH,
    big_layers_weight_bw=config.BIG_LAYERS_WEIGHTS_BIT_WIDTH,
    head_weight_bw=config.HEAD_WEIGHTS_BIT_WIDTH,
    act_bw=config.ACTIVATIONS_BIT_WIDTH,
    bias_bw=config.BIAS_BIT_WIDTH,
)
qnn_model = models_bnn_fxpoint_no_comp.FIXED_POINT_QUANT_SIMPLE_BED_DETECTOR(
    **quant_bit_widths
).to(config.DEVICE)

# The optimizer and scheduler act on the quantized model's parameters.
optimizer = optim.Adam(
    qnn_model.parameters(),
    lr=config.LEARNING_RATE,
    weight_decay=config.WEIGHT_DECAY,
)

# mode='min': the learning rate is reduced when the monitored value stops decreasing.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=config.FACTOR,
    patience=config.PATIENCE,
    threshold=config.THRES,
    threshold_mode='abs',
    min_lr=config.MIN_LR,
)


# MODEL PARAMETERS
# Trainable count: only parameters that receive gradients.
n_trainable = sum(param.numel() for param in qnn_model.parameters() if param.requires_grad)
trainable_message = f'\nTrainable parameters = {n_trainable}'
print(trainable_message)
logger.info(trainable_message)

# Total count: every parameter flattened into one vector.
n_params = parameters_to_vector(qnn_model.parameters()).numel()
print(f'Total parameters = {n_params}\n')
logger.info(f'Total parameters = {n_params}')

Using Brevitas BED Detector

Trainable parameters = 287311
Total parameters = 287311



### Torch Summary

In [19]:
#qnn_model.eval()

In [20]:
# Build the summary once and reuse it: calling summary() a second time only to
# log the same text repeats the model pass for no benefit.
quant_model_summary = summary(qnn_model, input_size=(1, 3, config.IMG_H, config.IMG_W))
print(quant_model_summary)
logger.info("\nBrevitas QUANT Model Summary")
logger.info(quant_model_summary)

  return super().rename(names)


Layer (type:depth-idx)                                                      Output Shape              Param #
FIXED_POINT_QUANT_SIMPLE_BED_DETECTOR                                       [1, 12, 7, 7]             --
├─Sequential: 1-1                                                           [1, 12, 7, 7]             --
│    └─QuantIdentity: 2-1                                                   [1, 3, 224, 224]          --
│    │    └─ActQuantProxyFromInjector: 3-1                                  [1, 3, 224, 224]          --
│    │    └─ActQuantProxyFromInjector: 3-2                                  [1, 3, 224, 224]          --
│    └─QuantReLU: 2-1813                                                    --                        (recursive)
│    │    └─ActQuantProxyFromInjector: 3-1895                               --                        (recursive)
│    └─QuantIdentity: 2-3                                                   --                        --
│    │    └─ActQuantProxyFromInj

# Load Fused Model Trained Weights to Brevitas Model

In [21]:
# Imported under an alias so it does not shadow the project's own `config` module.
from brevitas import config as brevitas_config

# Needed before copying float state dicts into the Brevitas layers below.
# NOTE(review): presumably the quantized layers expect extra state-dict keys
# that the float layers lack — confirm against the Brevitas documentation.
brevitas_config.IGNORE_MISSING_KEYS = True

In [22]:
fused_model.eval()

logger.info('\nLoading Pretrained Weights from Fused Model to Quant Model')


def announce(message):
    """Print `message` and write the same text to the run log."""
    print(message)
    logger.info(message)


# Index the quantized modules by name once instead of rescanning the whole
# quantized model for every fused module.
qnn_modules_by_name = dict(qnn_model.named_modules())

for fused_model_name, fused_model_mod in fused_model.named_modules():
    qnn_model_mod = qnn_modules_by_name.get(fused_model_name)
    if qnn_model_mod is None:
        # No module with this name in the quantized model: nothing to transfer.
        continue
    qnn_model_name = fused_model_name

    announce(f'original model name: {fused_model_name} - QNN model name: {qnn_model_name}')
    if isinstance(fused_model_mod, nn.Conv2d):
        # Copy the fused (Conv + folded BN) weights into the quantized layer of the same name.
        qnn_model_mod.load_state_dict(fused_model_mod.state_dict())
        announce(f'\t****** Loading weights of Conv2d layer fused {fused_model_name} into QNN {qnn_model_name}')
    elif isinstance(fused_model_mod, nn.BatchNorm2d):
        announce(f'\toooooo BN should never print here, as Fused Model should not have such layers')
    elif isinstance(fused_model_mod, nn.Linear):
        qnn_model_mod.load_state_dict(fused_model_mod.state_dict())
        announce(f'\t****** Loading weights of Linear layer fused {fused_model_name} into QNN {qnn_model_name}')
    else:
        # Containers, activations, pooling, dropout, ...: nothing is copied.
        announce(f'Module type: {type(fused_model_mod)}')
        announce(f'\t______ Ignore weights or params of layer fused {fused_model_name} and QNN {qnn_model_name}')

original model name:  - QNN model name: 
Module type: <class 'modules.models.SIMPLE_BED_DETECTOR'>
	______ Ignore weights or params of layer fused  and QNN 
original model name: model - QNN model name: model
Module type: <class 'torch.nn.modules.container.Sequential'>
	______ Ignore weights or params of layer fused model and QNN model
original model name: model.conv1 - QNN model name: model.conv1
	****** Loading weights of Conv2d layer fused model.conv1 into QNN model.conv1
original model name: model.relu1 - QNN model name: model.relu1
Module type: <class 'torch.nn.modules.activation.ReLU'>
	______ Ignore weights or params of layer fused model.relu1 and QNN model.relu1
original model name: model.dropout1 - QNN model name: model.dropout1
Module type: <class 'torch.nn.modules.dropout.Dropout2d'>
	______ Ignore weights or params of layer fused model.dropout1 and QNN model.dropout1
original model name: model.maxpool2 - QNN model name: model.maxpool2
Module type: <class 'torch.nn.modules.po

# Loss and Metrics Loggers and Plotters

In [23]:
# Per-epoch history for the training split, plus the learning-rate history
# (the LR logger is given the plots folder).
train_losses_logger = utils.LogLosses()
train_metrics_logger = utils.LogMetrics()
lr_logger = utils.LogLR(log_path=config.PLOTS_FOLDER)

# Per-epoch history for the validation split.
val_losses_logger = utils.LogLosses()
val_metrics_logger = utils.LogMetrics()

# One plotter for losses and one for metrics; both are given the plots folder.
loss_plotter = utils.PlotMetrics(log_path=config.PLOTS_FOLDER, model_name=config.MODEL, loss_or_metric='Loss')
metrics_plotter = utils.PlotMetrics(log_path=config.PLOTS_FOLDER, model_name=config.MODEL, loss_or_metric='Metric')

# Train Loop Function

In [24]:
def train_loop(model, start_epoch=0, epochs_to_train=config.EPOCHS, map_every=5):
    """Train and validate `model`, logging, plotting and checkpointing every epoch.

    Relies on notebook-level objects created in earlier cells: `optimizer`,
    `scheduler`, `loss_fn`, `train_loader`, `val_loader`, `logger`, the
    loss/metric/LR loggers and the two plotters.

    Args:
        model: network to train; the same object is passed to the train and
            validation steps and to every checkpoint call.
        start_epoch: index of the first epoch to run. When non-zero, the plot
            x axes are rebuilt from the history already stored in the train
            loggers.
        epochs_to_train: number of epochs to run, starting at `start_epoch`.
        map_every: mAP is computed, and a periodic checkpoint is written, on
            every epoch whose 1-based index is a multiple of this value. The
            default of 5 reproduces the previously hard-coded period and the
            `..._detector__5epoch.pt` checkpoint name.

    Returns:
        The `model` object that was passed in, as left after the last epoch
        (not the best checkpoint).

    Raises:
        ValueError: if `map_every` is smaller than 1.

    Note:
        The best-loss and best-mAP trackers start fresh on every call, so a
        resumed run rewrites the `best_loss` checkpoint on its first epoch.
    """
    if map_every < 1:
        raise ValueError(f'map_every must be >= 1, got {map_every}')

    ''' ==============================================================
                                TRAINING LOOP
    ============================================================== '''
    start = datetime.datetime.now()
    start_time = start.strftime("%H:%M:%S")
    print(f'\n***Start Training: {start_time}\n')
    logger.info(f'\n***Start Training: {start_time}\n')

    # Start with infinite validation loss
    best_valid_loss = np.inf
    best_mAP = torch.tensor(0., dtype=torch.float32)

    epochs_loss_plot = []
    epochs_metric_plot = []
    if start_epoch != 0:
        # Losses are logged once per epoch: x axis is 0..n-1.
        logged_loss_epochs = len(train_losses_logger.get_losses()['Total'])
        epochs_loss_plot.extend(range(logged_loss_epochs))
        # Metrics are logged once every `map_every` epochs, on the last epoch
        # of each period: x axis is map_every-1, 2*map_every-1, ...
        logged_mAP_points = len(train_metrics_logger.get_metrics()['mAP']['mAP'])
        epochs_metric_plot.extend(
            map_every * i + (map_every - 1) for i in range(logged_mAP_points))

    end_epoch = start_epoch + epochs_to_train

    for epoch in range(start_epoch, end_epoch):

        print(f'\n=== EPOCH {epoch}/{end_epoch-1} ===')
        logger.info(f'\n=== EPOCH {epoch}/{end_epoch-1} ===')

        #====================== TRAINING ========================#
        current_lr = train_epoch.get_lr(optimizer=optimizer)
        logger.info(f'Learning Rate = {current_lr}\n')
        lr_logger.log_lr(current_lr)

        # mAP is only computed on the last epoch of each period.
        calculate_mAP = ((epoch + 1) % map_every) == 0
        if calculate_mAP:
            epochs_metric_plot.append(epoch)

        train_losses, train_metrics = train_epoch.train_fn(
            loader=train_loader,
            model=model,
            optimizer=optimizer,
            loss_fn=loss_fn,
            loss_l1_lambda=config.LAMBDA_L1_LOSS,
            metric=metrics.map_metric,
            device=config.DEVICE,
            calculate_mAP=calculate_mAP)

        train_losses_logger.update_losses(train_losses)
        if calculate_mAP:
            train_metrics_logger.update_metrics(train_metrics)

        logger.info(utils.print_metrics_to_logger("TRAIN STATS", train_losses, train_metrics, mAP_available=calculate_mAP))

        #===================== VALIDATING =======================#
        with torch.no_grad():
            val_losses, val_metrics = val_epoch.eval_fn(
                loader=val_loader,
                model=model,
                loss_fn=loss_fn,
                metric=metrics.map_metric,
                device=config.DEVICE,
                calculate_mAP=calculate_mAP)

            # The plateau scheduler monitors the total validation loss.
            scheduler.step(val_losses['Total'])

            val_losses_logger.update_losses(val_losses)
            if calculate_mAP:
                val_metrics_logger.update_metrics(val_metrics)

            logger.info(utils.print_metrics_to_logger("VAL STATS", val_losses, val_metrics, mAP_available=calculate_mAP))

        epochs_loss_plot.append(epoch)

        loss_plotter.plot_all_metrics(
            train_losses_logger.get_losses(),
            val_losses_logger.get_losses(),
            epochs_loss_plot)

        if calculate_mAP:
            metrics_plotter.plot_all_metrics(
                train_metrics_logger.get_metrics(),
                val_metrics_logger.get_metrics(),
                epochs_metric_plot)

        lr_logger.plot_lr(epochs_loss_plot)

        #======================= SAVING =========================#
        # Periodic checkpoint, on the same epochs on which mAP is computed.
        if calculate_mAP:
            save_name = config.WEIGHTS_FOLDER + config.MODEL + f'_detector__{map_every}epoch.pt'
            utils.save_checkpoint(epoch, model, optimizer, scheduler, save_name)

        if best_valid_loss > val_losses['Total']:
            best_valid_loss = val_losses['Total']
            print(f"\nSaving model with new best validation loss: {best_valid_loss:.3f}")
            logger.info(f"Saving model with new best validation loss: {best_valid_loss:.3f}")
            save_name = config.WEIGHTS_FOLDER + config.MODEL + '_detector__' + 'best_loss'  + '.pt'
            utils.save_checkpoint(epoch, model, optimizer, scheduler, save_name)

        # Save model if mAP increases
        if calculate_mAP and best_mAP < val_metrics['mAP']:
            best_mAP = val_metrics['mAP']
            print(f"\nSaving model with new best mAP: {best_mAP:.4f}")
            logger.info(f"Saving model with new best mAP: {best_mAP:.4f}")
            save_precision_name = f'best_mAP={best_mAP:.4f}__epoch={epoch}'
            save_name = config.WEIGHTS_FOLDER + config.MODEL + '_detector__' + save_precision_name + '.pt'
            utils.save_checkpoint(epoch, model, optimizer, scheduler, save_name)

    # The final weights are stored as a bare state dict, not via save_checkpoint.
    logger.info('Saving last model')
    torch.save(model.state_dict(), config.WEIGHTS_FOLDER + 'last_' + config.MODEL + '_detector.pt')

    #======================= FINISH =========================#
    end = datetime.datetime.now()
    end_time = end.strftime("%H:%M:%S")
    print(f'\n***Script finished: {end_time}\n')
    print(f'Time elapsed: {end-start}')
    logger.info(f'\n***Script finished: {end_time}\n')
    logger.info(f'Time elapsed: {end-start}')

    return model

# Main Execute

In [25]:
print("Starting script\n")
logger.info("Starting script\n")
    
# Train the quantized model with the default start epoch and epoch count.
# train_loop returns the object it was given, so `qnn_model_trained` and
# `qnn_model` refer to the same module.
qnn_model_trained = train_loop(qnn_model)

Starting script


***Start Training: 10:10:27


=== EPOCH 0/4 ===
Learning Rate = 0.001



Training: 100%|█████████████████████████████████| 14/14 [00:14<00:00,  1.00s/it]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
123.882     |53.201      |51.514      |4.343       |14.823      


Validating: 100%|█████████████████████████████████| 8/8 [00:02<00:00,  3.87it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
132.328     |65.606      |48.668      |5.813       |12.241      

Saving model with new best validation loss: 132.328

=== EPOCH 1/4 ===
Learning Rate = 0.001



Training: 100%|█████████████████████████████████| 14/14 [00:13<00:00,  1.07it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
95.033      |37.204      |43.480      |5.708       |8.640       


Validating: 100%|█████████████████████████████████| 8/8 [00:02<00:00,  3.89it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
107.091     |46.283      |44.260      |5.234       |11.314      

Saving model with new best validation loss: 107.091

=== EPOCH 2/4 ===
Learning Rate = 0.001



Training: 100%|█████████████████████████████████| 14/14 [00:13<00:00,  1.05it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
88.315      |32.905      |42.432      |5.808       |7.169       


Validating: 100%|█████████████████████████████████| 8/8 [00:02<00:00,  3.86it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
105.504     |44.951      |45.448      |3.833       |11.272      

Saving model with new best validation loss: 105.504

=== EPOCH 3/4 ===
Learning Rate = 0.001



Training: 100%|█████████████████████████████████| 14/14 [00:13<00:00,  1.02it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
81.889      |30.015      |37.947      |7.152       |6.775       


Validating: 100%|█████████████████████████████████| 8/8 [00:02<00:00,  3.86it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
98.132      |42.464      |40.139      |5.401       |10.128      

Saving model with new best validation loss: 98.132

=== EPOCH 4/4 ===
Learning Rate = 0.001



Training: 100%|█████████████████████████████████| 14/14 [00:14<00:00,  1.01s/it]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
78.340      |28.847      |37.031      |7.204       |5.257       
Train mAP = 0.1731


Validating: 100%|█████████████████████████████████| 8/8 [00:02<00:00,  3.58it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
99.103      |40.922      |37.674      |11.047      |9.460       
Val mAP = 0.1510

Saving model with new best mAP: 0.1510

***Script finished: 10:11:49

Time elapsed: 0:01:21.597764


# Export to ONNX

In [26]:
# Export the trained quantized model to ONNX with Brevitas' QCDQ exporter,
# using a random single-image input placed on config.DEVICE.
export_onnx_qcdq(
    qnn_model_trained, 
    torch.randn(1, 3, config.IMG_H, config.IMG_W).to(config.DEVICE), 
    export_path=config.RUN_FOLDER+'bed_detector___fixed_point__qcdq.onnx')



### CPU

In [27]:
# Second export with both the model and the dummy input on the CPU.
qnn_model_trained.to('cpu')
export_onnx_qcdq(
    qnn_model_trained, 
    torch.randn(1, 3, config.IMG_H, config.IMG_W), 
    export_path=config.RUN_FOLDER+'bed_detector___fixed_point__qcdq__CPU.onnx')

# Module.to() moves the module in place, and `qnn_model_trained` is the same
# object as `qnn_model`. Put it back on the configured device so a later
# training cell does not hit a CPU/GPU mismatch. The trailing `;` keeps the
# notebook from displaying the module repr.
qnn_model_trained.to(config.DEVICE);



### More Train

In [28]:
# print("Train More script\n")
# logger.info("Train More script\n")
    
# qnn_model_more_trained = train_loop(
#     qnn_model,
#     start_epoch=40,
#     epochs_to_train=20)