In [1]:
import logging
import datetime
import pandas as pd

import matplotlib.pyplot as plt

import numpy as np
import torch
from torch.nn.utils import parameters_to_vector
import torch.optim as optim
from torchinfo import summary

import config
import modules.dataloaders as data_loaders
import modules.utils as utils
import modules.models as models
import modules.loss as loss_module
import modules.metrics as metrics
import modules.train_epoch as train_epoch
import modules.val_epoch as val_epoch

# AIMET imports

In [2]:
from decimal import Decimal

from aimet_torch.compress import ModelCompressor
from aimet_torch.defs import SpatialSvdParameters
from aimet_torch.onnx_utils import OnnxSaver
from aimet_common.defs import CostMetric, CompressionScheme, GreedySelectionParameters

2024-07-19 13:52:43,219 - root - INFO - AIMET


In [3]:
#torch.cuda.empty_cache()

# Define Matplot Style

In [4]:
#mpl.style.use('seaborn-v0_8')

# Logger

In [5]:
# Run log: a dedicated, non-propagating logger that appends bare messages
# (no timestamp/level prefix) to <config.LOGS_FOLDER>logfile.log.
log_path = config.LOGS_FOLDER

logger = logging.getLogger("GonLogger")
logger.propagate = False
logger.setLevel(logging.INFO)

# logging.getLogger() hands back the same object on every call, so re-running
# this cell in a live kernel would stack a second FileHandler and every message
# would be written twice. Attach the file handler only once.
if not any(isinstance(handler, logging.FileHandler) for handler in logger.handlers):
    file_handler = logging.FileHandler(log_path + 'logfile.log')
    formatter = logging.Formatter('%(message)s')
    file_handler.setFormatter(formatter)

    # add file handler to logger
    logger.addHandler(file_handler)

logger.info('BED Detector.\n' +
            '\tDFire and FASDD UAV and CV.\n' +
            '\tFASDD: train and val datasets to train, and test dataset to validate.\n' +
            '\tFASDD RS not included, as it only has smoke and it is too different to current pictures\n' +
            f'\tSVD Compression Ratio  = {config.SVD_COMPRESSION_RATIO}\n')

# Hyperparameters Log

In [6]:
# ============================
#     Print Config Values
# ============================
# The console and the log file receive almost the same report; the few
# differences (extra blank lines, one heading) are kept exactly as they were,
# which is why the two lists are spelled out separately.
console_lines = [
    f'\nLoad Model: {config.LOAD_MODEL}',
    f'\tModel: {config.LOAD_MODEL_FILE}',
    f'Device: {config.DEVICE}',
    'Optimizer:',
    f'\tLearning Rate: {config.LEARNING_RATE}',
    f'\tGradients Clip Norm: {config.GRADIENTS_CLIP_NORM}',
    f'\tWeight Decay: {config.WEIGHT_DECAY}',
    'Scheduler:',
    f'\tScheduler factor: {config.FACTOR}',
    f'\tScheduler patience: {config.PATIENCE}',
    f'\tScheduler threshold: {config.THRES}',
    f'\tScheduler min learning rate: {config.MIN_LR}',
    f'Batch Size: {config.BATCH_SIZE}',
    f'Num Workers: {config.NUM_WORKERS}',
    f'Pin Memory: {config.PIN_MEMORY}',
    f'Epochs: {config.EPOCHS}',
    'IMG DIMS:',
    f'\tWidth: {config.IMG_W}\n\tHeight: {config.IMG_H}',
    '\nGrid, Bounding Boxes, Classes, Max Obj and Thresholds:',
    f'\tGrid: {config.S}',
    f'\tNumber of Bounding Boxes per Cell: {config.B}',
    f'\tNumber of Classes: {config.C}',
    f'\tMaximum Number of Objects per Image: {config.MAX_OBJ}',
    f'\tIOU Threshold: {config.IOU_THRESHOLD}',
    f'\tScore Threshold: {config.SCORE_THRESHOLD}',
    '\nAIMET Configuration',
    f'\tUse Previous Dic: {config.USE_PREVIOUS_DIC}',
    f'\tSpatial SVD Compression: {config.SVD_COMPRESSION_RATIO}',
    f'\tPrunning Compression: {config.PRUNING_COMPRESSION_RATIO}',
]
for line in console_lines:
    print(line)

log_lines = [
    f'\nLoad Model: {config.LOAD_MODEL}',
    f'\tModel: {config.LOAD_MODEL_FILE}',
    f'\nDevice: {config.DEVICE}',
    'Optimizer:',
    f'\tLearning Rate: {config.LEARNING_RATE}',
    f'\tGradients Clip Norm: {config.GRADIENTS_CLIP_NORM}',
    f'\tWeight Decay: {config.WEIGHT_DECAY}',
    'Scheduler:',
    f'\tScheduler factor: {config.FACTOR}',
    f'\tScheduler patience: {config.PATIENCE}',
    f'\tScheduler threshold: {config.THRES}',
    f'\tScheduler min learning rate: {config.MIN_LR}',
    f'\nBatch Size: {config.BATCH_SIZE}',
    f'Num Workers: {config.NUM_WORKERS}',
    f'Pin Memory: {config.PIN_MEMORY}',
    f'Epochs: {config.EPOCHS}',
    'IMG DIMS:',
    f'\tWidth: {config.IMG_W}\n\tHeight: {config.IMG_H}',
    '\nGrid, Bounding Boxes, Classes and Thresholds:',
    f'\tGrid: {config.S}',
    f'\tNumber of Bounding Boxes per Cell: {config.B}',
    f'\tNumber of Classes: {config.C}',
    f'\tMaximum Number of Objects per Image: {config.MAX_OBJ}',
    f'\tIOU Threshold: {config.IOU_THRESHOLD}',
    f'\tScore Threshold: {config.SCORE_THRESHOLD}\n',
    '\nAIMET Configuration',
    f'\tUse Previous Dic: {config.USE_PREVIOUS_DIC}',
    f'\tSpatial SVD Compression: {config.SVD_COMPRESSION_RATIO}',
    f'\tPrunning Compression: {config.PRUNING_COMPRESSION_RATIO}',
]
for line in log_lines:
    logger.info(line)


Load Model: True
	Model: ./experiments/test_20_no_sigmoid_softmax_permute_out/weights/BED_detector__best_mAP=0.6405__epoch=144.pt
Device: cuda
Optimizer:
	Learning Rate: 0.0001
	Gradients Clip Norm: 500
	Weight Decay: 0.0001
Scheduler:
	Scheduler factor: 0.8
	Scheduler patience: 1
	Scheduler threshold: 0.01
	Scheduler min learning rate: 1e-06
Batch Size: 64
Num Workers: 8
Pin Memory: True
Epochs: 10
IMG DIMS:
	Width: 224
	Height: 224

Grid, Bounding Boxes, Classes, Max Obj and Thresholds:
	Grid: 7
	Number of Bounding Boxes per Cell: 2
	Number of Classes: 2
	Maximum Number of Objects per Image: 10
	IOU Threshold: 0.5
	Score Threshold: 0.2

AIMET Configuration
	Use Previous Dic: True
	Spatial SVD Compression: 0.7
	Prunning Compression: 0.8


# Dataset Setup

In [7]:
# Build the loaders used by every later cell (picture grids, evaluation callback,
# fine-tuning loop).
train_loader = data_loaders.get_train_loader()

# NOTE(review): the three *_len arguments presumably cap how many samples are drawn
# from each test subset -- confirm against modules/dataloaders.py.
val_loader = data_loaders.get_val_loader(
    dfire_len = 900,
    fasdd_uav_len = 900,
    fasdd_cv_len = 3400)


TRAIN DFIRE dataset


Corrupt JPEG data: 1 extraneous bytes before marker 0xd9


DFire Removed wrong images: 0
DFire Removed due to overlapping: 1292
DFire Removed due to more than 10: 59

Train DFire dataset len: 15870

TRAIN FASDD UAV dataset
FASDD Removed wrong images: 0
FASDD Removed due to overlapping: 1233
FASDD Removed due to more than 10: 449

Train FASDD UAV dataset len: 10869

VAL FASDD UAV dataset
FASDD Removed wrong images: 0
FASDD Removed due to overlapping: 841
FASDD Removed due to more than 10: 300

Val FASDD UAV dataset len: 7224

TRAIN FASDD CV dataset


Corrupt JPEG data: 1 extraneous bytes before marker 0xd9


FASDD Removed wrong images: 0
FASDD Removed due to overlapping: 2141
FASDD Removed due to more than 10: 342

Train FASDD CV dataset len: 45177

VAL FASDD CV dataset


Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1 extraneous bytes before marker 0xd9


FASDD Removed wrong images: 0
FASDD Removed due to overlapping: 1238
FASDD Removed due to more than 10: 221

Val FASDD CV dataset len: 30311

Concatenate Train DFire and Train FASDD UAV datasets
Train dataset len: 26739
Concatenate with Val FASDD UAV dataset
Train dataset len: 33963
Concatenate with Train FASDD CV dataset
Train dataset len: 79140
Concatenate with Val FASDD CV dataset
Train dataset len: 109451

TEST DFire dataset
DFire Removed wrong images: 0
DFire Removed due to overlapping: 62
DFire Removed due to more than 10: 3

Test dataset len: 835

TEST FASDD UAV dataset
FASDD Removed wrong images: 0
FASDD Removed due to overlapping: 77
FASDD Removed due to more than 10: 38

Val FASDD UAV dataset len: 785

TEST FASDD CV dataset
FASDD Removed wrong images: 0
FASDD Removed due to overlapping: 70
FASDD Removed due to more than 10: 6

Test FASDD CV dataset len: 3324

Concatenate Test DFire and FASDD UAV datasets
Test dataset len: 1620
Concatenate with FASDD CV dataset
Test dataset le

# Plot Some Train Pictures

In [8]:
# Save a 4x5 grid of annotated samples from the first training batch to
# <config.RUN_FOLDER>train_pictures.png. Nothing is displayed inline.
for batch_idx, (img, label) in enumerate(train_loader):

    print(f'Batch size equal to img.shape[0] = {img.shape[0]}')
    print(f'Batch images shape = {img.shape}')

    fig, axes = plt.subplots(4, 5, figsize=(10,8))
    # A batch may hold fewer than 20 images (small BATCH_SIZE); indexing past the
    # end of the batch would raise IndexError, so only draw what is available.
    n_shown = min(20, img.shape[0])
    for i, ax in zip(range(n_shown), axes.flat):
        pic = utils.plot_dataset_img(img[i], label[i], grid=True)
        ax.imshow(pic)
    fig.tight_layout()
    fig.savefig(config.RUN_FOLDER + 'train_pictures.png')
    plt.close(fig)
    break  # only the first batch is needed

Batch size equal to img.shape[0] = 64
Batch images shape = torch.Size([64, 3, 224, 224])


# Plot Some Val Pictures

In [9]:
# Save a 4x5 grid of annotated samples from one validation batch to
# <config.RUN_FOLDER>val_pictures.png. Nothing is displayed inline.
# If the loader yields fewer than PREVIEW_BATCH_IDX + 1 batches, no file is written.
PREVIEW_BATCH_IDX = 27  # which validation batch to preview

for batch_idx, (img, label) in enumerate(val_loader):

    if batch_idx != PREVIEW_BATCH_IDX:
        continue

    print(f'Batch size equal to img.shape[0] = {img.shape[0]}')
    print(f'Batch images shape = {img.shape}')

    fig, axes = plt.subplots(4, 5, figsize=(10,8))
    # A batch may hold fewer than 20 images (small BATCH_SIZE or a short last
    # batch); only draw what is available instead of raising IndexError.
    n_shown = min(20, img.shape[0])
    for i, ax in zip(range(n_shown), axes.flat):
        pic = utils.plot_dataset_img(img[i], label[i], grid=True)
        ax.imshow(pic)
    fig.tight_layout()
    fig.savefig(config.RUN_FOLDER + 'val_pictures.png')
    plt.close(fig)
    break

Batch size equal to img.shape[0] = 64
Batch images shape = torch.Size([64, 3, 224, 224])


# Loss Setup

In [10]:
# Only the YOLOv1-style loss is wired up; any other setting stops the run.
if config.LOSS_FN != "YOLOV1_LOSS":
    print("Wrong loss function")
    logger.info("Wrong loss function")
    raise SystemExit("Wrong loss function")

print('Loss Function: YOLOV1_LOSS')
logger.info('\nLoss Function: YOLOV1_LOSS')
loss_fn = loss_module.YoloLoss_2BBox()

l1_message = f'Lambda for L1 regularization: {config.LAMBDA_L1_LOSS}'
print(l1_message)
logger.info(l1_message)

Loss Function: YOLOV1_LOSS
Lambda for L1 regularization: 0


# Model Setup

In [11]:
# Only the "BED" model is supported; any other setting stops the run.
if config.MODEL != "BED":
    print("Wrong Model")
    logger.info("Wrong Model")
    raise SystemExit("Wrong Model")

print("Using BED Detector")
logger.info("\nUsing BED Detector")
# Alternative architecture kept for reference: models.BED_DETECTOR()
model = models.SIMPLE_BED_DETECTOR().to(config.DEVICE)

# Optimizer and plateau scheduler bound to the uncompressed model.
optimizer = optim.Adam(
    model.parameters(),
    lr=config.LEARNING_RATE,
    weight_decay=config.WEIGHT_DECAY,
)

scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=config.FACTOR,
    patience=config.PATIENCE,
    threshold=config.THRES,
    threshold_mode='abs',
    min_lr=config.MIN_LR,
)

# MODEL PARAMETERS
n_trainable = sum(
    weights.numel()
    for weights in filter(lambda w: w.requires_grad, model.parameters())
)
trainable_message = f'\nTrainable parameters = {n_trainable}'
print(trainable_message)
logger.info(trainable_message)

n_params = parameters_to_vector(model.parameters()).numel()
print(f'Total parameters = {n_params}\n')
logger.info(f'Total parameters = {n_params}')

Using BED Detector

Trainable parameters = 287276
Total parameters = 287276



### Check Model Shape

In [12]:
# Smoke test: push a random batch of 4 images through the network and report
# the input/output shapes together with the architecture.
in_rand_np = np.random.rand(4, 3, config.IMG_H, config.IMG_W)
in_rand = torch.tensor(in_rand_np, dtype=torch.float32, device=config.DEVICE)

# The network contains BatchNorm2d (track_running_stats=True) and Dropout2d layers.
# Running this probe in train mode would fold random noise into the BatchNorm
# running statistics and apply dropout, so switch to eval mode, skip autograd,
# and restore whatever mode the model was in afterwards.
was_training = model.training
model.eval()
try:
    with torch.no_grad():
        out_test = model(in_rand)
finally:
    model.train(was_training)

print(f'Input shape is {in_rand.shape}')
print(f'Model shape is {out_test.shape}')
print(f'BED Model Arquitecture\n{model}')
logger.info(f'\nInput shape is {in_rand.shape}')
logger.info(f'Model shape is {out_test.shape}\n')
logger.info(f'BED Model Arquitecture\n{model}')

Input shape is torch.Size([4, 3, 224, 224])
Model shape is torch.Size([4, 12, 7, 7])
BED Model Arquitecture
SIMPLE_BED_DETECTOR(
  (model): Sequential(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU()
    (dropout1): Dropout2d(p=0.3, inplace=False)
    (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU()
    (dropout2): Dropout2d(p=0.3, inplace=False)
    (maxpool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv31): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn31): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

### Torch Summary

In [13]:
# Build the torchinfo report once and reuse it for both the console and the log
# file; each summary() call runs its own forward pass through the model.
model_summary = summary(model, input_size=(1, 3, config.IMG_H, config.IMG_W))
print(model_summary)
logger.info("\nModel Summary")
logger.info(model_summary)

Layer (type:depth-idx)                   Output Shape              Param #
SIMPLE_BED_DETECTOR                      [1, 12, 7, 7]             --
├─Sequential: 1-1                        [1, 12, 7, 7]             --
│    └─Conv2d: 2-1                       [1, 32, 224, 224]         864
│    └─BatchNorm2d: 2-2                  [1, 32, 224, 224]         64
│    └─ReLU: 2-3                         [1, 32, 224, 224]         --
│    └─Dropout2d: 2-4                    [1, 32, 224, 224]         --
│    └─MaxPool2d: 2-5                    [1, 32, 112, 112]         --
│    └─Conv2d: 2-6                       [1, 16, 112, 112]         4,608
│    └─BatchNorm2d: 2-7                  [1, 16, 112, 112]         32
│    └─ReLU: 2-8                         [1, 16, 112, 112]         --
│    └─Dropout2d: 2-9                    [1, 16, 112, 112]         --
│    └─MaxPool2d: 2-10                   [1, 16, 56, 56]           --
│    └─Conv2d: 2-11                      [1, 16, 56, 56]           256
│    └─Bat

# Load Pretrained or Initialize Weights

In [14]:
# Restore the pretrained weights into `model` from config.LOAD_MODEL_FILE.
# NOTE(review): the checkpoint is loaded unconditionally -- config.LOAD_MODEL is printed
# in the config dump but is not consulted here.
# NOTE(review): optimizer/scheduler are passed as None, presumably so that only the model
# weights are restored -- confirm against utils.load_checkpoint.
epochs_trained = utils.load_checkpoint(config.LOAD_MODEL_FILE, 
                                       model, 
                                       optimizer=None, 
                                       scheduler=None, 
                                       device=config.DEVICE)

logger.info(f"Loading Model. Trained during {epochs_trained} epochs")

Loading Model. Trained during 144 epochs


# AIMET Spatial SVD

### Configure SVD Parameters

In [15]:
# Layers excluded from spatial SVD (left uncompressed).
modules_to_ignore = [model.model.conv72, model.model.conv73, model.model.conv74]
#modules_to_ignore = []

# Decimal(float) captures the float's exact binary expansion
# (Decimal(0.7) == 0.6999999999999999555910790149937...), so the target would not be
# the ratio written in the config. Going through str() yields exactly Decimal('0.7').
target_comp_ratio = Decimal(str(config.SVD_COMPRESSION_RATIO))

# NOTE(review): the saved eval-score dictionary is passed unconditionally;
# config.USE_PREVIOUS_DIC is printed in the config dump but not checked here -- confirm
# that config.SVD_DIC_FILE already reflects that flag.
greedy_params = GreedySelectionParameters(target_comp_ratio=target_comp_ratio,
                                          saved_eval_scores_dict=config.SVD_DIC_FILE)
auto_params = SpatialSvdParameters.AutoModeParams(greedy_params,
                                                  modules_to_ignore=modules_to_ignore)
spatial_svd_params = SpatialSvdParameters(mode=SpatialSvdParameters.Mode.auto,
                                          params=auto_params)

### Evaluate Model Callback

Signature: (model, iterations, use_cuda)
Return an accuracy metric

In [16]:
def evaluate_model(model, iterations, use_cuda):
    """Score `model` on the notebook-level `val_loader` and return its `map_50` value.

    This is the evaluation callback handed to AIMET's ModelCompressor.

    Args:
        model: network to evaluate. It is moved to the selected device and left in
            eval mode when the function returns.
        iterations: maximum number of validation batches to evaluate, or None to
            run over the whole loader.
        use_cuda: evaluate on 'cuda' when truthy, otherwise on 'cpu'.

    Returns:
        float: the `map_50` entry computed by `metrics.map_metric`. The metric is
        reset before returning, so consecutive calls do not accumulate.
    """
    device = torch.device('cuda' if use_cuda else 'cpu')
    # Move once up front instead of on every batch.
    model.to(device)
    model.eval()

    # Pure inference: without no_grad() every forward pass would build an autograd
    # graph, wasting memory across the many evaluations of a compression search.
    with torch.no_grad():
        for batch_idx, (x, y) in enumerate(val_loader):
            # Check the batch budget before the forward pass so no batch is
            # computed and then thrown away.
            if iterations is not None and batch_idx == iterations:
                break

            out = model(x.to(device))

            # The model emits channels-first output; the box decoding below is fed
            # channels-last, so the permute is done here rather than in the model.
            out = out.permute(0, 2, 3, 1)

            # Mean Average Precision (boxes are decoded on the CPU, so the targets
            # never need to visit the GPU).
            for idx in range(x.shape[0]):
                target_boxes = metrics.get_true_boxes(y[idx].detach().to('cpu'))
                pred_boxes = metrics.get_pred_boxes(out[idx].detach().to('cpu'))
                metrics.map_metric.update(preds = pred_boxes, target = target_boxes)

    meanAP = metrics.map_metric.compute()
    metrics.map_metric.reset()
    print(f'Val mAP = {meanAP["map_50"]:.4f}')

    return meanAP['map_50'].item()

### Baseline mAP

In [17]:
# Reference score of the uncompressed model: iterations=None evaluates the whole
# validation loader, use_cuda=True runs on the GPU.
baseline_mAP = evaluate_model(model, None, True)
print(type(baseline_mAP))  # debug aid: shows the type returned by the callback

logger.info(f'Baseline mAP: {baseline_mAP}')

Val mAP = 0.6235
<class 'float'>


### Input Shape

In [18]:
# Shape of a single dummy input (batch of 1, 3 channels, IMG_H x IMG_W); passed to
# ModelCompressor.compress_model and reused for the ONNX export.
input_shape = (1, 3, config.IMG_H, config.IMG_W)

In [19]:
# Run AIMET spatial-SVD compression with the auto-mode parameters built above.
# eval_iterations=None is forwarded to evaluate_model as `iterations`, so each
# candidate is scored on the whole validation loader; cost is measured in memory.
# Returns the compressed model and a statistics object (printed in a later cell).
comp_model, stats = ModelCompressor.compress_model(model,
                                                   input_shape=input_shape,
                                                   eval_callback=evaluate_model,
                                                   eval_iterations=None,
                                                   compress_scheme=CompressionScheme.spatial_svd,
                                                   cost_metric=CostMetric.memory,
                                                   parameters=spatial_svd_params,
                                                   visualization_url=None)                                                    

2024-07-19 14:12:41,808 - CompRatioSelect - INFO - Greedy selection: Read eval dict from ./data/greedy_selection_eval_scores_dict.pkl
2024-07-19 14:12:41,809 - CompRatioSelect - INFO - Greedy selection: overall_min_score=0.000000, overall_max_score=0.639224
2024-07-19 14:12:41,810 - CompRatioSelect - INFO - Greedy selection: Original model cost=(Cost: memory=285216, mac=251955648)
2024-07-19 14:12:41,832 - CompRatioSelect - INFO - Greedy selection: final choice - comp_ratio=0.691395, score=0.620848
2024-07-19 14:12:41,843 - Svd - INFO - Spatial SVD splitting layer: model.conv1 using rank: 5
2024-07-19 14:12:41,844 - Svd - INFO - Spatial SVD splitting layer: model.conv2 using rank: 6
2024-07-19 14:12:41,871 - Svd - INFO - Spatial SVD splitting layer: model.conv31 using rank: 3
2024-07-19 14:12:41,873 - Svd - INFO - Spatial SVD splitting layer: model.conv32 using rank: 16
2024-07-19 14:12:41,899 - Svd - INFO - Spatial SVD splitting layer: model.conv34 using rank: 38
2024-07-19 14:12:42,1

In [20]:
# Show the compressed architecture on the console and record it in the run log.
print(comp_model)
logger.info(comp_model)

SIMPLE_BED_DETECTOR(
  (model): Sequential(
    (conv1): Sequential(
      (0): Conv2d(3, 5, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0), bias=False)
      (1): Conv2d(5, 32, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1), bias=False)
    )
    (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU()
    (dropout1): Dropout2d(p=0.3, inplace=False)
    (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Sequential(
      (0): Conv2d(32, 6, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0), bias=False)
      (1): Conv2d(6, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1), bias=False)
    )
    (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU()
    (dropout2): Dropout2d(p=0.3, inplace=False)
    (maxpool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv31): Sequential(
      (0): Conv2d

### Print Stats

In [21]:
# Compression statistics returned by compress_model: console and run log.
print(stats)
logger.info(stats)

**********************************************************************************************
Compressed Model Statistics
Baseline model accuracy: 0.623534, Compressed model accuracy: 0.269368
Compression ratio for memory=0.691395, mac=0.498948

**********************************************************************************************

Per-layer Stats
    Name:model.conv1, compression-ratio: 0.7
    Name:model.conv2, compression-ratio: 0.2
    Name:model.conv31, compression-ratio: 0.4
    Name:model.conv32, compression-ratio: 0.5
    Name:model.conv33, compression-ratio: None
    Name:model.conv34, compression-ratio: 0.6
    Name:model.conv41, compression-ratio: None
    Name:model.conv42, compression-ratio: 0.4
    Name:model.conv43, compression-ratio: None
    Name:model.conv44, compression-ratio: 0.5
    Name:model.conv45, compression-ratio: 0.8
    Name:model.conv46, compression-ratio: 0.5
    Name:model.conv51, compression-ratio: 0.7
    Name:model.conv52, compression-ratio: 

### Torchinfo: model compressed summary

In [22]:
# Build the torchinfo report once and reuse it for both the console and the log
# file; each summary() call runs its own forward pass through the model.
comp_model_summary = summary(comp_model, input_size=(1, 3, config.IMG_H, config.IMG_W))
print(comp_model_summary)
logger.info("Compressed Model Summary")
logger.info(comp_model_summary)

Layer (type:depth-idx)                   Output Shape              Param #
SIMPLE_BED_DETECTOR                      [1, 12, 7, 7]             --
├─Sequential: 1-1                        [1, 12, 7, 7]             --
│    └─Sequential: 2-1                   [1, 32, 224, 224]         --
│    │    └─Conv2d: 3-1                  [1, 5, 224, 224]          45
│    │    └─Conv2d: 3-2                  [1, 32, 224, 224]         480
│    └─BatchNorm2d: 2-2                  [1, 32, 224, 224]         64
│    └─ReLU: 2-3                         [1, 32, 224, 224]         --
│    └─Dropout2d: 2-4                    [1, 32, 224, 224]         --
│    └─MaxPool2d: 2-5                    [1, 32, 112, 112]         --
│    └─Sequential: 2-6                   [1, 16, 112, 112]         --
│    │    └─Conv2d: 3-3                  [1, 6, 112, 112]          576
│    │    └─Conv2d: 3-4                  [1, 16, 112, 112]         288
│    └─BatchNorm2d: 2-7                  [1, 16, 112, 112]         32
│    └─ReLU:

### Evaluate Compressed Model

In [23]:
# Score of the compressed model before any fine-tuning (whole validation loader, GPU).
# NOTE(review): unlike baseline_mAP, this value is not written to the run log.
comp_mAP = evaluate_model(comp_model, None, True)

Val mAP = 0.2694


### Visualize Results

In [24]:
# Pickle files holding the greedy-selection results (chosen ratios and eval scores).
# NOTE(review): these paths are hard-coded; the eval-scores path looks like the same file
# that is passed to AIMET as config.SVD_DIC_FILE -- confirm and consider reusing it.
comp_ratios_file_path = './data/greedy_selection_comp_ratios_list.pkl'
eval_scores_path = './data/greedy_selection_eval_scores_dict.pkl'

# Unpickling executes arbitrary code: only load files produced by this pipeline.
unpickled_ratios = pd.read_pickle(comp_ratios_file_path)
unpickled_scores = pd.read_pickle(eval_scores_path)

In [25]:
# Tabulate the per-layer eval scores, save them as CSV in the run folder and print them.
df_scores = pd.DataFrame(unpickled_scores)
df_scores.to_csv(config.RUN_FOLDER + 'scores.csv')
print(df_scores)

     model.conv1  model.conv2  model.conv31  model.conv32  model.conv33  \
0.1     0.386057     0.606255      0.265039      0.160997      0.008663   
0.2     0.386057     0.636280      0.265039      0.088679      0.064369   
0.3     0.455908     0.637542      0.599083      0.514793      0.077807   
0.4     0.601516     0.637040      0.629675      0.527044      0.214259   
0.5     0.618065     0.639187      0.629652      0.634818      0.554928   
0.6     0.618065     0.634067      0.629652      0.635100      0.584973   
0.7     0.632588     0.638372      0.632170      0.633773      0.594843   
0.8     0.639224     0.637884      0.632105      0.634377      0.615874   
0.9     0.635014     0.633336      0.637175      0.635554      0.620831   

     model.conv34  model.conv41  model.conv42  model.conv43  model.conv44  \
0.1      0.511627      0.020953      0.116612      0.000000      0.303776   
0.2      0.542719      0.040299      0.417590      0.095840      0.501272   
0.3      0.581273 

In [26]:
# Tabulate the selected per-layer compression ratios, save them as CSV in the run
# folder and print them.
df_ratios = pd.DataFrame(unpickled_ratios)
df_ratios.to_csv(config.RUN_FOLDER + 'ratios.csv')
print(df_ratios)

               0     1
0    model.conv1   0.7
1    model.conv2   0.2
2   model.conv31   0.4
3   model.conv32   0.5
4   model.conv33  None
5   model.conv34   0.6
6   model.conv41  None
7   model.conv42   0.4
8   model.conv43  None
9   model.conv44   0.5
10  model.conv45   0.8
11  model.conv46   0.5
12  model.conv51   0.7
13  model.conv52   0.5
14  model.conv53  None
15  model.conv54   0.6
16  model.conv55   0.7
17  model.conv56   0.8
18  model.conv61  None
19  model.conv62   0.8
20  model.conv71  None


# Save Compressed Before Training

In [27]:
# Export the compressed (not yet fine-tuned) model to ONNX, traced with a random
# dummy input of `input_shape` on config.DEVICE.
torch.onnx.export(comp_model, torch.randn(input_shape).to(config.DEVICE), config.RUN_FOLDER + 'comp_model_noTrain.onnx')

### Optimizer and Scheduler of Compressed Model to Train

In [28]:
# Rebind the notebook-level `optimizer` and `scheduler` to the compressed model's
# parameters. train_loop reads these two globals, so this cell must run before it.
optimizer = optim.Adam(comp_model.parameters(), 
                       lr=config.LEARNING_RATE, 
                       weight_decay=config.WEIGHT_DECAY)

# Plateau scheduler in 'min' mode with an absolute threshold; train_loop steps it
# with the validation total loss.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 
                                                 mode='min',
                                                 factor=config.FACTOR, 
                                                 patience=config.PATIENCE, 
                                                 threshold=config.THRES, 
                                                 threshold_mode='abs',
                                                 min_lr=config.MIN_LR)

# Snapshot of the compressed model straight after SVD, before any fine-tuning.
utils.save_checkpoint(epoch=0, 
                      model=comp_model,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      checkpoint_name=config.WEIGHTS_FOLDER + 'comp_model_after_svd.pt')

# Loss and Metrics Loggers and Plotters

In [29]:
# Notebook-level accumulators and plotters used by train_loop.
# Training side: losses, metrics and learning-rate history.
train_losses_logger = utils.LogLosses()
train_metrics_logger = utils.LogMetrics()
lr_logger = utils.LogLR(log_path=config.PLOTS_FOLDER)

# Validation side: losses and metrics.
val_losses_logger = utils.LogLosses()
val_metrics_logger = utils.LogMetrics()

# One plotter for the loss curves and one for the metric curves.
loss_plotter = utils.PlotMetrics(log_path=config.PLOTS_FOLDER, model_name=config.MODEL, loss_or_metric='Loss')
metrics_plotter = utils.PlotMetrics(log_path=config.PLOTS_FOLDER, model_name=config.MODEL, loss_or_metric='Metric')

# Train Loop Function

In [30]:
def train_loop(model, start_epoch=0, map_every=5, target_mAP=0.64):
    """Fine-tune `model` for up to `config.EPOCHS` epochs, validating after each one.

    Relies on notebook-level objects created in earlier cells: `optimizer`,
    `scheduler`, `train_loader`, `val_loader`, `loss_fn`, `logger`, the
    loss/metric/LR loggers and the two plotters.

    Each epoch: log the learning rate, train, validate, step the scheduler with
    the validation total loss, refresh the plots, then save checkpoints
    (periodic, best validation loss, best validation mAP). After the loop the
    final weights are saved as a plain state_dict.

    Args:
        model: network to train (updated in place).
        start_epoch: index of the first epoch; the loop covers
            `range(start_epoch, start_epoch + config.EPOCHS)`.
        map_every: mAP is computed, and the periodic checkpoint written, on every
            epoch where `(epoch + 1) % map_every == 0`.
        target_mAP: training stops early once the best validation mAP exceeds
            this value (only checked on epochs where mAP is computed).

    Returns:
        The same `model` object after training.

    Raises:
        ValueError: if `map_every` is smaller than 1.
    """
    if map_every < 1:
        raise ValueError("map_every must be >= 1")

    start = datetime.datetime.now()
    start_time = start.strftime("%H:%M:%S")
    print(f'\n***Start Training: {start_time}\n')
    logger.info(f'\n***Start Training: {start_time}\n')

    # Start with infinite validation loss
    best_valid_loss = np.inf
    best_mAP = torch.tensor(0., dtype=torch.float32)

    # x-axes for the plots: losses are recorded every epoch, metrics only on mAP epochs
    epochs_loss_plot = []
    epochs_metric_plot = []

    end_epoch = start_epoch + config.EPOCHS

    for epoch in range(start_epoch, end_epoch):

        print(f'\n=== EPOCH {epoch}/{end_epoch-1} ===')
        logger.info(f'\n=== EPOCH {epoch}/{end_epoch-1} ===')

        #====================== TRAINING ========================#
        current_lr = train_epoch.get_lr(optimizer=optimizer)
        logger.info(f'Learning Rate = {current_lr}\n')
        lr_logger.log_lr(current_lr)

        # mAP is only computed every `map_every` epochs
        calculate_mAP = ( (epoch+1) % map_every ) == 0
        if calculate_mAP:
            epochs_metric_plot.append(epoch)

        train_losses, train_metrics = train_epoch.train_fn(
            loader=train_loader,
            model=model,
            optimizer=optimizer,
            loss_fn=loss_fn,
            loss_l1_lambda=config.LAMBDA_L1_LOSS,
            metric=metrics.map_metric,
            device=config.DEVICE,
            calculate_mAP=calculate_mAP)

        train_losses_logger.update_losses(train_losses)
        if calculate_mAP:
            train_metrics_logger.update_metrics(train_metrics)

        logger.info(utils.print_metrics_to_logger("TRAIN STATS", train_losses, train_metrics, mAP_available=calculate_mAP))

        #===================== VALIDATING =======================#
        with torch.no_grad():
            val_losses, val_metrics = val_epoch.eval_fn(
                loader=val_loader,
                model=model,
                loss_fn=loss_fn,
                metric=metrics.map_metric,
                device=config.DEVICE,
                calculate_mAP=calculate_mAP)

            # Plateau scheduler is driven by the validation total loss
            scheduler.step(val_losses['Total'])

            val_losses_logger.update_losses(val_losses)
            if calculate_mAP:
                val_metrics_logger.update_metrics(val_metrics)

            logger.info(utils.print_metrics_to_logger("VAL STATS", val_losses, val_metrics, mAP_available=calculate_mAP))

        epochs_loss_plot.append(epoch)

        loss_plotter.plot_all_metrics(
            train_losses_logger.get_losses(),
            val_losses_logger.get_losses(),
            epochs_loss_plot)

        if calculate_mAP:
            metrics_plotter.plot_all_metrics(
                train_metrics_logger.get_metrics(),
                val_metrics_logger.get_metrics(),
                epochs_metric_plot)

        lr_logger.plot_lr(epochs_loss_plot)

        #======================= SAVING =========================#
        # Periodic checkpoint, same cadence as the mAP computation; the file is
        # overwritten each time.
        if calculate_mAP:
            save_name = config.WEIGHTS_FOLDER + config.MODEL + f'_detector__{map_every}epoch.pt'
            utils.save_checkpoint(epoch, model, optimizer, scheduler, save_name)

        if best_valid_loss > val_losses['Total']:
            best_valid_loss = val_losses['Total']
            print(f"\nSaving model with new best validation loss: {best_valid_loss:.3f}")
            logger.info(f"Saving model with new best validation loss: {best_valid_loss:.3f}")
            save_name = config.WEIGHTS_FOLDER + config.MODEL + '_detector__' + 'best_loss'  + '.pt'
            utils.save_checkpoint(epoch, model, optimizer, scheduler, save_name)

        # Save model if mAP increases
        if calculate_mAP:
            if best_mAP < val_metrics['mAP']:
                best_mAP = val_metrics['mAP']
                print(f"\nSaving model with new best mAP: {best_mAP:.4f}")
                logger.info(f"Saving model with new best mAP: {best_mAP:.4f}")
                save_precision_name = f'best_mAP={best_mAP:.4f}__epoch={epoch}'
                save_name = config.WEIGHTS_FOLDER + config.MODEL + '_detector__' + save_precision_name + '.pt'
                utils.save_checkpoint(epoch, model, optimizer, scheduler, save_name)
            # Break if model is good enough. float() accepts both a 0-dim tensor
            # and a plain Python number.
            if float(best_mAP) > target_mAP:
                break

    logger.info('Saving last model')
    torch.save(model.state_dict(), config.WEIGHTS_FOLDER + 'last_' + config.MODEL + '_detector.pt')

    #======================= FINISH =========================#
    end = datetime.datetime.now()
    end_time = end.strftime("%H:%M:%S")
    print(f'\n***Script finished: {end_time}\n')
    print(f'Time elapsed: {end-start}')
    logger.info(f'\n***Script finished: {end_time}\n')
    logger.info(f'Time elapsed: {end-start}')

    return model

# Training

In [31]:
# Fine-tune the compressed model; train_loop returns the same model object it was given.
print("Starting script\n")
logger.info("Starting script\n")
    
svd_model = train_loop(comp_model)

Starting script


***Start Training: 14:13:24


=== EPOCH 0/9 ===
Learning Rate = 0.0001



Training:  10%|███████                                                                | 169/1710 [00:33<02:59,  8.59it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  56%|███████████████████████████████████████▌                               | 952/1710 [02:58<01:33,  8.07it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  76%|█████████████████████████████████████████████████████▎                | 1303/1710 [03:59<01:13,  5.55it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  96%|███████████████████████████████████████████████████████████████████▌  | 1650/1710 [05:02<00:08,  6.90it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training: 100%|██████████████████████████████████████████████████████████████████████| 1710/1710 [05:14<00:00,  5.45it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
51.758      |17.237      |24.426      |7.401       |2.694       


Validating: 100%|████████████████████████████████████████████████████████████████████████| 77/77 [00:10<00:00,  7.58it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
52.307      |19.193      |22.203      |6.736       |4.175       

Saving model with new best validation loss: 52.307

=== EPOCH 1/9 ===
Learning Rate = 0.0001



Training:  14%|██████████▎                                                            | 247/1710 [00:43<03:05,  7.91it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  27%|██████████████████▉                                                    | 456/1710 [01:21<02:39,  7.85it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  33%|███████████████████████▏                                               | 559/1710 [01:40<03:02,  6.31it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  91%|███████████████████████████████████████████████████████████████▊      | 1560/1710 [04:41<00:25,  5.98it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training: 100%|██████████████████████████████████████████████████████████████████████| 1710/1710 [05:09<00:00,  5.53it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
49.342      |16.416      |23.300      |7.259       |2.367       


Validating: 100%|████████████████████████████████████████████████████████████████████████| 77/77 [00:10<00:00,  7.41it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
51.262      |18.631      |20.720      |8.040       |3.871       

Saving model with new best validation loss: 51.262

=== EPOCH 2/9 ===
Learning Rate = 0.0001



Training:  10%|███████▏                                                               | 172/1710 [00:32<03:47,  6.76it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  52%|████████████████████████████████████▉                                  | 890/1710 [02:43<01:30,  9.03it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  82%|█████████████████████████████████████████████████████████▎            | 1399/1710 [04:17<00:55,  5.57it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  86%|███████████████████████████████████████████████████████████▉          | 1463/1710 [04:28<00:26,  9.26it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training: 100%|██████████████████████████████████████████████████████████████████████| 1710/1710 [05:12<00:00,  5.48it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
48.419      |15.930      |22.930      |7.289       |2.271       


Validating: 100%|████████████████████████████████████████████████████████████████████████| 77/77 [00:11<00:00,  6.84it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
50.782      |18.427      |21.433      |7.046       |3.877       

Saving model with new best validation loss: 50.782

=== EPOCH 3/9 ===
Learning Rate = 0.0001



Training:   1%|▉                                                                       | 21/1710 [00:04<04:02,  6.97it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  14%|█████████▉                                                             | 239/1710 [00:45<05:36,  4.37it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  30%|█████████████████████▏                                                 | 510/1710 [01:31<03:03,  6.54it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  48%|█████████████████████████████████▊                                     | 815/1710 [02:28<01:53,  7.90it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training: 100%|██████████████████████████████████████████████████████████████████████| 1710/1710 [05:17<00:00,  5.38it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
48.275      |15.926      |22.816      |7.215       |2.317       


Validating: 100%|████████████████████████████████████████████████████████████████████████| 77/77 [00:11<00:00,  6.62it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
50.864      |18.299      |21.146      |7.317       |4.102       

=== EPOCH 4/9 ===
Learning Rate = 0.0001



Training:  13%|█████████▎                                                             | 224/1710 [00:41<05:26,  4.55it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  43%|██████████████████████████████▎                                        | 731/1710 [02:18<04:33,  3.57it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  53%|█████████████████████████████████████▊                                 | 912/1710 [02:51<04:29,  2.96it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  55%|███████████████████████████████████████▏                               | 943/1710 [02:57<02:40,  4.78it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training: 100%|██████████████████████████████████████████████████████████████████████| 1710/1710 [05:19<00:00,  5.35it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
47.818      |15.775      |22.622      |7.227       |2.193       
Train mAP = 0.5493


Validating: 100%|████████████████████████████████████████████████████████████████████████| 77/77 [00:12<00:00,  6.37it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
50.186      |18.139      |21.266      |6.705       |4.077       
Val mAP = 0.5973

Saving model with new best validation loss: 50.186

Saving model with new best mAP: 0.5973

=== EPOCH 5/9 ===
Learning Rate = 0.0001



Training:  26%|██████████████████▎                                                    | 441/1710 [01:23<03:00,  7.02it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  50%|███████████████████████████████████▎                                   | 851/1710 [02:35<02:22,  6.02it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  80%|████████████████████████████████████████████████████████              | 1370/1710 [04:09<00:55,  6.18it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  98%|████████████████████████████████████████████████████████████████████▊ | 1681/1710 [05:05<00:05,  4.99it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training: 100%|██████████████████████████████████████████████████████████████████████| 1710/1710 [05:11<00:00,  5.49it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
47.388      |15.596      |22.427      |7.168       |2.198       


Validating: 100%|████████████████████████████████████████████████████████████████████████| 77/77 [00:11<00:00,  6.93it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
50.835      |18.322      |20.981      |7.338       |4.195       

=== EPOCH 6/9 ===
Learning Rate = 0.0001



Training:  29%|████████████████████▉                                                  | 504/1710 [01:33<02:41,  7.45it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  37%|██████████████████████████▌                                            | 641/1710 [01:58<02:13,  8.03it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  46%|█████████████████████████████████                                      | 795/1710 [02:27<02:14,  6.79it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  75%|████████████████████████████████████████████████████▍                 | 1282/1710 [03:55<01:00,  7.06it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training: 100%|██████████████████████████████████████████████████████████████████████| 1710/1710 [05:12<00:00,  5.48it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
47.039      |15.399      |22.352      |7.164       |2.123       


Validating: 100%|████████████████████████████████████████████████████████████████████████| 77/77 [00:12<00:00,  6.35it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
49.562      |17.778      |20.573      |7.198       |4.012       

Saving model with new best validation loss: 49.562

=== EPOCH 7/9 ===
Learning Rate = 0.0001



Training:  17%|████████████▎                                                          | 296/1710 [00:52<04:30,  5.23it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  38%|██████████████████████████▊                                            | 647/1710 [01:55<02:07,  8.33it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  77%|█████████████████████████████████████████████████████▉                | 1317/1710 [03:59<00:51,  7.62it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  90%|███████████████████████████████████████████████████████████████▏      | 1544/1710 [04:39<00:36,  4.53it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training: 100%|██████████████████████████████████████████████████████████████████████| 1710/1710 [05:08<00:00,  5.55it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
47.066      |15.542      |22.287      |7.129       |2.107       


Validating: 100%|████████████████████████████████████████████████████████████████████████| 77/77 [00:11<00:00,  6.91it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
50.159      |18.237      |21.061      |6.853       |4.008       

=== EPOCH 8/9 ===
Learning Rate = 0.0001



Training:  37%|██████████████████████████▏                                            | 631/1710 [01:54<02:16,  7.89it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  52%|████████████████████████████████████▉                                  | 889/1710 [02:41<01:43,  7.95it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  54%|██████████████████████████████████████▍                                | 925/1710 [02:48<01:40,  7.79it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  70%|████████████████████████████████████████████████▊                     | 1191/1710 [03:35<00:58,  8.87it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training: 100%|██████████████████████████████████████████████████████████████████████| 1710/1710 [05:09<00:00,  5.52it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
46.785      |15.273      |22.279      |7.112       |2.121       


Validating: 100%|████████████████████████████████████████████████████████████████████████| 77/77 [00:10<00:00,  7.12it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
49.638      |18.123      |20.838      |6.755       |3.922       

=== EPOCH 9/9 ===
Learning Rate = 8e-05



Training:  53%|█████████████████████████████████████▎                                 | 898/1710 [02:49<02:25,  5.58it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  70%|████████████████████████████████████████████████▉                     | 1194/1710 [03:44<02:32,  3.38it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  80%|████████████████████████████████████████████████████████▏             | 1374/1710 [04:18<00:58,  5.73it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training:  82%|█████████████████████████████████████████████████████████▎            | 1399/1710 [04:22<00:43,  7.10it/s]Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Training: 100%|██████████████████████████████████████████████████████████████████████| 1710/1710 [05:19<00:00,  5.35it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
46.671      |15.364      |22.109      |7.072       |2.126       
Train mAP = 0.5581


Validating: 100%|████████████████████████████████████████████████████████████████████████| 77/77 [00:12<00:00,  6.01it/s]


Total Loss  |Box Loss    |Conf Loss   |No Obj Loss |Class Loss  
------------ ------------ ------------ ------------ ------------
49.597      |17.848      |20.723      |6.982       |4.045       
Val mAP = 0.5998

Saving model with new best mAP: 0.5998

***Script finished: 15:09:16

Time elapsed: 0:55:51.963307


# Check Comp Model Params

In [32]:
# MODEL PARAMETERS
# Walk comp_model's parameters once, keeping (element count, requires_grad) per tensor,
# then derive both totals from that list.
comp_param_stats = [(weight.numel(), weight.requires_grad) for weight in comp_model.parameters()]

n_trainable = sum(count for count, trainable in comp_param_stats if trainable)
n_params = sum(count for count, _trainable in comp_param_stats)

# Report each figure on stdout and in the log file (trainable first, then total).
print(f'\nTrainable parameters = {n_trainable}')
logger.info(f'\nTrainable parameters = {n_trainable}')
print(f'Total parameters = {n_params}')
logger.info(f'Total parameters = {n_params}\n')


Trainable parameters = 199257
Total parameters = 199257


In [33]:
# Run the notebook's evaluate_model helper on comp_model and keep its return value;
# the recorded cell output shows it printing a "Val mAP = ..." line.
# NOTE(review): evaluate_model is defined outside this cell -- the meaning of the
# positional `None` and `True` arguments should be confirmed against its signature.
comp_model_mAP = evaluate_model(comp_model, None, True)

Val mAP = 0.5998


# Export to ONNX

In [34]:
# Export comp_model to ONNX: the example input is a random tensor of shape
# input_shape moved to config.DEVICE, and the file is written inside config.RUN_FOLDER.
onnx_example_input = torch.randn(input_shape).to(config.DEVICE)
onnx_output_path = config.RUN_FOLDER + 'comp_model_Train.onnx'
torch.onnx.export(comp_model, onnx_example_input, onnx_output_path)