In [1]:
import os
import logging
from pathlib import Path
import datetime
import shutil

from tqdm import tqdm
import numpy as np
import math
import pandas as pd
import random
from decimal import Decimal

import torch

import matplotlib.pyplot as plt
import matplotlib as mpl

import torch.nn as nn 
from torch.nn.utils import parameters_to_vector
import torch.optim as optim
from torchinfo import summary

import torchmetrics

import config
import modules.dataloaders as dataloaders
import modules.models_bed_evolution.bed_01_downto_28 as cnv_model
import modules.loss as loss
import modules.metrics as metrics
import modules.train_epoch as train_epoch
import modules.val_epoch as val_epoch
import modules.utils as utils

# AIMET imports

In [2]:
from aimet_torch.compress import ModelCompressor
from aimet_torch.defs import SpatialSvdParameters
from aimet_torch.defs import ChannelPruningParameters
from aimet_torch.onnx_utils import OnnxSaver
from aimet_common.defs import CostMetric, CompressionScheme, GreedySelectionParameters
# from aimet_common.utils import start_bokeh_server_session
# from aimet_torch.visualize_serialized_data import VisualizeCompression


2025-01-26 02:16:37,435 - root - INFO - AIMET


# Define Matplot Style

In [3]:
#mpl.style.use('seaborn-v0_8')

# Logger

In [4]:
log_path = config.LOGS_FOLDER

logger = logging.getLogger("GonLogger")
logger.propagate = False
logger.setLevel(logging.INFO)
file_handler = logging.FileHandler(log_path + 'logfile.log')
formatter = logging.Formatter('%(message)s')
file_handler.setFormatter(formatter)

# add file handler to logger
logger.addHandler(file_handler)

logger.info('BED Classifier XS Tiny.\n' +  
            '\tOne Head.\n' +
            '\tAdding best mean F1 save.\n' +
            '\tWeighted for Precision.\n' +
            '\tModules.\n'+ 
            '\tLosses and Metrics Loggers.\n' +
            f'\tSVD Compression Ratio  = {config.SVD_COMPRESSION_RATIO}\n' +
            f'\tPruning Compression Ratio  = {config.PRUNING_COMPRESSION_RATIO}\n' +
            f'\t{config.EPOCHS} epochs.\n')

# Hyperparameters Log

In [5]:
''' ============================
    Print Config Values
============================ '''
print(f'Training Brevitas Model = {config.BREVITAS_MODEL}')
print(f'Training AIMET Model = {config.AIMET_RUN}')
print('\nDatasets Length')
print(f'\tTrain and Val: {"Full" if config.DS_LEN == None else config.DS_LEN}')
print(f'\nLoad Model: {config.LOAD_MODEL}')
if (config.LOAD_MODEL == True):
    print(f'\tModel: {config.LOAD_MODEL_FILE}')
print(f'Device: {config.DEVICE}')
print('Optimizer:')
print(f'\tLearning Rate: {config.LEARNING_RATE}')
print(f'\tWeight Decay: {config.WEIGHT_DECAY}')
print('Scheduler:')
print(f'\tScheduler factor: {config.FACTOR}')
print(f'\tScheduler patience: {config.PATIENCE}')
print(f'\tScheduler threshold: {config.THRES}')
print(f'\tScheduler min learning rate: {config.MIN_LR}')
print(f'Batch Size: {config.BATCH_SIZE}')
print(f'Num Workers: {config.NUM_WORKERS}')
print(f'Pin Memory: {config.PIN_MEMORY}')
print(f'Epochs: {config.EPOCHS}')
print('\nIMG DIMS:')
print(f'\tWidth: {config.IMG_W}\n\tHeight: {config.IMG_H}')
print('\nBrevitas Config:')
print(f'\tFixed Point: {config.FIXED_POINT}')
print(f'\tWeights Bit Width: {config.WEIGHTS_BIT_WIDTH}')
print(f'\tBig Layers Weights Bit Width: {config.BIG_LAYERS_WEIGHTS_BIT_WIDTH}')
print(f'\tBias Bit Width: {config.BIAS_BIT_WIDTH}')
print(f'\tActivations Bit Width: {config.ACTIVATIONS_BIT_WIDTH}')

logger.info(f'Training Brevitas Model = {config.BREVITAS_MODEL}')
logger.info(f'Training AIMET Model = {config.AIMET_RUN}')
logger.info('\nDatasets Length')
logger.info(f'\tTrain and Val: {"Full" if config.DS_LEN == None else config.DS_LEN}')
logger.info(f'\nLoad Model: {config.LOAD_MODEL}')
if (config.LOAD_MODEL == True):
    logger.info(f'\tModel: {config.LOAD_MODEL_FILE}')
logger.info(f'\nDevice: {config.DEVICE}')
logger.info('Optimizer:')
logger.info(f'\tLearning Rate: {config.LEARNING_RATE}')
logger.info(f'\tWeight Decay: {config.WEIGHT_DECAY}')
logger.info('Scheduler:')
logger.info(f'\tScheduler factor: {config.FACTOR}')
logger.info(f'\tScheduler patience: {config.PATIENCE}')
logger.info(f'\tScheduler threshold: {config.THRES}')
logger.info(f'\tScheduler min learning rate: {config.MIN_LR}')
logger.info(f'\nBatch Size: {config.BATCH_SIZE}')
logger.info(f'Num Workers: {config.NUM_WORKERS}')
logger.info(f'Pin Memory: {config.PIN_MEMORY}')
logger.info(f'Epochs: {config.EPOCHS}')
logger.info('\nIMG DIMS:')
logger.info(f'\tWidth: {config.IMG_W}\n\tHeight: {config.IMG_H}')
logger.info('\nBrevitas Config:')
logger.info(f'\tFixed Point: {config.FIXED_POINT}')
logger.info(f'\tWeights Bit Width: {config.WEIGHTS_BIT_WIDTH}')
logger.info(f'\tBig Layers Weights Bit Width: {config.BIG_LAYERS_WEIGHTS_BIT_WIDTH}')
logger.info(f'\tBias Bit Width: {config.BIAS_BIT_WIDTH}')
logger.info(f'\tActivations Bit Width: {config.ACTIVATIONS_BIT_WIDTH}')

Training Brevitas Model = False
Training AIMET Model = True

Datasets Length
	Train and Val: 128

Load Model: True
	Model: ./experiments_bed_evolution/11_downto_28__full_ds/weights/BED_Downto_28_classifier__best_mean_F1.pt
Device: cuda
Optimizer:
	Learning Rate: 0.001
	Weight Decay: 0.001
Scheduler:
	Scheduler factor: 0.8
	Scheduler patience: 2
	Scheduler threshold: 0.001
	Scheduler min learning rate: 1e-06
Batch Size: 64
Num Workers: 8
Pin Memory: True
Epochs: 2

IMG DIMS:
	Width: 224
	Height: 224

Brevitas Config:
	Fixed Point: True
	Weights Bit Width: 4
	Big Layers Weights Bit Width: 2
	Bias Bit Width: 4
	Activations Bit Width: 4


# Dataset Setup

In [6]:
train_loader = dataloaders.get_train_loader()
val_loader = dataloaders.get_val_loader()

aimet_val_len = 2048
aimet_val_loader = dataloaders.get_val_loader(val_ds_len = aimet_val_len)


TRAIN DFIRE dataset
DFire Removed wrong images: 0
DFire empty images: 56
DFire only smoke images: 37
DFire only fire images: 8
DFire smoke and fire images: 27

Train DFire dataset len: 128

TRAIN FASDD UAV dataset
FASDD Removed wrong images: 0
FASDD empty images: 58
FASDD only smoke images: 22
FASDD only fire images: 2
FASDD smoke and fire images: 46

Train FASDD UAV dataset len: 128

VAL FASDD UAV dataset
FASDD Removed wrong images: 0
FASDD empty images: 59
FASDD only smoke images: 26
FASDD only fire images: 2
FASDD smoke and fire images: 41

Val FASDD UAV dataset len: 128

TRAIN FASDD CV dataset
FASDD Removed wrong images: 0
FASDD empty images: 53
FASDD only smoke images: 36
FASDD only fire images: 19
FASDD smoke and fire images: 20

Train FASDD CV dataset len: 128

Val FASDD CV dataset
FASDD Removed wrong images: 0
FASDD empty images: 50
FASDD only smoke images: 32
FASDD only fire images: 20
FASDD smoke and fire images: 26

Val FASDD CV dataset len: 128

Concatenate Train DFire and

### Dataset Length

In [7]:
logger.info("\n********* Datasets Length *********")

print(f'Train Dataset Length: {len(train_loader.dataset)}')
logger.info(f'Train Dataset Length: {len(train_loader.dataset)}')

print(f'Test Dataset Length: {len(val_loader.dataset)}')
logger.info(f'Test Dataset Length: {len(val_loader.dataset)}')

print(f'Aimet Test Dataset Length: {len(aimet_val_loader.dataset)}')
logger.info(f'Aimet Test Dataset Length: {len(aimet_val_loader.dataset)}')
logger.info(f'Aimet Val Loader individual lenght = {aimet_val_len}')

Train Dataset Length: 640
Test Dataset Length: 384
Aimet Test Dataset Length: 384


# Models Setup

In [8]:
if config.MODEL == "BED_AIMET":   
    print("Using BED Classifier")
    logger.info("\nUsing BED Classifier")
    fp32_model = cnv_model.BED_CLASSIFIER_DOWNTO_28().to(config.DEVICE)    
else:
    print("Wrong Model")
    logger.info("Wrong Model")
    raise SystemExit("Wrong Model")

# MODEL PARAMETERS
n_trainable = sum(p.numel() for p in fp32_model.parameters() if p.requires_grad)
print(f'\nTrainable parameters = {n_trainable}')
logger.info(f'\nTrainable parameters = {n_trainable}')

n_params = parameters_to_vector(fp32_model.parameters()).numel()
print(f'Total parameters = {n_params}\n')
logger.info(f'Total parameters = {n_params}\n')

Using BED Classifier

Trainable parameters = 93266
Total parameters = 93266



### Check Model Shape

In [9]:
in_rand_np = np.random.rand(4, 3, config.IMG_H, config.IMG_W)
in_rand = torch.tensor(in_rand_np, dtype=torch.float32, device=config.DEVICE)
out_test = fp32_model(in_rand)
print(f'Model shape is {out_test}')
print(f'BED Model Arquitecture\n{fp32_model}')
logger.info(f'Model shape is {out_test}')
logger.info(f'BED Model Arquitecture\n{fp32_model}')

Model shape is tensor([[-0.0010, -0.1882],
        [-0.0030, -0.1812],
        [-0.0025, -0.1836],
        [ 0.0005, -0.1826]], device='cuda:0', grad_fn=<AddmmBackward0>)
BED Model Arquitecture
BED_CLASSIFIER_DOWNTO_28(
  (model): Sequential(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU()
    (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU()
    (maxpool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv31): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn31): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    

# Torchinfo: model summary

In [10]:
print(summary(fp32_model, input_size=(1, 3, config.IMG_H, config.IMG_W)))
logger.info("Original FP32 Model Summary")
logger.info(summary(fp32_model, input_size=(config.BATCH_SIZE, 3, config.IMG_H, config.IMG_W)))

Layer (type:depth-idx)                   Output Shape              Param #
BED_CLASSIFIER_DOWNTO_28                 [1, 2]                    --
├─Sequential: 1-1                        [1, 2]                    --
│    └─Conv2d: 2-1                       [1, 32, 224, 224]         864
│    └─BatchNorm2d: 2-2                  [1, 32, 224, 224]         64
│    └─ReLU: 2-3                         [1, 32, 224, 224]         --
│    └─MaxPool2d: 2-4                    [1, 32, 112, 112]         --
│    └─Conv2d: 2-5                       [1, 16, 112, 112]         4,608
│    └─BatchNorm2d: 2-6                  [1, 16, 112, 112]         32
│    └─ReLU: 2-7                         [1, 16, 112, 112]         --
│    └─MaxPool2d: 2-8                    [1, 16, 56, 56]           --
│    └─Conv2d: 2-9                       [1, 16, 56, 56]           256
│    └─BatchNorm2d: 2-10                 [1, 16, 56, 56]           32
│    └─ReLU: 2-11                        [1, 16, 56, 56]           --
│    └─Con

# Load Pretrained or Initialize Weights

In [11]:
epochs_trained = utils.load_checkpoint(config.LOAD_MODEL_FILE, 
                                       fp32_model, 
                                       optimizer=None, 
                                       scheduler=None, 
                                       device=config.DEVICE)

fp32_model.eval()
logger.info(f"Loading Model. Trained during {epochs_trained} epochs")

Loading Model. Trained during 62 epochs


# Save ONNX original model

In [12]:
input_shape = (1, 3, config.IMG_H, config.IMG_W)

torch.onnx.export(fp32_model, torch.randn(input_shape).to(config.DEVICE), config.RUN_FOLDER + 'fp32_model.onnx')
#OnnxSaver.set_node_names('/models/fp32_model.onnx', fp32_model, input_shape)

# AIMET Spatial SVD

### Configure SVD Parameters

In [14]:
#modules_to_ignore = [fp32_model.model.conv1]
modules_to_ignore = [] # Let the first conv be splitted
greedy_params = GreedySelectionParameters(
    target_comp_ratio=Decimal(config.SVD_COMPRESSION_RATIO), 
    saved_eval_scores_dict=config.SVD_DIC_FILE)
auto_params = SpatialSvdParameters.AutoModeParams(
    greedy_params,
    modules_to_ignore=modules_to_ignore)
spatial_svd_params = SpatialSvdParameters(
    mode=SpatialSvdParameters.Mode.auto,
    params=auto_params)

### Evaluate Model Callback

Signature: (model, iterations, use_cuda)
Return an accuracy metric

In [15]:
def evaluate_model(model, iterations, use_cuda):
    
    model.eval()

    #for batch_idx, (x, y) in enumerate(val_loader):
    for batch_idx, (x, y) in enumerate(aimet_val_loader):
        if use_cuda == True:
            x, y = x.to('cuda'), y.to('cuda')
        else:
            model.to('cpu')
        out = model(x)
        if iterations is not None:
            if batch_idx == iterations:
                break
        
        # F1 average Macro   
        yhat = torch.sigmoid(out.detach())
        metrics.f1_metric_mean.update(yhat, y)
    
    f1_mean = metrics.f1_metric_mean.compute()
    metrics.f1_metric_mean.reset()

    print(f'F1 mean: {f1_mean:.4f}')
    
    return f1_mean.item()

### Baseline F1 Mean

In [16]:
baseline_f1 = evaluate_model(fp32_model, None, True)
print(type(baseline_f1))

F1 mean: 0.9522
<class 'float'>


In [17]:
svd_model, stats = ModelCompressor.compress_model(
    fp32_model,
    input_shape=input_shape,
    eval_callback=evaluate_model,
    eval_iterations=None,
    compress_scheme=CompressionScheme.spatial_svd,
    cost_metric=CostMetric.mac,
    parameters=spatial_svd_params,
    visualization_url=None)                                                    

2025-01-26 02:16:55,943 - Svd - INFO - Spatial SVD splitting layer: model.conv1 using rank: 1
F1 mean: 0.0305
2025-01-26 02:16:57,771 - CompRatioSelect - INFO - Layer model.conv1, comp_ratio 0.100000 ==> eval_score=0.030501
2025-01-26 02:16:57,775 - Svd - INFO - Spatial SVD splitting layer: model.conv1 using rank: 1
F1 mean: 0.0305
2025-01-26 02:16:59,602 - CompRatioSelect - INFO - Layer model.conv1, comp_ratio 0.200000 ==> eval_score=0.030501
2025-01-26 02:16:59,606 - Svd - INFO - Spatial SVD splitting layer: model.conv1 using rank: 2
F1 mean: 0.4674
2025-01-26 02:17:01,444 - CompRatioSelect - INFO - Layer model.conv1, comp_ratio 0.300000 ==> eval_score=0.467391
2025-01-26 02:17:01,448 - Svd - INFO - Spatial SVD splitting layer: model.conv1 using rank: 3
F1 mean: 0.5579
2025-01-26 02:17:03,269 - CompRatioSelect - INFO - Layer model.conv1, comp_ratio 0.400000 ==> eval_score=0.557912
2025-01-26 02:17:03,273 - Svd - INFO - Spatial SVD splitting layer: model.conv1 using rank: 4
F1 mean: 0

### Copy Data Files to Running Folder

In [18]:
shutil.copyfile(
    './data/greedy_selection_eval_scores_dict.pkl', 
    config.RUN_FOLDER + 'svd_eval_scores.pkl')
shutil.copyfile(
    './data/greedy_selection_comp_ratios_list.pkl', 
    config.RUN_FOLDER + 'svd_comp_ratios.pkl')

'experiments_bed_evolution/15_downto_28_aimet__big_aimet_ds__MAC__128_ds//svd_comp_ratios.pkl'

### Print Stats

In [19]:
print(stats)
logger.info(stats)

**********************************************************************************************
Compressed Model Statistics
Baseline model accuracy: 0.952233, Compressed model accuracy: 0.851159
Compression ratio for memory=0.506826, mac=0.649468

**********************************************************************************************

Per-layer Stats
    Name:model.conv1, compression-ratio: 0.8
    Name:model.conv2, compression-ratio: 0.7
    Name:model.conv31, compression-ratio: None
    Name:model.conv32, compression-ratio: 0.9
    Name:model.conv33, compression-ratio: None
    Name:model.conv34, compression-ratio: 0.7
    Name:model.conv41, compression-ratio: None
    Name:model.conv42, compression-ratio: 0.6
    Name:model.conv43, compression-ratio: 0.8
    Name:model.conv44, compression-ratio: 0.4
    Name:model.conv45, compression-ratio: 0.2
    Name:model.conv46, compression-ratio: 0.1

***************************************************************************************

### Print Models

In [20]:
print(svd_model)
logger.info("\nSVD Model")
logger.info(svd_model)

BED_CLASSIFIER_DOWNTO_28(
  (model): Sequential(
    (conv1): Sequential(
      (0): Conv2d(3, 6, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0), bias=False)
      (1): Conv2d(6, 32, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1), bias=False)
    )
    (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU()
    (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Sequential(
      (0): Conv2d(32, 22, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0), bias=False)
      (1): Conv2d(22, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1), bias=False)
    )
    (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU()
    (maxpool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv31): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn31): BatchNorm2d(16, eps=1e-05, momentum=0.1, aff

### Torchinfo: model compressed summary

In [21]:
svd_model.eval()

print(summary(svd_model, input_size=(1, 3, config.IMG_H, config.IMG_W)))

logger.info("Compressed Model Summary")
logger.info(summary(svd_model, input_size=(1, 3, config.IMG_H, config.IMG_W)))

Layer (type:depth-idx)                   Output Shape              Param #
BED_CLASSIFIER_DOWNTO_28                 [1, 2]                    --
├─Sequential: 1-1                        [1, 2]                    --
│    └─Sequential: 2-1                   [1, 32, 224, 224]         --
│    │    └─Conv2d: 3-1                  [1, 6, 224, 224]          54
│    │    └─Conv2d: 3-2                  [1, 32, 224, 224]         576
│    └─BatchNorm2d: 2-2                  [1, 32, 224, 224]         64
│    └─ReLU: 2-3                         [1, 32, 224, 224]         --
│    └─MaxPool2d: 2-4                    [1, 32, 112, 112]         --
│    └─Sequential: 2-5                   [1, 16, 112, 112]         --
│    │    └─Conv2d: 3-3                  [1, 22, 112, 112]         2,112
│    │    └─Conv2d: 3-4                  [1, 16, 112, 112]         1,056
│    └─BatchNorm2d: 2-6                  [1, 16, 112, 112]         32
│    └─ReLU: 2-7                         [1, 16, 112, 112]         --
│    └─M

### Evaluate Compressed Model

In [22]:
svd_f1 = evaluate_model(svd_model, None, True)
logger.info(f'\nSVD Model evaluation with Aimet Val Loader before training: {svd_f1}')

F1 mean: 0.8512


### Visualize Results

In [23]:
comp_ratios_file_path = './data/greedy_selection_comp_ratios_list.pkl'
eval_scores_path = './data/greedy_selection_eval_scores_dict.pkl'

unpickled_ratios = pd.read_pickle(comp_ratios_file_path)
unpickled_scores = pd.read_pickle(eval_scores_path)

In [24]:
df_scores = pd.DataFrame(unpickled_scores)
df_scores.to_csv(config.RUN_FOLDER + 'scores_svd.csv')
print(df_scores)

     model.conv1  model.conv2  model.conv31  model.conv32  model.conv33  \
0.1     0.030501     0.627974      0.316864      0.223588      0.005263   
0.2     0.030501     0.808363      0.316864      0.212155      0.000000   
0.3     0.467391     0.872104      0.399972      0.338384      0.000000   
0.4     0.557912     0.860360      0.597683      0.226675      0.577160   
0.5     0.598291     0.888460      0.626242      0.876839      0.684384   
0.6     0.598291     0.923112      0.626242      0.871368      0.773400   
0.7     0.690273     0.937649      0.737392      0.906371      0.821031   
0.8     0.948903     0.946663      0.827370      0.922411      0.851065   
0.9     0.946728     0.952776      0.852138      0.938452      0.907386   

     model.conv34  model.conv41  model.conv42  model.conv43  model.conv44  \
0.1      0.420712      0.000000      0.756926      0.386256      0.624998   
0.2      0.645164      0.035354      0.858814      0.453103      0.919361   
0.3      0.824999 

In [25]:
df_ratios = pd.DataFrame(unpickled_ratios)
df_ratios.to_csv(config.RUN_FOLDER + 'ratios_svd.csv')
print(df_ratios)

               0     1
0    model.conv1   0.8
1    model.conv2   0.7
2   model.conv31  None
3   model.conv32   0.9
4   model.conv33  None
5   model.conv34   0.7
6   model.conv41  None
7   model.conv42   0.6
8   model.conv43   0.8
9   model.conv44   0.4
10  model.conv45   0.2
11  model.conv46   0.1


# Save Compressed Before Training

In [26]:
torch.onnx.export(svd_model, torch.randn(input_shape).to(config.DEVICE), config.RUN_FOLDER + 'svd_model_noTrain.onnx')

### Optimizer and Scheduler of Compressed Model to Train

In [27]:
optimizer = optim.Adam(svd_model.parameters(), 
                       lr=config.LEARNING_RATE, 
                       weight_decay=config.WEIGHT_DECAY)

scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 
                                                 mode='min',
                                                 factor=config.FACTOR, 
                                                 patience=config.PATIENCE, 
                                                 threshold=config.THRES, 
                                                 threshold_mode='abs',
                                                 min_lr=config.MIN_LR)

utils.save_checkpoint(epoch=0, 
                      model=svd_model,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      checkpoint_name=config.WEIGHTS_FOLDER + 'comp_model_after_svd.pt')

# Loss Function

In [28]:
if config.LOSS_FN == "BCE":
    print(f'Loss Function: BCE')
    logger.info(f'\nLoss Function: BCE')
    print(f'Smoke Precision Weight: {config.SMOKE_PRECISION_WEIGHT}')
    logger.info(f'Smoke Precision Weight: {config.SMOKE_PRECISION_WEIGHT}')
    loss_fn = loss.BCE_LOSS(device=config.DEVICE, smoke_precision_weight=config.SMOKE_PRECISION_WEIGHT)
else:
    print("Wrong loss function")
    logger.info("Wrong loss function")
    raise SystemExit("Wrong loss function")

Loss Function: BCE
Smoke Precision Weight: 0.8


# Loss and Metrics Loggers and Plotters

In [29]:
train_losses_logger = utils.LogLosses()
train_metrics_logger = utils.LogMetrics()
lr_logger = utils.LogLR(log_path=config.PLOTS_FOLDER)

val_losses_logger = utils.LogLosses()
val_metrics_logger = utils.LogMetrics()

loss_plotter = utils.PlotMetrics(log_path=config.PLOTS_FOLDER, model_name=config.MODEL, loss_or_metric='Loss')
metrics_plotter = utils.PlotMetrics(log_path=config.PLOTS_FOLDER, model_name=config.MODEL, loss_or_metric='Metric')

# Train Loop Function

In [30]:
def train_loop(model, start_epoch=0, epochs_to_train=config.EPOCHS):

    ''' ==============================================================
                                TRAINING LOOP
    ============================================================== '''
    start = datetime.datetime.now()
    start_time = start.strftime("%H:%M:%S")
    print(f'\n***Start Training: {start_time}\n')
    logger.info(f'\n***Start Training: {start_time}\n')
    
    # Start with infinite validation loss
    best_valid_loss = np.inf
    best_smoke_precision = 0. #torch.tensor([0.])
    smoke_f1_min_save = 0.9 #torch.tensor([0.9])
    best_mean_f1 = 0.

    if start_epoch == 0:
        epochs_plot = []
    else:
        epochs_plot = [i for i in range(start_epoch)]    

    end_epoch = start_epoch + epochs_to_train
        
    for epoch in range(start_epoch, end_epoch):

        print(f'\n=== EPOCH {epoch}/{end_epoch-1} ===')
        logger.info(f'\n=== EPOCH {epoch}/{end_epoch-1} ===')
        
        #====================== TRAINING ========================#
        current_lr = train_epoch.get_lr(optimizer=optimizer)
        logger.info(f'Learning Rate = {current_lr}\n')
        lr_logger.log_lr(current_lr)
                
        train_losses, train_metrics = train_epoch.train_fn(
            loader=train_loader, 
            model=model, 
            optimizer=optimizer, 
            loss_fn=loss_fn,
            device=config.DEVICE)
        
        train_losses_logger.update_metrics(train_losses)
        train_metrics_logger.update_metrics(train_metrics)
                
        logger.info(utils.print_metrics_to_logger("TRAIN Stats", train_losses, train_metrics))
        
        #===================== VALIDATING =======================#
        with torch.no_grad():
            val_losses, val_metrics = val_epoch.eval_fn(
                loader=val_loader, 
                model=model,                         
                loss_fn=loss_fn,
                device=config.DEVICE)
            
            scheduler.step(val_losses['Total'])
            
            val_losses_logger.update_metrics(val_losses)
            val_metrics_logger.update_metrics(val_metrics)

            logger.info(utils.print_metrics_to_logger("VAL Stats", val_losses, val_metrics))
            
        epochs_plot.append(epoch)

        loss_plotter.plot_all_metrics(
            train_losses_logger.get_metrics(),
            val_losses_logger.get_metrics(),
            epochs_plot)

        metrics_plotter.plot_all_metrics(
            train_metrics_logger.get_metrics(),
            val_metrics_logger.get_metrics(),
            epochs_plot)

        lr_logger.plot_lr(epochs_plot)
        #======================= SAVING =========================#
        if ( (epoch+1) % 5 ) == 0:
            save_name = config.WEIGHTS_FOLDER + config.MODEL + '_classifier__5epoch.pt'
            utils.save_checkpoint(epoch, model, optimizer, scheduler, save_name) 
            
        if best_valid_loss > val_losses['Total']:
            best_valid_loss = val_losses['Total']
            print(f"\nSaving model with new best validation loss: {best_valid_loss:.4f}")
            logger.info(f"Saving model with new best validation loss: {best_valid_loss:.4f}")
            save_name = config.WEIGHTS_FOLDER + config.MODEL + '_classifier__' + 'best_loss'  + '.pt'
            utils.save_checkpoint(epoch, model, optimizer, scheduler, save_name) 
            if config.BREVITAS_MODEL == True:
                save_onnx = config.ONNX_FOLDER + config.MODEL + '_classifier__' + 'best_loss'  #+ '.onnx'
                utils.export_onnx(model, (1, config.NUM_CHANNELS, config.IMG_H, config.IMG_W), save_onnx, config.DEVICE)

        # Save model if best mean F1 increases
        val_f1_mean = (val_metrics['F1'][0] + val_metrics['F1'][1]) / 2
        if (val_f1_mean > best_mean_f1) :
            best_mean_f1 = val_f1_mean
            print(f'Saving model with best Mean F1: {best_mean_f1:.4f}')
            logger.info(f'Saving model with best Mean F1: {best_mean_f1:.4f}')
            save_f1_name = 'best_mean_F1'
            save_name = config.WEIGHTS_FOLDER + config.MODEL + '_classifier__' + save_f1_name + '.pt'
            utils.save_checkpoint(epoch, model, optimizer, scheduler, save_name) 
            if config.BREVITAS_MODEL == True:
                save_onnx = config.ONNX_FOLDER + config.MODEL + '_classifier__' + save_f1_name #+ '.onnx'
                utils.export_onnx(model, (1, config.NUM_CHANNELS, config.IMG_H, config.IMG_W), save_onnx, config.DEVICE)
        
    logger.info('Saving last model')   
    torch.save(model.state_dict(), config.WEIGHTS_FOLDER + 'last_' + config.MODEL + '_classifier.pt') 
    
    #======================= FINISH =========================#
    end = datetime.datetime.now()
    end_time = end.strftime("%H:%M:%S")
    print(f'\n***Script finished: {end_time}\n')  
    print(f'Time elapsed: {end-start}')
    logger.info(f'\n***Script finished: {end_time}\n')  
    logger.info(f'Time elapsed: {end-start}')
    
    return model

# Training

In [31]:
print("Start Training\n")
logger.info("Start Training\n")

svd_model_trained = train_loop(svd_model)

Start Training


***Start Training: 02:20:21


=== EPOCH 0/1 ===
Learning Rate = 0.001



Training: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:03<00:00,  2.93it/s]


Total Loss  |Smoke Loss  |Fire Loss   
------------ ------------ ------------
28.229      |15.963      |12.266      



Validating: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:01<00:00,  3.25it/s]


Total Loss  |Smoke Loss  |Fire Loss   
------------ ------------ ------------
64.773      |46.330      |18.443      
SMOKE -> Precision: 0.6754 - Recall: 0.9628 - Accuracy: 0.7552 - F1: 0.7939
FIRE -> Precision: 0.7786 - Recall: 0.9397 - Accuracy: 0.9010 - F1: 0.8516

Saving model with new best validation loss: 64.7730
Saving model with best Mean F1: 0.8227

=== EPOCH 1/1 ===
Learning Rate = 0.001



Training: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:03<00:00,  2.98it/s]


Total Loss  |Smoke Loss  |Fire Loss   
------------ ------------ ------------
21.033      |15.457      |5.576       



Validating: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:01<00:00,  3.23it/s]


Total Loss  |Smoke Loss  |Fire Loss   
------------ ------------ ------------
33.359      |25.818      |7.540       
SMOKE -> Precision: 0.8533 - Recall: 0.8351 - Accuracy: 0.8490 - F1: 0.8441
FIRE -> Precision: 0.9714 - Recall: 0.8793 - Accuracy: 0.9557 - F1: 0.9231

Saving model with new best validation loss: 33.3588
Saving model with best Mean F1: 0.8836

***Script finished: 02:20:33

Time elapsed: 0:00:12.066482


# Check Comp Model Params

In [32]:
svd_model_trained.eval()
logger.info(f'\n########################################### SVD Model\n')
logger.info(svd_model_trained)

# MODEL PARAMETERS
n_trainable = sum(p.numel() for p in svd_model_trained.parameters() if p.requires_grad)
print(f'\nTrainable parameters = {n_trainable}')
logger.info(f'\nTrainable parameters = {n_trainable}')

n_params = sum(p.numel() for p in svd_model_trained.parameters())
print(f'Total parameters = {n_params}')
logger.info(f'Total parameters = {n_params}\n')


Trainable parameters = 47752
Total parameters = 47752


# Export to ONNX

In [33]:
torch.onnx.export(svd_model_trained, torch.randn(input_shape).to(config.DEVICE), config.RUN_FOLDER + 'svd_model_trained.onnx')
#OnnxSaver.set_node_names('/models/fp32_model.onnx', trained_model, input_shape)

# Pruning the Model after Training the SVD one

### Supress Warnings

In [34]:
import warnings
warnings.filterwarnings('ignore')

### Baseline SVD Trained

In [35]:
svd_trained_f1 = evaluate_model(svd_model_trained, None, True)

F1 mean: 0.8836


### Pruning Parameters

#### Check first layer to setup modules to ignore

In [39]:
if isinstance(svd_model_trained.model.conv1, nn.Sequential):  
    modules_to_ignore = [svd_model_trained.model.conv1[0]]
    print("Ignore conv1[0] due to SVD")
    logger.info("\nPruning params: conv1 is Sequential, so it was splitted in SVD -> update modules to ignore")
else:
    modules_to_ignore = [svd_model_trained.model.conv1]
    logger.info("\nPruning params: conv1 is not Sequential, so it was not splitted in SVD")
    print("Ignore conv1 due to SVD")

In [40]:
#modules_to_ignore = [svd_model_trained.model.conv1]
greedy_params = GreedySelectionParameters(
    target_comp_ratio=Decimal(config.PRUNING_COMPRESSION_RATIO),
    saved_eval_scores_dict=config.PRUNING_DIC_FILE)
auto_params = ChannelPruningParameters.AutoModeParams(
    greedy_params,
    modules_to_ignore=modules_to_ignore)
cp_params = ChannelPruningParameters(
    mode=ChannelPruningParameters.Mode.auto,
    params=auto_params,
    data_loader=aimet_val_loader, #val_loader,
    num_reconstruction_samples=500,
    allow_custom_downsample_ops=False)

In [41]:
comp_model, stats = ModelCompressor.compress_model(svd_model_trained,
                                                   input_shape=input_shape,
                                                   eval_callback=evaluate_model,
                                                   eval_iterations=None,
                                                   compress_scheme=CompressionScheme.channel_pruning,
                                                   cost_metric=CostMetric.mac,
                                                   parameters=cp_params,
                                                   visualization_url=None)                                                    

2025-01-26 02:23:22,130 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.4133
2025-01-26 02:23:23,977 - CompRatioSelect - INFO - Layer model.conv1.1, comp_ratio 0.100000 ==> eval_score=0.413258
2025-01-26 02:23:26,034 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.4174
2025-01-26 02:23:27,919 - CompRatioSelect - INFO - Layer model.conv1.1, comp_ratio 0.200000 ==> eval_score=0.417427


Traceback (most recent call last):
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 133, in _remove_temp_dir
    rmtree(tempdir)
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/shutil.py", line 722, in rmtree
    onerror(os.rmdir, path, sys.exc_info())
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/shutil.py", line 720, in rmtree
    os.rmdir(path)
OSError: [Errno 39] Directory not empty: '/tmp/pymp-ukixjicq'


2025-01-26 02:23:29,842 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.4372
2025-01-26 02:23:31,730 - CompRatioSelect - INFO - Layer model.conv1.1, comp_ratio 0.300000 ==> eval_score=0.437224
2025-01-26 02:23:33,642 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8170
2025-01-26 02:23:35,502 - CompRatioSelect - INFO - Layer model.conv1.1, comp_ratio 0.400000 ==> eval_score=0.817048
2025-01-26 02:23:37,432 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8116
2025-01-26 02:23:39,287 - CompRatioSelect - INFO - Layer model.conv1.1, comp_ratio 0.500000 ==> eval_score=0.811621
2025-01-26 02:23:41,197 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8046
2025-01-26 02:23:43,056 - CompRatioSelect - INFO - Layer model.conv1.1, comp_ratio 0.600000 ==> eval_score=0.804583
2025-01-26 02:23:44,956 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8491
2025-01-26 02:23:46,827 - CompRatioSelect - IN

Traceback (most recent call last):
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 133, in _remove_temp_dir
    rmtree(tempdir)
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/shutil.py", line 722, in rmtree
    onerror(os.rmdir, path, sys.exc_info())
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/shutil.py", line 720, in rmtree
    os.rmdir(path)
OSError: [Errno 39] Directory not empty: '/tmp/pymp-ybfoghe6'


2025-01-26 02:24:22,403 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8784
2025-01-26 02:24:24,299 - CompRatioSelect - INFO - Layer model.conv2.0, comp_ratio 0.800000 ==> eval_score=0.878446


Traceback (most recent call last):
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 133, in _remove_temp_dir
    rmtree(tempdir)
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/shutil.py", line 722, in rmtree
    onerror(os.rmdir, path, sys.exc_info())
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/shutil.py", line 720, in rmtree
    os.rmdir(path)
OSError: [Errno 39] Directory not empty: '/tmp/pymp-2f6snyfh'


2025-01-26 02:24:26,202 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8836
2025-01-26 02:24:28,048 - CompRatioSelect - INFO - Layer model.conv2.0, comp_ratio 0.900000 ==> eval_score=0.883582
2025-01-26 02:24:29,934 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.4537
2025-01-26 02:24:31,789 - CompRatioSelect - INFO - Layer model.conv2.1, comp_ratio 0.100000 ==> eval_score=0.453750
2025-01-26 02:24:33,662 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8058
2025-01-26 02:24:35,519 - CompRatioSelect - INFO - Layer model.conv2.1, comp_ratio 0.200000 ==> eval_score=0.805826
2025-01-26 02:24:37,407 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8358
2025-01-26 02:24:39,269 - CompRatioSelect - INFO - Layer model.conv2.1, comp_ratio 0.300000 ==> eval_score=0.835839
2025-01-26 02:24:41,154 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8669
2025-01-26 02:24:43,014 - CompRatioSelect - IN

Traceback (most recent call last):
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 133, in _remove_temp_dir
    rmtree(tempdir)
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/shutil.py", line 722, in rmtree
    onerror(os.rmdir, path, sys.exc_info())
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/shutil.py", line 720, in rmtree
    os.rmdir(path)
OSError: [Errno 39] Directory not empty: '/tmp/pymp-y6qqaoi_'


2025-01-26 02:25:41,505 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.2334
2025-01-26 02:25:43,387 - CompRatioSelect - INFO - Layer model.conv32.0, comp_ratio 0.200000 ==> eval_score=0.233449
2025-01-26 02:25:45,307 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.6274
2025-01-26 02:25:47,202 - CompRatioSelect - INFO - Layer model.conv32.0, comp_ratio 0.300000 ==> eval_score=0.627384
2025-01-26 02:25:49,113 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.6778
2025-01-26 02:25:51,038 - CompRatioSelect - INFO - Layer model.conv32.0, comp_ratio 0.400000 ==> eval_score=0.677828
2025-01-26 02:25:52,945 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8039
2025-01-26 02:25:54,834 - CompRatioSelect - INFO - Layer model.conv32.0, comp_ratio 0.500000 ==> eval_score=0.803913
2025-01-26 02:25:56,723 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8459
2025-01-26 02:25:58,603 - CompRatioSelect 

Traceback (most recent call last):
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 133, in _remove_temp_dir
    rmtree(tempdir)
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/shutil.py", line 722, in rmtree
    onerror(os.rmdir, path, sys.exc_info())
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/shutil.py", line 720, in rmtree
    os.rmdir(path)
OSError: [Errno 39] Directory not empty: '/tmp/pymp-kmtj26u2'


2025-01-26 02:26:15,711 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.6784
2025-01-26 02:26:17,607 - CompRatioSelect - INFO - Layer model.conv32.1, comp_ratio 0.200000 ==> eval_score=0.678430
2025-01-26 02:26:19,526 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.7887
2025-01-26 02:26:21,414 - CompRatioSelect - INFO - Layer model.conv32.1, comp_ratio 0.300000 ==> eval_score=0.788656
2025-01-26 02:26:23,331 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8244
2025-01-26 02:26:25,203 - CompRatioSelect - INFO - Layer model.conv32.1, comp_ratio 0.400000 ==> eval_score=0.824410
2025-01-26 02:26:27,115 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8707
2025-01-26 02:26:28,986 - CompRatioSelect - INFO - Layer model.conv32.1, comp_ratio 0.500000 ==> eval_score=0.870683
2025-01-26 02:26:30,900 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8739
2025-01-26 02:26:32,773 - CompRatioSelect 

Traceback (most recent call last):
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 133, in _remove_temp_dir
    rmtree(tempdir)
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/shutil.py", line 722, in rmtree
    onerror(os.rmdir, path, sys.exc_info())
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/shutil.py", line 720, in rmtree
    os.rmdir(path)
OSError: [Errno 39] Directory not empty: '/tmp/pymp-g40_sy27'


2025-01-26 02:31:14,729 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8673
2025-01-26 02:31:16,640 - CompRatioSelect - INFO - Layer model.conv43.1, comp_ratio 0.800000 ==> eval_score=0.867305
2025-01-26 02:31:18,593 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8740
2025-01-26 02:31:20,512 - CompRatioSelect - INFO - Layer model.conv43.1, comp_ratio 0.900000 ==> eval_score=0.873985
2025-01-26 02:31:22,454 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.6060
2025-01-26 02:31:24,373 - CompRatioSelect - INFO - Layer model.conv44.0, comp_ratio 0.100000 ==> eval_score=0.605977
2025-01-26 02:31:26,312 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.6942
2025-01-26 02:31:28,229 - CompRatioSelect - INFO - Layer model.conv44.0, comp_ratio 0.200000 ==> eval_score=0.694199
2025-01-26 02:31:30,179 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8116
2025-01-26 02:31:32,094 - CompRatioSelect 

Traceback (most recent call last):
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 133, in _remove_temp_dir
    rmtree(tempdir)
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/shutil.py", line 722, in rmtree
    onerror(os.rmdir, path, sys.exc_info())
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/shutil.py", line 720, in rmtree
    os.rmdir(path)
OSError: [Errno 39] Directory not empty: '/tmp/pymp-psanxufp'


2025-01-26 02:31:49,505 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8743
2025-01-26 02:31:51,442 - CompRatioSelect - INFO - Layer model.conv44.0, comp_ratio 0.800000 ==> eval_score=0.874310
2025-01-26 02:31:53,399 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8836
2025-01-26 02:31:55,326 - CompRatioSelect - INFO - Layer model.conv44.0, comp_ratio 0.900000 ==> eval_score=0.883582
2025-01-26 02:31:57,285 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.5139
2025-01-26 02:31:59,218 - CompRatioSelect - INFO - Layer model.conv44.1, comp_ratio 0.100000 ==> eval_score=0.513898
2025-01-26 02:32:01,163 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.7553
2025-01-26 02:32:03,086 - CompRatioSelect - INFO - Layer model.conv44.1, comp_ratio 0.200000 ==> eval_score=0.755267


Traceback (most recent call last):
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 133, in _remove_temp_dir
    rmtree(tempdir)
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/shutil.py", line 722, in rmtree
    onerror(os.rmdir, path, sys.exc_info())
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/shutil.py", line 720, in rmtree
    os.rmdir(path)
OSError: [Errno 39] Directory not empty: '/tmp/pymp-p2ts3jqz'


2025-01-26 02:32:05,042 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8047
2025-01-26 02:32:06,962 - CompRatioSelect - INFO - Layer model.conv44.1, comp_ratio 0.300000 ==> eval_score=0.804714
2025-01-26 02:32:08,921 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8802
2025-01-26 02:32:10,851 - CompRatioSelect - INFO - Layer model.conv44.1, comp_ratio 0.400000 ==> eval_score=0.880182
2025-01-26 02:32:12,804 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8749
2025-01-26 02:32:14,728 - CompRatioSelect - INFO - Layer model.conv44.1, comp_ratio 0.500000 ==> eval_score=0.874928
2025-01-26 02:32:16,702 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8798
2025-01-26 02:32:18,633 - CompRatioSelect - INFO - Layer model.conv44.1, comp_ratio 0.600000 ==> eval_score=0.879759
2025-01-26 02:32:20,583 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8806
2025-01-26 02:32:22,510 - CompRatioSelect 

Traceback (most recent call last):
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/multiprocessing/util.py", line 133, in _remove_temp_dir
    rmtree(tempdir)
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/shutil.py", line 722, in rmtree
    onerror(os.rmdir, path, sys.exc_info())
  File "/opt/conda/envs/pytorch_aimet/lib/python3.8/shutil.py", line 720, in rmtree
    os.rmdir(path)
OSError: [Errno 39] Directory not empty: '/tmp/pymp-kemnbyb1'


2025-01-26 02:34:13,715 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8836
2025-01-26 02:34:15,658 - CompRatioSelect - INFO - Layer model.conv46.0, comp_ratio 0.900000 ==> eval_score=0.883582
2025-01-26 02:34:17,616 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.4000
2025-01-26 02:34:19,562 - CompRatioSelect - INFO - Layer model.conv46.1, comp_ratio 0.100000 ==> eval_score=0.400000
2025-01-26 02:34:21,568 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.4025
2025-01-26 02:34:23,515 - CompRatioSelect - INFO - Layer model.conv46.1, comp_ratio 0.200000 ==> eval_score=0.402469
2025-01-26 02:34:25,490 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.4010
2025-01-26 02:34:27,435 - CompRatioSelect - INFO - Layer model.conv46.1, comp_ratio 0.300000 ==> eval_score=0.400966
2025-01-26 02:34:29,410 - ChannelPruning - INFO - finished linear regression fit 
F1 mean: 0.8474
2025-01-26 02:34:31,357 - CompRatioSelect 

### Copy Data Files to Running Folder

In [42]:
shutil.copyfile(
    './data/greedy_selection_eval_scores_dict.pkl', 
    config.RUN_FOLDER + 'pruning_eval_scores.pkl')
shutil.copyfile(
    './data/greedy_selection_comp_ratios_list.pkl', 
    config.RUN_FOLDER + 'pruning_comp_ratios.pkl')

'experiments_bed_evolution/15_downto_28_aimet__big_aimet_ds__MAC__128_ds/pruning_comp_ratios.pkl'

# Print Stats

In [44]:
print(stats)
logger.info(stats)

**********************************************************************************************
Compressed Model Statistics
Baseline model accuracy: 0.883582, Compressed model accuracy: 0.842376
Compression ratio for memory=0.810771, mac=0.800312

**********************************************************************************************

Per-layer Stats
    Name:model.conv1.1, compression-ratio: 0.9
    Name:model.conv2.0, compression-ratio: 0.8
    Name:model.conv2.1, compression-ratio: 0.9
    Name:model.conv31, compression-ratio: None
    Name:model.conv32.0, compression-ratio: None
    Name:model.conv32.1, compression-ratio: None
    Name:model.conv33, compression-ratio: None
    Name:model.conv34.0, compression-ratio: None
    Name:model.conv34.1, compression-ratio: None
    Name:model.conv41, compression-ratio: 0.9
    Name:model.conv42.0, compression-ratio: 0.9
    Name:model.conv42.1, compression-ratio: 0.9
    Name:model.conv43.0, compression-ratio: 0.9
    Name:model.conv4

### Torchinfo: model compressed summary

In [45]:
print(comp_model)

BED_CLASSIFIER_DOWNTO_28(
  (model): Sequential(
    (conv1): Sequential(
      (0): Conv2d(3, 5, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0), bias=False)
      (1): Conv2d(5, 25, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1), bias=False)
    )
    (bn1): BatchNorm2d(25, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU()
    (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Sequential(
      (0): Conv2d(25, 19, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0), bias=False)
      (1): Conv2d(19, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1), bias=False)
    )
    (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU()
    (maxpool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv31): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn31): BatchNorm2d(16, eps=1e-05, momentum=0.1, aff

In [46]:
comp_model.eval()

print(summary(comp_model, input_size=(1, 3, config.IMG_H, config.IMG_W)))

logger.info("\nPruned Model")
logger.info(comp_model)

logger.info("\nCompressed Model Summary")
logger.info(summary(comp_model, input_size=(1, 3, config.IMG_H, config.IMG_W)))

Layer (type:depth-idx)                   Output Shape              Param #
BED_CLASSIFIER_DOWNTO_28                 [1, 2]                    --
├─Sequential: 1-1                        [1, 2]                    --
│    └─Sequential: 2-1                   [1, 25, 224, 224]         --
│    │    └─Conv2d: 3-1                  [1, 5, 224, 224]          45
│    │    └─Conv2d: 3-2                  [1, 25, 224, 224]         375
│    └─BatchNorm2d: 2-2                  [1, 25, 224, 224]         50
│    └─ReLU: 2-3                         [1, 25, 224, 224]         --
│    └─MaxPool2d: 2-4                    [1, 25, 112, 112]         --
│    └─Sequential: 2-5                   [1, 16, 112, 112]         --
│    │    └─Conv2d: 3-3                  [1, 19, 112, 112]         1,425
│    │    └─Conv2d: 3-4                  [1, 16, 112, 112]         912
│    └─BatchNorm2d: 2-6                  [1, 16, 112, 112]         32
│    └─ReLU: 2-7                         [1, 16, 112, 112]         --
│    └─Max

# Evaluate Compressed Model

In [48]:
comp_f1 = evaluate_model(comp_model, None, True)
logger.info(f'\nPruned Model evaluation with Aimet Val Loader before training: {comp_f1}')

F1 mean: 0.8424


# Visualize Results

In [49]:
comp_ratios_file_path = './data/greedy_selection_comp_ratios_list.pkl'
eval_scores_path = './data/greedy_selection_eval_scores_dict.pkl'

unpickled_ratios = pd.read_pickle(comp_ratios_file_path)
unpickled_scores = pd.read_pickle(eval_scores_path)

In [50]:
df_scores = pd.DataFrame(unpickled_scores)
df_scores.to_csv(config.RUN_FOLDER + 'scores_pruning.csv')
print(df_scores)

     model.conv1.1  model.conv2.0  model.conv2.1  model.conv31  \
0.1       0.413258       0.348857       0.453750      0.000000   
0.2       0.417427       0.743735       0.805826      0.197581   
0.3       0.437224       0.761934       0.835839      0.295744   
0.4       0.817048       0.831584       0.866907      0.645459   
0.5       0.811621       0.866073       0.874174      0.572345   
0.6       0.804583       0.871171       0.865562      0.842398   
0.7       0.849062       0.877758       0.876123      0.865087   
0.8       0.842224       0.878446       0.874852      0.859882   
0.9       0.889656       0.883582       0.878877      0.874752   

     model.conv32.0  model.conv32.1  model.conv33  model.conv34.0  \
0.1        0.015707        0.040215      0.045000        0.053846   
0.2        0.233449        0.678430      0.269417        0.597110   
0.3        0.627384        0.788656      0.336283        0.625293   
0.4        0.677828        0.824410      0.638014        0.7016

In [51]:
df_ratios = pd.DataFrame(unpickled_ratios)
df_ratios.to_csv(config.RUN_FOLDER + 'ratios_pruning.csv')
print(df_ratios)

                 0     1
0    model.conv1.1   0.9
1    model.conv2.0   0.8
2    model.conv2.1   0.9
3     model.conv31  None
4   model.conv32.0  None
5   model.conv32.1  None
6     model.conv33  None
7   model.conv34.0  None
8   model.conv34.1  None
9     model.conv41   0.9
10  model.conv42.0   0.9
11  model.conv42.1   0.9
12  model.conv43.0   0.9
13  model.conv43.1  None
14  model.conv44.0   0.9
15  model.conv44.1  None
16  model.conv45.0   0.4
17  model.conv45.1  None
18  model.conv46.0   0.5
19  model.conv46.1   0.9


# Save Compressed Before Training

In [52]:
torch.onnx.export(comp_model, torch.randn(input_shape).to(config.DEVICE), config.RUN_FOLDER + 'pruned_model_noTrain.onnx')

### Optimizer and Scheduler of Compressed Model to Train

In [53]:
optimizer = optim.Adam(comp_model.parameters(), 
                       lr=config.LEARNING_RATE, 
                       weight_decay=config.WEIGHT_DECAY)

scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 
                                                 mode='min',
                                                 factor=config.FACTOR, 
                                                 patience=config.PATIENCE, 
                                                 threshold=config.THRES, 
                                                 threshold_mode='abs',
                                                 min_lr=config.MIN_LR)

utils.save_checkpoint(epoch=0, 
                      model=comp_model,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      checkpoint_name=config.WEIGHTS_FOLDER + 'comp_model_after_pruning.pt')

# Define Plotters again to restart them

In [54]:
train_losses_logger = utils.LogLosses()
train_metrics_logger = utils.LogMetrics()
lr_logger = utils.LogLR(log_path=config.PLOTS_FOLDER_2)

val_losses_logger = utils.LogLosses()
val_metrics_logger = utils.LogMetrics()

loss_plotter = utils.PlotMetrics(log_path=config.PLOTS_FOLDER_2, model_name=config.MODEL, loss_or_metric='Loss')
metrics_plotter = utils.PlotMetrics(log_path=config.PLOTS_FOLDER_2, model_name=config.MODEL, loss_or_metric='Metric')

# Define Train Function Again to restart it

In [55]:
def train_svd_loop(model, start_epoch=0):

    ''' ==============================================================
                                TRAINING LOOP
    ============================================================== '''
    start = datetime.datetime.now()
    start_time = start.strftime("%H:%M:%S")
    print(f'\n***Start Training: {start_time}\n')
    logger.info(f'\n***Start Training: {start_time}\n')
    
    # Start with infinite validation loss
    best_valid_loss = np.inf
    best_smoke_precision = 0. #torch.tensor([0.])
    smoke_f1_min_save = 0.9 #torch.tensor([0.9])
    best_mean_f1 = 0.

    #start_epoch = 0
    epochs_plot = []
        
    for epoch in range(start_epoch, config.EPOCHS):

        print(f'\n=== EPOCH {epoch}/{config.EPOCHS-1} ===')
        logger.info(f'\n=== EPOCH {epoch}/{config.EPOCHS-1} ===')
        
        #====================== TRAINING ========================#
        current_lr = train_epoch.get_lr(optimizer=optimizer)
        logger.info(f'Learning Rate = {current_lr}\n')
        lr_logger.log_lr(current_lr)
                
        train_losses, train_metrics = train_epoch.train_fn(
            loader=train_loader, 
            model=model, 
            optimizer=optimizer, 
            loss_fn=loss_fn,
            device=config.DEVICE)
        
        train_losses_logger.update_metrics(train_losses)
        train_metrics_logger.update_metrics(train_metrics)
                
        logger.info(utils.print_metrics_to_logger("TRAIN Stats", train_losses, train_metrics))
        
        #===================== VALIDATING =======================#
        with torch.no_grad():
            val_losses, val_metrics = val_epoch.eval_fn(
                loader=val_loader, 
                model=model,                         
                loss_fn=loss_fn,
                device=config.DEVICE)
            
            scheduler.step(val_losses['Total'])
            
            val_losses_logger.update_metrics(val_losses)
            val_metrics_logger.update_metrics(val_metrics)

            logger.info(utils.print_metrics_to_logger("VAL Stats", val_losses, val_metrics))
            
        epochs_plot.append(epoch)

        loss_plotter.plot_all_metrics(
            train_losses_logger.get_metrics(),
            val_losses_logger.get_metrics(),
            epochs_plot)

        metrics_plotter.plot_all_metrics(
            train_metrics_logger.get_metrics(),
            val_metrics_logger.get_metrics(),
            epochs_plot)

        lr_logger.plot_lr(epochs_plot)
        #======================= SAVING =========================#
        if ( (epoch+1) % 5 ) == 0:
            save_name = config.WEIGHTS_FOLDER_2 + config.MODEL + '_classifier__5epoch.pt'
            utils.save_checkpoint(epoch, model, optimizer, scheduler, save_name) 
            
        if best_valid_loss > val_losses['Total']:
            best_valid_loss = val_losses['Total']
            print(f"\nSaving model with new best validation loss: {best_valid_loss:.3f}")
            logger.info(f"Saving model with new best validation loss: {best_valid_loss:.3f}")
            save_name = config.WEIGHTS_FOLDER_2 + config.MODEL + '_classifier__' + 'best_loss'  + '.pt'
            utils.save_checkpoint(epoch, model, optimizer, scheduler, save_name)  

        # Save model if best mean F1 increases
        val_f1_mean = (val_metrics['F1'][0] + val_metrics['F1'][1]) / 2
        if (val_f1_mean > best_mean_f1) :
            best_mean_f1 = val_f1_mean
            print(f'Saving model with best Mean F1: {best_mean_f1:.4f}')
            logger.info(f'Saving model with best Mean F1: {best_mean_f1:.4f}')
            save_f1_name = 'best_mean_F1'
            save_name = config.WEIGHTS_FOLDER_2 + config.MODEL + '_classifier__' + save_f1_name + '.pt'
            utils.save_checkpoint(epoch, model, optimizer, scheduler, save_name) 

        
    logger.info('Saving last model')   
    torch.save(model.state_dict(), config.WEIGHTS_FOLDER_2 + 'last_' + config.MODEL + '_classifier.pt') 
    
    #======================= FINISH =========================#
    end = datetime.datetime.now()
    end_time = end.strftime("%H:%M:%S")
    print(f'\n***Script finished: {end_time}\n')  
    print(f'Time elapsed: {end-start}')
    logger.info(f'\n***Script finished: {end_time}\n')  
    logger.info(f'Time elapsed: {end-start}')
    
    return model

# Train the model pruned

In [56]:
print("Start Training\n")
logger.info("Start Training\n")

pruned_model = train_svd_loop(comp_model)

Start Training


***Start Training: 02:40:48


=== EPOCH 0/1 ===
Learning Rate = 0.001



Training: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:03<00:00,  3.11it/s]


Total Loss  |Smoke Loss  |Fire Loss   
------------ ------------ ------------
26.647      |17.449      |9.199       



Validating: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:01<00:00,  3.00it/s]


Total Loss  |Smoke Loss  |Fire Loss   
------------ ------------ ------------
39.895      |28.572      |11.322      
SMOKE -> Precision: 0.8274 - Recall: 0.7394 - Accuracy: 0.7969 - F1: 0.7809
FIRE -> Precision: 0.8889 - Recall: 0.8966 - Accuracy: 0.9349 - F1: 0.8927

Saving model with new best validation loss: 39.895
Saving model with best Mean F1: 0.8368

=== EPOCH 1/1 ===
Learning Rate = 0.001



Training: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:03<00:00,  3.05it/s]


Total Loss  |Smoke Loss  |Fire Loss   
------------ ------------ ------------
20.146      |12.867      |7.279       



Validating: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:02<00:00,  2.99it/s]


Total Loss  |Smoke Loss  |Fire Loss   
------------ ------------ ------------
39.628      |28.028      |11.600      
SMOKE -> Precision: 0.8425 - Recall: 0.6543 - Accuracy: 0.7708 - F1: 0.7365
FIRE -> Precision: 0.8770 - Recall: 0.9224 - Accuracy: 0.9375 - F1: 0.8992

Saving model with new best validation loss: 39.628

***Script finished: 02:41:01

Time elapsed: 0:00:12.219919


# Export to ONNX

In [57]:
torch.onnx.export(pruned_model, torch.randn(input_shape).to(config.DEVICE), config.RUN_FOLDER + 'pruned_model_trained.onnx')