**01. Building the Bottleneck inverted residual block (BIR)**

<img src = 'BIR.png' height = '300' />

In [None]:
# Standard library
import warnings

# DL needs
import tensorflow as tf
import keras as kr

# Data needs
import pandas as pd
from sklearn.model_selection import train_test_split

# Numerical computation needs
import numpy as np

# plotting needs
import matplotlib.pyplot as plt
import matplotlib_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

# ensuring reproducibility
# Seeding TensorFlow alone leaves the Python and NumPy generators unseeded;
# `set_random_seed` seeds all of them. The explicit TensorFlow call is kept so
# the TF global seed is set regardless of what the Keras helper covers.
random_seed=42
kr.utils.set_random_seed(random_seed)
tf.random.set_seed(random_seed)

# Silence UserWarnings raised from within the keras package only.
warnings.filterwarnings("ignore", category=UserWarning, module="keras")



**Note:**
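* Add a `training` argument to your layer's `call()` only when the layer contains sub-layers that behave differently during training and inference, such as `BatchNormalization()` or `Dropout()`, and pass it on to those sub-layers explicitly (e.g. `self.bn(x, training=training)`). Otherwise leave it out: the model does not train if `training` is declared in the signature but never used inside the function.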

In [286]:
@kr.utils.register_keras_serializable(package='BottleNeckInverseResidualBlock')
class BIR(kr.layers.Layer):
    """Bottleneck inverted residual block.

    Pipeline: expansion conv -> BN -> ReLU -> depthwise conv -> BN -> ReLU ->
    1x1 projection conv -> BN. The block input is added to the result only
    when ``stride == 1`` and ``input_channels == output_channels``.

    Args:
        input_channels: number of channels of the incoming tensor.
        output_channels: number of channels produced by the projection conv.
        expansion_factor: multiplier applied to ``input_channels`` to get the
            width of the expansion / depthwise stage.
        stride: stride of the depthwise convolution.
        expansion_kernel_size: kernel size of the expansion convolution.
        depthwise_kernel_size: kernel size of the depthwise convolution.
        **kwargs: forwarded to ``keras.layers.Layer`` (e.g. ``name``). The
            ``name`` (default ``'BIR'``) is also used as prefix for the names
            of all sub-layers.

    NOTE(review): the activations are unbounded ``ReLU`` layers; if pretrained
    MobileNetV2 weights are loaded into this block, confirm that this (rather
    than a capped ReLU) is intended.
    """

    def __init__(self,input_channels,output_channels,expansion_factor,stride = 1,expansion_kernel_size = 1,depthwise_kernel_size = 3, **kwargs):
        super().__init__(**kwargs)

        # Keep the constructor arguments so get_config() can reproduce the block.
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.expansion_factor = expansion_factor
        self.expansion_kernel_size = expansion_kernel_size
        self.depthwise_kernel_size = depthwise_kernel_size

        self._block_name = kwargs.get('name','BIR')
        self.stride = stride
        # The shortcut is only shape-compatible when nothing is downsampled
        # and the channel count is unchanged.
        self.use_residual = (self.stride == 1 and input_channels == output_channels)
        expanded_channels = input_channels * expansion_factor

        # Expansion
        self.expand_conv = tf.keras.layers.Conv2D(
            filters = expanded_channels,
            kernel_size = expansion_kernel_size,
            padding = 'same',
            use_bias = False,
            name = f'{self._block_name}_expand'
        )

        self.bn1 = tf.keras.layers.BatchNormalization(name = f'{self._block_name}_expand_BN')
        self.relu1 = tf.keras.layers.ReLU(name=f'{self._block_name}_expand_relu')

        # Depthwise convolution
        self.depthwise_conv = tf.keras.layers.DepthwiseConv2D(
            kernel_size = depthwise_kernel_size,
            strides = self.stride,
            padding = 'same',
            use_bias = False,
            name = f'{self._block_name}_depthwise'
        )
        self.bn2 = tf.keras.layers.BatchNormalization(name = f'{self._block_name}_depthwise_BN')
        self.relu2 = tf.keras.layers.ReLU(name=f'{self._block_name}_depthwise_relu')

        # Projection (no activation after its batch normalization)
        self.project_conv = tf.keras.layers.Conv2D(
            filters = output_channels,
            kernel_size = 1,
            padding = 'same',
            use_bias = False,
            name = f'{self._block_name}_project'
        )
        self.bn3 = tf.keras.layers.BatchNormalization(name = f'{self._block_name}_project_BN')

        # Addition layer (only used when `use_residual` is True)
        self.add_layer = tf.keras.layers.Add(name = f'{self._block_name}_add')

    def call(self,inputs,training = False):
        """Run the block on `inputs`.

        Args:
            inputs: tensor fed to the expansion convolution.
            training: forwarded to the three batch-normalization layers.

        Returns:
            The projected tensor, added to `inputs` when `use_residual` is True.
        """
        # expansion
        x = self.expand_conv(inputs)
        x = self.bn1(x,training = training)
        x = self.relu1(x)

        # depthwise convolution
        x = self.depthwise_conv(x)
        x = self.bn2(x,training = training)
        x = self.relu2(x)

        # projection
        x = self.project_conv(x)
        x = self.bn3(x,training = training)

        # residual connection
        if self.use_residual:
            return self.add_layer([inputs,x])
        else:
            return x

    def get_config(self):
        """Return the constructor arguments so the block can be re-created.

        The class is registered as Keras-serializable and has required
        constructor arguments, so they are listed explicitly here.
        """
        config = super().get_config()
        config.update({
            'input_channels': self.input_channels,
            'output_channels': self.output_channels,
            'expansion_factor': self.expansion_factor,
            'stride': self.stride,
            'expansion_kernel_size': self.expansion_kernel_size,
            'depthwise_kernel_size': self.depthwise_kernel_size,
        })
        return config

In [204]:
# Smoke test: wrap a single BIR block (3 -> 16 channels, expansion 6) in a
# functional model and print its summary.
inputs = tf.keras.Input(shape=(112, 112, 3))
outputs = BIR(
    input_channels=3,
    output_channels=16,
    expansion_factor=6,
    name="block_1",
)(inputs)

model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.summary()


**02. Creating the encoder section**

<img src = 'QUICKSAL.png' width = '700'/>

* Create 7 blocks of BIR with the following input/output shapes:
  
  |BIR block|Input shape|Stride|Output shape|Expansion factor (t) |n (repetitions)|
  |---|---|---|---|---|---|
  | B1 |112<sup>2</sup> x 32 |1|112<sup>2</sup> x 16 |1| 1 |
  | B2 |112<sup>2</sup> x 16 |2|56<sup>2</sup> x 24 |6| 2 |
  | B3 |56<sup>2</sup> x 24 |2|28<sup>2</sup> x 32 |6| 3 |
  | B4 |28<sup>2</sup> x 32 |2|14<sup>2</sup> x 64 |6| 4 |
  | B5 |14<sup>2</sup> x 64 |1|14<sup>2</sup> x 96 |6| 3 |
  | B6 |14<sup>2</sup> x 96 |2|7<sup>2</sup> x 160 |6| 3 |
  | B7 |7<sup>2</sup> x 160 |1|7<sup>2</sup> x 320 |6| 1 |
<br>

In [527]:
@kr.utils.register_keras_serializable(package='QUICKSAL_encoder')
class QUICKSAL_encoder(tf.keras.layers.Layer):
    """Encoder made of 17 sequential BIR blocks grouped into 7 stages.

    Blocks are named ``block_<stage>_<repetition>``. Calling the layer runs
    every block in order and returns, as a list, the outputs of
    ``block_3_1``, ``block_4_1``, ``block_6_1`` and ``block_7_1``
    (32, 64, 160 and 320 channels respectively).

    Attributes set in ``__init__``:
        block_args: per-block constructor parameters, keyed by block name.
        _layers: the BIR blocks, in execution order.
        all_outputs: output of every block from the most recent call,
            keyed by block name (all ``None`` before the first call).

    NOTE(review): `_layers` looks like a name Keras may also use internally
    for sub-layer tracking — confirm the override is safe.
    NOTE(review): within each downsampling stage the stride-2 block is the
    last repetition, whereas the MobileNetV2 layer listing in this notebook
    shows `block_1` (the first block of its stage) doing the 112 -> 56
    reduction — confirm this placement is intended.
    """

    def __init__(self,**kwargs):
        super().__init__(**kwargs)

        # (block name, input channels, output channels, expansion factor t, stride)
        block_specs = (
            ('block_1_1',  32,  16, 1, 1),
            ('block_2_1',  16,  24, 6, 1),
            ('block_2_2',  24,  24, 6, 2),
            ('block_3_1',  24,  32, 6, 1),
            ('block_3_2',  32,  32, 6, 1),
            ('block_3_3',  32,  32, 6, 2),
            ('block_4_1',  32,  64, 6, 1),
            ('block_4_2',  64,  64, 6, 1),
            ('block_4_3',  64,  64, 6, 1),
            ('block_4_4',  64,  64, 6, 2),
            ('block_5_1',  64,  96, 6, 1),
            ('block_5_2',  96,  96, 6, 1),
            ('block_5_3',  96,  96, 6, 1),
            ('block_6_1',  96, 160, 6, 1),
            ('block_6_2', 160, 160, 6, 1),
            ('block_6_3', 160, 160, 6, 2),
            ('block_7_1', 160, 320, 6, 1),
        )
        self.block_args = {
            spec_name: {
                'input_channels': channels_in,
                'output_channels': channels_out,
                't': expansion,
                'stride': step,
            }
            for spec_name, channels_in, channels_out, expansion, step in block_specs
        }

        # Every block keeps BIR's default kernel sizes (1x1 expansion, 3x3 depthwise).
        built_blocks = []
        for block_name, params in self.block_args.items():
            built_blocks.append(
                BIR(input_channels = params['input_channels'],
                    output_channels = params['output_channels'],
                    expansion_factor = params['t'],
                    stride = params['stride'],
                    name = block_name)
            )
        self._layers = built_blocks

        self.all_outputs = dict.fromkeys(self.block_args.keys())

    def call(self,inputs):
        """Run all blocks in order and return the four tapped feature maps."""
        features = inputs
        for bir_block in self._layers:
            features = bir_block(features)
            # Recorded under the block's name, which equals its `block_args` key.
            self.all_outputs[bir_block.name] = features

        tapped = []
        for tap_name in ('block_3_1','block_4_1','block_6_1','block_7_1'):
            tapped.append(self.all_outputs[tap_name])
        return tapped

In [434]:
# Smoke test: wrap the encoder in a functional model fed with a 112x112x32
# tensor and print its summary.
inputs = tf.keras.Input(shape=(112, 112, 32))
outputs = QUICKSAL_encoder(name='QUICKSAL_ENCODER')(inputs)

encoder_model = tf.keras.Model(
    inputs=inputs,
    outputs=outputs,
    name='encoder_model',
)
encoder_model.summary()

In [429]:
# MobileNetV2 with ImageNet weights and without its classification head;
# serves as the source of pretrained weights for the encoder.
mbnet = tf.keras.applications.MobileNetV2(
    input_shape=(224, 224, 3),
    alpha=1.0,
    include_top=False,
    weights='imagenet',
)

In [418]:
# List every MobileNetV2 layer with its index, name and output shape, so the
# encoder blocks can be matched to their pretrained counterparts.
for i,layer in enumerate(mbnet.layers):
    print(i,layer.name,layer.output.shape)

0 input_layer_24 (None, 224, 224, 3)
1 Conv1 (None, 112, 112, 32)
2 bn_Conv1 (None, 112, 112, 32)
3 Conv1_relu (None, 112, 112, 32)
4 expanded_conv_depthwise (None, 112, 112, 32)
5 expanded_conv_depthwise_BN (None, 112, 112, 32)
6 expanded_conv_depthwise_relu (None, 112, 112, 32)
7 expanded_conv_project (None, 112, 112, 16)
8 expanded_conv_project_BN (None, 112, 112, 16)
9 block_1_expand (None, 112, 112, 96)
10 block_1_expand_BN (None, 112, 112, 96)
11 block_1_expand_relu (None, 112, 112, 96)
12 block_1_pad (None, 113, 113, 96)
13 block_1_depthwise (None, 56, 56, 96)
14 block_1_depthwise_BN (None, 56, 56, 96)
15 block_1_depthwise_relu (None, 56, 56, 96)
16 block_1_project (None, 56, 56, 24)
17 block_1_project_BN (None, 56, 56, 24)
18 block_2_expand (None, 56, 56, 144)
19 block_2_expand_BN (None, 56, 56, 144)
20 block_2_expand_relu (None, 56, 56, 144)
21 block_2_depthwise (None, 56, 56, 144)
22 block_2_depthwise_BN (None, 56, 56, 144)
23 block_2_depthwise_relu (None, 56, 56, 144)
24 blo

**03. Adding pre-trained weights of MobileNet-v2 (imagenet) to encoder layer, and setting `trainable=False` except the expand layer in the first block** 

  |BIR block|MobileNet-V2 model layer match-indices/model-names|
  |---|---|
  | B1 |2-8|
  | B2 |block_1,block_2|
  | B3 |block_3,block_4,block_5|
  | B4 |block_6,block_7,block_8,block_9|
  | B5 |block_10,block_11,block_12|
  | B6 |block_13,block_14,block_15|
  | B7 |block_16|

In [543]:
def set_encoder_weights(enc,mbnet):
    """Copy pretrained MobileNetV2 weights into the BIR blocks of an encoder.

    Mapping (encoder block -> MobileNetV2 layers):
      * first block: ``bn_Conv1`` for its expansion batch-norm and
        ``expanded_conv_*`` for its depthwise / projection layers;
      * k-th following block (k = 1, 2, ...): ``block_{k}_*``.

    The expansion convolution of the first block is deliberately left
    untouched: in this notebook MobileNetV2's stem convolution reads the
    3-channel image while the encoder's first expansion convolution reads a
    32-channel tensor, so the two kernels are not interchangeable.

    Sub-layers are addressed through the BIR attributes (``expand_conv``,
    ``bn1``, ...) and MobileNetV2 layers through their names, so the copy
    does not depend on how the encoder blocks are named or on positional
    layer indices.

    Args:
        enc: object whose ``_layers`` sequence holds the BIR blocks in order.
            # presumably the encoder must already have been called once so
            # that its weights exist — verify against caller.
        mbnet: model exposing ``get_layer(name)`` with MobileNetV2 layer names.

    Raises:
        IndexError: if ``enc._layers`` is empty.
        Whatever ``mbnet.get_layer`` / ``set_weights`` raise for a missing
        layer or incompatible weights.
    """
    # (MobileNetV2 name suffix, BIR attribute holding the matching sub-layer)
    parts = (
        ('expand', 'expand_conv'),
        ('expand_BN', 'bn1'),
        ('depthwise', 'depthwise_conv'),
        ('depthwise_BN', 'bn2'),
        ('project', 'project_conv'),
        ('project_BN', 'bn3'),
    )

    def _copy(block, attr, source_name):
        # Overwrite one BIR sub-layer with the weights of a named MobileNetV2 layer.
        getattr(block, attr).set_weights(mbnet.get_layer(source_name).get_weights())

    blocks = list(enc._layers)

    # First block: expansion conv skipped (see docstring).
    first_block = blocks[0]
    _copy(first_block, 'bn1', 'bn_Conv1')
    for suffix, attr in parts[2:]:
        _copy(first_block, attr, f'expanded_conv_{suffix}')

    # Remaining blocks map one-to-one onto MobileNetV2's block_1, block_2, ...
    for i, block in enumerate(blocks[1:], start=1):
        for suffix, attr in parts:
            _copy(block, attr, f'block_{i}_{suffix}')

In [531]:
# Instantiate the encoder and call it once on a symbolic 112x112x32 input.
# NOTE(review): presumably this call is what creates the sub-layer weights
# that set_encoder_weights later overwrites — confirm.
enc = QUICKSAL_encoder()
_ = enc(tf.keras.layers.Input((112,112,32)))

--2025-04-26 00:50:24--  http://mftp.mmcheng.net/Data/MSRA10K_Imgs_GT.zip
Resolving mftp.mmcheng.net (mftp.mmcheng.net)... 108.179.200.15
Connecting to mftp.mmcheng.net (mftp.mmcheng.net)|108.179.200.15|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://mftp.mmcheng.net/Data/MSRA10K_Imgs_GT.zip [following]
--2025-04-26 00:50:25--  https://mftp.mmcheng.net/Data/MSRA10K_Imgs_GT.zip
Connecting to mftp.mmcheng.net (mftp.mmcheng.net)|108.179.200.15|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 205213299 (196M) [application/zip]
Saving to: ‘MSRA10K_Imgs_GT.zip’


2025-04-26 00:51:09 (4.62 MB/s) - ‘MSRA10K_Imgs_GT.zip’ saved [205213299/205213299]



In [532]:
# Copy the pretrained MobileNetV2 weights from `mbnet` into the encoder `enc`.
set_encoder_weights(enc,mbnet)

In [545]:
# Freeze the pretrained encoder, keeping only the expand convolution of the
# first block trainable.
first_block, *pretrained_blocks = enc._layers

# Blocks 2..N are frozen wholesale.
for block in pretrained_blocks:
    block.trainable = False

# The first block itself stays trainable and its other sub-layers are frozen
# one by one. Freezing the whole block and re-enabling a single child is
# fragile: a frozen parent may report no trainable weights at all, in which
# case the child would silently never be trained.
first_block.trainable = True  # reset first, so re-running this cell gives the same state
for frozen_attr in ('bn1','relu1','depthwise_conv','bn2','relu2','project_conv','bn3','add_layer'):
    getattr(first_block, frozen_attr).trainable = False
first_block.expand_conv.trainable = True

In [546]:
# Sanity check: print each encoder block's name followed by the name and
# `trainable` flag of every sub-layer it holds.
for block in enc._layers:
    print(block.name)
    for layer in block._layers:
        print(layer.name,layer.trainable)

block_1_1
block_1_1_expand True
block_1_1_expand_BN False
block_1_1_expand_relu False
block_1_1_depthwise False
block_1_1_depthwise_BN False
block_1_1_depthwise_relu False
block_1_1_project False
block_1_1_project_BN False
block_1_1_add False
block_2_1
block_2_1_expand False
block_2_1_expand_BN False
block_2_1_expand_relu False
block_2_1_depthwise False
block_2_1_depthwise_BN False
block_2_1_depthwise_relu False
block_2_1_project False
block_2_1_project_BN False
block_2_1_add False
block_2_2
block_2_2_expand False
block_2_2_expand_BN False
block_2_2_expand_relu False
block_2_2_depthwise False
block_2_2_depthwise_BN False
block_2_2_depthwise_relu False
block_2_2_project False
block_2_2_project_BN False
block_2_2_add False
block_3_1
block_3_1_expand False
block_3_1_expand_BN False
block_3_1_expand_relu False
block_3_1_depthwise False
block_3_1_depthwise_BN False
block_3_1_depthwise_relu False
block_3_1_project False
block_3_1_project_BN False
block_3_1_add False
block_3_2
block_3_2_expan