In [1]:
import numpy as np

from qonnx.core.datatype import DataType
from qonnx.transformation.infer_datatypes import InferDataTypes
from qonnx.transformation.infer_shapes import InferShapes

from qonnx.util.cleanup import cleanup as qonnx_cleanup

from finn.util.visualization import showInNetron
from qonnx.core.modelwrapper import ModelWrapper

from qonnx.custom_op.registry import getCustomOp

import onnx.helper as oh
import qonnx.util.basic as util

In [2]:
# from onnx import __version__, IR_VERSION
# from onnx.defs import onnx_opset_version
# print(f"onnx.__version__={__version__!r}, opset={onnx_opset_version()}, IR_VERSION={IR_VERSION}")

In [3]:
prune_folder = './manual_pruning/only_thresholds/Sparse24/'

# Load Model and Clean

In [4]:
# model_file = './onnx_models/MY_MBLNET_V2_RESNET_classifier__best_mean_F1__BIPOLAR_Out__QONNX.onnx'

# model_file = './onnx_models/Mobilenetv2_Mini_Resnet_112__best_F1__Bipolar.onnx'

# model_file = './onnx_models/Mobilenetv2_Mini_Resnet_4bitINP__best_F1__Bipolar.onnx'

model_file = './onnx_models/Mobilenetv2_Mini_Resnet_Sparse24__best_F1__Bipolar.onnx'

In [5]:
qonnx_clean_filename = prune_folder + '00_prune_clean.onnx'
qonnx_cleanup(model_file, out_file=qonnx_clean_filename)

In [6]:
showInNetron(qonnx_clean_filename)

Serving './manual_pruning/only_thresholds/Sparse24/00_prune_clean.onnx' at http://0.0.0.0:8083


# Analyze layers to prune

Check for weight scales whose magnitude is nonzero but smaller than a threshold $\epsilon$:

$$
0 < |\text{scale}| < \epsilon
$$

Store all initializers in a list and look for the corresponding convolution afterwards.

In [7]:
model = ModelWrapper(qonnx_clean_filename)

In [8]:
all_inits_names = [init.name for init in model.graph.initializer]

print(f'Number of initializers = {len(all_inits_names)}')

Number of initializers = 376


In [9]:
eps = 1e-15

layers_to_prune = {}

for init_name in all_inits_names:
    # Only "param1" initializers of Quant nodes are inspected; they are treated as scale values.
    if "Quant" not in init_name or "param1" not in init_name:
        continue

    scale_magnitude = np.abs(model.get_initializer(init_name))
    # Entries that are nonzero but smaller than eps
    tiny_mask = (scale_magnitude < eps) & (scale_magnitude > 0)
    if not tiny_mask.any():
        # Nothing under epsilon in this initializer
        continue

    # First-axis indices of the tiny entries, stored under key 1 for the owning Quant node
    tiny_channels = np.where(tiny_mask)[0]
    quant_layer_name = init_name.split("_param")[0]
    layers_to_prune[quant_layer_name] = {1: set(tiny_channels)}

In [10]:
print(f'Number of layers to prune: {len(layers_to_prune)}')
for k, v in layers_to_prune.items():
    print(k, v)

Number of layers to prune: 15
Quant_12 {1: {28}}
Quant_13 {1: {28}}
Quant_15 {1: {0, 83, 45, 54}}
Quant_16 {1: {0, 83, 45, 54}}
Quant_18 {1: {64, 33, 2, 99, 102, 7, 40, 41, 12, 13, 112, 51, 20, 89, 59, 94}}
Quant_19 {1: {64, 33, 2, 99, 102, 7, 40, 41, 12, 13, 112, 51, 20, 89, 59, 94}}
Quant_21 {1: {3, 7, 9, 12, 21, 24, 27, 29, 31, 32, 39, 41, 43, 44, 50, 57, 63, 68, 71, 73, 75, 80, 83, 90, 94, 96, 97, 99, 103, 113, 117, 124, 125}}
Quant_22 {1: {3, 7, 9, 12, 21, 24, 27, 29, 31, 32, 39, 41, 43, 44, 50, 57, 63, 68, 71, 73, 75, 80, 83, 90, 94, 96, 97, 99, 103, 113, 117, 124, 125}}
Quant_24 {1: {30}}
Quant_25 {1: {30}}
Quant_26 {1: {10, 51, 14, 7}}
Quant_27 {1: {0, 1, 5, 7, 10, 11, 12, 13, 14, 15, 16, 18, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 34, 35, 37, 38, 45, 47, 49, 51, 54, 56, 57, 58, 59, 66, 67, 68, 70, 71, 72, 73, 74, 75, 76, 78, 79, 82, 85, 89, 92, 93, 97, 98, 100, 103, 104, 109, 110, 111, 112, 115, 117, 118, 121, 126, 127}}
Quant_28 {1: {0, 1, 5, 7, 10, 11, 12, 13, 14, 15, 16, 18

In [11]:
# print(len(layers_to_prune["Quant_30"][1]))

### Get all Convolutions or Linears to be pruned

In [12]:
all_nodes = model.graph.node

# Every graph node whose name is a key of layers_to_prune contributes the
# name of its first direct successor, in graph order.
convs_to_prune = [
    model.find_direct_successors(graph_node)[0].name
    for graph_node in all_nodes
    if graph_node.name in layers_to_prune
]

print("All convolutions to prune")
for conv_name in convs_to_prune:
    print(conv_name)

# # Remove last 5 convs, as they are harder to prune
# for i in range(5):
#     convs_to_prune.pop()

# # Print again
# print("\nEasy convolutions to prune")
# for conv in convs_to_prune:
#     print(conv)

All convolutions to prune
Conv_12
Conv_13
Conv_15
Conv_16
Conv_18
Conv_19
Conv_21
Conv_22
Conv_24
Conv_25
Conv_26
Conv_27
Conv_28
Conv_29
Conv_30


### Get Sparsity to compare after pruning

Retrieve the weight tensor of every layer to prune and compute:
$$
\text{Sparsity} = \frac{N_{\text{zeros}}}{N_{\text{elements}}}
$$

In [13]:
def get_sparsity(model_wrapper, layers_to_prune):
    """Measure the fraction of exactly-zero values in each layer's weight tensor.

    Args:
        model_wrapper: object exposing ``get_initializer(name)``, which returns
            the array stored under ``name``.
        layers_to_prune: mapping keyed by layer name; only the keys are used.
            The tensor looked up for each key is ``key + "_param0"``.

    Returns:
        dict keyed by the ``"<layer>_param0"`` names. Each value is a dict with
        ``"zeros"`` (count of zero entries), ``"total"`` (number of entries)
        and ``"sparsity"`` (zeros / total, rounded to two decimals).
    """
    report = {}

    for layer_name in layers_to_prune:
        weight_name = layer_name + "_param0"
        weights = model_wrapper.get_initializer(weight_name)

        zero_count = np.count_nonzero(weights == 0)
        element_count = weights.size

        report[weight_name] = {
            "zeros": zero_count,
            "total": element_count,
            "sparsity": round(zero_count / element_count, 2),
        }

    return report

In [14]:
sparsity_before_pruning = get_sparsity(model, layers_to_prune)
for k, v in sparsity_before_pruning.items():
    print(k, v)

Quant_12_param0 {'zeros': 122, 'total': 1152, 'sparsity': 0.11}
Quant_13_param0 {'zeros': 37, 'total': 432, 'sparsity': 0.09}
Quant_15_param0 {'zeros': 315, 'total': 2304, 'sparsity': 0.14}
Quant_16_param0 {'zeros': 86, 'total': 864, 'sparsity': 0.1}
Quant_18_param0 {'zeros': 902, 'total': 4096, 'sparsity': 0.22}
Quant_19_param0 {'zeros': 211, 'total': 1152, 'sparsity': 0.18}
Quant_21_param0 {'zeros': 1358, 'total': 4096, 'sparsity': 0.33}
Quant_22_param0 {'zeros': 352, 'total': 1152, 'sparsity': 0.31}
Quant_24_param0 {'zeros': 226, 'total': 2048, 'sparsity': 0.11}
Quant_25_param0 {'zeros': 56, 'total': 576, 'sparsity': 0.1}
Quant_26_param0 {'zeros': 720, 'total': 4096, 'sparsity': 0.18}
Quant_27_param0 {'zeros': 5019, 'total': 8192, 'sparsity': 0.61}
Quant_28_param0 {'zeros': 642, 'total': 1152, 'sparsity': 0.56}
Quant_29_param0 {'zeros': 5004, 'total': 8192, 'sparsity': 0.61}
Quant_30_param0 {'zeros': 3045, 'total': 8192, 'sparsity': 0.37}


# Check Layers with all zero kernels and replace scales

## Process:

Args: 
- model: ModelWrapper of the model to prune
- conv: string of the convolution layer to prune
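>**Steps**: <br>
>1. Get the Conv node from the model.
>2. Find its direct predecessors: element [1] is the Quant node that produces the convolution weights, so store it.
>3. Modify the scale of that weight Quant node: scales with a magnitude below epsilon are replaced by the scale of the first channel that is kept.
>4. Find the direct successor, which is the batch norm layer.
>5. Modify the batch norm layer: for the same channels, the entries of its four parameter tensors are replaced by those of the first channel that is kept.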

#### Prune weights of convolution

In [15]:
def prune_conv_weights(model, quant_node, threshold=None):
    """Replace near-zero scales of a Quant node with the scale of a kept channel.

    The scale tensor is the initializer named by ``quant_node.input[1]``.
    Entries whose magnitude is nonzero but below ``threshold`` are overwritten
    with the scale of the first entry whose magnitude is above ``threshold``.
    The patched copy is stored back through ``model.set_initializer``.

    Args:
        model: object exposing ``get_initializer(name)`` and
            ``set_initializer(tensor_name=..., tensor_value=...)``.
        quant_node: node with a ``name`` and an ``input`` sequence whose
            element [1] names the scale initializer.
        threshold: magnitude limit. When ``None`` (the default) the
            notebook-level ``eps`` constant is used, which is the previous
            behaviour.

    Returns:
        Tuple ``(non_zero_idx, zero_idx)`` of first-axis index arrays: the
        entries above the threshold, and the entries that were replaced.

    Raises:
        ValueError: if some scales need replacing but no entry is above the
            threshold, so there is no value to copy from.
    """
    if threshold is None:
        threshold = eps  # fall back to the notebook-level constant

    print(f'\n############ Pruning Weights of {quant_node.name} node ############')
    quant_1 = quant_node.input[1]
    np_q1 = model.get_initializer(quant_1)
    print(f'Quant 1 shape: {np_q1.shape}')

    np_q1_abs = np.abs(np_q1)
    # Boolean masks are combined with `&`; exact zeros fall in neither group.
    zero_idx = np.where((np_q1_abs < threshold) & (np_q1_abs > 0))[0]
    non_zero_idx = np.where(np_q1_abs > threshold)[0]
    print("-------------------------------------")
    print(f'*** Zero IDX, channels to be pruned ({zero_idx.size} elements):\n{zero_idx}')
    print(f'### Non Zero IDX, channels to keep ({non_zero_idx.size} elements):\n{non_zero_idx}')
    print("-------------------------------------")

    if non_zero_idx.size == 0:
        if zero_idx.size > 0:
            raise ValueError(
                f'{quant_node.name}: {zero_idx.size} scale(s) are below the threshold, '
                'but no scale is above it to use as replacement'
            )
        # Nothing to replace and nothing to copy from: leave the initializer untouched.
        return non_zero_idx, zero_idx

    np_q1_replace = np_q1[non_zero_idx][0]  # scale of the first kept entry
    np_q1_copy = np_q1.copy()  # Original array is read-only, so it must be copied first
    np_q1_copy[zero_idx] = np_q1_replace
    print(f'Value of [0] channel to use it as default for zero elements: {np_q1_replace}')

    model.set_initializer(
        tensor_name=quant_1,
        tensor_value=np_q1_copy)

    return non_zero_idx, zero_idx

##### Test weight pruning

In [16]:
def test_prune_conv_weights(model, conv: str):
    """Run ``prune_conv_weights`` on one convolution, looked up by name.

    Args:
        model: model wrapper providing ``get_node_from_name`` and
            ``find_direct_predecessors``.
        conv: name of the convolution node.

    Returns:
        The ``(non_zero_idx, zero_idx)`` pair returned by ``prune_conv_weights``.
    """
    target_conv = model.get_node_from_name(conv)
    # Predecessor [1] is used as the node that feeds the convolution weights.
    weight_quant = model.find_direct_predecessors(target_conv)[1]

    kept, replaced = prune_conv_weights(model=model, quant_node=weight_quant)
    return kept, replaced

In [17]:
model = ModelWrapper(qonnx_clean_filename)
non_zero_idx, zero_idx = test_prune_conv_weights(model, "Conv_0")


############ Pruning Weights of Quant_0 node ############
Quant 1 shape: (24, 1, 1, 1)
-------------------------------------
*** Zero IDX, channels to be pruned (0 elements):
[]
### Non Zero IDX, channels to keep (24 elements):
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
-------------------------------------
Value of [0] channel to use it as default for zero elements: [[[0.03360252]]]


In [18]:
test_prune_weights = prune_folder + "01_test_prune_weights.onnx"
model.save(test_prune_weights)

In [19]:
showInNetron(test_prune_weights)

Stopping http://0.0.0.0:8083
Serving './manual_pruning/only_thresholds/Sparse24/01_test_prune_weights.onnx' at http://0.0.0.0:8083


#### Prune batch norm

In [20]:
def prune_bn(model, bn_node, non_zero_idx, zero_idx):
    """Overwrite the batch-norm parameters of the selected channels.

    The four tensors named by ``bn_node.input[1]`` to ``bn_node.input[4]`` are
    read from the model. In each one, the entries at ``zero_idx`` are replaced
    by the entry at the first index of ``non_zero_idx``, and the patched copy
    is stored back with ``model.set_initializer``.

    Args:
        model: object exposing ``get_initializer`` and ``set_initializer``.
        bn_node: node with a ``name`` and at least five ``input`` entries.
        non_zero_idx: indices of channels to keep; the first one supplies the
            replacement values.
        zero_idx: indices of channels whose parameters are overwritten.

    Returns:
        None. The model is modified through ``set_initializer``.
    """
    print(f'\n############ Prune Batch Norm {bn_node.name} node ############')

    # Read all four parameter tensors before anything is written back.
    param_names = [bn_node.input[slot] for slot in (1, 2, 3, 4)]
    params = [model.get_initializer(param_name) for param_name in param_names]

    print("-------------------------------------")
    print(f'*** Zero IDX, scale value of channels to be pruned:\n{params[0][zero_idx]}')
    print("-------------------------------------")

    # Replacement value per tensor: the entry of the first kept channel.
    fill_values = [param[non_zero_idx][0] for param in params]

    patched_params = []
    for param, fill_value in zip(params, fill_values):
        patched = param.copy()  # patch a copy, leave the array that was read untouched
        patched[zero_idx] = fill_value
        patched_params.append(patched)

    for param_name, patched in zip(param_names, patched_params):
        model.set_initializer(
            tensor_name=param_name,
            tensor_value=patched)

##### Test prune batch norm

In [21]:
def test_prune_conv_bn(model, conv: str):
    """Run the scale replacement and the batch-norm patch for one convolution.

    Args:
        model: model wrapper providing ``get_node_from_name``,
            ``find_direct_predecessors`` and ``find_direct_successors``.
        conv: name of the convolution node.

    Returns:
        The ``(non_zero_idx, zero_idx)`` pair returned by ``prune_conv_weights``.
    """
    target_conv = model.get_node_from_name(conv)
    # Predecessor [1] is used as the node that feeds the convolution weights.
    weight_quant = model.find_direct_predecessors(target_conv)[1]

    # Weight scales first: the index arrays it returns drive the batch-norm step.
    kept, replaced = prune_conv_weights(model=model, quant_node=weight_quant)

    # The first direct successor of the convolution is used as its batch norm.
    batch_norm = model.find_direct_successors(target_conv)[0]
    prune_bn(model, batch_norm, kept, replaced)

    return kept, replaced

In [22]:
model = ModelWrapper(qonnx_clean_filename)
non_zero_idx, zero_idx = test_prune_conv_bn(model, "Conv_0")


############ Pruning Weights of Quant_0 node ############
Quant 1 shape: (24, 1, 1, 1)
-------------------------------------
*** Zero IDX, channels to be pruned (0 elements):
[]
### Non Zero IDX, channels to keep (24 elements):
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
-------------------------------------
Value of [0] channel to use it as default for zero elements: [[[0.03360252]]]

############ Prune Batch Norm BatchNormalization_0 node ############
-------------------------------------
*** Zero IDX, scale value of channels to be pruned:
[]
-------------------------------------


In [23]:
test_prune_conv_bn_onnx = prune_folder + "02_test_prune_conv_bn.onnx"
model.save(test_prune_conv_bn_onnx)

In [24]:
showInNetron(test_prune_conv_bn_onnx)

Stopping http://0.0.0.0:8083
Serving './manual_pruning/only_thresholds/Sparse24/02_test_prune_conv_bn.onnx' at http://0.0.0.0:8083


# Test Pruning of first 2 Convs

In [25]:
# model = ModelWrapper(qonnx_clean_filename)
# # non_zero_idx, zero_idx, new_ch, new_shape = test_prune_conv(model, "Conv_0")
# # non_zero_idx, zero_idx, new_ch, new_shape = test_prune_conv(model, "Conv_1")

# _, _, _, _ = test_prune_conv(model, "Conv_0")
# _, _, _, _ = test_prune_conv(model, "Conv_1")

In [26]:
# test_prune_2_conv = prune_folder + "10_test_prune_2_conv.onnx"

# model = model.transform(InferShapes())
# model.save(test_prune_2_conv)

In [27]:
# showInNetron(test_prune_2_conv)

# Test Pruning the Whole Model - NO MUL 4 or 8

In [28]:
# Reload the cleaned model file, so this run does not start from the model
# object modified by the single-layer tests in the earlier cells.
model = ModelWrapper(qonnx_clean_filename)

# Apply the scale replacement and batch-norm patch to every listed convolution.
for conv in convs_to_prune:
    # Banner separating the log output of each convolution
    print(f'\n______________________________________________________________________________________________________')
    print(f'                                                {conv} ')
    print(f'______________________________________________________________________________________________________')

    # The returned index arrays are discarded; the model is updated inside the call.
    _, _, = test_prune_conv_bn(model, conv)


______________________________________________________________________________________________________
                                                Conv_12 
______________________________________________________________________________________________________

############ Pruning Weights of Quant_12 node ############
Quant 1 shape: (48, 1, 1, 1)
-------------------------------------
*** Zero IDX, channels to be pruned (1 elements):
[28]
### Non Zero IDX, channels to keep (47 elements):
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47]
-------------------------------------
Value of [0] channel to use it as default for zero elements: [[[0.02151186]]]

############ Prune Batch Norm BatchNormalization_12 node ############
-------------------------------------
*** Zero IDX, scale value of channels to be pruned:
[-4.909e-42]
-------------------------------------

_______________________________

In [29]:
model = model.transform(InferShapes())

In [30]:
prune_all_convs_onnx = prune_folder + "prune_all_convs_only_thres.onnx"
model.save(prune_all_convs_onnx)

In [31]:
showInNetron(prune_all_convs_onnx)

Stopping http://0.0.0.0:8083
Serving './manual_pruning/only_thresholds/Sparse24/prune_all_convs_only_thres.onnx' at http://0.0.0.0:8083


In [32]:
sparsity_after_pruning = get_sparsity(model, layers_to_prune)

# Walk both reports in parallel; the keys must line up entry by entry.
for (k1, stats_before), (k2, stats_after) in zip(
    sparsity_before_pruning.items(), sparsity_after_pruning.items()
):
    assert k1 == k2, f'{k1} is not the same as {k2}'
    before = stats_before["sparsity"]
    after = stats_after["sparsity"]
    print(f'{k1}: \tbefore: {before:<4} - after: {after:<4}')

Quant_12_param0: 	before: 0.11 - after: 0.11
Quant_13_param0: 	before: 0.09 - after: 0.09
Quant_15_param0: 	before: 0.14 - after: 0.14
Quant_16_param0: 	before: 0.1  - after: 0.1 
Quant_18_param0: 	before: 0.22 - after: 0.22
Quant_19_param0: 	before: 0.18 - after: 0.18
Quant_21_param0: 	before: 0.33 - after: 0.33
Quant_22_param0: 	before: 0.31 - after: 0.31
Quant_24_param0: 	before: 0.11 - after: 0.11
Quant_25_param0: 	before: 0.1  - after: 0.1 
Quant_26_param0: 	before: 0.18 - after: 0.18
Quant_27_param0: 	before: 0.61 - after: 0.61
Quant_28_param0: 	before: 0.56 - after: 0.56
Quant_29_param0: 	before: 0.61 - after: 0.61
Quant_30_param0: 	before: 0.37 - after: 0.37
