## Accelerate Inference: Neural Network Pruning

In [None]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import pickle


import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, models, regularizers
from tensorflow.keras.layers import *
from tensorflow.keras.models import clone_model


# Print the TensorFlow version this notebook is running against.
print(tf.version.VERSION)

2.14.0


In [None]:
# Mount Google Drive at /content/drive; the dataset archive is extracted from
# '/content/drive/My Drive/' in the next cell.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# untar
!tar -xvzf '/content/drive/My Drive/dataset.tar.gz'
# load train
train_images = pickle.load(open('train_images.pkl', 'rb'))
train_labels = pickle.load(open('train_labels.pkl', 'rb'))
# load val
val_images = pickle.load(open('val_images.pkl', 'rb'))
val_labels = pickle.load(open('val_labels.pkl', 'rb'))

train_images.pkl
train_labels.pkl
val_images.pkl
val_labels.pkl


In [None]:
# Define the neural network architecture (don't change this)
# Sequential CNN: 25x25x3 input -> two conv/conv/max-pool/dropout stages
# -> Dense(512) -> 5-way softmax.

model = models.Sequential()
# Stage 1: two 32-filter 3x3 convolutions with L2-regularized kernels. Only the
# first one is 'same'-padded; the second uses the layer's default padding.
model.add(Conv2D(32, (3, 3), padding='same', kernel_regularizer=regularizers.l2(1e-5), input_shape=(25,25,3)))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3), kernel_regularizer=regularizers.l2(1e-5)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
# Stage 2: same layout as stage 1 with 64 filters.
model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=regularizers.l2(1e-5)))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3), kernel_regularizer=regularizers.l2(1e-5)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
# Classifier head; unlike the conv layers, the Dense layers have no kernel regularizer.
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(5))
# The network outputs softmax probabilities, so the loss used with it must be
# configured with from_logits=False.
model.add(Activation('softmax'))

In [None]:
# summary() prints the layer table itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" to the output.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 25, 25, 32)        896       
                                                                 
 activation (Activation)     (None, 25, 25, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 23, 23, 32)        9248      
                                                                 
 activation_1 (Activation)   (None, 23, 23, 32)        0         
                                                                 
 max_pooling2d (MaxPooling2  (None, 11, 11, 32)        0         
 D)                                                              
                                                                 
 dropout (Dropout)           (None, 11, 11, 32)        0         
                                                        

In [None]:
# you can use the default hyper-parameters for training,
# val accuracy ~72% after 50 epochs

# from_logits=False because the model's last layer is a softmax activation.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001, weight_decay=1e-6),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

history = model.fit(train_images, train_labels, batch_size=32, epochs=80,
                    validation_data=(val_images, val_labels)) # train for 80 epochs, with batch size 32

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


In [None]:
# Baseline: evaluate the unpruned model on the validation set (loss plus the
# 'accuracy' metric it was compiled with).
results = model.evaluate(val_images, val_labels, batch_size=128)



Magnitude-based pruning - Percentile-based global threshold determination

In [None]:
def prune_and_retrain(model, train_images, train_labels, val_images, val_labels,
                      pruning_rate=0.1, retrain_epochs=10, batch_size=128):
    """Magnitude-prune a copy of ``model`` with one global threshold, then retrain it.

    Every weight array with more than one dimension (conv / dense kernels) is
    pruned: entries whose absolute value lies below the ``pruning_rate``
    percentile of all such entries are set to zero. One-dimensional arrays
    (biases) are left untouched. The pruned entries are held at zero while the
    copy is retrained, so the returned model is still sparse afterwards.

    Args:
        model: trained Keras model; it is cloned and never modified.
        train_images, train_labels: data used for retraining.
        val_images, val_labels: validation data passed to ``fit``.
        pruning_rate: fraction (0-1) of kernel weights to zero out.
        retrain_epochs: number of retraining epochs.
        batch_size: retraining batch size.

    Returns:
        ``(history, weights, pruned_model)`` where ``weights`` is the list of
        arrays held by ``pruned_model`` after retraining.
    """
    pruned_model = clone_model(model)
    weights = model.get_weights()

    # Step 1: Prune the network
    flat_weights = np.concatenate([w.flatten() for w in weights if w.ndim > 1])
    threshold = np.percentile(np.abs(flat_weights), pruning_rate * 100)

    new_weights = []
    masked_variables = []  # (variable, 0/1 mask) for every pruned kernel
    for variable, w in zip(pruned_model.weights, weights):
        if w.ndim > 1:  # Applies to weights in Conv and Dense layers
            below_threshold = np.abs(w) < threshold
            new_weights.append(np.where(below_threshold, 0, w))
            masked_variables.append((variable, (~below_threshold).astype(w.dtype)))
        else:  # Bias and other parameters are typically not pruned
            new_weights.append(w)
    pruned_model.set_weights(new_weights)

    class _ReapplyPruningMasks(tf.keras.callbacks.Callback):
        """Re-zero the pruned weights after every training batch.

        Without this, gradient updates move the pruned weights away from zero
        and the retrained model is dense again.
        """

        def on_train_batch_end(self, batch, logs=None):
            for masked_variable, mask in masked_variables:
                masked_variable.assign(masked_variable * mask)

    # Step 2: Retrain the network
    pruned_model.compile(optimizer='adam',
                         loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                         metrics=['accuracy'])

    # use a lower learning rate for retraining (one tenth of the optimizer default)
    old_lr = tf.keras.backend.get_value(pruned_model.optimizer.lr)
    new_lr = old_lr * 0.1
    tf.keras.backend.set_value(pruned_model.optimizer.lr, new_lr)

    history = pruned_model.fit(train_images, train_labels,
                               batch_size=batch_size,
                               epochs=retrain_epochs,
                               validation_data=(val_images, val_labels),
                               callbacks=[_ReapplyPruningMasks()])

    # Return the weights the model actually holds after retraining, not the
    # pre-retraining snapshot, so callers measure the real sparsity.
    return history, pruned_model.get_weights(), pruned_model

history, pruned_weights, pruned_model = prune_and_retrain(
    model, train_images, train_labels, val_images, val_labels,
    pruning_rate=0.95, retrain_epochs=15, batch_size=32)

# Evaluate the pruned model
val_loss, val_accuracy = pruned_model.evaluate(val_images, val_labels, batch_size=128)
print(f"Validation Accuracy: {val_accuracy}")

# Measure sparsity on the weights the evaluated model holds right now, so the
# accuracy and the sparsity in the score describe the same network.
pruned_weights = pruned_model.get_weights()

# Calculate the total number of weights
total_weights = sum(w.size for w in pruned_weights)

# Calculate the number of zero weights (sparsity)
num_zero_weights = sum(np.count_nonzero(w == 0) for w in pruned_weights)
sparsity = num_zero_weights / total_weights
print(f"Sparsity: {sparsity}")

# Calculate the score based on the provided formula
if val_accuracy > 0.6 and sparsity > 0:
    score = (val_accuracy + sparsity) / 2
else:
    score = 0

print(f"Model Score: {score}")

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Validation Accuracy: 0.7433663606643677
Sparsity: 0.9488626876898402
Model Score: 0.846114524177104


In [None]:
# you need to save the model's weights, naming it 'my_model_weights.h5'
# NOTE: this cell writes 'my_model_weights_1.h5' (with a "_1" suffix), not the
# name given in the line above.
pruned_model.save_weights("my_model_weights_1.h5")

# running this cell will immediately download the file 'my_model_weights_1.h5'
from google.colab import files
files.download("my_model_weights_1.h5")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Magnitude-based pruning - Standard deviation-based threshold determination

In [None]:
import numpy as np
import tensorflow as tf

def prune_and_retrain(model, train_images, train_labels, val_images, val_labels, quality_parameter=0.5, retrain_epochs=10, batch_size=128):
    """Magnitude-prune a copy of ``model`` with per-array std-dev thresholds, then retrain it.

    For every weight array with more than one dimension (conv / dense kernels)
    the threshold is ``quality_parameter * std(array)``; entries whose absolute
    value lies below it are set to zero. One-dimensional arrays (biases) are
    left untouched. The pruned entries are held at zero while the copy is
    retrained, so the returned model is still sparse afterwards.

    NOTE: this definition reuses the name of the percentile-based
    ``prune_and_retrain`` defined earlier in the notebook and replaces it once
    this cell has run.

    Args:
        model: trained Keras model; it is cloned and never modified.
        train_images, train_labels: data used for retraining.
        val_images, val_labels: validation data passed to ``fit``.
        quality_parameter: multiplier applied to each array's standard
            deviation to obtain its pruning threshold.
        retrain_epochs: number of retraining epochs.
        batch_size: retraining batch size.

    Returns:
        ``(history, weights, pruned_model)`` where ``weights`` is the list of
        arrays held by ``pruned_model`` after retraining.
    """
    pruned_model = clone_model(model)

    # Get the weights of the model
    weights = model.get_weights()

    # Step 1: Prune the network using a layer-specific standard deviation-based threshold
    new_weights = []
    masked_variables = []  # (variable, 0/1 mask) for every pruned kernel
    for variable, w in zip(pruned_model.weights, weights):
        if w.ndim > 1:  # Applies to weights in Conv and Dense layers
            # Calculate the threshold as a quality parameter times the standard deviation
            threshold = quality_parameter * np.std(w)
            below_threshold = np.abs(w) < threshold
            new_weights.append(np.where(below_threshold, 0, w))
            masked_variables.append((variable, (~below_threshold).astype(w.dtype)))
        else:  # Bias and other parameters are typically not pruned
            new_weights.append(w)

    pruned_model.set_weights(new_weights)

    class _ReapplyPruningMasks(tf.keras.callbacks.Callback):
        """Re-zero the pruned weights after every training batch.

        Without this, gradient updates move the pruned weights away from zero
        and the retrained model is dense again.
        """

        def on_train_batch_end(self, batch, logs=None):
            for masked_variable, mask in masked_variables:
                masked_variable.assign(masked_variable * mask)

    # Step 2: Retrain the network
    pruned_model.compile(optimizer='adam',
                         loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                         metrics=['accuracy'])

    # use a lower learning rate for retraining (one tenth of the optimizer default)
    old_lr = tf.keras.backend.get_value(pruned_model.optimizer.lr)
    new_lr = old_lr * 0.1
    tf.keras.backend.set_value(pruned_model.optimizer.lr, new_lr)

    history = pruned_model.fit(train_images, train_labels,
                               batch_size=batch_size,
                               epochs=retrain_epochs,
                               validation_data=(val_images, val_labels),
                               callbacks=[_ReapplyPruningMasks()])

    # Return the weights the model actually holds after retraining, not the
    # pre-retraining snapshot, so callers measure the real sparsity.
    return history, pruned_model.get_weights(), pruned_model

history, pruned_weights, pruned_model = prune_and_retrain(
    model, train_images, train_labels, val_images, val_labels,
    quality_parameter=1.8, retrain_epochs=80, batch_size=32)

# Evaluate the pruned model
val_loss, val_accuracy = pruned_model.evaluate(val_images, val_labels, batch_size=128)
print(f"Validation Accuracy: {val_accuracy}")

# Measure sparsity on the weights the evaluated model holds right now, so the
# accuracy and the sparsity in the score describe the same network.
pruned_weights = pruned_model.get_weights()

# Calculate the total number of weights
total_weights = sum(w.size for w in pruned_weights)

# Calculate the number of zero weights (sparsity)
num_zero_weights = sum(np.count_nonzero(w == 0) for w in pruned_weights)
sparsity = num_zero_weights / total_weights
print(f"Sparsity: {sparsity}")

# Calculate the score based on the provided formula
if val_accuracy > 0.6 and sparsity > 0:
    score = (val_accuracy + sparsity) / 2
else:
    score = 0

print(f"Model Score: {score}")

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
Validation Accuracy: 0.7548514604568481
Sparsity:

In [None]:
# you need to save the model's weights, naming it 'my_model_weights.h5'
# Save the pruned and retrained network (`pruned_model`), not the original
# unpruned `model`.
pruned_model.save_weights("my_model_weights_2.h5")

# running this cell will immediately download the file 'my_model_weights_2.h5'
from google.colab import files
files.download("my_model_weights_2.h5")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

L1-norm based filter pruning



In [None]:
def prune_and_finetune(model, train_images, train_labels, val_images, val_labels,
                       pruning_rate=0.1, retrain_epochs=10, batch_size=128):
    """L1-norm filter pruning of a copy of ``model`` followed by fine-tuning.

    For every Conv2D / Dense layer the L1 norm of each output unit (conv
    filter or dense neuron) is computed by summing absolute kernel values over
    all axes except the last. One threshold, the ``pruning_rate`` percentile
    of the norms pooled over all those layers, decides which units are
    zeroed. Biases are not changed. The zeroed kernels are held at zero during
    fine-tuning, so the returned model is still sparse afterwards.

    NOTE(review): the threshold pools norms from layers with very different
    fan-in, and the final Dense layer (the class outputs) is pruned like any
    other layer; confirm that both are intended.

    Args:
        model: trained Keras model; it is cloned and never modified.
        train_images, train_labels: data used for fine-tuning.
        val_images, val_labels: validation data passed to ``fit``.
        pruning_rate: fraction (0-1) of output units to zero out.
        retrain_epochs: number of fine-tuning epochs.
        batch_size: fine-tuning batch size.

    Returns:
        ``(history, pruned_model)``.
    """
    pruned_model = clone_model(model)
    pruned_model.set_weights(model.get_weights())  # Copy weights from the original model

    prunable_layers = [layer for layer in pruned_model.layers
                       if isinstance(layer, (tf.keras.layers.Conv2D, tf.keras.layers.Dense))]

    # Step 1: Calculate the L1-norm of each filter/neuron and prune
    l1_norms = []      # norms pooled over all prunable layers
    layer_norms = []   # the same norms, kept per layer
    for layer in prunable_layers:
        kernel = layer.get_weights()[0]
        # Sum over every axis but the last: axis 0 for Dense kernels,
        # axes (0, 1, 2) for Conv2D kernels.
        norms = np.sum(np.abs(kernel), axis=tuple(range(kernel.ndim - 1)))
        layer_norms.append(norms)
        l1_norms.extend(norms)

    threshold = np.percentile(l1_norms, pruning_rate * 100)

    masked_variables = []  # (kernel variable, 0/1 mask) for every pruned layer
    for layer, norms in zip(prunable_layers, layer_norms):
        kernel, biases = layer.get_weights()
        below_threshold = norms < threshold  # one flag per output unit
        # The per-unit flags broadcast along the kernel's last axis.
        layer.set_weights([np.where(below_threshold, 0, kernel), biases])
        mask = np.broadcast_to(~below_threshold, kernel.shape).astype(kernel.dtype)
        masked_variables.append((layer.kernel, mask))

    class _ReapplyPruningMasks(tf.keras.callbacks.Callback):
        """Re-zero the pruned filters after every training batch.

        Without this, gradient updates move the pruned weights away from zero
        and the fine-tuned model is dense again.
        """

        def on_train_batch_end(self, batch, logs=None):
            for masked_variable, mask in masked_variables:
                masked_variable.assign(masked_variable * mask)

    # Step 2: Retrain the network
    pruned_model.compile(optimizer='adam',
                         loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                         metrics=['accuracy'])

    # reduce the learning rate for retraining (one tenth of the optimizer default)
    old_lr = tf.keras.backend.get_value(pruned_model.optimizer.lr)
    new_lr = old_lr * 0.1
    tf.keras.backend.set_value(pruned_model.optimizer.lr, new_lr)

    history = pruned_model.fit(train_images, train_labels,
                               batch_size=batch_size,
                               epochs=retrain_epochs,
                               validation_data=(val_images, val_labels),
                               callbacks=[_ReapplyPruningMasks()])

    # restore the original learning rate
    tf.keras.backend.set_value(pruned_model.optimizer.lr, old_lr)

    # Return the history for analysis and the model with pruned and fine-tuned weights
    return history, pruned_model



history, pruned_model = prune_and_finetune(
    model, train_images, train_labels, val_images, val_labels,
    pruning_rate=0.7, retrain_epochs=80, batch_size=32,
)

# Validation metrics of the pruned, fine-tuned model
val_loss, val_accuracy = pruned_model.evaluate(val_images, val_labels, batch_size=128)
print(f"Validation Accuracy: {val_accuracy}")

# Sparsity: zero kernel entries divided by all kernel + bias parameters of the
# Conv2D / Dense layers.
total_weights = 0
zero_weights = 0
for layer in pruned_model.layers:
    if not isinstance(layer, (tf.keras.layers.Conv2D, tf.keras.layers.Dense)):
        continue
    layer_params = layer.get_weights()
    weights = layer_params[0]  # kernel
    biases = layer_params[1]
    total_weights += weights.size + biases.size
    zero_weights += np.count_nonzero(weights == 0)

sparsity = zero_weights / total_weights
print(f"Sparsity: {sparsity}")

# Score per the provided formula: mean of accuracy and sparsity, or 0 when the
# accuracy is not above 0.6 or nothing was pruned.
score = (val_accuracy + sparsity) / 2 if (val_accuracy > 0.6 and sparsity > 0) else 0

print(f"Model Score: {score}")


Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
Validation Accuracy: 0.5972277522087097
Sparsity:

L1-norm based filter pruning - Gradual Pruning with Fine-Tuning

In [None]:
def gradual_prune_and_finetune(model, train_images, train_labels, val_images, val_labels,
                               initial_pruning_rate=0.1, pruning_steps=5, retrain_epochs=10, batch_size=128):
    """Prune a copy of ``model`` in several rounds, fine-tuning after each round.

    In round ``k`` (1-based) every Conv2D / Dense layer has the output units
    (conv filters or dense neurons) whose L1 norm lies below that layer's
    ``initial_pruning_rate * k / pruning_steps`` percentile set to zero, and
    the model is then trained for ``retrain_epochs`` epochs. Units pruned in
    any round are held at zero during all later training, so the returned
    model is still sparse afterwards. Biases are not changed.

    NOTE(review): the final Dense layer (the class outputs) is pruned like any
    other layer; confirm that this is intended.

    Args:
        model: trained Keras model; it is cloned and never modified.
        train_images, train_labels: data used for fine-tuning.
        val_images, val_labels: validation data passed to ``fit``.
        initial_pruning_rate: despite its name, the per-layer pruning rate
            (0-1) reached in the last round; earlier rounds use
            proportionally smaller rates.
        pruning_steps: number of prune + fine-tune rounds.
        retrain_epochs: fine-tuning epochs per round.
        batch_size: fine-tuning batch size.

    Returns:
        The pruned and fine-tuned copy of ``model``.
    """
    pruned_model = clone_model(model)
    pruned_model.set_weights(model.get_weights())  # Copy weights from the original model

    prunable_layers = [layer for layer in pruned_model.layers
                       if isinstance(layer, (tf.keras.layers.Conv2D, tf.keras.layers.Dense))]
    # Per-layer boolean flags (True = unit still alive), accumulated over the
    # rounds so that a unit pruned once is never released again.
    keep_flags = [None] * len(prunable_layers)
    masked_variables = []  # (kernel variable, 0/1 mask), rebuilt every round

    class _ReapplyPruningMasks(tf.keras.callbacks.Callback):
        """Re-zero the pruned filters after every training batch.

        Without this, gradient updates move the pruned weights away from zero
        and every fine-tuning round undoes the pruning.
        """

        def on_train_batch_end(self, batch, logs=None):
            for masked_variable, mask in masked_variables:
                masked_variable.assign(masked_variable * mask)

    for step in range(pruning_steps):
        current_pruning_rate = initial_pruning_rate * (step + 1) / pruning_steps

        # Step 1: Prune the network
        masked_variables.clear()
        for index, layer in enumerate(prunable_layers):
            weights, biases = layer.get_weights()
            # Sum over every axis but the last: axis 0 for Dense kernels,
            # axes (0, 1, 2) for Conv2D kernels.
            norms = np.sum(np.abs(weights), axis=tuple(range(weights.ndim - 1)))

            threshold = np.percentile(norms, current_pruning_rate * 100)
            keep = ~(norms < threshold)
            if keep_flags[index] is not None:
                keep = keep & keep_flags[index]
            keep_flags[index] = keep

            # The per-unit flags broadcast along the kernel's last axis.
            layer.set_weights([np.where(keep, weights, 0), biases])
            mask = np.broadcast_to(keep, weights.shape).astype(weights.dtype)
            masked_variables.append((layer.kernel, mask))

        # Step 2: Retrain the network
        pruned_model.compile(optimizer='adam',
                             loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                             metrics=['accuracy'])
        pruned_model.fit(train_images, train_labels, batch_size=batch_size, epochs=retrain_epochs,
                         validation_data=(val_images, val_labels),
                         callbacks=[_ReapplyPruningMasks()])

    return pruned_model

pruned_model = gradual_prune_and_finetune(
    model, train_images, train_labels, val_images, val_labels,
    initial_pruning_rate=0.9, pruning_steps=3, retrain_epochs=80, batch_size=32,
)

# Validation metrics of the pruned, fine-tuned model
val_loss, val_accuracy = pruned_model.evaluate(val_images, val_labels, batch_size=128)
print(f"Validation Accuracy after pruning: {val_accuracy}")

# Sparsity: zero kernel entries divided by all kernel + bias parameters of the
# Conv2D / Dense layers.
total_weights = 0
zero_weights = 0
for layer in pruned_model.layers:
    if not isinstance(layer, (tf.keras.layers.Conv2D, tf.keras.layers.Dense)):
        continue
    weights, biases = layer.get_weights()
    total_weights += weights.size + biases.size
    zero_weights += np.count_nonzero(weights == 0)

sparsity = zero_weights / total_weights
print(f"Sparsity: {sparsity}")

# Score: mean of accuracy and sparsity, or 0 when the accuracy is not above
# 0.6 or nothing was pruned.
score = (val_accuracy + sparsity) / 2 if (val_accuracy > 0.6 and sparsity > 0) else 0

print(f"Model Score: {score}")

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch

In [None]:
# you need to save the model's weights, naming it 'my_model_weights.h5'
# NOTE: this cell writes 'my_model_weights_3.h5' (with a "_3" suffix), not the
# name given in the line above.
pruned_model.save_weights("my_model_weights_3.h5")

# running this cell will immediately download the file 'my_model_weights_3.h5'
from google.colab import files
files.download("my_model_weights_3.h5")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>