# Data Preprocessing

In [None]:
# Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
import tempfile
import os

from keras.datasets import cifar10
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import ResNet50
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [None]:
# Set a seed for reproducibility
your_seed_value = 42
# One call seeds Python's `random`, NumPy and TensorFlow with the same value,
# so code paths that draw from the stdlib RNG are covered as well.
# NOTE(review): GPU kernels may still be non-deterministic; bit-exact runs
# would additionally need TensorFlow's op-determinism mode -- confirm if needed.
tf.keras.utils.set_random_seed(your_seed_value)

In [None]:
# Load the CIFAR-10 dataset (fetched over the network when not cached locally)
# as a training pair and a test pair of (images, labels).
train_split, test_split = cifar10.load_data()
X_train, Y_train = train_split
X_test, Y_test = test_split

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [None]:
# Normalize the data: cast the image arrays to float32 and divide by 255.
# NOTE(review): this cell rebinds X_train / X_test, so running it a second
# time divides the already-scaled values by 255 again.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

In [None]:
# One-hot encode both label arrays over 10 classes.
# NOTE(review): the labels are rebound in place, so this cell should only be
# run once per kernel session.
Y_train, Y_test = (to_categorical(labels, 10) for labels in (Y_train, Y_test))

In [None]:
import tracemalloc, time

In [None]:
# Stop at target accuracy
class AccuracyThresholdCallback(tf.keras.callbacks.Callback):
    """Keras callback that stops training once validation accuracy is high enough.

    Args:
        threshold: Value compared against the ``val_accuracy`` entry of the
            per-epoch logs. Training is stopped as soon as the metric is
            greater than or equal to this value.
    """

    def __init__(self, threshold):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Request a stop when ``logs['val_accuracy']`` reaches the threshold.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dictionary for the epoch; may be ``None``.
        """
        # `val_accuracy` is absent when fit() runs without validation data or
        # without the accuracy metric. Comparing None with a float would raise
        # TypeError, so skip the check in that case instead of crashing.
        val_accuracy = (logs or {}).get('val_accuracy')
        if val_accuracy is None:
            return

        if val_accuracy >= self.threshold:
            print(f"\nReached {self.threshold * 100}% accuracy so stopping training!")
            self.model.stop_training = True


target_accuracy = 0.705
accuracy_callback = AccuracyThresholdCallback(target_accuracy)

# ResNet50: non-quantized

In [None]:
# Build the baseline ResNet50: no pretrained weights (weights=None),
# 32x32 RGB inputs and a 10-class output.
model = ResNet50(
    input_shape=(32, 32, 3),
    classes=10,
    weights=None,
)

In [None]:
# Print the layer-by-layer summary of the non-quantized model
model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 32, 32, 3)]          0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 38, 38, 3)            0         ['input_2[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 16, 16, 64)           9472      ['conv1_pad[0][0]']           
                                                                                                  
 conv1_bn (BatchNormalizati  (None, 16, 16, 64)           256       ['conv1_conv[0][0]']          
 on)                                                                                       

In [None]:
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# GPU statistics can only be queried when a GPU is visible; get_memory_info
# raises for an unknown device name.
gpu_available = bool(tf.config.list_physical_devices('GPU'))
if gpu_available:
    # Reset the peak counter so the reported peak belongs to this run only.
    tf.config.experimental.reset_memory_stats('GPU:0')

# Track peak memory footprint and accuracy
# NOTE(review): the test split is used as validation data and also drives the
# early-stopping callback -- confirm this is intended.
tracemalloc.start()
start = time.perf_counter()  # monotonic clock, suited to measuring durations
try:
    history = model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=35, callbacks=[accuracy_callback])
    end = time.perf_counter()
    # get_traced_memory() returns (current, peak) in bytes of Python-level allocations
    print("Tracemalloc's output: ", tracemalloc.get_traced_memory())
finally:
    # Always stop tracing, even when training is interrupted or fails
    tracemalloc.stop()

print(f"Training took {end - start} seconds.")
if gpu_available:
    print(f"tf GPU memory tracking {tf.config.experimental.get_memory_info('GPU:0')}")
else:
    print("tf GPU memory tracking unavailable: no GPU device found")

Epoch 1/2
Epoch 2/2
Tracemalloc's output:  (30205040, 617201070)
Training took 201.8720452785492 seconds.
tf GPU memory tracking {'current': 1035736064, 'peak': 5921249536}


In [None]:
# Evaluate the model
# scores = model.evaluate(X_test, Y_test, verbose=1)
# print('Test loss:', scores[0])
# print('Test accuracy:', scores[1])



# ResNet50: quantized (preparation)

In [None]:
# Fresh ResNet50 (no pretrained weights) that serves as the template for the
# quantization-aware clones built later.
base_model = ResNet50(
    input_shape=(32, 32, 3),
    classes=10,
    weights=None,
)

In [None]:
# Install and import quantized-aware-training libraries
!pip install tensorflow_model_optimization

Collecting tensorflow_model_optimization
  Downloading tensorflow_model_optimization-0.7.5-py2.py3-none-any.whl (241 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m241.2/241.2 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow_model_optimization
Successfully installed tensorflow_model_optimization-0.7.5


In [None]:
import tensorflow_model_optimization as tfmot

In [None]:
# Collect the name of every Conv2D layer of base_model, in model layer order
conv2d_layer_names = [
    candidate.name
    for candidate in base_model.layers
    if isinstance(candidate, tf.keras.layers.Conv2D)
]

In [None]:
# Display the collected Conv2D layer names
conv2d_layer_names

['conv1_conv',
 'conv2_block1_1_conv',
 'conv2_block1_2_conv',
 'conv2_block1_0_conv',
 'conv2_block1_3_conv',
 'conv2_block2_1_conv',
 'conv2_block2_2_conv',
 'conv2_block2_3_conv',
 'conv2_block3_1_conv',
 'conv2_block3_2_conv',
 'conv2_block3_3_conv',
 'conv3_block1_1_conv',
 'conv3_block1_2_conv',
 'conv3_block1_0_conv',
 'conv3_block1_3_conv',
 'conv3_block2_1_conv',
 'conv3_block2_2_conv',
 'conv3_block2_3_conv',
 'conv3_block3_1_conv',
 'conv3_block3_2_conv',
 'conv3_block3_3_conv',
 'conv3_block4_1_conv',
 'conv3_block4_2_conv',
 'conv3_block4_3_conv',
 'conv4_block1_1_conv',
 'conv4_block1_2_conv',
 'conv4_block1_0_conv',
 'conv4_block1_3_conv',
 'conv4_block2_1_conv',
 'conv4_block2_2_conv',
 'conv4_block2_3_conv',
 'conv4_block3_1_conv',
 'conv4_block3_2_conv',
 'conv4_block3_3_conv',
 'conv4_block4_1_conv',
 'conv4_block4_2_conv',
 'conv4_block4_3_conv',
 'conv4_block5_1_conv',
 'conv4_block5_2_conv',
 'conv4_block5_3_conv',
 'conv4_block6_1_conv',
 'conv4_block6_2_conv',
 

# ResNet50: quantized (experiments)

## Experiment 1

In [None]:
# Note: conv2d_layer_names is expected to hold 53 Conv2D layer names
def run_experiment(quant_start_layer: int = 53):
  """Build a quantization-aware clone of ``base_model``.

  The Conv2D layers whose names appear in
  ``conv2d_layer_names[quant_start_layer:]`` are annotated for quantization;
  every other layer is cloned unchanged.

  Args:
    quant_start_layer: Index into ``conv2d_layer_names`` of the first Conv2D
      layer to quantize. If the list has 53 entries, the default of 53
      selects no layer at all.

  Returns:
    The model produced by ``tfmot.quantization.keras.quantize_apply``.
  """
  names_to_quantize = frozenset(conv2d_layer_names[quant_start_layer:])
  qkeras = tfmot.quantization.keras

  def annotate_if_selected(layer):
    # Wrap only the selected layers; pass everything else through untouched
    return qkeras.quantize_annotate_layer(layer) if layer.name in names_to_quantize else layer

  annotated_model = tf.keras.models.clone_model(base_model, clone_function=annotate_if_selected)
  return qkeras.quantize_apply(annotated_model)

In [None]:
# Build the quantization-aware model: Conv2D layers from index 51 of
# conv2d_layer_names onward are quantized.
quant_aware_model = run_experiment(quant_start_layer=51)
quant_aware_model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 32, 32, 3)]          0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 38, 38, 3)            0         ['input_1[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 16, 16, 64)           9472      ['conv1_pad[1][0]']           
                                                                                                  
 conv1_bn (BatchNormalizati  (None, 16, 16, 64)           256       ['conv1_conv[1][0]']          
 on)                                                                                       

In [None]:
# Compile and train the model
quant_aware_model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# GPU statistics can only be queried when a GPU is visible; get_memory_info
# raises for an unknown device name.
gpu_available = bool(tf.config.list_physical_devices('GPU'))
if gpu_available:
    # Reset the peak counter; otherwise the reported peak would still include
    # allocations made by earlier cells (e.g. the non-quantized training run).
    tf.config.experimental.reset_memory_stats('GPU:0')

# Track peak memory footprint and accuracy
# NOTE(review): the test split is used as validation data and also drives the
# early-stopping callback -- confirm this is intended.
tracemalloc.start()
start = time.perf_counter()  # monotonic clock, suited to measuring durations
try:
    history = quant_aware_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=35, callbacks=[accuracy_callback])
    end = time.perf_counter()
    # get_traced_memory() returns (current, peak) in bytes of Python-level allocations
    print("Tracemalloc's output: ", tracemalloc.get_traced_memory())
finally:
    # Always stop tracing, even when training is interrupted or fails
    tracemalloc.stop()

print(f"Training took {end - start} seconds.")
if gpu_available:
    print(f"tf GPU memory tracking {tf.config.experimental.get_memory_info('GPU:0')}")
else:
    print("tf GPU memory tracking unavailable: no GPU device found")

# Report the final test accuracy (evaluated whether or not the target was reached)
scores = quant_aware_model.evaluate(X_test, Y_test, verbose=1)
print('Test accuracy:', scores[1])

Epoch 1/2
Reached 25.0% accuracy so stopping training!
Tracemalloc's output:  (41154583, 617026964)
Training took 1001.9592595100403 seconds.
tf GPU memory tracking {'current': 1161043712, 'peak': 6124698112}


In [None]:
# Convert the quantization-aware model to TFLite and write it to a temp file
# so its on-disk size can be measured.
converter = tf.lite.TFLiteConverter.from_keras_model(quant_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_tflite_model = converter.convert()

# mkstemp returns an already-open OS-level file descriptor together with the
# path. Write through that descriptor so it is closed by the `with` block
# instead of being leaked (and so IPython's `_` is not overwritten).
quantized_fd, quantized_file = tempfile.mkstemp('.tflite')
with os.fdopen(quantized_fd, 'wb') as f:
  f.write(quantized_tflite_model)



In [None]:
# Report the size of the written .tflite file: bytes divided by 2**20
quantized_size_bytes = os.path.getsize(quantized_file)
print("Quantized model in Mb:", quantized_size_bytes / float(2**20))

Quantized model in Mb: 23.543243408203125
