## Library load

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
cd /content/drive/MyDrive/Portofolio/fruit_quant_aware

/content/drive/MyDrive/Portofolio/fruit_quant_aware


In [3]:
!ls

 Callbacks		     'Copy of weight_clustering.ipynb'	 TFLite_Models
 cluster_saved_models	      pruned_saved_models		 Training
'Copy of custom_cnn3.ipynb'   quant_saved_models		 Validation
'Copy of mobilenet.ipynb'     saved_models
'Copy of pruning.ipynb'       Test


In [4]:
!pip install  tensorflow-model-optimization
# !pip uninstall tensorflow

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [5]:
# !pip install tensorflow-gpu==2.3.0

In [6]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import glob

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator


from PIL import Image
from matplotlib import image as plt_image
import cv2

from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input 
#mobilenet expects inputs in the range [-1 1] of float data type

from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input, Dropout
from tensorflow.keras import Sequential 
#https://keras.io/api/applications/mobilenet/ #mobilenet explanation

In [7]:
import tensorflow_model_optimization as tfmot


In [8]:
np.random.seed(42)# keras seed fixing 
tf.random.set_seed(42)# tensorflow seed fixing

## Hyperparameters

In [9]:
class hyperparams:
  def __init__(self):
    self.dim2d = (48,48) #image dimensions we want downscale
    self.dim3d = (48,48,3) #128 is the minimum for mobilenet
    self.batch_size = 64
    self.no_epochs = 50#30
    self.lr = 1e-3
  
hparams =  hyperparams()
print(hparams.dim2d,hparams.dim3d)
hparams.lr

(48, 48) (48, 48, 3)


0.001

##Data Augmentation

### augmentation and preprocess


In [10]:
# rescale = tf.keras.layers.Rescaling(scale=1./127.5,offset=-1)
# def preproc(inp):
#   # ret =  2.*(inp - np.min(inp))/np.ptp(inp)-1
#   return rescale(inp)

In [11]:
# def preproc(inp):#custom one without using tf functions
#   return (inp*1.0/127.5)-1

#   #ONLY NEED FOR MOBILENET

In [12]:
def preproc(inp):#custom one without using tf functions
  return (inp*1.0/255)



In [13]:
train_datagen = ImageDataGenerator(#featurewise_center=True,
                             rotation_range=(0-30),
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             brightness_range=[0.5,1.5],
                             shear_range=0.2, 
                             zoom_range=0.2,
                             channel_shift_range=0.2,
                             horizontal_flip=True, 
                             #vertical_flip=True,
                             fill_mode='nearest',
                             preprocessing_function=preproc,
                             
                             dtype=float)

val_datagen = ImageDataGenerator(
                                  dtype=float,
                                  preprocessing_function=preproc
                                  ) #no augmentation for test 


test_datagen = ImageDataGenerator(
                                  dtype=float,
                                  preprocessing_function=preproc
                                  ) #no augmentation for test 


### post augmentation images and generator creation 

In [14]:
train_generator = train_datagen.flow_from_directory(
    "Training",
    target_size=hparams.dim2d,
    batch_size=hparams.batch_size,
    class_mode='categorical',
    shuffle=True,
    color_mode="rgb",
    interpolation="bilinear",
    ) # set as training data

validation_generator = val_datagen.flow_from_directory(
    "Validation", # same directory as training data
    target_size=hparams.dim2d,
    batch_size=hparams.batch_size,
    class_mode='categorical',
    shuffle=False,
    color_mode="rgb",
    interpolation="bilinear",
    ) # set as validation data


test_generator = test_datagen.flow_from_directory(
    "Test", 
    target_size=hparams.dim2d,
    batch_size=hparams.batch_size,
    class_mode='categorical',
    interpolation="bilinear",
    color_mode="rgb",
    ) # set as test data

Found 6231 images belonging to 24 classes.
Found 3114 images belonging to 24 classes.
Found 3110 images belonging to 24 classes.


In [15]:
 img = next(train_generator)[0]
 print(img.shape)
 print(img.dtype)
 img.min(),img.max()

(64, 48, 48, 3)
float32


(0.0, 1.0)

In [16]:
 img = next(validation_generator)[0]
 print(img.shape)
 print(img.dtype)
 img.min(),img.max()

(64, 48, 48, 3)
float32


(0.0, 1.0)

In [17]:
 img = next(test_generator)[0]
 print(img.shape)
 print(img.dtype)
 img.min(),img.max()

(64, 48, 48, 3)
float32


(0.0, 1.0)

## Model Load

In [18]:
model = tf.keras.models.load_model('saved_models')
model.evaluate(test_generator, steps=test_generator.samples // hparams.batch_size)#baseline



[0.7086260318756104, 0.8642578125]

## Pruning fine tuning

Magnitude-based weight pruning gradually zeroes out model weights during the training process to achieve model sparsity. Sparse models are easier to compress, and we can skip the zeroes during inference for latency improvements.

https://www.tensorflow.org/model_optimization/guide/pruning

More info on finetuning:

https://www.tensorflow.org/model_optimization/guide/pruning/comprehensive_guide

In [27]:
import tensorflow_model_optimization as tfmot

prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

# Define model for pruning.
no_train_images = 2934 ######
# The model starts with 50% sparsity (50% zeros in weights) and end with 70% sparsity.
end_step = np.ceil(no_train_images / hparams.batch_size).astype(np.int32) * hparams.no_epochs
pruning_params = {'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.50,
                                                               final_sparsity=0.70,
                                                               begin_step=0,
                                                               end_step=end_step)
}


model_for_pruning = prune_low_magnitude(model, **pruning_params)

# Use smaller learning rate for fine-tuning clustered model
opt = tf.keras.optimizers.Adam(learning_rate=1e-4)

model_for_pruning.compile(optimizer=opt,
              loss="categorical_crossentropy",
              metrics=['accuracy'])


  aggregation=tf.VariableAggregation.MEAN)
  aggregation=tf.VariableAggregation.MEAN)
  trainable=False)


In [28]:
model_for_pruning.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 prune_low_magnitude_conv2d_  (None, 46, 46, 16)       882       
 6 (PruneLowMagnitude)                                           
                                                                 
 prune_low_magnitude_batch_n  (None, 46, 46, 16)       65        
 ormalization_6 (PruneLowMag                                     
 nitude)                                                         
                                                                 
 prune_low_magnitude_conv2d_  (None, 44, 44, 16)       4626      
 7 (PruneLowMagnitude)                                           
                                                                 
 prune_low_magnitude_batch_n  (None, 44, 44, 16)       65        
 ormalization_7 (PruneLowMag                                     
 nitude)                                              

In [29]:
'''
Adding Callbacks and EarlyStopping
Callbacks and Checkpoints help to keep an eye on model while training and stop the training
if the performance has reached an optimum.
'''
from keras.callbacks import ModelCheckpoint, EarlyStopping

filepath = 'Callbacks/weights-improvement-{epoch:02d}-{val_accuracy:.2f}.hdf5'
checkpoint = ModelCheckpoint(filepath, monitor = 'val_accuracy', 
                             verbose = 1,
                             save_best_only = True,
                             mode = 'max',
                             save_freq = "epoch", #check and save at the end of the epoch   
                             save_weights_only=False,   #save model too   
                             )#best accuracy saved

early_stop = EarlyStopping(monitor = 'val_loss',
                           patience = 7, #wait 7 epochs before you restore best weights and stop model trainng
                           mode="min", 
                           verbose = 1,
                           min_delta=0.01,
                           restore_best_weights=True)#go to the model that had the best accuracy before the early stopping before patience epochs

#https://keras.io/api/callbacks/model_checkpoint/
#https://keras.io/api/callbacks/early_stopping/


In [30]:
import tempfile

logdir = tempfile.mkdtemp()

def lr_time_based_decay(epoch, lr):
    initial_learning_rate = hparams.lr /100
    epochs = hparams.no_epochs
    decay = initial_learning_rate / (epochs) *1000

    return lr * 1 / (1 + decay * epoch)

time_decay_learning_rate = tf.keras.callbacks.LearningRateScheduler (lr_time_based_decay, verbose=1) #CALLBACK 
callbacks = [checkpoint, early_stop,time_decay_learning_rate,
              tfmot.sparsity.keras.UpdatePruningStep(),
              tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),]

In [31]:
history = model_for_pruning.fit(
            train_generator,
            steps_per_epoch = train_generator.samples // hparams.batch_size,
            validation_data = validation_generator, 
            validation_steps = validation_generator.samples // hparams.batch_size,
            epochs = 10,#3rd epoch always the best
            callbacks=[callbacks]
            )


Epoch 1: LearningRateScheduler setting learning rate to 9.999999747378752e-05.
Epoch 1/10
Epoch 1: val_accuracy improved from -inf to 0.79590, saving model to Callbacks/weights-improvement-01-0.80.hdf5

Epoch 2: LearningRateScheduler setting learning rate to 9.998000147349282e-05.
Epoch 2/10
Epoch 2: val_accuracy improved from 0.79590 to 0.83105, saving model to Callbacks/weights-improvement-02-0.83.hdf5

Epoch 3: LearningRateScheduler setting learning rate to 9.994002713141118e-05.
Epoch 3/10
Epoch 3: val_accuracy improved from 0.83105 to 0.84538, saving model to Callbacks/weights-improvement-03-0.85.hdf5

Epoch 4: LearningRateScheduler setting learning rate to 9.98801009705491e-05.
Epoch 4/10
Epoch 4: val_accuracy did not improve from 0.84538

Epoch 5: LearningRateScheduler setting learning rate to 9.980026403299462e-05.
Epoch 5/10
Epoch 5: val_accuracy did not improve from 0.84538

Epoch 6: LearningRateScheduler setting learning rate to 9.970056459672385e-05.
Epoch 6/10
Epoch 6: va

In [32]:
model_for_pruning.evaluate(validation_generator, steps=validation_generator.samples // hparams.batch_size)



[0.7505474090576172, 0.8453776240348816]

In [33]:
model_for_pruning.evaluate(test_generator, steps=test_generator.samples // hparams.batch_size)



[0.7464613914489746, 0.8447265625]

In [34]:
model_for_pruning.save("/content/drive/MyDrive/Portofolio/fruit_quant_aware/pruned_saved_models/")



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Portofolio/fruit_quant_aware/pruned_saved_models/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Portofolio/fruit_quant_aware/pruned_saved_models/assets


In [35]:
temp = tf.keras.models.load_model('pruned_saved_models')

In [36]:
temp.evaluate(validation_generator, steps=validation_generator.samples // hparams.batch_size)



[0.7505474090576172, 0.8453776240348816]

In [37]:
temp.evaluate(test_generator, steps=test_generator.samples // hparams.batch_size)#baseline



[0.7397515773773193, 0.8460286259651184]

In [38]:
# %load_ext tensorboard

# #docs_infra: no_execute
# %tensorboard --logdir={logdir}

## Convert to TF-Lite and Evaluate

In [39]:
labels = list(train_generator.class_indices )
labels[:2]

['apple_6', 'apple_braeburn_1']

## full integer quantization

In [40]:
def representative_data_gen():

    imgs = tf.data.Dataset.from_tensor_slices(next(test_generator)[0]).batch(1)
    for i in imgs.take(64):#batch size
        i = tf.dtypes.cast(i, tf.float32)
        yield [i]

In [41]:
model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)

_, pruned_keras_file = tempfile.mkstemp('.h5')
tf.keras.models.save_model(model_for_export, pruned_keras_file, include_optimizer=False)
print('Saved pruned Keras model to:', pruned_keras_file)

converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
pruned_tflite_model = converter.convert()

_, pruned_tflite_file = tempfile.mkstemp('.tflite')

with open(pruned_tflite_file, 'wb') as f:
  f.write(pruned_tflite_model)

print('Saved pruned TFLite model to:', pruned_tflite_file)





Saved pruned Keras model to: /tmp/tmp9e3_17uf.h5
INFO:tensorflow:Assets written to: /tmp/tmpd1h6d2b7/assets


INFO:tensorflow:Assets written to: /tmp/tmpd1h6d2b7/assets


Saved pruned TFLite model to: /tmp/tmpurmdhxgw.tflite


In [42]:
def get_gzipped_model_size(file):
  # Returns size of gzipped model, in bytes.
  import os
  import zipfile

  _, zipped_file = tempfile.mkstemp('.zip')
  with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
    f.write(file)

  return os.path.getsize(zipped_file)

In [43]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_and_pruned_tflite_model = converter.convert()

_, quantized_and_pruned_tflite_file = tempfile.mkstemp('.tflite')


open("TFLite_Models/model.tflite","wb").write(quantized_and_pruned_tflite_model)



INFO:tensorflow:Assets written to: /tmp/tmp3510f4ul/assets


INFO:tensorflow:Assets written to: /tmp/tmp3510f4ul/assets


90752

In [44]:
!apt-get update && apt-get -qq install xxd
!xxd -i TFLite_Models/model.tflite > TFLite_Models/model.h

0% [Working]            Hit:1 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease
Ign:3 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  InRelease
Hit:4 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  Release
Hit:5 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease
Get:6 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
Hit:7 http://archive.ubuntu.com/ubuntu bionic InRelease
Get:9 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]
Hit:10 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease
Hit:11 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu bionic InRelease
Get:12 http://archive.ubuntu.com/ubuntu bionic-backports InRelease [74.6 kB]
Hit:13 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease
Fetched 252 kB in 2s (1

In [45]:
print("Header file in MB:", os.path.getsize("TFLite_Models/model.h") / float(2**20))
print("TFLite Model in MB:", os.path.getsize("TFLite_Models/model.tflite") / float(2**20))
print("TFLite Model in KB:", os.path.getsize("TFLite_Models/model.tflite") / float(2**10))

Header file in MB: 0.5338096618652344
TFLite Model in MB: 0.0865478515625
TFLite Model in KB: 88.625


In [46]:
#accuracy evaluator

# Initialize the TFLite interpreter
tfl_inter = tf.lite.Interpreter(model_content=quantized_and_pruned_tflite_model)

# Allocate the tensors
tfl_inter.allocate_tensors()

# Get input/output layer information
i_details = tfl_inter.get_input_details()[0]
o_details = tfl_inter.get_output_details()[0]

def classify(i_data):
  
  input_data = i_data[np.newaxis, ...] #add batch dimension
  i_value_f32 = tf.dtypes.cast(input_data, tf.float32)
  

  #leave input as it is
  i_value_s8 = i_value_f32

  tfl_inter.set_tensor(i_details["index"], i_value_s8)
  tfl_inter.invoke()
  o_pred = tfl_inter.get_tensor(o_details["index"])[0]

  return o_pred

In [47]:
import PIL
from PIL import Image
print('Pillow Version:', PIL.__version__)

Pillow Version: 7.1.2


In [48]:
num_correct_samples = 0
num_total_samples   = len(glob.glob("/content/drive/MyDrive/Portofolio/fruit_quant_aware/Training/*/*"))

ind = 0

for img_path in glob.glob("/content/drive/MyDrive/Portofolio/fruit_quant_aware/Training/*/*"):
  image = Image.open(img_path)
  image = image.resize(hparams.dim2d) #image resize
  image = np.array(image) #convert to numpy
  image = image/255.0 #standardize

  pred = classify(image)
  label = (img_path.split("/")[-2])#contains the true label

  # print(labels[np.argmax(pred)],label)
  # break
  if labels[np.argmax(pred)]==label:
    num_correct_samples = num_correct_samples + 1

  if ind%1000==0:
    print(f"{ind+1} sample")
  ind = ind + 1
  

acc = num_correct_samples/num_total_samples
acc

1 sample
1001 sample
2001 sample
3001 sample
4001 sample
5001 sample
6001 sample


0.8221794254533783

## full integer quantization with input quantization

In [49]:
def representative_data_gen():

    imgs = tf.data.Dataset.from_tensor_slices(next(test_generator)[0]).batch(1)
    for i in imgs.take(64):#batch size
        i = tf.dtypes.cast(i, tf.float32)
        yield [i]

In [50]:
model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)

_, pruned_keras_file = tempfile.mkstemp('.h5')
tf.keras.models.save_model(model_for_export, pruned_keras_file, include_optimizer=False)
print('Saved pruned Keras model to:', pruned_keras_file)

converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
pruned_tflite_model = converter.convert()

_, pruned_tflite_file = tempfile.mkstemp('.tflite')

with open(pruned_tflite_file, 'wb') as f:
  f.write(pruned_tflite_model)

print('Saved pruned TFLite model to:', pruned_tflite_file)





Saved pruned Keras model to: /tmp/tmpkko_ea_u.h5
INFO:tensorflow:Assets written to: /tmp/tmp5e3ye4qo/assets


INFO:tensorflow:Assets written to: /tmp/tmp5e3ye4qo/assets


Saved pruned TFLite model to: /tmp/tmp1cgo9p63.tflite


In [51]:
def get_gzipped_model_size(file):
  # Returns size of gzipped model, in bytes.
  import os
  import zipfile

  _, zipped_file = tempfile.mkstemp('.zip')
  with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
    f.write(file)

  return os.path.getsize(zipped_file)

In [52]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
converter.representative_dataset = tf.lite.RepresentativeDataset(representative_data_gen)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8

quantized_and_pruned_tflite_model = converter.convert()

_, quantized_and_pruned_tflite_file = tempfile.mkstemp('.tflite')


open("TFLite_Models/model.tflite","wb").write(quantized_and_pruned_tflite_model)

INFO:tensorflow:Assets written to: /tmp/tmp20v8730s/assets


INFO:tensorflow:Assets written to: /tmp/tmp20v8730s/assets


90256

In [53]:
!apt-get update && apt-get -qq install xxd
!xxd -i TFLite_Models/model.tflite > TFLite_Models/model.h

0% [Working]            Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease
Hit:2 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease
Ign:3 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  InRelease
Hit:4 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  Release
Hit:5 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease
Get:6 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
Hit:8 http://archive.ubuntu.com/ubuntu bionic InRelease
Get:9 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]
Hit:10 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease
Hit:11 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu bionic InRelease
Get:12 http://archive.ubuntu.com/ubuntu bionic-backports InRelease [74.6 kB]
Hit:13 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease
Fetched 252 kB in 2s (1

In [54]:
print("Header file in MB:", os.path.getsize("TFLite_Models/model.h") / float(2**20))
print("TFLite Model in MB:", os.path.getsize("TFLite_Models/model.tflite") / float(2**20))
print("TFLite Model in KB:", os.path.getsize("TFLite_Models/model.tflite") / float(2**10))

Header file in MB: 0.5308933258056641
TFLite Model in MB: 0.0860748291015625
TFLite Model in KB: 88.140625


In [55]:
#accuracy evaluator

# Initialize the TFLite interpreter
tfl_inter = tf.lite.Interpreter(model_content=quantized_and_pruned_tflite_model)

# Allocate the tensors
tfl_inter.allocate_tensors()

# Get input/output layer information
i_details = tfl_inter.get_input_details()[0]
o_details = tfl_inter.get_output_details()[0]

i_quant = i_details["quantization_parameters"]
o_quant = o_details["quantization_parameters"]
i_scale      = i_quant['scales'][0]
i_zero_point = i_quant['zero_points'][0]


def classify(i_data):
  
  input_data = i_data[np.newaxis, ...] #add batch dimension
  i_value_f32 = tf.dtypes.cast(input_data, tf.float32)
  
  # Quantize (float -> 8-bit) the input (check if input layer is 8-bit, first)
  i_value_f32 = i_value_f32 / i_scale + i_zero_point
  i_value_s8 = tf.cast(i_value_f32, dtype=tf.int8)


  tfl_inter.set_tensor(i_details["index"], i_value_s8)
  tfl_inter.invoke()
  o_pred = tfl_inter.get_tensor(o_details["index"])[0]

  return o_pred

In [56]:
import PIL
from PIL import Image
print('Pillow Version:', PIL.__version__)

Pillow Version: 7.1.2


In [57]:
num_correct_samples = 0
num_total_samples   = len(glob.glob("/content/drive/MyDrive/Portofolio/fruit_quant_aware/Training/*/*"))

ind = 0

for img_path in glob.glob("/content/drive/MyDrive/Portofolio/fruit_quant_aware/Training/*/*"):
  image = Image.open(img_path)
  image = image.resize(hparams.dim2d) #image resize
  image = np.array(image) #convert to numpy
  image = image/255.0 #standardize

  pred = classify(image)
  label = (img_path.split("/")[-2])#contains the true label

  # print(labels[np.argmax(pred)],label)
  # break
  if labels[np.argmax(pred)]==label:
    num_correct_samples = num_correct_samples + 1

  if ind%1000==0:
    print(f"{ind+1} sample")
  ind = ind + 1
  

acc = num_correct_samples/num_total_samples
acc

1 sample
1001 sample
2001 sample
3001 sample
4001 sample
5001 sample
6001 sample


0.8216979618038838