In [2]:
import tensorflow as tf
import numpy as np
import keras
import os

# ---- Inputs ----
# Pre-trained Keras model to convert.
MODEL_PATH = "models/quant_model"
# Root folder of the training images (one sub-folder per class).
DATA_DIR = "data/TinyDataset/trainData"
# Size every image is resized to when it is loaded.
IMAGE_SIZE = (112, 112)

# ---- Outputs ----
# Identifier used to build the output locations below.
MODEL_ID = "QA_model"

# One destination file per conversion variant produced in this notebook.
MODEL_TFLITE_PATH = f"outputs/{MODEL_ID}/model.tflite"
MODEL_QUANT_PATH = f"outputs/{MODEL_ID}/model_quant.tflite"
MODEL_QUANT_INT_FLOAT_PATH = f"outputs/{MODEL_ID}/model_quant_int_float.tflite"
MODEL_QUANT_FUL_INT_PATH = f"outputs/{MODEL_ID}/model_quant_full_int.tflite"


def store_model(model, path):
    """Write a serialized model to ``path``, creating parent folders if needed.

    Args:
        model: Bytes-like object to write (e.g. the value returned by
            ``TFLiteConverter.convert()``).
        path: Destination file path. An existing file is overwritten.
    """
    # On a fresh checkout the output folder does not exist yet; without this
    # open() would fail with FileNotFoundError.
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "wb") as f:
        f.write(model)

2024-01-21 22:26:05.849859: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


As part of the transformation process, in order to estimate the quantization values, we need to feed the converter some of the training data:

In [3]:
# Every sub-directory of DATA_DIR is treated as one class; sorting keeps the
# class order stable between runs.
class_names = sorted(
    entry
    for entry in os.listdir(DATA_DIR)
    if os.path.isdir(os.path.join(DATA_DIR, entry))
)

# Fixed shuffle seed so the samples used for calibration are reproducible
# across notebook runs instead of changing on every kernel restart.
CALIBRATION_SEED = 42

# Load the images one per batch, with one-hot ("categorical") labels.
raw_dataset = keras.preprocessing.image_dataset_from_directory(
    DATA_DIR,
    labels="inferred",
    label_mode="categorical",
    class_names=class_names,
    color_mode="rgb",
    image_size=IMAGE_SIZE,
    batch_size=1,
    shuffle=True,
    seed=CALIBRATION_SEED,
)

# Multiplies pixel values by 1/255.
# NOTE(review): presumably this mirrors the preprocessing used at training
# time — confirm against the training pipeline.
preprocessing = keras.Sequential([keras.layers.Rescaling(scale=1.0 / 255.0)])

# Calibration data must be preprocessed in inference mode: with training=True
# any train-only layer later added to `preprocessing` would perturb the
# samples seen by the converter.
dataset = raw_dataset.map(lambda x, y: (preprocessing(x, training=False), y))

def representative_dataset():
    """Generate the calibration inputs handed to the TFLite converter.

    Takes up to 100 elements from the module-level ``dataset`` and yields, for
    each one, a single-element list containing the images; labels are dropped.
    """
    yield from ([batch] for batch, _label in dataset.take(100))

Found 242 files belonging to 6 classes.


# 1. Transform models:

Here we are showing the transformation from a pre-trained `tf-keras` model into the `tflite` and `tf-lite-quant` versions.

In [4]:
# Load the pre-trained Keras model stored at MODEL_PATH and print its layer
# summary.
# NOTE(review): the summary lists quantize wrapper layers, so this looks like a
# quantization-aware-trained model — confirm.
model = tf.keras.models.load_model(MODEL_PATH)
model.summary()

Model: "QAModel"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Backbone (KerasLayer)       (None, 256)               218544    
                                                                 
 quantize_layer_20 (Quantiz  (None, 256)               3         
 eLayer)                                                         
                                                                 
 quant_BatchNorm1 (Quantize  (None, 256)               1031      
 WrapperV2)                                                      
                                                                 
 quant_Output (QuantizeWrap  (None, 6)                 1547      
 perV2)                                                          
                                                                 
Total params: 221125 (863.77 KB)
Trainable params: 215126 (840.34 KB)
Non-trainable params: 5999 (23.43 KB)
_________________

## 1. Standard TF-LITE:
This is a `tflite` model, still using `float32` for all parameters:

In [6]:
# Convert the Keras model with the converter's default settings (no
# optimizations are set here) and write the result to MODEL_TFLITE_PATH.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
store_model(tflite_model, MODEL_TFLITE_PATH)

INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmpwi89szb8/assets


INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmpwi89szb8/assets
2024-01-21 22:27:13.898849: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-01-21 22:27:13.898868: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-01-21 22:27:13.899082: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmpwi89szb8
2024-01-21 22:27:13.910872: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-01-21 22:27:13.910890: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmpwi89szb8
2024-01-21 22:27:13.951327: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-01-21 22:27:14.327693: I tensorflow/cc/saved_model/loader.cc:217] Running initialization

## 2. Dynamic range quantization

Here we are still using `float32` for input and output, but most of the weights will be converted to `8-bit` precision. Activations are also quantized, dynamically at inference time.

In [7]:
# Same conversion as the float model, but with the default optimization set
# enabled. No representative dataset is supplied in this cell.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model_quant = converter.convert()
# Write the converted model to MODEL_QUANT_PATH.
store_model(tflite_model_quant, MODEL_QUANT_PATH)

INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmphesfgqz9/assets


INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmphesfgqz9/assets
2024-01-21 22:27:29.268689: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-01-21 22:27:29.268707: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-01-21 22:27:29.268906: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmphesfgqz9
2024-01-21 22:27:29.283027: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-01-21 22:27:29.283045: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmphesfgqz9
2024-01-21 22:27:29.332084: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-01-21 22:27:29.713693: I tensorflow/cc/saved_model/loader.cc:217] Running initialization

## 3. Full integer quantization

Here we also quantize the activations (and the input/output). For this we need to calibrate the quantization ranges of those values, and hence we need to feed the converter some representative data.

### 3.1 Integer with float fallback:

Here the float implementation is still used for any operation that has no integer implementation:

In [8]:
# Integer quantization with float fallback: default optimizations plus a
# representative dataset for calibrating activation ranges.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# `supported_ops` takes tf.lite.OpsSet members only. The original list also
# contained `tf.float16`, which is a dtype (it would belong in
# `target_spec.supported_types`, and would request float16 quantization rather
# than integer quantization), so it has been removed.
# TFLITE_BUILTINS keeps float kernels available as the fallback.
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
]
converter.representative_dataset = representative_dataset
tflite_quant_model = converter.convert()
# Write the converted model to MODEL_QUANT_INT_FLOAT_PATH.
store_model(tflite_quant_model, MODEL_QUANT_INT_FLOAT_PATH)

INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmpcq_wr2b_/assets


INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmpcq_wr2b_/assets
2024-01-21 22:27:44.892123: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-01-21 22:27:44.892144: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-01-21 22:27:44.892349: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmpcq_wr2b_
2024-01-21 22:27:44.907264: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-01-21 22:27:44.907283: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmpcq_wr2b_
2024-01-21 22:27:44.955429: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-01-21 22:27:45.337701: I tensorflow/cc/saved_model/loader.cc:217] Running initialization

### 3.2 Integer only:

Finally, this is a model with a `uint8` `input` and an `int8` `output`. This should reduce memory usage to a minimum.

In [9]:
# Integer-only quantization: default optimizations, a representative dataset
# for calibration, and integer input/output tensors.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
# Only the int8 builtin op set is listed. The original also listed
# TFLITE_BUILTINS, which lets the converter silently keep float kernels for
# ops it cannot quantize, so the result was not guaranteed to be integer-only.
# With this setting the conversion fails instead if such an op is present.
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS_INT8,
]
# NOTE(review): the input is unsigned (uint8) while the output is signed
# (int8); confirm the consuming firmware expects this mixed signedness.
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.int8
tflite_model = converter.convert()

# Write the converted model to MODEL_QUANT_FUL_INT_PATH.
store_model(tflite_model, MODEL_QUANT_FUL_INT_PATH)

INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmpikjmkaq4/assets


INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmpikjmkaq4/assets
2024-01-21 22:28:11.416427: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-01-21 22:28:11.416445: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-01-21 22:28:11.416643: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmpikjmkaq4
2024-01-21 22:28:11.429579: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-01-21 22:28:11.429614: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmpikjmkaq4
2024-01-21 22:28:11.473472: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-01-21 22:28:11.870943: I tensorflow/cc/saved_model/loader.cc:217] Running initialization

Finally, this exports the model as a C byte array so that it can be used on the Arduino:

In [10]:
# Dump the full-integer model file as a C source file via the `xxd` shell tool.
# The braces are filled in by IPython from the Python variables of the same
# name. NOTE(review): `-n` (array name override) is only available in recent
# xxd builds — confirm it exists on the machine running this notebook.
!xxd -n model_tflite -i {MODEL_QUANT_FUL_INT_PATH} > outputs/{MODEL_ID}_model.cc