In [55]:
import tensorflow as tf
import numpy as np
import keras
import os

# Data path
DATA_DIR = "data/trainData"
IMAGE_SIZE = (112, 112)

# Outputs paths
MODEL_ID = "base"

MODEL_PATH = "models/trained_model"
MODEL_TFLITE_PATH = f"outputs/{MODEL_ID}/model.tflite"
MODEL_QUANT_PATH = f"outputs/{MODEL_ID}/model_quant.tflite"
MODEL_QUANT_INT_FLOAT_PATH = f"outputs/{MODEL_ID}/model_quant_int_float.tflite"
MODEL_QUANT_FUL_INT_PATH = f"outputs/{MODEL_ID}/model_quant_full_int.tflite"


# A small util
def store_model(model, path):
    with open(path, "wb") as f:
        f.write(model)

As part of the transformation process, in order ot estimate quantization values, we need to feed the converter some of the train data:

In [42]:
class_names = sorted(os.listdir(DATA_DIR))

# Load the dataset
dataset = keras.preprocessing.image_dataset_from_directory(
    DATA_DIR,
    labels="inferred",
    label_mode="categorical",
    class_names=class_names,
    color_mode="rgb",
    image_size=IMAGE_SIZE,
    batch_size=1,
)

# This is a standard preprocessing function
preprocessing = keras.Sequential([keras.layers.Rescaling(scale=1.0 / 255.0)])

# Apply the preprocessing
dataset = dataset.map(lambda x, y: (preprocessing(x, training=False), y))


# Define the feeding data for the converter
def representative_dataset():
    for images, _ in dataset.take(100):
        yield [images]

Found 14457 files belonging to 7 classes.


# 1. Transform models:

Here we are showing the transformation from a pre-trained `tf-keras` model into the `tflite` and `tf-lite-quant` versions.

In [48]:
# Load model from pre-trained checkpoint
base_model = tf.keras.models.load_model(MODEL_PATH)
base_model.summary()

Model: "BaseModel"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Backbone (KerasLayer)       (None, 256)               218544    
                                                                 
 BatchNorm1 (BatchNormaliza  (None, 256)               1024      
 tion)                                                           
                                                                 
 Output (Dense)              (None, 7)                 1799      
                                                                 
Total params: 221367 (864.71 KB)
Trainable params: 215383 (841.34 KB)
Non-trainable params: 5984 (23.38 KB)
_________________________________________________________________


## 1. Standard TF-LITE:
This is a `tflite` model, still using `float32` for all parameters:

In [50]:
converter = tf.lite.TFLiteConverter.from_keras_model(base_model)
tflite_model = converter.convert()
store_model(tflite_model, MODEL_TFLITE_PATH)

INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmphpvrgs71/assets


INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmphpvrgs71/assets
2024-01-16 16:24:32.341707: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-01-16 16:24:32.341760: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-01-16 16:24:32.342147: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmphpvrgs71
2024-01-16 16:24:32.352877: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-01-16 16:24:32.352896: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmphpvrgs71
2024-01-16 16:24:32.390860: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-01-16 16:24:32.734090: I tensorflow/cc/saved_model/loader.cc:217] Running initialization

## 2. Dynamic range quantization

Here we are still using `float32` for input and output, but most of the weights will be converted to `8-bit` precision. Activations are also quantized.

In [51]:
converter = tf.lite.TFLiteConverter.from_keras_model(base_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model_quant = converter.convert()
store_model(tflite_model_quant, MODEL_QUANT_PATH)

INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmp2_xk6aog/assets


INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmp2_xk6aog/assets
2024-01-16 16:28:00.506527: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-01-16 16:28:00.506545: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-01-16 16:28:00.506747: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmp2_xk6aog
2024-01-16 16:28:00.517803: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-01-16 16:28:00.517822: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmp2_xk6aog
2024-01-16 16:28:00.554196: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-01-16 16:28:00.901910: I tensorflow/cc/saved_model/loader.cc:217] Running initialization

## 3. Full integer quantization

Basically here we are quantisizing also quantizing activations ( and input/output ). Fro this we need to calibrate the quantization of those values, and hence, we need to feed some data.

### 3.1 Integer with float fallback:

Here we are still using float implementation when integer ops are not available:

In [66]:
converter = tf.lite.TFLiteConverter.from_keras_model(base_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.float16,
]
converter.representative_dataset = representative_dataset
tflite_quant_model = converter.convert()
store_model(tflite_quant_model, MODEL_QUANT_INT_FLOAT_PATH)

INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmp4v22h3lk/assets


INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmp4v22h3lk/assets
2024-01-16 16:44:42.403160: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-01-16 16:44:42.403174: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-01-16 16:44:42.403398: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmp4v22h3lk
2024-01-16 16:44:42.415509: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-01-16 16:44:42.415525: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmp4v22h3lk
2024-01-16 16:44:42.455595: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-01-16 16:44:42.807599: I tensorflow/cc/saved_model/loader.cc:217] Running initialization

### 3.3 Integer only:

Finally, this is a model with `input` and `output` as `uint8`. This should reduce memory usage to its maximum.

In [62]:
converter = tf.lite.TFLiteConverter.from_keras_model(base_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS_INT8,
    tf.lite.OpsSet.TFLITE_BUILTINS,
]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.int8
tflite_model = converter.convert()

store_model(tflite_model, MODEL_QUANT_FUL_INT_PATH)

INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmp0c59lffy/assets


INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmp0c59lffy/assets
2024-01-16 16:42:51.503987: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-01-16 16:42:51.504005: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-01-16 16:42:51.504248: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmp0c59lffy
2024-01-16 16:42:51.516343: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-01-16 16:42:51.516362: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmp0c59lffy
2024-01-16 16:42:51.557537: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-01-16 16:42:51.933837: I tensorflow/cc/saved_model/loader.cc:217] Running initialization

Finally, this will export the model as a set of bytes for operating in the arduino:

In [30]:
!xxd -n model_tflite -i {BASE_QUANT_INT_FULL_MODEL} > outputs/model.cc

# 2. Testing functionalities:

In [31]:
interpreter = tf.lite.Interpreter(model_path=BASE_QUANT_INT_FULL_MODEL)
interpreter.allocate_tensors()

In [32]:
test_image = np.expand_dims(np.random.rand(112, 112, 3), axis=0).astype(np.uint8)

input_details = interpreter.get_input_details()[0]
output_details = interpreter.get_output_details()[0]

input_index = input_details["index"]
output_index = output_details["index"]

interpreter.set_tensor(input_index, test_image)
interpreter.invoke()
predictions = interpreter.get_tensor(output_index)

In [33]:
predictions

array([[ 93, -63,  74,  97, -30]], dtype=int8)

In [34]:
interpreter.get_input_details()

[{'name': 'serving_default_Backbone_input:0',
  'index': 0,
  'shape': array([  1, 112, 112,   3], dtype=int32),
  'shape_signature': array([ -1, 112, 112,   3], dtype=int32),
  'dtype': numpy.uint8,
  'quantization': (0.003921568859368563, 0),
  'quantization_parameters': {'scales': array([0.00392157], dtype=float32),
   'zero_points': array([0], dtype=int32),
   'quantized_dimension': 0},
  'sparsity_parameters': {}}]

In [35]:
interpreter.get_output_details()

[{'name': 'StatefulPartitionedCall:0',
  'index': 92,
  'shape': array([1, 5], dtype=int32),
  'shape_signature': array([-1,  5], dtype=int32),
  'dtype': numpy.int8,
  'quantization': (0.016782592982053757, 33),
  'quantization_parameters': {'scales': array([0.01678259], dtype=float32),
   'zero_points': array([33], dtype=int32),
   'quantized_dimension': 0},
  'sparsity_parameters': {}}]