In [1]:
from tensorflow import keras
import tensorflow as tf
import numpy as np

import tensorflow_hub as hub

# Number of units of the final Dense layer named "Output".
NUM_CLASSES = 6
# Base model identifier. It is used as the prefix of the hub handle path, as the
# prefix of every export path, and (with spaces replaced by "_") as the Keras
# model name.
BASE_MODEL = "Mobile V1"

2024-01-05 03:08:51.342549: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Define the basic model:

In [10]:
# Feature extractor loaded through TF-Hub from "<BASE_MODEL>/features".
# It is kept trainable and receives a custom batch-norm momentum.
backbone = hub.KerasLayer(
    handle=f"{BASE_MODEL}/features",
    trainable=True,
    arguments={"batch_norm_momentum": 0.997},
    name="Backbone",
)

# Linear head (no activation) with NUM_CLASSES units.
head = tf.keras.layers.Dense(units=NUM_CLASSES, activation=None, name="Output")

# Keras model name is BASE_MODEL with spaces replaced by underscores.
model = tf.keras.Sequential(
    layers=[backbone, head],
    name=BASE_MODEL.replace(" ", "_"),
)

# Build with a (None, 112, 112, 3) input so the summary can report shapes and
# parameter counts; the leading dimension is left unspecified.
model.build(input_shape=[None, 112, 112, 3])
model.summary()

Model: "Mobile_V1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Backbone (KerasLayer)       (None, 256)               218544    
                                                                 
 Output (Dense)              (None, 6)                 1542      
                                                                 
Total params: 220086 (859.71 KB)
Trainable params: 214614 (838.34 KB)
Non-trainable params: 5472 (21.38 KB)
_________________________________________________________________


Check the final sizes:

In [11]:
# Export the model in three forms so their on-disk sizes can be compared.
# NOTE(review): each size comment below lists two figures separated by "|";
# the notebook does not label them — presumably two backbone variants, confirm.

# Full size model ~2MB | ~3.9MB
keras.saving.save_model(model, f'{BASE_MODEL}/model')

# open() does not create missing parent directories, so make sure the export
# folders exist; otherwise a fresh checkout fails with FileNotFoundError.
for export_dir in (f'{BASE_MODEL}/model-lite', f'{BASE_MODEL}/model-quant'):
    tf.io.gfile.makedirs(export_dir)

# TFLite conversion ~800KB | ~1.6MB
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
with open(f'{BASE_MODEL}/model-lite/model.tflite', 'wb') as f:
    f.write(tflite_model)

# TFLITE quant version ~292KB | ~574KB
# The same converter is reused with the default optimizations switched on.
converter.optimizations = [ tf.lite.Optimize.DEFAULT ]
tflite_quant_model = converter.convert()
with open(f'{BASE_MODEL}/model-quant/model.tflite', 'wb') as f:
    f.write(tflite_quant_model)





INFO:tensorflow:Assets written to: Mobile V1/model/assets


INFO:tensorflow:Assets written to: Mobile V1/model/assets


INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmp1fc06v5i/assets


INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmp1fc06v5i/assets
2024-01-05 03:20:50.799546: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-01-05 03:20:50.799567: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-01-05 03:20:50.799795: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmp1fc06v5i
2024-01-05 03:20:50.807233: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-01-05 03:20:50.807266: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmp1fc06v5i
2024-01-05 03:20:50.833769: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-01-05 03:20:51.042933: I tensorflow/cc/saved_model/loader.cc:217] Running initialization

INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmparm6u5z6/assets


INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmparm6u5z6/assets
2024-01-05 03:20:56.093154: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-01-05 03:20:56.093174: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-01-05 03:20:56.093399: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmparm6u5z6
2024-01-05 03:20:56.106870: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-01-05 03:20:56.106890: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmparm6u5z6
2024-01-05 03:20:56.147343: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-01-05 03:20:56.522155: I tensorflow/cc/saved_model/loader.cc:217] Running initialization

In [12]:
# TFLITE quant version ~322KB | ~649KB
def representative_dataset():
    """Yield 100 calibration batches of shape (1, 112, 112, 3), dtype float32.

    Values are uniform in [0, 1). The generator is seeded inside the function
    so every call produces the same sequence and the conversion is repeatable.

    NOTE(review): random inputs only exercise the conversion path; calibrating
    a model meant for deployment presumably needs real samples — confirm.
    """
    rng = np.random.default_rng(0)
    for _ in range(100):
        yield [ rng.random((1, 112, 112, 3), dtype=np.float32) ]

# Build a fresh converter instead of relying on the state left behind by an
# earlier cell, so this cell gives the same result whenever it is (re-)run.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [ tf.lite.Optimize.DEFAULT ]
converter.representative_dataset = representative_dataset
# Restrict to int8 builtin ops and use int8 for the model input and output.
converter.target_spec.supported_ops = [ tf.lite.OpsSet.TFLITE_BUILTINS_INT8 ]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
tflite_quant_full_model = converter.convert()

# open() does not create missing parent directories.
tf.io.gfile.makedirs(f'{BASE_MODEL}/model-quant-full')
with open(f'{BASE_MODEL}/model-quant-full/model.tflite', 'wb') as f:
    f.write(tflite_quant_full_model)

INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmpukaty95g/assets


INFO:tensorflow:Assets written to: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmpukaty95g/assets
2024-01-05 03:21:01.175746: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-01-05 03:21:01.175765: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-01-05 03:21:01.175955: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmpukaty95g
2024-01-05 03:21:01.183401: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-01-05 03:21:01.183417: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/5j/vfb1vn5d7mxd7fmy30glls2c0000gn/T/tmpukaty95g
2024-01-05 03:21:01.210675: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-01-05 03:21:01.410652: I tensorflow/cc/saved_model/loader.cc:217] Running initialization

## Testing the fully quantized model:

In [13]:
# Load the fully quantized model written to "<BASE_MODEL>/model-quant-full".
# The path is derived from BASE_MODEL rather than hard-coded, so it follows
# the constant if the base model is changed.
interpreter = tf.lite.Interpreter(model_path=f"./{BASE_MODEL}/model-quant-full/model.tflite")
# Tensors must be allocated before set_tensor()/invoke() can be used.
interpreter.allocate_tensors()

In [14]:
# Smoke-test the quantized model on one random input.
input_details = interpreter.get_input_details()[0]
output_details = interpreter.get_output_details()[0]
input_index = input_details["index"]
output_index = output_details["index"]

# Random float image in [0, 1). Casting it straight to int8 would truncate
# every value to 0, so it is quantized with the input tensor's own parameters:
# q = round(x / scale) + zero_point, clipped to the tensor dtype's range.
test_image_float = np.random.rand(1, 112, 112, 3).astype(np.float32)
input_scale, input_zero_point = input_details["quantization"]
input_range = np.iinfo(input_details["dtype"])
test_image = np.clip(
    np.round(test_image_float / input_scale) + input_zero_point,
    input_range.min,
    input_range.max,
).astype(input_details["dtype"])

interpreter.set_tensor(input_index, test_image)
interpreter.invoke()
# Raw quantized output, exactly as returned by the interpreter.
predictions = interpreter.get_tensor(output_index)

# Dequantized output: x = (q - zero_point) * scale.
output_scale, output_zero_point = output_details["quantization"]
logits = (predictions.astype(np.float32) - output_zero_point) * output_scale

In [15]:
# Display the output tensor metadata: name, index, shape, dtype and the
# quantization parameters (scale, zero point).
interpreter.get_output_details()

[{'name': 'StatefulPartitionedCall:0',
  'index': 91,
  'shape': array([1, 6], dtype=int32),
  'shape_signature': array([-1,  6], dtype=int32),
  'dtype': numpy.int8,
  'quantization': (0.020725928246974945, -73),
  'quantization_parameters': {'scales': array([0.02072593], dtype=float32),
   'zero_points': array([-73], dtype=int32),
   'quantized_dimension': 0},
  'sparsity_parameters': {}}]