<a href="https://colab.research.google.com/github/wayne0git/tensorflow_basic/blob/main/tflite/tflite_basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## TensorFlow Lite Basics
Ref - https://learning.edx.org/course/course-v1:HarvardX+TinyML2+3T2020

In [5]:
import tensorflow as tf # 2.8.0
import numpy as np   # 1.21.6

### Prepare a simple TensorFlow model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Seed TensorFlow's global RNG so the randomly initialised Dense kernel
# (and therefore the trained weights printed later) is reproducible
# after Restart Kernel -> Run All.
tf.random.set_seed(42)

# Linear regression model: a single Dense unit with a scalar input,
# i.e. the model learns y = w * x + b.
model = Sequential([Dense(units=1, input_shape=[1])])
model.compile(optimizer='sgd', loss='mean_squared_error')

# Training data: six points that lie exactly on the line y = 2x - 1.
xs = np.array([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0], dtype=float)
ys = np.array([-3.0, -1.0, 1.0, 3.0, 5.0, 7.0], dtype=float)

# Train
model.fit(xs, ys, epochs=500)

In [None]:
# Check result
# The training points lie on y = 2x - 1, so the prediction for x = 10
# should be close to 19.
# Feed an explicit (batch, features) = (1, 1) array instead of a bare Python
# list so the call does not depend on Keras' implicit list-to-tensor coercion.
print(model.predict(np.array([[10.0]])))
# The Dense layer's weights are [kernel, bias]; expect roughly 2 and -1.
print("Here is what I learned: {}".format(model.layers[0].get_weights()))

In [None]:
# Export the trained Keras model in TensorFlow's SavedModel format.
# The target directory receives the serialized graph (*.pb) together with
# the `assets` and `variables` sub-directories.
export_dir = 'saved_model/1'
tf.saved_model.save(obj=model, export_dir=export_dir)

### TensorFlow Lite Converter

In [10]:
# Build the TFLite converter from the SavedModel directory (*.pb) exported
# to `export_dir`.
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir=export_dir)

# Alternative: build the converter directly from the in-memory Keras model.
# converter = tf.lite.TFLiteConverter.from_keras_model(model)

In [36]:
# Choose the converter's optimization mode. Exactly one of the assignments
# below should be active; the others are kept as commented alternatives.

# 1. Convert Options (No Optimization): leave the list empty.
# converter.optimizations = []

# 2. Convert Options (Default : 8-bit weight quantization)
# This is the active setting. A later cell also sets a representative dataset
# and int8 input/output types on this same converter object.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# NOTE(review): the TensorFlow docs list the two flags below as deprecated
# and equivalent to DEFAULT - confirm for the installed TF version.
# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_LATENCY]
# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]

In [37]:
# Float 16 quantization (Weight only)
# converter.target_spec.supported_types = [tf.float16]

# 8-bit quantization (Weight & Activation)
def representative_data_gen():
    """Yield calibration samples for post-training integer quantization.

    Each yielded item is a list holding one array per model input. The model
    has a single input whose shape signature is (batch, 1), so every sample
    is emitted as a float32 array of shape (1, 1) - one training x-value per
    step - instead of a single rank-1 array containing all six values.

    Yields:
        list[np.ndarray]: a one-element list with a (1, 1) float32 array.
    """
    for value in (-1.0, 0.0, 1.0, 2.0, 3.0, 4.0):
        yield [np.array([[value]], dtype=np.float32)]
# Request full-integer conversion: restrict the op set to the int8 builtins,
# make the model's input and output tensors int8, and register the
# calibration generator defined above.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = converter.inference_output_type = tf.int8
converter.representative_dataset = representative_data_gen

In [None]:
# Convert the model using whatever options are currently set on `converter`
# (optimizations, representative dataset, target ops, input/output types).
# The result is kept in memory; later cells write it to a file opened in
# binary mode and can also pass it to the interpreter as `model_content`.
tflite_model = converter.convert()

In [39]:
# Persist the converted model to disk so that it can be loaded by path.
with open('model.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)

### TensorFlow Lite Interpreter

In [None]:
# Load TFLite model (From File)
# Reads the 'model.tflite' file written by the save cell.
interpreter = tf.lite.Interpreter(model_path='model.tflite')

# Load TFLite model (From convert model)
# Alternative that skips the file and uses the in-memory `tflite_model`
# returned by converter.convert().
# interpreter = tf.lite.Interpreter(model_content=tflite_model)

In [None]:
# Allocate tensors.
# Run this before any tensor is set or read in the cells below.
interpreter.allocate_tensors()

In [None]:
# Fetch the metadata of the model's input and output tensors. Each call
# returns a list of dicts (one dict per tensor); print both lists, one per line.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print(input_details, output_details, sep='\n')

[{'name': 'serving_default_dense_1_input:0', 'index': 0, 'shape': array([1, 1], dtype=int32), 'shape_signature': array([-1,  1], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
[{'name': 'StatefulPartitionedCall:0', 'index': 3, 'shape': array([1, 1], dtype=int32), 'shape_signature': array([-1,  1], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]


In [None]:
# Inference
# The converter is configured with int8 inference input/output types, so a
# float32 array cannot be handed to set_tensor directly. Convert the input to
# the dtype the interpreter reports and map the output back to real values.
# When the model has float I/O, both conversions are skipped.
input_detail = input_details[0]
output_detail = output_details[0]

input_data = np.array([[10.0]], dtype=np.float32)
if np.issubdtype(input_detail['dtype'], np.integer):
    # Quantize: q = round(x / scale) + zero_point, saturated to the dtype range.
    # NOTE: 10.0 lies outside the calibration values (-1 .. 4), so the
    # quantized input is likely to saturate and the result may be inaccurate.
    scale, zero_point = input_detail['quantization']
    limits = np.iinfo(input_detail['dtype'])
    input_data = np.clip(np.round(input_data / scale) + zero_point,
                         limits.min, limits.max)
input_data = input_data.astype(input_detail['dtype'])

interpreter.set_tensor(input_detail['index'], input_data)
interpreter.invoke()

output_data = interpreter.get_tensor(output_detail['index'])
if np.issubdtype(output_detail['dtype'], np.integer):
    # Dequantize: x = (q - zero_point) * scale
    scale, zero_point = output_detail['quantization']
    output_data = (output_data.astype(np.float32) - zero_point) * scale
output_data