## Data and Library Load

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cd /content/drive/MyDrive/Portofolio/smart_grid_stab

/content/drive/MyDrive/Portofolio/smart_grid_stab


In [3]:
!ls

Callbacks		 representative_labels.csv
model_quant.ipynb	 smart_grid_stability_augmented.csv
Models			 smart_grid_stability_train.ipynb
representative_data.csv  TFLite_Models


In [4]:
!ls /content/drive/MyDrive/Portofolio/smart_grid_stab/Models

model1	model2	model3	model4	model5	model6


In [5]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import glob

import tensorflow as tf


In [6]:
model = tf.keras.models.load_model('Models/model3')
model.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_29 (Dense)            (None, 256)               3328      
                                                                 
 batch_normalization_9 (Batc  (None, 256)              1024      
 hNormalization)                                                 
                                                                 
 dropout_8 (Dropout)         (None, 256)               0         
                                                                 
 dense_30 (Dense)            (None, 128)               32896     
                                                                 
 batch_normalization_10 (Bat  (None, 128)              512       
 chNormalization)                                                
                                                                 
 dropout_9 (Dropout)         (None, 128)              

In [7]:
X_test = pd.read_csv("representative_data.csv") 
X_test.head()

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4
0,1.34736,-0.765822,-0.910389,-1.192113,0.320451,0.003686,0.195272,-0.754046,1.56327,0.119907,-0.080848,0.149504
1,0.220956,1.679136,-1.241222,-0.248694,-1.675419,1.657664,0.291911,0.953829,-0.13488,-0.672706,0.551318,-1.424976
2,-0.916888,-1.293003,-1.331651,-1.329583,1.719103,-1.395985,0.083366,-1.666063,1.46627,1.257912,0.264974,-1.039676
3,1.152211,0.655809,-1.170751,-0.123448,0.172703,-0.083205,0.778455,-0.993445,-0.092881,1.344447,-0.603063,-0.983731
4,0.931419,1.58081,-0.348958,-1.485702,0.109953,0.141723,-1.668924,1.334284,-1.035258,-0.594951,1.337576,-0.385165


In [8]:
y_test = pd.read_csv("representative_labels.csv") 
y_test.head()

Unnamed: 0,stabf
0,1
1,1
2,1
3,0
4,1


In [9]:
X_t =X_test.values.astype('float32')
y_t =y_test.values.astype('float32').flatten()

## Convert to TF-LITE + Quantization

https://www.tensorflow.org/lite/performance/post_training_quantization

In [10]:
model.evaluate(X_t,y_t,)#baseline



[0.0675126165151596, 0.9797452688217163]

### No Quant

In [None]:
# Convert the model to the TensorFlow Lite format without quantization
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
open("TFLite_Models/model.tflite","wb").write(tflite_model)

INFO:tensorflow:Assets written to: /tmp/tmpk9bam48e/assets




181084

In [None]:
!apt-get update && apt-get -qq install xxd
!xxd -i TFLite_Models/model.tflite > TFLite_Models/model.h

0% [Working]            Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease [1,581 B]
Get:2 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease [3,626 B]
Get:3 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
Hit:4 http://archive.ubuntu.com/ubuntu bionic InRelease
Ign:5 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  InRelease
Get:6 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  Packages [806 kB]
Get:7 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease [15.9 kB]
Hit:8 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  Release
Get:9 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]
Hit:11 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease
Get:12 http://archive.ubuntu.com/ubuntu bionic-backports InRelease [74.6 kB]
Get:13 http://ppa.launchpad.net/deadsnake

In [None]:
print("Model in Mb:", os.path.getsize("TFLite_Models/model.h") / float(2**20))

Model in Mb: 1.0650529861450195


In [None]:
interpreter = tf.lite.Interpreter(model_path="TFLite_Models/model.tflite")
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
interpreter.allocate_tensors()

In [None]:
interpreter.set_tensor(input_details[0]['index'], [X_t[0]])
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])

output_data

array([[0.97291076]], dtype=float32)

In [None]:
#inference 
y_pred = []

for row in range(X_t.shape[0]):
  interpreter.set_tensor(input_details[0]['index'], [X_t[row]])
  interpreter.invoke()
  output_data = interpreter.get_tensor(output_details[0]['index'])

  y_pred.append(output_data)


In [None]:
y_pred = np.array(y_pred).flatten()
y_pred[y_pred>0.5] = 1 
y_pred[y_pred<=0.5] = 0 

In [None]:
acc = (y_pred==y_t).sum()/y_t.shape[0]
acc

0.9797452815712752

### DEFAULT optimization tries to optimize for both size and latency, minimizing accuracy drop

### Dynamic range quantization

The simplest form of post-training quantization statically quantizes only the weights from floating point to integer, which has 8-bits of precision. Quantizes all fixed parameters (such as weights)

https://www.tensorflow.org/lite/performance/post_training_integer_quant#convert_using_dynamic_range_quantization



In [None]:
# Convert the model to the TensorFlow Lite format without quantization
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

open("TFLite_Models/model.tflite","wb").write(tflite_model)

INFO:tensorflow:Assets written to: /tmp/tmp1yi639zc/assets


INFO:tensorflow:Assets written to: /tmp/tmp1yi639zc/assets


49264

In [None]:
!apt-get update && apt-get -qq install xxd
!xxd -i TFLite_Models/model.tflite > TFLite_Models/model.h

0% [Working]            Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease
0% [Connecting to archive.ubuntu.com] [Connecting to security.ubuntu.com] [Conn0% [1 InRelease gpgv 1,581 B] [Connecting to archive.ubuntu.com] [Connecting to                                                                               Ign:2 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  InRelease
0% [1 InRelease gpgv 1,581 B] [Connecting to archive.ubuntu.com] [Connecting to                                                                               Hit:3 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  Release
Hit:4 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease
Hit:5 http://archive.ubuntu.com/ubuntu bionic InRelease
Hit:6 http://security.ubuntu.com/ubuntu bionic-security InRelease
Hit:8 http://archive.ubuntu.com/ubuntu bionic-updates InRelease
Hit:9 http:

In [None]:
print("Model in Mb:", os.path.getsize("TFLite_Models/model.h") / float(2**20))

Model in Mb: 0.28981971740722656


In [None]:
interpreter = tf.lite.Interpreter(model_path="TFLite_Models/model.tflite")
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
interpreter.allocate_tensors()

In [None]:
interpreter.set_tensor(input_details[0]['index'], [X_t[0]])
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])

output_data

array([[0.97231555]], dtype=float32)

In [None]:
#inference 
y_pred = []

for row in range(X_t.shape[0]):
  interpreter.set_tensor(input_details[0]['index'], [X_t[row]])
  interpreter.invoke()
  output_data = interpreter.get_tensor(output_details[0]['index'])

  y_pred.append(output_data)


In [None]:
y_pred = np.array(y_pred).flatten()
y_pred[y_pred>0.5] = 1 
y_pred[y_pred<=0.5] = 0 

acc = (y_pred==y_t).sum()/y_t.shape[0]
acc

0.9802056160810189

### Full integer quantization

To quantize the variable data (such as model input/output and intermediates between layers), you need to provide a RepresentativeDataset. This is a generator function that provides a set of input data that's large enough to represent typical values. It allows the converter to estimate a dynamic range for all the variable data. (The dataset does not need to be unique compared to the training or evaluation dataset.) To support multiple inputs, each representative data point is a list and elements in the list are fed to the model according to their indices.

Now all weights and variable data are quantized, and the model is significantly smaller compared to the original TensorFlow Lite model.

However, to maintain compatibility with applications that traditionally use float model input and output tensors, the TensorFlow Lite Converter leaves the model input and output tensors in float:

https://www.tensorflow.org/lite/performance/post_training_integer_quant#convert_using_float_fallback_quantization

In [155]:
def representative_data_gen():
    for i_value in tf.data.Dataset.from_tensor_slices(X_t).batch(1).take(117):
      i_value_f32 = tf.dtypes.cast(i_value,tf.float32)
      yield [i_value_f32]


In [156]:
# Convert the model to the TensorFlow Lite format without quantization
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
tflite_model = converter.convert()

open("TFLite_Models/model.tflite","wb").write(tflite_model)

INFO:tensorflow:Assets written to: /tmp/tmpq4ikcazl/assets


INFO:tensorflow:Assets written to: /tmp/tmpq4ikcazl/assets


49600

In [157]:
!apt-get update && apt-get -qq install xxd
!xxd -i TFLite_Models/model.tflite > TFLite_Models/model.h

0% [Working]            Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease
0% [Waiting for headers] [Waiting for headers] [Waiting for headers] [Connectin                                                                               Hit:2 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease
0% [Waiting for headers] [Waiting for headers] [Connecting to ppa.launchpad.net0% [1 InRelease gpgv 1,581 B] [Waiting for headers] [Waiting for headers] [Conn                                                                               Get:3 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
0% [1 InRelease gpgv 1,581 B] [Waiting for headers] [3 InRelease 14.2 kB/88.7 k                                                                               Hit:4 http://archive.ubuntu.com/ubuntu bionic InRelease
0% [1 InRelease gpgv 1,581 B] [3 InRelease 14.2 kB/88.7 kB 16%] [Connecting to                              

In [158]:
print("Model in Mb:", os.path.getsize("TFLite_Models/model.h") / float(2**20))

Model in Mb: 0.2917957305908203


In [159]:
interpreter = tf.lite.Interpreter(model_path="TFLite_Models/model.tflite")
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
interpreter.allocate_tensors()

In [160]:
interpreter.set_tensor(input_details[0]['index'], [X_t[0]])
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])

output_data

array([[0.97265625]], dtype=float32)

In [161]:
#inference 
y_pred = []

for row in range(X_t.shape[0]):
  interpreter.set_tensor(input_details[0]['index'], [X_t[row]])
  interpreter.invoke()
  output_data = interpreter.get_tensor(output_details[0]['index'])

  y_pred.append(output_data)


In [162]:
y_pred = np.array(y_pred).flatten()
y_pred[y_pred>0.5] = 1 
y_pred[y_pred<=0.5] = 0 

acc = (y_pred==y_t).sum()/y_t.shape[0]
acc

0.9800521712444377

### Integer-Only uint8 quantization - NO LONGER SUPPORTED

To quantize the input and output tensors, and make the converter throw an error if it encounters an operation it cannot quantize, convert the model again with some additional parameters

https://www.tensorflow.org/lite/performance/post_training_integer_quant#convert_using_integer-only_quantization

In [None]:
def representative_data_gen():
    for i_value in tf.data.Dataset.from_tensor_slices(X_t).batch(1).take(117):
      i_value_f32 = tf.dtypes.cast(i_value,tf.float32)
      yield [i_value_f32]

In [None]:
for test in representative_data_gen():
  print(test)
  break

[<tf.Tensor: shape=(1, 12), dtype=float32, numpy=
array([[ 1.3473599 , -0.76582247, -0.9103893 , -1.1921128 ,  0.3204515 ,
         0.00368551,  0.19527173, -0.7540456 ,  1.5632699 ,  0.11990678,
        -0.08084752,  0.1495043 ]], dtype=float32)>]


In [None]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
# Ensure that if any ops can't be quantized, the converter throws an error
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# Set the input and output tensors to uint8 (APIs added in r2.3)

#ERROR IN DOCUMENTATION uint8 isn't used anymore, use int8
#https://github.com/tensorflow/tflite-micro/issues/280

#No longer supported
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8


tflite_model = converter.convert()

open("TFLite_Models/model.tflite","wb").write(tflite_model)

INFO:tensorflow:Assets written to: /tmp/tmp2r4wso4h/assets


INFO:tensorflow:Assets written to: /tmp/tmp2r4wso4h/assets


49632

In [None]:
!apt-get update && apt-get -qq install xxd
!xxd -i TFLite_Models/model.tflite > TFLite_Models/model.h

0% [Working]            Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease
0% [Connecting to archive.ubuntu.com] [Waiting for headers] [Connected to cloud0% [1 InRelease gpgv 1,581 B] [Connecting to archive.ubuntu.com] [Waiting for h                                                                               Get:2 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
0% [1 InRelease gpgv 1,581 B] [Connecting to archive.ubuntu.com] [2 InRelease 1                                                                               Hit:3 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease
0% [1 InRelease gpgv 1,581 B] [Connecting to archive.ubuntu.com] [2 InRelease 1                                                                               Ign:4 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  InRelease
Hit:5 https://developer.download.nvidia.com/compute/machine-le

In [None]:
print("Model in Mb:", os.path.getsize("TFLite_Models/model.h") / float(2**20))

Model in Mb: 0.29198265075683594


In [None]:
interpreter = tf.lite.Interpreter(model_path="TFLite_Models/model.tflite")
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
interpreter.allocate_tensors()

In [None]:
# interpreter.set_tensor(input_details[0]['index'], [X_t[0]])
# interpreter.invoke()
# output_data = interpreter.get_tensor(output_details[0]['index'])

# output_data

#error 
#ValueError: Cannot set tensor: Got value of type FLOAT32 but expected type UINT8 for input 0, name: serving_default_dense_29_input:0 

In [None]:
input_details[0]['dtype']

numpy.uint8

In [None]:
input_details[0]["quantization"]

(0.019139347597956657, 127)

In [None]:
X_t[0]#input needs to be converted to uint8

array([ 1.3473599 , -0.76582247, -0.9103893 , -1.1921128 ,  0.3204515 ,
        0.00368551,  0.19527173, -0.7540456 ,  1.5632699 ,  0.11990678,
       -0.08084752,  0.1495043 ], dtype=float32)

In [None]:
input_scale, input_zero_point = input_details[0]["quantization"]
((X_t[0]/input_scale) + input_zero_point).astype(input_details[0]["dtype"])

array([197,  86,  79,  64, 143, 127, 137,  87, 208, 133, 122, 134],
      dtype=uint8)

In [None]:
#inference 
y_pred = []

for row in range(X_t.shape[0]):
  interpreter.set_tensor(input_details[0]['index'], [((X_t[row]/input_scale) + input_zero_point).astype(input_details[0]["dtype"])])
  interpreter.invoke()
  output_data = interpreter.get_tensor(output_details[0]['index'])

  y_pred.append(output_data)


In [None]:
y_pred[0]

array([[250]], dtype=uint8)

In [None]:
output_details[0]["quantization"]

(0.00390625, 0)

In [None]:
output_scale, output_zero_point = output_details[0]["quantization"]
(y_pred[0]-output_zero_point) * output_scale

array([[0.9765625]])

In [None]:
y_pred = np.array(y_pred).flatten()
y_pred = (y_pred-output_zero_point) * output_scale
y_pred[y_pred>0.5] = 1 
y_pred[y_pred<=0.5] = 0 

acc = (y_pred==y_t).sum()/y_t.shape[0]
acc

0.9740678226177689

### Interger Only int8 instead of uint8 - low accuracy

In [142]:
def representative_data_gen():
    for i_value in tf.data.Dataset.from_tensor_slices(X_t).batch(1).take(117):
      i_value_f32 = tf.dtypes.cast(i_value,tf.float32)
      yield [i_value_f32]

In [143]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
# Ensure that if any ops can't be quantized, the converter throws an error
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# Set the input and output tensors to uint8 (APIs added in r2.3)

#ERROR IN DOCUMENTATION uint8 isn't used anymore, use int8
#https://github.com/tensorflow/tflite-micro/issues/280


converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
tflite_model = converter.convert()

open("TFLite_Models/model.tflite","wb").write(tflite_model)

INFO:tensorflow:Assets written to: /tmp/tmps2h9eftl/assets


INFO:tensorflow:Assets written to: /tmp/tmps2h9eftl/assets


49288

In [144]:
!apt-get update && apt-get -qq install xxd
!xxd -i TFLite_Models/model.tflite > TFLite_Models/model.h

0% [Working]            Get:1 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
0% [Connecting to archive.ubuntu.com (91.189.91.38)] [1 InRelease 14.2 kB/88.7                                                                                Hit:2 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease
0% [Waiting for headers] [1 InRelease 43.1 kB/88.7 kB 49%] [Connecting to ppa.l                                                                               Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease
0% [Waiting for headers] [1 InRelease 43.1 kB/88.7 kB 49%] [Waiting for headers0% [2 InRelease gpgv 3,626 B] [Waiting for headers] [1 InRelease 43.1 kB/88.7 k                                                                               Hit:4 http://archive.ubuntu.com/ubuntu bionic InRelease
Get:5 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]
Ign:6 https://developer.download.nvi

In [145]:
interpreter = tf.lite.Interpreter(model_path="TFLite_Models/model.tflite")
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
interpreter.allocate_tensors()

In [146]:
input_details[0]["quantization"]

(0.019139347597956657, -1)

In [147]:
input_scale, input_zero_point = input_details[0]["quantization"]

In [148]:
#inference 
y_pred = []

for row in range(X_t.shape[0]):
  interpreter.set_tensor(input_details[0]['index'], [((X_t[row]/input_scale) + input_zero_point).astype(input_details[0]["dtype"])])
  interpreter.invoke()
  output_data = interpreter.get_tensor(output_details[0]['index'])

  y_pred.append(output_data)

In [149]:
input_details[0]["dtype"]

numpy.int8

In [150]:
output_details[0]["dtype"]

numpy.int8

In [151]:
output_details[0]["quantization"]

(0.00390625, -128)

In [152]:
output_scale, output_zero_point = output_details[0]["quantization"]


In [153]:
scale, zero_point = output_details[0]["quantization"]
tflite_output = y_pred[0].astype(np.float32)
tflite_output = (tflite_output - zero_point) * scale
tflite_output

array([[0.97265625]], dtype=float32)

In [154]:
y_pred = np.array(y_pred).flatten()
y_pred = (y_pred.astype(np.float32)-output_zero_point) * output_scale
#treat positive as 1 and negatives as zero
y_pred[y_pred>0] = 1 
y_pred[y_pred<=0] = 0 

acc = (y_pred==y_t).sum()/y_t.shape[0]
acc

0.7989872640785638

In [None]:
# # Output of the TFLite model.
# interpreter = tf.lite.Interpreter(model_content=tflite_model) 
# interpreter.allocate_tensors()
# input_details = interpreter.get_input_details()[0]
# # Manually quantize the input from float to integer
# scale, zero_point = input_details['quantization']
# tflite_integer_input = tf_input / scale + zero_point
# tflite_integer_input = tflite_integer_input.astype(input_details['dtype'])
# interpreter.set_tensor(input_details['index'], tflite_integer_input)
# interpreter.invoke()
# output_details = interpreter.get_output_details()[0]
# tflite_integer_output = interpreter.get_tensor(output_details['index'])
# # Manually dequantize the output from integer to float
# scale, zero_point = output_details['quantization']
# tflite_output = tflite_integer_output.astype(np.float32)
# tflite_output = (tflite_output - zero_point) * scale

### Post-training float16 quantization

TensorFlow Lite now supports converting weights to 16-bit floating point values during model conversion from TensorFlow to TensorFlow Lite's flat buffer format. This results in a 2x reduction in model size. Some hardware, like GPUs, can compute natively in this reduced precision arithmetic, realizing a speedup over traditional floating point execution. The Tensorflow Lite GPU delegate can be configured to run in this way. However, a model converted to float16 weights can still run on the CPU without additional modification: the float16 weights are upsampled to float32 prior to the first inference. This permits a significant reduction in model size in exchange for a minimal impacts to latency and accuracy.

https://www.tensorflow.org/lite/performance/post_training_float16_quant

In [None]:
# Convert the model to the TensorFlow Lite format without quantization
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]
tflite_model = converter.convert()

open("TFLite_Models/model.tflite","wb").write(tflite_model)

INFO:tensorflow:Assets written to: /tmp/tmpd57qf_g0/assets


INFO:tensorflow:Assets written to: /tmp/tmpd57qf_g0/assets


93552

In [None]:
!apt-get update && apt-get -qq install xxd
!xxd -i TFLite_Models/model.tflite > TFLite_Models/model.h

0% [Working]            Hit:1 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease
0% [Connecting to archive.ubuntu.com] [Connecting to security.ubuntu.com (185.10% [1 InRelease gpgv 3,626 B] [Connecting to archive.ubuntu.com] [Connecting to                                                                               Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease
Hit:3 http://archive.ubuntu.com/ubuntu bionic InRelease
Ign:4 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  InRelease
Hit:5 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease
Get:6 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
Hit:7 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  Release
Get:8 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]
Get:9 http://archive.ubuntu.com/ubuntu bionic-backports InRelease [74.6 kB]


In [None]:
print("Model in Mb:", os.path.getsize("TFLite_Models/model.h") / float(2**20))

Model in Mb: 0.5502758026123047


In [None]:
interpreter = tf.lite.Interpreter(model_path="TFLite_Models/model.tflite")
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
interpreter.allocate_tensors()

In [None]:
interpreter.set_tensor(input_details[0]['index'], [X_t[0]])
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])

output_data

array([[0.9729052]], dtype=float32)

In [None]:
y_pred = []

for row in range(X_t.shape[0]):
  interpreter.set_tensor(input_details[0]['index'], [X_t[row]])
  interpreter.invoke()
  output_data = interpreter.get_tensor(output_details[0]['index'])

  y_pred.append(output_data)

In [None]:
y_pred = np.array(y_pred).flatten()
y_pred[y_pred>0.5] = 1 
y_pred[y_pred<=0.5] = 0 

acc = (y_pred==y_t).sum()/y_t.shape[0]
acc

0.9797452815712752

### Post-training integer quantization with int16 activation

TensorFlow Lite now supports converting activations to 16-bit integer values and weights to 8-bit integer values during model conversion from TensorFlow to TensorFlow Lite's flat buffer format. We refer to this mode as the "16x8 quantization mode". This mode can improve accuracy of the quantized model significantly, when activations are sensitive to the quantization, while still achieving almost 3-4x reduction in model size. Moreover, this fully quantized model can be consumed by integer-only hardware accelerators.

https://www.tensorflow.org/lite/performance/post_training_integer_quant_16x8

In [None]:
def representative_data_gen():
    for i_value in tf.data.Dataset.from_tensor_slices(X_t).batch(1).take(117):
      i_value_f32 = tf.dtypes.cast(i_value,tf.float32)
      yield [i_value_f32]

In [None]:
# Convert the model to the TensorFlow Lite format without quantization
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8]
converter.representative_dataset = representative_data_gen
tflite_model = converter.convert()

open("TFLite_Models/model.tflite","wb").write(tflite_model)

INFO:tensorflow:Assets written to: /tmp/tmp2v0tpnxu/assets


INFO:tensorflow:Assets written to: /tmp/tmp2v0tpnxu/assets


51744

In [None]:
!apt-get update && apt-get -qq install xxd
!xxd -i TFLite_Models/model.tflite > TFLite_Models/model.h

0% [Working]            Hit:1 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease
0% [Connecting to archive.ubuntu.com (185.125.190.36)] [Connecting to security.0% [1 InRelease gpgv 3,626 B] [Connecting to archive.ubuntu.com (185.125.190.36                                                                               Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease
Ign:3 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  InRelease
Hit:4 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  Release
Get:5 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
Hit:6 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease
Hit:7 http://archive.ubuntu.com/ubuntu bionic InRelease
Get:8 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]
Hit:9 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease
Hit:11 h

In [None]:
print("Model in Mb:", os.path.getsize("TFLite_Models/model.h") / float(2**20))

Model in Mb: 0.30440330505371094


In [None]:
interpreter = tf.lite.Interpreter(model_path="TFLite_Models/model.tflite")
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
interpreter.allocate_tensors()

In [None]:
interpreter.set_tensor(input_details[0]['index'], [X_t[0]])
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])

output_data

array([[0.35510254]], dtype=float32)

In [None]:
y_pred = []

for row in range(X_t.shape[0]):
  interpreter.set_tensor(input_details[0]['index'], [X_t[row]])
  interpreter.invoke()
  output_data = interpreter.get_tensor(output_details[0]['index'])

  y_pred.append(output_data)

In [None]:
y_pred = np.array(y_pred).flatten()
y_pred[y_pred>0.5] = 1 
y_pred[y_pred<=0.5] = 0 

acc = (y_pred==y_t).sum()/y_t.shape[0]
acc

0.7649225103575265

### OPTIMIZE_FOR_SIZE AND OPTIMIZE_FOR_LATENCY ARE THE SAME AS OPTIMIZE_DEFAULT

no longer supported basically