# Computation of the quantization process

In [2]:
import tensorflow as tf
import numpy as np
import pathlib
import netron

# Create nn model

In [3]:
# Training set: four samples with three features each, and one label per sample.
train_data = np.array(
    [
        [0.52986234, -0.2172981, -1.436018],
        [-0.70177984, 1.6174504, 0.03002556],
        [-0.8451145, -0.16036184, 0.6119909],
        [0.95792836, 0.5761335, -1.4148498],
    ],
    dtype=np.float32,
)
train_labels = np.array([0, 1, 0, 1], dtype=np.float32)

# A single extra sample with its label.
test_data = np.array([[-0.60904104, 0.73394835, 0.8958842]], dtype=np.float32)
test_labels = np.array([1], dtype=np.float32)

# Evaluation set: the four training rows followed by the test row.
eva_data = np.concatenate((train_data, test_data), axis=0)


# Define a simple sequential model
def create_model():
  """Build and compile the two-layer classifier used in this notebook.

  The network takes 3 input features, applies a 3-unit ReLU Dense layer and
  ends in a 2-unit Dense layer without activation. The loss is configured
  with ``from_logits=True`` to match that un-activated output.

  Returns:
    The compiled ``tf.keras.Sequential`` model.
  """
  hidden_layer = tf.keras.layers.Dense(3, activation='relu', input_shape=(3,))
  logits_layer = tf.keras.layers.Dense(2)
  model = tf.keras.Sequential([hidden_layer, logits_layer])

  loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
  model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

  return model

# Create a basic model instance
model = create_model()

# Display the model's architecture.
# summary() prints the table itself and returns None, so it is called
# directly; wrapping it in print() appends a stray "None" line to the output.
model.summary()

# Train for five epochs on the training samples, evaluating on the single
# test sample after every epoch.
model.fit(
  train_data,
  train_labels,
  epochs=5,
  validation_data=(test_data, test_labels)
)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 3)                 12        
                                                                 
 dense_1 (Dense)             (None, 2)                 8         
                                                                 
Total params: 20
Trainable params: 20
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1fe27695708>

# save float weights

In [4]:
# Directory that receives the files written by this cell.
tflite_models_dir = pathlib.Path("models")
tflite_models_dir.mkdir(parents=True, exist_ok=True)

# Plain-string form of the weights path prefix.
float_weights_path = r"./models/float_nn_tf/float_weights"

# Path prefix handed to save_weights(), and the ".index" file next to it that
# serves as the "already saved" marker below.
float_weights_model_file = tflite_models_dir / "float_nn_tf" / "float_weights"
float_weights_model_file_index = float_weights_model_file.with_name("float_weights.index")

# Save only once: when the index file already exists nothing is written.
if not float_weights_model_file_index.is_file():
    model.save_weights(float_weights_model_file)
    print("Float weights saved to: ", float_weights_model_file)


# load float weights

In [5]:
# Rebuild the architecture and restore the weights stored on disk.
model = create_model()
model.load_weights(float_weights_model_file)

# summary() prints the table and returns None; call it on its own so the
# separator block is not prefixed with a literal "None".
model.summary()
print('\n-----------------------------------------\n-----------------------------------------\n-----------------------------------------')

# get_weights() on each layer yields two arrays (weights first, biases second);
# unpack them in one call per layer instead of calling get_weights() twice.
first_layer_weights, first_layer_biases = model.layers[0].get_weights()
second_layer_weights, second_layer_biases = model.layers[1].get_weights()

print(f'first_layer_weights:\n {first_layer_weights} \n-----------')
print(f'first_layer_biases:\n {first_layer_biases} \n-----------\n-----------')
print(f'second_layer_weights:\n {second_layer_weights} \n-----------')
print(f'second_layer_biases:\n {second_layer_biases} \n-----------')

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, 3)                 12        
                                                                 
 dense_3 (Dense)             (None, 2)                 8         
                                                                 
Total params: 20
Trainable params: 20
Non-trainable params: 0
_________________________________________________________________
None 
-----------------------------------------
-----------------------------------------
-----------------------------------------
first_layer_weights:
 [[ 0.38925368  0.538839   -0.7989037 ]
 [ 0.19111627 -0.04035597 -0.33481574]
 [ 0.13654101 -0.4816512   0.7096706 ]] 
-----------
first_layer_biases:
 [-0.00499839 -0.00499784 -0.0049962 ] 
-----------
-----------
second_layer_weights:
 [[ 0.80786335 -0.38606948]
 [ 1.0455463  -0.54191   ]
 [ 0.060

In [6]:
# Write the float weights to a text file once; an existing file is left untouched.
if not pathlib.Path(f'{float_weights_path}_details.txt').is_file():

    with open(f'{float_weights_path}_details.txt', 'w') as f:
        # Weights and biases of both layers, read from the current model.
        first_layer_weights, first_layer_biases = model.layers[0].get_weights()
        second_layer_weights, second_layer_biases = model.layers[1].get_weights()

        # One text section per array, in the same order as they are printed above.
        f.writelines([
            f'first_layer_weights:\n {first_layer_weights} \n-----------\n',
            f'first_layer_biases:\n {first_layer_biases} \n-----------\n-----------\n',
            f'second_layer_weights:\n {second_layer_weights} \n-----------\n',
            f'second_layer_biases:\n {second_layer_biases} \n-----------\n',
        ])

    print('Details saved to: ', f'{float_weights_path}_details.txt')

# Convert nn to int8 quantized model

In [7]:
def representative_data_gen():
    """Yield calibration samples for the TFLite converter, one per training row.

    Each sample is yielded as a single-element list holding the row with a
    leading batch axis of size 1, so a row of shape ``(3,)`` is presented as
    ``(1, 3)`` and has the same rank as the model input.
    """
    for input_value in train_data:
        # Iterating the 2-D array drops the sample axis; restore it as batch=1.
        yield [input_value[np.newaxis, ...]]


# Build a converter for the current Keras model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Restrict the converter to the int8 builtin op set, so that an op which
# cannot be quantized makes the conversion raise an error.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

# The model's input and output tensors are int8 as well.
converter.inference_output_type = tf.int8
converter.inference_input_type = tf.int8

# Default optimizations, with the generator above as the representative dataset.
converter.representative_dataset = representative_data_gen
converter.optimizations = [tf.lite.Optimize.DEFAULT]

tflite_model_quant = converter.convert()

INFO:tensorflow:Assets written to: C:\Users\Patrik\AppData\Local\Temp\tmp_sgtgdje\assets




# save quantized (int8) model

In [8]:
# Make sure the output directory exists.
tflite_models_dir = pathlib.Path("models")
tflite_models_dir.mkdir(parents=True, exist_ok=True)

# Extension-less string path of the quantized model; suffixes are appended where it is used.
model_path = r"models/int8_tflite_model"

# Write the converted flatbuffer once: an existing .tflite file is not overwritten.
int8_tflite_model_file = tflite_models_dir.joinpath("int8_tflite_model.tflite")
if not int8_tflite_model_file.is_file():
    int8_tflite_model_file.write_bytes(tflite_model_quant)
    print("Model saved to: ", int8_tflite_model_file)

In [9]:
# Open the saved .tflite file in the Netron viewer.
netron.start(f'{model_path}.tflite')

Serving 'models/int8_tflite_model.tflite' at http://localhost:8080


('localhost', 8080)

# load interpreter

In [10]:
# Dump every tensor of the saved int8 model (quantization parameters and
# contents) to a text file. Skipped when the details file already exists.
if not pathlib.Path(model_path + r'_details.txt').is_file():

    interpreter_saved_details = tf.lite.Interpreter(model_path=model_path + r'.tflite')
    interpreter_saved_details.allocate_tensors()

    # Query the tensor metadata once and reuse it for both passes below.
    tensor_details = interpreter_saved_details.get_tensor_details()

    with open(model_path + r'_details.txt', 'w') as f:
        f.write('Tensor details:\n\n')
        # The loop variables are called `detail` / `dtype` on purpose: at
        # notebook top level, names like `dict` or `type` would rebind the
        # builtins for every cell executed afterwards.
        for detail in tensor_details:
            i = detail['index']
            tensor_name = detail['name']
            scales = detail['quantization_parameters']['scales']
            zero_points = detail['quantization_parameters']['zero_points']
            tensor = interpreter_saved_details.tensor(i)()
            dtype = detail['dtype']

            tensor_arr = np.array(tensor)

            f.write(
                f'{i} {dtype} {tensor_name} \n scales:\n {scales} \n zero_points:\n {zero_points} \n tensor_shape:\n {tensor.shape}\n tensor:\n {np.array2string(tensor_arr, threshold=np.inf, max_line_width=np.inf, separator=", ")}\n')
            f.write(
                '\n\n------------------------------------------------------------------------------------------------------------------------\n\n')

        # Second pass: the raw detail dictionaries, one per line.
        for item in tensor_details:
            f.write(str(item).replace('{\'name', '\n{\'name'))

    print('Details saved to: ', model_path + r'_details.txt')



# Calculations

In [11]:
# Bare expression: the cell output is the repr of tf.quantization.quantize,
# which shows the function's signature and default arguments.
tf.quantization.quantize

<function tensorflow.python.ops.array_ops.quantize(input, min_range, max_range, T, mode='MIN_COMBINED', round_mode='HALF_AWAY_FROM_ZERO', name=None, narrow_range=False, axis=None, ensure_minimum_range=0.01)>

# Weight quantization

In [12]:
def weight_scaling_factor(a, b, min_T, max_T):
    """Choose the scale for mapping the real range ``[a, b]`` onto ``[min_T, max_T]``.

    Two candidate scales are computed, ``a / min_T`` and ``b / max_T``. The
    larger one is used, and the opposite end of the real range is replaced
    by the matching integer bound multiplied by that scale.

    Returns:
        A tuple ``(scale, a, b)`` holding the chosen scale and the adjusted range.
    """
    scale_from_low = a / min_T
    scale_from_high = b / max_T

    if scale_from_low > scale_from_high:
        # Lower end sets the scale: keep a, recompute the upper end.
        return scale_from_low, a, max_T * scale_from_low

    # Otherwise the upper end sets the scale: keep b, recompute the lower end.
    return scale_from_high, min_T * scale_from_high, b

def clamp(r,a,b):
    """Limit ``r`` to the interval ``[a, b]``: raise it to ``a``, then cap it at ``b``."""
    raised = max(r, a)
    return min(raised, b)

def weight_quan(r,a,b, min_T, max_T):
    """Quantize one real weight ``r`` to the integer range ``[min_T, max_T]``.

    The scale and adjusted real range come from ``weight_scaling_factor``.
    ``r`` is clamped to that range, shifted, divided by the scale, rounded and
    offset by ``min_T``. Intermediate values, and the real value recovered
    from the result with a zero point of 0, are printed along the way.

    Returns:
        The quantized value (a float holding an integer, as produced by ``np.round``).
    """
    scale, low, high = weight_scaling_factor(a, b, min_T, max_T)
    print(f'r: {r} a: {low} b: {high}')

    clipped = clamp(r, low, high)
    q_value = np.round((clipped - low) / scale) + min_T

    print(f's: {scale}')
    print(f'q_value: {q_value}')

    # Zero point is fixed at 0 here; the recovered value is for display only.
    z = 0
    recovered = scale * (q_value - z)
    print(f'z: {z} r: {recovered}')

    return q_value

In [13]:
# Real range taken from the first-layer weights printed earlier in this notebook.
weight_scaling_factor(a=-0.7989037, b=0.7096706, min_T=-127, max_T=127)

(0.00629058031496063, -0.7989037, 0.7989037)

In [14]:
# Quantize one first-layer weight against the range used in the previous cell.
weight_quan(r=-0.04035597, a=-0.7989037, b=0.7096706, min_T=-127, max_T=127)

r: -0.04035597 a: -0.7989037 b: 0.7989037
s: 0.00629058031496063
q_value: -6.0
z: 0 r: -0.03774348188976378


-6.0

# Activation quantization

In [15]:
def activation_scaling_factor(a, b, n):
    """Return the step size that divides the real range ``[a, b]`` into ``n - 1`` equal intervals."""
    span = b - a
    intervals = n - 1
    return span / intervals

def clamp(r,a,b):
    """Limit ``r`` to the interval ``[a, b]``.

    NOTE(review): this has the same body as the ``clamp`` defined in the
    weight-quantization cell and replaces that definition when this cell runs.
    """
    raised = a if a > r else r
    return b if b < raised else raised

def activation_quan(r,a,b, n):
    """Quantize one real activation ``r`` from the range ``[a, b]`` onto ``n`` levels.

    The scale comes from ``activation_scaling_factor``. ``r`` is clamped to
    ``[a, b]``, shifted by ``a``, divided by the scale, rounded, and offset by
    ``n / 2`` so the result is centred on zero. The zero point and the real
    value recovered from the quantized result are printed for inspection.

    Args:
        r: Real value to quantize.
        a: Lower end of the real range.
        b: Upper end of the real range.
        n: Number of quantization levels (256 in the notebook's example call).

    Returns:
        The quantized value (a float holding an integer, as produced by ``np.round``).
    """
    s = activation_scaling_factor(a, b, n)

    print(f'r: {r} a: {a} b: {b}')

    half_levels = n / 2
    q_value = np.round((clamp(r, a, b) - a) / s) - half_levels

    print(f's: {s}')
    print(f'q_value: {q_value}')

    # The zero point is the quantized position of real 0.0. It depends only on
    # the range and the scale, never on the value being quantized. Deriving it
    # from r (z = q_value - r / s) would make s * (q_value - z) return r
    # exactly and hide the rounding error.
    z = np.round((0 - a) / s) - half_levels
    r_recovered = s * (q_value - z)

    print(f'z: {z} r: {r_recovered}')

    return q_value

In [16]:
# Quantize the real value 0 against the min / max of the training data, with 256 levels.
activation_quan(r=0, a=-1.436018, b=1.6174504, n=256)

r: 0 a: -1.436018 b: 1.6174504
s: 0.011974385882352941
q_value: -8.0
z: -8.0 r: 0.0


-8.0

# Bias quantization

In [17]:
def bias_quan(r, s_w, s_i):
    """Quantize a real bias ``r`` using the product of the input scale and the weight scale.

    Args:
        r: Real bias value.
        s_w: Weight scale.
        s_i: Input (activation) scale.

    Returns:
        ``r`` divided by ``s_i * s_w``, rounded with ``np.round``.
    """
    bias_scale = s_i * s_w
    return np.round(r / bias_scale)

In [18]:
# Bias value from the first-layer biases printed earlier; the two scales are the
# ones printed by the weight and activation quantization cells above.
bias_quan(r=-0.0049962, s_w=0.00629058031496063, s_i=0.011974385882352941)

-66.0

# Calculation of a and b

In [19]:
def calculation_a_b(num_arrays, input_array):
    """Return the overall minimum and maximum over the first ``num_arrays`` entries of ``input_array``.

    The minimum and maximum of each entry ``input_array[i]`` are collected
    into float64 arrays, which are then reduced to a single ``(min, max)`` pair.

    Args:
        num_arrays: Number of leading entries of ``input_array`` to scan.
        input_array: Indexable collection of arrays.

    Returns:
        A tuple ``(a, b)`` with the smallest and the largest value found.
    """
    indices = range(num_arrays)
    row_minima = np.array([np.min(input_array[i]) for i in indices], dtype=np.float64)
    row_maxima = np.array([np.max(input_array[i]) for i in indices], dtype=np.float64)

    return np.min(row_minima), np.max(row_maxima)

In [22]:
# Overall min / max of the training data, scanning one row per sample.
calculation_a_b(len(train_data), train_data)

(-1.4360179901123047, 1.6174503564834595)

In [23]:
# Shape of the training array. As defined in the data cell it holds 4 samples
# with 3 features each, i.e. (4, 3).
# NOTE(review): the recorded output below shows (3,), which does not match that
# definition. Presumably the output is stale or train_data was rebound by a
# cell not shown; re-run from a fresh kernel to confirm.
np.shape(train_data)

(3,)