# This is designed for inference calculation check only

In [2]:
import logging
logging.getLogger("tensorflow").setLevel(logging.DEBUG)

import tensorflow as tf
import netron
import pathlib
import numpy as np
from collections import OrderedDict

# Model

In [3]:
train_data = np.array(
    [[ 0.52986234, -0.2172981,  -1.436018  ],
    [-0.70177984,  1.6174504,   0.03002556 ],
    [-0.8451145,  -0.16036184,  0.6119909 ],
    [ 0.95792836,  0.5761335,  -1.4148498 ]], dtype=np.float32)
train_labels = np.array([0, 1, 0, 1], dtype=np.float32)

test_data = np.array([[-0.60904104,  0.73394835,  0.8958842 ]], dtype=np.float32)
test_labels = np.array([1], dtype=np.float32)

eva_data = np.array(
    [[ 0.52986234, -0.2172981,  -1.436018  ],
    [-0.70177984,  1.6174504,   0.03002556 ],
    [-0.8451145,  -0.16036184,  0.6119909 ],
    [ 0.95792836,  0.5761335,  -1.4148498 ],
     [-0.60904104,  0.73394835,  0.8958842 ]], dtype=np.float32)

model = tf.keras.Sequential([
  tf.keras.layers.Dense(3, activation='relu', input_shape=(3,)),
  tf.keras.layers.Dense(2)
])


# Train the digit classification model
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(
                  from_logits=True),
              metrics=['accuracy'])
model.fit(
  train_data,
  train_labels,
  epochs=5,
  validation_data=(test_data, test_labels)
)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1d4e6b79948>

# Quantization

In [4]:
def representative_data_gen():
    for input_value in train_data:
        yield [input_value]


converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen

# Ensure that if any ops can't be quantized, the converter throws an error
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

# Set the input and output tensors to uint8
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

tflite_model_quant = converter.convert()

INFO:tensorflow:Assets written to: C:\Users\Patrik\AppData\Local\Temp\tmpqmieoknn\assets




# Save model

In [5]:
tflite_models_dir = pathlib.Path("models")
tflite_models_dir.mkdir(exist_ok=True, parents=True)

model_path = r"models/ezmode_tflite_model_quant"

# Save the model:
tflite_model_file = tflite_models_dir / "ezmode_tflite_model_quant.tflite"
if not tflite_model_file.is_file():
    tflite_model_file.write_bytes(tflite_model_quant)
    print("Model saved to: ", tflite_model_file)

# Evaluate model

In [6]:
interpreter = tf.lite.Interpreter(model_path=model_path + r'.tflite')

input_type = interpreter.get_input_details()[0]
print('input: ', input_type['dtype'], input_type['index'])
output_type = interpreter.get_output_details()[0]
print('output: ', output_type['dtype'], output_type['index'])

input:  <class 'numpy.uint8'> 0
output:  <class 'numpy.uint8'> 8


In [7]:
def evaluate_model(interpreter):
  interpreter.allocate_tensors()
  input_index = interpreter.get_input_details()[0]["index"]
  output_index = interpreter.get_output_details()[0]["index"]
  input_details = interpreter.get_input_details()[0]

  # Run predictions on every image in the "test" dataset.
  prediction_digits = []
  for i, test_d in enumerate(eva_data):
    # Pre-processing: add batch dimension and convert to match with the model's input data format.

    # Check if the input type is quantized, then rescale input data to uint8
    if input_details["dtype"] == np.uint8:
      input_scale, input_zero_point = input_details["quantization"]
      test_d = test_d / input_scale + input_zero_point

    test_d = np.expand_dims(test_d, axis=0).astype(input_details["dtype"])
    # print('input: ', test_d)
    # print('test_d: ', test_d.shape, test_d.dtype, test_d)
    interpreter.set_tensor(input_index, test_d)

    # Run inference.
    interpreter.invoke()

    # Post-processing: remove batch dimension and find the digit with highest probability.
    output = interpreter.get_tensor(output_index)
    print("input: ", interpreter.get_tensor(0))
    print("q1_output: ", interpreter.get_tensor(5))
    print("l1_output: ", interpreter.get_tensor(6))
    print("l2_output: ", interpreter.get_tensor(7))
    print('q2_output: ', output)
    print('-----------------')
    digit = np.argmax(output[0])
    prediction_digits.append(digit)

  # Compare prediction results with ground truth labels to calculate accuracy.
  prediction_digits = np.array(prediction_digits)
  accuracy = (prediction_digits == test_labels).mean()
  return accuracy, output, prediction_digits

In [8]:
acc, output, pred = evaluate_model(interpreter)
print('acc: ', acc)
print('output: ', output)
print('pred: ', pred)

input:  [[164 101   0]]
q1_output:  [[  86   15 -128]]
l1_output:  [[ -42 -113 -128]]
l2_output:  [[86 15]]
q2_output:  [[214 143]]
-----------------
input:  [[ 61 255 122]]
q1_output:  [[ 126 -128   -6]]
l1_output:  [[ -2   0 127]]
l2_output:  [[ 126 -128]]
q2_output:  [[254   0]]
-----------------
input:  [[ 49 106 171]]
q1_output:  [[28 36 43]]
l1_output:  [[-100  -92  -74]]
l2_output:  [[28 36]]
q2_output:  [[156 164]]
-----------------
input:  [[199 168   1]]
q1_output:  [[  84   14 -127]]
l1_output:  [[ -44 -114 -115]]
l2_output:  [[84 14]]
q2_output:  [[212 142]]
-----------------
input:  [[ 69 181 194]]
q1_output:  [[ 87 -22  66]]
l1_output:  [[-41 106 -18]]
l2_output:  [[ 87 -22]]
q2_output:  [[215 106]]
-----------------
acc:  0.2
output:  [[215 106]]
pred:  [0 0 1 0 0]


# Save model details after evaluation

In [9]:
if not pathlib.Path(model_path + r'_details.txt').is_file():

    interpreter_saved_details = tf.lite.Interpreter(model_path=model_path + r'.tflite')
    evaluate_model(interpreter_saved_details)

    with open(model_path + r'_details.txt', 'w') as f:
        f.write('Tensor details:\n\n')
        for dict in interpreter_saved_details.get_tensor_details():
            i = dict['index']
            tensor_name = dict['name']
            scales = dict['quantization_parameters']['scales']
            zero_points = dict['quantization_parameters']['zero_points']
            tensor = interpreter_saved_details.tensor(i)()
            type = dict['dtype']

            tensor_arr = np.array(tensor)

            f.write(
                f'{i} {type} {tensor_name} \n scales:\n {scales} \n zero_points:\n {zero_points} \n tensor_shape:\n {tensor.shape}\n tensor:\n {np.array2string(tensor_arr, threshold=np.inf, max_line_width=np.inf, separator=", ")}\n')
            f.write(
                '\n\n------------------------------------------------------------------------------------------------------------------------\n\n')

        for item in interpreter_saved_details.get_tensor_details():
            f.write(str(item).replace('{\'name', '\n{\'name'))

    print('Details saved to: ', model_path + r'_details.txt')

# Netron

In [10]:
netron.start(model_path + r'.tflite')

Serving 'models/ezmode_tflite_model_quant.tflite' at http://localhost:8080


('localhost', 8080)

# Manual calculation of Inference

In [11]:
def affine_mapping(S, Z, q):
    return S * (q - Z)

def affine_mapping_inv(S, Z, r):
    return int((r / S) + Z)


In [12]:
class Module:
    def __init__(self):
        self.modules = OrderedDict()

    def add_module(self, module, name:str):
        if hasattr(self, name) and name not in self.modules:
            raise KeyError("attribute '{}' already exists".format(name))
        elif '.' in name:
            raise KeyError("module name can't contain \".\"")
        elif name == '':
            raise KeyError("module name can't be empty string \"\"")
        self.modules[name] = module

    def forward(self, input) -> np.ndarray:
        for module in self.modules:
            print(f"module: {module}")
            input = self.modules[module].forward(input)

        return input

    def backward(self, *args, **kwargs):
        pass

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)


In [13]:
#------------------------------------------------------------------------------
#   Linear class
#------------------------------------------------------------------------------
class Linear(Module):
    def __init__(self, w, b, s_w, s_i, s_o, z_i, z_o):
        super(Linear, self).__init__()
        self.W = w
        self.b = b
        self.z_i = z_i
        self.z_o = z_o
        self.s_i = s_i
        self.s_w = s_w
        self.s_o = s_o
        self.m = s_i * s_w / s_o
        self.s_b = s_i * s_w
        self.m_s = s_i / s_w

    def forward(self, input: np.ndarray) -> np.ndarray:

        # print(input.shape, self.W.shape)

        # o = np.zeros((input.shape[0], self.W.shape[1]))
        #
        #
        # for i in range(input.shape[0]):
        #     for k in range(self.W.shape[1]):
        #         for j in range(input.shape[1]):
        #             o[i][k] += (input[i][j] - self.z_i) * self.W[j][k]
        #             print((input[i][j] - self.z_i) * self.W[j][k])
        #             print(f'input[i][j] = {input[i][j]}, z_i = {self.z_i}, W[j][k] = {self.W[j][k]}')
        #             print((self.z_o + self.m * (input[i][j] - self.z_i) * self.W[j][k]))
        #
        #         print(f'z_o = {self.z_o}, m = {self.m}, o = {o[i][k]}')
        #         o[i][k] = self.z_o + self.m * o[i][k]
        #
        # print(f'input: {input} \noutput1: {o}\n-----------------')

        output = np.zeros((input.shape[0], self.W.shape[1]))
        a2 = np.zeros((self.W.shape[1]))

        for i in range(input.shape[0]):
            for k in range(self.W.shape[1]):
                for j in range(input.shape[1]):
                    a2[k] += self.W[j][k]
                    output[i][k] += input[i][j] * self.W[j][k]

                    # print(f'W = {self.W[j][k]}, output_64 = {input[i][j] * self.W[j][k]}')
                # output[i][k] = int((self.z_o + self.m * (-self.z_i*a2[k] + output[i][k] + self.b[0][k] )) / self.m_s)
                # output[i][k] = int(self.z_o + self.m * (-self.z_i*a2[k] + output[i][k] + self.b[0][k] ))
                print(f'z_o = {self.z_o}, m = {self.m}, o = {output[i][k]}, b = {self.b[0][k]}, a2 = {a2[k]}, z_i = {self.z_i}, zia2: {-self.z_i*a2[k]}')
                output[i][k] = tf.dtypes.saturate_cast(self.z_o + self.m * (-self.z_i*a2[k] + output[i][k] + self.b[0][k] ), tf.int8)


        print(f'input: {input} \noutput2: {output}\n-----------------')

        return output

    def backward(self, dNet):
        pass

#------------------------------------------------------------------------------
#   Quantization class
#------------------------------------------------------------------------------
class Quantization(Module):
    def __init__(self, s, z_i, z_o, sign):
        super(Quantization, self).__init__()
        self.z_i = z_i
        self.z_o = z_o
        self.s = s
        self.sign = sign

    def forward(self, input: np.ndarray) -> np.ndarray:
        # arr_r = np.zeros(input.shape, dtype=np.float32)
        # for i in range(input.shape[0]):
        #     for j in range(input.shape[1]):
        #         arr_r[i][j] = affine_mapping(self.s, self.z_i, input[i][j])
        #
        # print(arr_r)
        #
        # arr_q = np.zeros(input.shape)
        # for i in range(input.shape[0]):
        #     for j in range(input.shape[1]):
        #         arr_q[i][j] = affine_mapping_inv(self.s, self.z_o, arr_r[i][j])


        arr_q = (input - 128) if (self.sign == 0) else (input + 128)


        # print(f'input: {input} \noutput: {arr_q}\n-----------------')
        return arr_q


    def backward(self, dNet):
        pass

In [14]:
quan_s1 = 0.01197439
quan_z_i1 = 120
quan_z_o1 = -8

w1 = np.array( [[ -63, -127,   -4], [  87,  -84,  -65], [-127,  114,  -49]]).T
b1 = np.array([[-104, -103, -104]])
s_i1 = 0.01197439
s_w1 = 0.00402647
s_o1 = 0.00426930
z_i1 = -8
z_o1 = -128

w2 = np.array( [[-113,   34,   47], [  43,  -51, -127]]).T
b2 = np.array([[-143,  143]])
s_i2 = 0.00426930
s_w2 = 0.00819842
s_o2 = 0.00605172
z_i2 = -128
z_o2 = 58

quan_s2 = 0.00605172
quan_z_i2 = 58
quan_z_o2 = 186

In [15]:
module = Module()
module.add_module(Quantization(quan_s1, quan_z_i1, quan_z_o1, 0), 'q1')
module.add_module(Linear(w1, b1, s_w1, s_i1, s_o1, z_i1, z_o1), 'l1')
module.add_module(Linear(w2, b2, s_w2, s_i2, s_o2, z_i2, z_o2), 'l2')
module.add_module(Quantization(quan_s2, quan_z_i2, quan_z_o2, 1), 'q2')
module.forward(np.array( [[ 69, 181, 194]]))

module: q1
module: l1
z_o = -128, m = 0.01129330852910313, o = -3278.0, b = -104, a2 = -194.0, z_i = -8, zia2: -1552.0
z_o = -128, m = 0.01129330852910313, o = -13875.0, b = -103, a2 = -62.0, z_i = -8, zia2: -496.0
z_o = -128, m = 0.01129330852910313, o = 10301.0, b = -104, a2 = -62.0, z_i = -8, zia2: -496.0
input: [[-59  53  66]] 
output2: [[-128. -128.  -18.]]
-----------------
module: l2
z_o = 58, m = 0.005783729998413674, o = 9266.0, b = -143, a2 = -32.0, z_i = -128, zia2: -4096.0
z_o = 58, m = 0.005783729998413674, o = 3310.0, b = 143, a2 = -135.0, z_i = -128, zia2: -17280.0
input: [[-128. -128.  -18.]] 
output2: [[ 87. -21.]]
-----------------
module: q2


array([[215., 107.]])

In [16]:
evaluate_model(interpreter)
print('-----------------')
print(f"my: ", module.forward(np.array( [[164, 101,   0]])))
print('-----------------\n-----------------')
print(f"my: ", module.forward(np.array( [[ 61, 255, 122]])))
print(f"my: ", module.forward(np.array( [[ 49, 106, 171]])))
print(f"my: ", module.forward(np.array( [[199, 168,   1]])))
print(f"my: ", module.forward(np.array( [[ 69, 181, 194]])))

input:  [[164 101   0]]
q1_output:  [[  86   15 -128]]
l1_output:  [[ -42 -113 -128]]
l2_output:  [[86 15]]
q2_output:  [[214 143]]
-----------------
input:  [[ 61 255 122]]
q1_output:  [[ 126 -128   -6]]
l1_output:  [[ -2   0 127]]
l2_output:  [[ 126 -128]]
q2_output:  [[254   0]]
-----------------
input:  [[ 49 106 171]]
q1_output:  [[28 36 43]]
l1_output:  [[-100  -92  -74]]
l2_output:  [[28 36]]
q2_output:  [[156 164]]
-----------------
input:  [[199 168   1]]
q1_output:  [[  84   14 -127]]
l1_output:  [[ -44 -114 -115]]
l2_output:  [[84 14]]
q2_output:  [[212 142]]
-----------------
input:  [[ 69 181 194]]
q1_output:  [[ 87 -22  66]]
l1_output:  [[-41 106 -18]]
l2_output:  [[ 87 -22]]
q2_output:  [[215 106]]
-----------------
-----------------
module: q1
module: l1
z_o = -128, m = 0.01129330852910313, o = 1673.0, b = -104, a2 = -194.0, z_i = -8, zia2: -1552.0
z_o = -128, m = 0.01129330852910313, o = 13720.0, b = -103, a2 = -62.0, z_i = -8, zia2: -496.0
z_o = -128, m = 0.0112933085