# This is designed for inference calculation only

In [7]:
import logging
logging.getLogger("tensorflow").setLevel(logging.DEBUG)

import tensorflow as tf
import numpy as np
from collections import OrderedDict

# Data preparation

In [8]:
# Hard-coded parameters and sample inputs for the small quantized network
# assembled later in this file.
# NOTE(review): the values are presumably exported from the TFLite model that
# is loaded further down — confirm against the model file.

# Arguments of the first Quantize stage ('q1'): scale, input zero point
# (uint8) and output zero point (int8).
quan_s1 = np.float32(0.011974385008215904)
quan_z_i1 = np.uint8(120)
quan_z_o1 = np.int8(-8)

# First fully connected layer ('l1').
# After the transpose w1 has shape (3, 3) and is indexed as W[input][output]
# by FullyConnected.forward.
w1 = np.array( [[ -63, -127,   -4], [  87,  -84,  -65], [-127,  114,  -49]], dtype=np.int8).T
b1 = np.array([-104, -103, -104], dtype=np.int32)
# Scales of input, weights and output, followed by the input/output zero points.
s_i1 = np.float32(0.011974385008215904)
s_w1 = np.float32(0.004026466049253941)
s_o1 = np.float32(0.004269302356988192)
z_i1 = np.int8(-8)
z_o1 = np.int8(-128)

# Second fully connected layer ('l2'); after the transpose w2 has shape (3, 2).
# Its input scale and zero point equal the output scale and zero point of 'l1'.
w2 = np.array( [[-113,   34,   47], [  43,  -51, -127]], dtype=np.int8).T
b2 = np.array([-143,  143], dtype=np.int32)
s_i2 = np.float32(0.004269302356988192)
s_w2 = np.float32(0.008198416791856289)
s_o2 = np.float32(0.006051715463399887)
z_i2 = np.int8(-128)
z_o2 = np.int8(58)

# Arguments of the final Quantize stage ('q2'): scale, input zero point (int8)
# and output zero point (uint8).
quan_s2 = np.float32(0.006051715463399887)
quan_z_i2 = np.int8(58)
quan_z_o2 = np.uint8(186)

# Sample inputs: one row of three uint8 values each.
input1 = np.array( [[164, 101,   0]], dtype=np.uint8)
input2 = np.array( [[ 61, 255, 122]], dtype=np.uint8)
input3 = np.array( [[ 49, 106, 171]], dtype=np.uint8)
input4 = np.array( [[199, 168,   1]], dtype=np.uint8)
input5 = np.array( [[ 69, 181, 194]], dtype=np.uint8)

# Manual calculation of inference for a quantized neural network

In [9]:
class Module:
    """Ordered container of sub-modules that are applied one after another.

    Sub-modules are stored by name in insertion order; any object exposing a
    ``forward(input)`` method can be registered.
    """

    def __init__(self):
        # name -> sub-module, kept in registration order.
        self.modules = OrderedDict()

    def add_module(self, module, name:str):
        """Register ``module`` under ``name``.

        Re-using an existing name replaces the stored sub-module.
        """
        self.modules[name] = module

    def forward(self, input) -> np.ndarray:
        """Feed ``input`` through every registered sub-module in order.

        The result of each sub-module's ``forward`` becomes the argument of the
        next one. With no sub-modules registered, ``input`` is returned as is.
        """
        result = input
        for stage in self.modules.values():
            result = stage.forward(result)
        return result

In [10]:
#------------------------------------------------------------------------------
#   FullyConnected class
#------------------------------------------------------------------------------
class FullyConnected(Module):
    """Integer-only fully connected layer for quantized inference.

    For every input row ``x`` and output column ``k`` the layer computes::

        acc[k] = sum_j x[j] * W[j][k] - z_i * sum_j W[j][k] + b[k]
        out[k] = saturate_int8(round(z_o + m * acc[k]))

    with ``m = s_i * s_w / s_o``. The first two terms of ``acc`` are the
    expansion of ``sum_j (x[j] - z_i) * W[j][k]``.

    Args:
        w: Weight matrix indexed as ``W[input][output]``.
        b: Bias vector with one entry per output column.
        s_w: Weight scale.
        s_i: Input scale.
        s_o: Output scale.
        z_i: Input zero point.
        z_o: Output zero point.
    """

    def __init__(self, w, b, s_w, s_i, s_o, z_i, z_o):
        super(FullyConnected, self).__init__()
        self.W = w
        self.b = b
        self.z_i = z_i
        self.z_o = z_o
        # Combined rescaling factor applied to the int32 accumulator.
        self.m = s_i * s_w / s_o
        # Product of input and weight scale; stored but not used by forward().
        self.s_b = s_i * s_w

    def forward(self, input: np.ndarray) -> np.ndarray:
        """Apply the layer to a batch of quantized rows.

        Args:
            input: 2-D integer array of shape ``(batch, W.shape[0])``.

        Returns:
            ``np.int8`` array of shape ``(batch, W.shape[1])``.
        """
        weights = np.asarray(self.W, dtype=np.int32)
        bias = np.asarray(self.b, dtype=np.int32)

        # The column sums depend only on the weights, so they are computed once
        # per call. Accumulating them inside the batch loop (as was done before)
        # made the zero-point correction grow with every additional row.
        weight_col_sums = weights.sum(axis=0, dtype=np.int32)

        # int32 accumulator: x @ W - z_i * sum_j W + b.
        acc = np.asarray(input, dtype=np.int32) @ weights
        acc = acc - np.int32(self.z_i) * weight_col_sums + bias

        # Rescale in float64, then round and clamp to the int8 range before the
        # cast so out-of-range values saturate instead of wrapping around.
        scaled = np.float64(self.z_o) + np.float64(self.m) * acc.astype(np.float64)
        return np.clip(np.round(scaled), -128, 127).astype(np.int8)

#------------------------------------------------------------------------------
#   Quantize class
#------------------------------------------------------------------------------
class Quantize(Module):
    """Re-centre quantized values between the int8 and uint8 representations.

    ``d_type`` names the dtype of the data handed to ``forward``: ``np.uint8``
    data is shifted down by 128 and returned as ``np.int8``; ``np.int8`` data
    is shifted up by 128 and returned as ``np.uint8``.

    Args:
        s: Scale (stored; not used by ``forward``).
        z_i: Input zero point (stored; not used by ``forward``).
        z_o: Output zero point (stored; not used by ``forward``).
        d_type: Dtype of the incoming data, ``np.int8`` or ``np.uint8``.
    """

    def __init__(self, s, z_i, z_o, d_type):
        super(Quantize, self).__init__()
        self.z_i = z_i
        self.z_o = z_o
        self.s = s
        self.d_type = d_type

    def forward(self, input: np.ndarray) -> np.ndarray:
        """Shift ``input`` by 128 into the opposite signedness.

        Raises:
            ValueError: If ``d_type`` is neither int8 nor uint8.
        """
        # Normalise d_type so that np.int8, np.dtype('int8') and 'int8' are all
        # accepted; anything NumPy cannot interpret falls through to the error.
        try:
            source = np.dtype(self.d_type)
        except TypeError:
            source = None

        if source == np.int8:
            offset, target = 128, np.uint8
        elif source == np.uint8:
            offset, target = -128, np.int8
        else:
            raise ValueError(f'input type is not supported: {self.d_type}')

        # Widen before shifting: adding the Python int 128 directly to an int8
        # array raises OverflowError under NumPy 2 promotion rules, and the
        # uint8 subtraction otherwise relies on silent wrap-around.
        shifted = np.asarray(input).astype(np.int16) + offset
        return shifted.astype(target)


# Create model

In [11]:
# Assemble the pipeline. Module.forward runs the stages in registration order:
# uint8 -> int8 conversion, two fully connected layers, int8 -> uint8 conversion.
model = Module()
model.add_module(Quantize(quan_s1, quan_z_i1, quan_z_o1, np.uint8), 'q1')
model.add_module(FullyConnected(w1, b1, s_w1, s_i1, s_o1, z_i1, z_o1), 'l1')
model.add_module(FullyConnected(w2, b2, s_w2, s_i2, s_o2, z_i2, z_o2), 'l2')
model.add_module(Quantize(quan_s2, quan_z_i2, quan_z_o2, np.int8), 'q2')

In [12]:
# Run the manual model on the first sample input. The remaining samples are
# left commented out and can be enabled the same way.
print(f"1: {model.forward(input1)} \n-------------\n-------------" )
# print(f"2: {model.forward(input2)} \n-------------\n-------------" )
# print(f"3: {model.forward(input3)} \n-------------\n-------------" )
# print(f"4: {model.forward(input4)} \n-------------\n-------------" )
# print(f"5: {model.forward(input5)} \n-------------\n-------------" )

1: [[214 143]] 
-------------
-------------


# Compare with TFLite

In [16]:
# Initialize the interpreter
import pathlib

# Build the path from its components instead of a backslash literal: '\m' and
# '\e' are invalid escape sequences in a normal string, and a backslash-separated
# path only resolves on Windows.
tflite_file = pathlib.Path('models') / 'ezmode_tflite_model_quant.tflite'
interpreter = tf.lite.Interpreter(model_path=str(tflite_file))
# Tensors must be allocated before set_tensor()/invoke() can be used.
interpreter.allocate_tensors()

def run_tflite_model(interpreter, input):
    """Run one inference on ``interpreter`` and return its first output tensor.

    Args:
        interpreter: Object exposing ``get_input_details``,
            ``get_output_details``, ``set_tensor``, ``invoke`` and
            ``get_tensor`` (here a ``tf.lite.Interpreter`` whose tensors have
            already been allocated).
        input: Value written to the interpreter's first input tensor.

    Returns:
        Whatever ``interpreter.get_tensor`` returns for the first output tensor
        after ``invoke()``.
    """
    # Only the first input and the first output of the model are used.
    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]

    interpreter.set_tensor(input_details["index"], input)
    interpreter.invoke()
    return interpreter.get_tensor(output_details["index"])

In [None]:
# Number of values each input component takes in the (commented-out) exhaustive
# comparison loop; also the divisor used when the saved differences are averaged.
r = 256
# dif = np.zeros((r,1), dtype=np.int64)
#
# for i in range(r):
#     for j in range(r):
#         for k in range(r):
#             input = np.array( [[i, j, k]], dtype=np.uint8)
#             model_m = model.forward(input)
#             model_tf = run_tflite_model(interpreter, input)
#
#             dif[i] += np.abs(np.int64(model_m[0][0]) - np.int64(model_tf[0][0]))
#             dif[i] += np.abs(np.int64(model_m[0][1]) - np.int64(model_tf[0][1]))
#
#
#     dif[i] = dif[i] / (r*r)
#
# np.save('array_dif_rounding_100acc.npy', dif)
#
# mistake_output = np.sum(dif) / r
# mistake_value = mistake_output / 2
#
# print(f'Mistake in output: {mistake_output}')
# print(f'Mistake in value: {mistake_value}')

In [None]:
# Summarise the stored differences: sum of all entries divided by r, and half
# of that value.
# NOTE(review): the commented-out comparison loop in this file saves its result
# as 'array_dif_rounding_100acc.npy', not 'array_dif.npy' — confirm which file
# is meant to be loaded here.
dif = np.load('array_dif.npy')
mistake_output = dif.sum() / r
mistake_value = mistake_output / 2

print(f'Mistake in output: {mistake_output}')
print(f'Mistake in value: {mistake_value}')

In [None]:
# Multipliers to normalise.
# NOTE(review): these appear to equal s_i * s_w / s_o of the two fully connected
# layers (m1 for 'l2', m2 for 'l1') — confirm.
m1 = 0.005783735308796167
m2 = 0.011293286457657814

# For each multiplier find the smallest shift i in [0, 32) such that
# m0 = 2**i * m lies in [0.5, 1), then print i, m0 and the value rebuilt as
# 2**(-i) * m0 next to the original. Nothing is printed if no shift qualifies.
for m in (m1, m2):
    for i in range(32):
        m0 = m * 2 ** i
        if not (0.5 <= m0 < 1):
            continue

        m_with_m0 = m0 * 2 ** (-i)
        print(f"i: {i}\nm0: {m0}")
        print(f'{m_with_m0} m_with_m0')
        print(f'{m} m')
        print('-----------------')
        break
