In [None]:
import tensorflow as tf
import numpy as np

from tflite2xcore.xcore_interpreter import XCOREInterpreter, make_op_state_capture_callback
from tflite2xcore.model_generation.utils import quantize, dequantize
from tflite2xcore.serialization import read_flatbuffer

NOTE: the MobileNet model must be built with `include_top=False` and converted with the following passes turned off: `SplitPaddingPass`, `FuseConv2dPaddingPass`, and `FuseConsecutivePadsPass`.

TIP: to reduce errors caused by saturation at -127, change the saturation logic in `vpu_sim.{c,h}` and recompile the xInterpreter.

In [None]:
# Parse both model files with read_flatbuffer. Each name gets its own file:
# the stripped model is read from model_stripped.tflite (the same file the
# stripped interpreter loads) rather than from the xcore model a second time.
model_xcore = read_flatbuffer('./models/model_xcore.tflite')
model_stripped = read_flatbuffer('./models/model_stripped.tflite')

In [None]:
def _load_interpreter(model_path):
    """Create an XCOREInterpreter for `model_path` and allocate its tensors."""
    interpreter = XCOREInterpreter(model_path=model_path)
    interpreter.allocate_tensors()
    return interpreter


# `interp_atomic` deliberately loads the same file as `interp_xcore`: it is a
# second, independent interpreter instance over the xcore model.
interp_float = _load_interpreter('./models/model_float.tflite')
interp_xcore = _load_interpreter('./models/model_xcore.tflite')
interp_atomic = _load_interpreter('./models/model_xcore.tflite')
interp_stripped = _load_interpreter('./models/model_stripped.tflite')

In [None]:
# Metadata of the first input and first output tensor of the xcore model.
input_details = interp_xcore.get_input_details()[0]
output_details = interp_xcore.get_output_details()[0]

# Tensor indices are used with set_tensor/get_tensor; the input quantization
# parameters are unpacked into quantize() when preparing the int8 input.
input_idx, input_quantization = input_details['index'], input_details['quantization']
output_idx = output_details['index']

In [None]:
# Load the float test inputs. The archive is opened in a `with` block so the
# underlying file handle is closed as soon as the array has been read.
with np.load('./test_data/data.npz') as data:
    x = data['export']

# Quantize the float inputs with the xcore model's input quantization parameters.
x_int8 = quantize(x, *input_quantization)

In [None]:
# Run the float model on the first sample and capture per-operator states.
# Tensor indices are queried from `interp_float` itself: indices obtained from
# another model's interpreter are not guaranteed to address the same tensors.
float_input_idx = interp_float.get_input_details()[0]['index']
float_output_idx = interp_float.get_output_details()[0]['index']

interp_float.set_tensor(float_input_idx, x[0:1])
interp_float.invoke(capture_op_states=True)
y_float = interp_float.get_tensor(float_output_idx)
float_states = interp_float._op_states

In [None]:
# Run the xcore model on the first quantized sample, capturing the state of
# every operator along the way.
xcore_sample = x_int8[0:1]
interp_xcore.set_tensor(input_idx, xcore_sample)
interp_xcore.invoke(capture_op_states=True)

# Final output tensor and the per-operator states recorded during invoke().
y_int8_xcore = interp_xcore.get_tensor(output_idx)
xcore_states = interp_xcore._op_states

In [None]:
# Run the stripped (reference) model on the first quantized sample and capture
# per-operator states. Tensor indices are queried from `interp_stripped` itself:
# indices obtained from another model's interpreter are not guaranteed to
# address the same tensors.
stripped_input_idx = interp_stripped.get_input_details()[0]['index']
stripped_output_idx = interp_stripped.get_output_details()[0]['index']

interp_stripped.set_tensor(stripped_input_idx, x_int8[0:1])
interp_stripped.invoke(capture_op_states=True)
y_int8_stripped = interp_stripped.get_tensor(stripped_output_idx)
stripped_states = interp_stripped._op_states

In [None]:
def make_op_output_replace_callback(ref_op_states, max_op_index=8):
    """Build a callback that overwrites operator outputs with reference values.

    Args:
        ref_op_states: list of per-operator state dicts, indexed by operator
            index. Each entry must have an "outputs" list whose items carry
            the tensor "index" and the reference "values" to write.
        max_op_index: highest operator index (inclusive) whose outputs are
            replaced. Operators past this index are left untouched. Pass
            ``None`` to replace the outputs of every operator. Defaults to 8.

    Returns:
        A callable ``(interpreter, operator_details)`` that calls
        ``interpreter.set_tensor`` for each reference output of the operator
        identified by ``operator_details["index"]``.
    """
    assert isinstance(ref_op_states, list)

    def _callback(interpreter, operator_details):
        op_index = operator_details["index"]
        # Decide once per operator, before touching the reference states, so
        # operators past the cutoff need no entry in `ref_op_states`.
        if max_op_index is not None and op_index > max_op_index:
            return
        for ref_output in ref_op_states[op_index]["outputs"]:
            interpreter.set_tensor(ref_output["index"], ref_output["values"])

    return _callback

In [None]:
# States captured during the "atomic" run; reset on every execution of this cell.
atomic_states = []
output_capture_cb = make_op_state_capture_callback(atomic_states, inputs=False)
output_replace_cb = make_op_output_replace_callback(stripped_states)


def combined_cb(interpreter, operator_details):
    """Capture the operator's own state, then overwrite its outputs.

    The capture callback runs first so that `atomic_states` records what the
    operator actually produced, before the replace callback substitutes the
    values taken from `stripped_states`.
    """
    output_capture_cb(interpreter, operator_details)
    output_replace_cb(interpreter, operator_details)


interp_atomic.set_tensor(input_idx, x_int8[0:1])
interp_atomic.invoke(postinvoke_callback=combined_cb)
# Stored under its own name so the plain xcore result (`y_int8_xcore`) is not
# silently overwritten by the atomic run.
y_int8_atomic = interp_atomic.get_tensor(output_idx)

In [None]:
# Per-operator comparison of the first output tensor of the xcore run and the
# atomic run against the stripped (reference) run. Values are widened to int32
# before subtracting.
for j, (op_state_xcore, op_state_ref, op_state_atomic) in enumerate(
    zip(xcore_states, stripped_states, atomic_states)
):
    out_xcore, out_ref, out_atomic = (
        state['outputs'][0]['values'].astype(np.int32)
        for state in (op_state_xcore, op_state_ref, op_state_atomic)
    )

    # Absolute element-wise deviations from the reference, computed once.
    xcore_abs_diff = np.abs(out_xcore - out_ref)
    atomic_abs_diff = np.abs(out_atomic - out_ref)
    mismatch_ratio = np.count_nonzero(atomic_abs_diff) / out_ref.size

    print(
        f"Operator {j:2d}:  ",
        "max(xcore - ref): {:3d}   ".format(np.max(xcore_abs_diff)),
        "max(atomic - ref): {:3d}   ".format(np.max(atomic_abs_diff)),
        "sum(atomic - ref): {:7d}   ".format(np.sum(atomic_abs_diff)),
        "ratio: {:.2%}".format(mismatch_ratio)
    )

In [None]:
# Operator under inspection; -1 selects the last captured operator.
op_idx = -1

# First output of the selected operator in each run.
float_output = float_states[op_idx]['outputs'][0]
ref_output = stripped_states[op_idx]['outputs'][0]
atomic_output = atomic_states[op_idx]['outputs'][0]

out_float = float_output['values'].astype(np.float32)
out_ref = ref_output['values'].astype(np.int32)
out_atomic = atomic_output['values'].astype(np.int32)

# Quantization parameters of the reference output tensor.
tensor_idx = ref_output['index']
out_quantization = interp_stripped._get_tensor_details(tensor_idx)['quantization']

# Reference values at the positions where the atomic run disagrees, together
# with how often each value occurs.
diff = (out_ref - out_atomic)
mismatch_mask = np.abs(diff) > 0
a = out_ref[mismatch_mask]
np.unique(a, return_counts=True)

In [None]:
def _relative_error(out_quantized):
    """Norm of the dequantized deviation from `out_float`, relative to the norm of `out_float`."""
    deviation = dequantize(out_quantized, *out_quantization) - out_float
    return np.linalg.norm(np.abs(deviation)) / np.linalg.norm(out_float)


# Relative error of the reference run and of the atomic run w.r.t. the float output.
ref_err = _relative_error(out_ref)
atomic_err = _relative_error(out_atomic)
ref_err, atomic_err

In [None]:
# Display the values of input 1 of operator 3 from the xcore run.
op3_xcore_inputs = xcore_states[3]['inputs']
op3_xcore_inputs[1]['values']

In [None]:
# Display the first slice (leading index 0) of input 2 of operator 3 from the
# xcore run, widened to int32.
op3_xcore_input2 = xcore_states[3]['inputs'][2]['values']
op3_xcore_input2[0, :, :].astype(np.int32)

In [None]:
# First output of operator 3 in each run. Operator-specific names are used so
# this cell does not rebind `out_float` / `out_ref`, which the relative-error
# cell reads; rebinding them here would make that cell give different results
# when re-run after this one.
op3_out_float = float_states[3]['outputs'][0]['values']
op3_out_x = xcore_states[3]['outputs'][0]['values'].astype(np.int32)
op3_out_ref = stripped_states[3]['outputs'][0]['values'].astype(np.int32)

# xcore minus reference along the last axis at position [0, 0, 0].
op3_out_x[0,0,0,:] - op3_out_ref[0,0,0,:]

In [None]:
# Display the values of input 1 of operator 3 from the xcore run.
# NOTE(review): this repeats an earlier inspection cell verbatim; the
# stripped-run counterpart may have been intended here — confirm.
op3_xcore_inputs = xcore_states[3]['inputs']
op3_xcore_inputs[1]['values']

In [None]:
# Display the values of input 2 of operator 3 from the stripped (reference) run.
op3_stripped_inputs = stripped_states[3]['inputs']
op3_stripped_inputs[2]['values']