## Imports

In [1]:
from utensor_cgen.api.export import tflm_keras_export

In [2]:
import tensorflow as tf
import numpy as np

from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras import Model

## Define Model

In [3]:
class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = Conv2D(32, 3, activation='relu')
        self.pool = MaxPooling2D()
        self.flatten = Flatten()
        self.d1 = Dense(128, activation='relu')
        self.d2 = Dense(10)

    def call(self, x):
        x0 = self.conv1(x)
        x1 = self.pool(x0)
        x2 = self.flatten(x1)
        x3 = self.d1(x2)
        return self.d2(x3)

# Create an instance of the model
model = MyModel()

## Training

In [4]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Add a channels dimension
x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]

In [5]:
train_ds = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)
).shuffle(10000).batch(32)

test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

In [6]:
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

In [7]:
@tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(labels, predictions)

In [8]:
@tf.function
def test_step(images, labels):
    predictions = model(images, training=False)
    t_loss = loss_object(labels, predictions)

    test_loss(t_loss)
    test_accuracy(labels, predictions)

In [9]:
EPOCHS = 1

for epoch in range(EPOCHS):
    # Reset the metrics at the start of the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

    for images, labels in train_ds:
        train_step(images, labels)

    for test_images, test_labels in test_ds:
        test_step(test_images, test_labels)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch+1,
                          train_loss.result(),
                          train_accuracy.result()*100,
                          test_loss.result(),
                          test_accuracy.result()*100))



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 1, Loss: 0.15297885239124298, Accuracy: 95.40499877929688, Test Loss: 0.057287149131298065, Test Accuracy: 98.05999755859375


In [10]:
model.save('saved_model/test_model')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: saved_model/test_model/assets


## Exporting to uTensor

In [11]:
# representative data function
num_calibration_steps = 128
calibration_dtype = tf.float32

def representative_dataset_gen():
    for _ in range(num_calibration_steps):
        rand_idx = np.random.randint(0, x_test.shape[0]-1)
        sample = x_test[rand_idx]
        sample = sample[tf.newaxis, ...]
        sample = tf.cast(sample, dtype=calibration_dtype)
        yield [sample]

In [38]:
tflm_keras_export(
    'saved_model/test_model',
    representive_dataset=representative_dataset_gen,
    model_name='my_model',
    target='utensor',
)

[INFO transformer.py transform @ 23] Transforming graph: my_model
[INFO transformer.py transform @ 24] Transform pipeline: dropout(name_pattern=r'(dropout[_\w\d]*)/.*') -> inline -> biasAdd -> remove_id_op
[INFO transformer.py transform @ 28] Graph transormation done
[INFO generic_graph_lower.py apply @ 56] topo ordered tensor life span analysis done
[INFO utils.py wrapped @ 469] collapsed time of calling apply: 0.0243 seconds
[INFO generic_graph_lower.py _solve_space_alloc @ 204] optimal tensor allocation plan solved, total memory required: 27040 bytes
[INFO generic_graph_lower.py _solve_space_alloc @ 205] number of tensors allocated: 7
[INFO utils.py wrapped @ 469] collapsed time of calling _solve_space_alloc: 0.0835 seconds
[INFO _code_generator.py _generate_files @ 164] model parameters header file generated: data/my_model/params_my_model.hpp
[INFO _code_generator.py _generate_files @ 180] model header file generated: models/my_model/my_model.hpp
[INFO _code_generator.py _generate_

### Generated Cpp File: `models/my_model/my_model.cpp`

```cpp
/* Auto-generated by utensor cli */
#include "uTensor.h"
#include "models/my_model/my_model.hpp"
#include "data/my_model/params_my_model.hpp"
#include <vector>


using namespace uTensor;

// start rendering global declare snippets
// end of rendering global declare snippets

void compute_my_model(Tensor& input_10, Tensor& Identity0){
    // start rendering local declare snippets
    MaxPoolOperator<int8_t> op_000({ 2, 2 }, { 1, 2, 2, 1 }, VALID);

    ReshapeOperator<int8_t> op_001({ 1, 5408 });

    TFLM::QuantizeOperator<int8_t, float> op_002;

    QuantizedFullyConnectedOperator<int8_t> op_003(TFLM::TfLiteFusedActivation::kTfLiteActRelu);

    QuantizedFullyConnectedOperator<int8_t> op_004(TFLM::TfLiteFusedActivation::kTfLiteActNone);

    TFLM::DequantizeOperator<float, int8_t> op_005;

    QuantizedDepthwiseSeparableConvOperator<int8_t> op_006({ 1, 1 }, VALID, 32, { 1, 1 }, TFLM::TfLiteFusedActivation::kTfLiteActRelu);

    Tensor input_1_int80 = new RamTensor({ 1, 28, 28, 1 }, i8);
    int input_1_int80_zp = -128;
    float input_1_int80_scale = 0.003921569;
    PerTensorQuantizationParams input_1_int80_quant_params(input_1_int80_zp, input_1_int80_scale);
    input_1_int80->set_quantization_params(input_1_int80_quant_params);

    Tensor StatefulPartitionedCallmy_modelconv2dRelu0 = new RamTensor({ 1, 26, 26, 32 }, i8);
    int StatefulPartitionedCallmy_modelconv2dRelu0_zp = -128;
    float StatefulPartitionedCallmy_modelconv2dRelu0_scale = 0.0026465168;
    PerTensorQuantizationParams StatefulPartitionedCallmy_modelconv2dRelu0_quant_params(StatefulPartitionedCallmy_modelconv2dRelu0_zp, StatefulPartitionedCallmy_modelconv2dRelu0_scale);
    StatefulPartitionedCallmy_modelconv2dRelu0->set_quantization_params(StatefulPartitionedCallmy_modelconv2dRelu0_quant_params);

    Tensor StatefulPartitionedCallmy_modelmax_pooling2dMaxPool0 = new RamTensor({ 1, 13, 13, 32 }, i8);
    int StatefulPartitionedCallmy_modelmax_pooling2dMaxPool0_zp = -128;
    float StatefulPartitionedCallmy_modelmax_pooling2dMaxPool0_scale = 0.0026465168;
    PerTensorQuantizationParams StatefulPartitionedCallmy_modelmax_pooling2dMaxPool0_quant_params(StatefulPartitionedCallmy_modelmax_pooling2dMaxPool0_zp, StatefulPartitionedCallmy_modelmax_pooling2dMaxPool0_scale);
    StatefulPartitionedCallmy_modelmax_pooling2dMaxPool0->set_quantization_params(StatefulPartitionedCallmy_modelmax_pooling2dMaxPool0_quant_params);

    Tensor StatefulPartitionedCallmy_modeldenseRelu0 = new RamTensor({ 1, 128 }, i8);
    int StatefulPartitionedCallmy_modeldenseRelu0_zp = -128;
    float StatefulPartitionedCallmy_modeldenseRelu0_scale = 0.023930669;
    PerTensorQuantizationParams StatefulPartitionedCallmy_modeldenseRelu0_quant_params(StatefulPartitionedCallmy_modeldenseRelu0_zp, StatefulPartitionedCallmy_modeldenseRelu0_scale);
    StatefulPartitionedCallmy_modeldenseRelu0->set_quantization_params(StatefulPartitionedCallmy_modeldenseRelu0_quant_params);

    Tensor Identity_int80 = new RamTensor({ 1, 10 }, i8);
    int Identity_int80_zp = -13;
    float Identity_int80_scale = 0.113923624;
    PerTensorQuantizationParams Identity_int80_quant_params(Identity_int80_zp, Identity_int80_scale);
    Identity_int80->set_quantization_params(Identity_int80_quant_params);

    Tensor StatefulPartitionedCallmy_modelmax_pooling2dMaxPool_0_Reshape00 = new RamTensor({ 1, 5408 }, i8);
    int StatefulPartitionedCallmy_modelmax_pooling2dMaxPool_0_Reshape00_zp = -128;
    float StatefulPartitionedCallmy_modelmax_pooling2dMaxPool_0_Reshape00_scale = 0.0026465168;
    PerTensorQuantizationParams StatefulPartitionedCallmy_modelmax_pooling2dMaxPool_0_Reshape00_quant_params(StatefulPartitionedCallmy_modelmax_pooling2dMaxPool_0_Reshape00_zp, StatefulPartitionedCallmy_modelmax_pooling2dMaxPool_0_Reshape00_scale);
    StatefulPartitionedCallmy_modelmax_pooling2dMaxPool_0_Reshape00->set_quantization_params(StatefulPartitionedCallmy_modelmax_pooling2dMaxPool_0_Reshape00_quant_params);

    Tensor StatefulPartitionedCallmy_modelconv2dConv2DReadVariableOp0 = new RomTensor({ 1, 3, 3, 32 }, i8, data_StatefulPartitionedCall_my_model_conv2d_Conv2D_ReadVariableOp_0);
    int arr_StatefulPartitionedCallmy_modelconv2dConv2DReadVariableOp0_zp[32] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    float arr_StatefulPartitionedCallmy_modelconv2dConv2DReadVariableOp0_scale[32] = { 0.0019741713, 0.0011097809, 0.001178485, 0.0018986871, 0.0031697543, 0.0022199969, 0.0034489078, 0.0010821758, 0.0037110085, 0.00084193127, 0.002423992, 0.0017963316, 0.0010024284, 0.002468313, 0.0011061964, 0.0012666524, 0.0029382005, 0.0011719828, 0.0027343666, 0.0015651162, 0.0027160055, 0.0038842058, 0.0012370645, 0.0010993244, 0.0022167282, 0.0023632639, 0.002288837, 0.002926964, 0.0019453545, 0.0011736463, 0.0014855345, 0.002383109 };
    PerChannelQuantizationParams StatefulPartitionedCallmy_modelconv2dConv2DReadVariableOp0_quant_params(arr_StatefulPartitionedCallmy_modelconv2dConv2DReadVariableOp0_zp, arr_StatefulPartitionedCallmy_modelconv2dConv2DReadVariableOp0_scale);
    StatefulPartitionedCallmy_modelconv2dConv2DReadVariableOp0->set_quantization_params(StatefulPartitionedCallmy_modelconv2dConv2DReadVariableOp0_quant_params);

    Tensor StatefulPartitionedCallmy_modelconv2dConv2D_bias0 = new RomTensor({ 32 }, i32, data_StatefulPartitionedCall_my_model_conv2d_Conv2D_bias_0);
    int32_t arr_StatefulPartitionedCallmy_modelconv2dConv2D_bias0_zp[32] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    float arr_StatefulPartitionedCallmy_modelconv2dConv2D_bias0_scale[32] = { 7.741848e-06, 4.352082e-06, 4.62151e-06, 7.445832e-06, 1.243041e-05, 8.705871e-06, 1.3525129e-05, 4.243827e-06, 1.4552975e-05, 3.3016913e-06, 9.505851e-06, 7.044438e-06, 3.9310917e-06, 9.679659e-06, 4.3380255e-06, 4.9672644e-06, 1.1522356e-05, 4.5960114e-06, 1.0723007e-05, 6.137711e-06, 1.0651002e-05, 1.5232181e-05, 4.8512334e-06, 4.3110763e-06, 8.693052e-06, 9.267702e-06, 8.975831e-06, 1.14782915e-05, 7.6288416e-06, 4.602535e-06, 5.8256255e-06, 9.3455255e-06 };
    PerChannelQuantizationParams StatefulPartitionedCallmy_modelconv2dConv2D_bias0_quant_params(arr_StatefulPartitionedCallmy_modelconv2dConv2D_bias0_zp, arr_StatefulPartitionedCallmy_modelconv2dConv2D_bias0_scale);
    StatefulPartitionedCallmy_modelconv2dConv2D_bias0->set_quantization_params(StatefulPartitionedCallmy_modelconv2dConv2D_bias0_quant_params);

    Tensor StatefulPartitionedCallmy_modeldenseMatMulReadVariableOptranspose0 = new RomTensor({ 5408, 128 }, i8, data_StatefulPartitionedCall_my_model_dense_MatMul_ReadVariableOp_transpose_0);
    int StatefulPartitionedCallmy_modeldenseMatMulReadVariableOptranspose0_zp = 0;
    float StatefulPartitionedCallmy_modeldenseMatMulReadVariableOptranspose0_scale = 0.0022179913;
    PerTensorQuantizationParams StatefulPartitionedCallmy_modeldenseMatMulReadVariableOptranspose0_quant_params(StatefulPartitionedCallmy_modeldenseMatMulReadVariableOptranspose0_zp, StatefulPartitionedCallmy_modeldenseMatMulReadVariableOptranspose0_scale);
    StatefulPartitionedCallmy_modeldenseMatMulReadVariableOptranspose0->set_quantization_params(StatefulPartitionedCallmy_modeldenseMatMulReadVariableOptranspose0_quant_params);

    Tensor StatefulPartitionedCallmy_modeldenseMatMul_bias0 = new RomTensor({ 128 }, i32, data_StatefulPartitionedCall_my_model_dense_MatMul_bias_0);
    int32_t StatefulPartitionedCallmy_modeldenseMatMul_bias0_zp = 0;
    float StatefulPartitionedCallmy_modeldenseMatMul_bias0_scale = 5.869951e-06;
    PerTensorQuantizationParams StatefulPartitionedCallmy_modeldenseMatMul_bias0_quant_params(StatefulPartitionedCallmy_modeldenseMatMul_bias0_zp, StatefulPartitionedCallmy_modeldenseMatMul_bias0_scale);
    StatefulPartitionedCallmy_modeldenseMatMul_bias0->set_quantization_params(StatefulPartitionedCallmy_modeldenseMatMul_bias0_quant_params);

    Tensor StatefulPartitionedCallmy_modeldense_1MatMulReadVariableOptranspose0 = new RomTensor({ 128, 10 }, i8, data_StatefulPartitionedCall_my_model_dense_1_MatMul_ReadVariableOp_transpose_0);
    int StatefulPartitionedCallmy_modeldense_1MatMulReadVariableOptranspose0_zp = 0;
    float StatefulPartitionedCallmy_modeldense_1MatMulReadVariableOptranspose0_scale = 0.0034516868;
    PerTensorQuantizationParams StatefulPartitionedCallmy_modeldense_1MatMulReadVariableOptranspose0_quant_params(StatefulPartitionedCallmy_modeldense_1MatMulReadVariableOptranspose0_zp, StatefulPartitionedCallmy_modeldense_1MatMulReadVariableOptranspose0_scale);
    StatefulPartitionedCallmy_modeldense_1MatMulReadVariableOptranspose0->set_quantization_params(StatefulPartitionedCallmy_modeldense_1MatMulReadVariableOptranspose0_quant_params);

    Tensor StatefulPartitionedCallmy_modeldense_1MatMul_bias0 = new RomTensor({ 10 }, i32, data_StatefulPartitionedCall_my_model_dense_1_MatMul_bias_0);
    int32_t StatefulPartitionedCallmy_modeldense_1MatMul_bias0_zp = 0;
    float StatefulPartitionedCallmy_modeldense_1MatMul_bias0_scale = 8.2601175e-05;
    PerTensorQuantizationParams StatefulPartitionedCallmy_modeldense_1MatMul_bias0_quant_params(StatefulPartitionedCallmy_modeldense_1MatMul_bias0_zp, StatefulPartitionedCallmy_modeldense_1MatMul_bias0_scale);
    StatefulPartitionedCallmy_modeldense_1MatMul_bias0->set_quantization_params(StatefulPartitionedCallmy_modeldense_1MatMul_bias0_quant_params);

    // end of rendering local declare snippets
    // start rendering eval snippets
    op_002
    .set_inputs({
        { TFLM::QuantizeOperator<int8_t, float>::input, input_10 },
    })
    .set_outputs({
        { TFLM::QuantizeOperator<int8_t, float>::output, input_1_int80}
    })
    .eval();

    op_006
    .set_inputs({
        { QuantizedDepthwiseSeparableConvOperator<int8_t>::in, input_1_int80 },
        { QuantizedDepthwiseSeparableConvOperator<int8_t>::filter, StatefulPartitionedCallmy_modelconv2dConv2DReadVariableOp0 },
        { QuantizedDepthwiseSeparableConvOperator<int8_t>::bias, StatefulPartitionedCallmy_modelconv2dConv2D_bias0 },
    })
    .set_outputs({
        { QuantizedDepthwiseSeparableConvOperator<int8_t>::out, StatefulPartitionedCallmy_modelconv2dRelu0}
    })
    .eval();

    op_000
    .set_inputs({
        { MaxPoolOperator<int8_t>::in, StatefulPartitionedCallmy_modelconv2dRelu0 },
    })
    .set_outputs({
        { MaxPoolOperator<int8_t>::out, StatefulPartitionedCallmy_modelmax_pooling2dMaxPool0}
    })
    .eval();

    op_001
    .set_inputs({
        { ReshapeOperator<int8_t>::input, StatefulPartitionedCallmy_modelmax_pooling2dMaxPool0 },
    })
    .set_outputs({
        { ReshapeOperator<int8_t>::output, StatefulPartitionedCallmy_modelmax_pooling2dMaxPool_0_Reshape00}
    })
    .eval();

    op_003
    .set_inputs({
        { QuantizedFullyConnectedOperator<int8_t>::input, StatefulPartitionedCallmy_modelmax_pooling2dMaxPool_0_Reshape00 },
        { QuantizedFullyConnectedOperator<int8_t>::filter, StatefulPartitionedCallmy_modeldenseMatMulReadVariableOptranspose0 },
        { QuantizedFullyConnectedOperator<int8_t>::bias, StatefulPartitionedCallmy_modeldenseMatMul_bias0 },
    })
    .set_outputs({
        { QuantizedFullyConnectedOperator<int8_t>::output, StatefulPartitionedCallmy_modeldenseRelu0}
    })
    .eval();

    op_004
    .set_inputs({
        { QuantizedFullyConnectedOperator<int8_t>::input, StatefulPartitionedCallmy_modeldenseRelu0 },
        { QuantizedFullyConnectedOperator<int8_t>::filter, StatefulPartitionedCallmy_modeldense_1MatMulReadVariableOptranspose0 },
        { QuantizedFullyConnectedOperator<int8_t>::bias, StatefulPartitionedCallmy_modeldense_1MatMul_bias0 },
    })
    .set_outputs({
        { QuantizedFullyConnectedOperator<int8_t>::output, Identity_int80}
    })
    .eval();

    op_005
    .set_inputs({
        { TFLM::DequantizeOperator<float, int8_t>::a, Identity_int80 },
    })
    .set_outputs({
        { TFLM::DequantizeOperator<float, int8_t>::b, Identity0}
    })
    .eval();
    // end of rendering eval snippets
}
```

### Write `main.cpp`

```cpp
#include <cmath>
#include <iostream>

// include the model header and uTensor
#include "models/model/model.hpp"
#include "uTensor.h"

using namespace uTensor;

using std::cout;
using std::endl;

// meta_allocator and ram_allocator will limit the memory usage for
// 1. the meta data: the meta data of the operators or the tensors, such as shape, dimensions, ...etc
// 2. the ram: it's where to store all the values of tensors in the model
static localCircularArenaAllocator<2048> meta_allocator;
static localCircularArenaAllocator<40000, uint32_t> ram_allocator;

// the input image pixel values, 28x28 image is flattened into a 1D array
const float arr_input_image[784] = {
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.32941176470588235, 0.7254901960784313, 0.6235294117647059, 0.592156862745098, 
  0.23529411764705882, 0.1411764705882353, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8705882352941177, 0.996078431372549, 0.996078431372549, 
  0.996078431372549, 0.996078431372549, 0.9450980392156862, 0.7764705882352941, 0.7764705882352941, 0.7764705882352941, 
  0.7764705882352941, 0.7764705882352941, 0.7764705882352941, 0.7764705882352941, 0.7764705882352941, 
  0.6666666666666666, 0.20392156862745098, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2627450980392157, 
  0.4470588235294118, 0.2823529411764706, 0.4470588235294118, 0.6392156862745098, 0.8901960784313725, 0.996078431372549, 
  .8823529411764706, 0.996078431372549, 0.996078431372549, 0.996078431372549, 0.9803921568627451, 0.8980392156862745, 
  0.996078431372549, 0.996078431372549, 0.5490196078431373, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.06666666666666667, 0.25882352941176473, 0.054901960784313725, 0.2627450980392157, 0.2627450980392157, 
  0.2627450980392157, 0.23137254901960785, 0.08235294117647059, 0.9254901960784314, 0.996078431372549, 0.41568627450980394, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.3254901960784314, 0.9921568627450981, 0.8196078431372549, 0.07058823529411765, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.08627450980392157, 0.9137254901960784, 1.0, 
  0.3254901960784314, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.5058823529411764, 0.996078431372549, 0.9333333333333333, 0.17254901960784313, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.23137254901960785, 
  0.9764705882352941, 0.996078431372549, 0.24313725490196078, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5215686274509804, 0.996078431372549, 0.7333333333333333, 
  0.0196078431372549, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.03529411764705882, 0.803921568627451, 0.9725490196078431, 0.22745098039215686, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.49411764705882355, 
  0.996078431372549, 0.7137254901960784, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.29411764705882354, 0.984313725490196, 0.9411764705882353, 0.2235294117647059, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.07450980392156863, 
  0.8666666666666667, 0.996078431372549, 0.6509803921568628, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.011764705882352941, 0.796078431372549, 0.996078431372549, 0.8588235294117647, 0.13725490196078433, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.14901960784313725, 
  0.996078431372549, 0.996078431372549, 0.30196078431372547, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.12156862745098039, 0.8784313725490196, 0.996078431372549, 0.45098039215686275, 0.00392156862745098, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5215686274509804, 
  0.996078431372549, 0.996078431372549, 0.20392156862745098, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.23921568627450981, 0.9490196078431372, 0.996078431372549, 0.996078431372549, 0.20392156862745098, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4745098039215686, 
  0.996078431372549, 0.996078431372549, 0.8588235294117647, 0.1568627450980392, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4745098039215686, 0.996078431372549, 0.8117647058823529, 0.07058823529411765, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };

int main(int argc, const char** argv) {
  // before allocating any tesnors/operators, make sure you set the allocators properly
  Context::get_default_context()->set_metadata_allocator(&meta_allocator);
  Context::get_default_context()->set_ram_data_allocator(&ram_allocator);
  
  // create the input/output tensor
  Tensor input_image = new RomTensor({1, 28, 28, 1}, flt, arr_input_image);
  Tensor logits = new RamTensor({1, 10}, flt);
  // evaluate the model
  compute_model(input_image, logits);

  // make prediction
  float max_value = static_cast<float>(logits(0));
  int max_index = 0;
  for (int i = 1; i < 10; ++i) {
    float value = static_cast<float>(logits(i));
    if (value >= max_value) {
      max_value = value;
      max_index = i;
    }
  }
  cout << "pred label: " << max_index << endl;
  return 0;
}
```

### Compile and Run

![end2end-output](images/end2end_keras_output.png)