In [1]:
import torch
from torchinfo import summary

from torch.nn import BatchNorm1d
from torch.nn import BatchNorm2d
from torch.nn import MaxPool2d
from torch.nn import Module
from torch.nn import ModuleList

from brevitas.core.restrict_val import RestrictValueType
from brevitas.nn import QuantIdentity
from brevitas.nn import QuantConv2d
from brevitas.nn import QuantReLU
from brevitas.nn import TruncAvgPool2d
from brevitas.nn import QuantLinear

from brevitas.quant import TruncTo8bit


# from brevitas.quant import Int8ActPerTensorFloat # For Quant ADD node
from common_imagenet import CommonIntWeightPerTensorQuant
from common_imagenet import CommonUintActQuant
from common_imagenet import CommonIntActQuant # For initial Q1.7 Identity Layer
from tensor_norm import TensorNorm

from brevitas.export import export_qonnx

# Custom Quantizers

In [2]:
# Weight quantizer used by every conv/linear layer of the model below:
# CommonIntWeightPerTensorQuant with its scale restricted to powers of two.
class MyWeightsQuant_PerTensor(CommonIntWeightPerTensorQuant):
    restrict_scaling_type = RestrictValueType.POWER_OF_TWO

# Activation quantizer used by the QuantReLU layers of the model below:
# CommonUintActQuant with its scale restricted to powers of two.
class MyReLUQuant(CommonUintActQuant):
    restrict_scaling_type = RestrictValueType.POWER_OF_TWO

# Tiny Model

In [3]:
class TINY_RESNET(Module):
    """Tiny quantized CNN with one residual branch, built from Brevitas layers.

    Data flow (see ``forward``):
    input rescaling -> input quantizer -> conv1/BN/ReLU ->
    residual branch (conv2/BN/ReLU) added back to its own input -> ReLU ->
    truncating average pool -> linear/BN/ReLU -> linear -> TensorNorm ->
    binary output quantizer.

    The average pool uses a fixed ``kernel_size=224`` and the first linear
    layer expects 12 features, so the network is meant for 224x224 inputs
    (both 3x3 convolutions use stride 1 and padding 1, which keeps the
    spatial size unchanged).

    Args:
        num_classes: number of outputs of the last linear layer.
        weight_bit_width: bit width of all conv/linear weights.
        act_bit_width: bit width requested for the QuantReLU activations.
        in_bit_width: bit width of the input quantizer.
        in_channels: number of channels of the input image.
    """

    def __init__(self,
                 num_classes = 2,
                 weight_bit_width = 4,
                 act_bit_width = 4,
                 in_bit_width = 8,
                 in_channels = 3):
        super().__init__()

        self.conv_features = ModuleList()
        self.conv_branch = ModuleList()
        self.linear_features = ModuleList()

        # Input quantizer for 224x224x`in_channels` images in Q1.7 format
        # (sign + 7 fractional bits). NOTE: max_val is written for the
        # default in_bit_width=8.
        self.conv_features.append(QuantIdentity(
            act_quant = CommonIntActQuant,
            bit_width = in_bit_width,
            min_val = -1.0,
            max_val = 1.0 - 2.0 ** (-7),
            narrow_range = False,
            restrict_scaling_type = RestrictValueType.POWER_OF_TWO))

        # conv1 block (spatial size preserved: 224x224)
        self.conv_features.append(
            QuantConv2d(
                kernel_size=3, stride=1, padding=1,
                in_channels=in_channels,
                out_channels=12,
                bias=False,
                weight_quant=MyWeightsQuant_PerTensor,
                weight_bit_width=weight_bit_width))
        self.conv_features.append(BatchNorm2d(12))
        self.conv_features.append(
            QuantReLU(
                act_quant=MyReLUQuant,
                bit_width=act_bit_width,
                return_quant_tensor=True))

        # Max-pooling is disabled, so the branch below still sees 224x224 maps.
        # self.conv_features.append(MaxPool2d(kernel_size=2, stride=2))

        # conv2 block = residual branch (spatial size preserved: 224x224)
        self.conv_branch.append(
            QuantConv2d(
                kernel_size=3, stride=1, padding=1,
                in_channels=12,
                out_channels=12,
                bias=False,
                weight_quant=MyWeightsQuant_PerTensor,
                weight_bit_width=weight_bit_width))
        self.conv_branch.append(BatchNorm2d(12))
        # The branch ReLU reuses the quantizer object of the conv1 ReLU instead
        # of a fresh MyReLUQuant, so both operands of the residual add go
        # through the same activation quantizer.
        # NOTE(review): with an already-built quantizer the extra keyword
        # arguments are presumably ignored (this looks like the source of the
        # "Keyword arguments are being passed but they not being used" warning
        # on instantiation) -- confirm against the Brevitas version in use.
        self.conv_branch.append(
            QuantReLU(
                act_quant=self.conv_features[-1].act_quant, #MyReLUQuant,
                bit_width=act_bit_width,
                return_quant_tensor=True))

        # Re-quantizing ReLU applied to the result of the residual add.
        self.relu_out =  QuantReLU(
                act_quant=MyReLUQuant,
                bit_width=act_bit_width,
                return_quant_tensor=True)

        # kernel_size equals the 224x224 feature-map size, i.e. one value
        # per channel is produced for 224x224 inputs.
        self.avg_pool = TruncAvgPool2d(
                kernel_size=224,
                trunc_quant=TruncTo8bit,
                float_to_int_impl_type='FLOOR')

        # Linear 1: 12 pooled channel values -> 8 hidden features
        self.linear_features.append(
            QuantLinear(
                in_features=12,
                out_features=8,
                bias=False,
                weight_quant=MyWeightsQuant_PerTensor,
                weight_bit_width=weight_bit_width))
        self.linear_features.append(BatchNorm1d(8))
        self.linear_features.append(
            QuantReLU(
                act_quant=MyReLUQuant,
                bit_width=act_bit_width,
                return_quant_tensor=False))

        # Linear 2: 8 hidden features -> num_classes outputs.
        # (Previously hard-coded to 2, which silently ignored `num_classes`.)
        self.linear_features.append(
            QuantLinear(
                in_features=8,
                out_features=num_classes,
                bias=False,
                weight_quant=MyWeightsQuant_PerTensor,
                weight_bit_width=weight_bit_width))
        self.linear_features.append(TensorNorm())

        # 1-bit output quantizer with constant scaling in [-1, 1].
        self.bipolar_out = QuantIdentity(
            quant_type='binary',
            scaling_impl_type='const',
            bit_width=1, min_val=-1.0, max_val=1.0)

    def forward(self, x):
        """Run the network on a batch ``x`` and return the binarized outputs.

        ``x`` is first mapped through ``2 * x - 1`` (presumably from [0, 1]
        to [-1, 1] -- confirm against the data pipeline), then passed through
        the stem, the residual branch, pooling and the linear head.
        """
        x = 2.0 * x - torch.tensor([1.0], device=x.device)
        for mod in self.conv_features:
            x = mod(x)

        # Residual branch: conv2 -> BN -> ReLU applied to the stem output.
        x_res = x
        for mod in self.conv_branch:
            x_res = mod(x_res)

        # Skip connection followed by a re-quantizing ReLU.
        x = x + x_res
        x = self.relu_out(x)

        x = self.avg_pool(x)

        # Flatten everything but the batch dimension for the linear head.
        x = x.view(x.shape[0], -1)
        for mod in self.linear_features:
            x = mod(x)

        x = self.bipolar_out(x)

        return x

In [4]:
# Build the network with its default hyper-parameters and keep it on the CPU.
model_qnn = TINY_RESNET().to('cpu')

  warn('Keyword arguments are being passed but they not being used.')


In [5]:
# Dummy-input shape used for the export below: one image, 3 channels, 224x224
# (matches the in_channels=3 default and the kernel_size=224 average pool of TINY_RESNET).
input_shape = (1, 3, 224, 224)
# print(summary(model_qnn, input_size=input_shape))

In [6]:
import os

# Folder that receives every intermediate ONNX checkpoint of this notebook.
models_folder = './step_by_step_tiny_v2'
# Create it up front: on a fresh checkout the folder does not exist yet and
# the first export below would otherwise fail when writing its output file.
os.makedirs(models_folder, exist_ok=True)
# Path of the QONNX model exported from Brevitas.
model_qnn_filename = models_folder + '/TINY_Resnet__QONNX.onnx'

In [7]:
# Put the model in evaluation mode, then export it to QONNX using a random
# dummy input of shape `input_shape`. The trailing ';' suppresses cell output.
model_qnn.eval();
export_qonnx(model_qnn, torch.randn(input_shape), model_qnn_filename);

# FINN Flow

## Load Model and View

In [8]:
from finn.util.visualization import showSrc, showInNetron
from qonnx.util.cleanup import cleanup as qonnx_cleanup

In [9]:
# Serve the exported QONNX model in Netron for visual inspection.
showInNetron(model_qnn_filename)

Serving './step_by_step_tiny_v2/TINY_Resnet__QONNX.onnx' at http://0.0.0.0:8083


## Clean

In [10]:
# Run the QONNX cleanup on the exported model and write the result to a new checkpoint.
qonnx_clean_filename = f'{models_folder}/01_clean.onnx'
qonnx_cleanup(model_qnn_filename, out_file=qonnx_clean_filename)

In [11]:
# Inspect the cleaned-up graph in Netron.
showInNetron(qonnx_clean_filename)

Stopping http://0.0.0.0:8083
Serving './step_by_step_tiny_v2/01_clean.onnx' at http://0.0.0.0:8083


## Convert to FINN

In [12]:
from qonnx.core.modelwrapper import ModelWrapper

In [13]:
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.fold_constants import FoldConstants
from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs

In [14]:
# Load the cleaned QONNX graph, convert it to the FINN dialect and run the
# tidy-up passes (same passes, same order as a step-by-step sequence).
model = (
    ModelWrapper(qonnx_clean_filename)
    .transform(ConvertQONNXtoFINN())
    .transform(InferShapes())
    .transform(FoldConstants())
    .transform(GiveUniqueNodeNames())
    .transform(GiveReadableTensorNames())
    .transform(RemoveStaticGraphInputs())
)

In [15]:
# Checkpoint the converted and tidied model.
finn_tidy = f'{models_folder}/02_finn_tidy.onnx'
model.save(finn_tidy)

In [16]:
# Inspect the FINN-converted graph in Netron.
showInNetron(finn_tidy)

Stopping http://0.0.0.0:8083
Serving './step_by_step_tiny_v2/02_finn_tidy.onnx' at http://0.0.0.0:8083


## Preprocess

In [17]:
from finn.util.pytorch import ToTensor
from qonnx.transformation.merge_onnx_models import MergeONNXModels
from qonnx.core.datatype import DataType
from qonnx.transformation.infer_datatypes import InferDataTypes

In [18]:
# Reload the tidy checkpoint and look up the name and shape of its graph input.
model = ModelWrapper(finn_tidy)
global_inp_name = model.graph.input[0].name
ishape = model.get_tensor_shape(global_inp_name)
# preprocessing: torchvision's ToTensor divides uint8 inputs by 255
totensor_pyt = ToTensor()
# Export the preprocessing module on its own, clean it in place (the same
# file is used as input and output) and convert it to the FINN dialect.
chkpt_preproc_name = models_folder + "/prepro_node.onnx"
export_qonnx(totensor_pyt, torch.randn(ishape), chkpt_preproc_name)
qonnx_cleanup(chkpt_preproc_name, out_file=chkpt_preproc_name)
pre_model = ModelWrapper(chkpt_preproc_name)
pre_model = pre_model.transform(ConvertQONNXtoFINN())

# join preprocessing and core model
model = model.transform(MergeONNXModels(pre_model))
# Annotate the graph input as UINT8. The input name is looked up again
# because the merged model has a new first graph input.
global_inp_name = model.graph.input[0].name
model.set_tensor_datatype(global_inp_name, DataType["UINT8"])



### Tidy again

In [19]:
# Tidy-up passes on the merged (preprocessing + core) model, including
# datatype inference now that the input is annotated.
model = (
    model
    .transform(InferShapes())
    .transform(FoldConstants())
    .transform(GiveUniqueNodeNames())
    .transform(GiveReadableTensorNames())
    .transform(InferDataTypes())
    .transform(RemoveStaticGraphInputs())
)

In [20]:
# Checkpoint the model with the preprocessing node merged in.
finn_prepro = f'{models_folder}/03_finn_prepro.onnx'
model.save(finn_prepro)

In [21]:
# Inspect the merged preprocessing + core graph in Netron.
showInNetron(finn_prepro)

Stopping http://0.0.0.0:8083
Serving './step_by_step_tiny_v2/03_finn_prepro.onnx' at http://0.0.0.0:8083


## Streamline

In [22]:
from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul

from qonnx.transformation.change_datalayout import ChangeDataLayoutQuantAvgPool2d
from qonnx.transformation.infer_data_layouts import InferDataLayouts
from qonnx.transformation.general import RemoveUnusedTensors

from finn.transformation.streamline import Streamline
import finn.transformation.streamline.absorb as absorb
from finn.transformation.streamline.reorder import MoveScalarLinearPastInvariants
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
from finn.transformation.streamline.reorder import MoveLinearPastEltwiseAdd

from finn.transformation.streamline.reorder import MoveMulPastFork

In [23]:
# Reload the preprocessed checkpoint, move Mul nodes past the fork of the
# residual connection, then streamline and refresh layouts / unused tensors.
# (absorb.AbsorbMulIntoMultiThreshold() and absorb.AbsorbAddIntoMultiThreshold()
# were tried before this sequence and are intentionally left disabled.)
model = (
    ModelWrapper(finn_prepro)
    .transform(MoveMulPastFork())
    .transform(Streamline())
    .transform(InferDataLayouts())
    .transform(RemoveUnusedTensors())
)

In [24]:
# Checkpoint the result of the first streamlining round.
finn_mul_add_to_multithres = f'{models_folder}/040_finn_mul_add_to_multithres.onnx'
model.save(finn_mul_add_to_multithres)

In [25]:
# Inspect the graph after the first streamlining round in Netron.
showInNetron(finn_mul_add_to_multithres)

Stopping http://0.0.0.0:8083
Serving './step_by_step_tiny_v2/040_finn_mul_add_to_multithres.onnx' at http://0.0.0.0:8083


In [26]:
# Reload the previous checkpoint from disk so this stage starts from the saved state.
model = ModelWrapper(finn_mul_add_to_multithres)

In [27]:
# Move linear ops past the element-wise add of the residual connection,
# then streamline again and refresh layouts / unused tensors.
model = (
    model
    .transform(MoveLinearPastEltwiseAdd())
    .transform(Streamline())
    .transform(InferDataLayouts())
    .transform(RemoveUnusedTensors())
)



In [28]:
# Checkpoint the model after moving linear ops past the residual add.
finn_move_mul_past_add = f'{models_folder}/041_finn_move_mul_past_add.onnx'
model.save(finn_move_mul_past_add)

In [29]:
# Inspect the graph after moving linear ops past the add in Netron.
showInNetron(finn_move_mul_past_add)

Stopping http://0.0.0.0:8083
Serving './step_by_step_tiny_v2/041_finn_move_mul_past_add.onnx' at http://0.0.0.0:8083


In [30]:
import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw
from finn.transformation.streamline.reorder import MoveTransposePastFork 

In [31]:
# Reload the previous checkpoint from disk so this stage starts from the saved state.
model = ModelWrapper(finn_move_mul_past_add)

In [32]:
# Convert the residual add to its hardware layer first, then lower the
# convolutions to MatMul and absorb the transposes this introduces.
# (MoveTransposePastFork() was tried after the first step and is left disabled.)
model = (
    model
    .transform(to_hw.InferAddStreamsLayer())
    .transform(LowerConvsToMatMul())
    .transform(absorb.AbsorbTransposeIntoMultiThreshold())
    .transform(absorb.AbsorbConsecutiveTransposes())
)


In [33]:
# Checkpoint the model after the add-to-hardware and conv-lowering stage.
finn_add_to_hw = f'{models_folder}/042_finn_add_to_hw.onnx'
model.save(finn_add_to_hw)

In [34]:
# Inspect the graph after the add-to-hardware / conv-lowering stage in Netron.
showInNetron(finn_add_to_hw)

Stopping http://0.0.0.0:8083
Serving './step_by_step_tiny_v2/042_finn_add_to_hw.onnx' at http://0.0.0.0:8083


In [35]:
# Reload the previous checkpoint from disk so this stage starts from the saved state.
model = ModelWrapper(finn_add_to_hw)

In [36]:
# Change the data layout of the quantized average pool, absorb the resulting
# back-to-back transposes, then streamline and refresh layouts / unused tensors.
model = (
    model
    .transform(ChangeDataLayoutQuantAvgPool2d())
    .transform(absorb.AbsorbConsecutiveTransposes())
    .transform(Streamline())
    .transform(InferDataLayouts())
    .transform(RemoveUnusedTensors())
)

In [37]:
# Checkpoint the model after the average-pool layout change.
finn_avgpool = f'{models_folder}/043_finn_avgpool.onnx'
model.save(finn_avgpool)

In [38]:
# Inspect the graph after the average-pool layout change in Netron.
showInNetron(finn_avgpool)

Stopping http://0.0.0.0:8083
Serving './step_by_step_tiny_v2/043_finn_avgpool.onnx' at http://0.0.0.0:8083


# Old Streamline flow plus some tests: this does not work

In [30]:
# model = ModelWrapper(finn_prepro)
# # model = model.transform(MoveLinearPastEltwiseAdd())
# model = model.transform(absorb.AbsorbAddIntoMultiThreshold())
# model = model.transform(absorb.AbsorbMulIntoMultiThreshold())

# model = model.transform(MoveScalarLinearPastInvariants())
# model = model.transform(Streamline())
# model = model.transform(LowerConvsToMatMul())
# model = model.transform(MakeMaxPoolNHWC())
# model = model.transform(ChangeDataLayoutQuantAvgPool2d())
# model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())

# model = model.transform(Streamline())
# model = model.transform(InferDataLayouts())
# model = model.transform(RemoveUnusedTensors())

In [31]:
# finn_streamline = models_folder + '/04_finn_streamline.onnx'
# model.save(finn_streamline)

In [32]:
# showInNetron(finn_streamline)

Stopping http://0.0.0.0:8083
Serving './step_by_step_tiny_v2/04_finn_streamline.onnx' at http://0.0.0.0:8083


# To HW Layers

In [39]:
import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition,
)
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten

from qonnx.custom_op.registry import getCustomOp

In [40]:
# Reload the last streamlined checkpoint from disk as the starting point for the hardware-layer conversion.
model = ModelWrapper(finn_avgpool)

In [41]:
# Convert the quantized MatMul layers to matrix-vector-activation hardware
# layers, then streamline and refresh layouts / unused tensors.
model = (
    model
    .transform(to_hw.InferQuantizedMatrixVectorActivation())
    .transform(Streamline())
    .transform(InferDataLayouts())
    .transform(RemoveUnusedTensors())
)

In [42]:
# Checkpoint the model after the matrix-vector-activation conversion.
finn_hw_mvau = f'{models_folder}/044_finn_hw_mvau.onnx'
model.save(finn_hw_mvau)

In [43]:
# Inspect the graph after the matrix-vector-activation conversion in Netron.
showInNetron(finn_hw_mvau)

Stopping http://0.0.0.0:8083
Serving './step_by_step_tiny_v2/044_finn_hw_mvau.onnx' at http://0.0.0.0:8083


In [44]:
# Reload the previous checkpoint from disk so this stage starts from the saved state.
model = ModelWrapper(finn_hw_mvau)

In [45]:
# Convert the convolution input generation to hardware layers, then
# streamline and refresh layouts / unused tensors.
model = (
    model
    .transform(to_hw.InferConvInpGen())
    .transform(Streamline())
    .transform(InferDataLayouts())
    .transform(RemoveUnusedTensors())
)

In [46]:
# Checkpoint the model after the convolution input generators were inferred.
finn_hw_convs = f'{models_folder}/045_finn_hw_convs.onnx'
model.save(finn_hw_convs)

In [47]:
# Inspect the graph after the convolution hardware conversion in Netron.
showInNetron(finn_hw_convs)

Stopping http://0.0.0.0:8083
Serving './step_by_step_tiny_v2/045_finn_hw_convs.onnx' at http://0.0.0.0:8083


In [48]:
# Reload the previous checkpoint from disk so this stage starts from the saved state.
model = ModelWrapper(finn_hw_convs)

In [49]:
# Convert the pooling layer to hardware layers. InferConvInpGen is applied
# again right after InferPool -- presumably because the pool conversion
# introduces new nodes that need an input generator (TODO confirm).
# Then streamline and refresh layouts / unused tensors.
model = (
    model
    .transform(to_hw.InferPool())
    .transform(to_hw.InferConvInpGen())
    .transform(Streamline())
    .transform(InferDataLayouts())
    .transform(RemoveUnusedTensors())
)

In [50]:
# Checkpoint the model after the pooling hardware conversion.
finn_hw_pool = f'{models_folder}/046_finn_hw_pool.onnx'
model.save(finn_hw_pool)

In [51]:
# Inspect the graph after the pooling hardware conversion in Netron.
showInNetron(finn_hw_pool)

Stopping http://0.0.0.0:8083
Serving './step_by_step_tiny_v2/046_finn_hw_pool.onnx' at http://0.0.0.0:8083


In [52]:
# Reload the previous checkpoint from disk so this stage starts from the saved state.
model = ModelWrapper(finn_hw_pool)

In [53]:
# Convert the remaining thresholds to hardware layers, remove the flatten
# between the convolutional and fully-connected parts, absorb consecutive
# transposes, then streamline and refresh layouts / unused tensors.
model = (
    model
    .transform(to_hw.InferThresholdingLayer())
    .transform(RemoveCNVtoFCFlatten())
    .transform(absorb.AbsorbConsecutiveTransposes())
    .transform(Streamline())
    .transform(InferDataLayouts())
    .transform(RemoveUnusedTensors())
)

In [54]:
# Checkpoint the model after the thresholding conversion and flatten removal.
finn_hw_multithres_flaten = f'{models_folder}/047_finn_hw_multithres_flaten.onnx'
model.save(finn_hw_multithres_flaten)

In [55]:
# Inspect the graph after the thresholding conversion / flatten removal in Netron.
showInNetron(finn_hw_multithres_flaten)

Stopping http://0.0.0.0:8083
Serving './step_by_step_tiny_v2/047_finn_hw_multithres_flaten.onnx' at http://0.0.0.0:8083
