# Mobilenet Resnet Model

In [1]:
# Folder holding the exported QONNX model; every checkpoint of the flow is written here too.
models_folder = './mvau_wwidth_max'
# QONNX export of the classifier that is the entry point of the flow.
model_qnn_filename = f'{models_folder}/MY_MBLNET_V2_RESNET_classifier__best_mean_F1__BIPOLAR_Out__QONNX.onnx'

# FINN Flow

## Load Model and View

In [2]:
from finn.util.visualization import showSrc, showInNetron
from qonnx.util.cleanup import cleanup as qonnx_cleanup

In [3]:
# Serve the exported QONNX model in Netron for visual inspection.
showInNetron(model_qnn_filename)

Serving './mvau_wwidth_max/MY_MBLNET_V2_RESNET_classifier__best_mean_F1__BIPOLAR_Out__QONNX.onnx' at http://0.0.0.0:8083


## Clean

In [4]:
# Run the QONNX cleanup pass on the exported model and write the result as a new checkpoint.
qonnx_clean_filename = f'{models_folder}/01_clean.onnx'
qonnx_cleanup(model_qnn_filename, out_file=qonnx_clean_filename)

In [5]:
# Inspect the cleaned-up graph.
showInNetron(qonnx_clean_filename)

Stopping http://0.0.0.0:8083
Serving './mvau_wwidth_max/01_clean.onnx' at http://0.0.0.0:8083


## Convert to FINN

In [6]:
from qonnx.core.modelwrapper import ModelWrapper

In [7]:
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.fold_constants import FoldConstants
from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs

In [8]:
# Load the cleaned QONNX graph, convert it to FINN's representation and
# apply the tidy-up passes one after another (order matters).
model = ModelWrapper(qonnx_clean_filename)
tidy_passes = [
    ConvertQONNXtoFINN(),
    InferShapes(),
    FoldConstants(),
    GiveUniqueNodeNames(),
    GiveReadableTensorNames(),
    RemoveStaticGraphInputs(),
]
for tidy_pass in tidy_passes:
    model = model.transform(tidy_pass)



In [9]:
# Checkpoint: converted and tidied FINN graph.
finn_tidy = f'{models_folder}/02_finn_tidy.onnx'
model.save(finn_tidy)

In [10]:
# Inspect the tidied FINN graph.
showInNetron(finn_tidy)

Stopping http://0.0.0.0:8083
Serving './mvau_wwidth_max/02_finn_tidy.onnx' at http://0.0.0.0:8083


## Preprocess

In [14]:
import torch
from brevitas.export import export_qonnx
from finn.util.pytorch import ToTensor
from qonnx.transformation.merge_onnx_models import MergeONNXModels
from qonnx.core.datatype import DataType
from qonnx.transformation.infer_datatypes import InferDataTypes

In [15]:
# Merge a ToTensor preprocessing graph into the model and annotate the graph input as UINT8.
model = ModelWrapper(finn_tidy)
global_inp_name = model.graph.input[0].name
ishape = model.get_tensor_shape(global_inp_name)
# preprocessing: torchvision's ToTensor divides uint8 inputs by 255
totensor_pyt = ToTensor()
chkpt_preproc_name = models_folder + "/prepro_node.onnx"
# export the preprocessing module to QONNX; the random tensor is the example input for the export
export_qonnx(totensor_pyt, torch.randn(ishape), chkpt_preproc_name)
# clean up the exported file in place (input and output paths are the same)
qonnx_cleanup(chkpt_preproc_name, out_file=chkpt_preproc_name)
pre_model = ModelWrapper(chkpt_preproc_name)
pre_model = pre_model.transform(ConvertQONNXtoFINN())

# join preprocessing and core model
model = model.transform(MergeONNXModels(pre_model))
# re-read the graph input name after the merge and annotate that input as UINT8
# NOTE(review): presumably this is now the input of the preprocessing node — confirm in Netron
global_inp_name = model.graph.input[0].name
model.set_tensor_datatype(global_inp_name, DataType["UINT8"])



### Tidy again

In [16]:
# Re-run the tidy-up passes on the merged graph; datatype inference is included
# this time so the UINT8 input annotation is taken into account.
retidy_passes = [
    InferShapes(),
    FoldConstants(),
    GiveUniqueNodeNames(),
    GiveReadableTensorNames(),
    InferDataTypes(),
    RemoveStaticGraphInputs(),
]
for retidy_pass in retidy_passes:
    model = model.transform(retidy_pass)

In [17]:
# Checkpoint: model with the preprocessing node merged in.
finn_prepro = f'{models_folder}/03_finn_prepro.onnx'
model.save(finn_prepro)

In [18]:
# Inspect the graph with preprocessing attached.
showInNetron(finn_prepro)

Stopping http://0.0.0.0:8083
Serving './mvau_wwidth_max/03_finn_prepro.onnx' at http://0.0.0.0:8083


## Streamline

In [19]:
from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul

from qonnx.transformation.change_datalayout import ChangeDataLayoutQuantAvgPool2d
from qonnx.transformation.infer_data_layouts import InferDataLayouts
from qonnx.transformation.general import RemoveUnusedTensors

from finn.transformation.streamline import Streamline
import finn.transformation.streamline.absorb as absorb
from finn.transformation.streamline.reorder import MoveScalarLinearPastInvariants
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
from finn.transformation.streamline.reorder import MoveLinearPastEltwiseAdd

from finn.transformation.streamline.reorder import MoveMulPastFork

### Move Mul Past Fork

In [20]:
# Start the streamlining stage from the saved preprocessing checkpoint.
model = ModelWrapper(finn_prepro)

In [21]:
# Push Mul nodes past fork points first, then streamline and refresh
# the data-layout annotations before dropping unused tensors.
mul_past_fork_passes = [
    MoveMulPastFork(),
    Streamline(),
    InferDataLayouts(),
    RemoveUnusedTensors(),
]
for fork_pass in mul_past_fork_passes:
    model = model.transform(fork_pass)

  Tnew = T / A.reshape(-1, 1)


In [22]:
# Checkpoint: Mul nodes moved past forks.
finn_mul_past_fork = f'{models_folder}/040_finn_mul_past_fork.onnx'
model.save(finn_mul_past_fork)

In [23]:
# Inspect the graph after moving Mul nodes past forks.
showInNetron(finn_mul_past_fork)

Stopping http://0.0.0.0:8083
Serving './mvau_wwidth_max/040_finn_mul_past_fork.onnx' at http://0.0.0.0:8083


### Move Mul Past Residual Adds

In [24]:
# Continue from the checkpoint saved after the Mul-past-fork step.
model = ModelWrapper(finn_mul_past_fork)

In [25]:
# Move linear ops past the element-wise (residual) adds, then streamline
# again and refresh layouts / drop unused tensors.
past_add_passes = [
    MoveLinearPastEltwiseAdd(),
    Streamline(),
    InferDataLayouts(),
    RemoveUnusedTensors(),
]
for add_pass in past_add_passes:
    model = model.transform(add_pass)



In [26]:
# Checkpoint: linear ops moved past the residual adds.
finn_move_mul_past_add = f'{models_folder}/041_finn_move_mul_past_add.onnx'
model.save(finn_move_mul_past_add)

In [27]:
# Inspect the graph after moving linear ops past the residual adds.
showInNetron(finn_move_mul_past_add)

Stopping http://0.0.0.0:8083
Serving './mvau_wwidth_max/041_finn_move_mul_past_add.onnx' at http://0.0.0.0:8083


In [28]:
import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw

In [29]:
# Continue from the checkpoint saved after the residual-add step.
model = ModelWrapper(finn_move_mul_past_add)

In [30]:
# Lower convolutions to matrix multiplications and change the data layout of
# the quantized average pool, fold the resulting transposes into the
# MultiThreshold nodes, then streamline and clean up as in the earlier stages.
lowering_passes = [
    LowerConvsToMatMul(),
    ChangeDataLayoutQuantAvgPool2d(),
    absorb.AbsorbTransposeIntoMultiThreshold(),
    Streamline(),
    InferDataLayouts(),
    RemoveUnusedTensors(),
]
for lowering_pass in lowering_passes:
    model = model.transform(lowering_pass)

In [31]:
# Checkpoint: convolutions lowered and average-pool layout changed.
finn_lowerconvs_avgpool = f'{models_folder}/042_finn_lowerconvs_avgpool.onnx'
model.save(finn_lowerconvs_avgpool)

In [32]:
# Inspect the graph after lowering convolutions.
showInNetron(finn_lowerconvs_avgpool)

Stopping http://0.0.0.0:8083
Serving './mvau_wwidth_max/042_finn_lowerconvs_avgpool.onnx' at http://0.0.0.0:8083


In [40]:
# model = model.transform(to_hw.InferAddStreamsLayer())
# model = model.transform(LowerConvsToMatMul())

# model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
# model = model.transform(absorb.AbsorbConsecutiveTransposes())

# model = model.transform(Streamline())
# model = model.transform(InferDataLayouts())
# model = model.transform(RemoveUnusedTensors())

In [41]:
# finn_add_to_hw_lower_convs = models_folder + '/042_finn_add_to_hw_lower_convs.onnx'
# model.save(finn_add_to_hw_lower_convs)

In [42]:
# showInNetron(finn_add_to_hw_lower_convs)

Stopping http://0.0.0.0:8083
Serving './step_by_step_onnx_trained_standalone/042_finn_add_to_hw_lower_convs.onnx' at http://0.0.0.0:8083


### Average Pooling

In [45]:
# model = ModelWrapper(finn_add_to_hw_lower_convs)

In [46]:
# model = model.transform(ChangeDataLayoutQuantAvgPool2d())
# model = model.transform(absorb.AbsorbConsecutiveTransposes())

# model = model.transform(Streamline())
# model = model.transform(InferDataTypes())
# model = model.transform(InferDataLayouts())
# model = model.transform(RemoveUnusedTensors())

In [47]:
# finn_avgpool = models_folder + '/043_finn_avgpool.onnx'
# model.save(finn_avgpool)

In [48]:
# showInNetron(finn_avgpool)

Stopping http://0.0.0.0:8083
Serving './step_by_step_onnx_trained_standalone/043_finn_avgpool.onnx' at http://0.0.0.0:8083


# To HW Layers

In [33]:
import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition,
)
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten

from qonnx.custom_op.registry import getCustomOp

# Convert the last BIPOLAR node to BINARY and re-annotate MultiThreshold thresholds (`input[1]`) from FLOAT32 to INT32

In [34]:
# Start the HW conversion from the lowered-convolution checkpoint
# (the disabled line below loaded the checkpoint of the commented-out average-pooling cells).
# model = ModelWrapper(finn_avgpool)

model = ModelWrapper(finn_lowerconvs_avgpool)

In [35]:
# Collect all MultiThreshold nodes; the next two cells iterate over this list.
Multithreshold_node = model.get_nodes_by_op_type("MultiThreshold") 

In [36]:
# Switch every MultiThreshold node whose output type is BIPOLAR to BINARY,
# resetting its output scale and bias, and log each converted node.
binary_attrs = {"out_dtype": "BINARY", "out_scale": 1.0, "out_bias": 0.0}
for node in Multithreshold_node:
    node_inst = getCustomOp(node)
    if node_inst.get_nodeattr("out_dtype") != "BIPOLAR":
        continue
    for attr_name, attr_value in binary_attrs.items():
        node_inst.set_nodeattr(attr_name, attr_value)
    print(f'{node.name} converted from Bipolar to Binary\n{node}')

MultiThreshold_37 converted from Bipolar to Binary
input: "MatMul_31_out0"
input: "MultiThreshold_37_param0"
output: "global_out"
name: "MultiThreshold_37"
op_type: "MultiThreshold"
attribute {
  name: "out_dtype"
  s: "BINARY"
  type: STRING
}
attribute {
  name: "out_scale"
  f: 1.0
  type: FLOAT
}
attribute {
  name: "out_bias"
  f: 0.0
  type: FLOAT
}
domain: "qonnx.custom_op.general"



In [37]:
# Re-annotate threshold tensors (second input of each MultiThreshold node)
# that are still marked FLOAT32 as INT32. Only the annotation is changed here.
for node in Multithreshold_node:
    thresh_name = node.input[1]
    if not (model.get_tensor_datatype(thresh_name) == "FLOAT32"):
        continue
    print(f'{node.name}: node with Float32 annotation')
    model.set_tensor_datatype(thresh_name, DataType["INT32"])
    print(f'{node.name}: changed to datatype {model.get_tensor_datatype(thresh_name)}')

MultiThreshold_37: node with Float32 annotation
MultiThreshold_37: changed to datatype INT32


In [38]:
# Name of the first graph output; displayed as the cell result.
global_out_name = model.graph.output[0].name
global_out_name

'global_out'

In [39]:
# Annotate the graph output as BINARY to match the converted last MultiThreshold node.
model.set_tensor_datatype(global_out_name, DataType["BINARY"])

In [40]:
# Checkpoint: output converted from bipolar to binary.
finn_bipolar_to_binary = f'{models_folder}/044_finn_bipolar_to_binary.onnx'
model.save(finn_bipolar_to_binary)

In [41]:
# Inspect the graph after the bipolar-to-binary conversion.
showInNetron(finn_bipolar_to_binary)

Stopping http://0.0.0.0:8083
Serving './mvau_wwidth_max/044_finn_bipolar_to_binary.onnx' at http://0.0.0.0:8083


### Standalone before MVAU/VVAU conversion

In [42]:
# Continue from the bipolar-to-binary checkpoint.
model = ModelWrapper(finn_bipolar_to_binary)

In [43]:
# Convert the streamlined graph to FINN HW layers. The order is significant:
# thresholds are inferred before the MVAU/VVAU passes (see the section heading).
hw_conversion_passes = [
    # residual adds, then fold remaining transposes into the thresholds
    to_hw.InferAddStreamsLayer(),
    absorb.AbsorbTransposeIntoMultiThreshold(),
    # layer-by-layer conversion to HW ops
    to_hw.InferThresholdingLayer(),
    to_hw.InferQuantizedMatrixVectorActivation(),
    to_hw.InferVectorVectorActivation(),
    to_hw.InferPool(),
    to_hw.InferConvInpGen(),
    # remove the flatten before the FC part and collapse back-to-back transposes
    RemoveCNVtoFCFlatten(),
    absorb.AbsorbConsecutiveTransposes(),
    # final streamline and clean-up
    Streamline(),
    InferDataLayouts(),
    RemoveUnusedTensors(),
]
for hw_pass in hw_conversion_passes:
    model = model.transform(hw_pass)

In [44]:
# Checkpoint: graph converted to HW layers.
finn_hw_standalone = f'{models_folder}/045_finn_hw_standalone.onnx'
model.save(finn_hw_standalone)

In [45]:
# Inspect the graph after conversion to HW layers.
showInNetron(finn_hw_standalone)

Stopping http://0.0.0.0:8083
Serving './mvau_wwidth_max/045_finn_hw_standalone.onnx' at http://0.0.0.0:8083


# Infer Duplicate Streams

In [46]:
from qonnx.transformation.general import SortGraph

In [47]:
# Continue from the HW-layer checkpoint.
model = ModelWrapper(finn_hw_standalone)

In [48]:
# Infer DuplicateStreams layers, streamline and clean up, then sort the
# graph as the last step.
duplicate_stream_passes = [
    to_hw.InferDuplicateStreamsLayer(),
    Streamline(),
    InferDataLayouts(),
    GiveReadableTensorNames(),
    RemoveUnusedTensors(),
    SortGraph(),
]
for dup_pass in duplicate_stream_passes:
    model = model.transform(dup_pass)

In [49]:
# Checkpoint: graph with duplicate-stream layers inferred.
# NOTE(review): prefix is '46_' while the neighbouring checkpoints use three digits ('045_').
finn_hw_duplicate = f'{models_folder}/46_finn_hw_duplicate.onnx'
model.save(finn_hw_duplicate)

In [50]:
# Inspect the graph with duplicate-stream layers.
showInNetron(finn_hw_duplicate)

Stopping http://0.0.0.0:8083
Serving './mvau_wwidth_max/46_finn_hw_duplicate.onnx' at http://0.0.0.0:8083


# Dataflow Partition

In [51]:
# Partition the graph: the result is a parent model containing
# StreamingDataflowPartition node(s) that reference the child dataflow model on disk.
model = ModelWrapper(finn_hw_duplicate)
parent_model = model.transform(CreateDataflowPartition())

In [52]:
# Checkpoint: parent graph of the dataflow partition.
finn_parent_filename = f'{models_folder}/50_finn_dataflow_parent.onnx'
parent_model.save(finn_parent_filename)

In [53]:
# Inspect the parent graph.
# NOTE(review): the recorded output shows a folder other than models_folder; it looks stale — re-run to refresh.
showInNetron(finn_parent_filename)

Stopping http://0.0.0.0:8083
Serving './step_by_step_onnx_trained_standalone_duplicateLayer/50_finn_dataflow_parent.onnx' at http://0.0.0.0:8083


In [53]:
# Locate the StreamingDataflowPartition node in the parent graph and load the
# child model it references (its "model" attribute holds the child's .onnx path).
sdp_nodes = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")
# Fail loudly instead of silently continuing with only the first partition:
# several partitions typically mean a layer in between was not converted to HW,
# and the rest of the flow would then build only part of the network.
assert len(sdp_nodes) == 1, (
    f"Expected exactly one StreamingDataflowPartition, found {len(sdp_nodes)}"
)
sdp_node = getCustomOp(sdp_nodes[0])
dataflow_filename = sdp_node.get_nodeattr("model")
dataflow_model = ModelWrapper(dataflow_filename)

In [54]:
from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers

In [55]:
from finn.util.basic import pynq_part_map
# Target board; change this for a different PYNQ board (valid names are the keys of pynq_part_map)
pynq_board = "Pynq-Z1"
fpga_part = pynq_part_map[pynq_board]
# target clock period in nanoseconds
target_clk_ns = 10

# Specialize Layers

In [56]:
# Request the HLS implementation for every FMPadding node before the layers
# are specialized, logging each node that is touched.
FMPadding_node = dataflow_model.get_nodes_by_op_type("FMPadding")

for node in FMPadding_node:
    getCustomOp(node).set_nodeattr("preferred_impl_style", "hls")
    print(f'Node {node.name} forced to HLS')

Node FMPadding_0 forced to HLS
Node FMPadding_1 forced to HLS
Node FMPadding_2 forced to HLS
Node FMPadding_3 forced to HLS
Node FMPadding_4 forced to HLS
Node FMPadding_5 forced to HLS
Node FMPadding_6 forced to HLS
Node FMPadding_7 forced to HLS
Node FMPadding_8 forced to HLS
Node FMPadding_9 forced to HLS
Node FMPadding_10 forced to HLS


In [57]:
# Specialize the generic HW layers for the selected FPGA part, then regenerate
# node and tensor names for the specialized graph. The result is saved in the next cell.
post_partition_passes = (
    SpecializeLayers(fpga_part),
    GiveUniqueNodeNames(),
    GiveReadableTensorNames(),
)
for partition_pass in post_partition_passes:
    dataflow_model = dataflow_model.transform(partition_pass)

In [58]:
# Checkpoint: specialized dataflow model, stored under an easy-to-find name.
finn_dataflow_filename = f'{models_folder}/60_finn_dataflow_model.onnx'
dataflow_model.save(finn_dataflow_filename)

In [59]:
# Inspect the specialized dataflow model.
showInNetron(finn_dataflow_filename)

Stopping http://0.0.0.0:8083
Serving './mvau_wwidth_max/60_finn_dataflow_model.onnx' at http://0.0.0.0:8083


# Analyze MVAU_WWIDTH_MAX

In [60]:
# Load the specialized dataflow model to examine its MVAU weight widths.
model = ModelWrapper(finn_dataflow_filename)

In [62]:
# Print the name and weight bit width of the first MVAU_rtl node in graph order.
# Only the matching node is wrapped with getCustomOp, so unrelated nodes are never
# instantiated, and the absence of any MVAU_rtl node is reported instead of
# silently producing no output.
mvau_rtl_nodes = model.get_nodes_by_op_type("MVAU_rtl")
if mvau_rtl_nodes:
    node = mvau_rtl_nodes[0]
    node_inst = getCustomOp(node)
    print(node.name)
    print(node_inst.get_weight_datatype().bitwidth())
else:
    print("No MVAU_rtl node found in the dataflow model")

MVAU_rtl_0
4


# Folding Factors

### Calculate cycles per frame first, for a FPS target

In [72]:
# Translate the throughput target into a per-frame cycle budget at the target clock.
FPS_target = 750
frame_latency = 1 / FPS_target  # seconds available per frame
clk_period_s = target_clk_ns*1e-9  # clock period converted from ns to seconds
my_target_cycles_per_frame = int(frame_latency / clk_period_s)

print(f'Frame latency = {frame_latency}')
print(f'Cycles per frame = {my_target_cycles_per_frame}')

Frame latency = 0.0013333333333333333
Cycles per frame = 133333


In [73]:
from finn.transformation.fpgadataflow.set_folding import SetFolding

In [74]:
# Reload the specialized dataflow model as the starting point for folding.
model = ModelWrapper(finn_dataflow_filename)

In [75]:
# Let FINN choose folding factors that meet the per-frame cycle budget computed above.
# NOTE(review): mvau_wwidth_max=36 presumably caps the MVAU weight-stream width — confirm against the SetFolding docs.
folding_pass = SetFolding(
    target_cycles_per_frame=my_target_cycles_per_frame,
    mvau_wwidth_max=36,
    two_pass_relaxation=True,
)
model = model.transform(folding_pass)

In [76]:
# Checkpoint: model with folding factors applied.
folding_filename = f'{models_folder}/70_finn_folding.onnx'
model.save(folding_filename)

In [77]:
# Inspect the folded model.
showInNetron(folding_filename)

Stopping http://0.0.0.0:8083
Serving './mvau_wwidth_max/70_finn_folding.onnx' at http://0.0.0.0:8083


### Check estimated cycles per layer

In [78]:
import numpy as np

In [79]:
# FINN nodes of the folded model; the next cell reads their cycle estimates.
all_nodes = model.get_finn_nodes()

In [80]:
# Print the "cycles_estimate" attribute of every node, followed by the sum over all nodes.
# NOTE(review): for a streaming dataflow pipeline the slowest node, not the sum,
# presumably bounds the frame rate — consider comparing the maximum against the target.
total_cycles = []
for i, node in enumerate(all_nodes):
    node_cycles = getCustomOp(node).get_nodeattr("cycles_estimate")
    total_cycles.append(node_cycles)
    print(f'Node {i} estimated cycles: {node_cycles}')
print(f'\nTotal estimated cycles: {np.array(total_cycles).sum()}')

Node 0 estimated cycles: 50176
Node 1 estimated cycles: 51076
Node 2 estimated cycles: 113351
Node 3 estimated cycles: 75264
Node 4 estimated cycles: 100352
Node 5 estimated cycles: 103968
Node 6 estimated cycles: 113127
Node 7 estimated cycles: 112896
Node 8 estimated cycles: 100352
Node 9 estimated cycles: 100352
Node 10 estimated cycles: 100352
Node 11 estimated cycles: 100352
Node 12 estimated cycles: 100352
Node 13 estimated cycles: 103968
Node 14 estimated cycles: 123684
Node 15 estimated cycles: 112896
Node 16 estimated cycles: 50176
Node 17 estimated cycles: 100352
Node 18 estimated cycles: 50176
Node 19 estimated cycles: 50176
Node 20 estimated cycles: 100352
Node 21 estimated cycles: 100352
Node 22 estimated cycles: 107648
Node 23 estimated cycles: 113372
Node 24 estimated cycles: 112896
Node 25 estimated cycles: 100352
Node 26 estimated cycles: 100352
Node 27 estimated cycles: 50176
Node 28 estimated cycles: 50176
Node 29 estimated cycles: 50176
Node 30 estimated cycles: 100

# Minimize

In [81]:
from finn.transformation.fpgadataflow.minimize_accumulator_width import (
    MinimizeAccumulatorWidth,
)
from finn.transformation.fpgadataflow.minimize_weight_bit_width import (
    MinimizeWeightBitWidth,
)

In [82]:
# Continue from the folded checkpoint.
model = ModelWrapper(folding_filename)

In [83]:
# Shrink accumulator widths first, then weight bit widths.
for minimize_pass in (MinimizeAccumulatorWidth(), MinimizeWeightBitWidth()):
    model = model.transform(minimize_pass)



In [84]:
# Checkpoint: model after accumulator / weight bit-width minimization.
minimize_filename = f'{models_folder}/71_finn_minimize.onnx'
model.save(minimize_filename)

In [85]:
# Inspect the minimized model.
showInNetron(minimize_filename)

Stopping http://0.0.0.0:8083
Serving './mvau_wwidth_max/71_finn_minimize.onnx' at http://0.0.0.0:8083
