# Folders setup

In [35]:
import os

# Folder and file name of the exported QONNX model this notebook starts from.
ori_folder = './qonnx_models'
ori_filename = ori_folder + '/BNN_BED_classifier__best_mean_F1__BIPOLAR_Out__QONNX.onnx'

# Every intermediate model produced by this notebook is written to this folder.
models_folder = './step_by_step_bipolar'
# Create the output folder up front so the first save into it does not fail on
# a fresh checkout; exist_ok=True keeps re-running this cell harmless.
os.makedirs(models_folder, exist_ok=True)

# Model Clean

In [36]:
from finn.util.visualization import showSrc, showInNetron
from qonnx.util.cleanup import cleanup as qonnx_cleanup

In [37]:
# Run the QONNX cleanup on the original export and write the result as the
# first intermediate model.
qonnx_clean_filename = f'{models_folder}/01_clean.onnx'
qonnx_cleanup(ori_filename, out_file=qonnx_clean_filename)

In [38]:
# Open the original exported model in the Netron viewer for visual inspection.
showInNetron(ori_filename)

Stopping http://0.0.0.0:8083
Serving './qonnx_models/BNN_BED_classifier__best_mean_F1__BIPOLAR_Out__QONNX.onnx' at http://0.0.0.0:8083


In [39]:
# Open the cleaned model in the Netron viewer to compare it with the original.
showInNetron(qonnx_clean_filename)

Stopping http://0.0.0.0:8083
Serving './step_by_step_bipolar/01_clean.onnx' at http://0.0.0.0:8083


# Dummy Input

In [40]:
from qonnx.core.modelwrapper import ModelWrapper
import qonnx.core.onnx_exec as oxe

import numpy as np

In [41]:
# Use a fixed seed so the dummy input -- and every output computed from it --
# is identical after each "Restart Kernel -> Run All".
rng = np.random.default_rng(seed=0)
# Random integer values in [0, 255] scaled to [0, 1], with shape (1, 3, 230, 230).
test_ip = rng.integers(low=0, high=256, size=(1, 3, 230, 230)) / 255.
test_ip = test_ip.astype(np.float32)

In [42]:
# Load the cleaned QONNX model so it can be executed on the dummy input.
clean_model = ModelWrapper(qonnx_clean_filename)

In [43]:
# Execute the cleaned model on the dummy input, fed under the name "global_in".
input_dict = {"global_in": test_ip}
output_dict = oxe.execute_onnx(clean_model, input_dict)
# Keep the first value of the returned dict as the reference output.
produced_clean_qonnx = list(output_dict.values())[0]
produced_clean_qonnx

array([[1., 1.]], dtype=float32)

# Convert to FINN

In [44]:
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.fold_constants import FoldConstants
from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs

In [45]:
# Reload the cleaned model, then apply the conversion and tidy-up passes one
# after another, in exactly this order.
model = ModelWrapper(qonnx_clean_filename)
for transformation_cls in (
    ConvertQONNXtoFINN,
    InferShapes,
    FoldConstants,
    GiveUniqueNodeNames,
    GiveReadableTensorNames,
    RemoveStaticGraphInputs,
):
    # Each pass is instantiated right before it is applied.
    model = model.transform(transformation_cls())

In [46]:
# Persist the converted and tidied model as intermediate step 02.
finn_tidy = f'{models_folder}/02_finn_tidy.onnx'
model.save(finn_tidy)

In [47]:
# Inspect the tidied FINN model in Netron.
showInNetron(finn_tidy)

Stopping http://0.0.0.0:8083
Serving './step_by_step_bipolar/02_finn_tidy.onnx' at http://0.0.0.0:8083


# PreProcess

In [48]:
import torch
from finn.util.pytorch import ToTensor
from qonnx.transformation.merge_onnx_models import MergeONNXModels
from qonnx.core.datatype import DataType
from brevitas.export import export_qonnx

In [49]:
# Start from the tidied FINN model and read the shape of its first graph input.
model = ModelWrapper(finn_tidy)
global_inp_name = model.graph.input[0].name
ishape = model.get_tensor_shape(global_inp_name)

# preprocessing: torchvision's ToTensor divides uint8 inputs by 255
totensor_pyt = ToTensor()
chkpt_preproc_name = f"{models_folder}/prepro_node.onnx"
# Export the preprocessing module with a random tensor of the input shape,
# clean the exported file in place, then load and convert it to FINN.
export_qonnx(totensor_pyt, torch.randn(ishape), chkpt_preproc_name)
qonnx_cleanup(chkpt_preproc_name, out_file=chkpt_preproc_name)
pre_model = ModelWrapper(chkpt_preproc_name).transform(ConvertQONNXtoFINN())

# join preprocessing and core model
model = model.transform(MergeONNXModels(pre_model))
# Re-read the first graph input name from the merged model and annotate that
# input as UINT8.
global_inp_name = model.graph.input[0].name
model.set_tensor_datatype(global_inp_name, DataType["UINT8"])



In [50]:
from qonnx.transformation.infer_datatypes import InferDataTypes

### Save prepro after tidy

In [51]:
# Tidy the merged (preprocessing + core) model; the passes run in this order.
for transformation_cls in (
    InferShapes,
    FoldConstants,
    GiveUniqueNodeNames,
    GiveReadableTensorNames,
    InferDataTypes,
    RemoveStaticGraphInputs,
):
    # Each pass is instantiated right before it is applied.
    model = model.transform(transformation_cls())

In [52]:
# Persist the model with merged preprocessing as intermediate step 03.
finn_prepro = f'{models_folder}/03_finn_prepro.onnx'
model.save(finn_prepro)

In [53]:
# Inspect the model with merged preprocessing in Netron.
showInNetron(finn_prepro)

Stopping http://0.0.0.0:8083
Serving './step_by_step_bipolar/03_finn_prepro.onnx' at http://0.0.0.0:8083


# Streamline

In [54]:
from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from qonnx.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
from qonnx.transformation.change_datalayout import ChangeDataLayoutQuantAvgPool2d
from qonnx.transformation.infer_data_layouts import InferDataLayouts
from qonnx.transformation.general import RemoveUnusedTensors

from finn.transformation.streamline import Streamline
import finn.transformation.streamline.absorb as absorb
from finn.transformation.streamline.reorder import MoveScalarLinearPastInvariants
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC

In [55]:
# Reload the preprocessed model and run the streamlining passes in order.
model = ModelWrapper(finn_prepro)
for transformation_cls in (
    MoveScalarLinearPastInvariants,
    Streamline,
    LowerConvsToMatMul,
    MakeMaxPoolNHWC,
    ChangeDataLayoutQuantAvgPool2d,
    absorb.AbsorbTransposeIntoMultiThreshold,
    ConvertBipolarMatMulToXnorPopcount,
    # Currently disabled passes:
    # absorb.AbsorbAddIntoMultiThreshold,
    # absorb.AbsorbMulIntoMultiThreshold,
    # RoundAndClipThresholds,
    Streamline,
    InferDataLayouts,
    RemoveUnusedTensors,
):
    # Each pass is instantiated right before it is applied.
    model = model.transform(transformation_cls())

In [56]:
# Persist the streamlined model as intermediate step 04.
finn_streamline = f'{models_folder}/04_finn_streamline.onnx'
model.save(finn_streamline)

In [57]:
# Inspect the streamlined model in Netron.
showInNetron(finn_streamline)

Stopping http://0.0.0.0:8083
Serving './step_by_step_bipolar/04_finn_streamline.onnx' at http://0.0.0.0:8083


# HW Layers

In [58]:
from finn.util.basic import pynq_part_map
# change this if you have a different PYNQ board; the valid board names are
# the keys of pynq_part_map, which is printed in the next cell
pynq_board = "Pynq-Z1"
# FPGA part string that pynq_part_map associates with the chosen board
fpga_part = pynq_part_map[pynq_board]
# target clock period in nanoseconds
target_clk_ns = 10

In [59]:
# Show the full board -> part mapping, then the part selected for this run,
# each on its own line.
print(pynq_part_map, fpga_part, sep='\n')

{'Ultra96': 'xczu3eg-sbva484-1-e', 'Ultra96-V2': 'xczu3eg-sbva484-1-i', 'Pynq-Z1': 'xc7z020clg400-1', 'Pynq-Z2': 'xc7z020clg400-1', 'ZCU102': 'xczu9eg-ffvb1156-2-e', 'ZCU104': 'xczu7ev-ffvc1156-2-e', 'ZCU111': 'xczu28dr-ffvg1517-2-e', 'RFSoC2x2': 'xczu28dr-ffvg1517-2-e', 'RFSoC4x2': 'xczu48dr-ffvg1517-2-e', 'KV260_SOM': 'xck26-sfvc784-2LV-c'}
xc7z020clg400-1


In [60]:
import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition,
)
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten

from qonnx.custom_op.registry import getCustomOp

### Change last Bipolar Node to Binary

In [None]:
# model = ModelWrapper(finn_streamline)

In [None]:
# Multithreshold_node = model.get_nodes_by_op_type("MultiThreshold") 

In [None]:
# for node in Multithreshold_node:
#     node_inst = getCustomOp(node)
#     if node_inst.get_nodeattr("out_dtype") == "BIPOLAR":
#         node_inst.set_nodeattr("out_dtype", "BINARY")
#         node_inst.set_nodeattr("out_scale", 1.0)
#         node_inst.set_nodeattr("out_bias", 0.0)
#         print(f'{node.name} converted from Bipolar to Binary\n{node}')

In [None]:
# global_out_name = model.graph.output[0].name
# global_out_name

In [None]:
# model.set_tensor_datatype(global_out_name, DataType["BINARY"])

In [None]:
# finn_bipolar_to_binary = models_folder + '/05_finn_bipolar_to_binary.onnx'

In [None]:
# model.save(finn_bipolar_to_binary)

In [None]:
# showInNetron(finn_bipolar_to_binary)

### Standalone Thresholds

In [None]:
# model = ModelWrapper(finn_bipolar_to_binary)

# model = model.transform(to_hw.InferThresholdingLayer())

In [None]:
# finn_std_alone_thres = models_folder + '/06_finn_std_alone_thres.onnx'

In [None]:
# model.save(finn_std_alone_thres)

In [None]:
# showInNetron(finn_std_alone_thres)

## Rest of the Streamline Process

In [61]:
# Reload the streamlined model from disk as the starting point for the
# hardware-layer conversion below.
model = ModelWrapper(finn_streamline)

### Convert Multithreshold to INT32

In [31]:
# Multithreshold_node = model.get_nodes_by_op_type("MultiThreshold")    

# for node in Multithreshold_node:
#     if model.get_tensor_datatype(node.input[1]) == "FLOAT32":
#         print(f'{node.name}: node with Float32 annotation')
#         model.set_tensor_datatype(node.input[1], DataType["INT32"])
#         print(f'{node.name}: changed to datatype {model.get_tensor_datatype(node.input[1])}')

MultiThreshold_13: node with Float32 annotation
MultiThreshold_13: changed to datatype INT32


In [62]:
# Infer hardware matrix-vector-activation layers: the binary variant first,
# then the quantized variant.
model = model.transform(to_hw.InferBinaryMatrixVectorActivation())
# Maybe for the first Conv, which receives UINT8 from the input
model = model.transform(to_hw.InferQuantizedMatrixVectorActivation())

# input quantization (if any) to standalone thresholding
model = model.transform(to_hw.InferThresholdingLayer())
# pooling layers and convolution input generators
model = model.transform(to_hw.InferPool())
model = model.transform(to_hw.InferStreamingMaxPool())
model = model.transform(to_hw.InferConvInpGen())

# get rid of Reshape(-1, 1) operation between hw nodes
model = model.transform(RemoveCNVtoFCFlatten())

# get rid of Transpose -> Transpose identity sequences
model = model.transform(absorb.AbsorbConsecutiveTransposes())

# infer tensor data layouts
model = model.transform(InferDataLayouts())

# run Streamline once more on the converted graph
model = model.transform(Streamline())

In [63]:
# Persist the model with inferred hardware layers as intermediate step 05.
finn_hw_layers = f'{models_folder}/05_fin_hw_layers.onnx'
model.save(finn_hw_layers)

In [64]:
# Inspect the hardware-layer model in Netron.
showInNetron(finn_hw_layers)

Stopping http://0.0.0.0:8083
Serving './step_by_step_bipolar/05_fin_hw_layers.onnx' at http://0.0.0.0:8083


# Dataflow Partition

In [65]:
# Reload the hardware-layer model and apply CreateDataflowPartition; the
# transformed result is kept as the parent model.
model = ModelWrapper(finn_hw_layers)
parent_model = model.transform(CreateDataflowPartition())

In [66]:
# Persist the parent model that results from dataflow partitioning.
finn_parent_filename = f'{models_folder}/00_finn_dataflow_parent.onnx'
parent_model.save(finn_parent_filename)

In [67]:
# Inspect the dataflow parent model in Netron.
showInNetron(finn_parent_filename)

Stopping http://0.0.0.0:8083
Serving './step_by_step_bipolar/00_finn_dataflow_parent.onnx' at http://0.0.0.0:8083


In [68]:
# Take the first StreamingDataflowPartition node of the parent graph and wrap
# it as a custom op so its attributes can be read.
sdp_node = getCustomOp(
    parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
)
# The node's "model" attribute is used as the file name of the model to load.
dataflow_filename = sdp_node.get_nodeattr("model")
dataflow_model = ModelWrapper(dataflow_filename)

# Specialize Layers

In [69]:
from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers

### Change Padding Nodes to HLS, so Auto Folding can be applied

In [None]:
# FMPadding_node = dataflow_model.get_nodes_by_op_type("FMPadding")

# for node in FMPadding_node:
#     node_inst = getCustomOp(node)
#     node_inst.set_nodeattr("preferred_impl_style", "hls")
#     print(f'Node {node.name} forced to HLS')

In [70]:
# save the dataflow partition with a different name for easier access
# and specialize the layers to HLS variants
dataflow_model = dataflow_model.transform(SpecializeLayers(fpga_part))

dataflow_model = dataflow_model.transform(GiveUniqueNodeNames())
dataflow_model = dataflow_model.transform(GiveReadableTensorNames())

finn_dataflow_filename = models_folder + '/10_finn_dataflow_model.onnx'
dataflow_model.save(finn_dataflow_filename)

In [71]:
# Inspect the specialized dataflow model in Netron.
showInNetron(finn_dataflow_filename)

Stopping http://0.0.0.0:8083
Serving './step_by_step_bipolar/10_finn_dataflow_model.onnx' at http://0.0.0.0:8083


### Check execution???

In [None]:
# parent_dataflow_model = ModelWrapper(finn_parent_filename)

In [None]:
# input_dict = {"global_in": test_ip*255}
# output_dict = oxe.execute_onnx(parent_dataflow_model, input_dict)
# produced_clean_qonnx = output_dict[list(output_dict.keys())[0]]
# produced_clean_qonnx

# Folding Factors

In [72]:
from finn.transformation.fpgadataflow.set_folding import SetFolding

**Target Cycles Per Frame**

If the target is 25 FPS, the time budget per inference is $\frac{1}{25}\,\mathrm{s} = 40\,\mathrm{ms}$.

If $clk = 10 ns$:
$$
Target~Cycles~Per~Frame = \frac{40\times 10^{-3}}{10\times 10^{-9}}= 4\times 10^{6}
$$

The preprocessing time is not taken into account; in practice it should be nonexistent, since preprocessing is embedded in the model's preprocessing stage.

In [73]:
# Reload the specialized dataflow model from disk before setting the folding.
model = ModelWrapper(finn_dataflow_filename)

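Note: calling the `apply` method of `SetFolding` directly returns a tuple `(model, False)`, so the model is element `[0]` of the result.

In that case it may be easier to unpack it: `model, _ = folder.apply(...)`. The cell below uses `model.transform(...)` instead and saves its result directly as the model.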

In [74]:
# Apply SetFolding to choose the folding factors for the given cycle target.
# NOTE(review): target_cycles_per_frame is 1000 here, while the markdown above
# derives 4e6 cycles for 25 FPS at a 10 ns clock -- confirm which one is
# intended. The per-node estimates printed further below are all well above
# 1000 for the largest nodes.
model = model.transform(SetFolding(
    target_cycles_per_frame=1000,
    mvau_wwidth_max=80,
    two_pass_relaxation=False)
)

In [75]:
# Persist the folded model as step 20.
folding_filename = f'{models_folder}/20_finn_folding.onnx'
# `model` is saved directly here; the indexed form is only kept for reference:
# model[0].save(folding_filename)
model.save(folding_filename)

In [76]:
# Inspect the folded model in Netron.
showInNetron(folding_filename)

Stopping http://0.0.0.0:8083
Serving './step_by_step_bipolar/20_finn_folding.onnx' at http://0.0.0.0:8083


### Check Total Estimated Cycles, looping over each node attribute

In [77]:
# Collect the FINN nodes of the folded model; their cycle estimates are read
# in the next cell.
all_nodes = model.get_finn_nodes()

In [78]:
# Read the "cycles_estimate" attribute of every FINN node, in graph order,
# and print it next to the node's position.
total_cycles = []
for i, node in enumerate(all_nodes):
    node_cycles = getCustomOp(node).get_nodeattr("cycles_estimate")
    total_cycles.append(node_cycles)
    print(f'Node {i} estimated cycles: {node_cycles}')
print(f'\nTotal estimated cycles: {np.array(total_cycles).sum()}')

# The sum alone says little about a pipelined design: the slowest node is the
# one to compare against the target cycles per frame, so report it as well.
# NOTE(review): assumes the nodes run as a streaming pipeline -- confirm.
if total_cycles:
    slowest = int(np.argmax(total_cycles))
    print(f'Slowest node: Node {slowest} with {total_cycles[slowest]} estimated cycles')

Node 0 estimated cycles: 52900
Node 1 estimated cycles: 52902
Node 2 estimated cycles: 51984
Node 3 estimated cycles: 52442
Node 4 estimated cycles: 51984
Node 5 estimated cycles: 12998
Node 6 estimated cycles: 50176
Node 7 estimated cycles: 12770
Node 8 estimated cycles: 12544
Node 9 estimated cycles: 3136
Node 10 estimated cycles: 3138
Node 11 estimated cycles: 5832
Node 12 estimated cycles: 2916
Node 13 estimated cycles: 2918
Node 14 estimated cycles: 10816
Node 15 estimated cycles: 2810
Node 16 estimated cycles: 2704
Node 17 estimated cycles: 676
Node 18 estimated cycles: 678
Node 19 estimated cycles: 2304
Node 20 estimated cycles: 576
Node 21 estimated cycles: 578
Node 22 estimated cycles: 1936
Node 23 estimated cycles: 968
Node 24 estimated cycles: 486
Node 25 estimated cycles: 1600
Node 26 estimated cycles: 2379
Node 27 estimated cycles: 800
Node 28 estimated cycles: 512
Node 29 estimated cycles: 32

Total estimated cycles: 398495


# Minimize 

In [79]:
from finn.transformation.fpgadataflow.minimize_accumulator_width import (
    MinimizeAccumulatorWidth,
)
from finn.transformation.fpgadataflow.minimize_weight_bit_width import (
    MinimizeWeightBitWidth,
)

In [80]:
# Reload the folded model from disk before minimizing bit widths.
model = ModelWrapper(folding_filename)

In [81]:
# Minimize the accumulator width first, then the weight bit width.
model = (
    model
    .transform(MinimizeAccumulatorWidth())
    .transform(MinimizeWeightBitWidth())
)

In [82]:
# Persist the width-minimized model as step 21.
minimize_filename = f'{models_folder}/21_finn_minimize.onnx'
model.save(minimize_filename)

In [83]:
# Inspect the width-minimized model in Netron.
showInNetron(minimize_filename)

Stopping http://0.0.0.0:8083
Serving './step_by_step_bipolar/21_finn_minimize.onnx' at http://0.0.0.0:8083


# HW IP Generation: PrepareIP and HLSSynthIP 

In [None]:
# from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
# from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP

In [None]:
# model = ModelWrapper(minimize_filename)

In [None]:
# model = model.transform(PrepareIP(fpga_part, target_clk_ns))
# model = model.transform(HLSSynthIP())

In [None]:
# (disabled) path fixed to include the '/' separator after models_folder
# hw_filename = models_folder + '/32_finn_hw_ipgen.onnx'
# model.save(hw_filename)

In [None]:
# showInNetron(hw_filename)

# FIFO depths

In [None]:
from finn.transformation.fpgadataflow.set_fifo_depths import InsertAndSetFIFODepths

In [None]:
#model = ModelWrapper(hw_filename)

# model = ModelWrapper(minimize_filename)

In [None]:
# model = model.transform(InsertAndSetFIFODepths(
#     fpgapart=fpga_part,
#     clk_ns=10.0,
#     max_qsrl_depth=256,
#     max_depth=None,
#     swg_exception=False,#True, # Used to optimize convolution FIFOs, splitting in several with Power of Two
#     vivado_ram_style="auto",
#     force_python_sim=False)
# )

In [None]:
# (disabled) path fixed to include the '/' separator after models_folder
# fifo_filename = models_folder + '/31_finn_fifo.onnx'
# #model[0].save(fifo_filename)
# model.save(fifo_filename)

In [None]:
# showInNetron(fifo_filename)

### Streamline FIFOs

In [None]:
from finn.transformation.fpgadataflow.set_fifo_depths import SplitLargeFIFOs
from finn.transformation.fpgadataflow.set_fifo_depths import RemoveShallowFIFOs

In [None]:
#model = model[0].transform(SplitLargeFIFOs())

# model = model.transform(SplitLargeFIFOs())
# model = model.transform(RemoveShallowFIFOs())

In [None]:
# after FIFOs are ready to go, call PrepareIP and HLSSynthIP again
# this will only run for the new nodes (e.g. FIFOs and DWCs) -> DWCs for Mobilenet
# model = model.transform(PrepareIP(fpga_part, target_clk_ns))
# model = model.transform(HLSSynthIP())

In [None]:
# (disabled) path fixed to include the '/' separator after models_folder
# fifo_streamline_filename = models_folder + '/33_finn_fifo_streamline.onnx'
# model.save(fifo_streamline_filename)

In [None]:
# showInNetron(fifo_streamline_filename)

# PYNQ Driver

In [None]:
# from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild

In [None]:
# model = ModelWrapper(fifo_streamline_filename)
# model = model.transform(ZynqBuild(platform = pynq_board, period_ns = target_clk_ns))

In [None]:
# from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver

In [None]:
# model = model.transform(MakePYNQDriver("zynq-iodma"))

In [None]:
# (disabled) path fixed to live under models_folder instead of the filesystem root
# pynq_driver_filename = models_folder + '/40_pynq_driver.onnx'
# model.save(pynq_driver_filename)