In [1]:
#!pip install torchinfo

In [1]:
import config
from model_CNV import CNV

import numpy as np
import torch
from torchinfo import summary

from brevitas.export import export_qonnx

In [2]:
simple_cnv = CNV().to('cuda')
simple_cnv.eval();

In [3]:
summary(simple_cnv, input_size=(1, 3, 56, 56))

  return super(Tensor, self).rename(names)


Layer (type:depth-idx)                                                 Output Shape              Param #
CNV                                                                    [1, 2]                    --
├─ModuleList: 1-13                                                     --                        (recursive)
│    └─QuantIdentity: 2-1                                              [1, 3, 56, 56]            --
│    │    └─ActQuantProxyFromInjector: 3-1                             [1, 3, 56, 56]            --
│    │    └─ActQuantProxyFromInjector: 3-2                             [1, 3, 56, 56]            --
├─ModuleList: 1-14                                                     --                        (recursive)
│    └─QuantLinear: 2-23                                               --                        (recursive)
│    │    └─WeightQuantProxyFromInjector: 3-28                         --                        (recursive)
├─ModuleList: 1-13                                         

In [84]:
test_ip_numpy = np.random.randint(low=0, high=256, size=(1, 3, 56, 56))
test_ip = torch.tensor((test_ip_numpy / 255.), dtype=torch.float32).to('cuda')
test_ip.shape

torch.Size([1, 3, 56, 56])

In [85]:
simple_cnv.eval()
test_out = simple_cnv(test_ip)

In [86]:
print(test_out.shape)

torch.Size([1, 2])


In [87]:
simple_cnv.to('cpu');

In [88]:
export_qonnx(simple_cnv, torch.randn((1, 3, 56, 56)), 'simple_cpu.onnx');

In [89]:
simple_cnv.to('cuda')
export_qonnx(simple_cnv, torch.randn((1, 3, 56, 56)).to('cuda'), 'simple_cuda.onnx');

In [90]:
from finn.util.visualization import showSrc, showInNetron
from qonnx.util.cleanup import cleanup as qonnx_cleanup

In [91]:
qonnx_cleanup('simple_cuda.onnx', out_file='01_clean.onnx')

In [13]:
showInNetron('simple_cuda.onnx')

Stopping http://0.0.0.0:8083
Serving 'simple_cuda.onnx' at http://0.0.0.0:8083


In [14]:
showInNetron('01_clean.onnx')

Stopping http://0.0.0.0:8083
Serving '01_clean.onnx' at http://0.0.0.0:8083


# Compare All Outputs

In [15]:
from qonnx.core.modelwrapper import ModelWrapper
import qonnx.core.onnx_exec as oxe

In [92]:
test_ip = np.random.randint(low=0, high=256, size=(1, 3, 56, 56)) / 255.
test_ip = test_ip.astype(np.float32)
test_ip_torch = torch.tensor(test_ip, dtype=torch.float32).to('cuda')

In [93]:
torch_out = simple_cnv(test_ip_torch)
torch_out

tensor([[ 1., -1.]], device='cuda:0', grad_fn=<MulBackward0>)

In [94]:
ori_model = ModelWrapper('simple_cuda.onnx')
clean_model = ModelWrapper('01_clean.onnx')

In [95]:
input_dict = {"global_in": test_ip}
# output_dict = oxe.execute_onnx(ori_model, input_dict)
# produced_ori_qonnx = output_dict[list(output_dict.keys())[0]]
# produced_ori_qonnx

In [96]:
output_dict = oxe.execute_onnx(clean_model, input_dict)
produced_clean_qonnx = output_dict[list(output_dict.keys())[0]]
produced_clean_qonnx

array([[ 1., -1.]], dtype=float32)

# Convert to FINN

In [24]:
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.fold_constants import FoldConstants
from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs

In [25]:
model = ModelWrapper('01_clean.onnx')
model = model.transform(ConvertQONNXtoFINN())
model = model.transform(InferShapes())
model = model.transform(FoldConstants())
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())
model = model.transform(RemoveStaticGraphInputs())

In [26]:
model.save('02_finn_tidy.onnx')

In [27]:
showInNetron('02_finn_tidy.onnx')

Stopping http://0.0.0.0:8083
Serving '02_finn_tidy.onnx' at http://0.0.0.0:8083


# PreProcess

In [28]:
from finn.util.pytorch import ToTensor
from qonnx.transformation.merge_onnx_models import MergeONNXModels
from qonnx.core.datatype import DataType

model = ModelWrapper('02_finn_tidy.onnx')
global_inp_name = model.graph.input[0].name
ishape = model.get_tensor_shape(global_inp_name)
# preprocessing: torchvision's ToTensor divides uint8 inputs by 255
totensor_pyt = ToTensor()
chkpt_preproc_name = "./prepro_node.onnx"
export_qonnx(totensor_pyt, torch.randn(ishape), chkpt_preproc_name)
qonnx_cleanup(chkpt_preproc_name, out_file=chkpt_preproc_name)
pre_model = ModelWrapper(chkpt_preproc_name)
pre_model = pre_model.transform(ConvertQONNXtoFINN())

# join preprocessing and core model
model = model.transform(MergeONNXModels(pre_model))
# add input quantization annotation: UINT8 for all BNN-PYNQ models
global_inp_name = model.graph.input[0].name
model.set_tensor_datatype(global_inp_name, DataType["UINT8"])



In [29]:
model.save('03_finn_prepro.onnx')

In [30]:
showInNetron('03_finn_prepro.onnx')

Stopping http://0.0.0.0:8083
Serving '03_finn_prepro.onnx' at http://0.0.0.0:8083


In [32]:
from qonnx.transformation.infer_datatypes import InferDataTypes

In [33]:
model = model.transform(InferShapes())
model = model.transform(FoldConstants())
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())
model = model.transform(InferDataTypes())
model = model.transform(RemoveStaticGraphInputs())
model.save('03_finn_prepro.onnx')

In [34]:
showInNetron('03_finn_prepro.onnx')

Stopping http://0.0.0.0:8083
Serving '03_finn_prepro.onnx' at http://0.0.0.0:8083


# Streamline

In [35]:
from finn.transformation.streamline import Streamline
from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from qonnx.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
import finn.transformation.streamline.absorb as absorb
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC, MoveScalarLinearPastInvariants
from qonnx.transformation.infer_data_layouts import InferDataLayouts
from qonnx.transformation.general import RemoveUnusedTensors

In [36]:
model = ModelWrapper('03_finn_prepro.onnx')
model = model.transform(MoveScalarLinearPastInvariants())
model = model.transform(Streamline())

In [37]:
model.save('04_finn_MoveScalarLinearPastInvariants.onnx')

In [38]:
showInNetron('04_finn_MoveScalarLinearPastInvariants.onnx')

Stopping http://0.0.0.0:8083
Serving '04_finn_MoveScalarLinearPastInvariants.onnx' at http://0.0.0.0:8083


In [39]:
model = ModelWrapper('04_finn_MoveScalarLinearPastInvariants.onnx')
model = model.transform(LowerConvsToMatMul())
model = model.transform(Streamline())

In [40]:
model.save('05_finn_lowering.onnx')

In [41]:
showInNetron('05_finn_lowering.onnx')

Stopping http://0.0.0.0:8083
Serving '05_finn_lowering.onnx' at http://0.0.0.0:8083


# MaxPool

In [42]:
model = ModelWrapper('05_finn_lowering.onnx')
model = model.transform(MakeMaxPoolNHWC())
model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
model = model.transform(Streamline())

In [43]:
model.save('06_finn_maxpool.onnx')

In [44]:
showInNetron('06_finn_maxpool.onnx')

Stopping http://0.0.0.0:8083
Serving '06_finn_maxpool.onnx' at http://0.0.0.0:8083


# QuantAvgPool

In [45]:
from qonnx.transformation.change_datalayout import ChangeDataLayoutQuantAvgPool2d

In [46]:
model = ModelWrapper('06_finn_maxpool.onnx')
model = model.transform(ChangeDataLayoutQuantAvgPool2d())
model = model.transform(Streamline())

In [47]:
model.save('07_finn_globalavgpool.onnx')

In [48]:
showInNetron('07_finn_globalavgpool.onnx')

Stopping http://0.0.0.0:8083
Serving '07_finn_globalavgpool.onnx' at http://0.0.0.0:8083


# Absorb Consecutive Transposes

In [49]:
model = ModelWrapper('07_finn_globalavgpool.onnx')
model = model.transform(absorb.AbsorbConsecutiveTransposes())
model = model.transform(Streamline())

In [50]:
model.save('08_finn_absorbtransposes.onnx')

In [51]:
showInNetron('08_finn_absorbtransposes.onnx')

Stopping http://0.0.0.0:8083
Serving '08_finn_absorbtransposes.onnx' at http://0.0.0.0:8083


# Transpose into Multithreshold

Creo que no es necesario, porque no hay ninguna operación así

In [53]:
# model = ModelWrapper('08_finn_absorbtransposes.onnx')
# model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
# model = model.transform(Streamline())

In [54]:
# model.save('09_finn_transposeMultiThres.onnx')

In [52]:
# showInNetron('09_finn_transposeMultiThres.onnx')

# HW Layers

In [53]:
from finn.util.basic import pynq_part_map
# change this if you have a different PYNQ board, see list above
pynq_board = "Pynq-Z1"
fpga_part = pynq_part_map[pynq_board]
target_clk_ns = 10

import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition,
)
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers
from qonnx.custom_op.registry import getCustomOp
from qonnx.transformation.infer_data_layouts import InferDataLayouts

In [54]:
model = ModelWrapper('08_finn_absorbtransposes.onnx')
#model = ModelWrapper('09_finn_transposeMultiThres.onnx')
model = model.transform(to_hw.InferQuantizedMatrixVectorActivation())

# input quantization (if any) to standalone thresholding
model = model.transform(to_hw.InferThresholdingLayer())
model = model.transform(to_hw.InferConvInpGen())
model = model.transform(to_hw.InferStreamingMaxPool())

# get rid of Tranpose -> Tranpose identity seq
model = model.transform(absorb.AbsorbConsecutiveTransposes())
# infer tensor data layouts
model = model.transform(InferDataLayouts())

model = model.transform(Streamline())

In [55]:
model.save('10_fin_hw_01.onnx')

In [56]:
showInNetron('10_fin_hw_01.onnx')

Stopping http://0.0.0.0:8083
Serving '10_fin_hw_01.onnx' at http://0.0.0.0:8083


# Global Average to HW

In [57]:
model = ModelWrapper('10_fin_hw_01.onnx')
model = model.transform(to_hw.InferPool())
model = model.transform(to_hw.InferConvInpGen())
model = model.transform(InferShapes())
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())
model = model.transform(Streamline())

In [58]:
model.save('11_finn_hw_quantavgpool.onnx')

In [59]:
showInNetron('11_finn_hw_quantavgpool.onnx')

Stopping http://0.0.0.0:8083
Serving '11_finn_hw_quantavgpool.onnx' at http://0.0.0.0:8083


# Flatten

In [60]:
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten

In [61]:
model = ModelWrapper('11_finn_hw_quantavgpool.onnx')
model = model.transform(RemoveCNVtoFCFlatten())
model = model.transform(InferDataLayouts())
model = model.transform(RemoveUnusedTensors())
model = model.transform(Streamline())

In [62]:
model.save('12_finn_hw_flatten.onnx')

In [63]:
showInNetron('12_finn_hw_flatten.onnx')

Stopping http://0.0.0.0:8083
Serving '12_finn_hw_flatten.onnx' at http://0.0.0.0:8083


# Dataflow Partition

In [64]:
model = ModelWrapper('12_finn_hw_flatten.onnx')
parent_model = model.transform(CreateDataflowPartition())

In [65]:
parent_model.save('20_dataflow_parent.onnx')

In [66]:
showInNetron('20_dataflow_parent.onnx')

Stopping http://0.0.0.0:8083
Serving '20_dataflow_parent.onnx' at http://0.0.0.0:8083


In [67]:
sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
sdp_node = getCustomOp(sdp_node)
dataflow_model_filename = sdp_node.get_nodeattr("model")
# save the dataflow partition with a different name for easier access
# and specialize the layers to HLS variants
dataflow_model = ModelWrapper(dataflow_model_filename)
dataflow_model = dataflow_model.transform(SpecializeLayers(fpga_part))
dataflow_model.save('21_dataflow_model.onnx')

In [68]:
showInNetron('21_dataflow_model.onnx')

Stopping http://0.0.0.0:8083
Serving '21_dataflow_model.onnx' at http://0.0.0.0:8083


### Check execution???

In [82]:
parent_dataflow_model = ModelWrapper('20_dataflow_parent.onnx')

In [97]:
input_dict = {"global_in": test_ip*255}
output_dict = oxe.execute_onnx(parent_dataflow_model, input_dict)
produced_clean_qonnx = output_dict[list(output_dict.keys())[0]]
produced_clean_qonnx

array([[ 1., -1.]], dtype=float32)

# Folding Factors