In [1]:
import os
import config
#from model_CNV import CNV
from model_CNV_imagenet import CNV

import numpy as np
import torch
from torchinfo import summary

from brevitas.nn import QuantIdentity
from brevitas.export import export_qonnx

# FINN Folders setup

In [2]:
finn_root_dir = os.environ["FINN_ROOT"]
nb_dir = finn_root_dir + "/notebooks/uav_finn/classification/qonnx_to_finn_driver/"
# Leave all build files inside experiments folder
os.environ["FINN_BUILD_DIR"] = nb_dir + config.BUILD_FOLDER
os.environ["FINN_HOST_BUILD_DIR"] = nb_dir + config.TMP_FOLDER

models_folder = config.MODELS_FOLDER

# Model setup

In [3]:
base_model = CNV().to('cpu')
base_model.eval();

In [4]:
summary(base_model, input_size=(1, config.NUM_CHANNELS, config.IMG_H, config.IMG_W))

  return super(Tensor, self).rename(names)


Layer (type:depth-idx)                                                      Output Shape              Param #
CNV                                                                         [1, 2]                    --
├─ModuleList: 1-77                                                          --                        (recursive)
│    └─QuantIdentity: 2-1                                                   [1, 3, 224, 224]          --
│    │    └─ActQuantProxyFromInjector: 3-1                                  [1, 3, 224, 224]          --
│    │    └─ActQuantProxyFromInjector: 3-2                                  [1, 3, 224, 224]          1
├─ModuleList: 1-78                                                          --                        (recursive)
│    └─QuantLinear: 2-123                                                   --                        (recursive)
│    │    └─WeightQuantProxyFromInjector: 3-147                             --                        (recursive)
├─ModuleList: 1

### Convert to Bipolar

In [5]:
class CNV_BIPOLAR_OUT(torch.nn.Module):
    def __init__(self, base_model):
        super(CNV_BIPOLAR_OUT, self).__init__()
        self.base_model = base_model
        self.qnt_output = QuantIdentity(
            quant_type='binary', 
            scaling_impl_type='const',
            bit_width=1, min_val=-1.0, max_val=1.0)

    def forward(self, x):
        x = self.base_model(x)
        x = self.qnt_output(x)
        return x

In [6]:
brevitas_model = CNV_BIPOLAR_OUT(base_model).to('cpu')

In [7]:
test_ip_numpy = np.random.randint(low=0, high=256, size=(1, config.NUM_CHANNELS, config.IMG_H, config.IMG_W))
test_ip = torch.tensor((test_ip_numpy / 255.), dtype=torch.float32).to('cuda')
test_ip.shape

torch.Size([1, 3, 224, 224])

In [8]:
brevitas_model.to('cuda')
test_out = brevitas_model(test_ip)
brevitas_model.to('cpu');

In [9]:
print(test_out.shape)
print(test_out)

torch.Size([1, 2])
tensor([[1., 1.]], device='cuda:0', grad_fn=<MulBackward0>)


### Model to QONNX

In [10]:
brevitas_cpu = models_folder + '00_brevitas_cpu.onnx'
export_qonnx(brevitas_model, torch.randn((1, config.NUM_CHANNELS, config.IMG_H, config.IMG_W)), brevitas_cpu);

# FINN IMPORTS

In [11]:
from finn.util.visualization import showSrc, showInNetron

In [12]:
showInNetron(brevitas_cpu)

Serving 'experiments/031_w4W2a4_build_config__full_build/models/00_brevitas_cpu.onnx' at http://0.0.0.0:8083


# FINN Build IMPORTS

In [13]:
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg

# Custom step: Preprocess

In [14]:
from finn.util.pytorch import ToTensor
from qonnx.transformation.merge_onnx_models import MergeONNXModels
from qonnx.core.modelwrapper import ModelWrapper
from qonnx.core.datatype import DataType

def custom_step_add_pre_proc(model: ModelWrapper, cfg: build.DataflowBuildConfig):
    global_inp_name = model.graph.input[0].name
    ishape = model.get_tensor_shape(global_inp_name)
    preproc = ToTensor()
    export_qonnx(preproc, torch.randn(ishape), "preproc.onnx", opset_version=11)
    preproc_model = ModelWrapper("preproc.onnx")
    # set input finn datatype to UINT8
    preproc_model.set_tensor_datatype(preproc_model.graph.input[0].name, DataType["UINT8"])
    # merge pre-processing onnx model with cnv model (passed as input argument)
    model = model.transform(MergeONNXModels(preproc_model))
    
    return model

# Custom step: Streamline

In [15]:
from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from qonnx.transformation.change_datalayout import ChangeDataLayoutQuantAvgPool2d
from qonnx.transformation.infer_data_layouts import InferDataLayouts
from qonnx.transformation.general import RemoveUnusedTensors

from finn.transformation.streamline import Streamline
import finn.transformation.streamline.absorb as absorb
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC, MoveScalarLinearPastInvariants

def custom_step_streamline(model: ModelWrapper, cfg: build.DataflowBuildConfig):
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(Streamline())
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(ChangeDataLayoutQuantAvgPool2d())
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(Streamline())
    model = model.transform(InferDataLayouts())
    model = model.transform(RemoveUnusedTensors())
    
    return model

# Custom step: Convert to HW

In [16]:
import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition,
)
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
from qonnx.transformation.general import GiveUniqueNodeNames

def custom_step_convert_to_hw(model: ModelWrapper, cfg: build.DataflowBuildConfig):
    model = model.transform(to_hw.InferQuantizedMatrixVectorActivation())   
    # input quantization (if any) to standalone thresholding. 
        # Wortel: this is the right order, to avoid splitting threholds and matrix in MVAUs
    model = model.transform(to_hw.InferThresholdingLayer())
    model = model.transform(to_hw.InferPool())
    model = model.transform(to_hw.InferStreamingMaxPool())
    model = model.transform(to_hw.InferConvInpGen())
    # get rid of Reshape(-1, 1) operation between hw nodes 
    model = model.transform(RemoveCNVtoFCFlatten())
    # get rid of Tranpose -> Tranpose identity seq
    model = model.transform(absorb.AbsorbConsecutiveTransposes())
    # infer tensor data layouts
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    #model = model.transform(Streamline()) -> MAYBE NOT NEEDED ????
   
    return model

# Custom step: Specialize Layers -> redefine FMPadding as HLS

In [17]:
from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.infer_datatypes import InferDataTypes
from qonnx.transformation.general import GiveReadableTensorNames
from qonnx.custom_op.registry import getCustomOp

def custom_step_specialize_layers(model: ModelWrapper, cfg: build.DataflowBuildConfig):
    # Change all FMPadding to HLS, as Folding does not support this layer as RTL
    FMPadding_node = model.get_nodes_by_op_type("FMPadding")
    i = 0
    for node in FMPadding_node:
        node_inst = getCustomOp(node)
        node_inst.set_nodeattr("preferred_impl_style", "hls")
        print(f'Node {i}: {node}')
        i += 1
    # Specialize
    model = model.transform(SpecializeLayers(cfg._resolve_fpga_part()))
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())   
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())

    return model

# FPGA Part

In [None]:
from finn.util.basic import pynq_part_map
import pandas as pd

In [None]:
fpga_df = pd.DataFrame(pynq_part_map.items(), columns=['Board', 'FPGA Part'])
fpga_df

In [None]:
# change this if you have a different PYNQ board, see list above
pynq_board = "Pynq-Z1"
fpga_part = pynq_part_map[pynq_board]

# Build estimate reports

In [18]:
import os
import shutil

In [19]:
# model_file = brevitas_cpu

# estimates_output_dir = config.RUN_FOLDER + "output_estimates_only"

# #Delete previous run results if exist
# if os.path.exists(estimates_output_dir):
#     shutil.rmtree(estimates_output_dir)
#     print("Previous run results deleted!")
# else:
#     print("Folder does not exist and it will be created")

# my_steps = [
#     custom_step_add_pre_proc,
#     "step_qonnx_to_finn",
#     "step_tidy_up",
#     custom_step_streamline,
#     custom_step_convert_to_hw,
#     "step_create_dataflow_partition",
#     custom_step_specialize_layers,
#     "step_target_fps_parallelization",
#     "step_apply_folding_config",
#     "step_minimize_bit_width",
#     "step_generate_estimate_reports",
# ]

# cfg_estimates = build.DataflowBuildConfig(
#     output_dir                    = estimates_output_dir,
#     mvau_wwidth_max               = 36,
#     target_fps                    = 25,
#     synth_clk_period_ns           = 10.0,
#     board                         = "Pynq-Z1",
#     fpga_part                     = "xc7z020clg400-1",
#     shell_flow_type               = build_cfg.ShellFlowType.VIVADO_ZYNQ,
#     default_swg_exception         = True,
#     auto_fifo_depths              = True,
#     auto_fifo_strategy            = "largefifo_rtlsim", #"characterize", -> the other option, takes toooo long
#     split_large_fifos             = True,

#     steps                         = my_steps,
#     generate_outputs=[
#         build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
#     ],
# )

In [20]:
# %%time
# build.build_dataflow_cfg(model_file, cfg_estimates)

# Build FULL Flow

In [21]:
model_file = brevitas_cpu

full_build_output_dir = config.RUN_FOLDER + "output_full_build"

#Delete previous run results if exist
if os.path.exists(full_build_output_dir):
    shutil.rmtree(full_build_output_dir)
    print("Previous run results deleted!")
else:
    print("Folder does not exist and it will be created")

my_steps = [
    custom_step_add_pre_proc,
    "step_qonnx_to_finn",
    "step_tidy_up",
    custom_step_streamline,
    custom_step_convert_to_hw,
    "step_create_dataflow_partition",
    custom_step_specialize_layers,
    "step_target_fps_parallelization",
    "step_apply_folding_config",
    "step_minimize_bit_width",
    "step_generate_estimate_reports",
    "step_hw_codegen",
    "step_hw_ipgen",
    "step_set_fifo_depths",
    "step_create_stitched_ip",
    "step_measure_rtlsim_performance",
    "step_out_of_context_synthesis",
    "step_synthesize_bitfile",
    "step_make_pynq_driver",
    "step_deployment_package",
]

cfg_full_build = build.DataflowBuildConfig(
    output_dir                    = full_build_output_dir,
    mvau_wwidth_max               = 36,
    target_fps                    = 25,
    synth_clk_period_ns           = 10.0,
    board                         = "Pynq-Z1",
    fpga_part                     = "xc7z020clg400-1",
    shell_flow_type               = build_cfg.ShellFlowType.VIVADO_ZYNQ,
    default_swg_exception         = True,
    auto_fifo_depths              = True,
    auto_fifo_strategy            = "largefifo_rtlsim", #"characterize", -> the other option, takes toooo long
    split_large_fifos             = True,

    steps                         = my_steps,
    generate_outputs=[
        build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
        build_cfg.DataflowOutputType.STITCHED_IP,
        build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE,
        build_cfg.DataflowOutputType.OOC_SYNTH,
        build_cfg.DataflowOutputType.BITFILE,
        build_cfg.DataflowOutputType.PYNQ_DRIVER,
        build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,
    ],
)

Folder does not exist and it will be created


In [22]:
%%time
build.build_dataflow_cfg(model_file, cfg_full_build)

Building dataflow accelerator from experiments/031_w4W2a4_build_config__full_build/models/00_brevitas_cpu.onnx
Intermediate outputs will be generated in /home/gmoreno/workspace
Final outputs will be generated in experiments/031_w4W2a4_build_config__full_build/output_full_build
Build log is at experiments/031_w4W2a4_build_config__full_build/output_full_build/build_dataflow.log
Running step: custom_step_add_pre_proc [1/20]
Running step: step_qonnx_to_finn [2/20]
Running step: step_tidy_up [3/20]
Running step: custom_step_streamline [4/20]
Running step: custom_step_convert_to_hw [5/20]
Running step: step_create_dataflow_partition [6/20]
Running step: custom_step_specialize_layers [7/20]
Running step: step_target_fps_parallelization [8/20]
Running step: step_apply_folding_config [9/20]
Running step: step_minimize_bit_width [10/20]
Running step: step_generate_estimate_reports [11/20]
Running step: step_hw_codegen [12/20]
Running step: step_hw_ipgen [13/20]
Running step: step_set_fifo_depths

Traceback (most recent call last):
  File "/home/gmoreno/uav/finn/src/finn/builder/build_dataflow.py", line 158, in build_dataflow_cfg
    model = transform_step(model, cfg)
  File "/home/gmoreno/uav/finn/src/finn/builder/build_dataflow_steps.py", line 767, in step_out_of_context_synthesis
    model = model.transform(
  File "/home/gmoreno/uav/finn/deps/qonnx/src/qonnx/core/modelwrapper.py", line 140, in transform
    (transformed_model, model_was_changed) = transformation.apply(transformed_model)
  File "/home/gmoreno/uav/finn/src/finn/transformation/fpgadataflow/synth_ooc.py", line 61, in apply
    ret = out_of_context_synth(
  File "/home/gmoreno/uav/finn/src/finn/util/vivado.py", line 66, in out_of_context_synth
    with open(res_counts_path, "r") as myfile:
FileNotFoundError: [Errno 2] No such file or directory: '/home/gmoreno/workspace/synth_out_of_context_wrh8m0uh/results_finn_design_wrapper/res.txt'


> [0;32m/home/gmoreno/uav/finn/src/finn/util/vivado.py[0m(66)[0;36mout_of_context_synth[0;34m()[0m
[0;32m     64 [0;31m    [0mres_counts_path[0m [0;34m=[0m [0mvivado_proj_folder[0m [0;34m+[0m [0;34m"/res.txt"[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     65 [0;31m[0;34m[0m[0m
[0m[0;32m---> 66 [0;31m    [0;32mwith[0m [0mopen[0m[0;34m([0m[0mres_counts_path[0m[0;34m,[0m [0;34m"r"[0m[0;34m)[0m [0;32mas[0m [0mmyfile[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     67 [0;31m        [0mres_data[0m [0;34m=[0m [0mmyfile[0m[0;34m.[0m[0mread[0m[0;34m([0m[0;34m)[0m[0;34m.[0m[0msplit[0m[0;34m([0m[0;34m"\n"[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     68 [0;31m    [0mret[0m [0;34m=[0m [0;34m{[0m[0;34m}[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  q


Build failed
CPU times: user 1min 39s, sys: 28.6 s, total: 2min 8s
Wall time: 19h 7min 23s


-1