In [1]:
import os
import config

import numpy as np
import torch
from torchinfo import summary

from brevitas.export import export_qonnx

Run folder created in: experiments/0331_zcu104__workspace__w4W2a4__full_build/


# FINN Folders setup

In [2]:
# finn_root_dir = os.environ["FINN_ROOT"]
# nb_dir = finn_root_dir + "/notebooks/uav_finn/classification/qonnx_to_finn_driver/"
# # Leave all build files inside experiments folder
# os.environ["FINN_BUILD_DIR"] = nb_dir + config.BUILD_FOLDER
# os.environ["FINN_HOST_BUILD_DIR"] = nb_dir + config.TMP_FOLDER

# models_folder = config.MODELS_FOLDER

# Original QONNX Model

In [3]:
# File name of the QONNX model; it is shown in Netron and later used as the build input.
brevitas_cpu = 'BED_classifier__best_mean_F1__BIPOLAR_Out__QONNX.onnx'

# FINN IMPORTS

In [4]:
from finn.util.visualization import showSrc, showInNetron

In [5]:
# Serve the original QONNX model in Netron for visual inspection.
showInNetron(brevitas_cpu)

Serving 'BED_classifier__best_mean_F1__BIPOLAR_Out__QONNX.onnx' at http://0.0.0.0:8083


# FINN Build IMPORTS

In [6]:
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg

# Custom step: Preprocess

In [7]:
from finn.util.pytorch import ToTensor
from qonnx.transformation.merge_onnx_models import MergeONNXModels
from qonnx.core.modelwrapper import ModelWrapper
from qonnx.core.datatype import DataType

def custom_step_add_pre_proc(model: ModelWrapper, cfg: build.DataflowBuildConfig):
    """Merge a ``ToTensor`` pre-processing graph with the model.

    The pre-processing module is exported to ``preproc.onnx`` using a dummy
    tensor shaped like the model's first graph input, its own input is
    annotated as UINT8, and the result is merged with ``model``.

    Args:
        model: Network to extend; its first graph input provides the shape.
        cfg: Build configuration; not used by this step.

    Returns:
        The model returned by the ``MergeONNXModels`` transformation.
    """
    preproc_onnx_path = "preproc.onnx"
    input_shape = model.get_tensor_shape(model.graph.input[0].name)

    # Export the pre-processing module, traced with a random tensor of the
    # network's input shape.
    export_qonnx(ToTensor(), torch.randn(input_shape), preproc_onnx_path, opset_version=11)

    preproc_model = ModelWrapper(preproc_onnx_path)
    preproc_input_name = preproc_model.graph.input[0].name
    # Set the FINN datatype of the pre-processing input to UINT8.
    preproc_model.set_tensor_datatype(preproc_input_name, DataType["UINT8"])

    # Merge the pre-processing ONNX model with the network passed in.
    return model.transform(MergeONNXModels(preproc_model))

# Custom step: Streamline

In [8]:
from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from qonnx.transformation.change_datalayout import ChangeDataLayoutQuantAvgPool2d
from qonnx.transformation.infer_data_layouts import InferDataLayouts
from qonnx.transformation.general import RemoveUnusedTensors

from finn.transformation.streamline import Streamline
import finn.transformation.streamline.absorb as absorb
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC, MoveScalarLinearPastInvariants

def custom_step_streamline(model: ModelWrapper, cfg: build.DataflowBuildConfig):
    """Apply the custom streamlining sequence to ``model``.

    Args:
        model: Model to streamline.
        cfg: Build configuration; not used by this step.

    Returns:
        The model after every transformation in the sequence has been applied.
    """
    # Transformation classes in application order. Each one is instantiated
    # right before it is applied; Streamline is deliberately run twice.
    streamline_sequence = (
        MoveScalarLinearPastInvariants,
        Streamline,
        LowerConvsToMatMul,
        MakeMaxPoolNHWC,
        ChangeDataLayoutQuantAvgPool2d,
        absorb.AbsorbTransposeIntoMultiThreshold,
        Streamline,
        InferDataLayouts,
        RemoveUnusedTensors,
    )
    for transformation_cls in streamline_sequence:
        model = model.transform(transformation_cls())

    return model

# Custom step: Convert to HW

In [9]:
import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition,
)
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
from qonnx.transformation.general import GiveUniqueNodeNames

def custom_step_convert_to_hw(model: ModelWrapper, cfg: build.DataflowBuildConfig):
    """Apply the hardware-layer inference transformations to ``model``.

    Args:
        model: Streamlined model to convert.
        cfg: Build configuration; not used by this step.

    Returns:
        The model after all conversion and clean-up transformations.
    """
    # Transformation classes in application order; each is instantiated just
    # before being applied.
    hw_conversion_sequence = (
        to_hw.InferQuantizedMatrixVectorActivation,
        # Input quantization (if any) to standalone thresholding.
        # Wortel: this is the right order, to avoid splitting thresholds and
        # matrix in MVAUs.
        to_hw.InferThresholdingLayer,
        to_hw.InferPool,
        to_hw.InferStreamingMaxPool,
        to_hw.InferConvInpGen,
        # Get rid of the Reshape(-1, 1) operation between hw nodes.
        RemoveCNVtoFCFlatten,
        # Get rid of Transpose -> Transpose identity sequences.
        absorb.AbsorbConsecutiveTransposes,
        # Infer tensor data layouts.
        InferDataLayouts,
        GiveUniqueNodeNames,
    )
    for transformation_cls in hw_conversion_sequence:
        model = model.transform(transformation_cls())
    # TODO: an extra Streamline() pass was considered here; maybe not needed.

    return model

# Custom step: Specialize Layers -> redefine FMPadding as HLS

In [10]:
from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.infer_datatypes import InferDataTypes
from qonnx.transformation.general import GiveReadableTensorNames
from qonnx.custom_op.registry import getCustomOp

def custom_step_specialize_layers(model: ModelWrapper, cfg: build.DataflowBuildConfig):
    """Specialize the layers, forcing every FMPadding node to the HLS style.

    Args:
        model: Model whose layers are specialized.
        cfg: Build configuration; supplies the FPGA part through
            ``cfg._resolve_fpga_part()``.

    Returns:
        The specialized model after shape/datatype inference and renaming.
    """
    # Change all FMPadding nodes to HLS, as Folding does not support this
    # layer as RTL. Each node is printed after its attribute has been set.
    for index, padding_node in enumerate(model.get_nodes_by_op_type("FMPadding")):
        getCustomOp(padding_node).set_nodeattr("preferred_impl_style", "hls")
        print(f'Node {index}: {padding_node}')

    # Specialize for the resolved FPGA part, then run the follow-up passes.
    model = model.transform(SpecializeLayers(cfg._resolve_fpga_part()))
    follow_up_passes = (
        InferShapes,
        InferDataTypes,
        GiveUniqueNodeNames,
        GiveReadableTensorNames,
    )
    for transformation_cls in follow_up_passes:
        model = model.transform(transformation_cls())

    return model

# FPGA Part

In [11]:
from finn.util.basic import pynq_part_map
import pandas as pd

In [12]:
# Tabulate the board -> FPGA part mapping, one row per board.
fpga_df = pd.DataFrame(
    {
        "Board": list(pynq_part_map.keys()),
        "FPGA Part": list(pynq_part_map.values()),
    }
)
fpga_df

Unnamed: 0,Board,FPGA Part
0,Ultra96,xczu3eg-sbva484-1-e
1,Ultra96-V2,xczu3eg-sbva484-1-i
2,Pynq-Z1,xc7z020clg400-1
3,Pynq-Z2,xc7z020clg400-1
4,ZCU102,xczu9eg-ffvb1156-2-e
5,ZCU104,xczu7ev-ffvc1156-2-e
6,ZCU111,xczu28dr-ffvg1517-2-e
7,RFSoC2x2,xczu28dr-ffvg1517-2-e
8,RFSoC4x2,xczu48dr-ffvg1517-2-e
9,KV260_SOM,xck26-sfvc784-2LV-c


In [13]:
# Target board: change this if you have a different PYNQ board, see list above
pynq_board = "ZCU104"
# Look up the FPGA part string registered for the selected board.
fpga_part = pynq_part_map[pynq_board]

In [14]:
# Show the FPGA part that was looked up for the selected board.
print(fpga_part)

xczu7ev-ffvc1156-2-e


# Build estimate reports

In [15]:
import shutil

In [16]:
# The build input is the original QONNX model selected earlier in the notebook.
model_file = brevitas_cpu
print(model_file)

BED_classifier__best_mean_F1__BIPOLAR_Out__QONNX.onnx


In [17]:
# Estimate-only flow: the build stops after the estimate reports are generated.
estimates_output_dir = config.RUN_FOLDER + "output_estimates_only"

# Start from a clean output folder: remove whatever a previous run left behind.
if not os.path.exists(estimates_output_dir):
    print("Folder does not exist and it will be created")
else:
    shutil.rmtree(estimates_output_dir)
    print("Previous run results deleted!")

# Build steps in execution order (custom steps are passed as callables).
my_steps = [
    # Front end
    custom_step_add_pre_proc,
    "step_qonnx_to_finn",
    "step_tidy_up",
    custom_step_streamline,
    # Hardware mapping
    custom_step_convert_to_hw,
    "step_create_dataflow_partition",
    custom_step_specialize_layers,
    # Folding and reports
    "step_target_fps_parallelization",
    "step_apply_folding_config",
    "step_minimize_bit_width",
    "step_generate_estimate_reports",
]

cfg_estimates = build.DataflowBuildConfig(
    # Flow definition and outputs
    steps=my_steps,
    generate_outputs=[build_cfg.DataflowOutputType.ESTIMATE_REPORTS],
    output_dir=estimates_output_dir,
    # Target platform
    board=pynq_board,
    fpga_part=fpga_part,
    shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ,
    # Performance targets
    target_fps=25,
    synth_clk_period_ns=10.0,
    mvau_wwidth_max=36,
    # Change to True to optimize ConvGenerators, removing FIFOs
    default_swg_exception=False,
    # FIFO sizing
    auto_fifo_depths=True,
    # "characterize" is the other strategy, but it takes too long
    auto_fifo_strategy="largefifo_rtlsim",
    # Change to True to save resources
    split_large_fifos=False,
)

Folder does not exist and it will be created


# Build FULL Flow

In [18]:
# Full flow: from the QONNX model all the way to bitfile, driver and deployment package.
full_build_output_dir = config.RUN_FOLDER + "output_full_build"

# Start from a clean output folder: remove whatever a previous run left behind.
if not os.path.exists(full_build_output_dir):
    print("Folder does not exist and it will be created")
else:
    shutil.rmtree(full_build_output_dir)
    print("Previous run results deleted!")

# Build steps in execution order (custom steps are passed as callables).
my_steps = [
    # Front end
    custom_step_add_pre_proc,
    "step_qonnx_to_finn",
    "step_tidy_up",
    custom_step_streamline,
    # Hardware mapping
    custom_step_convert_to_hw,
    "step_create_dataflow_partition",
    custom_step_specialize_layers,
    # Folding and reports
    "step_target_fps_parallelization",
    "step_apply_folding_config",
    "step_minimize_bit_width",
    "step_generate_estimate_reports",
    # Hardware generation
    "step_hw_codegen",
    "step_hw_ipgen",
    "step_set_fifo_depths",
    "step_create_stitched_ip",
    "step_measure_rtlsim_performance",
    # Synthesis and deployment
    "step_out_of_context_synthesis",
    "step_synthesize_bitfile",
    "step_make_pynq_driver",
    "step_deployment_package",
]

cfg_full_build = build.DataflowBuildConfig(
    # Flow definition and outputs
    steps=my_steps,
    generate_outputs=[
        build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
        build_cfg.DataflowOutputType.STITCHED_IP,
        build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE,
        build_cfg.DataflowOutputType.OOC_SYNTH,
        build_cfg.DataflowOutputType.BITFILE,
        build_cfg.DataflowOutputType.PYNQ_DRIVER,
        build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,
    ],
    output_dir=full_build_output_dir,
    # Target platform
    board=pynq_board,
    fpga_part=fpga_part,
    shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ,
    # Performance targets
    target_fps=25,
    synth_clk_period_ns=10.0,
    mvau_wwidth_max=36,
    # Change to True to optimize ConvGenerators, removing FIFOs
    default_swg_exception=False,
    # FIFO sizing
    auto_fifo_depths=True,
    # "characterize" is the other strategy, but it takes too long
    auto_fifo_strategy="largefifo_rtlsim",
    # Change to True to save resources
    split_large_fifos=False,
)

Folder does not exist and it will be created


# Build using JSON file for Folding or FIFO sizes

In [19]:
# Full build that takes its folding / FIFO settings from a JSON file written by
# a previous run instead of deriving them from a target FPS.
# os.path.join keeps the path valid whether or not config.RUN_FOLDER ends with a slash.
full_build_output_dir = os.path.join(config.RUN_FOLDER, "output_full_build")

# Delete previous run results if they exist
if os.path.exists(full_build_output_dir):
    shutil.rmtree(full_build_output_dir)
    print("Previous run results deleted!")
else:
    print("Folder does not exist and it will be created")

# Build steps in execution order (custom steps are passed as callables).
my_steps = [
    custom_step_add_pre_proc,
    "step_qonnx_to_finn",
    "step_tidy_up",
    custom_step_streamline,
    custom_step_convert_to_hw,
    "step_create_dataflow_partition",
    custom_step_specialize_layers,
    "step_target_fps_parallelization",
    "step_apply_folding_config",
    "step_minimize_bit_width",
    "step_generate_estimate_reports",
    "step_hw_codegen",
    "step_hw_ipgen",
    "step_set_fifo_depths",
    "step_create_stitched_ip",
    "step_measure_rtlsim_performance",
    "step_out_of_context_synthesis",
    "step_synthesize_bitfile",
    "step_make_pynq_driver",
    "step_deployment_package",
]

cfg_full_build_json_folding = build.DataflowBuildConfig(
    output_dir                    = full_build_output_dir,
    mvau_wwidth_max               = 36,
    #target_fps                    = 25, # Left out on purpose: a folding JSON file is provided
    synth_clk_period_ns           = 10.0,
    board                         = pynq_board,
    #fpga_part                     = fpga_part, # Not passed in this configuration; only the board is given
    shell_flow_type               = build_cfg.ShellFlowType.VIVADO_ZYNQ,
    default_swg_exception         = False, # Change to True to optimize ConvGenerators, removing FIFOs
    auto_fifo_depths              = False,
    auto_fifo_strategy            = "largefifo_rtlsim", # "characterize" is the other option, but it takes too long
    split_large_fifos             = True, # True saves resources; change to False to disable

    steps                         = my_steps,
    # Hardware configuration exported by an earlier run; point this at the run
    # whose final_hw_config.json should be reused.
    folding_config_file           = os.path.join(
        ".",
        "experiments",
        "0330_zcu104__workspace__w4W2a4__full_build",
        "output_full_build",
        "final_hw_config.json",
    ),
    generate_outputs=[
        build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
        build_cfg.DataflowOutputType.STITCHED_IP,
        build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE,
        build_cfg.DataflowOutputType.OOC_SYNTH,
        build_cfg.DataflowOutputType.BITFILE,
        build_cfg.DataflowOutputType.PYNQ_DRIVER,
        build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,
    ],
)

# Flag a missing folding file right away. Only a warning is printed, so the
# other flow configurations stay usable when the file is absent.
if not os.path.isfile(cfg_full_build_json_folding.folding_config_file):
    print(
        "WARNING: folding config file not found: "
        f"{cfg_full_build_json_folding.folding_config_file}"
    )

Folder does not exist and it will be created


In [20]:
# Select the build flow to run: keep exactly one assignment uncommented.
# flow_config = "estimates"
# flow_config = "full_build"
flow_config = "full_build_json_folding"

# Reject unknown flow names before looking up any configuration.
if flow_config not in ("estimates", "full_build", "full_build_json_folding"):
    raise ValueError("Wrong config")

# Only the configuration object of the selected flow is referenced.
current_build_config = (
    cfg_estimates if flow_config == "estimates"
    else cfg_full_build if flow_config == "full_build"
    else cfg_full_build_json_folding
)

print(f'Perform: {flow_config}')

Perform: full_build_json_folding


In [21]:
%%time
# Run the selected build configuration on the QONNX model; %%time reports the cell's run time.
build.build_dataflow_cfg(model_file, current_build_config)

Building dataflow accelerator from BED_classifier__best_mean_F1__BIPOLAR_Out__QONNX.onnx
Intermediate outputs will be generated in /home/gmoreno/workspace
Final outputs will be generated in experiments/0331_zcu104__workspace__w4W2a4__full_build/output_full_build
Build log is at experiments/0331_zcu104__workspace__w4W2a4__full_build/output_full_build/build_dataflow.log
Running step: custom_step_add_pre_proc [1/20]
Running step: step_qonnx_to_finn [2/20]
Running step: step_tidy_up [3/20]
Running step: custom_step_streamline [4/20]
Running step: custom_step_convert_to_hw [5/20]
Running step: step_create_dataflow_partition [6/20]
Running step: custom_step_specialize_layers [7/20]
Running step: step_target_fps_parallelization [8/20]
Running step: step_apply_folding_config [9/20]
Running step: step_minimize_bit_width [10/20]
Running step: step_generate_estimate_reports [11/20]
Running step: step_hw_codegen [12/20]
Running step: step_hw_ipgen [13/20]
Running step: step_set_fifo_depths [14/20]

0