In [1]:
import os
import logging
import config

import numpy as np
import torch
from torchinfo import summary

from brevitas.export import export_qonnx

Run folder created in: experiments_pynq-z1/09_pynq-z1__16FPS__workspace__AIMET_Balanced__BIPOLAR__w4W2a4__estimates/


# Logging

In [2]:
# Folder of the current run; the run log file is created inside it.
log_path = config.RUN_FOLDER
# os.path.join keeps the path valid whether or not RUN_FOLDER ends with a separator.
log_file = os.path.join(log_path, 'logfile.log')

# logging.getLogger returns the same object for a given name, so re-running this
# cell would otherwise attach one more FileHandler each time and every record
# would be written to the log file several times.
logger = logging.getLogger("GonLogger")
logger.propagate = False
logger.setLevel(logging.INFO)

# Reuse the handler already attached for this exact file, if there is one.
file_handler = next(
    (
        handler
        for handler in logger.handlers
        if isinstance(handler, logging.FileHandler)
        and handler.baseFilename == os.path.abspath(log_file)
    ),
    None,
)
if file_handler is None:
    file_handler = logging.FileHandler(log_file)
    # formatter = logging.Formatter('%(message)s')
    # file_handler.setFormatter(formatter)

    # add file handler to logger
    logger.addHandler(file_handler)

# FINN Folders setup

In [3]:
# finn_root_dir = os.environ["FINN_ROOT"]
# nb_dir = finn_root_dir + "/notebooks/uav_finn/classification/qonnx_to_finn_driver/"
# # Leave all build files inside experiments folder
# os.environ["FINN_BUILD_DIR"] = nb_dir + config.BUILD_FOLDER
# os.environ["FINN_HOST_BUILD_DIR"] = nb_dir + config.TMP_FOLDER

# models_folder = config.MODELS_FOLDER

# Original QONNX Model

In [4]:
# Path of the QONNX model to build. Exactly one assignment is left active;
# the commented-out lines are alternative model files tried previously.
# A later cell copies this value into `model_file`, which is passed to the build call.
#brevitas_cpu = './models/BED_classifier__best_mean_F1__BIPOLAR_Out__QONNX.onnx'
#brevitas_cpu = './models/Best_F1_AIMET__Bipolar.onnx'
# brevitas_cpu = './aimet_onnx/BED_classifier__best_mean_F1__BIPOLAR_Out__QONNX.onnx'
# brevitas_cpu = './models/aimet_onnx/BED_classifier__best_mean_F1__AIMET_NoPadding__BIPOLAR_Out__QONNX.onnx'
# brevitas_cpu = './models/aimet_onnx/BED_classifier__best_mean_F1__AIMET_NoPading_QIdy__BIPOLAR_Out__QONNX.onnx'
#brevitas_cpu = './models/aimet_onnx/BED_classifier__best_mean_F1__AIMET_NoPad_QIdy__Balanced__NoTrain__QONNX.onnx'
brevitas_cpu = './models/aimet_onnx/BED_classifier__best_mean_F1__AIMET_Balanced__BIPOLAR_Out__QONNX.onnx'

# FINN IMPORTS

In [5]:
from finn.util.visualization import showSrc, showInNetron

In [6]:
# showInNetron(brevitas_cpu)

# FINN Build IMPORTS

In [7]:
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg

# Custom step: Preprocess

In [8]:
from finn.util.pytorch import ToTensor
from qonnx.transformation.merge_onnx_models import MergeONNXModels
from qonnx.core.modelwrapper import ModelWrapper
from qonnx.core.datatype import DataType

def custom_step_add_pre_proc(model: ModelWrapper, cfg: build.DataflowBuildConfig):
    """Build step: merge a ``ToTensor`` pre-processing graph with the model.

    A ``ToTensor`` module is exported to ``preproc.onnx`` (in the current
    working directory) using a random tensor shaped like the model's first
    graph input. The exported graph's input is annotated as UINT8 and the
    graph is merged with ``model`` through ``MergeONNXModels``.

    Args:
        model: model handed over by the previous build step.
        cfg: build configuration; part of the step signature, not read here.

    Returns:
        The ModelWrapper produced by the merge.
    """
    preproc_file = "preproc.onnx"

    # Shape of the first graph input drives the dummy tensor used for export
    input_shape = model.get_tensor_shape(model.graph.input[0].name)
    export_qonnx(ToTensor(), torch.randn(input_shape), preproc_file, opset_version=11)

    # Reload the exported graph and mark its input as UINT8
    preproc_wrapper = ModelWrapper(preproc_file)
    preproc_input_name = preproc_wrapper.graph.input[0].name
    preproc_wrapper.set_tensor_datatype(preproc_input_name, DataType["UINT8"])

    # Merge the pre-processing graph with the model passed as input argument
    return model.transform(MergeONNXModels(preproc_wrapper))

# Custom step: Streamline

In [9]:
from qonnx.core.datatype import DataType

from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from qonnx.transformation.change_datalayout import ChangeDataLayoutQuantAvgPool2d
#from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
from qonnx.transformation.infer_data_layouts import InferDataLayouts
from qonnx.transformation.general import RemoveUnusedTensors

from finn.transformation.streamline import Streamline
import finn.transformation.streamline.absorb as absorb
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC, MoveScalarLinearPastInvariants
from finn.transformation.streamline.reorder import MoveTransposePastScalarMul

def custom_step_streamline(model: ModelWrapper, cfg: build.DataflowBuildConfig):
    """Build step: fix MultiThreshold annotations, then streamline the graph.

    For every ``MultiThreshold`` node whose second input is annotated as
    FLOAT32, the annotation is overwritten with INT32 (and both states are
    printed). The streamlining transformations are then applied one after
    another in a fixed order.

    Args:
        model: model handed over by the previous build step.
        cfg: build configuration; part of the step signature, not read here.

    Returns:
        The transformed ModelWrapper.
    """
    # Re-annotate second inputs of MultiThreshold nodes left as FLOAT32
    for mt_node in model.get_nodes_by_op_type("MultiThreshold"):
        second_input = mt_node.input[1]
        is_float_annotated = model.get_tensor_datatype(second_input) == "FLOAT32"
        if not is_float_annotated:
            continue
        print(f'{mt_node.name}: node with Float32 annotation')
        model.set_tensor_datatype(second_input, DataType["INT32"])
        print(f'{mt_node.name}: changed to datatype {model.get_tensor_datatype(second_input)}')

    # Transformations are instantiated and applied strictly in this order.
    # RoundAndClipThresholds is deliberately not part of the sequence
    # (its import is commented out in this cell).
    streamline_sequence = (
        MoveScalarLinearPastInvariants,
        Streamline,
        LowerConvsToMatMul,
        MakeMaxPoolNHWC,
        ChangeDataLayoutQuantAvgPool2d,
        absorb.AbsorbTransposeIntoMultiThreshold,
        Streamline,
        InferDataLayouts,
        RemoveUnusedTensors,
    )
    for transformation_cls in streamline_sequence:
        model = model.transform(transformation_cls())

    return model

# Custom step: Convert to HW

In [10]:
import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition,
)
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
from qonnx.transformation.general import GiveUniqueNodeNames

def custom_step_convert_to_hw(model: ModelWrapper, cfg: build.DataflowBuildConfig):
    """Build step: convert the streamlined graph to FINN hardware layers.

    The conversion passes are applied in a fixed order. When
    ``cfg.standalone_thresholds`` is set, ``InferThresholdingLayer`` runs
    first (and again after the MVAU inference).

    NOTE(review): the output saved with this notebook shows an
    ``InferThresholdingLayer`` call in this step raising
    "MultiThreshold_19: MultiThreshold out_scale must be 1 for HLS conversion."
    The cause is not visible in this function - TODO confirm against the model.

    Args:
        model: model handed over by the previous build step.
        cfg: build configuration; only ``standalone_thresholds`` is read.

    Returns:
        The transformed ModelWrapper.
    """
    conversion_sequence = []

    if cfg.standalone_thresholds:
        # doing this first causes all threshold layers to be standalone
        conversion_sequence.append(to_hw.InferThresholdingLayer)

    conversion_sequence += [
        to_hw.InferQuantizedMatrixVectorActivation,
        # input quantization (if any) to standalone thresholding
        to_hw.InferThresholdingLayer,
        to_hw.InferPool,
        to_hw.InferStreamingMaxPool,
        to_hw.InferConvInpGen,
        # get rid of Reshape(-1, 1) operation between hw nodes
        RemoveCNVtoFCFlatten,
        # get rid of Transpose -> Transpose identity sequences
        absorb.AbsorbConsecutiveTransposes,
        # Disabled experiment ("Move transpose", only for the AIMET model):
        #   Streamline, MoveTransposePastScalarMul, absorb.AbsorbConsecutiveTransposes
        # infer tensor data layouts
        InferDataLayouts,
        GiveUniqueNodeNames,
        # A trailing Streamline was left disabled here -> MAYBE NOT NEEDED ????
    ]

    for transformation_cls in conversion_sequence:
        model = model.transform(transformation_cls())

    return model

# Custom step: Specialize Layers -> redefine FMPadding as HLS

In [11]:
from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.infer_datatypes import InferDataTypes
from qonnx.transformation.general import GiveReadableTensorNames
from qonnx.custom_op.registry import getCustomOp

def custom_step_specialize_layers(model: ModelWrapper, cfg: build.DataflowBuildConfig):
    """Build step: set preferred implementation styles, then specialize layers.

    Every ``FMPadding`` node gets ``preferred_impl_style = "hls"`` and every
    ``MVAU`` node gets ``preferred_impl_style = "rtl"``; each touched node is
    printed with its index. ``SpecializeLayers`` is then run for the FPGA part
    resolved from ``cfg``, followed by shape/datatype inference and renaming.

    Args:
        model: model handed over by the previous build step.
        cfg: build configuration; used to resolve the FPGA part.

    Returns:
        The transformed ModelWrapper.
    """
    # Change all FMPadding to HLS, as Folding does not support this layer as RTL.
    # It does not hurt if Padding is not present, as the loop simply does nothing.
    for index, padding_node in enumerate(model.get_nodes_by_op_type("FMPadding")):
        getCustomOp(padding_node).set_nodeattr("preferred_impl_style", "hls")
        print(f'Node {index}: {padding_node}')

    # Change all MVAU to RTL, which is more optimized.
    # Author's note: it does not work. specialize_layers.py emits
    #   UserWarning: There is no RTL variant for MVAU_17. The node will
    #   automatically be set to HLS variant. Please check the bit-widths to be
    #   <= 8 and ensure the thresholds are implemented as standalone layer
    for index, mvau_node in enumerate(model.get_nodes_by_op_type("MVAU")):
        getCustomOp(mvau_node).set_nodeattr("preferred_impl_style", "rtl")
        print(f'Node {index}: {mvau_node}')

    # Specialize for the resolved FPGA part, then tidy up the graph
    model = model.transform(SpecializeLayers(cfg._resolve_fpga_part()))
    for cleanup_cls in (
        InferShapes,
        InferDataTypes,
        GiveUniqueNodeNames,
        GiveReadableTensorNames,
    ):
        model = model.transform(cleanup_cls())

    return model

# FPGA Part

In [12]:
from finn.util.basic import pynq_part_map
import pandas as pd

In [13]:
fpga_df = pd.DataFrame(pynq_part_map.items(), columns=['Board', 'FPGA Part'])
fpga_df

Unnamed: 0,Board,FPGA Part
0,Ultra96,xczu3eg-sbva484-1-e
1,Ultra96-V2,xczu3eg-sbva484-1-i
2,Pynq-Z1,xc7z020clg400-1
3,Pynq-Z2,xc7z020clg400-1
4,ZCU102,xczu9eg-ffvb1156-2-e
5,ZCU104,xczu7ev-ffvc1156-2-e
6,ZCU111,xczu28dr-ffvg1517-2-e
7,RFSoC2x2,xczu28dr-ffvg1517-2-e
8,RFSoC4x2,xczu48dr-ffvg1517-2-e
9,KV260_SOM,xck26-sfvc784-2LV-c


In [14]:
# change this if you have a different PYNQ board, see list above
pynq_board = "Pynq-Z1"
fpga_part = pynq_part_map[pynq_board]

In [15]:
print(fpga_part)

xc7z020clg400-1


In [16]:
model_file = brevitas_cpu
print(model_file)

./models/aimet_onnx/BED_classifier__best_mean_F1__AIMET_Balanced__BIPOLAR_Out__QONNX.onnx


# Parameters

In [17]:
# Tunable values shared by the DataflowBuildConfig cells below and written to
# the run log before the build starts.

# Performance / folding targets
my_target_fps = 16
my_mvau_wwidth_max = 80

# Layer implementation options
my_default_swg_exception = True
my_standalone_thresholds = True

# FIFO sizing options
my_auto_fifo_depths = True
my_auto_fifo_strategy = "largefifo_rtlsim"
my_split_large_fifos = True

# Folding configuration used only by the JSON-folding build config
my_folding_config_file = "./experiments_pynq-z1/final_hw_config_edited_16FPS_balanced_v0.json"

# Build estimate reports

In [18]:
import shutil

In [19]:
# Estimate-only flow: the step list stops at step_generate_estimate_reports.
# os.path.join keeps the path valid whether or not RUN_FOLDER ends with a separator.
estimates_output_dir = os.path.join(config.RUN_FOLDER, "output_estimates_only")

# Delete previous run results if they exist.
# NOTE: this happens whenever the cell is executed, whichever flow is selected later.
if os.path.exists(estimates_output_dir):
    shutil.rmtree(estimates_output_dir)
    print("Previous run results deleted!")
else:
    print("Folder does not exist and it will be created")

# Custom steps are passed as callables, built-in FINN steps by name.
my_estimate_steps = [
    custom_step_add_pre_proc,
    "step_qonnx_to_finn",
    "step_tidy_up",
    custom_step_streamline,
    custom_step_convert_to_hw,
    "step_create_dataflow_partition",
    custom_step_specialize_layers,
    "step_target_fps_parallelization",
    "step_apply_folding_config",
    "step_minimize_bit_width",
    "step_generate_estimate_reports",
]

cfg_estimates = build.DataflowBuildConfig(
    output_dir                    = estimates_output_dir,
    mvau_wwidth_max               = my_mvau_wwidth_max,#36,
    target_fps                    = my_target_fps,
    synth_clk_period_ns           = 10.0,
    board                         = pynq_board,
    fpga_part                     = fpga_part,
    shell_flow_type               = build_cfg.ShellFlowType.VIVADO_ZYNQ,
    standalone_thresholds         = my_standalone_thresholds,
    default_swg_exception         = my_default_swg_exception, # Change to True to optimize ConvGenerators, removing FIFOs
    auto_fifo_depths              = my_auto_fifo_depths,
    auto_fifo_strategy            = my_auto_fifo_strategy, #"characterize", -> the other option, takes toooo long
    split_large_fifos             = my_split_large_fifos, # Change to True to save resources

    steps                         = my_estimate_steps,
    generate_outputs=[
        build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
    ],
)

Folder does not exist and it will be created


# Build FULL Flow

In [20]:
# Full flow driven by target_fps / mvau_wwidth_max (no folding JSON).
# os.path.join keeps the path valid whether or not RUN_FOLDER ends with a separator.
full_build_output_dir = os.path.join(config.RUN_FOLDER, "output_full_build")

# Delete previous run results if they exist.
# NOTE: this happens whenever the cell is executed, whichever flow is selected later.
if os.path.exists(full_build_output_dir):
    shutil.rmtree(full_build_output_dir)
    print("Previous run results deleted!")
else:
    print("Folder does not exist and it will be created")

# Custom steps are passed as callables, built-in FINN steps by name.
my_build_steps = [
    custom_step_add_pre_proc,
    "step_qonnx_to_finn",
    "step_tidy_up",
    custom_step_streamline,
    custom_step_convert_to_hw,
    "step_create_dataflow_partition",
    custom_step_specialize_layers,
    "step_target_fps_parallelization",
    "step_apply_folding_config",
    "step_minimize_bit_width",
    "step_generate_estimate_reports",
    "step_hw_codegen",
    "step_hw_ipgen",
    "step_set_fifo_depths",
    "step_create_stitched_ip",
    #"step_measure_rtlsim_performance",
    "step_out_of_context_synthesis",
    "step_synthesize_bitfile",
    "step_make_pynq_driver",
    "step_deployment_package",
]

cfg_full_build = build.DataflowBuildConfig(
    output_dir                    = full_build_output_dir,
    mvau_wwidth_max               = my_mvau_wwidth_max, #36,
    target_fps                    = my_target_fps,
    synth_clk_period_ns           = 10.0,
    board                         = pynq_board,
    fpga_part                     = fpga_part,
    shell_flow_type               = build_cfg.ShellFlowType.VIVADO_ZYNQ,
    standalone_thresholds         = my_standalone_thresholds,
    default_swg_exception         = my_default_swg_exception, # Change to True to optimize ConvGenerators, removing FIFOs
    auto_fifo_depths              = my_auto_fifo_depths,
    auto_fifo_strategy            = my_auto_fifo_strategy, #"characterize", -> the other option, takes toooo long
    split_large_fifos             = my_split_large_fifos, # Change to True to save resources

    steps                         = my_build_steps,
    generate_outputs=[
        build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
        build_cfg.DataflowOutputType.STITCHED_IP,
        # NOTE(review): RTLSIM_PERFORMANCE is requested although
        # "step_measure_rtlsim_performance" is commented out in my_build_steps
        # above - confirm which of the two is intended.
        build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE,
        build_cfg.DataflowOutputType.OOC_SYNTH,
        build_cfg.DataflowOutputType.BITFILE,
        build_cfg.DataflowOutputType.PYNQ_DRIVER,
        build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,
    ],
)

Folder does not exist and it will be created


# Build using JSON file for Folding or FIFO sizes

In [21]:
# Full flow driven by a folding configuration JSON instead of target_fps.
# os.path.join keeps the path valid whether or not RUN_FOLDER ends with a separator.
# NOTE: this is the same "output_full_build" directory used by the plain
# full-build configuration, so both flows write to (and clear) the same folder.
full_build_output_dir = os.path.join(config.RUN_FOLDER, "output_full_build")

# Delete previous run results if they exist.
# NOTE: this happens whenever the cell is executed, whichever flow is selected later.
if os.path.exists(full_build_output_dir):
    shutil.rmtree(full_build_output_dir)
    print("Previous run results deleted!")
else:
    print("Folder does not exist and it will be created")

# Custom steps are passed as callables, built-in FINN steps by name.
my_build_json_steps = [
    custom_step_add_pre_proc,
    "step_qonnx_to_finn",
    "step_tidy_up",
    custom_step_streamline,
    custom_step_convert_to_hw,
    "step_create_dataflow_partition",
    custom_step_specialize_layers,
    "step_target_fps_parallelization",
    "step_apply_folding_config",
    "step_minimize_bit_width",
    "step_generate_estimate_reports",
    "step_hw_codegen",
    "step_hw_ipgen",
    "step_set_fifo_depths",
    "step_create_stitched_ip",
    "step_measure_rtlsim_performance",
    "step_out_of_context_synthesis",
    "step_synthesize_bitfile",
    "step_make_pynq_driver",
    "step_deployment_package",
]

cfg_full_build_json_folding = build.DataflowBuildConfig(
    output_dir                    = full_build_output_dir,
    # mvau_wwidth_max and target_fps are left unset here; folding comes from
    # folding_config_file below.
    #mvau_wwidth_max               = my_mvau_wwidth_max, #36,
    #target_fps                    = my_target_fps,
    synth_clk_period_ns           = 10.0,
    board                         = pynq_board,
    fpga_part                     = fpga_part,
    shell_flow_type               = build_cfg.ShellFlowType.VIVADO_ZYNQ,
    standalone_thresholds         = my_standalone_thresholds,
    default_swg_exception         = my_default_swg_exception, # Change to True to optimize ConvGenerators, removing FIFOs
    auto_fifo_depths              = my_auto_fifo_depths,
    auto_fifo_strategy            = my_auto_fifo_strategy, #"characterize", -> the other option, takes toooo long
    split_large_fifos             = my_split_large_fifos, # Change to True to save resources

    steps                         = my_build_json_steps,
    folding_config_file           = my_folding_config_file,
    generate_outputs=[
        build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
        build_cfg.DataflowOutputType.STITCHED_IP,
        build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE,
        build_cfg.DataflowOutputType.OOC_SYNTH,
        build_cfg.DataflowOutputType.BITFILE,
        build_cfg.DataflowOutputType.PYNQ_DRIVER,
        build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,
    ],
)

Folder does not exist and it will be created


# Choose type of build: estimate, full build or build with json

In [22]:
# Select which of the three build configurations defined above is handed to
# the build command. Leave exactly one assignment active.
flow_config = "estimates"
# flow_config = "full_build"
# flow_config = "full_build_json_folding"

# Only the selected configuration name is looked up, so this cell still works
# when the cells defining the other configurations were not executed.
if flow_config == "estimates":
    current_build_config = cfg_estimates
elif flow_config == "full_build":
    current_build_config = cfg_full_build
elif flow_config == "full_build_json_folding":
    current_build_config = cfg_full_build_json_folding
else:
    # Any other value is a typo in the selection above
    raise ValueError("Wrong config")

print(f'Perform: {flow_config}')

Perform: estimates


# Logging before build

In [23]:
# Write the parameters of this run to the run log before the build starts.
# Adjacent f-string literals are concatenated into a single message, so the
# whole summary is emitted as one log record.
run_summary = (
    f'PYNQ board: {pynq_board}\n'
    f'\tModel used: {model_file}\n'
    f'\tTarget fps: my_target_fps: {my_target_fps}.\n'
    f'\tmvau_wwidth_max: {my_mvau_wwidth_max}.\n'
    f'\tDefault sliding window exception: {my_default_swg_exception}.\n'
    f'\tAuto FIFO depth: {my_auto_fifo_depths}.\n'
    f'\tAuto FIFO strategy: {my_auto_fifo_strategy}.\n'
    f'\tSplit large FIFOs: {my_split_large_fifos}.\n'
    f'\tFolding JSON file: {my_folding_config_file}.\n'
    f'\tFlow config: {flow_config}.\n'
)
logger.info(run_summary)

# Build command

In [24]:
%%time
# Run the selected flow on the chosen model; the call's return value is the cell output.
# In the run saved with this notebook the builder printed "Build failed" and
# the cell value was -1, so check this value before using the outputs.
build.build_dataflow_cfg(model_file, current_build_config)

Building dataflow accelerator from ./models/aimet_onnx/BED_classifier__best_mean_F1__AIMET_Balanced__BIPOLAR_Out__QONNX.onnx
Intermediate outputs will be generated in /home/gmoreno/workspace
Final outputs will be generated in experiments_pynq-z1/09_pynq-z1__16FPS__workspace__AIMET_Balanced__BIPOLAR__w4W2a4__estimates/output_estimates_only
Build log is at experiments_pynq-z1/09_pynq-z1__16FPS__workspace__AIMET_Balanced__BIPOLAR__w4W2a4__estimates/output_estimates_only/build_dataflow.log
Running step: custom_step_add_pre_proc [1/11]
Running step: step_qonnx_to_finn [2/11]
Running step: step_tidy_up [3/11]
Running step: custom_step_streamline [4/11]


Traceback (most recent call last):
  File "/home/gmoreno/uav/finn/src/finn/builder/build_dataflow.py", line 158, in build_dataflow_cfg
    model = transform_step(model, cfg)
  File "/tmp/ipykernel_28677/498140205.py", line 12, in custom_step_convert_to_hw
    model = model.transform(to_hw.InferThresholdingLayer())
  File "/home/gmoreno/uav/finn/deps/qonnx/src/qonnx/core/modelwrapper.py", line 140, in transform
    (transformed_model, model_was_changed) = transformation.apply(transformed_model)
  File "/home/gmoreno/uav/finn/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py", line 236, in apply
    assert scale == 1.0, (
AssertionError: MultiThreshold_19: MultiThreshold out_scale must be 1 for HLS conversion.


Running step: custom_step_convert_to_hw [5/11]
> [0;32m/home/gmoreno/uav/finn/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py[0m(236)[0;36mapply[0;34m()[0m
[0;32m    234 [0;31m                [0modt[0m [0;34m=[0m [0mmodel[0m[0;34m.[0m[0mget_tensor_datatype[0m[0;34m([0m[0mthl_output[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    235 [0;31m                [0mscale[0m [0;34m=[0m [0mgetCustomOp[0m[0;34m([0m[0mnode[0m[0;34m)[0m[0;34m.[0m[0mget_nodeattr[0m[0;34m([0m[0;34m"out_scale"[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 236 [0;31m                assert scale == 1.0, (
[0m[0;32m    237 [0;31m                    [0mnode[0m[0;34m.[0m[0mname[0m [0;34m+[0m [0;34m": MultiThreshold out_scale must be 1 for HLS conversion."[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    238 [0;31m                )
[0m


ipdb>  q


Build failed
CPU times: user 13.2 s, sys: 510 ms, total: 13.7 s
Wall time: 2min 51s


-1