In [1]:
import os
import logging
import config

import numpy as np
import torch
from torchinfo import summary

from brevitas.export import export_qonnx

Run folder created in: experiments/A_2500_FPS/112/15_full_build_json_mvau_rtl_mvau_wwidth_max_24_manual_folding/


# Logging

In [2]:
# Run log: records are appended to <RUN_FOLDER>/logfile.log.
log_path = config.RUN_FOLDER
# os.path.join works whether or not RUN_FOLDER ends with a path separator
log_file = os.path.join(log_path, 'logfile.log')

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Re-running this cell must not stack another FileHandler on the same logger,
# otherwise every record would be written once per execution of the cell.
for stale_handler in [h for h in logger.handlers if isinstance(h, logging.FileHandler)]:
    logger.removeHandler(stale_handler)
    stale_handler.close()

file_handler = logging.FileHandler(log_file, mode="a", encoding="utf-8")
formatter = logging.Formatter(
    "{asctime} - {message}",
    style="{",
    datefmt="%Y-%m-%d %H:%M",
)
file_handler.setFormatter(formatter)

# add file handler to logger
logger.addHandler(file_handler)

# Original QONNX Model

In [3]:
# Path of the QONNX model that is cleaned and then handed to the FINN build.
# Only one assignment is active; the commented lines are alternative model files.
# brevitas_cpu = './onnx_models/Mobilenetv2_Mini_Resnet_4bitINP__best_F1__Bipolar.onnx'

brevitas_cpu = './onnx_models/Mobilenetv2_Mini_Resnet_112__best_F1__Bipolar.onnx'

# brevitas_cpu = './onnx_models/Mobilenetv2_Mini_Resnet_Sparse24__best_F1__Bipolar.onnx'

# brevitas_cpu = './onnx_models/Sparse24__only_thres.onnx'

# brevitas_cpu = './onnx_models/Sparse24_mul8.onnx'

# FINN IMPORTS

In [4]:
from finn.util.visualization import showSrc, showInNetron
from qonnx.util.cleanup import cleanup as qonnx_cleanup

In [5]:
# showInNetron(brevitas_cpu)

# Clean QONNX Model trained with Brevitas

In [6]:
# Pass the selected model through qonnx_cleanup and write the result into the run folder.
qonnx_clean_filename = f"{config.RUN_FOLDER}clean_model.onnx"
qonnx_cleanup(brevitas_cpu, out_file=qonnx_clean_filename)

# FINN Build IMPORTS

In [7]:
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg

# Custom step: Tidy Up

In [8]:
from qonnx.core.modelwrapper import ModelWrapper

from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.fold_constants import FoldConstants
from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs

In [9]:
def custom_step_tidy_up(model: ModelWrapper, cfg: build.DataflowBuildConfig):
    """Build step: convert the QONNX graph to FINN form and tidy it up.

    Applies, in this order: ConvertQONNXtoFINN, InferShapes, FoldConstants,
    GiveUniqueNodeNames, GiveReadableTensorNames, RemoveStaticGraphInputs.

    Args:
        model: graph to transform.
        cfg: build configuration; not read by this step.

    Returns:
        The transformed model.
    """
    tidy_up_passes = (
        ConvertQONNXtoFINN,
        InferShapes,
        FoldConstants,
        GiveUniqueNodeNames,
        GiveReadableTensorNames,
        RemoveStaticGraphInputs,
    )
    for tidy_pass in tidy_up_passes:
        # each transformation is instantiated right before it is applied
        model = model.transform(tidy_pass())
    return model

# Custom step: Preprocess

In [10]:
from finn.util.pytorch import ToTensor
from qonnx.transformation.merge_onnx_models import MergeONNXModels
from qonnx.core.datatype import DataType

In [11]:
def custom_step_add_pre_proc(model: ModelWrapper, cfg: build.DataflowBuildConfig):
    """Build step: prepend a ToTensor preprocessing graph to the model.

    A ``ToTensor`` module is exported to ``<RUN_FOLDER>/prepro_node.onnx``,
    cleaned, converted to FINN form and merged in front of ``model``. The new
    global input is annotated as UINT8 and the merged graph is tidied up.

    Args:
        model: graph to extend.
        cfg: build configuration; not read by this step.

    Returns:
        The merged and tidied model.
    """
    # Imported here because the notebook only imports InferDataTypes in a later
    # cell; the local import keeps this step independent of cell order.
    from qonnx.transformation.infer_datatypes import InferDataTypes

    global_inp_name = model.graph.input[0].name
    ishape = model.get_tensor_shape(global_inp_name)
    # preprocessing: torchvision's ToTensor divides uint8 inputs by 255
    totensor_pyt = ToTensor()
    # join the path instead of concatenating "/..." to avoid a doubled separator
    chkpt_preproc_name = os.path.join(config.RUN_FOLDER, "prepro_node.onnx")
    export_qonnx(totensor_pyt, torch.randn(ishape), chkpt_preproc_name)
    # cleanup overwrites the exported file in place
    qonnx_cleanup(chkpt_preproc_name, out_file=chkpt_preproc_name)
    pre_model = ModelWrapper(chkpt_preproc_name)
    pre_model = pre_model.transform(ConvertQONNXtoFINN())

    # join preprocessing and core model
    model = model.transform(MergeONNXModels(pre_model))
    # the merged graph has a new global input: annotate it as UINT8
    global_inp_name = model.graph.input[0].name
    model.set_tensor_datatype(global_inp_name, DataType["UINT8"])

    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model = model.transform(RemoveStaticGraphInputs())

    return model

# Custom step: Streamline

In [12]:
from finn.transformation.streamline.reorder import MoveMulPastFork
from finn.transformation.streamline.reorder import MoveLinearPastEltwiseAdd
from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from qonnx.transformation.change_datalayout import ChangeDataLayoutQuantAvgPool2d

from finn.transformation.streamline import Streamline
from qonnx.transformation.infer_data_layouts import InferDataLayouts
from qonnx.transformation.general import RemoveUnusedTensors

In [13]:
def custom_step_streamline(model: ModelWrapper, cfg: build.DataflowBuildConfig):
    """Build step: streamline the graph around four reordering/lowering passes.

    For each of MoveMulPastFork, MoveLinearPastEltwiseAdd, LowerConvsToMatMul
    and ChangeDataLayoutQuantAvgPool2d (in that order) the pass is applied and
    then followed by Streamline, InferDataLayouts and RemoveUnusedTensors.

    Args:
        model: graph to transform.
        cfg: build configuration; not read by this step.

    Returns:
        The streamlined model.
    """
    leading_passes = (
        MoveMulPastFork,
        MoveLinearPastEltwiseAdd,
        LowerConvsToMatMul,
        ChangeDataLayoutQuantAvgPool2d,
    )
    follow_up_passes = (Streamline, InferDataLayouts, RemoveUnusedTensors)

    for leading_pass in leading_passes:
        for graph_pass in (leading_pass, *follow_up_passes):
            model = model.transform(graph_pass())

    return model

# Custom step: Convert to HW

In [14]:
import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw

import finn.transformation.streamline.absorb as absorb
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
from qonnx.transformation.general import GiveUniqueNodeNames
from qonnx.transformation.general import SortGraph

from qonnx.custom_op.registry import getCustomOp

In [15]:
def custom_step_convert_to_hw(model: ModelWrapper, cfg: build.DataflowBuildConfig):
    """Build step: patch MultiThreshold nodes and convert the graph to HW layers.

    1. Every MultiThreshold node whose ``out_dtype`` is BIPOLAR is switched to
       BINARY (scale 1.0, bias 0.0) and the first graph output is annotated as
       BINARY.
    2. Threshold tensors (second input of each MultiThreshold) annotated as
       FLOAT32 are re-annotated as INT32.
    3. The ``convert_to_hw_layers`` inference passes are applied, followed by
       a final streamline/tidy-up sequence.

    Args:
        model: graph to transform.
        cfg: build configuration; ``cfg.standalone_thresholds`` decides whether
            InferThresholdingLayer runs before the MVAU/VVAU inference.

    Returns:
        The transformed model.
    """
    threshold_nodes = model.get_nodes_by_op_type("MultiThreshold")

    # Change BIPOLAR end node to BINARY
    # NOTE(review): the loop rewrites every BIPOLAR MultiThreshold, not only the
    # last one - confirm that only the end node is BIPOLAR in the models used.
    for node in threshold_nodes:
        threshold_op = getCustomOp(node)
        if threshold_op.get_nodeattr("out_dtype") != "BIPOLAR":
            continue
        threshold_op.set_nodeattr("out_dtype", "BINARY")
        threshold_op.set_nodeattr("out_scale", 1.0)
        threshold_op.set_nodeattr("out_bias", 0.0)
        print(f'Node changed from BIPOLAR to BINARY, to fulfill standalone MultiThreshold requirement\n{node}')
        print("Set Output to BINARY Datatype")
        model.set_tensor_datatype(model.graph.output[0].name, DataType["BINARY"])

    # Fix Float32 input to Int32
    # (Loop added during the BED ResNet experiments; it may be unnecessary if
    # all threshold tensors are already INT32.)
    for node in threshold_nodes:
        thresholds_name = node.input[1]
        if model.get_tensor_datatype(thresholds_name) == "FLOAT32":
            print(f'{node.name}: node with Float32 annotation')
            model.set_tensor_datatype(thresholds_name, DataType["INT32"])
            print(f'{node.name}: changed to datatype {model.get_tensor_datatype(thresholds_name)}')

    hw_passes = [
        to_hw.InferAddStreamsLayer,
        absorb.AbsorbTransposeIntoMultiThreshold,
    ]
    if cfg.standalone_thresholds:
        # Doing this first causes all threshold layers to be standalone.
        # It allows MVAU_rtl and optimization with DSP.
        hw_passes.append(to_hw.InferThresholdingLayer)
    hw_passes += [
        to_hw.InferQuantizedMatrixVectorActivation,
        to_hw.InferVectorVectorActivation,
        # converts the remaining MultiThreshold nodes when standalone is not applied
        to_hw.InferThresholdingLayer,
        to_hw.InferPool,
        to_hw.InferConvInpGen,
        RemoveCNVtoFCFlatten,
        absorb.AbsorbConsecutiveTransposes,
        # Very important for stitching ResNets: FIFO sizing fails otherwise,
        # as it cannot insert a DWC.
        to_hw.InferDuplicateStreamsLayer,
        # final clean-up
        Streamline,
        InferDataLayouts,
        GiveReadableTensorNames,
        RemoveUnusedTensors,
        SortGraph,
    ]
    for hw_pass in hw_passes:
        model = model.transform(hw_pass())

    return model

# Settings for Specialize

# Define Number of MVAU to keep in RTL. The others will be converted to HLS

https://github.com/Xilinx/finn/discussions/1021

Useful to control the number of DSP and LUT used.

RTL MVAU will consume DSP very efficiently. On the other hand, HLS variants will be forced to use LUTs

**Strategy**:
1. Check LUTs with all MVAU as HLS 
2. Check DSPs with all MVAU as RTL
3. Decide the best combination to fill the FPGA resources and fit in

<font color='red'>VVAUs are still not supported in RTL for DSP48, the DSP type included in the PYNQ-Z1</font>

In [16]:
# Implementation-style switches for the MVAU layers.
ALL_HLS = False
ALL_RTL = True

# Indices of the MVAU nodes to keep in RTL (22 MVAU in MobilenetV2 Resnet).
# An empty list keeps no node in RTL.
# Alternatives used before:
#   list(range(16))
#   list(range(20))   # for MobilenetV2 Original
MVAU_list = [] if ALL_HLS else list(range(16)) + [17]

## VVAU to DSP or LUT

In [17]:
# Positions (within the list of VVAU nodes) whose resType is set to "dsp" by
# custom_step_specialize_layers; all other VVAU nodes get "lut".
VVAU_DSP_list = [] # If empty, no VVAUs to DSP

## Conv Input Generator to BRAM or LutRAM

In [18]:
# Intended selection of ConvolutionInputGenerator nodes whose ram_style is "block":
# either all of them, or only the positions listed below. Both values are
# written to the run log.
all_CONV_INP_GEN_bram = True
CONV_INP_GEN_list = [1, 6, 11]

## Padding RTL or HLS

In [19]:
# When False, custom_step_specialize_layers sets preferred_impl_style="hls"
# on every FMPadding node.
padding_rtl = False

# Custom step: Specialize Layers -> redefine FMPadding as HLS

In [20]:
from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers
from qonnx.transformation.infer_datatypes import InferDataTypes

In [21]:
def custom_step_specialize_layers(model: ModelWrapper, cfg: build.DataflowBuildConfig):
    """Build step: set per-layer implementation preferences, then specialize.

    Reads the notebook-level settings ``padding_rtl``, ``ALL_RTL``,
    ``MVAU_list``, ``VVAU_DSP_list``, ``all_CONV_INP_GEN_bram`` and
    ``CONV_INP_GEN_list`` and writes the matching node attributes before
    running SpecializeLayers:

    * FMPadding: forced to HLS unless ``padding_rtl`` is True.
    * MVAU: unless ``ALL_RTL`` is True, every MVAU whose position is not in
      ``MVAU_list`` is set to hls / block RAM / LUT.
    * VVAU: always hls / block RAM; resType is "dsp" for positions in
      ``VVAU_DSP_list`` and "lut" otherwise.
    * ConvolutionInputGenerator: ram_style "block" for all nodes when
      ``all_CONV_INP_GEN_bram`` is True, otherwise only for the positions in
      ``CONV_INP_GEN_list``.

    Args:
        model: graph to transform.
        cfg: build configuration; used to resolve the FPGA part.

    Returns:
        The specialized model.
    """
    # Change all FMPadding to HLS, as Folding does not support this layer as RTL
    # It does not hurt if Padding is not present, as it will do nothing
    print(f'Padding to rtl: {padding_rtl}')
    if not padding_rtl:
        for node in model.get_nodes_by_op_type("FMPadding"):
            getCustomOp(node).set_nodeattr("preferred_impl_style", "hls")
            print(f'Node {node.name} forced to HLS')

    # MVAUs in the list are kept like RTL. All other MVAUs are converted to HLS
    if not ALL_RTL:
        for idx, MVAU_node in enumerate(model.get_nodes_by_op_type("MVAU")):
            if idx in MVAU_list:
                print(f'MVAU {idx} left unchanged')
                continue
            node_inst = getCustomOp(MVAU_node)
            node_inst.set_nodeattr("preferred_impl_style", "hls")
            node_inst.set_nodeattr("ram_style", "block")
            node_inst.set_nodeattr("resType", "lut")
            print(f'Node MVAU {idx} changed to hls, block, lut: \n{MVAU_node}')
    else:
        print("All MVAU configured to RTL")

    # All VVAUs are HLS with block RAM; only the resource type depends on VVAU_DSP_list
    for idx, VVAU_node in enumerate(model.get_nodes_by_op_type("VVAU")):
        res_type = "dsp" if idx in VVAU_DSP_list else "lut"
        node_inst = getCustomOp(VVAU_node)
        node_inst.set_nodeattr("preferred_impl_style", "hls")
        node_inst.set_nodeattr("ram_style", "block")
        node_inst.set_nodeattr("resType", res_type)
        # the message reports the style actually set (hls), also for the LUT case
        print(f'Node VVAU {idx} changed to hls, block, {res_type}: \n{VVAU_node}')

    # ConvInputGen to BRAM to save LUTs: all nodes, or only those in CONV_INP_GEN_list
    for idx, conv_node in enumerate(model.get_nodes_by_op_type("ConvolutionInputGenerator")):
        if all_CONV_INP_GEN_bram or idx in CONV_INP_GEN_list:
            getCustomOp(conv_node).set_nodeattr("ram_style", "block")
            print(f'Node ConvInGen {conv_node.name} changed to BRAM: \n{conv_node}')
        else:
            print(f'ConvInpGen {idx} left unchanged')

    # Specialize
    model = model.transform(SpecializeLayers(cfg._resolve_fpga_part()))
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())

    return model

# FPGA Part

In [22]:
from finn.util.basic import pynq_part_map
import pandas as pd

In [23]:
# Board name -> FPGA part table, displayed as a DataFrame
fpga_df = pd.DataFrame(
    list(pynq_part_map.items()),
    columns=['Board', 'FPGA Part'],
)
fpga_df

Unnamed: 0,Board,FPGA Part
0,Ultra96,xczu3eg-sbva484-1-e
1,Ultra96-V2,xczu3eg-sbva484-1-i
2,Pynq-Z1,xc7z020clg400-1
3,Pynq-Z2,xc7z020clg400-1
4,ZCU102,xczu9eg-ffvb1156-2-e
5,ZCU104,xczu7ev-ffvc1156-2-e
6,ZCU111,xczu28dr-ffvg1517-2-e
7,RFSoC2x2,xczu28dr-ffvg1517-2-e
8,RFSoC4x2,xczu48dr-ffvg1517-2-e
9,KV260_SOM,xck26-sfvc784-2LV-c


In [24]:
# change this if you have a different PYNQ board, see list above
pynq_board = "Pynq-Z1"
# FPGA part string looked up from the board name; passed to the build configs
fpga_part = pynq_part_map[pynq_board]

In [25]:
print(fpga_part)

xc7z020clg400-1


In [26]:
# The cleaned QONNX model is the file handed to build.build_dataflow_cfg
model_file = qonnx_clean_filename
print(model_file)

experiments/A_2500_FPS/112/15_full_build_json_mvau_rtl_mvau_wwidth_max_24_manual_folding/clean_model.onnx


# Parameters

Use the following formula to calculate synth_clk_period, based on other experiments:

$$
clk = \frac{1}{FPS \times Max_{Cycles}}
$$

In [27]:
# Build parameters. Unless noted, each value is forwarded to the
# build.DataflowBuildConfig keyword of the same name (without the "my_" prefix)
# and written to the run log.
my_synth_clk_period_ns = 10  # target clock period, in ns
my_target_fps = 2500
my_mvau_wwidth_max = 24 # passed as mvau_wwidth_max
my_default_swg_exception = True  # True: optimize ConvGenerators, removing FIFOs
my_standalone_thresholds = True  # also read by custom_step_convert_to_hw
my_auto_fifo_depths = True
my_auto_fifo_strategy = "largefifo_rtlsim" # alternative: "characterize" (noted as much slower)
my_split_large_fifos = True  # True: save resources
# When set, the estimate flow skips step_target_fps_parallelization
my_folding_config_file =  './experiments/A_2500_FPS/112/manual_folding.json'
# Only logged: the matching keyword is commented out in the build configs
my_specialize_layers_config_file = None

In [28]:
# The only rejected combination is: automatic FIFO sizing disabled AND no
# folding config file given. (Equivalent to the previous three-way disjunction.)
assert my_auto_fifo_depths or my_folding_config_file is not None, (
    "Invalid configuration: enable my_auto_fifo_depths or set my_folding_config_file"
)

# Build estimate reports

In [29]:
import shutil

In [30]:
estimates_output_dir = config.RUN_FOLDER + "output_estimates_only"

# Start from an empty output directory: delete previous run results if they exist
if not os.path.exists(estimates_output_dir):
    print("Folder does not exist and it will be created")
else:
    shutil.rmtree(estimates_output_dir)
    print("Previous run results deleted!")

# Without a folding file, the parallelization is derived from target_fps;
# with a folding file, that step is left out.
use_folding_file = my_folding_config_file is not None
if use_folding_file:
    print("Using folding config file -> NO step_target_fps_parallelization")
else:
    print("Not using folding config file -> step_target_fps_parallelization")

my_estimate_steps = [
    custom_step_tidy_up,
    custom_step_add_pre_proc,
    custom_step_streamline,
    custom_step_convert_to_hw,
    "step_create_dataflow_partition",
    custom_step_specialize_layers,
    *([] if use_folding_file else ["step_target_fps_parallelization"]),
    "step_apply_folding_config",
    "step_minimize_bit_width",
    "step_generate_estimate_reports",
]

cfg_estimates = build.DataflowBuildConfig(
    output_dir=estimates_output_dir,
    mvau_wwidth_max=my_mvau_wwidth_max,
    target_fps=my_target_fps,
    synth_clk_period_ns=my_synth_clk_period_ns,
    board=pynq_board,
    fpga_part=fpga_part,
    shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ,
    standalone_thresholds=my_standalone_thresholds,
    # True: optimize ConvGenerators, removing FIFOs
    default_swg_exception=my_default_swg_exception,
    auto_fifo_depths=my_auto_fifo_depths,
    # the other option, "characterize", takes much longer
    auto_fifo_strategy=my_auto_fifo_strategy,
    # True: save resources
    split_large_fifos=my_split_large_fifos,
    steps=my_estimate_steps,
    # specialize_layers_config_file=my_specialize_layers_config_file,
    folding_config_file=my_folding_config_file,
    generate_outputs=[build_cfg.DataflowOutputType.ESTIMATE_REPORTS],
)

Folder does not exist and it will be created
Using folding config file -> NO step_target_fps_parallelization


# Build FULL Flow

In [31]:
full_build_output_dir = config.RUN_FOLDER + "output_full_build"

# Start from an empty output directory: delete previous run results if they exist
if not os.path.exists(full_build_output_dir):
    print("Folder does not exist and it will be created")
else:
    shutil.rmtree(full_build_output_dir)
    print("Previous run results deleted!")

# Full flow: front-end steps, folding derived from target_fps, then HW generation
my_build_steps = (
    [
        custom_step_tidy_up,
        custom_step_add_pre_proc,
        custom_step_streamline,
        custom_step_convert_to_hw,
        "step_create_dataflow_partition",
        custom_step_specialize_layers,
    ]
    + [
        "step_target_fps_parallelization",
        "step_apply_folding_config",
        "step_minimize_bit_width",
        "step_generate_estimate_reports",
    ]
    + [
        "step_hw_codegen",
        "step_hw_ipgen",
        "step_set_fifo_depths",
        "step_create_stitched_ip",
        "step_measure_rtlsim_performance",
        "step_out_of_context_synthesis",
        "step_synthesize_bitfile",
        "step_make_pynq_driver",
        "step_deployment_package",
    ]
)

cfg_full_build = build.DataflowBuildConfig(
    output_dir=full_build_output_dir,
    mvau_wwidth_max=my_mvau_wwidth_max,
    target_fps=my_target_fps,
    synth_clk_period_ns=my_synth_clk_period_ns,
    board=pynq_board,
    fpga_part=fpga_part,
    shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ,
    standalone_thresholds=my_standalone_thresholds,
    # True: optimize ConvGenerators, removing FIFOs
    default_swg_exception=my_default_swg_exception,
    auto_fifo_depths=my_auto_fifo_depths,
    # the other option, "characterize", takes much longer
    auto_fifo_strategy=my_auto_fifo_strategy,
    # True: save resources
    split_large_fifos=my_split_large_fifos,
    steps=my_build_steps,
    # specialize_layers_config_file=my_specialize_layers_config_file,
    generate_outputs=[
        build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
        build_cfg.DataflowOutputType.STITCHED_IP,
        build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE,
        build_cfg.DataflowOutputType.OOC_SYNTH,
        build_cfg.DataflowOutputType.BITFILE,
        build_cfg.DataflowOutputType.PYNQ_DRIVER,
        build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,
    ],
)

Folder does not exist and it will be created


# Build using JSON file for Folding or FIFO sizes

In [32]:
full_build_output_dir = config.RUN_FOLDER + "output_full_build"

# Start from an empty output directory: delete previous run results if they exist
if not os.path.exists(full_build_output_dir):
    print("Folder does not exist and it will be created")
else:
    shutil.rmtree(full_build_output_dir)
    print("Previous run results deleted!")

# Same as the full flow, but WITHOUT "step_target_fps_parallelization":
# the folding comes from the JSON file given as folding_config_file.
my_build_json_steps = (
    [
        custom_step_tidy_up,
        custom_step_add_pre_proc,
        custom_step_streamline,
        custom_step_convert_to_hw,
        "step_create_dataflow_partition",
        custom_step_specialize_layers,
    ]
    + [
        "step_apply_folding_config",
        "step_minimize_bit_width",
        "step_generate_estimate_reports",
    ]
    + [
        "step_hw_codegen",
        "step_hw_ipgen",
        "step_set_fifo_depths",
        "step_create_stitched_ip",
        "step_measure_rtlsim_performance",
        "step_out_of_context_synthesis",
        "step_synthesize_bitfile",
        "step_make_pynq_driver",
        "step_deployment_package",
    ]
)

cfg_full_build_json_folding = build.DataflowBuildConfig(
    output_dir=full_build_output_dir,
    mvau_wwidth_max=my_mvau_wwidth_max,
    target_fps=my_target_fps,
    synth_clk_period_ns=my_synth_clk_period_ns,
    board=pynq_board,
    fpga_part=fpga_part,
    shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ,
    standalone_thresholds=my_standalone_thresholds,
    # True: optimize ConvGenerators, removing FIFOs
    default_swg_exception=my_default_swg_exception,
    auto_fifo_depths=my_auto_fifo_depths,
    # the other option, "characterize", takes much longer
    auto_fifo_strategy=my_auto_fifo_strategy,
    # True: save resources
    split_large_fifos=my_split_large_fifos,
    steps=my_build_json_steps,
    folding_config_file=my_folding_config_file,
    generate_outputs=[
        build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
        build_cfg.DataflowOutputType.STITCHED_IP,
        build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE,
        build_cfg.DataflowOutputType.OOC_SYNTH,
        build_cfg.DataflowOutputType.BITFILE,
        build_cfg.DataflowOutputType.PYNQ_DRIVER,
        build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,
    ],
)

Folder does not exist and it will be created


# Choose type of build: estimate, full build or build with json

In [33]:
# Select which of the prepared build configurations is executed.
# flow_config = "estimates"
# flow_config = "full_build"
flow_config = "full_build_json_folding"

# flow name -> build configuration defined in the cells above
available_build_configs = {
    "estimates": cfg_estimates,
    "full_build": cfg_full_build,
    "full_build_json_folding": cfg_full_build_json_folding,
}
if flow_config not in available_build_configs:
    raise ValueError("Wrong config")
current_build_config = available_build_configs[flow_config]

print(f'Perform: {flow_config}')

Perform: full_build_json_folding


# Logging before build

In [34]:
# Record every setting of this run as one multi-line entry in the log file
run_summary = "".join([
    f'PYNQ board: {pynq_board}\n',
    f'\tModel used: {brevitas_cpu}\n',
    f'\tTarget clock period: {my_synth_clk_period_ns} ns.\n',
    f'\tTarget fps: my_target_fps: {my_target_fps}.\n',
    f'\tmvau_wwidth_max: {my_mvau_wwidth_max}.\n',
    f'\tMVAU ALL HLS: {ALL_HLS}.\n',
    f'\tMVAU ALL RTL: {ALL_RTL}.\n',
    f'\tMVAU list: {MVAU_list}.\n',
    f'\tVVAU list to DSP: {VVAU_DSP_list}.\n',
    f'\tAll ConvInputGen to BRAM: {all_CONV_INP_GEN_bram}.\n',
    f'\tConvInputGen list: {CONV_INP_GEN_list}.\n',
    f'\tDefault sliding window exception: {my_default_swg_exception}.\n',
    f'\tAuto FIFO depth: {my_auto_fifo_depths}.\n',
    f'\tAuto FIFO strategy: {my_auto_fifo_strategy}.\n',
    f'\tSplit large FIFOs: {my_split_large_fifos}.\n',
    f'\tFolding JSON file: {my_folding_config_file}.\n',
    f'\tSpecialize JSON file: {my_specialize_layers_config_file}.\n',
    f'\tFlow config: {flow_config}.\n',
])
logger.info(run_summary)

# Build command

In [35]:
%%time
# Run the selected build flow on the cleaned model; %%time reports the cell's run time.
build.build_dataflow_cfg(model_file, current_build_config)

Building dataflow accelerator from experiments/A_2500_FPS/112/15_full_build_json_mvau_rtl_mvau_wwidth_max_24_manual_folding/clean_model.onnx
Intermediate outputs will be generated in /home/gmoreno/workspace
Final outputs will be generated in experiments/A_2500_FPS/112/15_full_build_json_mvau_rtl_mvau_wwidth_max_24_manual_folding/output_full_build
Build log is at experiments/A_2500_FPS/112/15_full_build_json_mvau_rtl_mvau_wwidth_max_24_manual_folding/output_full_build/build_dataflow.log
Running step: custom_step_tidy_up [1/18]
Running step: custom_step_add_pre_proc [2/18]
Running step: custom_step_streamline [3/18]
Running step: custom_step_convert_to_hw [4/18]
Running step: step_create_dataflow_partition [5/18]
Running step: custom_step_specialize_layers [6/18]
Running step: step_apply_folding_config [7/18]
Running step: step_minimize_bit_width [8/18]
Running step: step_generate_estimate_reports [9/18]
Running step: step_hw_codegen [10/18]
Running step: step_hw_ipgen [11/18]
Running st

0