In [1]:
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg
import os
import shutil
import json

In [2]:
# Resolve every path from FINN_ROOT. os.path.join is used instead of string
# concatenation so that no doubled separator ("projects//model.onnx") ends up
# in the paths, whether or not FINN_ROOT carries a trailing slash.
model_dir = os.path.join(os.environ['FINN_ROOT'], 'notebooks', 'projects')
model_file = os.path.join(model_dir, 'model.onnx')
estimates_output_dir = os.path.join(model_dir, 'output_estimates_only')
rtlsim_output_dir = os.path.join(model_dir, 'output_ipstitch_ooc_rtlsim')

# Delete the output folders of earlier runs so that the existence checks in
# later cells cannot be satisfied by stale files.
for run_label, stale_output_dir in (
    ('estimates', estimates_output_dir),
    ('rtlsim', rtlsim_output_dir),
):
    if os.path.exists(stale_output_dir):
        shutil.rmtree(stale_output_dir)
        print(run_label + ': Previous run results deleted')


estimates: Previous run results deleted
rtlsim: Previous run results deleted


In [8]:
# Configuration for the estimate-only flow: the step list is restricted to
# build_cfg.estimate_only_dataflow_steps and only estimate reports are requested.
cfg_estimates = build.DataflowBuildConfig(
    output_dir=estimates_output_dir,
    steps=build_cfg.estimate_only_dataflow_steps,
    generate_outputs=[build_cfg.DataflowOutputType.ESTIMATE_REPORTS],
    # Target device. Alternative target kept for reference (disabled):
    #   fpga_part='xczu5ev-sfvc784-1-e', board='kv260'
    fpga_part='xc7s25csga225-1',
    synth_clk_period_ns=10.0,
    target_fps=20,
    mvau_wwidth_max=4,
    auto_fifo_depths=True,
    split_large_fifos=True,
)

In [9]:
%%time
# Run the estimate-only build on the ONNX model. The call's return value is
# shown as the cell result (0 in the recorded run).
build.build_dataflow_cfg(model_file, cfg_estimates)

Building dataflow accelerator from /home/petertso/finn/notebooks/projects//model.onnx
Intermediate outputs will be generated in /tmp/finn_dev_petertso
Final outputs will be generated in /home/petertso/finn/notebooks/projects//output_estimates_only
Build log is at /home/petertso/finn/notebooks/projects//output_estimates_only/build_dataflow.log
Running step: step_qonnx_to_finn [1/10]
Running step: step_tidy_up [2/10]
Running step: step_streamline [3/10]
Running step: step_convert_to_hw [4/10]
Running step: step_create_dataflow_partition [5/10]
Running step: step_specialize_layers [6/10]
Running step: step_target_fps_parallelization [7/10]
Running step: step_apply_folding_config [8/10]
Running step: step_minimize_bit_width [9/10]
Running step: step_generate_estimate_reports [10/10]
Completed successfully
CPU times: user 2.43 s, sys: 455 μs, total: 2.43 s
Wall time: 2.43 s


0

In [10]:
# Check that the estimate-only build wrote its network performance report and
# name the missing file if it did not.
network_perf_report = estimates_output_dir + '/report/estimate_network_performance.json'
assert os.path.exists(network_perf_report), 'Missing report: ' + network_perf_report

In [11]:
def read_json_dict(filename):
    """Load a JSON file and return its decoded content.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` (a dict for the report files
        read in this notebook).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Read as UTF-8 explicitly instead of relying on the locale's default
    # encoding, so non-ASCII content decodes the same way on every machine.
    with open(filename, 'r', encoding='utf-8') as f:
        return json.load(f)

# Display the per-layer cycle estimates from the estimate-only report folder.
read_json_dict(estimates_output_dir + '/report/estimate_layer_cycles.json')

{'ConvolutionInputGenerator_rtl_0': 22761,
 'MVAU_hls_0': 540000,
 'ConvolutionInputGenerator_rtl_1': 104400,
 'MVAU_hls_1': 3650400,
 'ConvolutionInputGenerator_rtl_2': 24552,
 'MVAU_hls_2': 1080000,
 'ConvolutionInputGenerator_rtl_3': 4512,
 'MVAU_hls_3': 248832,
 'ConvolutionInputGenerator_rtl_4': 1152,
 'MVAU_hls_4': 36864,
 'MVAU_hls_5': 6400,
 'MVAU_hls_6': 5000,
 'MVAU_hls_7': 500,
 'MVAU_rtl_0': 10}

In [12]:
# Display the per-layer resource estimates (BRAM_18K, LUT, URAM, DSP, ...)
# from the estimate-only report folder.
read_json_dict(estimates_output_dir + '/report/estimate_layer_resources.json')

{'ConvolutionInputGenerator_rtl_0': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 364,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'MVAU_hls_0': {'BRAM_18K': 1,
  'BRAM_efficiency': 0.13020833333333334,
  'LUT': 370,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'ConvolutionInputGenerator_rtl_1': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 632,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'MVAU_hls_1': {'BRAM_18K': 6,
  'BRAM_efficiency': 0.78125,
  'LUT': 352,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'ConvolutionInputGenerator_rtl_2': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 516,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'MVAU_hls_2': {'BRAM_18K': 11,
  'BRAM_efficiency': 0.8522727272727273,
  'LUT': 352,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'ConvolutionInputGenerator_rtl_3': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 368,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'MVAU_hls_3': {'BRAM_18K': 7,
  'BRAM_ef

In [54]:
# Configuration for the full flow: stitched IP, RTL-simulation performance and
# out-of-context synthesis results are requested as outputs.
# NOTE(review): the recorded rtlsim report further down shows fclk = 100 MHz,
# which does not match a 12.0 ns clock period; the saved outputs presumably
# come from an earlier configuration -- confirm by re-running.
cfg_stitched_ip = build.DataflowBuildConfig(
    output_dir=rtlsim_output_dir,
    generate_outputs=[
        build_cfg.DataflowOutputType.STITCHED_IP,
        build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE,
        build_cfg.DataflowOutputType.OOC_SYNTH,
    ],
    # Target device. Alternative part kept for reference (disabled):
    #   fpga_part='xczu5ev-sfvc784-1-e'
    fpga_part='xc7s25csga225-1',
    synth_clk_period_ns=12.0,
    target_fps=20,
    mvau_wwidth_max=8,
    auto_fifo_depths=True,
    split_large_fifos=True,
)

In [None]:
%%time
# Run the build configured in cfg_stitched_ip on the ONNX model.
# NOTE(review): the recorded output of this cell stops in step_hw_codegen with
# a traceback, so the outputs shown by the following cells presumably come
# from an earlier successful run -- confirm with Restart & Run All.
build.build_dataflow_cfg(model_file, cfg_stitched_ip)

Building dataflow accelerator from /home/petertso/finn/notebooks/projects//model.onnx
Intermediate outputs will be generated in /tmp/finn_dev_petertso
Final outputs will be generated in /home/petertso/finn/notebooks/projects//output_ipstitch_ooc_rtlsim
Build log is at /home/petertso/finn/notebooks/projects//output_ipstitch_ooc_rtlsim/build_dataflow.log
Running step: step_qonnx_to_finn [1/19]
Running step: step_tidy_up [2/19]
Running step: step_streamline [3/19]
Running step: step_convert_to_hw [4/19]
Running step: step_create_dataflow_partition [5/19]
Running step: step_specialize_layers [6/19]
Running step: step_target_fps_parallelization [7/19]
Running step: step_apply_folding_config [8/19]
Running step: step_minimize_bit_width [9/19]
Running step: step_generate_estimate_reports [10/19]
Running step: step_hw_codegen [11/19]


Traceback (most recent call last):
  File "/home/petertso/finn/src/finn/builder/build_dataflow.py", line 158, in build_dataflow_cfg
    model = transform_step(model, cfg)
  File "/home/petertso/finn/src/finn/builder/build_dataflow_steps.py", line 515, in step_hw_codegen
    model = model.transform(PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period()))
  File "/home/petertso/finn/deps/qonnx/src/qonnx/core/modelwrapper.py", line 140, in transform
    (transformed_model, model_was_changed) = transformation.apply(transformed_model)
  File "/home/petertso/finn/src/finn/transformation/fpgadataflow/prepare_ip.py", line 94, in apply
    _codegen_single_node(node, model, self.fpgapart, self.clk)
  File "/home/petertso/finn/src/finn/transformation/fpgadataflow/prepare_ip.py", line 54, in _codegen_single_node
    inst.code_generation_ipgen(model, fpgapart, clk)
  File "/home/petertso/finn/src/finn/custom_op/fpgadataflow/hlsbackend.py", line 118, in code_generation_ipgen
    self.gene

> /usr/local/lib/python3.10/dist-packages/bitstring.py(822)__new__()
    820         # instance for re-use.
    821         try:
--> 822             if isinstance(auto, basestring):
    823                 try:
    824                     return _cache[auto]



In [15]:
# Check that the build wrote every artifact the following cells read, and name
# the first missing file instead of failing with a bare AssertionError.
for expected_artifact in (
    "/report/ooc_synth_and_timing.json",
    "/report/rtlsim_performance.json",
    "/final_hw_config.json",
):
    assert os.path.exists(rtlsim_output_dir + expected_artifact), (
        "Missing build output: " + rtlsim_output_dir + expected_artifact
    )

In [16]:
# List the contents of the stitched_ip folder inside the build output directory.
! ls {rtlsim_output_dir}/stitched_ip

all_verilog_srcs.txt		       finn_vivado_stitch_proj.xpr
data				       ip
finn_vivado_stitch_proj.cache	       make_project.sh
finn_vivado_stitch_proj.gen	       make_project.tcl
finn_vivado_stitch_proj.hw	       vivado.jou
finn_vivado_stitch_proj.ip_user_files  vivado.log
finn_vivado_stitch_proj.srcs


In [17]:
# List the report files inside the build output directory.
! ls {rtlsim_output_dir}/report

estimate_layer_resources_hls.json  rtlsim_performance.json
ooc_synth_and_timing.json


In [18]:
# Print the out-of-context synthesis and timing report.
! cat {rtlsim_output_dir}/report/ooc_synth_and_timing.json

{
  "vivado_proj_folder": "/tmp/finn_dev_petertso/synth_out_of_context_bxeie46w/results_finn_design_wrapper",
  "LUT": 8936.0,
  "LUTRAM": 702.0,
  "FF": 10927.0,
  "DSP": 0.0,
  "BRAM": 32.0,
  "BRAM_18K": 10.0,
  "BRAM_36K": 27.0,
  "URAM": 0.0,
  "Carry": 501.0,
  "WNS": 0.393,
  "Delay": 0.393,
  "vivado_version": 2022.2,
  "vivado_build_no": 3671981.0,
  "": 0,
  "fmax_mhz": 104.0907671489539,
  "estimated_throughput_fps": 59.494037007861166
}

In [19]:
# Print the RTL simulation performance report.
! cat {rtlsim_output_dir}/report/rtlsim_performance.json

{
  "N_IN_TXNS": 2304,
  "N_OUT_TXNS": 1,
  "cycles": 1851262,
  "N": 1,
  "latency_cycles": 1851262,
  "runtime[ms]": 18.512620000000002,
  "throughput[images/s]": 54.01720556031507,
  "fclk[mhz]": 100.0,
  "stable_throughput[images/s]": 54.01720556031507
}

In [18]:
# Print the final hardware configuration file from the build output directory.
! cat {rtlsim_output_dir}/final_hw_config.json

{
  "Defaults": {},
  "StreamingFIFO_rtl_0": {
    "ram_style": "auto",
    "depth": 2304,
    "impl_style": "rtl",
    "inFIFODepths": [
      0
    ],
    "outFIFODepths": [
      0
    ]
  },
  "ConvolutionInputGenerator_rtl_0": {
    "SIMD": 1,
    "parallel_window": 0,
    "ram_style": "distributed",
    "inFIFODepths": [
      2304
    ],
    "outFIFODepths": [
      12100
    ]
  },
  "StreamingFIFO_rtl_1": {
    "ram_style": "auto",
    "depth": 12100,
    "impl_style": "vivado",
    "inFIFODepths": [
      0
    ],
    "outFIFODepths": [
      0
    ]
  },
  "MVAU_hls_0": {
    "PE": 1,
    "SIMD": 1,
    "ram_style": "auto",
    "resType": "auto",
    "mem_mode": "internal_decoupled",
    "runtime_writeable_weights": 0,
    "inFIFODepths": [
      12100
    ],
    "outFIFODepths": [
      6933
    ]
  },
  "StreamingFIFO_rtl_2": {
    "ram_style": "auto",
    "depth": 6933,
    "impl_style": "vivado",
    "inFIFODepths": [
      0
    ],
    "outFIFODepths": [
      0
    ]
 