In [None]:
from finn.util.basic import make_build_dir
from finn.util.visualization import showInNetron


# Directory that every ONNX file produced by this notebook is written to and read back from.
build_dir = "/workspace/finn"
# File-name stem shared by all saved models (e.g. "<stem>_tidy.onnx", "<stem>_folded.onnx").
base_file_name = "rpn"
# Text-format protobuf pipeline configuration that is parsed further down in this cell.
config_path = "/workspace/finn/pointpillars/second/configs/pointpillars/car/xyres_16.proto"
# Input shape handed to the Brevitas ONNX export; presumably (batch, channels, H, W) -- TODO confirm.
in_shape = (1,64,320,320)

import onnx
from finn.util.test import get_test_model_trained
import brevitas.onnx as bo
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.double_to_single_float import DoubleToSingleFloat
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames

import torch
import torch.nn.functional as F
import torch.nn as nn
from brevitas.quant_tensor import pack_quant_tensor
import brevitas.nn as qnn
from brevitas.core.quant import QuantType
from brevitas.core.restrict_val import RestrictValueType
from brevitas.core.scaling import ScalingImplType
from brevitas.core.stats import StatsOp


from second.pytorch.builder import second_builder
from second.pytorch.models.quantization import QuantConfig
from torchplus.tools import change_default_args
from second.pytorch.models.quantization import MyQuantReLU
import torchplus
from second.protos import pipeline_pb2

# Quantisation settings are class-level attributes on QuantConfig; they are
# assigned here before the RPN is instantiated later in this cell.
# NOTE(review): presumably the RPN layers read these at construction time -- confirm.
_quant_settings = (
    ("BACKBONE_CONV_QUANT_TYPE", QuantType.BINARY),
    ("BACKBONE_CONV_BIT_WIDTH", 1),
    ("LAST_LAYER_QUANT_TYPE", QuantType.INT),
    ("LAST_LAYER_BIT_WIDTH", 8),
    ("ACTIVATION_QUANT_TYPE", QuantType.INT),
    ("ACTIVATION_BIT_WIDTH", 2),
)
for _setting_name, _setting_value in _quant_settings:
    setattr(QuantConfig, _setting_name, _setting_value)

# The activation is MyQuantReLU with its default arguments overridden so that
# it follows the activation quant type / bit width chosen above.
_relu_defaults = dict(
    max_val=6,
    quant_type=QuantConfig.ACTIVATION_QUANT_TYPE,
    bit_width=QuantConfig.ACTIVATION_BIT_WIDTH,
    scaling_impl_type=ScalingImplType.CONST,
)
QuantConfig.ACTIVATION_FUNCTION = change_default_args(**_relu_defaults)(MyQuantReLU)

    
import onnx
from finn.util.test import get_test_model_trained
import brevitas.onnx as bo
from google.protobuf import text_format

# Read the text-format protobuf from disk, then merge it into an empty
# TrainEvalPipelineConfig message.
with open(config_path, "r") as proto_file:
    proto_str = proto_file.read()
config = pipeline_pb2.TrainEvalPipelineConfig()
text_format.Merge(proto_str, config)

# Shortcuts to the sub-messages of the parsed configuration.
input_cfg, eval_input_cfg = config.train_input_reader, config.eval_input_reader
model_cfg, train_cfg = config.model.second, config.train_config

from second.pytorch.models.voxelnet import RPN

# Constructor arguments for the RPN, spelled out literally in this cell.
rpn_kwargs = dict(
    use_norm=True,
    num_class=1,
    layer_nums=[3, 3, 3],
    layer_strides=[2, 1, 1],
    num_filters=[64, 128, 256],
    upsample_strides=[1, 1, 1],
    num_upsample_filters=[128, 128, 128],
    num_input_filters=64,
    num_anchor_per_loc=2,
    encode_background_as_zeros=True,
    use_direction_classifier=True,
    use_groupnorm=False,
    num_groups=32,
    use_bev=False,
    box_code_size=7,
)
rpn = RPN(**rpn_kwargs)

# Restore the stored weights on the CPU and switch the network to eval mode
# before it is exported.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a trusted source.
checkpoint_loc = "/workspace/finn/pp_net_params/rpn_weights"
checkpoint = torch.load(checkpoint_loc, map_location="cpu")
rpn.load_state_dict(checkpoint)
rpn = rpn.eval()

# Export to FINN-flavoured ONNX; the file name is derived from base_file_name.
exported_onnx_path = build_dir + "/{}.onnx".format(base_file_name)
bo.export_finn_onnx(rpn, in_shape, exported_onnx_path)

# Clean-up passes applied, in exactly this order, to the freshly exported graph.
tidy_passes = (
    DoubleToSingleFloat,
    InferShapes,
    FoldConstants,
    GiveUniqueNodeNames,
    GiveReadableTensorNames,
)

model = ModelWrapper(build_dir + "/{}.onnx".format(base_file_name))
for make_pass in tidy_passes:
    # each pass object is created right before it is applied
    model = model.transform(make_pass())
model.save(build_dir + "/{}_tidy.onnx".format(base_file_name))

In [None]:
# Visualise the tidied-up graph; the call stays the last expression so its
# return value is rendered as the cell output.
tidy_model_path = build_dir + "/{}_tidy.onnx".format(base_file_name)
showInNetron(tidy_model_path)

In [None]:
from finn.transformation.streamline import Streamline
from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
import finn.transformation.streamline.absorb as absorb
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC, MoveAddMulPastIm2Col

# Streamlining pipeline, applied top to bottom. Streamline runs twice: once
# before the convolutions are lowered and once more at the very end.
streamline_passes = (
    Streamline,
    LowerConvsToMatMul,
    MakeMaxPoolNHWC,
    MoveAddMulPastIm2Col,
    absorb.AbsorbTransposeIntoMultiThreshold,
    ConvertBipolarMatMulToXnorPopcount,
    Streamline,
)

model = ModelWrapper(build_dir + "/{}_tidy.onnx".format(base_file_name))
for make_pass in streamline_passes:
    # each pass object is created right before it is applied
    model = model.transform(make_pass())
model.save(build_dir + "/{}_streamlined.onnx".format(base_file_name))


In [None]:
# Visualise the streamlined graph; the call stays the last expression so its
# return value is rendered as the cell output.
streamlined_model_path = build_dir + "/{}_streamlined.onnx".format(base_file_name)
showInNetron(streamlined_model_path)

In [None]:
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition,
)
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
from finn.custom_op.registry import getCustomOp

# Memory mode for the MVTU units: "decoupled" or "const".
mem_mode = "decoupled"

# Convert the streamlined graph to HLS layers. The final pass removes the
# Reshape(-1, 1) operation that sits between hlslib nodes.
model = (
    ModelWrapper(build_dir + "/{}_streamlined.onnx".format(base_file_name))
    .transform(to_hls.InferBinaryStreamingFCLayer(mem_mode))
    .transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
    .transform(to_hls.InferConvInpGen())
    .transform(to_hls.InferStreamingMaxPool())
    .transform(RemoveCNVtoFCFlatten())
)

# Create the dataflow partition and save the resulting parent graph.
parent_model = model.transform(CreateDataflowPartition())
parent_model.save(build_dir + "/{}_dataflow_parent.onnx".format(base_file_name))

# The first StreamingDataflowPartition node stores the file name of the
# partition model in its "model" attribute.
first_partition = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
sdp_node = getCustomOp(first_partition)
dataflow_model_filename = sdp_node.get_nodeattr("model")

# Save the dataflow partition under a different name for easier access.
dataflow_model = ModelWrapper(dataflow_model_filename)
dataflow_model.save(build_dir + "/{}_dataflow_model.onnx".format(base_file_name))

In [None]:
# Visualise the parent graph; the call stays the last expression so its
# return value is rendered as the cell output.
dataflow_parent_path = build_dir + "/{}_dataflow_parent.onnx".format(base_file_name)
showInNetron(dataflow_parent_path)

In [None]:
# Debug helper: dump the parameters of the first Mul, Add and MultiThreshold
# node found in the dataflow parent graph to .npy files under
# <build_dir>/pp_net_params.
dbg = True
if dbg:
    import finn.custom_op.registry as registry
    import numpy as np

    # The parent graph does not change inside the loop, so load it only once.
    model = ModelWrapper(build_dir + "/{}_dataflow_parent.onnx".format(base_file_name))

    for op_type in ["Mul", "Add", "MultiThreshold"]:
        nodes = model.get_nodes_by_op_type(op_type)
        if not nodes:
            # Nothing to dump for this op type; avoid an IndexError on nodes[0].
            print("No {} node found in the parent graph, skipping".format(op_type))
            continue
        node = nodes[0]

        if op_type == "MultiThreshold":
            inst = registry.custom_op[op_type](node)
            # input[1] of the node holds the threshold tensor
            thresholds = model.get_initializer(node.input[1])
            out_scale  = inst.get_nodeattr("out_scale")
            out_bias   = inst.get_nodeattr("out_bias")
            data_layout = inst.get_nodeattr("data_layout")
            print("data layout (if other than NCHW, then check MultThreshold class code): {}".format(data_layout))
            # Two placeholders: print the type AND the value (the value used
            # to be dropped because the format string had a single "{}").
            print("out_scale: {} {}".format(type(out_scale), out_scale))
            print("out_bias: {} {}".format(type(out_bias), out_bias))
            np.save("{}/pp_net_params/thresholds.npy".format(build_dir), thresholds)
        elif op_type == "Add":
            tensor = model.get_initializer(node.input[1])
            print(tensor.shape)
            np.save("{}/pp_net_params/add_params.npy".format(build_dir), tensor)
        elif op_type == "Mul":
            tensor = model.get_initializer(node.input[1])
            print(tensor.shape)
            np.save("{}/pp_net_params/mul_params.npy".format(build_dir), tensor)


In [None]:
# Visualise the dataflow partition; the call stays the last expression so its
# return value is rendered as the cell output.
dataflow_model_path = build_dir + "/{}_dataflow_model.onnx".format(base_file_name)
showInNetron(dataflow_model_path)

In [None]:
from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker
from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources

model = ModelWrapper(build_dir + "/{}_dataflow_model.onnx".format(base_file_name))
fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")

# each tuple is (PE, SIMD, in_fifo_depth, ram_style) for a layer
# there are 13 StreamingFCLayer_Batch: the first twelve share one
# configuration, the last one uses a single PE
PEs   = 16
SIMDs = 16
FIFOs = 256
folding = [(PEs, SIMDs, FIFOs, "block")] * 12 + [(1, SIMDs, FIFOs, "block")]

# zip() stops at the shorter sequence, so a count mismatch would otherwise
# go unnoticed and leave some layers without the intended folding.
if len(fc_layers) != len(folding):
    print(
        "WARNING: {} StreamingFCLayer_Batch nodes but {} folding entries; "
        "only the first {} are configured".format(
            len(fc_layers), len(folding), min(len(fc_layers), len(folding))
        )
    )
for fcl, (pe, simd, ififodepth, ram_style) in zip(fc_layers, folding):
    fcl_inst = getCustomOp(fcl)
    fcl_inst.set_nodeattr("PE", pe)
    fcl_inst.set_nodeattr("SIMD", simd)
    fcl_inst.set_nodeattr("inFIFODepth", ififodepth)
    fcl_inst.set_nodeattr("ram_style", ram_style)

# use same SIMD values for the sliding window operators
swg_layers = model.get_nodes_by_op_type("ConvolutionInputGenerator")
if len(swg_layers) > len(folding):
    # fail with an explicit message instead of an IndexError half-way through
    raise ValueError(
        "{} ConvolutionInputGenerator nodes but only {} folding entries".format(
            len(swg_layers), len(folding)
        )
    )
for swg, (_, simd, _, _) in zip(swg_layers, folding):
    swg_inst = getCustomOp(swg)
    swg_inst.set_nodeattr("SIMD", simd)

model = model.transform(InsertDWC())
model = model.transform(InsertFIFO())
model = model.transform(InsertTLastMarker())
model = model.transform(GiveUniqueNodeNames())
# annotate the graph in "estimate" mode; the total is read back from the
# "res_total_estimate" metadata property below
model = model.transform(AnnotateResources("estimate"))
model.save(build_dir + "/{}_folded.onnx".format(base_file_name))
print("Estimation of used resources: {}".format(model.get_metadata_prop("res_total_estimate")))

In [None]:
# Visualise the folded graph; the call stays the last expression so its
# return value is rendered as the cell output.
folded_model_path = build_dir + "/{}_folded.onnx".format(base_file_name)
showInNetron(folded_model_path)

In [None]:
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.util.basic import pynq_part_map
import time
import traceback

# Target board; the FPGA part string is looked up from FINN's board -> part map.
test_pynq_board = "ZCU104"
test_fpga_part = pynq_part_map[test_pynq_board]
# Target clock period in nanoseconds, passed to PrepareIP.
target_clk_ns = 5

# Run PrepareIP followed by HLSSynthIP and time each stage separately.
# Errors are reported (with traceback) rather than raised, but the
# "took ... seconds" message is only printed when the stage really finished.
time_start = time.time()
try:
    model = ModelWrapper(build_dir + "/{}_folded.onnx".format(base_file_name))
    print("PrepareIP started!")
    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
    print("Preparing IP took {:.2f} seconds".format(time.time() - time_start))
    # restart the timer so the next measurement covers HLS synthesis only
    time_start = time.time()
    print("HLSSynth started!")
    model = model.transform(HLSSynthIP(0))
    model.save(build_dir + "/{}_ipgen.onnx".format(base_file_name))
except Exception as e:
    print("Exception: {}\n {}".format(e, traceback.format_exc()))
    # time_start marks the beginning of whichever stage was running
    print(
        "IP generation FAILED {:.2f} seconds into the current stage; "
        "the _ipgen model was not saved by this run".format(time.time() - time_start)
    )
else:
    print("HLS Synthesis took {:.2f} seconds".format(time.time() - time_start))

In [None]:
# Reload the generated-IP model, annotate it in "hls" mode and report the
# total stored in the "res_total_hls" metadata property.
ipgen_model_path = build_dir + "/{}_ipgen.onnx".format(base_file_name)
model = ModelWrapper(ipgen_model_path).transform(AnnotateResources("hls"))
hls_resource_total = model.get_metadata_prop("res_total_hls")
print("Estimation of used resources (HLS): {}".format(hls_resource_total))

Once the HLS synthesis is complete, we can stitch together the generated IP blocks into a larger IP that is the implementation of our network:

In [None]:
from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
    ReplaceVerilogRelPaths,
)
from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP

# Stitch the per-layer IP blocks into one IP for the part selected earlier
# (test_fpga_part), after the ReplaceVerilogRelPaths pass has been applied.
model = (
    ModelWrapper(build_dir + "/{}_ipgen.onnx".format(base_file_name))
    .transform(ReplaceVerilogRelPaths())
    .transform(CreateStitchedIP(test_fpga_part))
)
model.save(build_dir + "/{}_ipstitch.onnx".format(base_file_name))

In [None]:
from finn.transformation.fpgadataflow.make_pynq_proj import MakePYNQProject
from finn.transformation.fpgadataflow.synth_pynq_proj import SynthPYNQProject

# Build the PYNQ project for the selected board from the stitched-IP model.
ipstitch_model_path = build_dir + "/{}_ipstitch.onnx".format(base_file_name)
model = ModelWrapper(ipstitch_model_path).transform(MakePYNQProject(test_pynq_board))

# The transformation records the project directory in the model metadata.
vivado_proj = model.get_metadata_prop("vivado_pynq_proj")
print("Vivado synthesis project is at %s/resizer.xpr" % (vivado_proj,))

model.save(build_dir + "/{}_pynqproj.onnx".format(base_file_name))

In [None]:
# Run SynthPYNQProject on the PYNQ project model and time it. Errors are
# reported (with traceback) rather than raised, but the success message is
# only printed when synthesis finished and the model was saved.
model = ModelWrapper(build_dir + "/{}_pynqproj.onnx".format(base_file_name))
time_start = time.time()
try:
    model = model.transform(SynthPYNQProject())
    model.save(build_dir + "/{}_synth.onnx".format(base_file_name))
except Exception as e:
    print("Exception: {}\n {}".format(e, traceback.format_exc()))
    print(
        "Vivado project Synthesis FAILED after {:.2f} seconds; "
        "the _synth model was not saved by this run".format(time.time() - time_start)
    )
else:
    print("Vivado project Synthesis took {:.2f} seconds".format(time.time() - time_start))

In [None]:
import os
from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ
from finn.util.basic import make_build_dir
from finn.util.visualization import showInNetron
from finn.core.modelwrapper import ModelWrapper
from finn.custom_op.registry import getCustomOp
# Re-declared here so the deployment cells can be run without re-running the
# first cell of the notebook.
build_dir = "/workspace/finn"
base_file_name = "rpn"

# set up the following values according to your own environment
# FINN will use ssh to deploy and run the generated accelerator
# Every setting can be overridden through an environment variable; the
# defaults below are used otherwise.
ip = os.getenv("PYNQ_IP", "192.168.2.99")
username = os.getenv("PYNQ_USERNAME", "xilinx")
password = os.getenv("PYNQ_PASSWORD", "xilinx")
# os.getenv returns a str when the variable is set, so normalise to int to
# get the same type as the default
port = int(os.getenv("PYNQ_PORT", 22))
target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")

# Generate the driver, deploy to the board, and remember the remote
# deployment directory that the transformation stored in the metadata.
model = ModelWrapper(build_dir + "/{}_synth.onnx".format(base_file_name))
model = model.transform(MakePYNQDriver())
model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))
deploy_dir = model.get_metadata_prop("pynq_deploy_dir")
model.save(build_dir + "/{}_pynq_deploy.onnx".format(base_file_name))

In [None]:
# List the contents of the deployment target directory on the board over ssh.
# The {name} placeholders are filled in by IPython from the notebook variables.
# NOTE(review): the password is passed on the command line via `sshpass -p`;
# avoid running this with a non-default password on a shared machine.
! sshpass -p {password} ssh {username}@{ip} -p {port} 'ls -l {target_dir}/*'
# Deployment directory reported by the deployment step (read from the model metadata).
print(deploy_dir)

In [None]:
import pkg_resources as pk
import matplotlib.pyplot as plt
import numpy as np

# All-ones float32 dummy input of shape (1, 64, 320, 320), later reshaped to
# the parent model's input shape.
x = np.ones((1, 64, 320, 320), dtype=np.float32)

In [None]:
# point to the PYNQ-deployed model as the StreamingDataflowPartition in the parent
parent_model = ModelWrapper(build_dir + "/{}_dataflow_parent.onnx".format(base_file_name))
sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
sdp_node = getCustomOp(sdp_node)
# Derive both file names from base_file_name (instead of a hard-coded "rpn")
# so they stay in sync with the files written by the other cells.
sdp_node.set_nodeattr("model", build_dir + "/{}_pynq_deploy.onnx".format(base_file_name))
parent_model.save(
    build_dir + "/{}_dataflow_parent_with_remote_bitfile_exec.onnx".format(base_file_name)
)

In [None]:
import numpy as np
from finn.core.onnx_exec import execute_onnx
# Names of the first graph input/output and the input shape expected by the parent model.
iname = parent_model.graph.input[0].name
oname = parent_model.graph.output[0].name
ishape = parent_model.get_tensor_shape(iname)
input_dict = {iname: x.reshape(ishape)}

# Connection settings stored as metadata properties on the parent model
# before it is executed. Kept as an ordered sequence of pairs so they are
# set and echoed in a fixed order.
remote_exec_props = (
    ("pynq_ip", ip),
    ("pynq_port", str(port)),
    ("pynq_username", username),
    ("pynq_password", password),
    ("pynq_target_dir", target_dir),
    ("pynq_deploy_dir", deploy_dir),
)
for prop_name, prop_value in remote_exec_props:
    parent_model.set_metadata_prop(prop_name, prop_value)

# Echo what was stored, one value per line. The password is masked so it
# does not end up in the saved notebook output.
for prop_name, _ in remote_exec_props:
    stored_value = parent_model.get_metadata_prop(prop_name)
    print("********" if prop_name == "pynq_password" else stored_value)

ret = execute_onnx(parent_model, input_dict, True)