In [1]:
import resnet
from brevitas.export import export_qonnx
import torch

# Model and Inference Example

In [2]:
resnet18 = resnet.quant_resnet18().to('cpu')

In [19]:
input_shape = (1, 3, 32, 32)
dummy_in = torch.randn(input_shape)

In [20]:
out = resnet18(dummy_in)

In [21]:
out

tensor([[-4.0170, -1.6568,  2.6768,  3.4622,  6.6536,  1.1862,  2.1534, -6.0321,
          6.0447, -3.0883]], grad_fn=<AddmmBackward0>)

# Export to ONNX

In [22]:
resnet18_filename = './resnet18/resnet18_model.onnx'
resnet18.eval();
export_qonnx(resnet18, torch.randn(input_shape), resnet18_filename);

# Netron visualization

In [23]:
from finn.util.visualization import showSrc, showInNetron
from qonnx.util.cleanup import cleanup as qonnx_cleanup

In [24]:
showInNetron(resnet18_filename)

Stopping http://0.0.0.0:8083
Serving './resnet18/resnet18_model.onnx' at http://0.0.0.0:8083


# Tidy

In [25]:
from qonnx.core.modelwrapper import ModelWrapper

from qonnx.transformation.fold_constants import FoldConstants

from qonnx.transformation.general import (
    ConvertSubToAdd,
    ConvertDivToMul,
    GiveReadableTensorNames,
    GiveUniqueNodeNames,
    SortGraph,
    RemoveUnusedTensors,
    GiveUniqueParameterTensors,
    RemoveStaticGraphInputs,
    ApplyConfig,
)

from finn.transformation.streamline.absorb import (
    AbsorbScalarMulAddIntoTopK,
    AbsorbAddIntoMultiThreshold,
    AbsorbMulIntoMultiThreshold,
    FactorOutMulSignMagnitude,
    Absorb1BitMulIntoMatMul,
    Absorb1BitMulIntoConv,
    AbsorbConsecutiveTransposes,
    AbsorbTransposeIntoMultiThreshold,
)

from finn.transformation.streamline.collapse_repeated import (
    CollapseRepeatedAdd,
    CollapseRepeatedMul,
)

from finn.transformation.streamline.reorder import (
    MoveAddPastMul,
    MoveScalarMulPastMatMul,
    MoveScalarAddPastMatMul,
    MoveAddPastConv,
    MoveScalarMulPastConv,
    MoveScalarLinearPastInvariants,
    MoveMaxPoolPastMultiThreshold,
)

from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
from finn.transformation.streamline.sign_to_thres import ConvertSignToThres
from qonnx.transformation.batchnorm_to_affine import BatchNormToAffine

# just for not linear
from finn.transformation.streamline.reorder import (
    MoveLinearPastEltwiseAdd,
    MoveLinearPastFork,
)

from qonnx.transformation.double_to_single_float import DoubleToSingleFloat
from qonnx.transformation.remove import RemoveIdentityOps
from qonnx.core.datatype import DataType

from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.infer_datatypes import InferDataTypes
from qonnx.transformation.infer_data_layouts import InferDataLayouts
from qonnx.transformation.insert_topk import InsertTopK
import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw
from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul

from finn.builder.build_dataflow_config import (
    DataflowBuildConfig,
    ShellFlowType,
)

from finn.transformation.move_reshape import RemoveCNVtoFCFlatten

In [26]:
model = ModelWrapper(resnet18_filename)

In [27]:
model = model.transform(GiveUniqueParameterTensors())
model = model.transform(InferShapes())
model = model.transform(FoldConstants())
model = model.transform(RemoveStaticGraphInputs())
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())
model = model.transform(InferDataTypes())
model = model.transform(InsertTopK())
model = model.transform(InferShapes())
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())
model = model.transform(InferDataTypes())

In [28]:
finn_tidy = './resnet18/02_finn_tidy.onnx'
model.save(finn_tidy)

In [29]:
showInNetron(finn_tidy)

Stopping http://0.0.0.0:8083
Serving './resnet18/02_finn_tidy.onnx' at http://0.0.0.0:8083


# Streamline Linear 

In [40]:
def step_resnet50_streamline_linear(model: ModelWrapper):
    streamline_transformations = [
        AbsorbScalarMulAddIntoTopK(),  # before MoveAddPastMul to avoid int->float
        ConvertSubToAdd(),
        ConvertDivToMul(),
        RemoveIdentityOps(),
        CollapseRepeatedMul(),
        BatchNormToAffine(),
        ConvertSignToThres(),
        MoveAddPastMul(),
        MoveScalarAddPastMatMul(),
        MoveAddPastConv(),
        MoveScalarMulPastMatMul(),
        MoveScalarMulPastConv(),
        MoveScalarLinearPastInvariants(),
        MoveAddPastMul(),
        CollapseRepeatedAdd(),
        CollapseRepeatedMul(),
        AbsorbAddIntoMultiThreshold(),
        FactorOutMulSignMagnitude(),
        MoveMaxPoolPastMultiThreshold(),
        AbsorbMulIntoMultiThreshold(),
        # Absorb1BitMulIntoMatMul(),
        # Absorb1BitMulIntoConv(),
        RoundAndClipThresholds(),
    ]
    for trn in streamline_transformations:
        model = model.transform(trn)
        model = model.transform(GiveUniqueNodeNames())
    return model

# Streamline Non Linear

In [41]:
def step_resnet50_streamline_nonlinear(model: ModelWrapper):
    streamline_transformations = [
        MoveLinearPastEltwiseAdd(),
        MoveLinearPastFork(),
    ]
    for trn in streamline_transformations:
        model = model.transform(trn)
        model = model.transform(GiveUniqueNodeNames())
    return model

# Streamline

In [42]:
def step_resnet50_streamline(model: ModelWrapper):
    for iter_id in range(4):
        model = step_resnet50_streamline_linear(model)
        model = step_resnet50_streamline_nonlinear(model)

        # big loop tidy up
        model = model.transform(RemoveUnusedTensors())
        model = model.transform(GiveReadableTensorNames())
        model = model.transform(InferDataTypes())
        model = model.transform(SortGraph())

    model = model.transform(DoubleToSingleFloat())

    return model

In [43]:
model = ModelWrapper(finn_tidy)

In [44]:
model = step_resnet50_streamline(model)

In [45]:
finn_streamline = './resnet18/03_finn_streamline.onnx'
model.save(finn_streamline)

In [46]:
showInNetron(finn_streamline)

Stopping http://0.0.0.0:8083
Serving './resnet18/03_finn_streamline.onnx' at http://0.0.0.0:8083


# To HW Layers

In [47]:
def step_resnet50_convert_to_hw(model: ModelWrapper):
    model.set_tensor_datatype(model.graph.input[0].name, DataType["UINT8"])
    model = model.transform(InferDataLayouts())
    model = model.transform(DoubleToSingleFloat())
    model = model.transform(InferDataTypes())
    model = model.transform(SortGraph())

    to_hw_transformations = [
        to_hw.InferAddStreamsLayer,
        LowerConvsToMatMul,
        to_hw.InferChannelwiseLinearLayer,
        to_hw.InferPool,
        AbsorbTransposeIntoMultiThreshold,
        RoundAndClipThresholds,
        to_hw.InferQuantizedMatrixVectorActivation,
        to_hw.InferThresholdingLayer,
        AbsorbConsecutiveTransposes,
        to_hw.InferConvInpGen,
        to_hw.InferDuplicateStreamsLayer,
        to_hw.InferLabelSelectLayer,
    ]
    for trn in to_hw_transformations:
        model = model.transform(trn())
        model = model.transform(InferDataLayouts())
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(InferDataTypes())

    model = model.transform(RemoveCNVtoFCFlatten())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(RemoveUnusedTensors())
    model = model.transform(SortGraph())

    return model

# Modify Add Nodes for Int Inputs

# Custom Add to HW with prints

In [54]:
import numpy as np
import qonnx.core.data_layout as DataLayout
import warnings
from onnx import TensorProto, helper
from qonnx.core.datatype import DataType
from qonnx.custom_op.registry import getCustomOp
from qonnx.transformation.base import Transformation
from qonnx.transformation.general import SortGraph
from qonnx.transformation.infer_datatypes import InferDataTypes
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.util.basic import get_by_name
from qonnx.util.onnx import nchw_to_nhwc

In [58]:
class Custom_InferAddStreamsLayer(Transformation):
    """Convert any Add into a AddStreams HW layer."""

    def apply(self, model):
        graph = model.graph
        node_ind = 0
        graph_modified = False
        for node in graph.node:
            node_ind += 1
            if node.op_type == "Add":
                in0 = node.input[0]
                in1 = node.input[1]
                result = node.output[0]
                in0_shape = model.get_tensor_shape(in0)
                in1_shape = model.get_tensor_shape(in1)
                in0_static = not (model.get_initializer(in0) is None)
                in1_static = not (model.get_initializer(in1) is None)

                # skip if different shapes on inputs
                if in0_shape != in1_shape:
                    print(f'Skipping node {node.name} due to different shapes')
                    continue
                # skip if any of inputs have initializers
                # (this node is meant for adding two dynamic streams)
                if in0_static or in1_static:
                    print(f'Skipping node {node.name} due to static input')
                    continue

                idt0 = model.get_tensor_datatype(in0)
                idt1 = model.get_tensor_datatype(in1)

                # skip if different data types on inputs
                if idt0 != idt1:
                    print(f'Skipping node {node.name} due to different input datatypes')
                    continue

                idt = idt0

                skip conversion for layers with float input
                if not idt.is_integer():
                    print(f'Skipping node {node.name} due to float input datatype')
                    continue

                # check layout and convert if necessary
                in0_layout = model.get_tensor_layout(in0)
                in1_layout = model.get_tensor_layout(in1)
                result_layout = model.get_tensor_layout(result)

                if in0_layout == DataLayout.NCHW:
                    in0 = nchw_to_nhwc(in0, model, node_ind)
                    node_ind += 1
                    in0_shape = model.get_tensor_shape(in0)

                if in1_layout == DataLayout.NCHW:
                    in1 = nchw_to_nhwc(in1, model, node_ind)
                    node_ind += 1
                    in1_shape = model.get_tensor_shape(in1)

                # keep track of where we need to insert the HW Op
                # it has to be ahead of the output transform
                insert_point = node_ind

                if result_layout == DataLayout.NCHW:
                    result = nchw_to_nhwc(result, model, node_ind, reverse=True)
                    node_ind += 1

                # now safe to assume num_channels is size of last dimension
                num_channels = int(in0_shape[-1])
                # create node with no parallelization first
                pe = 1

                # create and insert new AddStreams node
                new_node = helper.make_node(
                    "AddStreams",
                    [in0, in1],
                    [result],
                    domain="finn.custom_op.fpgadataflow",
                    backend="fpgadataflow",
                    NumChannels=num_channels,
                    PE=pe,
                    inputDataType=idt.name,
                    numInputVectors=in0_shape[:-1],
                    name="AddStreams_" + node.name,
                )
                graph.node.insert(insert_point, new_node)
                # remove old node
                graph.node.remove(node)
                graph_modified = True

        if graph_modified:
            model = model.transform(InferShapes())
            model = model.transform(InferDataTypes())
        return (model, graph_modified)

In [59]:
model = ModelWrapper(finn_streamline)

In [60]:
model = model.transform(Custom_InferAddStreamsLayer())

Exception: Undefined for ScaledIntType

In [52]:
finn_infer_adds = './resnet18/04_finn_infer_adds.onnx'
model.save(finn_infer_adds)

In [53]:
showInNetron(finn_infer_adds)

Stopping http://0.0.0.0:8083
Serving './resnet18/04_finn_infer_adds.onnx' at http://0.0.0.0:8083
