# I- FINN PREPROCESSING 

### install required librabries

In [1]:
import torch
import torchvision
import brevitas
import onnx
import sklearn
print("All required libraries are installed.")

All required libraries are installed.


In [2]:
import os
print(os.getcwd())

/home/abc/Desktop/finn/notebooks


### Load and Preprocess GTSRB Dataset

In [17]:
df = pd.read_csv('./gtsrb/Test/GT-final_test.csv', sep=';')
print(df.columns.tolist())

['Width,Height,Roi.X1,Roi.Y1,Roi.X2,Roi.Y2,ClassId,Path']


In [2]:
import os
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms

# -------------------------------
# Custom Dataset for Test Set
# -------------------------------
class GTSRBTestDataset(Dataset):
    def __init__(self, csv_file, img_dir, transform=None):
        """
        Args:
            csv_file (str): Path to CSV file with annotations.
            img_dir (str): Directory with all the test images.
            transform (callable, optional): Optional transform to be applied.
        """
        self.labels_df = pd.read_csv(csv_file, sep=';')
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.labels_df)

    def __getitem__(self, idx):
        row = self.labels_df.iloc[idx]

        # Get image path from 'Path' column
        img_path = os.path.join(self.img_dir, row['Path'])
        image = Image.open(img_path).convert('RGB')  # Load image as RGB

        if self.transform:
            image = self.transform(image)

        label = torch.tensor(row['ClassId'], dtype=torch.long)
        return image, label


# -------------------------------
# Image Transformations
# -------------------------------
transform = transforms.Compose([
    transforms.Resize((48, 48)),                   # Resize all images
    transforms.Grayscale(num_output_channels=1),   # Convert to 1-channel grayscale
    transforms.ToTensor(),                         # Convert to tensor
    transforms.Normalize((0.5,), (0.5,))           # Normalize to [-1, 1]
])


# -------------------------------
# Load Training Dataset
# -------------------------------
train_dir = './gtsrb/Train'
train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)


# -------------------------------
# Load Test Dataset (Custom)
# -------------------------------
test_dir = './gtsrb/Test'
test_csv = os.path.join(test_dir, 'GT-final_test.csv')

try:
    test_dataset = GTSRBTestDataset(
        csv_file=test_csv,
        img_dir=test_dir,
        transform=transform
    )
    print("✅ Test dataset loaded successfully.")
except Exception as e:
    test_dataset = None
    print(f"❌ Failed to load test dataset: {e}")


# -------------------------------
# Create DataLoaders
# -------------------------------
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False) if test_dataset else None


# -------------------------------
# Debug Info
# -------------------------------
print(f"✅ Number of training classes: {len(train_dataset.classes)}")
print(f"✅ Number of training samples: {len(train_dataset)}")
if test_dataset:
    print(f"✅ Number of test samples: {len(test_dataset)}")

# Optional: Check shape of one batch
sample_batch = next(iter(train_loader))
print(f"✅ First batch shape: {sample_batch[0].shape}")  # [B, 1, 48, 48]


✅ Test dataset loaded successfully.
✅ Number of training classes: 43
✅ Number of training samples: 39209
✅ Number of test samples: 12630
✅ First batch shape: torch.Size([64, 1, 48, 48])


### Define a Quantized CNN Model Using Brevitas

In [3]:
import torch
import torch.nn as nn
from brevitas.nn import QuantConv2d, QuantLinear, QuantReLU, QuantIdentity
from brevitas.quant import Int8ActPerTensorFixedPoint

class QuantizedGTSRBNet_8bit(nn.Module):
    def __init__(self, weight_bit_width=8, act_bit_width=8):
        super(QuantizedGTSRBNet_8bit, self).__init__()

        # Input quantizer
        self.quant_input = QuantIdentity(
            act_quant=Int8ActPerTensorFixedPoint,
            bit_width=act_bit_width
        )

        # Feature extractor
        self.features = nn.Sequential(
            QuantConv2d(1, 16, kernel_size=3, weight_bit_width=weight_bit_width, bias=True),
            nn.BatchNorm2d(16),
            QuantReLU(bit_width=act_bit_width),

            QuantConv2d(16, 32, kernel_size=3, weight_bit_width=weight_bit_width, bias=True),
            nn.BatchNorm2d(32),
            QuantReLU(bit_width=act_bit_width),
            nn.MaxPool2d(2),

            QuantConv2d(32, 64, kernel_size=3, weight_bit_width=weight_bit_width, bias=True),
            nn.BatchNorm2d(64),
            QuantReLU(bit_width=act_bit_width),
            nn.MaxPool2d(2),
        )

        # Classifier head
        self.classifier = nn.Sequential(
            QuantLinear(64 * 10 * 10, 128, weight_bit_width=weight_bit_width, bias=True),
            nn.BatchNorm1d(128),
            QuantReLU(bit_width=act_bit_width),
            QuantLinear(128, 43, weight_bit_width=weight_bit_width, bias=True)  # 43 classes
        )

    def forward(self, x):
        x = self.quant_input(x)
        x = self.features(x)
        x = x.view(x.size(0), -1)
        return self.classifier(x)

# Instantiate the model
model = QuantizedGTSRBNet_8bit(weight_bit_width=8, act_bit_width=8)

# Move to device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

print(model)


QuantizedGTSRBNet_8bit(
  (quant_input): QuantIdentity(
    (input_quant): ActQuantProxyFromInjector(
      (_zero_hw_sentinel): StatelessBuffer()
    )
    (act_quant): ActQuantProxyFromInjector(
      (_zero_hw_sentinel): StatelessBuffer()
      (fused_activation_quant_proxy): FusedActivationQuantProxy(
        (activation_impl): Identity()
        (tensor_quant): RescalingIntQuant(
          (int_quant): IntQuant(
            (float_to_int_impl): RoundSte()
            (tensor_clamp_impl): TensorClamp()
            (delay_wrapper): DelayWrapper(
              (delay_impl): _NoDelay()
            )
          )
          (scaling_impl): ParameterFromRuntimeStatsScaling(
            (stats_input_view_shape_impl): OverTensorView()
            (stats): _Stats(
              (stats_impl): AbsPercentile()
            )
            (restrict_scaling): _RestrictValue(
              (restrict_value_impl): PowerOfTwoRestrictValue(
                (float_to_int_impl): CeilSte()
                

### Step 4: Train the Model

In [5]:
import torch.optim as optim
import torch.nn as nn

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
def train(model, train_loader, optimizer, criterion, device):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Training Loss: {epoch_loss:.4f}")

# Run training for a few epochs
for epoch in range(10):  # adjust number of epochs
    print(f"Epoch {epoch+1}")
    train(model, train_loader, optimizer, criterion, device)

Epoch 1


  return super(Tensor, self).rename(names)


Training Loss: 0.4160
Epoch 2
Training Loss: 0.0244
Epoch 3
Training Loss: 0.0076
Epoch 4
Training Loss: 0.0057
Epoch 5
Training Loss: 0.0104
Epoch 6
Training Loss: 0.0058
Epoch 7
Training Loss: 0.0009
Epoch 8
Training Loss: 0.0002
Epoch 9
Training Loss: 0.0147
Epoch 10
Training Loss: 0.0033


### Step 5: Save the Trained Model

In [6]:
torch.save(model.state_dict(), "gtsrb_quantized_8bit.pth")
print("Model saved.")

Model saved.


### Step 6: Export to ONNX Using Brevitas

In [4]:
from brevitas.export import export_qonnx
from qonnx.util.cleanup import cleanup as qonnx_cleanup
from qonnx.core.modelwrapper import ModelWrapper
from qonnx.core.datatype import DataType
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN


In [8]:
from brevitas.export import export_qonnx
import torch
model = model.cpu()
model.eval()
dummy_input = torch.randn(1, 1, 48, 48).to(device)
export_path = "gtsrb_quantized_8bit.onnx"
export_qonnx(model, input_t=dummy_input, export_path=export_path)
print(f"Model exported to {export_path}")

Model exported to gtsrb_quantized_8bit.onnx


In [2]:
from finn.util.visualization import showInNetron

showInNetron('./gtsrb_quantized_8bit.onnx')

Stopping http://0.0.0.0:8081
Serving './gtsrb_quantized_8bit.onnx' at http://0.0.0.0:8081


In [17]:
# clean-up
qonnx_cleanup("./gtsrb_quantized_8bit.onnx", out_file="./gtsrb_quantized_8bit_cleaned.onnx")
ready_model_filename="./gtsrb_quantized_8bit_cleaned.onnx"


# ModelWrapper
model = ModelWrapper(ready_model_filename)
# Setting the input datatype explicitly because it doesn't get derived from the export function
model.set_tensor_datatype(model.graph.input[0].name, DataType["BIPOLAR"])
model = model.transform(ConvertQONNXtoFINN())
model.save(ready_model_filename)




In [1]:
from finn.util.visualization import showInNetron

showInNetron('./gtsrb_quantized_8bit_cleaned.onnx')

Serving './gtsrb_quantized_8bit_cleaned.onnx' at http://0.0.0.0:8081


# II- FINN Verification before PROCESSING

### Verify Exported ONNX Model in FINN

In [5]:
import onnx 
import torch 
import os
from qonnx.core.modelwrapper import ModelWrapper

ready_model_filename = './gtsrb_quantized_8bit_cleaned.onnx'
model_for_sim = ModelWrapper(ready_model_filename)

### Import model into FINN with ModelWrapper

In [6]:
from qonnx.core.datatype import DataType

finnonnx_in_tensor_name = model_for_sim.graph.input[0].name
finnonnx_out_tensor_name = model_for_sim.graph.output[0].name
print("Input tensor name: %s" % finnonnx_in_tensor_name)
print("Output tensor name: %s" % finnonnx_out_tensor_name)
finnonnx_model_in_shape = model_for_sim.get_tensor_shape(finnonnx_in_tensor_name)
finnonnx_model_out_shape = model_for_sim.get_tensor_shape(finnonnx_out_tensor_name)
print("Input tensor shape: %s" % str(finnonnx_model_in_shape))
print("Output tensor shape: %s" % str(finnonnx_model_out_shape))
finnonnx_model_in_dt = model_for_sim.get_tensor_datatype(finnonnx_in_tensor_name)
finnonnx_model_out_dt = model_for_sim.get_tensor_datatype(finnonnx_out_tensor_name)
print("Input tensor datatype: %s" % str(finnonnx_model_in_dt.name))
print("Output tensor datatype: %s" % str(finnonnx_model_out_dt.name))
print("List of node operator types in the graph: ")
print([x.op_type for x in model_for_sim.graph.node])

Input tensor name: global_in
Output tensor name: global_out
Input tensor shape: [1, 1, 48, 48]
Output tensor shape: [1, 43]
Input tensor datatype: BIPOLAR
Output tensor datatype: FLOAT32
List of node operator types in the graph: 
['MultiThreshold', 'Add', 'Mul', 'Conv', 'Mul', 'Add', 'BatchNormalization', 'MultiThreshold', 'Mul', 'Conv', 'Mul', 'Add', 'BatchNormalization', 'MultiThreshold', 'Mul', 'MaxPool', 'Conv', 'Mul', 'Add', 'BatchNormalization', 'MultiThreshold', 'Mul', 'MaxPool', 'Reshape', 'MatMul', 'Mul', 'Add', 'BatchNormalization', 'MultiThreshold', 'Mul', 'MatMul', 'Mul', 'Add']


### Network preparation: Tidy-up transformations

In [None]:
from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.infer_datatypes import InferDataTypes
from qonnx.transformation.fold_constants import FoldConstants

model_for_sim = model_for_sim.transform(InferShapes())
model_for_sim = model_for_sim.transform(FoldConstants())
model_for_sim = model_for_sim.transform(GiveUniqueNodeNames())
model_for_sim = model_for_sim.transform(GiveReadableTensorNames())
model_for_sim = model_for_sim.transform(InferDataTypes())
model_for_sim = model_for_sim.transform(RemoveStaticGraphInputs())

verif_model_filename = './gtsrb_quantized_8bit_verified.onnx'
model.save(verif_model_filename)

In [6]:
from finn.util.visualization import showInNetron

showInNetron('./gtsrb_quantized_8bit_verified.onnx')

Stopping http://0.0.0.0:8081
Serving './gtsrb_quantized_8bit_verified.onnx' at http://0.0.0.0:8081


### Verify Brevitas and FINN models 

In [16]:
# Instantiate the model
brevitas_model = QuantizedGTSRBNet_8bit(weight_bit_width=8, act_bit_width=8)
brevitas_model.load_state_dict(torch.load("gtsrb_quantized_8bit.pth"))
brevitas_model.eval()

QuantizedGTSRBNet_8bit(
  (quant_input): QuantIdentity(
    (input_quant): ActQuantProxyFromInjector(
      (_zero_hw_sentinel): StatelessBuffer()
    )
    (act_quant): ActQuantProxyFromInjector(
      (_zero_hw_sentinel): StatelessBuffer()
      (fused_activation_quant_proxy): FusedActivationQuantProxy(
        (activation_impl): Identity()
        (tensor_quant): RescalingIntQuant(
          (int_quant): IntQuant(
            (float_to_int_impl): RoundSte()
            (tensor_clamp_impl): TensorClamp()
            (delay_wrapper): DelayWrapper(
              (delay_impl): _NoDelay()
            )
          )
          (scaling_impl): ParameterFromRuntimeStatsScaling(
            (stats_input_view_shape_impl): OverTensorView()
            (stats): _Stats(
              (stats_impl): AbsPercentile()
            )
            (restrict_scaling): _RestrictValue(
              (restrict_value_impl): PowerOfTwoRestrictValue(
                (float_to_int_impl): CeilSte()
                

In [44]:
# ----------------------------------
# Preprocess image
# ----------------------------------
input_tensor = load_and_preprocess_image("stop.png")

print("Image loaded and preprocessed:")
print("Input shape:", input_tensor.shape)

Image loaded and preprocessed:
Input shape: torch.Size([1, 1, 48, 48])


In [38]:
in_name = model_finn.graph.input[0].name
print("Input tensor name:", in_name)
print("Input shape expected by FINN:", model_finn.get_tensor_shape(in_name))


Input tensor name: global_in
Input shape expected by FINN: [1, 1, 48, 48]


In [41]:
import torch
import numpy as np
from PIL import Image
import torchvision.transforms as transforms
import finn.core.onnx_exec as oxe
from qonnx.core.modelwrapper import ModelWrapper

# ----------------------------
# Load models
# ----------------------------
print("Loading models...")

# Brevitas model
model_brevitas = QuantizedGTSRBNet_8bit(weight_bit_width=8, act_bit_width=8)
model_brevitas.load_state_dict(torch.load("gtsrb_quantized_8bit.pth"))
model_brevitas.cpu().eval()

# FINN ONNX model
model_finn = ModelWrapper("gtsrb_quantized_8bit_verified.onnx")
print("Models loaded successfully.\n")


# ----------------------------
# Preprocessing
# ----------------------------
def load_and_preprocess_image(image_path):
    """
    Load image and preprocess for both Brevitas and FINN models.
    For Brevitas: output is [1, 1, 48, 48] tensor
    For FINN: output will be flattened, normalized, padded to match ONNX input shape
    """
    transform = transforms.Compose([
        transforms.Resize((48, 48)),
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    image = Image.open(image_path).convert('RGB')
    input_tensor = transform(image).unsqueeze(0)  # [1, 1, 48, 48]
    return input_tensor


def prepare_for_finn(input_tensor, finn_model):
    """
    Convert the [1,1,48,48] PyTorch tensor to the flat, padded, bipolar format for FINN.
    """
    # Remove batch/channel dims: [1,1,48,48] -> [48,48]
    flat = input_tensor.squeeze().numpy().flatten()

    # Rescale from [-1,1] (after Normalize) to [0,1]
    flat = (flat + 1) / 2

    # Pad to match FINN input
    in_tensor_name = finn_model.graph.input[0].name
    in_shape = finn_model.get_tensor_shape(in_tensor_name)
    expected_features = in_shape[-1]

    if flat.shape[0] > expected_features:
        raise ValueError(f"Image flattening yields {flat.shape[0]} features but model expects {expected_features}")

    if flat.shape[0] < expected_features:
        pad_width = expected_features - flat.shape[0]
        flat = np.pad(flat, (0, pad_width))

    # Convert to bipolar [-1,1]
    flat = 2 * flat - 1

    # Reshape to FINN's expected input shape
    flat = flat.reshape(in_shape)
    return flat


# ----------------------------
# Inference functions
# ----------------------------
def inference_with_brevitas(model, input_tensor):
    model.eval()
    with torch.no_grad():
        output = model(input_tensor)
    return output.argmax(dim=1).item()


def inference_with_finn_onnx(model_for_sim, input_tensor):
    import finn.core.onnx_exec as oxe

    # Convert to NumPy if it's a torch tensor
    if hasattr(input_tensor, "detach"):
        input_np = input_tensor.detach().numpy()
    else:
        input_np = input_tensor

    # FINN expects bipolar input in [-1, 1], so rescale from normalized [0, 1]
    # Your PyTorch preprocessing normalized to [-1, 1], so no extra rescaling needed
    # But to be safe, make sure it’s float32
    input_np = input_np.astype(np.float32)

    # Match model input name
    input_name = model_for_sim.graph.input[0].name
    input_dict = {input_name: input_np}

    # Run ONNX simulation
    output_dict = oxe.execute_onnx(model_for_sim, input_dict)
    output_name = model_for_sim.graph.output[0].name

    return output_dict[output_name]


# ----------------------------
# Main test
# ----------------------------
image_path = "stop.png"

# Load and preprocess image
input_tensor = load_and_preprocess_image(image_path)
print("Image loaded. Shape for Brevitas:", input_tensor.shape)

# Brevitas inference
brevitas_pred = inference_with_brevitas(model_brevitas, input_tensor)
print("✅ Brevitas predicted class:", brevitas_pred)

# Image is already loaded and preprocessed to shape [1, 1, 48, 48]
print("Input shape:", input_tensor.shape)  # Confirm it's [1, 1, 48, 48]

# Run FINN inference
def binarize_bipolar(x):
    return np.where(x >= 0, 1.0, -1.0).astype(np.float32)

input_np = input_tensor.detach().numpy()
input_np = binarize_bipolar(input_np)

finn_output = inference_with_finn_onnx(model_finn, input_np)
predicted_class = finn_output.argmax()
print("Predicted Class (FINN):", predicted_class)


# Compare
if brevitas_pred == predicted_class:
    print("\n🎯 SUCCESS: Both models predicted the same class!")
else:
    print("\n⚠️ WARNING: Prediction mismatch between Brevitas and FINN!")


Loading models...
Models loaded successfully.

Image loaded. Shape for Brevitas: torch.Size([1, 1, 48, 48])
✅ Brevitas predicted class: 6
Input shape: torch.Size([1, 1, 48, 48])
Predicted Class (FINN): 6

🎯 SUCCESS: Both models predicted the same class!


# FINN PROCESSING

### Launch a Build: Only Estimate Reports

In [1]:
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg
import os
import shutil

model_dir = os.environ['FINN_ROOT'] + "/notebooks/output_estimates_only"
model_file = "gtsrb_quantized_8bit_verified.onnx"

estimates_output_dir = "output_estimates_only"

cfg_estimates = build.DataflowBuildConfig(
    output_dir          = estimates_output_dir,
    mvau_wwidth_max     = 80,
    target_fps          = 1000000,
    synth_clk_period_ns = 10.0,
    fpga_part           = "xc7z020clg400-1",
    steps               = build_cfg.estimate_only_dataflow_steps,
    generate_outputs=[
        build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
    ]
)

In [2]:
%%time
build.build_dataflow_cfg(model_file, cfg_estimates)
assert os.path.exists(estimates_output_dir + "/report/estimate_network_performance.json")

Building dataflow accelerator from gtsrb_quantized_8bit_verified.onnx
Intermediate outputs will be generated in /tmp/finn_dev_abc
Final outputs will be generated in output_estimates_only
Build log is at output_estimates_only/build_dataflow.log
Running step: step_qonnx_to_finn [1/10]
Running step: step_tidy_up [2/10]
Running step: step_streamline [3/10]
Running step: step_convert_to_hw [4/10]
Running step: step_create_dataflow_partition [5/10]
Running step: step_specialize_layers [6/10]
Running step: step_target_fps_parallelization [7/10]
Running step: step_apply_folding_config [8/10]
Running step: step_minimize_bit_width [9/10]
Running step: step_generate_estimate_reports [10/10]
Completed successfully
CPU times: user 2.1 s, sys: 214 ms, total: 2.31 s
Wall time: 2.37 s


In [3]:
! ls {estimates_output_dir}

auto_folding_config.json  report
build_dataflow.log	  template_specialize_layers_config.json
intermediate_models	  time_per_step.json


In [4]:
! ls {estimates_output_dir}/report

estimate_layer_config_alternatives.json  estimate_network_performance.json
estimate_layer_cycles.json		 op_and_param_counts.json
estimate_layer_resources.json


In [5]:
! cat {estimates_output_dir}/report/estimate_network_performance.json

{
  "critical_path_cycles": 170550,
  "max_cycles": 30976,
  "max_cycles_node_name": "MVAU_hls_1",
  "estimated_throughput_fps": 3228.305785123967,
  "estimated_latency_ns": 1705500.0
}

In [6]:
import json
def read_json_dict(filename):
    with open(filename, "r") as f:
        ret = json.load(f)
    return ret

In [7]:
read_json_dict(estimates_output_dir + "/report/estimate_layer_cycles.json")

{'Thresholding_rtl_0': 2304,
 'ConvolutionInputGenerator_rtl_0': 19143,
 'MVAU_hls_0': 16928,
 'ConvolutionInputGenerator_rtl_1': 17519,
 'MVAU_hls_1': 30976,
 'StreamingMaxPool_hls_0': 2420,
 'ConvolutionInputGenerator_rtl_2': 29176,
 'MVAU_hls_2': 25600,
 'StreamingMaxPool_hls_1': 500,
 'MVAU_hls_3': 20480,
 'MVAU_rtl_0': 5504}

In [8]:
read_json_dict(estimates_output_dir + "/report/estimate_layer_resources.json")

{'Thresholding_rtl_0': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 27.0,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'ConvolutionInputGenerator_rtl_0': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 324,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'MVAU_hls_0': {'BRAM_18K': 4,
  'BRAM_efficiency': 0.015625,
  'LUT': 2522,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'ConvolutionInputGenerator_rtl_1': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 684,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'MVAU_hls_1': {'BRAM_18K': 64,
  'BRAM_efficiency': 0.03125,
  'LUT': 35957,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'StreamingMaxPool_hls_0': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 0,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'ConvolutionInputGenerator_rtl_2': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 620,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'MVAU_hls_2': {'BRAM_18K': 64,
  'BRAM_efficiency': 0.125,
  'LUT':

## Launch a Build: Stitched IP, out-of-context synth and rtlsim Performance

In [2]:
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg
import os
import shutil

model_file = "model.onnx"

rtlsim_output_dir = "output_IP"


cfg_stitched_ip = build.DataflowBuildConfig(
    output_dir          = rtlsim_output_dir,
    mvau_wwidth_max     = 80,
    target_fps          = 1000000,
    synth_clk_period_ns = 10.0,
    fpga_part           = "xc7z020clg400-1",
    generate_outputs=[
        build_cfg.DataflowOutputType.STITCHED_IP,
        build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE,
        build_cfg.DataflowOutputType.OOC_SYNTH,
        build_cfg.DataflowOutputType.BITFILE,
        build_cfg.DataflowOutputType.PYNQ_DRIVER,
        build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,
    ]
)

In [3]:
import os

os.environ["XILINX_VITIS"] = "/opt/Xilinx/Vitis_HLS/2022.2"
os.environ["XILINX_VIVADO"] = "/opt/Xilinx/Vivado/2022.2"
os.environ["PATH"] = f"{os.environ['XILINX_VITIS']}/bin:{os.environ['XILINX_VIVADO']}/bin:" + os.environ["PATH"]


In [None]:
%%time
build.build_dataflow_cfg(model_file, cfg_stitched_ip)

Building dataflow accelerator from model.onnx
Intermediate outputs will be generated in /home/abc/Desktop/finn_build
Final outputs will be generated in output_IP
Build log is at output_IP/build_dataflow.log
Running step: step_qonnx_to_finn [1/19]
Running step: step_tidy_up [2/19]
Running step: step_streamline [3/19]
Running step: step_convert_to_hw [4/19]
Running step: step_create_dataflow_partition [5/19]
Running step: step_specialize_layers [6/19]
Running step: step_target_fps_parallelization [7/19]
Running step: step_apply_folding_config [8/19]
Running step: step_minimize_bit_width [9/19]
Running step: step_generate_estimate_reports [10/19]
Running step: step_hw_codegen [11/19]


In [11]:
!ls -l /opt/Xilinx


total 16
drwxrwxr-x 2 abc abc 4096 Jul 11 10:58 Downloads
drwxrwxr-x 3 abc abc 4096 Jul 11 10:27 Vitis_HLS
drwxrwxr-x 3 abc abc 4096 Jul 11 10:27 Vivado
drwxrwxr-x 8 abc abc 4096 Jul 11 10:59 xic
