In [1]:
import hls4ml
from tensorflow.keras.models import load_model
from qkeras.utils import _add_supported_quantized_objects
import tensorflow as tf 
from util import create_config

# Select which saved model to load. `index` is the leading number in the file
# name; `conv_code` / `dens_code` are embedded in both the directory and the
# file name (presumably per-layer conv/dense settings - TODO confirm).
index      = 0
conv_code  = '1111'
dens_code  = '001'
# No trailing slash: the paths built from model_dir add their own '/'
# separator, so a trailing slash here produced '//' in the resulting paths.
model_dir  = f'./models/deeppicar-stats/models/{conv_code}-{dens_code}_64x64x1_0.1'
model_name = f'{model_dir}/{index}-{conv_code}-{dens_code}-0.1.h5'

# Fill `co` with the QKeras objects and hand it to load_model as custom_objects
# so the quantized layers in the .h5 file can be deserialized.
co = {}
_add_supported_quantized_objects(co)
model = load_model(model_name, custom_objects=co)

2025-04-30 10:08:09.916695: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-30 10:08:09.917818: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-30 10:08:09.937822: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-30 10:08:09.938430: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-04-30 10:08:10.878882: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful N

## Print Model Summary

In [12]:
# Print the per-layer summary (layer type, output shape, parameter count) of the loaded model.
model.summary()

Model: "sequential_105"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 q_activation_490 (QActivati  (None, 64, 64, 1)        0         
 on)                                                             
                                                                 
 conv1 (QConv2D)             (None, 30, 30, 2)         52        
                                                                 
 q_activation_491 (QActivati  (None, 30, 30, 2)        0         
 on)                                                             
                                                                 
 q_conv2d_420 (QConv2D)      (None, 13, 13, 4)         204       
                                                                 
 q_activation_492 (QActivati  (None, 13, 13, 4)        0         
 on)                                                             
                                                    

In [13]:
### Create HLS Model
# The project directory lives under the model's own directory and is passed
# to create_config as its output_dir.
directory = f'{model_dir}/hls/project_1'
hls_conversion = create_config(model, output_dir=directory)
config, hls_model = hls_conversion
print(config)

Interpreting Sequential
Topology:
Layer name: input_1, layer type: InputLayer, input shapes: [[None, 64, 64, 1]], output shape: [None, 64, 64, 1]
Layer name: q_activation_490, layer type: Activation, input shapes: [[None, 64, 64, 1]], output shape: [None, 64, 64, 1]
Layer name: conv1, layer type: QConv2D, input shapes: [[None, 64, 64, 1]], output shape: [None, 30, 30, 2]
Layer name: q_activation_491, layer type: Activation, input shapes: [[None, 30, 30, 2]], output shape: [None, 30, 30, 2]
Layer name: q_conv2d_420, layer type: QConv2D, input shapes: [[None, 30, 30, 2]], output shape: [None, 13, 13, 4]
Layer name: q_activation_492, layer type: Activation, input shapes: [[None, 13, 13, 4]], output shape: [None, 13, 13, 4]
Layer name: q_conv2d_421, layer type: QConv2D, input shapes: [[None, 13, 13, 4]], output shape: [None, 5, 5, 5]
Layer name: q_activation_493, layer type: Activation, input shapes: [[None, 5, 5, 5]], output shape: [None, 5, 5, 5]
Layer name: q_conv2d_422, layer type: QCo

In [14]:
# Compile the converted model; the output below shows this step writing the HLS project.
# NOTE(review): presumably required before hls_model.predict() can be used - confirm in the hls4ml docs.
hls_model.compile()

Writing HLS project
Done


In [41]:
### Check whether the HLS model shows a similar error to the QKeras model
from get_dataset import get_dataset
from util import calculate_accuracy
import numpy as np


def _report_metrics(label, preds, targets):
    """Print the mean squared error and the accuracy of ``preds`` vs ``targets``.

    Args:
        label: Prefix for the MSE line (e.g. ``'HLS'``).
        preds: Array of model predictions; flattened before the MSE is taken.
        targets: Array of reference values; flattened before the MSE is taken.
    """
    mse = np.mean((preds.reshape(-1) - targets.reshape(-1)) ** 2)
    print(f'{label} MSE: {mse}')
    print(f'Accuracy: {calculate_accuracy(preds, targets)}')


imgs_train, imgs_test, vals_train, vals_test = get_dataset('./deeppicar')

# Convert once so both models are scored on exactly the same inputs and targets.
imgs_test_f32 = imgs_test.astype(np.float32)
vals_test = np.array(vals_test)

preds = hls_model.predict(imgs_test_f32)
_report_metrics('HLS', preds, vals_test)

preds = model.predict(imgs_test_f32)
_report_metrics('QKeras', preds, vals_test)

HLS MSE: 0.0369175523519516
Accuracy: 0.8472
QKeras MSE: 0.03689606487751007
Accuracy: 0.8472


In [2]:
## Get best model
# Convert every model flagged 'Yes' in valid_models.txt to HLS, score it on the
# test set, and remember the configuration with the lowest HLS MSE.
from get_dataset import get_dataset
from util import calculate_accuracy
import numpy as np
import pandas as pd

imgs_train, imgs_test, vals_train, vals_test = get_dataset('./deeppicar')

# The same project directory is passed to create_config for every candidate.
directory = "tmp/tmp_prj_hls_test"
x = pd.read_csv('valid_models.txt',dtype=str)
y=x[x['Good']=='Yes']

co = {}
_add_supported_quantized_objects(co)

# Inputs and targets are identical for every candidate, so convert them once
# instead of twice per loop iteration.
imgs_test_f32 = imgs_test.astype(np.float32)
vals_test = np.array(vals_test)

best_loss = np.inf
best_config = ("","",-1)
# Rows are unpacked positionally, so the file must have exactly six columns in
# the order conv, dense, height, width, mult, <ignored>.
# All values arrive as strings because of dtype=str above.
for conv, dense, height, width, mult, _ in y.itertuples(index=False, name=None):
    height, width = int(height), int(width)
    mult = float(mult)

    print(f'Model: {conv}, {dense}, {mult}')

    model_dir = f"models/deeppicar-stats/models/{conv}-{dense}_{height}x{width}x1_{mult}"
    model_path = f"{model_dir}/0-{conv}-{dense}-{mult}.h5"

    model = load_model(model_path, custom_objects=co)
    config, hls_model = create_config(model, output_dir=directory)
    hls_model.compile()

    preds = hls_model.predict(imgs_test_f32)

    loss = np.mean(((preds.reshape(-1) - vals_test.reshape(-1)) ** 2))
    # mse
    print(f'\tHLS MSE: {loss}')
    print(f'\tAccuracy: {calculate_accuracy(preds, vals_test)}')

    # Selection is based on the HLS model's MSE, not the QKeras model's.
    if loss < best_loss:
        best_loss = loss
        best_config = (conv, dense, mult)

    # Score the original QKeras model on the same data for comparison.
    preds = model.predict(imgs_test_f32)

    # mse
    print(f'\tQKeras MSE: {np.mean(((preds.reshape(-1) - vals_test.reshape(-1)) ** 2))}')
    print(f'\tAccuracy: {calculate_accuracy(preds, vals_test)}')


print(f'Loss: {best_loss}, Config: {best_config}')

Model: 1100, 000, 0.1
Interpreting Sequential
Topology:
Layer name: input_1, layer type: InputLayer, input shapes: [[None, 64, 64, 1]], output shape: [None, 64, 64, 1]
Layer name: q_activation, layer type: Activation, input shapes: [[None, 64, 64, 1]], output shape: [None, 64, 64, 1]
Layer name: conv1, layer type: QConv2D, input shapes: [[None, 64, 64, 1]], output shape: [None, 30, 30, 2]
Layer name: q_activation_1, layer type: Activation, input shapes: [[None, 30, 30, 2]], output shape: [None, 30, 30, 2]
Layer name: q_conv2d, layer type: QConv2D, input shapes: [[None, 30, 30, 2]], output shape: [None, 13, 13, 4]
Layer name: q_activation_2, layer type: Activation, input shapes: [[None, 13, 13, 4]], output shape: [None, 13, 13, 4]
Layer name: q_conv2d_1, layer type: QConv2D, input shapes: [[None, 13, 13, 4]], output shape: [None, 5, 5, 5]
Layer name: q_activation_3, layer type: Activation, input shapes: [[None, 5, 5, 5]], output shape: [None, 5, 5, 5]
Layer name: flatten, layer type: Re



	HLS MSE: 0.06740472465753555
	Accuracy: 0.8472
	QKeras MSE: 0.036880362778902054
	Accuracy: 0.8472
Model: 1100, 010, 0.1
Interpreting Sequential
Topology:
Layer name: input_1, layer type: InputLayer, input shapes: [[None, 64, 64, 1]], output shape: [None, 64, 64, 1]
Layer name: q_activation_40, layer type: Activation, input shapes: [[None, 64, 64, 1]], output shape: [None, 64, 64, 1]
Layer name: conv1, layer type: QConv2D, input shapes: [[None, 64, 64, 1]], output shape: [None, 30, 30, 2]
Layer name: q_activation_41, layer type: Activation, input shapes: [[None, 30, 30, 2]], output shape: [None, 30, 30, 2]
Layer name: q_conv2d_40, layer type: QConv2D, input shapes: [[None, 30, 30, 2]], output shape: [None, 13, 13, 4]
Layer name: q_activation_42, layer type: Activation, input shapes: [[None, 13, 13, 4]], output shape: [None, 13, 13, 4]
Layer name: q_conv2d_41, layer type: QConv2D, input shapes: [[None, 13, 13, 4]], output shape: [None, 5, 5, 5]
Layer name: q_activation_43, layer type: 

In [4]:
# Convert one specific saved model into its own project directory, compile it,
# and run the HLS build with C simulation disabled.
directory = "best_model/model_dir"
best_model_path = "models/deeppicar-stats/models/1100-101_64x64x1_0.1/0-1100-101-0.1.h5"
model = load_model(best_model_path, custom_objects=co)
config, hls_model = create_config(model, output_dir=directory)
hls_model.compile()
print(config)
# Kept as the cell's final expression so the value returned by build() is displayed.
hls_model.build(csim=False)

Interpreting Sequential
Topology:
Layer name: input_1, layer type: InputLayer, input shapes: [[None, 64, 64, 1]], output shape: [None, 64, 64, 1]
Layer name: q_activation_100, layer type: Activation, input shapes: [[None, 64, 64, 1]], output shape: [None, 64, 64, 1]
Layer name: conv1, layer type: QConv2D, input shapes: [[None, 64, 64, 1]], output shape: [None, 30, 30, 2]
Layer name: q_activation_101, layer type: Activation, input shapes: [[None, 30, 30, 2]], output shape: [None, 30, 30, 2]
Layer name: q_conv2d_100, layer type: QConv2D, input shapes: [[None, 30, 30, 2]], output shape: [None, 13, 13, 4]
Layer name: q_activation_102, layer type: Activation, input shapes: [[None, 13, 13, 4]], output shape: [None, 13, 13, 4]
Layer name: q_conv2d_101, layer type: QConv2D, input shapes: [[None, 13, 13, 4]], output shape: [None, 5, 5, 5]
Layer name: q_activation_103, layer type: Activation, input shapes: [[None, 5, 5, 5]], output shape: [None, 5, 5, 5]
Layer name: flatten_25, layer type: Resha

{'CSynthesisReport': {'TargetClockPeriod': '30.00',
  'EstimatedClockPeriod': '20.557',
  'BestLatency': '225240',
  'WorstLatency': '225290',
  'IntervalMin': '16386',
  'IntervalMax': '225282',
  'BRAM_18K': '33',
  'DSP': '42',
  'FF': '10718',
  'LUT': '17303',
  'URAM': '0',
  'AvailableBRAM_18K': '100',
  'AvailableDSP': '90',
  'AvailableFF': '41600',
  'AvailableLUT': '20800',
  'AvailableURAM': '0'}}

In [5]:
# Print the HLS reports found under `directory` (the project directory used for the build).
hls4ml.report.read_vivado_report(directory)

Found 1 solution(s) in best_model/model_dir/myproject_prj.
Reports for solution "solution1":

C simulation report not found.
SYNTHESIS REPORT:
== Vitis HLS Report for 'myproject'
* Date:           Wed Apr 30 10:13:27 2025

* Version:        2024.2 (Build 5238294 on Nov  8 2024)
* Project:        myproject_prj
* Solution:       solution1 (Vivado IP Flow Target)
* Product family: artix7
* Target device:  xc7a35t-cpg236-1


== Performance Estimates
+ Timing: 
    * Summary: 
    +--------+----------+-----------+------------+
    |  Clock |  Target  | Estimated | Uncertainty|
    +--------+----------+-----------+------------+
    |ap_clk  |  30.00 ns|  20.557 ns|     8.10 ns|
    +--------+----------+-----------+------------+

+ Latency: 
    * Summary: 
    +---------+---------+----------+----------+-------+--------+----------+
    |  Latency (cycles) |  Latency (absolute) |    Interval    | Pipeline |
    |   min   |   max   |    min   |    max   |  min  |   max  |   Type   |
    +------