# Punch Workshop 2024

## Introduction
This tutorial guides you through the process of converting a QKeras machine learning model into Vitis HLS code. You'll learn how to apply quantization-aware training (QAT) and model pruning using QKeras to optimize your model. Additionally, the tutorial covers the steps to transform the optimized model into Vitis HLS code using the HLS4ML framework, with a practical example using the MNIST dataset.

## Requirements
- tensorflow/qkeras (build the ML model)
- HLS4ML (convert the QKeras model to HLS) https://github.com/fastmachinelearning/hls4ml
- Vitis_HLS 2022.2 (compile the HLS code and export the IP)
- Vitis/Vivado 2022.2 (generate the xclbin file that can run on Alveo card)
- Xilinx Runtime (XRT is a low-level communication layer (APIs and drivers) between the host and the card.) https://xilinx.github.io/XRT/2022.2/html/index.html
- Development Target Platform (The deployment target platform is the communication layer physically implemented and flashed into the card.) https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/alveo/u55c.html
- pynq >3.0.1 (a Jupyter-based framework with Python APIs for using AMD Xilinx Adaptive Computing platforms) https://pynq.readthedocs.io/en/latest/

In [None]:
%load_ext autoreload
%autoreload 2

import tensorflow as tf
from tensorflow import keras
import qkeras
from tensorflow.keras.callbacks import ModelCheckpoint
import datetime
from tensorflow.keras.utils import to_categorical
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import numpy as np
#import plotting
%matplotlib inline
# Seed NumPy and TensorFlow with the same fixed value to make runs more repeatable.
seed = 0
np.random.seed(seed)
# (tensorflow is already imported at the top of this cell; this re-import is harmless)
import tensorflow as tf
tf.random.set_seed(seed)
import os

# NOTE(review): "-1" presumably hides every CUDA device so TensorFlow runs on the CPU — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# Put the Vitis HLS 2022.2 bin directory at the front of PATH.
# NOTE(review): this is a machine-specific install location; adjust it for your own system.
os.environ['PATH'] = '/home/ypmen/Data/Xilinx/Vitis_HLS/2022.2/bin:' + os.environ['PATH']

In [None]:
# Load the MNIST dataset
# load_data() is unpacked as a (train, test) pair, each an (images, labels) tuple.
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

In [None]:
# Rescale pixel values by 1/255 so they lie between 0 and 1.
train_images = train_images / 255.0
test_images = test_images / 255.0

# Append a trailing single-channel axis: (N, H, W) -> (N, H, W, 1).
train_images = train_images[..., np.newaxis]
test_images = test_images[..., np.newaxis]

In [None]:
from tensorflow.keras.layers import *
from qkeras import *
# Define the model architecture:
#   input quantizer
#   -> 2 x (QConv2D -> quantized ReLU -> QBatchNormalization -> quantizer -> MaxPooling2D)
#   -> Flatten -> QDense(10) -> Softmax
model = keras.Sequential([
    # Quantize the 28x28x1 input image.
    QActivation(activation=quantized_bits(8, 2), input_shape=(28, 28, 1)),

    # --- Convolution stage 1 ---
    QConv2D(
        filters=8,
        kernel_size=3,
        activation=quantized_bits(8, 2),
        kernel_quantizer=quantized_bits(8, 2, alpha=1),
        bias_quantizer=quantized_bits(8, 2, alpha=1),
        kernel_initializer='lecun_uniform',
    ),
    QActivation(activation=quantized_relu(8, 2)),
    QBatchNormalization(
        beta_quantizer=quantized_bits(32, 8),
        gamma_quantizer=quantized_bits(32, 8),
        mean_quantizer=quantized_bits(32, 8),
        variance_quantizer=quantized_bits(32, 8),
    ),
    QActivation(activation=quantized_bits(8, 2)),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'),

    # --- Convolution stage 2 (same layout as stage 1) ---
    QConv2D(
        filters=8,
        kernel_size=3,
        activation=quantized_bits(8, 2),
        kernel_quantizer=quantized_bits(8, 2, alpha=1),
        bias_quantizer=quantized_bits(8, 2, alpha=1),
        kernel_initializer='lecun_uniform',
    ),
    QActivation(activation=quantized_relu(8, 2)),
    QBatchNormalization(
        beta_quantizer=quantized_bits(32, 8),
        gamma_quantizer=quantized_bits(32, 8),
        mean_quantizer=quantized_bits(32, 8),
        variance_quantizer=quantized_bits(32, 8),
    ),
    QActivation(activation=quantized_bits(8, 2)),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'),

    # --- Classifier head: one output per digit class ---
    Flatten(),
    QDense(
        units=10,
        kernel_quantizer=quantized_bits(8, 2, alpha=1),
        bias_quantizer=quantized_bits(8, 2, alpha=1),
        kernel_initializer='lecun_uniform',
    ),
    Softmax(),
])

model.summary()

In [None]:
# Compile the model
#
# The network ends in a Softmax layer, so its outputs are class probabilities,
# not logits. The loss must therefore be built with from_logits=False;
# from_logits=True would tell Keras to treat those probabilities as raw logits.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])


# callbacks
# TensorBoard logs go to a directory named after the current timestamp,
# so each run gets its own log folder.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Save the model only when the monitored validation loss improves.
# NOTE(review): the directory is spelled 'chechpoint' (sic); left unchanged so
# existing files on disk keep resolving — rename deliberately if desired.
checkpoint = ModelCheckpoint('chechpoint/test.h5', monitor='val_loss', save_best_only=True)

In [None]:
# Fit for 10 epochs. The test split is passed as validation data, and the
# TensorBoard and checkpoint callbacks run during training.
model.fit(
    train_images,
    train_labels,
    epochs=10,
    validation_data=(test_images, test_labels),
    callbacks=[tensorboard_callback, checkpoint],
)

In [None]:
# Replace the in-memory `model` with the one stored in 'mnist.h5'.
# NOTE(review): the save call below is commented out, so 'mnist.h5' must already
# exist on disk (e.g. from an earlier session); un-comment it once to create the file.
#model.save('mnist.h5')
model = qkeras.utils.load_qmodel('mnist.h5')

In [None]:
import tensorflow_model_optimization as tfmot

# Keyword arguments forwarded to prune_low_magnitude for every wrapped layer:
# a constant 0.5 target sparsity starting at step 0, with 1x1 blocks.
pruning_params = dict(
    pruning_schedule=tfmot.sparsity.keras.ConstantSparsity(
        target_sparsity=0.5,
        begin_step=0,
    ),
    block_size=(1, 1),
    block_pooling_type='AVG',
)

def apply_pruning(layer):
    """Return `layer` wrapped for magnitude pruning when it is prunable.

    QDense and QConv2D layers are wrapped with
    ``tfmot.sparsity.keras.prune_low_magnitude`` using the module-level
    ``pruning_params``; any other layer is returned unchanged.
    """
    if not isinstance(layer, (QDense, QConv2D)):
        return layer
    return tfmot.sparsity.keras.prune_low_magnitude(layer, **pruning_params)

# Clone the model, passing every layer through apply_pruning so that the
# QDense / QConv2D layers come back wrapped for pruning.
model_for_pruning = tf.keras.models.clone_model(
    model,
    clone_function=apply_pruning,
)

callbacks = [
    # Callback used by tfmot to update the pruning step while training a pruned model.
    tfmot.sparsity.keras.UpdatePruningStep(),
    # Log sparsity and other metrics in Tensorboard.
    tfmot.sparsity.keras.PruningSummaries(log_dir=log_dir)
]

# The cloned network still ends in a Softmax layer, so it outputs
# probabilities rather than logits: the loss must use from_logits=False.
model_for_pruning.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

model_for_pruning.summary()

In [None]:
# Fit the pruning-wrapped model for 10 epochs with the pruning callbacks;
# the test split is passed as validation data.
model_for_pruning.fit(
    train_images,
    train_labels,
    epochs=10,
    validation_data=(test_images, test_labels),
    callbacks=callbacks,
)

In [None]:
# Score `model` on the test split.

from sklearn.metrics import classification_report

# To score a previously saved model instead, reload it first:
#model = qkeras.utils.load_qmodel('mnist.h5')

# Per-class report, taking the highest-scoring class of each prediction.
pred = model.predict(test_images)
predicted = pred.argmax(axis=1)
report = classification_report(test_labels, predicted)
print(report)

# Loss and accuracy as computed by Keras itself.
test_loss, test_acc = model.evaluate(test_images, test_labels)
print('Test accuracy:', test_acc)

In [None]:
# Score `model_for_pruning` on the test split.

from sklearn.metrics import classification_report

# Per-class report, taking the highest-scoring class of each prediction.
pred = model_for_pruning.predict(test_images)
predicted = pred.argmax(axis=1)
report = classification_report(test_labels, predicted)
print(report)

# Loss and accuracy as computed by Keras itself.
test_loss, test_acc = model_for_pruning.evaluate(test_images, test_labels)
print('Test accuracy:', test_acc)

# HLS4ML

In [None]:
import hls4ml

import yaml

# Read the hls4ml configuration from config.yaml in the working directory.
with open("config.yaml", 'r') as ymlfile:
    config = yaml.safe_load(ymlfile)

# Three-line banner (rule / title / rule).
print(
    "-----------------------------------",
    "Configuration",
    "-----------------------------------",
    sep="\n",
)

# Convert the Keras model into an hls4ml project for the Vitis backend,
# using streaming I/O and the given FPGA part number.
hls_model = hls4ml.converters.convert_from_keras_model(
    model,
    hls_config=config,
    output_dir='model_1/hls4ml_prj',
    part='xcu55c-fsvh2892-2L-e',
    io_type='io_stream',
    backend='Vitis',
)

In [None]:
# Draw the hls4ml model graph with shapes and precisions shown; no output file is given (to_file=None).
hls4ml.utils.plot_model(hls_model, show_shapes=True, show_precision=True, to_file=None)

In [None]:
# Compile the hls4ml model; the cells below call hls_model.predict() on it.
# NOTE(review): presumably this builds the C++ emulation library that predict() relies on — confirm in the hls4ml docs.
hls_model.compile()

In [None]:
# Predicted class per test image from the Keras model (arg-max along axis 1).
labels_keras = np.argmax(model.predict(test_images), 1)

In [None]:
# Predicted class per test image from the hls4ml model (arg-max along axis 1).
labels_hls = np.argmax(hls_model.predict(test_images), 1)

In [None]:
from sklearn.metrics import accuracy_score

# Report test-set accuracy of the Keras model next to the hls4ml model.
print(f"Keras  Accuracy: {accuracy_score(test_labels, labels_keras)}")
print(f"hls4ml Accuracy: {accuracy_score(test_labels, labels_hls)}")

In [None]:
# Run the hls4ml model on the first two test images after rounding every pixel
# to a multiple of 2**-5 (i.e. 5 fractional bits).
hls_model.predict(np.round(test_images[0:2]*2**5)/2**5)

In [None]:
# Indices of the test images where the Keras and hls4ml predicted labels differ.
np.arange(len(labels_keras))[labels_keras != labels_hls]

In [None]:
# Build the HLS project with C simulation turned off (csim=False).
# NOTE(review): presumably this invokes the Vitis HLS tool found on PATH — confirm.
hls_model.build(csim=False)

In [None]:
# Read the report for the project directory used as output_dir in the conversion step.
hls4ml.report.read_vivado_report('model_1/hls4ml_prj')

In [None]:
# Write the first test image — rounded to multiples of 2**-5 and flattened to 1-D —
# into the project's tb_data directory as the testbench input features.
np.savetxt('model_1/hls4ml_prj/tb_data/tb_input_features.dat', (np.round(test_images[0]*2**5)/2**5).flatten())

In [None]:
# Write the hls4ml prediction for the first test image as the testbench reference output.
# predict() is called on the first two (rounded) images and only row 0 is kept.
# NOTE(review): the reason for passing two images instead of one is not visible here — confirm.
np.savetxt('model_1/hls4ml_prj/tb_data/tb_output_predictions.dat', hls_model.predict(np.round(test_images[0:2]*2**5)/2**5)[0])

In [None]:
# Flattened copy of the first test image with pixels rounded to multiples of 2**-5.
a = (np.round(test_images[0]*2**5)/2**5).flatten()

In [None]:
# Non-zero entries of `a`, scaled by 32 (= 2**5) so each shows as a whole number of 2**-5 steps.
a[a != 0] * 32