# Qkeras Tutorial

## Section 1: Preparation before quantization

### 1.1: Please run the following cell to check if your qkeras and other needed packages are installed and import correctly.

In [7]:
import qkeras
from qkeras.utils import model_quantize
from qkeras.utils import model_save_quantized_weights
import numpy as np
import matplotlib.pyplot as plt
import h5py
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import roc_auc_score
from tensorflow.keras.layers import Dense, Activation, BatchNormalization, LSTM, Masking, Input, GRU, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l1
from tensorflow.keras import regularizers
from tensorflow.keras.models import load_model

### The following three sections in our tutorial is corresponding to three different ways for doing quantization in Qkeras: Post Training Quantization, Quantization Aware Training, and Auto Qkeras. 

## Section 2:  Post-training Quantization

### 2.1: What is Post-training Quantization?

### Post-training Quantization is a kind of efficient model compression technique, which can directly quantize neural network models after training. 

### 2.2: How to do Post-training quantization with qkeras?

### First, we need to have a already-trainined keras model. You can download a toy model from this github page: https://github.com/uw-acme/HLS4ML_RNN

In [3]:
# load the toy model
toy_lstm = load_model('hls-rnn-btag/new_lstm/model_lstm_weights.h5')

2022-10-12 20:52:10.819928: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-10-12 20:52:10.822024: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-10-12 20:52:10.824587: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (DESKTOP-CHSBCRQ): /proc/driver/nvidia/version does not exist
2022-10-12 20:52:10.847798: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


### The toy model we are using is an b-tag model with one LSTM layer.
### Before starting your quantization you need to know what your model looks like. 
### model.summary() is a great method in keras that you will use frequently to check the layers in your model

In [5]:
# A demonstration of what our toy model looks like

def lstmmodel(max_len, n_var, rec_units, ndense=[10], l1_reg=0,
              l2_reg=0, rec_act='sigmoid', extra_lab='none', rec_kernel_init='VarianceScaling',
             dense_kernel_init='lecun_uniform', domask=False):
    
    hidden = x_in = Input(shape=(max_len, n_var,))
    hidden = LSTM(units=rec_units,
                  recurrent_activation = rec_act,
                  kernel_initializer = rec_kernel_init, 
                  name = 'lstm1')(hidden)
    hidden = Dense(50, kernel_initializer=dense_kernel_init, name='dense_0' )(hidden)
    hidden = Activation('relu', name = 'relu_0')(hidden)
    hidden = Dense(10, kernel_initializer=dense_kernel_init, name='dense_1' )(hidden)
    hidden = Activation('relu', name = 'relu_1')(hidden)
    hidden = Dense(3, kernel_initializer=dense_kernel_init, name='dense_2' )(hidden)
    hidden = Activation('softmax', name='output_softmax')(hidden)
    model = Model(inputs=x_in, outputs=hidden)
    
    return model

l1_reg = 0
l2_reg = 0
model = lstmmodel(15, 6, 120, [50, 10], l1_reg=l1_reg, l2_reg=l2_reg)

model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 15, 6)]           0         
                                                                 
 lstm1 (LSTM)                (None, 120)               60960     
                                                                 
 dense_0 (Dense)             (None, 50)                6050      
                                                                 
 relu_0 (Activation)         (None, 50)                0         
                                                                 
 dense_1 (Dense)             (None, 10)                510       
                                                                 
 relu_1 (Activation)         (None, 10)                0         
                                                                 
 dense_2 (Dense)             (None, 3)                 33    

### Then we need to check the weight of our toy model before quantization. This is one of the most important steps in quantization for checking if you quantized your model correctly. I highly suggest you to not skip this step.

In [13]:
for layer in toy_lstm.layers:
    weights = layer.get_weights()
    print(layer.name, ":", weights)

input_1 : []
lstm1 : [array([[-0.58963495, -0.65306294,  0.50985825, ...,  1.0137032 ,
         0.02196546,  0.15770978],
       [-0.15424801,  0.57628304, -0.36964563, ..., -0.09466681,
         0.41260427, -0.08563282],
       [ 0.18494439, -0.03675471, -0.79524565, ...,  0.47767442,
        -0.09714114,  0.3135381 ],
       [-0.14735329,  0.6082327 , -0.61543125, ...,  0.26261836,
         0.08817887, -0.37429032],
       [ 0.34194303, -0.09358562,  0.01487087, ..., -0.5444205 ,
         0.01626718,  0.95701003],
       [-0.33648208, -0.5004817 , -0.34336048, ...,  0.1277528 ,
        -0.03531963,  0.3862824 ]], dtype=float32), array([[ 0.17079291,  0.15018411,  0.07714663, ..., -0.11801086,
        -0.07126733, -0.20896389],
       [ 0.00498595,  0.14601009, -0.00364938, ..., -0.19730644,
        -0.21843092, -0.11314441],
       [ 0.04462222,  0.07113124,  0.03417065, ...,  0.01916615,
        -0.07676554,  0.03190433],
       ...,
       [-0.17137632, -0.00948324,  0.07338994, ..

### After checking the weight of our toy model before quantization, we can finally do our Post-training quantization!!!

### For doing Post-training quantization, we need to create a "config" to quantize each layer separately(we don't need to quantize the input layer and the last layer when doing quantization).

In [15]:
config = {
    # give quantize-paramter to LSTM layer
    "QLSTM":{
        "kernel_quantizer" : f"quantized_bits(6,2,1)",
            "bias_quantizer" : f"quantized_bits(6,2,1)",
            "recurrent_quantizer": f"quantized_bits(6,2,1)",
            "state_quantizer" : f"quantized_bits(6,2,1)"
    },
    # give quantize-paramter to all three Dense layer
    "QDense":{
        "kernel_quantizer" : f"quantized_bits(6,2,1)",
        "bias_quantizer" : f"quantized_bits(6,2,1)"
    },
    # give quantizate-paramter to the first Activation layer
    "relu_0" : f"quantized_relu(6,2,1)",
    # give quantizate-paramter to the second Activation layer
    "relu_1" : f"quantized_relu(6,2,1)",
}

### Then we use the "model_quantize" function to quantize our toy lstm model

In [16]:
toy_qlstm = model_quantize(toy_lstm, config, 6, transfer_weights=True)

### We can also check the quantize-parameter we provided to our toy model by printing them out

In [17]:
for layer in toy_qlstm.layers:
            if hasattr(layer, "recurrent_quantizer"):
                print(layer.name, "kernel:", str(layer.kernel_quantizer_internal), "bias:", str(layer.bias_quantizer_internal), 
                     "recurrent:", str(layer.recurrent_quantizer_internal), "state:", str(layer.state_quantizer_internal))
            elif hasattr(layer, "kernel_quantizer"):
                print(layer.name, "kernel:", str(layer.kernel_quantizer_internal), "bias:", str(layer.bias_quantizer_internal))
            elif hasattr(layer, "quantized_relu"):
                print(layer.name, "quantized_relu:", str(layer.quantizer))
            else:
                print(layer.name)

input_1
lstm1 kernel: quantized_bits(6,2,1,alpha='auto_po2') bias: quantized_bits(6,2,1) recurrent: quantized_bits(6,2,1,alpha='auto_po2') state: quantized_bits(6,2,1)
dense_0 kernel: quantized_bits(6,2,1,alpha='auto_po2') bias: quantized_bits(6,2,1)
relu_0
dense_1 kernel: quantized_bits(6,2,1,alpha='auto_po2') bias: quantized_bits(6,2,1)
relu_1
dense_2 kernel: quantized_bits(6,2,1,alpha='auto_po2') bias: quantized_bits(6,2,1)
output_softmax


### To check if we quantized our model successfully, we need to check the weight for our model after quantization

In [18]:
model_save_quantized_weights(toy_qlstm, "ptq2int5fra_weight")

... quantizing model


{'lstm1': {'weights': [array([[-0.484375, -0.484375,  0.5     , ...,  0.96875 ,  0.015625,
            0.15625 ],
          [-0.15625 ,  0.484375, -0.375   , ..., -0.09375 ,  0.40625 ,
           -0.09375 ],
          [ 0.1875  , -0.03125 , -0.78125 , ...,  0.46875 , -0.09375 ,
            0.3125  ],
          [-0.140625,  0.484375, -0.625   , ...,  0.25    ,  0.09375 ,
           -0.375   ],
          [ 0.34375 , -0.09375 ,  0.      , ..., -0.53125 ,  0.015625,
            0.96875 ],
          [-0.34375 , -0.484375, -0.34375 , ...,  0.125   , -0.03125 ,
            0.375   ]], dtype=float32),
   array([[ 0.171875,  0.15625 ,  0.078125, ..., -0.125   , -0.078125,
           -0.203125],
          [ 0.      ,  0.140625,  0.      , ..., -0.203125, -0.21875 ,
           -0.109375],
          [ 0.046875,  0.078125,  0.03125 , ...,  0.015625, -0.078125,
            0.03125 ],
          ...,
          [-0.171875, -0.015625,  0.078125, ...,  0.03125 ,  0.0625  ,
            0.      ],
        

### By comparing the weight we get after training with the weight we get before training, we can tell that our model has been successfully quantized! Great job!

### For the last step in our Post-training quantization, we need to check and compare the accuracy of our model before quantization and after quantization. 

### 2.3: Now is your time to do Post-training quantization to a similiar model!

### You can download a toy model from this github page: https://github.com/uw-acme/HLS4ML_RNN

### The model we are using is another b-tag model with one GRU layer.
### Here you can see how the model looks like

In [19]:
def grumodel(max_len, n_var, rec_units, ndense=[50, 10], l1_reg=0,
              l2_reg=0, rec_act='sigmoid', extra_lab='none', rec_kernel_init='VarianceScaling',
             dense_kernel_init='lecun_uniform'):
    
    hidden = x_in = Input(shape=(max_len, n_var,))
    hidden = GRU(units=rec_units,
                  recurrent_activation = rec_act,
                  kernel_initializer = rec_kernel_init, 
                  name = 'gru')(hidden)
    
    hidden = Dense(50, kernel_initializer=dense_kernel_init, name='dense_0' )(hidden)
    hidden = Activation('relu', name = 'relu_0')(hidden)
    
    hidden = Dense(10, kernel_initializer=dense_kernel_init, name='dense_1' )(hidden)
    hidden = Activation('relu', name = 'relu_1')(hidden)

    hidden = Dense(3, kernel_initializer=dense_kernel_init, name = 'dense_2')(hidden)
    hidden = Activation('softmax', name = 'output_softmax')(hidden)
    
    model = Model(inputs=x_in, outputs=hidden)
    
    return model

l1_reg = 0
l2_reg = 0
model = grumodel(15, 6, 120, [50, 10], l1_reg=l1_reg, l2_reg=l2_reg)

model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 15, 6)]           0         
                                                                 
 gru (GRU)                   (None, 120)               46080     
                                                                 
 dense_0 (Dense)             (None, 50)                6050      
                                                                 
 relu_0 (Activation)         (None, 50)                0         
                                                                 
 dense_1 (Dense)             (None, 10)                510       
                                                                 
 relu_1 (Activation)         (None, 10)                0         
                                                                 
 dense_2 (Dense)             (None, 3)                 33  

### Don't forget to check your model's weight before quantization

In [None]:
# check the weight for our keras model
# Replace this line with your own code

### Write the "config" for applying quantization to our model

In [None]:
config = {
    # Replace this line with your own code
}

### Use the "model_quantize" function to quantize our toy lstm model

In [None]:
# Replace this line with your own code

### Check the quantize-parameter

In [None]:
# Replace this line with your own code

### Check the weight for our quantized model

In [None]:
# Replace this line with your own code