## RandomForestClassifier on PYNQ


### Load the dataset

In [None]:
import struct

import numpy as np
# import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Fetch the Iris dataset and separate the feature matrix from the labels
iris = load_iris()
X = iris.data
y = iris.target

# Hold out a test split; random_state is fixed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Show the shape of each split, one per line
print(X_train.shape, X_test.shape, sep='\n')

### Load the IP

Load the IP generated by Vivado. See also [PYNQ Overlay tutorial](https://pynq.readthedocs.io/en/v2.4/overlay_design_methodology/overlay_tutorial.html).

In [None]:
from pynq import Overlay

# Bitstream file of the overlay to load
BITSTREAM = 'out.bit'
ol = Overlay(BITSTREAM)

# Handle to the `predict_0` IP of the overlay, used as the inference function
pred = ol.predict_0

### Hardware Inference

In [None]:
# PYNQ mmap address
mmap_input_addr = 0x10
mmap_output_addr = 0x18

# Allocate shared memory buffer
from pynq import Xlnk
xlnk = Xlnk()
input_buffer = xlnk.cma_array(shape=(X_train.shape[0],), dtype=np.float32)  # input buffer for inference function
# input_buffer = xlnk.cma_array(shape=(X_train.shape[0],), dtype=np.float16)  # in the case of FP16

def pred_hardware(input_buffer):
    input_buffer[:] = X_test.values[i]
    pred.write(mmap_input_addr, input_buffer.physical_address)
    
    # read output of the IP
    y_pred_byte = pred.read(mmap_output_addr).to_bytes(4, byteorder='little')
    y_pred = struct.unpack('<f', y_pred_byte)
    return y_pred

In [None]:
y_pred_hw = np.array([X_test[i] for i in range(len(X_test))])
y_pred_hw

### Software Inference
We'll perform classification in a pure software environment for comparison.

In [None]:
# Restore the original (software) classifier from its pickle file.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
import pickle

with open('randomforest.pkl', 'rb') as model_file:
    clf = pickle.load(model_file)

print(clf)

In [None]:
# Predict the test-set labels in software with the loaded classifier
y_pred_sw = clf.predict(X_test)
y_pred_sw

In [None]:
%%timeit
# Time the software prediction over the whole test set
clf.predict(X_test)

### Comparison

In [None]:
from sklearn.metrics import accuracy_score

# Score both prediction sets against the same ground-truth labels
accuracy_hw = accuracy_score(y_test, y_pred_hw)
accuracy_sw = accuracy_score(y_test, y_pred_sw)

# Label each figure with the path that produced it
print(f'Hardware accuracy: {accuracy_hw}')
print(f'Software accuracy: {accuracy_sw}')