## RandomForestClassifier on PYNQ


### Load the dataset

In [None]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Fetch the digits dataset and separate the feature matrix from the labels.
digits = load_digits()
X = digits.data
y = digits.target

# Hold out a test split; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Report the shape of the training split, then the test split.
print(X_train.shape, X_test.shape, sep='\n')

### Load the IP

Load the IP generated by Vivado. See also [PYNQ Overlay tutorial](https://pynq.readthedocs.io/en/v2.4/overlay_design_methodology/overlay_tutorial.html).

In [2]:
from pynq import Overlay

# Load the overlay described by the Vivado-generated bitstream file.
bitstream_path = 'out.bit'
ol = Overlay(bitstream_path)

# Handle to the IP that performs inference.
pred = ol.predict_0

### Hardware Inference

In [3]:
# Register offsets inside the IP's memory-mapped interface
mmap_input_addr = 0x10   # the input buffer's physical address is written here
mmap_output_addr = 0x18  # the prediction is read back from here

# Allocate the buffer shared with the IP.
# It receives ONE sample at a time, so its length must be the number of
# features (X_train.shape[1]), not the number of training samples
# (X_train.shape[0]); otherwise copying a sample into it fails to broadcast.
from pynq import Xlnk
xlnk = Xlnk()
input_buffer = xlnk.cma_array(shape=(X_train.shape[1],), dtype=np.float32)  # input buffer for inference function
# input_buffer = xlnk.cma_array(shape=(X_train.shape[1],), dtype=np.float16)  # in the case of FP16

def pred_hardware(input_buffer, sample=None):
    """Run one inference on the hardware IP and return its raw prediction.

    Parameters
    ----------
    input_buffer : array allocated with ``xlnk.cma_array``
        Shared buffer whose ``physical_address`` is handed to the IP.
    sample : array-like, optional
        Feature vector of a single sample. When given, it is copied into
        ``input_buffer`` before the IP is addressed. When omitted, the buffer
        is used as-is, so the caller must have filled it beforehand.

    Returns
    -------
    tuple
        One-element tuple ``(prediction,)`` exactly as produced by
        ``struct.unpack('<f', ...)``: the 32-bit output word reinterpreted
        as a little-endian float.
    """
    import struct  # imported here so the function does not rely on another cell

    # Copy the sample into the shared buffer (values are cast to the buffer's dtype).
    if sample is not None:
        input_buffer[:] = sample

    # Tell the IP where the input data lives.
    pred.write(mmap_input_addr, input_buffer.physical_address)
    # NOTE(review): no explicit start/done handshake is issued here; presumably
    # the IP needs none -- confirm against the generated IP's control interface.

    # read output of the IP: a 32-bit word holding the bits of a float
    raw_word = pred.read(mmap_output_addr)
    y_pred_byte = raw_word.to_bytes(4, byteorder='little')
    y_pred = struct.unpack('<f', y_pred_byte)
    return y_pred

In [4]:
# Run every test sample through the hardware IP, one sample per call.
# Each call yields a one-element result, so flatten to shape (n_samples,).
y_pred_hw = np.array(
    [pred_hardware(input_buffer, sample) for sample in X_test]
).ravel()
accuracy_score(y_test, y_pred_hw)

0.8644444444444445

In [5]:
%%timeit
y_pred_hw = np.array([X_test[i] for i in range(len(X_test))])


61.5 µs ± 69.1 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


### Software Inference
We'll perform classification in a pure software environment for comparison.

In [7]:
# Load original classifier
import pickle
# Restore the pickled classifier from disk.
# CAUTION: unpickling can execute arbitrary code -- only load files you trust.
with open('randomforest.pkl', mode='rb') as model_file:
    clf = pickle.load(model_file)

# Show the restored estimator and its hyper-parameters.
print(clf)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=6, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=5,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)


In [8]:
# Classify the held-out samples with the software classifier.
y_pred_sw = clf.predict(X_test)
# Last expression of the cell: accuracy of the software predictions on the test labels.
accuracy_score(y_true=y_test, y_pred=y_pred_sw)

0.8755555555555555

In [9]:
%%timeit
# Time one batch prediction over the whole test set with the software classifier.
clf.predict(X_test)

1.11 ms ± 4.46 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
