In [1]:
from pynq import Overlay
from pynq import allocate
import numpy as np
from time import time

In [2]:
# Program bitstream to FPGA
overlay = Overlay('/home/xilinx/workspace/nn.bit')

In [3]:
# Access to AXI DMA
dma = overlay.axi_dma_0
dma_send = overlay.axi_dma_0.sendchannel
dma_recv = overlay.axi_dma_0.recvchannel

In [4]:
# Allocate physical memory
input_buffer = allocate(shape=(14,), dtype=np.uint32)
output_buffer = allocate(shape=(4,), dtype=np.uint32)

In [5]:
# Measure the time required for HW ANN
t1 = time()
# Weight and bias
input_buffer[0] = 0x057A057A
input_buffer[1] = 0x0000B07A
input_buffer[2] = 0x03E10314
input_buffer[3] = 0x0000FC66
input_buffer[4] = 0x028F0433
input_buffer[5] = 0x0000FC70
input_buffer[6] = 0xFAC21C70
input_buffer[7] = 0xF5A30051
input_buffer[8] = 0x0685E399
input_buffer[9] = 0x00CC07E1
# Input
input_buffer[10] = 0x20002000
input_buffer[11] = 0x14001400
input_buffer[12] = 0x14002000
input_buffer[13] = 0x14002000
for i in range(1000000):
    # Do AXI DMA MM2S transfer
    dma_send.transfer(input_buffer)
    # Do AXI DMA S2MM transfer
    dma_recv.transfer(output_buffer)
# Output
pred_hw = np.zeros((2,4))
pred_hw[0][0] = ((output_buffer[0] & 0x0000FFFF) / 1024.0)
pred_hw[0][1] = (((output_buffer[0] & 0xFFFF0000) >> 16) / 1024.0)
pred_hw[0][2] = ((output_buffer[1] & 0x0000FFFF) / 1024.0)
pred_hw[0][3] = (((output_buffer[1] & 0xFFFF0000) >> 16) / 1024.0)
pred_hw[1][0] = ((output_buffer[2] & 0x0000FFFF) / 1024.0)
pred_hw[1][1] = (((output_buffer[2] & 0xFFFF0000) >> 16) / 1024.0)
pred_hw[1][2] = ((output_buffer[3] & 0x0000FFFF) / 1024.0)
pred_hw[1][3] = (((output_buffer[3] & 0xFFFF0000) >> 16) / 1024.0)
t2 = time()
t_diff = t2 - t1
print('Time used for HW NN: {}s'.format(t_diff))

Time used for HW NN: 416.6207766532898s


In [6]:
# Check prediction
np.round(pred_hw)

array([[1., 0., 0., 0.],
       [0., 1., 1., 1.]])

In [7]:
# Measure the time required for SW ANN
t1 = time()
k = np.array([[8, 8, 5, 5],
              [8, 5, 8, 5]])
wb2 = np.array([[1.37, 1.37, -19.88],
                [0.77, 0.97,  -0.90],
                [1.05, 0.64,  -0.89]])
wb3 = np.array([[ 7.11, -1.31, 0.08, -2.59],
                [-7.10,  1.63, 1.97,  0.20]])
for i in range(1000000):
    k_padded = np.concatenate((k, np.array([[1, 1, 1, 1]])), axis=0)
    z2 = np.matmul(wb2, k_padded)
    a2 = 1/(1+np.exp(-z2))
    a2_padded = np.concatenate((a2, np.array([[1, 1, 1, 1]])), axis=0)
    z3 = np.matmul(wb3, a2_padded)
    a3 = 1/(1+np.exp(-z3))
t2 = time()
t_diff = t2 - t1
print('Time used for SW ANN: {}s'.format(t_diff))

Time used for SW ANN: 468.4689140319824s


In [8]:
# Check prediction
np.round(a3)

array([[1., 0., 0., 0.],
       [0., 1., 1., 1.]])