In [1]:
from pynq import Overlay
from pynq import allocate
import numpy as np

# Load the design's bitstream onto the FPGA fabric
overlay = Overlay('/home/xilinx/design_1.bit')

# Look up the AXI DMA block once and derive both channel handles from it:
# the send channel is used for MM2S, the receive channel for S2MM.
dma = overlay.axi_dma_0
dma_send = dma.sendchannel
dma_recv = dma.recvchannel

In [2]:
# Reserve the source buffer for the AXI DMA MM2S direction:
# 44 unsigned 32-bit words of physical memory.
input_buffer = allocate(dtype=np.uint32, shape=(44,))

In [3]:
# Build the whole 44-word payload as one table instead of word-by-word stores.
# Each row is a group of four 32-bit words; the fourth word of every group is
# zero in this data set.
payload = np.array([
    # Weight and bias (words 0-27)
    [0x0533fb33, 0xfacc06cc, 0x0000facc, 0x00000000],
    [0x02000133, 0x040000cc, 0x0000fc00, 0x00000000],
    [0x00660266, 0x06000333, 0x0000fc00, 0x00000000],
    [0xfb330533, 0x0533fa66, 0x0000fc66, 0x00000000],
    [0x01330533, 0x01990200, 0x0000fc00, 0x00000000],
    [0xfecc14cc, 0xf2000333, 0xfa000066, 0x00000000],
    [0x0066eccc, 0x100002cc, 0xfa660399, 0x00000000],
    # Input (words 28-43)
    [0x1c000800, 0x0c001800, 0x0c001800, 0x00000000],
    [0x08002800, 0x28002000, 0x08002400, 0x00000000],
    [0x0c001400, 0x0c000400, 0x18001400, 0x00000000],
    [0x0c000c00, 0x04001800, 0x28001800, 0x00000000],
], dtype=np.uint32)

# Row-major flattening reproduces the original word order 0..43.
input_buffer[:] = payload.ravel()

In [4]:
# Read the buffer back and show every word as zero-padded upper-case hex
for word in input_buffer:
    print("0x%08X" % (word))

0x0533FB33
0xFACC06CC
0x0000FACC
0x00000000
0x02000133
0x040000CC
0x0000FC00
0x00000000
0x00660266
0x06000333
0x0000FC00
0x00000000
0xFB330533
0x0533FA66
0x0000FC66
0x00000000
0x01330533
0x01990200
0x0000FC00
0x00000000
0xFECC14CC
0xF2000333
0xFA000066
0x00000000
0x0066ECCC
0x100002CC
0xFA660399
0x00000000
0x1C000800
0x0C001800
0x0C001800
0x00000000
0x08002800
0x28002000
0x08002400
0x00000000
0x0C001400
0x0C000400
0x18001400
0x00000000
0x0C000C00
0x04001800
0x28001800
0x00000000


In [5]:
# Do AXI DMA MM2S transfer: hand input_buffer to the send channel.
# NOTE(review): no wait() is issued in this cell -- confirm against the PYNQ
# DMA documentation whether transfer() can return before the data has been
# fully streamed out.
dma_send.transfer(input_buffer)

In [6]:
# Reserve the destination buffer for the AXI DMA S2MM direction:
# 8 unsigned 32-bit words of physical memory.
output_buffer = allocate(dtype=np.uint32, shape=(8,))

In [7]:
# Show the destination buffer's contents before anything has been received
for word in output_buffer:
    print("0x%08X" % (word))

0x00000000
0x00000000
0x00000000
0x00000000
0x00000000
0x00000000
0x00000000
0x00000000


In [8]:
# Do AXI DMA S2MM transfer: arm the receive channel so the design's results
# are written into output_buffer.
dma_recv.transfer(output_buffer)

# transfer() only starts a DMA operation. Block until both directions have
# finished so later cells never read output_buffer while it is still being
# written (the send was started in an earlier cell and was not waited on).
dma_send.wait()
dma_recv.wait()

In [9]:
# Show the destination buffer's contents once the receive transfer was issued
for word in output_buffer:
    print("0x%08X" % (word))

0x000B03F1
0x03F1000E
0x000C03ED
0x00000000
0x03F1000A
0x000A03EF
0x03F0000E
0x00000000


In [10]:
# Output
# View the 8 received words as two groups of four and keep the first three
# words of each group; words 3 and 7 are not decoded.
result_words = np.asarray(output_buffer).reshape(2, 4)[:, :3]

# Every word carries two 16-bit fields, read here as unsigned values.
# Dividing by 1024.0 rescales the raw field by 2**10.
# (Shifting a 32-bit word right by 16 equals masking with 0xFFFF0000 first.)
low_half = (result_words & 0x0000FFFF) / 1024.0
high_half = (result_words >> 16) / 1024.0

# Interleave: the low half-word fills even columns, the high half-word odd ones.
pred_hw = np.zeros((2, 6))
pred_hw[:, 0::2] = low_half
pred_hw[:, 1::2] = high_half

In [11]:
# Prediction: display the decoded (2, 6) array built from output_buffer
pred_hw

array([[0.98535156, 0.01074219, 0.01367188, 0.98535156, 0.98144531,
        0.01171875],
       [0.00976562, 0.98535156, 0.98339844, 0.00976562, 0.01367188,
        0.984375  ]])

In [12]:
# Rounding: snap each decoded value to the nearest whole number for display
pred_hw.round()

array([[1., 0., 0., 1., 1., 0.],
       [0., 1., 1., 0., 0., 1.]])

In [13]:
# Release the DMA buffers to prevent a memory leak.
# `del` alone only removes the notebook names; the underlying memory would stay
# reserved for as long as any other reference to a buffer is still alive, so
# free it explicitly first and then drop the names.
input_buffer.freebuffer()
output_buffer.freebuffer()
del input_buffer, output_buffer