In [19]:
## Set up the overlay and the memory-mapped (MMIO) windows
from pynq import Overlay
from pynq import MMIO

# Byte offsets, inside the control window, of the registers that report
# the size of each memory.
IMEM_SIZE_REG = 0x318
DMEM_SIZE_REG = 0x314
PMEM_SIZE_REG = 0x31C

ov = Overlay('aivo.bit')

# The control window sits at the base address and is 1 KiB long.
base_address = 0x43C00000
ctrl_size = 2**10
ctrl_mmap = MMIO(base_address, ctrl_size)

# Ask the design how large each memory is.
imem_size = ctrl_mmap.read(IMEM_SIZE_REG)
dmem_size = ctrl_mmap.read(DMEM_SIZE_REG)
pmem_size = ctrl_mmap.read(PMEM_SIZE_REG)

# Every window occupies one slot of `region_size` bytes (the largest of the
# four sizes); the slots follow the control window in the order
# IMEM, DMEM, PMEM.
region_size = max(ctrl_size, imem_size, dmem_size, pmem_size)
imem_mmap = MMIO(base_address + 1 * region_size, imem_size)
dmem_mmap = MMIO(base_address + 2 * region_size, dmem_size)
pmem_mmap = MMIO(base_address + 3 * region_size, pmem_size)

print("imem_size: {}".format(imem_size))
print("dmem_size: {}".format(dmem_size))
print("pmem_size: {}".format(pmem_size))

imem_size: 131072
dmem_size: 65536
pmem_size: 1


In [20]:
## Write IMEM/DMEM/PMEM

# --- Instruction memory -----------------------------------------------------
# Each line of main.img is a string of binary digits.  It is left-padded with
# '0' to 128 characters and written as four 32-bit words; the right-most 32
# characters go to the lowest address, the left-most 32 to the highest.
imem_addr = 0
with open("main.img", mode="r") as image:
    for raw in image:
        bits = raw.strip().rjust(128, '0')
        for word_idx in (3, 2, 1, 0):
            first = 32 * word_idx
            imem_mmap.write(imem_addr, int(bits[first:first + 32], 2))
            imem_addr += 4

# --- Data memory ------------------------------------------------------------
# Each line of main_data.img is one 32-bit word written as binary digits.
# Non-zero words are echoed together with their byte offset.
dmem_addr = 0
with open("main_data.img", mode="r") as image:
    for raw in image:
        word = int(raw.strip().rjust(32, '0'), 2)
        if word != 0:
            print("{} at {}".format(word, dmem_addr))
        dmem_mmap.write(dmem_addr, word)
        dmem_addr += 4
# i = 0
# with open("main_parameters.img", mode="r") as file:
#     for line in file:
#         line = line.strip().rjust(32, '0')
#         i_word = int(line, 2)
#         #if i_word != 0:
#         #    print("{} at {}".format(i_word, i))
#         pmem_mmap.write(i, i_word)
#         i += 4

128 at 128
48 at 136
32 at 140
1 at 144
3 at 148
3 at 152
1 at 156
1 at 160


In [21]:
# Set up the CMA buffers and store their physical addresses in DMEM
from pynq import Xlnk
import numpy as np
xlnk = Xlnk()


def cma_buffer(count, dtype, src=None):
    """Allocate a flat CMA array holding `count` elements of `dtype`.

    If `src` is given, the new buffer is filled with the contents of that
    binary file, read with the same dtype.
    """
    buf = xlnk.cma_array(shape=(count), dtype=dtype)
    if src is not None:
        buf[:] = np.fromfile(src, dtype=dtype)
    return buf


def store_address(addr, buf):
    """Write the physical address of `buf` to DMEM at byte offset `addr`."""
    dmem_mmap.write(addr, buf.physical_address)


# Byte offset in DMEM where the table of buffer addresses starts.
offset = 256

size = (48, 32)
FM = 64
aFM = FM // 32
n_pixels = size[0] * size[1]

# ---- Layer 0: input, weights, output, activations (4 table entries) --------
# Real data
l0_input = cma_buffer(n_pixels, np.int16,
                      "data/input/{}x{}.bin".format(size[0], size[1]))
store_address(offset, l0_input)

l0_weights = cma_buffer(FM * 3 * 3, np.int16, "data/weights/conv0.bin")
store_address(offset + 4, l0_weights)

# Outputs
l0_output = cma_buffer(FM * n_pixels, np.int32)
# l0_output[:] = np.fromfile("data/ref/{}x{}/conv0.bin".format(size[0], size[1]), dtype=np.int32)
store_address(offset + 8, l0_output)

# The activation buffer is pre-filled with the reference activations.
l0_act = cma_buffer(aFM * n_pixels, np.uint32,
                    "data/ref/{}x{}/act0.bin".format(size[0], size[1]))
store_address(offset + 12, l0_act)

offset += 16

# ---- Layers 1..14: one 16-byte table entry per layer -----------------------
# Entry layout: +0 weights, +4 conv output, +8 thresholds, +12 activations.
B_w = []
B_out = []
B_act = []
B_thres = []
for layer in range(1, 15):
    w = cma_buffer(FM * 3 * 3 * aFM, np.uint32,
                   "data/weights/conv{}.bin".format(layer))
    B_w.append(w)
    store_address(offset, w)

    out = cma_buffer(FM * n_pixels, np.uint16)
    B_out.append(out)
    store_address(offset + 4, out)

    act = cma_buffer(aFM * n_pixels, np.uint32)
    B_act.append(act)
    store_address(offset + 12, act)

    thres = cma_buffer(FM, np.uint16,
                       "data/thresholds/act{}.bin".format(layer))
    B_thres.append(thres)
    store_address(offset + 8, thres)

    offset += 16

# B_out[0][:] = np.fromfile("data/output/conv1.bin".format(size[0], size[1]), dtype=np.uint16)##
# The last activation buffer is overwritten with the reference activations.
B_act[-1][:] = np.fromfile("data/ref/{}x{}/act14.bin".format(size[0], size[1]), dtype=np.uint32)##

# ---- Layer 15: weights and output (2 table entries) ------------------------
l15_weights = cma_buffer(FM * 3 * 3 * aFM, np.uint32,
                         "data/weights/conv{}.bin".format(15))
store_address(offset, l15_weights)

# The output buffer is pre-filled from a previously stored file.
l15_out = cma_buffer(FM * n_pixels, np.uint16, "data/output/conv15.bin") ###
store_address(offset + 4, l15_out)
offset += 8

# ---- ReLU: two threshold tables and the output (3 table entries) -----------
relu_thres_a = cma_buffer(FM, np.int16, "data/thresholds/act15_a.bin")
store_address(offset, relu_thres_a)

# The "b" thresholds are stored on disk as int16 and widened to int32.
relu_thres_b = cma_buffer(FM, np.int32)
relu_thres_b[:] = np.fromfile("data/thresholds/act15_b.bin", dtype=np.int16).astype(np.int32)
store_address(offset + 4, relu_thres_b)

relu_out = cma_buffer(FM * n_pixels, np.int16)
# relu_out[:] = np.fromfile("data/output/act15.bin", dtype=np.int16) ###
store_address(offset + 8, relu_out)
offset += 12

# ---- Conv 16: weights and output (2 table entries) -------------------------
l16_weights = cma_buffer(FM * 3 * 3, np.int16, "data/weights/conv16.bin")
store_address(offset, l16_weights)

# Outputs
l16_output = cma_buffer(n_pixels, np.int32)
store_address(offset + 4, l16_output)

# Byte offset of the last table entry that was written.
print(offset+4)

520


In [22]:
import time
# Run TTA
# Offset of the command register inside the control window.
# NOTE(review): the meaning of the command values 0x2 and 0x1 is not visible
# in this notebook (presumably "run/continue" and "reset") — confirm against
# the control-interface specification of the core.
COMMAND_REG = 0x200
RUN_SECONDS = 5

ctrl_mmap.write(COMMAND_REG, 0x2)
time.sleep(RUN_SECONDS)  # let the core work before issuing the second command
ctrl_mmap.write(COMMAND_REG, 0x1)

In [23]:
# Print every non-zero 32-bit word of DMEM as binary, hex and decimal,
# together with its byte offset and its physical address.
dmem_base = dmem_mmap.base_addr
for addr in range(0, dmem_size, 4):
    word = dmem_mmap.read(addr)
    if word == 0:
        continue
    as_bits = bin(word)[2:].zfill(32)
    as_hex = hex(word)[2:].zfill(8)
    print("b{}, 0x{}, {} at {}, phys: {}".format(as_bits, as_hex, word, addr, hex(addr + dmem_base)))

b00000000000000000000000010000000, 0x00000080, 128 at 128, phys: 0x43c40080
b00000000000000000000000000110000, 0x00000030, 48 at 136, phys: 0x43c40088
b00000000000000000000000000100000, 0x00000020, 32 at 140, phys: 0x43c4008c
b00000000000000000000000000000001, 0x00000001, 1 at 144, phys: 0x43c40090
b00000000000000000000000000000011, 0x00000003, 3 at 148, phys: 0x43c40094
b00000000000000000000000000000011, 0x00000003, 3 at 152, phys: 0x43c40098
b00000000000000000000000000000001, 0x00000001, 1 at 156, phys: 0x43c4009c
b00000000000000000000000000000001, 0x00000001, 1 at 160, phys: 0x43c400a0
b00011000000010100101000000000000, 0x180a5000, 403329024 at 256, phys: 0x43c40100
b00011000000010101000000000000000, 0x180a8000, 403341312 at 260, phys: 0x43c40104
b00011000010110000000000000000000, 0x18580000, 408420352 at 264, phys: 0x43c40108
b00011000000010101100000000000000, 0x180ac000, 403357696 at 268, phys: 0x43c4010c
b00011000000001001000000000000000, 0x18048000, 402948096 at 272, phys: 0x43c

b01101101000011101010011000100100, 0x6d0ea624, 1829676580 at 63004, phys: 0x43c4f61c
b10111010000100101100010001001000, 0xba12c448, 3121792072 at 63008, phys: 0x43c4f620
b10101010111001100000011100011101, 0xaae6071d, 2867201821 at 63012, phys: 0x43c4f624
b00001011101100110100000100100100, 0x0bb34124, 196296996 at 63016, phys: 0x43c4f628
b01100100101010100010011010011011, 0x64aa269b, 1688872603 at 63020, phys: 0x43c4f62c
b00101010010000110110000100010100, 0x2a436114, 709058836 at 63024, phys: 0x43c4f630
b00110010111010001000100111110110, 0x32e889f6, 854100470 at 63028, phys: 0x43c4f634
b00011111100101100010111011110110, 0x1f962ef6, 529936118 at 63032, phys: 0x43c4f638
b10111000010110110100001100001101, 0xb85b430d, 3092988685 at 63036, phys: 0x43c4f63c
b11010110011101011111110001110001, 0xd675fc71, 3598056561 at 63040, phys: 0x43c4f640
b00111001010000010111101101101100, 0x39417b6c, 960592748 at 63044, phys: 0x43c4f644
b01010000100111010000111010001001, 0x509d0e89, 1352470153 at 63048, ph

In [24]:
# Save to file
# (l0_output*2**-28).astype(np.float32).tofile("data/output/conv0.bin")
# l0_act.tofile("data/output/act0.bin")

# for i in range(len(B_out)):
#     j = i + 1
#     B_out[i].tofile("data/output/conv{}.bin".format(j))
#     B_act[i].tofile("data/output/act{}.bin".format(j))

# l15_out.tofile("data/output/conv15.bin")
# (relu_out*2**-16).astype(np.float32).tofile("data/output/act15.bin")
# (l16_output*2**-32).astype(np.float32).tofile("data/output/conv16.bin")

In [25]:
# # # Reference check
# # vhex = np.vectorize(hex)
# ref = np.fromfile("data/ref/{}x{}/conv0.bin".format(size[0], size[1]), dtype=np.float32)
# print("Conv0:")
# print(l0_output.reshape(48,32,64)[:7,:9,0]*2**-28)
# print("ref:")
# print(ref.reshape(48,32,64)[:7,:9,0])
# print((ref - l0_output*2**-28).mean())

# ref = np.fromfile("data/ref/{}x{}/act0.bin".format(size[0], size[1]), dtype=np.uint32)
# print("Act0:")
# print(l0_act[17:19])
# print(ref[17:19])
# print((ref==l0_act).mean())

# ---- Conv1: compare the hardware output against the float reference --------
ref = np.fromfile("data/ref/{}x{}/conv1.bin".format(size[0], size[1]), dtype=np.float32)
print("Conv1:")
rows, cols = size
# B_out[0] was allocated with FM*size[0]*size[1] elements, so use those
# dimensions instead of repeating them as literals.
out = B_out[0].reshape(rows, cols, FM)
# out.tofile("data/output/conv15.bin".format(size[0], size[1]))

# Mark the first/last row and column.  Slices are used so that a dimension
# of length 1 is marked once, exactly like `j == 0 or j == size[0]-1`.
row_border = np.zeros(rows, dtype=bool)
row_border[:1] = True
row_border[-1:] = True
col_border = np.zeros(cols, dtype=bool)
col_border[:1] = True
col_border[-1:] = True
on_row = row_border[:, np.newaxis]
on_col = col_border[np.newaxis, :]

# Per-pixel correction term T.  It depends only on the pixel position, not on
# the output channel, so it is computed once as a (rows, cols) map:
#   interior pixel : 9*FM
#   border pixel   : 3*FM less for each border (row / column) it lies on
#   corner pixel   : FM added back
# NOTE(review): presumably T is the number of input bits under the 3x3 window
# (FM input channels per tap, as for the original literal 64) — confirm.
T = 9*FM - 3*FM*on_row - 3*FM*on_col + FM*(on_row & on_col)

# Widen the uint16 hardware output to int32 *before* the arithmetic so the
# result can go negative and cannot wrap, independent of NumPy's scalar
# promotion rules.
out2 = (2*np.asarray(out, dtype=np.int32) - T[:, :, np.newaxis]).astype(np.int32)

print(out2[:8,:8,0])
print(ref.astype(np.float32).reshape(rows, cols, FM)[:8,:8,0])
# Fraction of elements that match the reference exactly (1.0 == all match).
print((ref.reshape(rows, cols, FM)==out2).mean())

# ---- Act1: compare the activation words against the reference --------------
ref = np.fromfile("data/ref/{}x{}/act1.bin".format(size[0], size[1]), dtype=np.uint32)
print("Act1:")
# B_act[0] was allocated with aFM*size[0]*size[1] uint32 words.
print(B_act[0].reshape(rows, cols, aFM)[:4,:4,:2])
print("ref:")
print(ref.reshape(rows, cols, aFM)[:4,:4,:2])
print((ref==B_act[0]).mean())

# ref = np.fromfile("data/ref/{}x{}/act15.bin".format(size[0], size[1]), dtype=np.float32)
# out = relu_out#*2**-16
# ref = (ref*2**16).round().astype(np.int16)
# print("Act15:")
# print(out.reshape(48,32,64)[0,0,:])
# # print((ref.reshape(48,32,64)[0,0,:]*2**16).round().astype(np.int16))
# print(ref.reshape(48,32,64)[0,0,:])
# print((ref==out).mean())
# print((ref-out).mean())

# ref = np.fromfile("data/ref/{}x{}/conv16.bin".format(size[0], size[1]), dtype=np.float32)
# print("Conv16:")
# out = l16_output*2**-32
# print(out.reshape(48,32)[:6,:8])
# print("ref:")
# print(ref.reshape(48,32)[:6,:8])
# print((ref - out).mean())

Conv1:
[[ 56 108 104  92  76  88  66  56]
 [ 74 152 154 140 136 140 136 106]
 [ 80 170 168 144 134 158 138 114]
 [ 80 170 162 130 128 154 146 134]
 [ 82 168 164 122 130 152 144 150]
 [ 82 170 160 124 134 148 162 170]
 [ 84 168 162 126 142 154 152 158]
 [ 84 162 160 132 150 156 150 170]]
[[  56.  108.  104.   92.   76.   88.   66.   56.]
 [  74.  152.  154.  140.  136.  140.  136.  106.]
 [  80.  170.  168.  144.  134.  158.  138.  114.]
 [  80.  170.  162.  130.  128.  154.  146.  134.]
 [  82.  168.  164.  122.  130.  152.  144.  150.]
 [  82.  170.  160.  124.  134.  148.  162.  170.]
 [  84.  168.  162.  126.  142.  154.  152.  158.]
 [  84.  162.  160.  132.  150.  156.  150.  170.]]
1.0
Act1:
[[[0 0]
  [0 0]
  [0 0]
  [0 0]]

 [[0 0]
  [0 0]
  [0 0]
  [0 0]]

 [[0 0]
  [0 0]
  [0 0]
  [0 0]]

 [[0 0]
  [0 0]
  [0 0]
  [0 0]]]
ref:
[[[1861433962 2330276355]
  [3488623456 3265934977]
  [3421661984 3533911689]
  [3455218474 2206315017]]

 [[1794472488 4142858891]
  [3907283744 360515

In [26]:
# !python3 compare.py

In [27]:
# Clear CMA memory
# Release every contiguous buffer allocated in the "Setup CMA data" cell.
# Buffers are closed in the order they were allocated per layer group.

# Layer 0
for buf in (l0_input, l0_weights, l0_output, l0_act):
    buf.close()

# Layers 1..14: the four lists are filled in lock-step, one entry per layer.
for layer_bufs in zip(B_out, B_w, B_act, B_thres):
    for buf in layer_bufs:
        buf.close()

# Layer 15.  l15_weights was previously never closed, which left that
# contiguous buffer allocated after the cell had run.
l15_weights.close()
l15_out.close()

# ReLU
relu_thres_a.close()
relu_thres_b.close()
relu_out.close()

# Conv 16
l16_weights.close()
l16_output.close()

In [41]:
# (ref.reshape(48,32,64)==out2)[42:48,:5,0]

In [340]:
# (l1_act.reshape(48,32,2)==ref.reshape(48,32,2))[0,8]

In [152]:
# B_out[0].tofile("data/output/conv1.bin".format(size[0], size[1]))

In [153]:
# Print a few Act1 words as 32-bit binary strings: first line is the hardware
# buffer, second line is the reference, for word indices [start, end).
ref = np.fromfile("data/ref/{}x{}/act1.bin".format(size[0], size[1]), dtype=np.uint32)
start = 2
end = start + 3


def _bit_field(word):
    """Format `word` as 'b' + 32 zero-padded binary digits + one space."""
    return "b{} ".format(bin(word)[2:].zfill(32))


window = range(start, end)
print("".join(_bit_field(B_act[0][n]) for n in window))
print("".join(_bit_field(ref[n]) for n in window), end='')

b00000000000000000000000000000000 b00000000000000000000000000000000 b00000000000000000000000000000000 
b11001111111100000010101101100000 b11000010101010100011011010000001 b11001011111100100110101100100000 

In [38]:
# Element-wise hex(): maps an integer array to an array of '0x..' strings.
vhex = np.vectorize(hex)