In [595]:
## Setup Overlay and MMAPs
from pynq import Overlay
from pynq import MMIO

# Instantiate the overlay from the 'aivo.bit' bitstream and map the control
# register window that sits at the start of the accelerator's address range.
ov = Overlay('aivo.bit')
base_address = 0x43C00000
ctrl_size = 1 << 10
ctrl_mmap = MMIO(base_address, ctrl_size)

# The memory sizes are read back from control registers; the read order
# (0x318, 0x314, 0x31C) is kept exactly as before.
imem_size = ctrl_mmap.read(0x318)
dmem_size = ctrl_mmap.read(0x314)
pmem_size = ctrl_mmap.read(0x31C)

# All four regions use a common stride equal to the largest of them:
# control at 0x, IMEM at 1x, DMEM at 2x and PMEM at 3x region_size.
region_size = max(ctrl_size, imem_size, dmem_size, pmem_size)
imem_mmap, dmem_mmap, pmem_mmap = (
    MMIO(base_address + region_size * slot, nbytes)
    for slot, nbytes in enumerate((imem_size, dmem_size, pmem_size), start=1)
)

for template, nbytes in (
    ("imem_size: {}", imem_size),
    ("dmem_size: {}", dmem_size),
    ("pmem_size: {}", pmem_size),
):
    print(template.format(nbytes))

imem_size: 131072
dmem_size: 4096
pmem_size: 4096


In [596]:
## Write IMEM/DMEM/PMEM
def _write_image(mmap, path, line_bits=32):
    """Copy a textual binary image into a memory-mapped region.

    Every line of the file at ``path`` is a string of '0'/'1' characters. It
    is stripped, left-padded with zeros to ``line_bits`` and cut into 32-bit
    words. The words are written to consecutive 4-byte offsets starting at 0,
    taking the rightmost 32 bits of each line first.

    Parameters:
        mmap: object exposing ``write(offset, value)`` (e.g. a pynq ``MMIO``).
        path: path of the image file to read.
        line_bits: width of one image line in bits; must be a multiple of 32.

    Returns:
        The number of bytes written.

    Raises:
        ValueError: if ``line_bits`` is not a multiple of 32, or a line holds
            more than ``line_bits`` characters (it would otherwise be
            silently truncated).
    """
    if line_bits % 32:
        raise ValueError("line_bits must be a multiple of 32, got {}".format(line_bits))
    words_per_line = line_bits // 32
    offset = 0
    with open(path, mode="r") as image:
        for line_no, line in enumerate(image, start=1):
            bits = line.strip().rjust(line_bits, '0')
            if len(bits) > line_bits:
                raise ValueError("{}:{}: line is {} bits wide, expected at most {}".format(
                    path, line_no, len(bits), line_bits))
            # Highest slice index first == rightmost 32 characters first.
            for j in reversed(range(words_per_line)):
                mmap.write(offset, int(bits[32*j:32*j + 32], 2))
                offset += 4
    return offset


# Instruction lines are 128 bits wide (four words per line); data and
# parameter images hold one 32-bit word per line.
_write_image(imem_mmap, "main.img", line_bits=128)
_write_image(dmem_mmap, "main_data.img")
_write_image(pmem_mmap, "main_param.img")

In [597]:
# Setup CMA data
from pynq import Xlnk
import numpy as np 
# Allocate every CMA buffer used by the network and publish each buffer's
# physical address as a 32-bit entry in a pointer table at the start of DMEM.
xlnk = Xlnk()

size = (48, 32)            # input dimensions; also selects the data/ref file names
FM = 64                    # number of feature maps
aFM = FM // 32             # uint32 words per pixel in the activation buffers
n_pixels = size[0] * size[1]


def _cma_buffer(shape, dtype, dmem_offset, init=None):
    """Allocate a CMA array, optionally fill it, and register it in DMEM.

    Parameters:
        shape: element count (or shape) passed to ``xlnk.cma_array``.
        dtype: numpy dtype of the buffer.
        dmem_offset: byte offset in DMEM that receives the physical address.
        init: optional array copied into the buffer; numpy raises if its
            size does not match the buffer.

    Returns:
        The allocated buffer.
    """
    buf = xlnk.cma_array(shape=shape, dtype=dtype)
    if init is not None:
        buf[:] = init
    dmem_mmap.write(dmem_offset, buf.physical_address)
    return buf


def _shifted_thresholds(thresholds, delta):
    """Return ``thresholds - delta`` clamped at zero, in the input's dtype.

    The subtraction is done in int32 so unsigned inputs cannot wrap around.
    """
    widened = thresholds.astype(np.int32)
    return np.where(widened > delta, widened - delta, 0).astype(thresholds.dtype)


offset = 0 # MMAP address: byte offset of the next pointer-table entry

# Layer 0 -- table entries: +0 input, +4 weights, +8 output, +12 activations.
l0_input = _cma_buffer(
    n_pixels, np.int16, offset,
    np.fromfile("data/input/{}x{}.bin".format(size[0], size[1]), dtype=np.int16))
l0_weights = _cma_buffer(
    FM*3*3, np.int16, offset + 4,
    np.fromfile("data/weights/conv0.bin", dtype=np.int16))
l0_output = _cma_buffer(FM*n_pixels, np.int32, offset + 8)
l0_act = _cma_buffer(
    aFM*n_pixels, np.uint32, offset + 12,
    np.fromfile("data/ref/{}x{}/act0.bin".format(size[0], size[1]), dtype=np.uint32))
offset += 16

# Layers 1..14 -- per layer: +0 weights, +4 output, +8 thresholds, +12 activations.
B_w = []
B_out = []
B_act = []
B_thres = []
for i in range(1, 15):
    B_w.append(_cma_buffer(
        FM*3*3*aFM, np.uint32, offset,
        np.fromfile("data/weights/conv{}.bin".format(i), dtype=np.uint32)))
    B_out.append(_cma_buffer(FM*n_pixels, np.uint16, offset + 4))
    B_act.append(_cma_buffer(aFM*n_pixels, np.uint32, offset + 12))

    # The threshold buffer holds three variants back to back: the values as
    # stored in the file, then the same values lowered by 3*64 and by 5*64
    # (clamped at zero).
    tmp = np.fromfile("data/thresholds/act{}.bin".format(i), dtype=np.uint16)
    tmp3 = _shifted_thresholds(tmp, 3*64)
    tmp5 = _shifted_thresholds(tmp, 5*64)
    print(tmp)
    print(tmp3)
    B_thres.append(_cma_buffer(
        3*FM, np.uint16, offset + 8, np.concatenate((tmp, tmp3, tmp5))))

    offset += 16

# Layer 15 -- +0 weights, +4 output.
l15_weights = _cma_buffer(
    FM*3*3*aFM, np.uint32, offset,
    np.fromfile("data/weights/conv{}.bin".format(15), dtype=np.uint32))
l15_out = _cma_buffer(FM*n_pixels, np.uint16, offset + 4)
offset += 8

# ReLU -- +0 thresholds a, +4 thresholds b, +8 output.
relu_thres_a = _cma_buffer(
    FM, np.int16, offset,
    np.fromfile("data/thresholds/act15_a.bin", dtype=np.int16))
# The b thresholds are stored as int16 on disk and widened to int32 here.
relu_thres_b = _cma_buffer(
    FM, np.int32, offset + 4,
    np.fromfile("data/thresholds/act15_b.bin", dtype=np.int16).astype(np.int32))
relu_out = _cma_buffer(FM*n_pixels, np.int16, offset + 8)
offset += 12

# Conv 16 -- +0 weights, +4 output.
l16_weights = _cma_buffer(
    FM*3*3, np.int16, offset,
    np.fromfile("data/weights/conv16.bin", dtype=np.int16))
l16_output = _cma_buffer(n_pixels, np.int32, offset + 4)

# Byte offset of the last pointer-table entry written.
print(offset+4)

[ 664  614  574  608  606  574  581  571  307  579  501  578  572  579  568
  576  573  581  587  578  590  559  420  579 1006  572  576  599  576  555
  571  633  537  581  577  582  575  603  547  568  568  579  598  589  638
  561  577  576  605  579  575  578  586  580  578  571  583  895  567  567
  580  578  574  623]
[472 422 382 416 414 382 389 379 115 387 309 386 380 387 376 384 381 389
 395 386 398 367 228 387 814 380 384 407 384 363 379 441 345 389 385 390
 383 411 355 376 376 387 406 397 446 369 385 384 413 387 383 386 394 388
 386 379 391 703 375 375 388 386 382 431]
[ 425  552  592  546  560  522  633 1152  605  586  567  583  566  596  589
  572  826  560  580  545  588  597  593  567  570  480  573  588  555  604
  562 1066  568  570  585  527  537  559  594  557  571  509  702  500  579
  573  562  596  576  581  609  585  581  580  581  512  562  575  594   61
  556  575  564  613]
[233 360 400 354 368 330 441 960 413 394 375 391 374 404 397 380 634 368
 388 353 396 4

In [598]:
import time
# Run TTA
# Two commands are written to the same control register, five seconds apart.
# NOTE(review): presumably 0x2 holds the core in reset and 0x1 lets it run --
# confirm against the core's control register map.
CONTROL_REG_OFFSET = 0x200
ctrl_mmap.write(CONTROL_REG_OFFSET, 0x2)
time.sleep(5)
ctrl_mmap.write(CONTROL_REG_OFFSET, 0x1)

In [599]:
# Printout DMEM
# Walk DMEM one 32-bit word at a time and list every non-zero word as
# binary, hex and decimal, with its byte offset and physical address.
for byte_offset in range(0, dmem_size, 4):
    word = dmem_mmap.read(byte_offset)
    if word == 0:
        continue
    as_bits = bin(word)[2:].zfill(32)
    as_hex = hex(word)[2:].zfill(8)
    phys = hex(byte_offset + dmem_mmap.base_addr)
    print("b{}, 0x{}, {} at {}, phys: {}".format(as_bits, as_hex, word, byte_offset, phys))

b00011000000010010011000000000000, 0x18093000, 403255296 at 0, phys: 0x43c40000
b00011000000010011011000000000000, 0x1809b000, 403288064 at 4, phys: 0x43c40004
b00011000010110000000000000000000, 0x18580000, 408420352 at 8, phys: 0x43c40008
b00011000000010101100000000000000, 0x180ac000, 403357696 at 12, phys: 0x43c4000c
b00011000000001001000000000000000, 0x18048000, 402948096 at 16, phys: 0x43c40010
b00011000000011000000000000000000, 0x180c0000, 403439616 at 20, phys: 0x43c40014
b00011000000001001010000000000000, 0x1804a000, 402956288 at 24, phys: 0x43c40018
b00011000000001001100000000000000, 0x1804c000, 402964480 at 28, phys: 0x43c4001c
b00011000000001010000000000000000, 0x18050000, 402980864 at 32, phys: 0x43c40020
b00011000000100000000000000000000, 0x18100000, 403701760 at 36, phys: 0x43c40024
b00011000000001001011000000000000, 0x1804b000, 402960384 at 40, phys: 0x43c40028
b00011000000001010100000000000000, 0x18054000, 402997248 at 44, phys: 0x43c4002c
b000110000000010100100000000000

In [600]:
# # Printout PMEM
# for i in range(0, pmem_size, 4):
#     val = pmem_mmap.read(i)
#     if val != 0:
#         print("b{}, 0x{}, {} at {}, phys: {}".format(bin(val)[2:].zfill(32), hex(val)[2:].zfill(8), val, i, hex(i+pmem_mmap.base_addr)))

In [601]:
# Save to file
# (l0_output*2**-28).astype(np.float32).tofile("data/output/conv0.bin")
# l0_act.tofile("data/output/act0.bin")

# for i in range(len(B_out)):
#     j = i + 1
#     B_out[i].tofile("data/output/conv{}.bin".format(j))
#     B_act[i].tofile("data/output/act{}.bin".format(j))

# l15_out.tofile("data/output/conv15.bin")
# (relu_out*2**-16).astype(np.float32).tofile("data/output/act15.bin")
# (l16_output*2**-32).astype(np.float32).tofile("data/output/conv16.bin")

In [605]:
# # # Reference check
# # vhex = np.vectorize(hex)
# ref = np.fromfile("data/ref/{}x{}/conv0.bin".format(size[0], size[1]), dtype=np.float32)
# print("Conv0:")
# print(l0_output.reshape(48,32,64)[:7,:9,0]*2**-28)
# print("ref:")
# print(ref.reshape(48,32,64)[:7,:9,0])
# print((ref - l0_output*2**-28).mean())

# ref = np.fromfile("data/ref/{}x{}/act0.bin".format(size[0], size[1]), dtype=np.uint32)
# print("Act0:")
# print(l0_act[17:19])
# print(ref[17:19])
# print((ref==l0_act).mean())

# ref = np.fromfile("data/ref/{}x{}/conv1.bin".format(size[0], size[1]), dtype=np.float32)
# print("Conv1:")
# out = B_out[0].reshape(48,32,64)
# # out.tofile("data/output/conv15.bin".format(size[0], size[1]))
# out2 = np.zeros_like(out, dtype=np.int32)
# for k in range(0, 64):
#     for j in range(0, size[0]):
#         for i in range(0, size[1]):
#             T = 9*64
#             if (i==0 or i==size[1]-1):
#                 T -= 3*64;
#             if (j==0 or j==size[0]-1):
#                 T -= 3*64;
#             if ((j==0 and i==0) or (j==size[0]-1 and i==0) or (j==0 and i==size[1]-1) or (j==size[0]-1 and i==size[1]-1)):
#                 T += 1*64;
#             out2[j,i,k] = 2*out[j,i,k] - T
# print(out2[:8,:8,0])
# print(ref.astype(np.float32).reshape(48,32,64)[:8,:8,0])
# print("{}% OK".format((ref.reshape(48,32,64)==out2).mean() * 100))

# Compare the activations of the first entry in B_act with the reference
# file for the current input size. The view shape comes from `size` and
# `aFM`, the same values used to allocate the buffer, so it stays valid
# when the input size changes.
ref = np.fromfile("data/ref/{}x{}/act1.bin".format(size[0], size[1]), dtype=np.uint32)
act_shape = (size[0], size[1], aFM)
print("Act1:")
print(B_act[0].reshape(act_shape)[:4,:4,:2])
print("ref:")
print(ref.reshape(act_shape)[:4,:4,:2])
# Fraction of words that match the reference exactly (1.0 == all equal).
print((ref==B_act[0]).mean())

# ref = np.fromfile("data/ref/{}x{}/act15.bin".format(size[0], size[1]), dtype=np.float32)
# out = relu_out#*2**-16
# ref = (ref*2**16).round().astype(np.int16)
# print("Act15:")
# print(out.reshape(48,32,64)[0,0,:])
# # print((ref.reshape(48,32,64)[0,0,:]*2**16).round().astype(np.int16))
# print(ref.reshape(48,32,64)[0,0,:])
# print((ref==out).mean())
# print((ref-out).mean())

# ref = np.fromfile("data/ref/{}x{}/conv16.bin".format(size[0], size[1]), dtype=np.float32)
# print("Conv16:")
# out = l16_output*2**-32
# print(out.reshape(48,32)[:6,:8])
# print("ref:")
# print(ref.reshape(48,32)[:6,:8])
# print((ref - out).mean())

Act1:
[[[4236074442 2102566462]
  [3637268924 2103467580]
  [3637400056 2018565044]
  [3704246776 2018756532]]

 [[4233435328 3151027742]
  [3634311832 1936217151]
  [3633279000 2078331061]
  [3633197368 2053300144]]

 [[4233371084 1003550238]
  [3634000700 1918915773]
  [3632951576 2061556149]
  [3635040568 2051464112]]

 [[4233444812 3151033886]
  [3634246428 1911051453]
  [3633213720 2028001717]
  [3635032376 2053167537]]]
ref:
[[[4236074442 2102566462]
  [3637268924 2103467580]
  [3637400056 2018565044]
  [3704246776 2018756532]]

 [[4233435328 3151027742]
  [3634311832 1936217151]
  [3633279000 2078331061]
  [3633197368 2053300144]]

 [[4233371084 1003550238]
  [3634000700 1918915773]
  [3632951576 2061556149]
  [3635040568 2051464112]]

 [[4233444812 3151033886]
  [3634246428 1911051453]
  [3633213720 2028001717]
  [3635032376 2053167537]]]
1.0


In [603]:
# !python3 compare.py

In [604]:
# Clear CMA memory
# Release every CMA buffer allocated in the setup cell, in allocation order.
for buf in (l0_input, l0_weights, l0_output, l0_act):
    buf.close()

# Per-layer buffers of the middle layers.
for out_buf, w_buf, act_buf, thres_buf in zip(B_out, B_w, B_act, B_thres):
    out_buf.close()
    w_buf.close()
    act_buf.close()
    thres_buf.close()

# l15_weights was allocated during setup but never closed before, which
# leaked its CMA memory on every run.
for buf in (l15_weights, l15_out, relu_thres_a, relu_thres_b, relu_out):
    buf.close()

for buf in (l16_weights, l16_output):
    buf.close()

In [260]:
# Print the first few activation words of B_act[0] and of the reference as
# 32-bit binary strings, one row each, for a bit-by-bit visual comparison.
ref = np.fromfile("data/ref/{}x{}/act1.bin".format(size[0], size[1]), dtype=np.uint32)
start = 0
end = start + 3


def _as_bits(word):
    """Format one word as 'b' + 32 binary digits + a trailing space."""
    return "b{} ".format(bin(word)[2:].zfill(32))


# First row ends with a newline; the second row is left without one.
print("".join(_as_bits(B_act[0][k]) for k in range(start, end)))
print("".join(_as_bits(ref[k]) for k in range(start, end)), end='')

b01101110111100111111111111111111 b10001010111001010011001000000011 b11001111111100000010101101100000 
b01101110111100110011101001101010 b10001010111001010011001000000011 b11001111111100000010101101100000 

In [121]:
# vhex=np.vectorize(hex)