In [1]:
import sys
import os

# sys.path.append(os.path.abspath("../common"))

import math
import time
import numpy as np
import cv2
import pynq
import dac_sdc
import ctypes

# Team object from the `dac_sdc` module. Later cells read its batch size and
# bitstream path, pull image batches from it and save the results through it.
team_name = 'iSmart'
team = dac_sdc.Team(team_name, batch_size = 64)

In [2]:
# last_bais = np.load('last_bias.npy')
# last_bais = last_bais.reshape((6, 6))[:, :4]
# Build command for the shared library loaded below (kept for reference):
# os.system('g++ -shared -O2 ./load_image_new.cpp -o load_image_new.so -fPIC $(pkg-config opencv --cflags --libs) -lpthread')
# Shared library exposing the `load_image` routine that load_image() calls.
cfuns = ctypes.cdll.LoadLibrary("./load_image_new.so")

# Load the team's bitstream and keep handles to the pieces used by later cells.
overlay = pynq.Overlay(team.get_bitstream_path())
dma = overlay.axi_dma_0  # DMA whose send/recv channels are driven in net()
xlnk = pynq.Xlnk()  # used below to allocate the input/output cma_array buffers
nn_ctrl = overlay.ultra_net_0  # accelerator block whose registers net() writes
print('got nn accelerator!')

got nn accelerator!


In [3]:
# Number of images handled per accelerator run (taken from the team object).
BATCH_SIZE = team.batch_size

# Buffer geometry (rows, cols) and the size handed to the C image loader.
IMAGE_RAW_ROW = 360
IMAGE_RAW_COL = 640
IMAGE_ROW = 160
IMAGE_COL = 320

# Output grid dimensions; yolo() uses GRID_COL to turn a flat cell index
# back into (grid_x, grid_y).
GRID_ROW = 10
GRID_ROw = GRID_ROW  # misspelled legacy name, kept so existing references keep working
GRID_COL = 20

# Factors yolo() applies to map box coordinates from IMAGE_* to IMAGE_RAW_* space.
X_SCALE = IMAGE_RAW_COL / IMAGE_COL
Y_SCALE = IMAGE_RAW_ROW / IMAGE_ROW


# Two input and two output buffers: net() alternates between the pairs so one
# batch can be loaded/decoded while the other is being transferred.
in_buffer0 = xlnk.cma_array(shape=(BATCH_SIZE, IMAGE_RAW_ROW, IMAGE_RAW_COL, 3), dtype=np.uint8, cacheable=1)
in_buffer1 = xlnk.cma_array(shape=(BATCH_SIZE, IMAGE_RAW_ROW, IMAGE_RAW_COL, 3), dtype=np.uint8, cacheable=1)
in_buffers = [in_buffer0, in_buffer1]
out_buffer0 = xlnk.cma_array(shape=(BATCH_SIZE, GRID_ROW, GRID_COL, 6, 6), dtype=np.int32, cacheable=1)
out_buffer1 = xlnk.cma_array(shape=(BATCH_SIZE, GRID_ROW, GRID_COL, 6, 6), dtype=np.int32, cacheable=1)
out_buffers = [out_buffer0, out_buffer1]


# use c code load image
def load_image(image_paths, buff):
    """Fill ``buff`` with the images at ``image_paths`` using the C helper.

    Every path is converted with ``str`` and encoded as UTF-8. The resulting
    array of C strings, the data pointer of ``buff`` and the number of paths
    are passed to ``cfuns.load_image`` together with ``IMAGE_ROW``,
    ``IMAGE_COL`` and a trailing ``3``.
    NOTE(review): the ``3`` is presumably the channel count - confirm against
    load_image_new.cpp.
    """
    encoded_paths = [str(entry).encode('utf-8') for entry in image_paths]
    # Keep the c_char_p wrappers in a named list for the duration of the call.
    c_strings = [ctypes.c_char_p(raw) for raw in encoded_paths]
    c_path_array = (ctypes.c_char_p * len(c_strings))(*c_strings)

    target = np.asarray(buff)
    data_ptr = target.ctypes.data_as(ctypes.c_char_p)

    cfuns.load_image(c_path_array, data_ptr, len(encoded_paths), IMAGE_ROW, IMAGE_COL, 3)
    
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    ``x`` may be a scalar or a NumPy array; arrays are processed element-wise.
    """
    return 1 / (1 + np.exp(-x))

def yolo(out_buffer, batch_n, div, result=None):
    """Decode one bounding box per image from raw accelerator output.

    The first ``batch_n`` entries of ``out_buffer`` are viewed as
    ``(batch_n, cells, 6, 6)``. For every image the cell whose index-4 values,
    summed over the six rows, is largest is selected. The first four values of
    each of that cell's six rows are divided by ``div``, passed through
    ``sigmoid`` (x, y) or ``exp`` (w, h) and averaged over the rows.

    Args:
        out_buffer: array-like holding at least ``batch_n`` images of output.
        batch_n: number of leading images to decode.
        div: divisor applied to the raw box values before activation.
        result: list to append to; a new list is created when omitted.

    Returns:
        ``result`` with one ``[xmin, xmax, ymin, ymax]`` list of ints appended
        per decoded image (coordinates truncated by ``int``).
    """
    if result is None:
        # The signature advertises result=None, so it must not crash on append.
        result = []
    if batch_n <= 0:
        # reshape(0, -1, 6, 6) cannot infer the cell count; nothing to decode.
        return result

    res_np = np.array(out_buffer[:batch_n]).reshape(batch_n, -1, 6, 6)
    conf = res_np[..., 4].sum(axis=2)
    max_index = conf.argmax(1)

    # Flat cell index -> grid column / row.
    grid_x = max_index % GRID_COL
    grid_y = max_index // GRID_COL

    # Gather the winning cell of every image in one indexed read: (batch_n, 6, 4).
    boxs = res_np[np.arange(batch_n), max_index, :, :4] / div
    xy = sigmoid(boxs[..., :2]).mean(axis=1)
    wh = np.exp(boxs[..., 2:4]).mean(axis=1)

    xy[:, 0] += grid_x
    xy[:, 1] += grid_y

    # NOTE(review): 16 looks like the pixel size of one grid cell
    # (IMAGE_COL / GRID_COL) and 20 like a fixed box prior - confirm.
    xy *= 16
    wh *= 20

    # Scale centre and size by X_SCALE / Y_SCALE.
    xy[:, 0] *= X_SCALE
    xy[:, 1] *= Y_SCALE
    wh[:, 0] *= X_SCALE
    wh[:, 1] *= Y_SCALE

    # Centre/size -> corner coordinates.
    xmin = xy[:, 0] - wh[:, 0] / 2
    xmax = xy[:, 0] + wh[:, 0] / 2
    ymin = xy[:, 1] - wh[:, 1] / 2
    ymax = xy[:, 1] + wh[:, 1] / 2

    for i in range(batch_n):
        result.append([int(xmin[i]), int(xmax[i]), int(ymin[i]), int(ymax[i])])
    return result
    
# State shared between successive net() calls.
which_buffer = 0              # index (0/1) of the buffer pair to run next
first_batch = True            # True until the first batch has been pre-loaded
net_cnt = 0                   # number of accelerator runs started so far
last_batch_size = BATCH_SIZE  # size of the final batch when it is not a full one

def net(img_paths, result):
    """Advance the double-buffered inference pipeline by one step.

    Call pattern: once per batch of image paths, then once more with ``None``.

    * First call: only loads ``img_paths`` into input buffer 0 and returns.
    * Later calls: start the accelerator/DMA on the previously loaded batch,
      switch buffers, load ``img_paths`` (if any) into the other input buffer,
      decode the output of the run before the one just started while the
      transfer is in flight, then wait for the DMA to finish.
    * Final call (``img_paths is None``): additionally decodes the output of
      the run that has just finished, using ``last_batch_size`` entries.

    Args:
        img_paths: paths of the next batch, or ``None`` to flush the pipeline.
        result: list that ``yolo`` appends decoded boxes to.
    """
    global first_batch
    global which_buffer
    global net_cnt
    global last_batch_size

    # buffer first batch
    if first_batch:
        if img_paths is None:
            # Empty dataset: nothing was loaded, so there is nothing to run.
            return
        first_batch = False
        which_buffer = 0
        load_image(img_paths, in_buffers[which_buffer])
        # A dataset smaller than one batch is partial already here; without
        # this the final decode would emit BATCH_SIZE boxes for fewer images.
        if len(img_paths) != BATCH_SIZE:
            last_batch_size = len(img_paths)
        return

    # count
    net_cnt += 1
    nn_ctrl.write(0x0, 0) # Reset
    # Writes the first dimension of the input buffer to offset 0x10.
    # NOTE(review): presumably the image-count register - confirm with the IP.
    nn_ctrl.write(0x10, in_buffers[which_buffer].shape[0])
    nn_ctrl.write(0x0, 1) # Deassert reset

    dma.recvchannel.transfer(out_buffers[which_buffer])
    dma.sendchannel.transfer(in_buffers[which_buffer])

    # switch buffer
    which_buffer = 1 - which_buffer

    # buffer next batch
    if img_paths is not None:
        load_image(img_paths, in_buffers[which_buffer])

    # yolo: after the switch this output buffer holds the results of the run
    # before the one just started, so it is decoded while the DMA is busy.
    # NOTE(review): 127 * 15 is presumably the output quantisation scale - confirm.
    if net_cnt > 1:
        yolo(out_buffers[which_buffer], BATCH_SIZE, 127 * 15, result)

    if img_paths is not None and len(img_paths) != BATCH_SIZE:
        last_batch_size = len(img_paths)

    dma.sendchannel.wait()
    dma.recvchannel.wait()

    # last batch: decode the run that has just completed.
    if img_paths is None:
        yolo(out_buffers[1 - which_buffer], last_batch_size, 127 * 15, result)
        
################################Inference##################################
interval_time = 0
total_time = 0
total_energy = 0
result = list()
team.reset_batch_count()

rails = pynq.get_rails()

start = time.time()    
recorder = pynq.DataRecorder(rails["5V"].power)
with recorder.record(0.05): 
    while True:
        image_paths = team.get_next_batch()
        net(image_paths, result)

#         print('pro_image_cnt', len(result))
        # end
        if image_paths is None:
            break

end = time.time()
t = end - start
    
# Energy measurements    
energy = recorder.frame["5V_power"].mean() * t    
# energy = 0

total_time = t
total_energy = energy
print("Total time:", total_time, "seconds")
print("Total energy:", total_energy, "J")
print('images nums: {} .'.format(len(result)))
print('fps: {} .'.format(len(result) / total_time))

Total time: 2.8266243934631348 seconds
Total energy: 14.3097859919 J
images nums: 1000 .
fps: 353.77887571925186 .


In [4]:
# Hand the decoded boxes plus the measured run time and energy to the team
# helper for saving.
team.save_results_xml(result, total_time, energy)
# NOTE(review): presumably releases the CMA buffers allocated through xlnk - confirm.
xlnk.xlnk_reset()



All results stored in bbox_PL_1000_sample.txt


In [5]:
def computeIoU(boxA, boxB):
    """Intersection-over-union of two boxes given as ``[x0, y0, x1, y1]``.

    Extents are inclusive, i.e. a side spans ``hi - lo + 1`` units.
    Returns ``0.0`` when the boxes do not overlap.
    """
    def _area(x0, y0, x1, y1):
        # Inclusive-coordinate rectangle area.
        return (x1 - x0 + 1) * (y1 - y0 + 1)

    # Corners of the intersection rectangle.
    left, top = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
    right, bottom = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])

    if right < left or bottom < top:
        return 0.0

    overlap = _area(left, top, right, bottom)
    area_a = _area(boxA[0], boxA[1], boxA[2], boxA[3])
    area_b = _area(boxB[0], boxB[1], boxB[2], boxB[3])

    # Union = sum of both areas minus the part counted twice.
    return overlap / float(area_a + area_b - overlap)



# Result files compared below. Alternatives used during development:
#   PL: iSmart2.txt, bbox_PL_1000.txt, bbox_PL_1000_sample_64_py2.txt,
#       bbox_golden_c_1000_sample_512_py3.txt, bbox_golden_c_1000.txt,
#       bbox_pytorch.txt, bbox_golden_C.txt
#   GT: bbox_GT_1000.txt
PL_BBOX_PATH = "../deploy/bbox_PL_1000_sample.txt"
GT_BBOX_PATH = "../deploy2/bbox_GT_1000_sample.txt"


def _read_bboxes(path):
    """Read ``xmin xmax ymin ymax`` lines from ``path`` as ``[xmin, ymin, xmax, ymax]``.

    Blank lines and lines ending in ``jpg``, ``xml`` or ``bin`` are skipped.
    Surrounding whitespace is stripped first, so a missing trailing newline or
    CRLF line endings do not defeat the file-name check. The file is closed
    when reading finishes.
    """
    boxes = []
    with open(path, "r") as handle:
        for line in handle:
            entry = line.strip()
            if not entry:
                continue
            if entry.endswith(("jpg", "xml", "bin")):
                continue
            x1, x2, y1, y2 = entry.split()
            # Reorder into the layout computeIoU expects.
            boxes.append([int(x1), int(y1), int(x2), int(y2)])
    return boxes


bbox_PL = _read_bboxes(PL_BBOX_PATH)
bbox_GT = _read_bboxes(GT_BBOX_PATH)

if len(bbox_PL) != len(bbox_GT):
    print("ERROR! Ground truth and PL output do not match!")

# Only compare indices present in both files so a short ground-truth file
# cannot raise IndexError.
cnt = min(len(bbox_PL), len(bbox_GT))
IoU_avg = 0  # running sum; divided by cnt when printed
for i in range(cnt):
    boxA = bbox_PL[i]
    boxB = bbox_GT[i]

    print('\n')
    print(i)
    print(boxA)
    print(boxB)

    IoU = computeIoU(boxA, boxB)
    print(IoU)

    IoU_avg = IoU_avg + IoU

print("\nAvg IOU:")
# Guard against empty inputs instead of dividing by zero.
print(IoU_avg / cnt if cnt else 0.0)




0
[-16, 73, 42, 94]
[295, 131, 382, 232]
0.0


1
[464, 109, 520, 132]
[294, 129, 314, 173]
0.0


2
[-17, 36, 42, 58]
[560, 235, 573, 252]
0.0


3
[208, 178, 265, 203]
[239, 161, 262, 216]
0.2800718132854578


4
[272, 178, 328, 204]
[301, 170, 324, 200]
0.3188908145580589


5
[210, 292, 263, 314]
[235, 293, 245, 322]
0.18195488721804512


6
[-17, 36, 42, 58]
[173, 35, 219, 158]
0.0


7
[-17, 36, 42, 58]
[260, 87, 304, 196]
0.0


8
[-17, 36, 42, 58]
[208, 95, 244, 194]
0.0


9
[-16, 37, 42, 59]
[304, 281, 323, 309]
0.0


10
[303, 182, 361, 203]
[318, 153, 346, 228]
0.22276536312849163


11
[-16, 37, 42, 59]
[317, 147, 339, 174]
0.0


12
[-17, 252, 42, 274]
[317, 157, 339, 176]
0.0


13
[-17, 36, 42, 58]
[333, 149, 358, 211]
0.0


14
[208, 178, 264, 204]
[209, 100, 269, 291]
0.12880143112701253


15
[590, 36, 650, 58]
[438, 191, 479, 230]
0.0


16
[591, 72, 650, 94]
[280, 172, 423, 318]
0.0


17
[-17, 0, 42, 22]
[210, 123, 256, 155]
0.0


18
[273, 177, 328, 204]
[301, 157, 320, 206]
0.2

0.0


190
[-17, 0, 42, 22]
[561, 165, 568, 177]
0.0


191
[590, 252, 650, 274]
[226, 320, 236, 353]
0.0


192
[559, 108, 617, 131]
[307, 157, 335, 239]
0.0


193
[-17, 36, 42, 58]
[323, 150, 338, 171]
0.0


194
[272, 251, 328, 276]
[299, 169, 321, 200]
0.0


195
[240, 180, 296, 204]
[274, 152, 299, 197]
0.18758495695514274


196
[304, 178, 360, 204]
[305, 151, 341, 228]
0.29159369527145357


197
[433, 187, 487, 206]
[452, 179, 489, 214]
0.41189931350114417


198
[-17, 36, 42, 58]
[174, 163, 371, 304]
0.0


199
[-17, 288, 42, 310]
[205, 203, 235, 276]
0.0


200
[240, 106, 297, 131]
[279, 99, 289, 132]
0.17919799498746866


201
[590, 216, 650, 238]
[318, 161, 373, 279]
0.0


202
[591, 109, 650, 131]
[223, 158, 314, 197]
0.0


203
[304, 145, 360, 168]
[311, 139, 357, 195]
0.3864337101747174


204
[-17, 180, 42, 202]
[295, 218, 313, 227]
0.0


205
[-17, 216, 42, 238]
[284, 153, 302, 202]
0.0


206
[-17, 180, 42, 202]
[215, 150, 244, 227]
0.0


207
[304, 184, 360, 204]
[318, 159, 345, 231]


[303, 168, 325, 199]
0.2980281690140845


390
[-17, 108, 42, 130]
[206, 140, 295, 226]
0.0


391
[-17, 108, 42, 130]
[361, 29, 433, 131]
0.0


392
[-17, 36, 42, 58]
[331, 133, 335, 146]
0.0


393
[271, 35, 329, 59]
[290, 9, 313, 64]
0.270392068499324


394
[335, 144, 394, 166]
[372, 125, 392, 178]
0.2378138847858198


395
[-17, 36, 42, 58]
[305, 152, 324, 176]
0.0


396
[271, 179, 329, 203]
[299, 169, 322, 199]
0.2938775510204082


397
[590, 36, 650, 58]
[445, 224, 503, 281]
0.0


398
[590, 108, 650, 130]
[269, 219, 282, 242]
0.0


399
[-17, 36, 42, 58]
[289, 181, 330, 267]
0.0


400
[-17, 72, 42, 94]
[313, 166, 334, 215]
0.0


401
[-17, 36, 42, 58]
[248, 152, 399, 311]
0.0


402
[590, 216, 650, 238]
[184, 136, 266, 216]
0.0


403
[242, 153, 295, 171]
[279, 153, 295, 182]
0.2662819455894477


404
[240, 180, 296, 204]
[260, 152, 303, 248]
0.19400167785234898


405
[-17, 108, 42, 130]
[254, 101, 290, 213]
0.0


406
[368, 72, 425, 95]
[377, 67, 393, 107]
0.2427126710291493


407
[-17, 288

[335, 143, 393, 167]
[374, 126, 393, 177]
0.24813895781637718


590
[590, 144, 650, 166]
[303, 145, 359, 269]
0.0


591
[-17, 36, 42, 58]
[364, 190, 390, 246]
0.0


592
[368, 109, 424, 132]
[377, 95, 405, 155]
0.28512904547316675


593
[590, 216, 650, 238]
[328, 164, 350, 231]
0.0


594
[-17, 36, 42, 58]
[439, 237, 462, 261]
0.0


595
[303, 180, 361, 203]
[321, 164, 342, 213]
0.2655935613682093


596
[-17, 36, 42, 58]
[212, 128, 295, 182]
0.0


597
[-17, 36, 42, 58]
[236, 124, 314, 278]
0.0


598
[17, 41, 72, 61]
[346, 154, 367, 205]
0.0


599
[462, 324, 522, 346]
[487, 318, 521, 336]
0.28208307501549906


600
[271, 179, 329, 203]
[303, 172, 326, 202]
0.35057821059038347


601
[-17, 216, 42, 238]
[307, 302, 371, 360]
0.0


602
[591, 181, 649, 203]
[304, 175, 325, 203]
0.0


603
[304, 179, 360, 204]
[322, 158, 350, 213]
0.320578231292517


604
[-17, 216, 42, 238]
[332, 233, 350, 257]
0.0


605
[591, 73, 649, 95]
[374, 202, 375, 206]
0.0


606
[208, 143, 264, 168]
[249, 134, 261, 174]
0.

789
[-17, 36, 42, 58]
[298, 149, 321, 204]
0.0


790
[590, 216, 650, 238]
[384, 323, 416, 359]
0.0


791
[241, 113, 295, 133]
[276, 112, 287, 151]
0.1822125813449024


792
[-17, 36, 42, 58]
[180, 164, 369, 300]
0.0


793
[271, 181, 329, 203]
[300, 149, 324, 205]
0.26053466243769824


794
[560, 112, 616, 132]
[329, 133, 354, 191]
0.0


795
[-17, 216, 42, 238]
[310, 161, 328, 200]
0.0


796
[-17, 36, 42, 58]
[224, 155, 248, 194]
0.0


797
[-16, 36, 42, 58]
[281, 157, 311, 181]
0.0


798
[-17, 180, 42, 202]
[305, 299, 371, 360]
0.0


799
[-17, 180, 42, 202]
[262, 163, 297, 280]
0.0


800
[272, 182, 328, 204]
[285, 154, 335, 210]
0.3156581409856519


801
[271, 181, 329, 203]
[281, 174, 304, 202]
0.3462295081967213


802
[590, 72, 650, 94]
[304, 186, 327, 214]
0.0


803
[-16, 110, 41, 131]
[319, 156, 341, 183]
0.0


804
[-17, 216, 42, 238]
[300, 151, 334, 222]
0.0


805
[-17, 36, 42, 58]
[248, 189, 268, 232]
0.0


806
[590, 36, 650, 58]
[359, 171, 370, 195]
0.0


807
[142, 180, 202, 202]
[3


989
[-27831, 287, 28599, 288]
[397, 284, 424, 299]
0.0004944637716990129


990
[-28215, 107, 28215, 108]
[74, 191, 108, 254]
0.0


991
[-3520, 146, 4224, 146]
[348, 142, 382, 204]
0.0035300050428643467


992
[-28215, 71, 28215, 72]
[293, 155, 339, 206]
0.0


993
[-11100, 180, 12316, 180]
[301, 173, 323, 202]
0.0009549908653047666


994
[-27607, 215, 28823, 216]
[318, 145, 335, 185]
0.0


995
[-28215, 251, 28215, 252]
[327, 162, 400, 292]
0.0012090713025292464


996
[-28215, 35, 28215, 36]
[304, 211, 339, 301]
0.0


997
[-6249, 144, 6953, 144]
[372, 142, 392, 192]
0.0014733740265207324


998
[-28215, 35, 28215, 36]
[299, 127, 326, 207]
0.0


999
[-20448, 108, 21088, 108]
[324, 79, 346, 136]
0.0005367811799850635

Avg IOU:
0.08977855248541376
