In [1]:
import sys
import os

# sys.path.append(os.path.abspath("../common"))

import math
import time
import numpy as np
import cv2
import pynq
import dac_sdc
import ctypes

# Team handle used for the rest of the notebook: it supplies the bitstream
# path, hands out image batches and stores the results. The batch size given
# here is read back later through `team.batch_size`.
team_name = 'iSmart'
team = dac_sdc.Team(team_name, batch_size = 64)

In [2]:
# Disabled experiment, kept for reference:
# last_bais = np.load('last_bias.npy')
# last_bais = last_bais.reshape((6, 6))[:, :4]
# Native helper library whose `load_image` entry point is called by
# load_image() further down. The disabled command below shows how the shared
# object can be rebuilt from load_image_new.cpp:
# os.system('g++ -shared -O2 ./load_image_new.cpp -o load_image_new.so -fPIC $(pkg-config opencv --cflags --libs) -lpthread')
cfuns = ctypes.cdll.LoadLibrary("./load_image_new.so")

# Load the overlay from the team's bitstream and keep handles to the pieces
# used later: the AXI DMA engine, the Xlnk allocator (for cma_array buffers)
# and the `ultra_net_0` block that net() drives through register writes.
overlay = pynq.Overlay(team.get_bitstream_path())
dma = overlay.axi_dma_0
xlnk = pynq.Xlnk()
nn_ctrl = overlay.ultra_net_0
print('got nn accelerator!')

got nn accelerator!


In [3]:
# ---- Batch / geometry constants -------------------------------------------
BATCH_SIZE = team.batch_size

# Size of the images as stored on disk (rows x cols); also the per-image
# capacity reserved in the input buffers below.
IMAGE_RAW_ROW = 360
IMAGE_RAW_COL = 640

# Size passed to the native image loader (presumably the resolution the
# accelerator consumes -- confirm against load_image_new.cpp).
IMAGE_ROW = 160
IMAGE_COL = 320

# Output grid of the detector: GRID_ROW x GRID_COL cells per image.
GRID_ROW = 10
GRID_ROw = GRID_ROW  # legacy mis-capitalised spelling, kept so existing references still work
GRID_COL = 20

# Factors used by yolo() to map coordinates from the IMAGE_* frame back to
# the raw image frame.
X_SCALE = IMAGE_RAW_COL / IMAGE_COL
Y_SCALE = IMAGE_RAW_ROW / IMAGE_ROW


# ---- Double-buffered CMA memory -------------------------------------------
# Two input and two output buffers are allocated; net() alternates between
# index 0 and index 1 on successive accelerator runs.
_IN_BUFFER_SHAPE = (BATCH_SIZE, IMAGE_RAW_ROW, IMAGE_RAW_COL, 3)
# Per grid cell the accelerator emits a 6 x 6 block of int32 values; yolo()
# reads index 4 of the last axis as a confidence and indices 0..3 as box
# parameters, aggregating over the second-to-last axis.
_OUT_BUFFER_SHAPE = (BATCH_SIZE, GRID_ROW, GRID_COL, 6, 6)

in_buffer0 = xlnk.cma_array(shape=_IN_BUFFER_SHAPE, dtype=np.uint8, cacheable=1)
in_buffer1 = xlnk.cma_array(shape=_IN_BUFFER_SHAPE, dtype=np.uint8, cacheable=1)
in_buffers = [in_buffer0, in_buffer1]
out_buffer0 = xlnk.cma_array(shape=_OUT_BUFFER_SHAPE, dtype=np.int32, cacheable=1)
out_buffer1 = xlnk.cma_array(shape=_OUT_BUFFER_SHAPE, dtype=np.int32, cacheable=1)
out_buffers = [out_buffer0, out_buffer1]


# use c code load image
def load_image(image_paths, buff):
    """Fill ``buff`` with the images named by ``image_paths`` via the native loader.

    Every path is converted with ``str()``, UTF-8 encoded and packed into a C
    array of ``char*``.  That array, a pointer to the memory behind ``buff``,
    the number of paths and the constants ``IMAGE_ROW``, ``IMAGE_COL`` and
    ``3`` are handed to ``cfuns.load_image``.  What the native routine does
    with them is not visible here (presumably decode + resize into ``buff``
    -- confirm against load_image_new.cpp).

    Parameters
    ----------
    image_paths : iterable
        Paths (any objects whose ``str()`` is the file name) to load.
    buff : array-like
        Destination buffer; viewed through ``np.asarray`` to obtain its
        data pointer.
    """
    # Keep the array view in a local so it stays alive for the whole call.
    pixels = np.asarray(buff)
    dest_ptr = pixels.ctypes.data_as(ctypes.c_char_p)

    c_strings = [ctypes.c_char_p(str(p).encode('utf-8')) for p in image_paths]
    n_images = len(c_strings)
    c_path_array = (ctypes.c_char_p * n_images)(*c_strings)

    cfuns.load_image(c_path_array, dest_ptr, n_images, IMAGE_ROW, IMAGE_COL, 3)
    
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    ``x`` may be a scalar or a NumPy array; arrays are processed
    element-wise because the exponential is evaluated with ``np.exp``.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def yolo(out_buffer, batch_n, div, result=None):
    """Decode one bounding box per image from the accelerator output.

    For each of the first ``batch_n`` images the grid cell with the largest
    summed confidence is selected (confidence = index 4 of the last axis,
    summed over the 6 entries of the second-to-last axis).  The box
    parameters of that cell (indices 0..3) are divided by ``div``; the
    first two go through a sigmoid and the last two through ``exp``, and
    both are averaged over the 6 entries.  The result is shifted by the
    cell position, scaled, and mapped to the raw image frame with
    ``X_SCALE`` / ``Y_SCALE``.

    Parameters
    ----------
    out_buffer : array-like
        Accelerator output; ``out_buffer[:batch_n]`` must be reshapeable to
        ``(batch_n, cells, 6, 6)``.
    batch_n : int
        Number of leading images in ``out_buffer`` to decode.
    div : number
        Divisor applied to the raw box values (presumably undoes the
        fixed-point scaling of the accelerator -- confirm).
    result : list, optional
        List that receives one ``[xmin, xmax, ymin, ymax]`` entry (ints,
        truncated with ``int()``) per image.  A new list is created when
        omitted, instead of failing on ``None.append``.

    Returns
    -------
    list
        ``result`` (the list passed in, or the newly created one).
    """
    if result is None:
        result = []
    if batch_n == 0:
        # Nothing to decode; the ``-1`` reshape below cannot handle an
        # empty batch.
        return result

    res_np = np.array(out_buffer[:batch_n]).reshape(batch_n, -1, 6, 6)
    conf = res_np[..., 4].sum(axis=2)
    max_index = conf.argmax(1)

    # Flat cell index -> (column, row) on the GRID_COL-wide grid.
    grid_x = max_index % GRID_COL
    grid_y = max_index // GRID_COL

    # Gather, for every image, the 6 x 4 box parameters of its best cell.
    best_cells = res_np[np.arange(batch_n), max_index, :, :4] / div
    boxs = np.asarray(best_cells, dtype=np.float64)
    xy = sigmoid(boxs[..., :2]).mean(axis=1)
    wh = np.exp(boxs[..., 2:4]).mean(axis=1)

    xy[:, 0] += grid_x
    xy[:, 1] += grid_y

    # 16 matches the cell pitch of a 320 x 160 input on a 20 x 10 grid;
    # 20 is a fixed size scale whose origin is not visible here.
    xy *= 16
    wh *= 20

    # Map from the network input frame to the raw image frame.
    xy[:, 0] *= X_SCALE
    xy[:, 1] *= Y_SCALE
    wh[:, 0] *= X_SCALE
    wh[:, 1] *= Y_SCALE

    # Centre/size -> corner coordinates.
    xmin = xy[:, 0] - wh[:, 0] / 2
    xmax = xy[:, 0] + wh[:, 0] / 2
    ymin = xy[:, 1] - wh[:, 1] / 2
    ymax = xy[:, 1] + wh[:, 1] / 2

    for x_lo, x_hi, y_lo, y_hi in zip(xmin, xmax, ymin, ymax):
        result.append([int(x_lo), int(x_hi), int(y_lo), int(y_hi)])

    return result
    
# State shared between successive net() calls (ping-pong buffering).
which_buffer = 0              # index (0 or 1) of the buffer pair to run next
first_batch = True            # True until the first batch has been preloaded
net_cnt = 0                   # number of accelerator runs started so far
last_batch_size = BATCH_SIZE  # size of the most recently loaded batch

def net(img_paths, result):
    """Run one step of the double-buffered inference pipeline.

    Call pattern:

    * first call  -- only preloads ``img_paths`` into input buffer 0;
    * later calls -- start the accelerator on the batch loaded by the
      previous call, load ``img_paths`` into the other input buffer, decode
      the output of the run before this one, then wait for the DMA;
    * final call with ``img_paths=None`` -- runs the last loaded batch and
      decodes it after the DMA has finished.

    Parameters
    ----------
    img_paths : sequence or None
        Paths of the next batch, or ``None`` to flush the pipeline.
    result : list
        Receives the decoded boxes (see ``yolo``).
    """
    global first_batch
    global which_buffer
    global net_cnt
    global last_batch_size
    # buffer first batch
    if first_batch:
        if img_paths is None:
            # Empty image stream: nothing was loaded, so nothing to run.
            return
        first_batch = False
        which_buffer = 0
        load_image(img_paths, in_buffers[which_buffer])
        # Remember the size here too, otherwise a dataset consisting of a
        # single short batch would be decoded as a full one at flush time.
        last_batch_size = len(img_paths)
        return
    # count
    net_cnt += 1
    nn_ctrl.write(0x0, 0) # Reset
    # The image count written is the buffer capacity, even when the batch is
    # shorter; yolo() only decodes the leading entries it is asked for.
    nn_ctrl.write(0x10, in_buffers[which_buffer].shape[0])
    nn_ctrl.write(0x0, 1) # Deassert reset

    dma.recvchannel.transfer(out_buffers[which_buffer])
    dma.sendchannel.transfer(in_buffers[which_buffer])

    # switch buffer
    which_buffer = 1 - which_buffer

    # buffer next batch
    if img_paths is not None:
        load_image(img_paths, in_buffers[which_buffer])

    # yolo: after the switch, out_buffers[which_buffer] holds the output of
    # the run started by the previous call (none exists on the first run).
    # 127 * 15 is forwarded as yolo's divisor; its derivation is not visible
    # here.
    if net_cnt > 1:
        yolo(out_buffers[which_buffer], BATCH_SIZE, 127 * 15, result)

    if img_paths is not None:
        last_batch_size = len(img_paths)

    dma.sendchannel.wait()
    dma.recvchannel.wait()

    # last batch: decode the run that has just finished
    if img_paths is None:
        yolo(out_buffers[1 - which_buffer], last_batch_size, 127 * 15, result)
        
################################Inference##################################
# Figures reported at the end of the run.
interval_time = total_time = total_energy = 0
result = []
team.reset_batch_count()

rails = pynq.get_rails()

start = time.time()
# Record the power of the "5V" rail while the whole image stream is
# processed; 0.05 is the argument given to record() (presumably the sampling
# interval in seconds -- confirm against the PYNQ DataRecorder docs).
recorder = pynq.DataRecorder(rails["5V"].power)
with recorder.record(0.05):
    image_paths = team.get_next_batch()
    while image_paths is not None:
        net(image_paths, result)
        image_paths = team.get_next_batch()
    # image_paths is None here: this last call flushes the pipeline.
    net(image_paths, result)

end = time.time()
t = end - start

# Energy = mean recorded power multiplied by the elapsed wall-clock time.
energy = recorder.frame["5V_power"].mean() * t

total_time, total_energy = t, energy
print("Total time:", total_time, "seconds")
print("Total energy:", total_energy, "J")
print('images nums: {} .'.format(len(result)))
print('fps: {} .'.format(len(result) / total_time))

Total time: 2.878736972808838 seconds
Total energy: 13.2723911101 J
images nums: 1000 .
fps: 347.37456372204826 .


In [4]:
# Hand the decoded boxes plus the measured time and energy to the team
# helper, which writes them out.
team.save_results_xml(result, total_time, energy)
# NOTE(review): xlnk_reset() is expected to release the CMA buffers allocated
# through `xlnk` -- if so, the allocation cell must be re-run before running
# inference again. Confirm against the PYNQ Xlnk documentation.
xlnk.xlnk_reset()



All results stored in bbox_PL_1000_sample.txt


In [5]:
def computeIoU(boxA, boxB):
    """Return the intersection-over-union of two axis-aligned boxes.

    Each box is indexable as ``[x_min, y_min, x_max, y_max]``.  Coordinates
    are treated as inclusive pixel indices, which is why every width and
    height below carries a ``+ 1``.  Boxes that do not overlap yield ``0.0``.
    """
    def _area(x_lo, y_lo, x_hi, y_hi):
        # Inclusive-coordinate rectangle area.
        return (x_hi - x_lo + 1) * (y_hi - y_lo + 1)

    # Corners of the intersection rectangle.
    left = max(boxA[0], boxB[0])
    top = max(boxA[1], boxB[1])
    right = min(boxA[2], boxB[2])
    bottom = min(boxA[3], boxB[3])

    if right < left or bottom < top:
        return 0.0

    overlap = _area(left, top, right, bottom)
    area_a = _area(boxA[0], boxA[1], boxA[2], boxA[3])
    area_b = _area(boxB[0], boxB[1], boxB[2], boxB[3])

    # Union = sum of both areas minus the part counted twice.
    return overlap / float(area_a + area_b - overlap)



# Box files to compare: accelerator predictions vs. ground truth.
# NOTE(review): the two files are read from different directories
# (../deploy vs ../deploy2) -- confirm this is intended.
PL_BOX_FILE = "../deploy/bbox_PL_1000_sample.txt"
GT_BOX_FILE = "../deploy2/bbox_GT_1000_sample.txt"


def _read_boxes(path):
    """Parse a box file into a list of ``[x1, y1, x2, y2]`` integer lists.

    Blank lines and lines that name a file (ending in ``jpg``, ``xml`` or
    ``bin``) are skipped.  Every other line must hold four
    whitespace-separated integers in the order ``x1 x2 y1 y2``; they are
    reordered to the ``[x1, y1, x2, y2]`` layout expected by computeIoU().
    Lines are stripped first, so CRLF endings and a final line without a
    newline are handled as well.
    """
    boxes = []
    with open(path, "r") as handle:  # closed even if parsing fails
        for line in handle:
            entry = line.strip()
            if not entry or entry.endswith(("jpg", "xml", "bin")):
                continue
            x1, x2, y1, y2 = entry.split()
            boxes.append([int(x1), int(y1), int(x2), int(y2)])
    return boxes


bbox_PL = _read_boxes(PL_BOX_FILE)
bbox_GT = _read_boxes(GT_BOX_FILE)

if len(bbox_PL) != len(bbox_GT):
    print("ERROR! Ground truth and PL output do not match! "
          "({} predicted vs {} ground-truth boxes)".format(len(bbox_PL), len(bbox_GT)))

# Only pairs present in both files can be scored.
cnt = min(len(bbox_PL), len(bbox_GT))
IoU_avg = 0
for i in range(cnt):
    boxA = bbox_PL[i]
    boxB = bbox_GT[i]

    print('\n')
    print(i)
    print(boxA)
    print(boxB)

    IoU = computeIoU(boxA, boxB)
    print(IoU)

    IoU_avg = IoU_avg + IoU

print("\nAvg IOU:")
if cnt:
    print(IoU_avg/cnt)
else:
    print("n/a (no boxes to compare)")




0
[300, 133, 381, 235]
[295, 131, 382, 232]
0.8891780524831924


1
[293, 131, 314, 172]
[294, 129, 314, 173]
0.8936170212765957


2
[560, 235, 574, 248]
[560, 235, 573, 252]
0.7368421052631579


3
[238, 160, 260, 212]
[239, 161, 262, 216]
0.8062015503875969


4
[303, 168, 320, 198]
[301, 170, 324, 200]
0.6692307692307692


5
[236, 292, 247, 321]
[235, 293, 245, 322]
0.725


6
[173, 34, 217, 161]
[173, 35, 219, 158]
0.9287616511318242


7
[260, 85, 304, 197]
[260, 87, 304, 196]
0.9734513274336283


8
[208, 94, 246, 195]
[208, 95, 244, 194]
0.930115635997989


9
[305, 284, 323, 310]
[304, 281, 323, 309]
0.8247078464106845


10
[319, 153, 345, 230]
[318, 153, 346, 228]
0.9087688219663419


11
[322, 149, 340, 169]
[317, 147, 339, 174]
0.5684210526315789


12
[316, 156, 338, 176]
[317, 157, 339, 176]
0.8747514910536779


13
[328, 152, 357, 219]
[333, 149, 358, 211]
0.6887052341597796


14
[210, 95, 273, 289]
[209, 100, 269, 291]
0.8911819887429644


15
[435, 193, 479, 232]
[438, 191, 479,

0.9131792629606496


190
[560, 165, 568, 178]
[561, 165, 568, 177]
0.8253968253968254


191
[225, 319, 239, 348]
[226, 320, 236, 353]
0.6316831683168317


192
[304, 159, 333, 234]
[307, 157, 335, 239]
0.7787476280834915


193
[317, 148, 333, 170]
[323, 150, 338, 171]
0.451171875


194
[303, 240, 327, 280]
[299, 169, 321, 200]
0.0


195
[271, 152, 297, 201]
[274, 152, 299, 197]
0.7656033287101248


196
[308, 147, 341, 229]
[305, 151, 341, 228]
0.8678010471204188


197
[444, 184, 493, 220]
[452, 179, 489, 214]
0.5774509803921568


198
[151, 228, 215, 302]
[174, 163, 371, 304]
0.1055594651653765


199
[205, 200, 238, 268]
[205, 203, 235, 276]
0.7887432536622976


200
[279, 99, 290, 133]
[279, 99, 289, 132]
0.8904761904761904


201
[319, 164, 373, 274]
[318, 161, 373, 279]
0.9161164465786314


202
[228, 145, 326, 205]
[223, 158, 314, 197]
0.5577816957845808


203
[316, 136, 358, 204]
[311, 139, 357, 195]
0.7361623616236163


204
[293, 217, 311, 227]
[295, 218, 313, 227]
0.74235807860262




[303, 168, 325, 199]
0.8637532133676092


390
[191, 183, 228, 274]
[206, 140, 295, 226]
0.09811906146984681


391
[363, 22, 438, 137]
[361, 29, 433, 131]
0.8105741520727111


392
[329, 130, 336, 144]
[331, 133, 335, 146]
0.46153846153846156


393
[291, 12, 311, 65]
[290, 9, 313, 64]
0.8153846153846154


394
[371, 125, 393, 178]
[372, 125, 392, 178]
0.9130434782608695


395
[313, 146, 334, 167]
[305, 152, 324, 176]
0.24242424242424243


396
[301, 167, 319, 197]
[299, 169, 322, 199]
0.7046035805626598


397
[450, 219, 503, 276]
[445, 224, 503, 281]
0.7751895991332611


398
[267, 219, 281, 243]
[269, 219, 282, 242]
0.7819548872180451


399
[293, 179, 332, 266]
[289, 181, 330, 267]
0.8366615463389657


400
[310, 165, 337, 214]
[313, 166, 334, 215]
0.7580872011251758


401
[232, 106, 324, 216]
[248, 152, 399, 311]
0.16887104393008975


402
[342, 214, 379, 301]
[184, 136, 266, 216]
0.0


403
[279, 154, 294, 178]
[279, 153, 295, 182]
0.7843137254901961


404
[257, 149, 303, 250]
[260, 152, 30

[372, 125, 394, 178]
[374, 126, 393, 177]
0.8373590982286635


590
[306, 142, 359, 271]
[303, 145, 359, 269]
0.9127789046653144


591
[369, 189, 392, 246]
[364, 190, 390, 246]
0.7477638640429338


592
[377, 96, 403, 156]
[377, 95, 405, 155]
0.9020044543429844


593
[326, 171, 351, 234]
[328, 164, 350, 231]
0.7687671232876713


594
[439, 238, 462, 262]
[439, 237, 462, 261]
0.9230769230769231


595
[322, 168, 345, 213]
[321, 164, 342, 213]
0.7802907915993538


596
[210, 128, 296, 183]
[212, 128, 295, 182]
0.9482758620689655


597
[244, 126, 307, 280]
[236, 124, 314, 278]
0.7914006304049139


598
[338, 109, 364, 152]
[346, 154, 367, 205]
0.0


599
[489, 318, 524, 335]
[487, 318, 521, 336]
0.8261474269819193


600
[304, 170, 322, 199]
[303, 172, 326, 202]
0.680306905370844


601
[307, 303, 371, 360]
[307, 302, 371, 360]
0.9830508474576272


602
[306, 174, 324, 202]
[304, 175, 325, 203]
0.8097412480974124


603
[328, 162, 353, 214]
[322, 158, 350, 213]
0.6622369878183831


604
[96, 175, 203

789
[299, 149, 321, 204]
[298, 149, 321, 204]
0.9583333333333334


790
[377, 321, 425, 359]
[384, 323, 416, 359]
0.6389324960753532


791
[275, 111, 287, 149]
[276, 112, 287, 151]
0.8587570621468926


792
[166, 231, 239, 307]
[180, 164, 369, 300]
0.15257192676547515


793
[299, 149, 323, 207]
[300, 149, 324, 205]
0.8929503916449086


794
[328, 132, 353, 194]
[329, 133, 354, 191]
0.869180907483795


795
[306, 161, 331, 196]
[310, 161, 328, 200]
0.6758893280632411


796
[223, 160, 244, 201]
[224, 155, 248, 194]
0.6181665264928511


797
[278, 158, 307, 185]
[281, 157, 311, 181]
0.6701137538779731


798
[309, 295, 369, 360]
[305, 299, 371, 360]
0.8599363346975898


799
[262, 165, 301, 289]
[262, 163, 297, 280]
0.8233438485804416


800
[281, 153, 336, 208]
[285, 154, 335, 210]
0.8662754786905498


801
[282, 176, 304, 202]
[281, 174, 304, 202]
0.8922413793103449


802
[307, 185, 328, 214]
[304, 186, 327, 214]
0.8152610441767069


803
[440, 183, 463, 229]
[319, 156, 341, 183]
0.0


804
[301, 


989
[397, 283, 424, 298]
[397, 284, 424, 299]
0.8823529411764706


990
[75, 196, 108, 255]
[74, 191, 108, 254]
0.882145998240985


991
[350, 143, 379, 201]
[348, 142, 382, 204]
0.8027210884353742


992
[292, 155, 343, 208]
[293, 155, 339, 206]
0.8703703703703703


993
[302, 173, 320, 200]
[301, 173, 323, 202]
0.7710144927536232


994
[286, 169, 325, 209]
[318, 145, 335, 185]
0.060660124888492414


995
[324, 165, 405, 295]
[327, 162, 400, 292]
0.8639182780007296


996
[305, 211, 341, 305]
[304, 211, 339, 301]
0.8832501386577926


997
[372, 142, 392, 192]
[372, 142, 392, 192]
1.0


998
[292, 134, 331, 202]
[299, 127, 326, 207]
0.624031007751938


999
[322, 79, 347, 139]
[324, 79, 346, 136]
0.8411097099621689

Avg IOU:
0.7210140234502581
