In [1]:
import sys
import os

sys.path.append(os.path.abspath("../common"))

import math
import time
import numpy as np
import cv2
import pynq
# import dac_sdc
import ctypes

class Team:
    """Hands out test-image paths in batches and writes detection results to disk.

    Attributes:
        team_name: name given at construction (stored for reference only).
        batch_size: number of image paths returned per full batch.
        names_temp: full paths of every ``.jpg`` in the image directory, ordered
            by the integer value of the file name (``2.jpg`` before ``10.jpg``).
        start_idx: index of the first path of the most recently returned batch;
            ``-batch_size`` means "no batch handed out yet".
    """

    def __init__(self, team_name, batch_size,
                 img_dir='/home/xilinx/jupyter_notebooks/DACSDC2020/test_images/'):
        """Collect and numerically sort the ``.jpg`` files found in ``img_dir``.

        Args:
            team_name: name of the team.
            batch_size: number of paths per batch.
            img_dir: directory holding the test images. Every ``.jpg`` in it must
                have an integer stem, otherwise ``int()`` raises ``ValueError``.
        """
        self.team_name = team_name
        self.batch_size = batch_size

        jpg_names = [f for f in os.listdir(img_dir) if f.endswith('.jpg')]
        # Sort on the numeric stem so that "10.jpg" comes after "2.jpg".
        jpg_names.sort(key=lambda x: int(x[:-4]))
        self.names_temp = [os.path.join(img_dir, name) for name in jpg_names]
        self.start_idx = -batch_size

    def get_next_batch(self):
        """Return the next list of image paths, or None once all were handed out.

        The final batch may be shorter than ``batch_size``. Returning None also
        rewinds the counter, so the following call starts again from the first image.
        """
        self.start_idx += self.batch_size
        if self.start_idx >= len(self.names_temp):
            self.start_idx = -self.batch_size
            return None
        # Slicing clips at the end of the list, which covers the short last batch.
        return self.names_temp[self.start_idx:self.start_idx + self.batch_size]

    def reset_batch_count(self):
        """Rewind so that the next get_next_batch() returns the first batch."""
        self.start_idx = -self.batch_size

    def get_bitstream_path(self):
        """Return the relative path of the FPGA bitstream file."""
        return "./fastNet.bit"

    def save_results_xml(self, result_rectangle, total_time, energy):
        """Write one box per line to ``./bbox_PL_1000_sample.txt``.

        Despite the method name, the output is plain text: the first four
        entries of each box, space separated, in the order they are stored.

        Args:
            result_rectangle: iterable of boxes, each indexable at 0..3.
            total_time: accepted for interface compatibility; not written.
            energy: accepted for interface compatibility; not written.
        """
        # The context manager closes the file even if formatting a box raises.
        with open('./bbox_PL_1000_sample.txt', 'w') as f_out:
            for box in result_rectangle:
                x1, x2, y1, y2 = box[0], box[1], box[2], box[3]
                coord = str(x1) + ' ' + str(x2) + ' ' + str(y1) + ' ' + str(y2)
                f_out.write(coord + '\n')

        print("\nAll results stored in bbox_PL_1000_sample.txt")
        
        
# Team name handed to Team(); the batcher itself only depends on batch_size.
team_name = 'BJUT_Runner'
# Shared image-path batcher used by the cells below (64 paths per full batch).
team = Team(team_name, batch_size = 64)

In [2]:
# Load the saved bias array, view it as 6x6 and keep the first 4 columns.
# NOTE(review): the variable is spelled `last_bais` everywhere in this notebook;
# yolo() receives it, but the line that would add it is commented out there.
last_bais = np.load('last_bias.npy')
last_bais = last_bais.reshape((6, 6))[:, :4]
# Shared library providing the C `load_image` routine called from load_image().
cfuns = ctypes.cdll.LoadLibrary("./load_image.so")

# Open the overlay from the team's bitstream and grab the handles used by net():
# the AXI DMA engine, the contiguous-memory allocator and the accelerator IP.
overlay = pynq.Overlay(team.get_bitstream_path())
dma = overlay.axi_dma_0
xlnk = pynq.Xlnk()
nn_ctrl = overlay.ultra_net_0
print('got nn accelerator!')

got nn accelerator!


In [3]:
# Taken from the Team instance so the batcher and the buffers use the same size.
BATCH_SIZE = team.batch_size
# Rows / columns used to size the input buffers.
IMAGE_RAW_ROW = 360
IMAGE_RAW_COL = 640
# Rows / columns passed to the C loader in load_image().
# NOTE(review): presumably the resolution the network consumes - confirm against load_image.so.
IMAGE_ROW = 160
IMAGE_COL = 320
# Output grid size; each grid cell holds a 6x6 block of int32 in the output buffers.
# NOTE(review): `GRID_ROw` ends in a lowercase `w`; kept as is because the buffer
# allocations below reference that exact name.
GRID_ROw = 10
GRID_COL = 20
# Ratios applied by yolo() to box coordinates: 640/320 = 2.0 and 360/160 = 2.25.
X_SCALE = IMAGE_RAW_COL / IMAGE_COL
Y_SCALE = IMAGE_RAW_ROW / IMAGE_ROW


# Two input and two output contiguous-memory buffers. net() alternates between
# index 0 and index 1 so one pair can be in flight while the other is accessed.
in_buffer0 = xlnk.cma_array(shape=(BATCH_SIZE, IMAGE_RAW_ROW, IMAGE_RAW_COL, 3), dtype=np.uint8, cacheable = 1)
in_buffer1 = xlnk.cma_array(shape=(BATCH_SIZE, IMAGE_RAW_ROW, IMAGE_RAW_COL, 3), dtype=np.uint8, cacheable = 1)
in_buffers = [in_buffer0, in_buffer1]
out_buffer0 = xlnk.cma_array(shape=(BATCH_SIZE, GRID_ROw, GRID_COL, 6, 6), dtype=np.int32, cacheable = 1)
out_buffer1 = xlnk.cma_array(shape=(BATCH_SIZE, GRID_ROw, GRID_COL, 6, 6), dtype=np.int32, cacheable = 1)
out_buffers = [out_buffer0, out_buffer1]


# Load images using the C helper library.
def load_image(image_paths, buff):
    """Fill ``buff`` with the images at ``image_paths`` through the C library.

    Each path is turned into a UTF-8 C string; the array of those strings and a
    raw pointer to ``buff``'s memory are passed to ``cfuns.load_image`` together
    with the image count, IMAGE_ROW, IMAGE_COL and the channel count 3.
    """
    c_strings = []
    for path in image_paths:
        c_strings.append(ctypes.c_char_p(str(path).encode('utf-8')))
    n_images = len(c_strings)
    path_array = (ctypes.c_char_p * n_images)(*c_strings)

    # Keep the ndarray view in a local so it stays referenced during the C call.
    buffer_view = np.asarray(buff)
    pixel_ptr = buffer_view.ctypes.data_as(ctypes.c_char_p)

    cfuns.load_image(path_array, pixel_ptr, n_images, IMAGE_ROW, IMAGE_COL, 3)
    
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x), element-wise for arrays."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def yolo(out_buffer, batch_n, div, last_bais=None, result=None):
    """Decode one bounding box per image from the raw accelerator output.

    For each of the first ``batch_n`` images the grid cell with the largest
    summed confidence (column 4, summed over the 6 rows of the cell's 6x6
    block) is selected. The first four columns of that block are divided by
    ``div``; columns 0-1 go through a sigmoid and columns 2-3 through ``exp``,
    and each pair is averaged over the 6 rows to give a centre and a size.

    Args:
        out_buffer: array-like whose first ``batch_n`` entries reshape to
            ``(batch_n, cells, 6, 6)``.
        batch_n: number of leading images to decode.
        div: divisor applied to the raw box values.
        last_bais: unused; the bias addition is disabled.
        result: list that receives one ``[xmin, xmax, ymin, ymax]`` list of
            ints per image. A new list is created when omitted.

    Returns:
        The list the boxes were appended to (``result`` itself when supplied).
    """
    if result is None:
        # The original appended to None here and raised AttributeError.
        result = []
    if batch_n == 0:
        # reshape(0, -1, 6, 6) cannot infer the -1 axis; nothing to decode anyway.
        return result

    res_np = np.array(out_buffer[:batch_n]).reshape(batch_n, -1, 6, 6)
    conf = res_np[..., 4].sum(axis=2)
    max_index = conf.argmax(1)

    # Flat cell index -> (column, row) on a grid that is GRID_COL cells wide.
    grid_x = max_index % GRID_COL
    grid_y = max_index // GRID_COL

    # Gather the winning cell of every image in one step: shape (batch_n, 6, 4).
    boxs = res_np[np.arange(batch_n), max_index, :, :4] / div  # + last_bais
    xy = sigmoid(boxs[..., :2]).mean(axis=1)
    wh = np.exp(boxs[..., 2:4]).mean(axis=1)

    xy[:, 0] += grid_x
    xy[:, 1] += grid_y

    # NOTE(review): 16 equals IMAGE_COL / GRID_COL (320 / 20), i.e. pixels per
    # grid cell; 20 is presumably a fixed anchor size - confirm with the model.
    xy *= 16
    wh *= 20

    xy[:, 0] *= X_SCALE
    xy[:, 1] *= Y_SCALE
    wh[:, 0] *= X_SCALE
    wh[:, 1] *= Y_SCALE
    xmin = xy[:, 0] - wh[:, 0] / 2
    xmax = xy[:, 0] + wh[:, 0] / 2
    ymin = xy[:, 1] - wh[:, 1] / 2
    ymax = xy[:, 1] + wh[:, 1] / 2

    # int() truncates toward zero; stored order is xmin, xmax, ymin, ymax.
    for left, right, top, bottom in zip(xmin, xmax, ymin, ymax):
        result.append([int(left), int(right), int(top), int(bottom)])
    return result

# Module-level pipeline state, mutated by net() across calls.
which_buffer = 0  # index (0 or 1) into in_buffers / out_buffers
first_batch = True  # True until net() has preloaded the first batch
net_cnt = 0  # number of DMA transfers net() has started
last_batch_size = BATCH_SIZE  # overwritten with the length of a short batch when net() sees one

def net(img_paths, result):
    """Advance the double-buffered inference pipeline by one step.

    The first call that receives a batch only preloads it into input buffer 0.
    Every later call starts a DMA transfer for the batch that is already
    loaded and, while the transfer runs, loads ``img_paths`` into the other
    input buffer and decodes the output written by the previous transfer.
    Calling with ``None`` flushes the pipeline: after the transfer finishes,
    the output it just produced is decoded with ``last_batch_size`` entries.

    Args:
        img_paths: list of image paths for the next batch, or None when no
            images are left.
        result: list that yolo() appends the decoded boxes to.
    """
    global first_batch
    global which_buffer
    global net_cnt
    global last_batch_size

    # Remember the size of a short batch before anything else. Doing this ahead
    # of the first-batch early return matters when the whole dataset fits in a
    # single short batch; otherwise the flush would decode BATCH_SIZE entries.
    if img_paths is not None and len(img_paths) != BATCH_SIZE:
        last_batch_size = len(img_paths)

    # buffer first batch
    if first_batch:
        if img_paths is None:
            # Empty dataset: nothing was ever loaded, so there is nothing to run.
            return
        first_batch = False
        which_buffer = 0
        load_image(img_paths, in_buffers[which_buffer])
        return

    # count
    net_cnt += 1
    nn_ctrl.write(0x0, 0)  # Reset
    nn_ctrl.write(0x10, in_buffers[which_buffer].shape[0])
    nn_ctrl.write(0x0, 1)  # Deassert reset
    # Arm the receive channel before sending, as in the original sequence.
    dma.recvchannel.transfer(out_buffers[which_buffer])
    dma.sendchannel.transfer(in_buffers[which_buffer])

    # switch buffer: from here on which_buffer names the pair NOT in flight
    which_buffer = 1 - which_buffer

    # buffer next batch
    if img_paths is not None:
        load_image(img_paths, in_buffers[which_buffer])

    # Decode the output of the previous transfer (none exists on the first one).
    if net_cnt > 1:
        yolo(out_buffers[which_buffer], BATCH_SIZE, 7 * 15, last_bais, result)

    dma.sendchannel.wait()
    dma.recvchannel.wait()

    # last batch: decode the output that the transfer above just produced
    if img_paths is None:
        yolo(out_buffers[(which_buffer + 1) % 2], last_batch_size, 7 * 15, last_bais, result)
        
################################Inference##################################
# Timed inference run: feed every batch through net(), record board power while
# doing so, then report time / energy / throughput and save the boxes.
# NOTE(review): interval_time is assigned here but not used in this cell.
interval_time = 0
total_time = 0
total_energy = 0
result = list()
# Make sure batching starts from the first image.
team.reset_batch_count()

rails = pynq.get_rails()

# NOTE(review): the timer starts before the recorder is created, so the measured
# time includes recorder construction.
start = time.time()    
recorder = pynq.DataRecorder(rails["5V"].power)
# Sample the 5V rail power every 0.05 s for the duration of the loop.
with recorder.record(0.05): 
    while True:
        image_paths = team.get_next_batch()
        # net() must also be called with None once, to flush the pipeline.
        net(image_paths, result)

#         print('pro_image_cnt', len(result))
        # Stop after the flushing call.
        if image_paths is None:
            break

end = time.time()
t = end - start
# NOTE(review): xlnk_reset() is called here and again at the end of the cell.
xlnk.xlnk_reset()
# Energy estimate: mean recorded 5V power multiplied by the elapsed wall time.
energy = recorder.frame["5V_power"].mean() * t    
# energy = 0

total_time = t
total_energy = energy
print("Total time:", total_time, "seconds")
print("Total energy:", total_energy, "J")
print('images nums: {} .'.format(len(result)))
print('fps: {} .'.format(len(result) / total_time))
print(len(result))
# NOTE(review): this prints every box, which floods the cell output for large runs.
print(result)
team.save_results_xml(result, total_time, energy)
xlnk.xlnk_reset()


Total time: 2.9765923023223877 seconds
Total energy: 15.0689985305 J
images nums: 1000 .
fps: 335.95464156101696 .
1000
[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0