 # 在PYNQ-Z2上部署神经网络实现目标检测

## 1. 准备阶段

首先我们引入所需要的模块

In [1]:
import sys
import math
import numpy as np
import os
import time
from PIL import Image
from matplotlib import pyplot
import cv2
from datetime import datetime
from pynq import Xlnk
from pynq import Overlay
from pynq.mmio import MMIO
import scipy.misc
from IPython.display import display
from pynq.lib.video import *

然后，我们加载在 Vivado 中综合好的 Overlay，同时启动 HDMI 输出模块。

In [2]:
# Load the FPGA bitstream and grab handles to the hardware blocks used by the
# rest of the notebook: the AXI DMA engine, the contiguous-memory allocator,
# the accelerator's register window and the HDMI output.
OVERLAY_PATH = 'multiple_boundbox.bit'
overlay = Overlay(OVERLAY_PATH)
dma = overlay.axi_dma_0
# Text file holding the per-layer iteration counts and the hex weight data.
WEIGHTS_FILE_NAME = 'weights_file.txt'
# Allocator for the physically contiguous buffers handed to the DMA.
xlnk = Xlnk()
# 1 KiB register window of the accelerator; the helper functions write to
# offsets 0x0, 0x10 and 0x18 inside it.
nn_ctrl = MMIO(0x43C40000, length=1024)
# HDMI output: 640x480, 24 bits per pixel, BGR channel order.
Mode     = VideoMode(640,480,24)
hdmi_out = overlay.video.hdmi_out
hdmi_out.configure(Mode,PIXEL_BGR)
hdmi_out.start()

<contextlib._GeneratorContextManager at 0xa4246810>

然后我们定义相关的函数

In [3]:
# ---------------------------------------------------------------------------
# Input geometry and DMA buffers
# ---------------------------------------------------------------------------
MINIBATCH_SIZE = 1
height = 224
width = 224
pixel_bits = 24
# One DMA line is 384 bits wide, i.e. 16 pixels of 24 bits each.
pixels_per_line = 384/pixel_bits
num_lines = int((height*width)/pixels_per_line)

# Each input line occupies a 64-byte row; the inference cells fill only the
# first 48 bytes (16 pixels x 3 bytes) of every row.
in_buffer = xlnk.cma_array(shape=(MINIBATCH_SIZE*num_lines, 64), dtype=np.uint8)
fire1_num_out_lines = 28*28*MINIBATCH_SIZE
fire1_out_buffer = xlnk.cma_array(shape=(int(16*fire1_num_out_lines),), dtype=np.uint32)
fire2345_num_out_lines = 14*14*MINIBATCH_SIZE
# fire2/fire3 buffers are used alternately as input and output of later layers.
fire2_out_buffer = xlnk.cma_array(shape=(int(16*fire2345_num_out_lines),), dtype=np.uint32)
fire3_out_buffer = xlnk.cma_array(shape=(int(16*fire2345_num_out_lines),), dtype=np.uint32)
# Final network output: one row of 16 int32 words per cell of the 14x14 grid.
bndboxes = xlnk.cma_array(shape=(14*14,16), dtype=np.int32)

# ---------------------------------------------------------------------------
# Pass 1 over the weights file: read the per-layer iteration counts
# ---------------------------------------------------------------------------
NUM_LAYERS = 7
squeeze_weight_iterations = np.zeros(NUM_LAYERS)
expand_weight_iterations = np.zeros(NUM_LAYERS)
squeeze_factor_iterations = np.zeros(NUM_LAYERS)
expand_factor_iterations = np.zeros(NUM_LAYERS)

# Header keyword -> array that receives the value for the current layer.
iteration_counts = {
    "squeeze_weight_iterations": squeeze_weight_iterations,
    "expand_weight_iterations": expand_weight_iterations,
    "squeeze_factor_iterations": squeeze_factor_iterations,
    "expand_factor_iterations": expand_factor_iterations,
}

layer = 0
with open(WEIGHTS_FILE_NAME, "r") as weights_file:
    for line in weights_file:
        # Header lines have the form "<keyword>: <integer>".
        if "layer" in line:
            layer = int(line.split(": ")[1])
        for keyword, counts in iteration_counts.items():
            if keyword in line:
                counts[layer] = int(line.split(": ")[1])

# Total number of 16-word lines each layer's weight/factor buffer must hold.
weightfactors_length = (squeeze_weight_iterations +
                        expand_weight_iterations +
                        squeeze_factor_iterations +
                        expand_factor_iterations)

# One contiguous uint32 buffer per layer, 16 words per line.
weightsfactors = [
    xlnk.cma_array(shape=(int(16*line_count),), dtype=np.uint32)
    for line_count in weightfactors_length
]

# ---------------------------------------------------------------------------
# Pass 2 over the weights file: fill the per-layer buffers
# ---------------------------------------------------------------------------
index = 0
with open(WEIGHTS_FILE_NAME, "r") as weights_file:
    for line in weights_file:
        if "layer" in line:
            layer = int(line.split(": ")[1])
            index = 0
        elif not any(keyword in line for keyword in iteration_counts):
            # Data line: one long hex number (optionally prefixed with "0x").
            # Strip the line ending explicitly so that a missing final newline
            # or a CRLF file cannot shift the 8-digit word boundaries.
            hex_digits = line.split('0x')[-1].strip()
            # Split into 32-bit words starting from the least significant
            # (rightmost) 8 hex digits; a shorter leftover at the most
            # significant end is parsed as-is.
            word_no = 0
            end = len(hex_digits)
            while end > 0:
                begin = max(0, end - 8)
                weightsfactors[layer][index*16 + word_no] = int(hex_digits[begin:end], 16)
                end = begin
                word_no += 1
            index += 1

def weightsfactors_transfer(weightsfactors):
    """Send one weight/factor buffer to the accelerator and wait for completion.

    Writes 13 to register 0x18, then 0 followed by 1 to register 0x0, starts
    a transfer of ``weightsfactors`` on the DMA send channel and blocks until
    that transfer has finished.
    """
    # NOTE(review): 13 presumably selects a "load weights" mode and the 0 -> 1
    # write to 0x0 presumably (re)starts the core -- confirm against the HLS IP.
    for offset, value in ((0x18, 13), (0x0, 0), (0x0, 1)):
        nn_ctrl.write(offset, value)
    tx_channel = dma.sendchannel
    tx_channel.transfer(weightsfactors)
    tx_channel.wait()
    
def fire(inbuffer, outbuffer, squeeze_din, whichfire):
    """Configure the accelerator for one layer and start both DMA transfers.

    Register 0x0 is written 0, then ``squeeze_din`` goes to 0x10 and
    ``whichfire`` to 0x18, then 0x0 is written 1. The receive transfer into
    ``outbuffer`` is armed before ``inbuffer`` is sent. The function does not
    wait for either transfer; the caller waits on the receive channel.
    """
    register_writes = (
        (0x0, 0),
        (0x10, squeeze_din),
        (0x18, whichfire),
        (0x0, 1),
    )
    for offset, value in register_writes:
        nn_ctrl.write(offset, value)
    # Arm the receive side first, then push the input.
    dma.recvchannel.transfer(outbuffer)
    dma.sendchannel.transfer(inbuffer)

def line_intersection_union(line1_min, line1_max, line2_min, line2_max):
    """Return the length of the overlap of two 1-D intervals.

    Args:
        line1_min, line1_max: End points of the first interval.
        line2_min, line2_max: End points of the second interval.

    Returns:
        The length shared by both intervals, or 0 when they do not overlap
        (intervals that only touch at an end point count as not overlapping).
        The result is never negative, even if an interval is passed with
        ``min > max``.
    """
    # Despite the name, only the intersection is computed.
    overlap = min(line1_max, line2_max) - max(line1_min, line2_min)
    return overlap if overlap > 0 else 0

def intersection(rect1, rect2):
    """Return the overlap area of two axis-aligned boxes.

    Both boxes are indexed as ``(x_min, x_max, y_min, y_max)``; the area is
    the product of the overlap lengths along x and along y.
    """
    x1_lo, x1_hi, y1_lo, y1_hi = rect1[0], rect1[1], rect1[2], rect1[3]
    x2_lo, x2_hi, y2_lo, y2_hi = rect2[0], rect2[1], rect2[2], rect2[3]
    overlap_x = line_intersection_union(x1_lo, x1_hi, x2_lo, x2_hi)
    overlap_y = line_intersection_union(y1_lo, y1_hi, y2_lo, y2_hi)
    return overlap_x*overlap_y

## 2. 单张图片测试

我们首先引入一张图片进行测试

In [None]:
# Load the test picture (OpenCV returns BGR), show it inline as RGB and wrap
# it in the one-element list consumed by the inference cell.
car_im = cv2.imread('vid_9.jpg', cv2.IMREAD_COLOR)
if car_im is None:
    # cv2.imread signals a missing/unreadable file by returning None.
    raise FileNotFoundError("Cannot read image 'vid_9.jpg'")
car_im_rgb = cv2.cvtColor(car_im, cv2.COLOR_BGR2RGB)
# scipy.misc.toimage no longer exists in current SciPy; PIL does the same job
# for an 8-bit RGB array.
display(Image.fromarray(car_im_rgb))
im_list = [car_im]

In [None]:
# Run the network once on im_list[0], decode and filter the predicted boxes,
# draw them on the image and show the result inline and on the HDMI output.
GRID_SIZE = 14          # the network predicts on a 14x14 grid of cells
CELL_PIXELS = 16        # each cell covers 16x16 pixels of the 224x224 input
TOP_K = 7               # number of best-scoring cells examined
MIN_BOX_SIDE = 20       # boxes must be wider and taller than this (pixels)
MAX_SHARED_AREA = 2500  # boxes sharing more area than this count as duplicates

start = time.time()
num_images = len(im_list)

# Pack the resized image into the DMA input buffer: 16 pixels (48 bytes) per
# line, written to the first 48 bytes of each 64-byte row.
image = cv2.resize(im_list[0], (width, height))
in_buffer[0:num_lines, 0:48] = np.reshape(image, (num_lines, 48))

# (input buffer, output buffer, squeeze_din) for layers 1..7; the two 14x14
# buffers are used alternately from layer 3 onwards.
layer_schedule = [
    (in_buffer, fire1_out_buffer, 56),
    (fire1_out_buffer, fire2_out_buffer, 28),
    (fire2_out_buffer, fire3_out_buffer, 14),
    (fire3_out_buffer, fire2_out_buffer, 14),
    (fire2_out_buffer, fire3_out_buffer, 14),
    (fire3_out_buffer, fire2_out_buffer, 14),
    (fire2_out_buffer, bndboxes, 14),
]
for layer_no, (src, dst, squeeze_din) in enumerate(layer_schedule, start=1):
    weightsfactors_transfer(weightsfactors[layer_no - 1])
    fire(src, dst, squeeze_din, layer_no)
    dma.recvchannel.wait()

# Copy the five words used per grid cell out of the DMA buffer: column 0 is
# the score used for ranking, columns 1..4 are the raw box coordinates.
# A plain NumPy copy is enough; no contiguous (CMA) memory is needed here.
result = np.array(bndboxes[:, 0:5], dtype=np.int32)

# The TOP_K best-scoring cells, best first; ties go to the lower cell index.
scores = result[:, 0].tolist()
index = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)[:TOP_K]

# Drop every cell that lies within squared grid distance 2 (i.e. one of the
# eight neighbours) of a better-scoring cell that has already been kept.
kept_cells = []
for cell in index:
    row, col = divmod(cell, GRID_SIZE)
    near_kept = any(
        (row - kept // GRID_SIZE) ** 2 + (col - kept % GRID_SIZE) ** 2 <= 2
        for kept in kept_cells
    )
    if not near_kept:
        kept_cells.append(cell)
index = kept_cells

# Network coordinates refer to the 224x224 input; scale them back to the
# resolution of the picture the boxes are drawn on.
img_h, img_w = im_list[0].shape[:2]
x_scale = img_w / width
y_scale = img_h / height

candidates = []
for cell in index:
    # NOTE(review): raw / 2^22 * 0x882065 / 2^23 is presumably a fixed-point
    # rescale of the accelerator output -- confirm against the HLS design.
    offsets = result[cell, 1:5].astype('float')
    offsets = offsets / float(1 << 22) * 0x882065 / float(1 << 23)
    row, col = divmod(cell, GRID_SIZE)
    x_min = int((offsets[0] + col * CELL_PIXELS) * x_scale)
    y_min = int((offsets[1] + row * CELL_PIXELS) * y_scale)
    x_max = int((offsets[2] + col * CELL_PIXELS) * x_scale)
    y_max = int((offsets[3] + row * CELL_PIXELS) * y_scale)
    if (x_max - x_min) > MIN_BOX_SIDE and (y_max - y_min) > MIN_BOX_SIDE:
        candidates.append([x_min, x_max, y_min, y_max])

# Keep a box only if it does not share too much area with any better box
# that was kept before it.
kept_boxes = []
for box in candidates:
    if all(intersection(prev, box) <= MAX_SHARED_AREA for prev in kept_boxes):
        kept_boxes.append(box)

# Rows are (x_min, x_max, y_min, y_max): all size-filtered boxes / final boxes.
bndbox_fuck2 = np.array(candidates, dtype=float).reshape(len(candidates), 4)
bndbox_fuck = np.array(kept_boxes, dtype=float).reshape(len(kept_boxes), 4)

for x_min, x_max, y_min, y_max in kept_boxes:
    cv2.rectangle(im_list[0], (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
numbers = 'The number of cars '
cv2.rectangle(im_list[0], (0, 0), (210, 50), (0, 0, 255), 2)
cv2.putText(im_list[0], numbers, (10, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1)
cv2.putText(im_list[0], str(bndbox_fuck.shape[0]), (70, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
end = time.time()
print('Running time: {} Seconds'.format(end-start))

# Show the annotated picture inline (RGB) and on the 640x480 HDMI output.
display(Image.fromarray(cv2.cvtColor(im_list[0], cv2.COLOR_BGR2RGB)))
frame = cv2.resize(im_list[0], (640, 480))
outframe = hdmi_out.newframe()
outframe[:] = frame
hdmi_out.writeframe(outframe)

## 3. 多张图片测试

然后，我们对多张图片依次进行测试。

In [None]:
# Run the detector on every .jpg in IMAGE_DIR, showing each annotated picture
# inline and on the HDMI output.
IMAGE_DIR = '/home/xilinx/jupyter_notebooks/nesc_test/deploy/'
GRID_SIZE = 14          # the network predicts on a 14x14 grid of cells
CELL_PIXELS = 16        # each cell covers 16x16 pixels of the 224x224 input
TOP_K = 7               # number of best-scoring cells examined
MIN_BOX_SIDE = 20       # boxes must be wider and taller than this (pixels)
MAX_SHARED_AREA = 1000  # boxes sharing more area than this count as duplicates

# (input buffer, output buffer, squeeze_din) for layers 1..7; the two 14x14
# buffers are used alternately from layer 3 onwards.
layer_schedule = [
    (in_buffer, fire1_out_buffer, 56),
    (fire1_out_buffer, fire2_out_buffer, 28),
    (fire2_out_buffer, fire3_out_buffer, 14),
    (fire3_out_buffer, fire2_out_buffer, 14),
    (fire2_out_buffer, fire3_out_buffer, 14),
    (fire3_out_buffer, fire2_out_buffer, 14),
    (fire2_out_buffer, bndboxes, 14),
]

count_number = 0
number_car = 0
# Sorted so that the pictures are processed in a reproducible order.
image_files = sorted(name for name in os.listdir(IMAGE_DIR) if name.endswith('.jpg'))

for file_name in image_files:
    count_number = count_number + 1
    # os.listdir returns bare names; join with the directory so that loading
    # does not depend on the current working directory.
    image_path = os.path.join(IMAGE_DIR, file_name)
    car_im = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if car_im is None:
        # cv2.imread signals an unreadable file by returning None.
        print('Skipping unreadable image: {}'.format(image_path))
        continue
    im_list = [car_im]
    num_images = len(im_list)

    # Pack the resized image into the DMA input buffer: 16 pixels (48 bytes)
    # per line, written to the first 48 bytes of each 64-byte row.
    image = cv2.resize(im_list[0], (224, 224))
    in_buffer[0:num_lines, 0:48] = np.reshape(image, (num_lines, 48))

    for layer_no, (src, dst, squeeze_din) in enumerate(layer_schedule, start=1):
        weightsfactors_transfer(weightsfactors[layer_no - 1])
        fire(src, dst, squeeze_din, layer_no)
        dma.recvchannel.wait()

    # Column 0 is the score used for ranking, columns 1..4 the raw box
    # coordinates. A plain NumPy copy avoids allocating CMA memory per image.
    result = np.array(bndboxes[:, 0:5], dtype=np.int32)

    # The TOP_K best-scoring cells, best first; ties go to the lower index.
    scores = result[:, 0].tolist()
    index = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)[:TOP_K]

    # Drop every cell within squared grid distance 2 (one of the eight
    # neighbours) of a better-scoring cell that has already been kept.
    kept_cells = []
    for cell in index:
        row, col = divmod(cell, GRID_SIZE)
        near_kept = any(
            (row - kept // GRID_SIZE) ** 2 + (col - kept % GRID_SIZE) ** 2 <= 2
            for kept in kept_cells
        )
        if not near_kept:
            kept_cells.append(cell)
    index = kept_cells

    # Network coordinates refer to the 224x224 input; scale them back to the
    # resolution of the picture the boxes are drawn on.
    img_h, img_w = im_list[0].shape[:2]
    x_scale = img_w / width
    y_scale = img_h / height

    candidates = []
    for cell in index:
        # NOTE(review): raw / 2^22 * 0x882065 / 2^23 is presumably a
        # fixed-point rescale of the accelerator output -- confirm.
        offsets = result[cell, 1:5].astype('float')
        offsets = offsets / float(1 << 22) * 0x882065 / float(1 << 23)
        row, col = divmod(cell, GRID_SIZE)
        x_min = int((offsets[0] + col * CELL_PIXELS) * x_scale)
        y_min = int((offsets[1] + row * CELL_PIXELS) * y_scale)
        x_max = int((offsets[2] + col * CELL_PIXELS) * x_scale)
        y_max = int((offsets[3] + row * CELL_PIXELS) * y_scale)
        if (x_max - x_min) > MIN_BOX_SIDE and (y_max - y_min) > MIN_BOX_SIDE:
            candidates.append([x_min, x_max, y_min, y_max])

    # Keep a box only if it does not share too much area with any better box
    # that was kept before it.
    kept_boxes = []
    for box in candidates:
        if all(intersection(prev, box) <= MAX_SHARED_AREA for prev in kept_boxes):
            kept_boxes.append(box)

    # Rows are (x_min, x_max, y_min, y_max): size-filtered / final boxes.
    bndbox_fuck2 = np.array(candidates, dtype=float).reshape(len(candidates), 4)
    bndbox_fuck = np.array(kept_boxes, dtype=float).reshape(len(kept_boxes), 4)

    for x_min, x_max, y_min, y_max in kept_boxes:
        cv2.rectangle(im_list[0], (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
    numbers = 'The number of cars '
    number_car = bndbox_fuck.shape[0]
    cv2.rectangle(im_list[0], (0, 0), (210, 50), (0, 0, 255), 2)
    cv2.putText(im_list[0], numbers, (10, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1)
    cv2.putText(im_list[0], str(number_car), (70, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    # Show the annotated picture inline (RGB) and on the 640x480 HDMI output.
    display(Image.fromarray(cv2.cvtColor(im_list[0], cv2.COLOR_BGR2RGB)))
    frame = cv2.resize(im_list[0], (640, 480))
    outframe = hdmi_out.newframe()
    outframe[:] = frame
    hdmi_out.writeframe(outframe)

## 4. 视频输出测试

最后，我们对摄像头采集的视频逐帧进行检测，并将结果输出到 HDMI。

In [None]:
# Live detection: grab frames from camera 0, run the network on each frame,
# draw the detected boxes and push the annotated frame to the HDMI output.
GRID_SIZE = 14          # the network predicts on a 14x14 grid of cells
CELL_PIXELS = 16        # each cell covers 16x16 pixels of the 224x224 input
TOP_K = 7               # number of best-scoring cells examined
MIN_BOX_SIDE = 20       # boxes must be wider and taller than this (pixels)
MAX_BOX_HEIGHT = 300    # boxes must be lower than this (pixels)
MAX_SHARED_AREA = 1000  # boxes sharing more area than this count as duplicates

# (input buffer, output buffer, squeeze_din) for layers 1..7; the two 14x14
# buffers are used alternately from layer 3 onwards.
layer_schedule = [
    (in_buffer, fire1_out_buffer, 56),
    (fire1_out_buffer, fire2_out_buffer, 28),
    (fire2_out_buffer, fire3_out_buffer, 14),
    (fire3_out_buffer, fire2_out_buffer, 14),
    (fire2_out_buffer, fire3_out_buffer, 14),
    (fire3_out_buffer, fire2_out_buffer, 14),
    (fire2_out_buffer, bndboxes, 14),
]

count = 0
count_number = 0
number_car = 0
videoIn = cv2.VideoCapture(0)
# Request 640x480 frames, matching the HDMI mode and the box scaling below.
videoIn.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
videoIn.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
try:
    while count < 10000000:
        count = count + 1
        count_number = count_number + 1
        grabbed, frame = videoIn.read()
        if not grabbed:
            # read() returns (False, None) when no frame could be captured.
            print('No frame received from the camera; stopping.')
            break
        # The camera may ignore the requested size; the HDMI frame and the
        # pixel thresholds below all assume 640x480.
        if frame.shape[:2] != (480, 640):
            frame = cv2.resize(frame, (640, 480))

        # Pack the resized frame into the DMA input buffer: 16 pixels
        # (48 bytes) per line, in the first 48 bytes of each 64-byte row.
        image_put = cv2.resize(frame, (224, 224))
        in_buffer[0:num_lines, 0:48] = np.reshape(image_put, (num_lines, 48))

        for layer_no, (src, dst, squeeze_din) in enumerate(layer_schedule, start=1):
            weightsfactors_transfer(weightsfactors[layer_no - 1])
            fire(src, dst, squeeze_din, layer_no)
            dma.recvchannel.wait()

        # Column 0 is the score used for ranking, columns 1..4 the raw box
        # coordinates. A plain NumPy copy avoids two CMA allocations per frame.
        result = np.array(bndboxes[:, 0:5], dtype=np.int32)

        # The TOP_K best-scoring cells, best first; ties go to the lower index.
        scores = result[:, 0].tolist()
        index = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)[:TOP_K]

        # Drop every cell within squared grid distance 2 (one of the eight
        # neighbours) of a better-scoring cell that has already been kept.
        kept_cells = []
        for cell in index:
            row, col = divmod(cell, GRID_SIZE)
            near_kept = any(
                (row - kept // GRID_SIZE) ** 2 + (col - kept % GRID_SIZE) ** 2 <= 2
                for kept in kept_cells
            )
            if not near_kept:
                kept_cells.append(cell)
        index = kept_cells

        candidates = []
        for cell in index:
            # NOTE(review): raw / 2^22 * 0x882065 / 2^23 is presumably a
            # fixed-point rescale of the accelerator output -- confirm.
            offsets = result[cell, 1:5].astype('float')
            offsets = offsets / float(1 << 22) * 0x882065 / float(1 << 23)
            row, col = divmod(cell, GRID_SIZE)
            # Scale from the 224x224 network input to the 640x480 frame.
            x_min = int((offsets[0] + col * CELL_PIXELS) * (640/width))
            y_min = int((offsets[1] + row * CELL_PIXELS) * (480/height))
            x_max = int((offsets[2] + col * CELL_PIXELS) * (640/width))
            y_max = int((offsets[3] + row * CELL_PIXELS) * (480/height))
            box_w = x_max - x_min
            box_h = y_max - y_min
            # Only boxes lying entirely left of x=300 or entirely right of
            # x=350 are accepted.
            # NOTE(review): presumably this masks a fixed region of the
            # camera view -- confirm.
            outside_centre_band = x_max < 300 or x_min > 350
            if (box_w > MIN_BOX_SIDE and MIN_BOX_SIDE < box_h < MAX_BOX_HEIGHT
                    and outside_centre_band):
                candidates.append([x_min, x_max, y_min, y_max])

        # Keep a box only if it does not share too much area with any better
        # box that was kept before it.
        kept_boxes = []
        for box in candidates:
            if all(intersection(prev, box) <= MAX_SHARED_AREA for prev in kept_boxes):
                kept_boxes.append(box)

        # Rows are (x_min, x_max, y_min, y_max): size-filtered / final boxes.
        bndbox_fuck2 = np.array(candidates, dtype=float).reshape(len(candidates), 4)
        bndbox_fuck = np.array(kept_boxes, dtype=float).reshape(len(kept_boxes), 4)

        for x_min, x_max, y_min, y_max in kept_boxes:
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
        numbers = 'The number of cars '
        number_car = bndbox_fuck.shape[0]
        cv2.rectangle(frame, (0, 0), (210, 50), (0, 0, 255), 2)
        cv2.putText(frame, numbers, (10, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1)
        cv2.putText(frame, str(number_car), (70, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        outframe = hdmi_out.newframe()
        outframe[:] = frame
        hdmi_out.writeframe(outframe)
finally:
    # Free the camera even when the loop is interrupted from the notebook.
    videoIn.release()