## 加载所需的库

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from time import sleep, time
from pynq import allocate, Overlay

## 加载 bitstream

In [None]:
# Build the PYNQ overlay from the bitstream file "gaussian_blur.bit".
# NOTE(review): the path is relative, so the file is presumably expected next
# to this notebook -- confirm the working directory on the board.
overlay = Overlay("gaussian_blur.bit")

In [None]:
# Grab handles to the two IP blocks used below. The attribute names must match
# the instance names in the hardware design (presumably an AXI DMA engine and
# the HLS Gaussian-blur accelerator -- confirm against the block design).
dma = overlay.axi_dma_0
gaussian_blur = overlay.GaussianBlur_accel_0

## 读取输入图片

In [None]:
# Load the test image from disk. cv2.imread signals failure by returning None
# instead of raising, so check explicitly; otherwise the error only shows up
# later as an opaque cv2.error inside cvtColor.
image_path = "sahara.jpg"
img_BGR = cv2.imread(image_path)
if img_BGR is None:
    raise OSError(
        f"could not read image {image_path!r} (file missing or not decodable)"
    )

# OpenCV loads pixels in BGR order; matplotlib expects RGB.
img = cv2.cvtColor(img_BGR, cv2.COLOR_BGR2RGB)

# Preview the input image.
plt.figure(figsize=(16, 9))
plt.imshow(img)

## 分配内存

In [None]:
# Take only the spatial dimensions. Slicing the shape avoids binding `_` at
# notebook top level, which would shadow the name IPython uses for the last
# cell output.
height, width = img.shape[:2]

# Buffers handed to the DMA engine: one holding the source frame and one that
# receives the result. Both are (height, width, 3) uint8 arrays.
in_buffer = allocate(shape=(height, width, 3), dtype=np.uint8, cacheable=1)
out_buffer = allocate(shape=(height, width, 3), dtype=np.uint8, cacheable=1)

# Copy the image pixels into the input buffer (element-wise copy into the
# existing allocation, not a rebinding of the name).
in_buffer[:] = img

## 配置 IP 的寄存器

In [None]:
# Program the accelerator through its memory-mapped registers.
# Offsets 0x10 and 0x18 receive the image height and width respectively.
gaussian_blur.write(0x10, height)
gaussian_blur.write(0x18, width)
# Write 0x81 to the register at offset 0x00.
# NOTE(review): presumably the HLS control register, where bit 0 is ap_start
# and bit 7 is auto_restart -- confirm against the generated driver header.
gaussian_blur.write(0x00, 0x81)

## 启动 DMA

In [None]:
# Hand the input buffer to the DMA send channel and the output buffer to the
# receive channel, then wait on both channels before touching out_buffer.
# The order of these calls is kept exactly as written.
dma.sendchannel.transfer(in_buffer)
dma.recvchannel.transfer(out_buffer)
dma.sendchannel.wait()
dma.recvchannel.wait()

## 显示结果

In [None]:
# Display the contents of the output buffer in a new 16x9 figure.
plt.figure(figsize=(16, 9))
plt.imshow(out_buffer)

## 速度测试，对比 OpenCV

In [None]:
# Cell-local import so this cell works on its own. perf_counter is a
# monotonic, high-resolution clock intended for measuring short durations,
# whereas time() is a wall clock that can jump and may have coarse resolution.
from time import perf_counter

# Software baseline: 7x7 Gaussian blur with sigma 5 on both axes.
# The sigmas are passed by keyword because the fourth positional parameter of
# cv2.GaussianBlur is `dst`, not `sigmaY`. The result is unchanged, since an
# unset sigmaY defaults to sigmaX.
start_time = perf_counter()
out = cv2.GaussianBlur(img, (7, 7), sigmaX=5, sigmaY=5)
end_time = perf_counter()
print(f"OpenCV 延迟: {(end_time - start_time) * 1000:f} ms")

# Hardware path: time one full DMA round trip (send the input frame, receive
# the output frame, wait for both channels to finish).
start_time = perf_counter()
dma.sendchannel.transfer(in_buffer)
dma.recvchannel.transfer(out_buffer)
dma.sendchannel.wait()
dma.recvchannel.wait()
end_time = perf_counter()
print(f"FPGA 延迟: {(end_time - start_time) * 1000:f} ms")

In [None]:
# Explicitly return the memory to the allocator. `del` on its own only
# removes the notebook names; the memory would stay reserved for as long as
# any other reference to the buffers exists (possibly one held by the DMA
# driver -- not verified here).
# NOTE: freebuffer() releases the memory regardless of other references, so
# the buffers must not be used after this cell.
in_buffer.freebuffer()
out_buffer.freebuffer()

# Drop the names so later cells cannot touch the freed buffers by accident.
del in_buffer
del out_buffer