In [1]:
from pynq import allocate
from pynq import Overlay
import numpy as np
import struct
import ctypes

In [2]:
# Load the ip64.bit overlay; its IP blocks are then reachable as attributes of custom_hw.
custom_hw = Overlay("ip64.bit")

In [3]:
# IPython introspection of the overlay (development aid; the output lists the available IP blocks).
custom_hw?

Type:            Overlay
String form:     <pynq.overlay.Overlay object at 0xaf2f0910>
File:            /usr/local/share/pynq-venv/lib/python3.8/site-packages/pynq/overlay.py
Docstring:      
Default documentation for overlay ip64.bit. The following
attributes are available on this overlay:

IP Blocks
----------
axi_dma_1            : pynq.lib.dma.DMA
axi_dma_2            : pynq.lib.dma.DMA
out_dma              : pynq.lib.dma.DMA
krnl_saxpy2_0        : pynq.overlay.DefaultIP
processing_system7_0 : pynq.overlay.DefaultIP

Hierarchies
-----------
None

Interrupts
----------
None

GPIO Outputs
------------
None

Memories
------------
PSDDR                : Memory
Class docstring:
This class keeps track of a single bitstream's state and contents.

The overlay class holds the state of the bitstream and enables run-time
protection of bindings.

Our definition of overlay is: "post-bitstream configurable design".
Hence, this class must expo

In [4]:
# Show the per-IP metadata dictionary of the overlay (type, address range, physical address, parameters).
custom_hw.ip_dict

{'axi_dma_1': {'fullpath': 'axi_dma_1',
  'type': 'xilinx.com:ip:axi_dma:7.1',
  'bdtype': None,
  'state': None,
  'addr_range': 65536,
  'phys_addr': 1077936128,
  'mem_id': 'S_AXI_LITE',
  'memtype': 'REGISTER',
  'gpio': {},
  'interrupts': {},
  'parameters': {'C_S_AXI_LITE_ADDR_WIDTH': '10',
   'C_S_AXI_LITE_DATA_WIDTH': '32',
   'C_DLYTMR_RESOLUTION': '125',
   'C_PRMRY_IS_ACLK_ASYNC': '0',
   'C_ENABLE_MULTI_CHANNEL': '0',
   'C_NUM_MM2S_CHANNELS': '1',
   'C_NUM_S2MM_CHANNELS': '1',
   'C_INCLUDE_SG': '0',
   'C_SG_INCLUDE_STSCNTRL_STRM': '0',
   'C_SG_USE_STSAPP_LENGTH': '0',
   'C_SG_LENGTH_WIDTH': '20',
   'C_M_AXI_SG_ADDR_WIDTH': '32',
   'C_M_AXI_SG_DATA_WIDTH': '32',
   'C_M_AXIS_MM2S_CNTRL_TDATA_WIDTH': '32',
   'C_S_AXIS_S2MM_STS_TDATA_WIDTH': '32',
   'C_MICRO_DMA': '0',
   'C_INCLUDE_MM2S': '1',
   'C_INCLUDE_MM2S_SF': '1',
   'C_MM2S_BURST_SIZE': '16',
   'C_M_AXI_MM2S_ADDR_WIDTH': '32',
   'C_M_AXI_MM2S_DATA_WIDTH': '32',
   'C_M_AXIS_MM2S_TDATA_WIDTH': '32',
   'C

In [5]:
# Short handles for the IP blocks used by the following cells:
# two DMA engines feeding the kernel, one DMA engine draining it, and the kernel itself.
axi_dma_1, axi_dma_2, out_dma = (
    custom_hw.axi_dma_1,
    custom_hw.axi_dma_2,
    custom_hw.out_dma,
)
saxpy_hw = custom_hw.krnl_saxpy2_0

In [6]:
# List the registers exposed by the saxpy kernel (the output shows `a` and `size`).
custom_hw.krnl_saxpy2_0.register_map

RegisterMap {
  a = Register(a=write-only),
  size = Register(size=write-only)
}

In [7]:
# Reference for AXI-Lite register interfaces generated by Vitis HLS:
# https://docs.xilinx.com/r/en-US/ug1399-vitis-hls/S_AXILITE-Example
# Inspect the register map of the first input DMA engine.
axi_dma_1.register_map

RegisterMap {
  MM2S_DMACR = Register(RS=1, Reset=0, Keyhole=0, Cyclic_BD_Enable=0, IOC_IrqEn=0, Dly_IrqEn=0, Err_IrqEn=0, IRQThreshold=1, IRQDelay=0),
  MM2S_DMASR = Register(Halted=0, Idle=0, SGIncld=0, DMAIntErr=0, DMASlvErr=0, DMADecErr=0, SGIntErr=0, SGSlvErr=0, SGDecErr=0, IOC_Irq=0, Dly_Irq=0, Err_Irq=0, IRQThresholdSts=0, IRQDelaySts=0),
  MM2S_CURDESC = Register(Current_Descriptor_Pointer=0),
  MM2S_CURDESC_MSB = Register(Current_Descriptor_Pointer=0),
  MM2S_TAILDESC = Register(Tail_Descriptor_Pointer=0),
  MM2S_TAILDESC_MSB = Register(Tail_Descriptor_Pointer=0),
  MM2S_SA = Register(Source_Address=0),
  MM2S_SA_MSB = Register(Source_Address=0),
  MM2S_LENGTH = Register(Length=0),
  SG_CTL = Register(SG_CACHE=0, SG_USER=0),
  S2MM_DMACR = Register(RS=0, Reset=0, Keyhole=0, Cyclic_BD_Enable=0, IOC_IrqEn=0, Dly_IrqEn=0, Err_IrqEn=0, IRQThreshold=0, IRQDelay=0),
  S2MM_DMASR = Register(Halted=0, Idle=0, SGIncld=0, DMAIntErr=0, DMASlvErr=0, DMADecErr=0, SGIntErr=0, SGSlvErr=0, SG

In [8]:
def python_float_to_binary(num):
    """Return the IEEE-754 single-precision bit pattern of ``num`` as an unsigned int.

    The value is first narrowed to a 32-bit C float and its four bytes are then
    reinterpreted as an unsigned 32-bit integer (no numeric conversion).

    Args:
        num: A Python float (or anything ``ctypes.c_float`` accepts).

    Returns:
        int: The raw 32-bit pattern, e.g. ``2.0 -> 0x40000000``.
    """
    # The original computed this value but never returned it, so callers got None.
    return ctypes.c_uint32.from_buffer(ctypes.c_float(num)).value

def python_float_to_byte(num):
    """Pack ``num`` as a 4-byte single-precision float in network (big-endian) order.

    Args:
        num: The float to serialise.

    Returns:
        bytes: Exactly four bytes, most significant byte first.
    """
    network_float = struct.Struct('!f')
    packed = network_float.pack(num)
    return packed

def python_float_to_uint(num):
    """Reinterpret the bits of ``num`` (as a C float) as a C unsigned int.

    Used to place a float scalar into an integer-typed register write.

    Args:
        num: The float whose single-precision bit pattern is wanted.

    Returns:
        int: The bit pattern read back through ``ctypes.c_uint``.
    """
    single = ctypes.c_float(num)
    # from_buffer shares memory with `single`, so this is a bit cast, not a conversion.
    reinterpreted = ctypes.c_uint.from_buffer(single)
    return reinterpreted.value
#custom_hw.krnl_saxpy2_0.register_map.a = python_float_to_uint(c)

def float_to_bin(num):
    """Render ``num`` as the 32-character bit string of its big-endian single-precision encoding.

    Args:
        num: The float to render.

    Returns:
        str: '0'/'1' characters, most significant bit first.
    """
    octets = []
    for byte in struct.pack('!f', num):
        # Each packed byte becomes exactly eight binary digits, zero-padded on the left.
        octets.append('{:0>8b}'.format(byte))
    return ''.join(octets)

In [9]:
# Problem size and host-side test data: two random vectors and the scalar multiplier.
SIZE = 4096
a = np.random.random_sample(SIZE)
b = np.random.random_sample(SIZE)
c = 2.0

# Buffers obtained from pynq.allocate: two inputs and one output, all float32.
abuf, bbuf, obuf = (allocate((SIZE,), dtype=np.float32) for _ in range(3))

# Copy the host vectors into the input buffers (values are stored as float32).
for target, source in ((abuf, a), (bbuf, b)):
    np.copyto(target, source)

In [10]:
# Address of the kernel's `a` register as reported by the register map
# (passed below as the first argument to saxpy_hw.write / saxpy_hw.read).
saxpy_hw.register_map.a.address

16

In [11]:
# Address of the kernel's `size` register as reported by the register map.
saxpy_hw.register_map.size.address

24

In [12]:
# Write the element count into the kernel's `size` register.
saxpy_hw.write(saxpy_hw.register_map.size.address, SIZE)

In [13]:
# Read the `size` register back to confirm the write took effect.
saxpy_hw.read(saxpy_hw.register_map.size.address)

4096

In [14]:
# Write the scalar c into the kernel's `a` register as its raw float bit pattern
# (the write takes an integer, hence python_float_to_uint).
saxpy_hw.write(saxpy_hw.register_map.a.address, python_float_to_uint(c))

In [15]:
# Read the `a` register back; the integer shown is the bit pattern of c, not its numeric value.
saxpy_hw.read(saxpy_hw.register_map.a.address)

1073741824

In [16]:
# Sanity check: the scalar, its 32-bit pattern as a bit string, and that bit string
# parsed back as an integer. The integer is derived from c rather than from a
# hand-typed literal, so the check stays correct if c is changed.
print(c, float_to_bin(c), int(float_to_bin(c), 2))

2.0 01000000000000000000000000000000 1073741824


In [17]:
def print_dma_status(dma, direction):
    """Print the control and status register values of one DMA channel in hex.

    Args:
        dma: Object exposing ``read(offset)`` for the DMA's register space.
        direction: 0 selects the read channel (registers at base offset 0);
            any other value selects the write channel (base offset 0x30).

    Returns:
        None. Output goes to stdout, followed by a blank line.
    """
    reading = direction == 0
    label = "Read channel" if reading else "Write channel"
    base = 0 if reading else 0x30
    print(label)

    # Control register sits at base + 0x0, status register at base + 0x4.
    print(f"Control: {hex(dma.read(base + 0x0))}")
    print(f"Status : {hex(dma.read(base + 0x4))}")
    print("")

In [18]:
# (Benchmark toggle: make %%timeit the first line of this cell to time it.)
# Start both input transfers and the output transfer before waiting on any of them.
custom_hw.axi_dma_1.sendchannel.transfer(abuf)
custom_hw.axi_dma_2.sendchannel.transfer(bbuf)
custom_hw.out_dma.recvchannel.transfer(obuf)
# Wait on the two send channels first, then on the receive channel.
custom_hw.axi_dma_1.sendchannel.wait()
custom_hw.axi_dma_2.sendchannel.wait()

custom_hw.out_dma.recvchannel.wait()
# Reference: https://gist.github.com/cathalmccabe/1d32e2346c1d2b87dd65277eb3bb50da




In [19]:
# Dump control/status for each engine: the two input DMAs on their read channel (0),
# the output DMA on its write channel (1).
for engine, channel in ((axi_dma_1, 0), (axi_dma_2, 0), (out_dma, 1)):
    print_dma_status(engine, channel)

Read channel
Control: 0x10003
Status : 0x1002

Read channel
Control: 0x10003
Status : 0x1002

Write channel
Control: 0x10003
Status : 0x1002



In [20]:
# Bit layout of a control register, kept as a bare string so the cell echoes it as output.
# NOTE(review): presumably the HLS block-level control register of krnl_saxpy2_0 — confirm;
# none of the visible cells reads or writes these bits.
'''

    bit 0 - ap_start (Read/Write/COH)
    bit 1 - ap_done (Read/COR)
    bit 2 - ap_idle (Read)
    bit 3 - ap_ready (Read)
    bit 7 - auto_restart (Read/Write)
'''

'\n\n    bit 0 - ap_start (Read/Write/COH)\n    bit 1 - ap_done (Read/COR)\n    bit 2 - ap_idle (Read)\n    bit 3 - ap_ready (Read)\n    bit 7 - auto_restart (Read/Write)\n'

In [21]:
class saxpy_overlay(Overlay):
    """Overlay wrapper that runs ``a*x + y`` on the ``krnl_saxpy2_0`` kernel.

    Data moves through three DMA engines of the overlay: ``axi_dma_1`` and
    ``axi_dma_2`` send the two input vectors, ``out_dma`` receives the result.
    The scalar ``a`` and the element count are written to the kernel's
    ``a`` and ``size`` registers.
    """

    def __init__(self, bitfile_name, SIZE=4096):
        """Load the bitstream and allocate the three float32 DMA buffers.

        Args:
            bitfile_name: Bitstream file passed to ``Overlay.__init__``.
            SIZE: Number of float32 elements in each buffer.
        """
        super().__init__(bitfile_name)
        self.saxpy_hw = self.krnl_saxpy2_0
        self.abuf = allocate((SIZE, ), dtype=np.float32)
        self.bbuf = allocate((SIZE, ), dtype=np.float32)
        self.obuf = allocate((SIZE, ), dtype=np.float32)
        # Interrupt handling is omitted.

    def python_float_to_uint(self, num):
        """Return the single-precision bit pattern of ``num`` as an unsigned int."""
        return ctypes.c_uint.from_buffer(ctypes.c_float(num)).value

    def register_map(self):
        """Return the register map of the saxpy kernel.

        The original returned ``self.register_map``, i.e. this bound method
        itself, which carried no register information.
        """
        return self.saxpy_hw.register_map

    def ip_dict(self):
        # NOTE(review): on instances this method appears to be shadowed by an
        # attribute of the same name (evaluating `SAXPY_HW.ip_dict` in the
        # notebook displays a dict, not a bound method), so it is effectively
        # unreachable through an instance — confirm, then rename or remove.
        return self.saxpy_hw.ip_dict()

    def run(self, x, y, a, SIZE):
        """Compute ``a*x + y`` on the hardware kernel.

        Args:
            x: First input vector; copied into the first input buffer.
            y: Second input vector; copied into the second input buffer.
            a: Scalar multiplier, written to the kernel as its float bit pattern.
            SIZE: Element count written to the kernel's ``size`` register. The
                buffers themselves keep the length chosen at construction.

        Returns:
            The output buffer ``self.obuf``. It is the live buffer, not a copy,
            so a later ``run`` overwrites its contents.
        """
        np.copyto(self.abuf, x)
        np.copyto(self.bbuf, y)

        # Use this instance's kernel handle. The original referenced a
        # notebook-global `saxpy_hw`, which fails on a fresh kernel and could
        # point at a different overlay object.
        kernel = self.saxpy_hw
        registers = kernel.register_map
        kernel.write(registers.size.address, SIZE)
        kernel.write(registers.a.address, self.python_float_to_uint(a))

        # NOTE(review): the complete transfer is issued 4 times with the same
        # buffers; the reason is not visible here — confirm whether one pass suffices.
        for _ in range(4):
            # Start all three transfers before waiting on any of them.
            self.axi_dma_1.sendchannel.transfer(self.abuf)
            self.axi_dma_2.sendchannel.transfer(self.bbuf)
            self.out_dma.recvchannel.transfer(self.obuf)
            self.axi_dma_1.sendchannel.wait()
            self.axi_dma_2.sendchannel.wait()
            self.out_dma.recvchannel.wait()

        return self.obuf

    def assert_by_sw(self, x, y, a, SIZE):
        """Run the kernel and check the result against a NumPy reference.

        Args:
            x, y, a, SIZE: Forwarded unchanged to ``run``.

        Returns:
            True when every element is within ``np.isclose`` default tolerances.

        Raises:
            AssertionError: If any element differs beyond tolerance.
        """
        expected = np.add(np.multiply(a, x), y)
        measured = self.run(x, y, a, SIZE)
        matches = np.isclose(expected, measured)
        # One vectorized check replaces the per-element Python assert loop.
        mismatches = int(np.count_nonzero(~matches))
        assert mismatches == 0, f"{mismatches} element(s) differ from the software reference"

        return True

In [22]:
# Instantiate the wrapper; __init__ loads ip64.bit and allocates the default 4096-element buffers.
SAXPY_HW = saxpy_overlay("ip64.bit")

In [23]:
# Display the overlay's IP dictionary. As the output shows, this evaluates to a dict,
# not to the `ip_dict` method defined on saxpy_overlay.
SAXPY_HW.ip_dict

{'axi_dma_1': {'fullpath': 'axi_dma_1',
  'type': 'xilinx.com:ip:axi_dma:7.1',
  'bdtype': None,
  'state': None,
  'addr_range': 65536,
  'phys_addr': 1077936128,
  'mem_id': 'S_AXI_LITE',
  'memtype': 'REGISTER',
  'gpio': {},
  'interrupts': {},
  'parameters': {'C_S_AXI_LITE_ADDR_WIDTH': '10',
   'C_S_AXI_LITE_DATA_WIDTH': '32',
   'C_DLYTMR_RESOLUTION': '125',
   'C_PRMRY_IS_ACLK_ASYNC': '0',
   'C_ENABLE_MULTI_CHANNEL': '0',
   'C_NUM_MM2S_CHANNELS': '1',
   'C_NUM_S2MM_CHANNELS': '1',
   'C_INCLUDE_SG': '0',
   'C_SG_INCLUDE_STSCNTRL_STRM': '0',
   'C_SG_USE_STSAPP_LENGTH': '0',
   'C_SG_LENGTH_WIDTH': '20',
   'C_M_AXI_SG_ADDR_WIDTH': '32',
   'C_M_AXI_SG_DATA_WIDTH': '32',
   'C_M_AXIS_MM2S_CNTRL_TDATA_WIDTH': '32',
   'C_S_AXIS_S2MM_STS_TDATA_WIDTH': '32',
   'C_MICRO_DMA': '0',
   'C_INCLUDE_MM2S': '1',
   'C_INCLUDE_MM2S_SF': '1',
   'C_MM2S_BURST_SIZE': '16',
   'C_M_AXI_MM2S_ADDR_WIDTH': '32',
   'C_M_AXI_MM2S_DATA_WIDTH': '32',
   'C_M_AXIS_MM2S_TDATA_WIDTH': '32',
   'C

In [24]:
# Fresh random inputs and the scalar for one hardware run.
SIZE = 4096
x, y = (np.random.random_sample(SIZE) for _ in range(2))
a = 2.0
# Last expression of the cell, so the returned output buffer is displayed.
SAXPY_HW.run(x, y, a, SIZE)

PynqBuffer([1.4496164, 2.1369429, 1.4479941, ..., 1.106302 , 1.714165 ,
            2.4501247], dtype=float32)

In [25]:
# Re-run on hardware and compare against the NumPy reference; evaluates to True when all elements are close.
SAXPY_HW.assert_by_sw(x,y,a,SIZE)

True

In [26]:
# Create a plain Overlay for ip64.bit again, rebinding custom_hw.
# NOTE(review): handles taken from the earlier custom_hw (axi_dma_1, saxpy_hw, ...) are not refreshed here.
custom_hw = Overlay("ip64.bit")

In [29]:
# Inputs for the timing comparison below, plus a freshly constructed wrapper instance.
SIZE = 4096
x, y = (np.random.random_sample(SIZE) for _ in range(2))
a = 2.0
SAXPY_HW = saxpy_overlay("ip64.bit")

In [30]:
%%timeit
# Time a full hardware run: buffer copies, register writes, and the 4 DMA passes performed inside run().
SAXPY_HW.run(x,y,a,SIZE)

516 ms ± 2.34 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [31]:
%%timeit
# Software baseline: the same a*x + y evaluated with NumPy on the host arrays x and y.
axpy = np.add(np.multiply(a,x),y)

227 µs ± 1.58 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


## Web page memos (reference links)

<https://discuss.pynq.io/t/tutorial-using-a-hls-stream-ip-with-dma-part-1-hls-design/3344>  
<https://discuss.pynq.io/t/tutorial-using-a-hls-stream-ip-with-dma-part-2-vivado-design/3345>  
<https://discuss.pynq.io/t/tutorial-using-a-hls-stream-ip-with-dma-part-3-using-the-hls-ip-from-pynq/3346>  
<https://github.com/Xilinx/Vitis-HLS-Introductory-Examples/tree/2021.2/Interface/Streaming/using_axi_stream_with_side_channel_data>  
<https://docs.xilinx.com/r/en-US/ug1399-vitis-hls/Using-HLS-Streams>  
<https://gist.github.com/cathalmccabe/1d32e2346c1d2b87dd65277eb3bb50da>  
<https://docs.xilinx.com/r/en-US/ug1399-vitis-hls/Non-Blocking-Read>  
<https://www.xilinx.com/htmldocs/xilinx2017_4/sdaccel_doc/ylh1504034366220.html>  
<https://xilinx.github.io/Vitis_Accel_Examples/2020.1/html/hello_world.html>  
<https://github.com/Xilinx/Vitis-HLS-Introductory-Examples/blob/master/Interface/Streaming/using_axi_stream_with_struct/example.cpp>  
<https://github.com/Xilinx/Vitis-HLS-Introductory-Examples/blob/master/Interface/Streaming/axi_stream_to_master/example.cpp>  
<https://wikidocs.net/91547>  
<https://discuss.pynq.io/t/vitis-hls-wrong-ap-axiu-tdata-size/2610/3>  
<https://mygit.th-deg.de/gaydos/fa-notes/-/jobs/17537/artifacts/raw/synthesizing-a-streaming-overlay.pdf>  
<https://pp4fpgas.readthedocs.io/en/latest/axidma.html>  