In [1]:
from pynq import Overlay
# Instantiate the overlay object for the saxpy.bit bitstream.
custom_overlay = Overlay("saxpy.bit")

In [2]:
# Show the overlay's generated docstring (IP blocks, hierarchies, interrupts, memories).
custom_overlay?

[0;31mType:[0m            Overlay
[0;31mString form:[0m     <pynq.overlay.Overlay object at 0xaf3eedf0>
[0;31mFile:[0m            /usr/local/share/pynq-venv/lib/python3.8/site-packages/pynq/overlay.py
[0;31mDocstring:[0m      
Default documentation for overlay saxpy.bit. The following
attributes are available on this overlay:

IP Blocks
----------
axi_intc_0           : pynq.overlay.DefaultIP
saxpy_0              : pynq.overlay.DefaultIP
processing_system7_0 : pynq.overlay.DefaultIP

Hierarchies
-----------
None

Interrupts
----------
None

GPIO Outputs
------------
None

Memories
------------
PSDDR                : Memory
[0;31mClass docstring:[0m
This class keeps track of a single bitstream's state and contents.

The overlay class holds the state of the bitstream and enables run-time
protection of bindings.

Our definition of overlay is: "post-bitstream configurable design".
Hence, this class must expose configurability through content discovery
and runtime protection.

The

In [3]:
# Handle to the saxpy_0 IP block exposed by the overlay.
saxpy_hw = custom_overlay.saxpy_0

In [4]:
# Inspect the IP's register map: control/interrupt registers plus the x, y and a arguments.
saxpy_hw.register_map

RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),
  x_1 = Register(x=write-only),
  x_2 = Register(x=write-only),
  y_1 = Register(y=write-only),
  y_2 = Register(y=write-only),
  a = Register(a=write-only)
}

In [5]:
from pynq import allocate
import numpy as np
import struct
import ctypes

In [6]:
def python_float_to_binary(num):
    """Return the bit pattern of ``num`` stored as a C ``float``, as an unsigned int.

    The previous body ignored ``num`` (it always converted the literal 1.0)
    and discarded the result, so every call returned ``None``.

    Args:
        num: Python number to convert; it is first narrowed to a C ``float``.

    Returns:
        int: the 32-bit pattern of that float, e.g. ``0x3F800000`` for 1.0.
    """
    # Reinterpret the float's storage as an explicit 32-bit unsigned integer.
    return ctypes.c_uint32.from_buffer(ctypes.c_float(num)).value

def python_float_to_byte(num):
    """Pack ``num`` with the struct format '!f' and return the resulting bytes."""
    packer = struct.Struct('!f')
    return packer.pack(num)

def python_float_to_uint(num):
    """Reinterpret ``num``, stored as a C float, as an unsigned integer and return it."""
    as_float = ctypes.c_float(num)
    # from_buffer shares the float's memory, so .value reads the same bits as an integer.
    as_uint = ctypes.c_uint.from_buffer(as_float)
    return as_uint.value

In [7]:
# Sanity check: show the unsigned-integer bit pattern that encodes 0.15625.
python_float_to_uint(0.15625)

1042284544

In [8]:
# Reference inputs: two independent lists holding 0.0 .. 9.0, plus the scalar multiplier.
X = list(map(float, range(10)))
Y = list(map(float, range(10)))
a = 0.15625

In [9]:
# Display the scalar and both input lists.
a,X,Y

(0.15625,
 [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0],
 [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])

In [10]:
# Allocate two 10-element float32 buffers with pynq.allocate for the x and y vectors.
xbuf_test = allocate(shape=(10,), dtype='float32')
ybuf_test = allocate(shape=(10,), dtype='float32')
# The scalar is not placed in a buffer; it is encoded as the integer bit pattern of the float.
abuf = python_float_to_uint(a)

In [11]:
# Copy the Python lists into the allocated float32 buffers.
np.copyto(xbuf_test,X)
np.copyto(ybuf_test,Y)
# abuf is a plain integer returned by python_float_to_uint, so there is nothing to copy into it.
#np.copyto(abuf,a)

In [12]:
# Display both buffers and the encoded scalar.
xbuf_test,ybuf_test,abuf

(PynqBuffer([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=float32),
 PynqBuffer([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=float32),
 1042284544)

In [13]:
# Device-side addresses of the two buffers; these values are later written to the x_1 / y_1 registers.
xbuf_test.device_address, ybuf_test.device_address

(377823232, 377827328)

In [14]:
# Inspect the scalar register; it is reported as write-only, so no value is shown.
saxpy_hw.register_map.a

Register(a=write-only)

In [15]:
# Register map before any kernel argument has been written.
saxpy_hw.register_map

RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),
  x_1 = Register(x=write-only),
  x_2 = Register(x=write-only),
  y_1 = Register(y=write-only),
  y_2 = Register(y=write-only),
  a = Register(a=write-only)
}

In [16]:
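# Alternative (unused): write the scalar with an explicit MMIO write instead of register_map, e.g.
# saxpy_hw.write(saxpy_hw.register_map.a.address, abuf)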

In [17]:
# Of x_1 and x_2, the register actually used for the input address is x_1.
# NOTE(review): x_2 / y_2 are presumably the upper words of 64-bit pointers — confirm against the HLS-generated driver header.
saxpy_hw.register_map.x_1 = xbuf_test.device_address
#saxpy_hw.register_map.x_2 = xbuf_test.device_address
# The scalar is written as the integer bit pattern produced by python_float_to_uint.
saxpy_hw.register_map.a = abuf
saxpy_hw.register_map.y_1 = ybuf_test.device_address
#saxpy_hw.register_map.y_2 = ybuf_test.device_address

In [18]:
ybuf_test
# There is no guarantee that the computation has finished when this is evaluated.

PynqBuffer([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=float32)

In [19]:
# Start the kernel by setting the AP_START field of the CTRL register.
saxpy_hw.register_map.CTRL.AP_START = 1

In [20]:
ybuf_test
# There is no guarantee that the computation has finished when this is evaluated
# (the buffer is read here without checking the CTRL register first).

PynqBuffer([ 0.     ,  1.15625,  2.3125 ,  3.46875,  4.625  ,  5.78125,
             6.9375 ,  8.09375,  9.25   , 10.40625], dtype=float32)

In [21]:
# The source Python list Y; the kernel wrote to ybuf_test, not to this list.
Y

[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]

In [22]:
# Write 1 to the GIER register (its low field is named Enable in the register map).
saxpy_hw.register_map.GIER = 1

In [23]:
# Set the channel-0 enable field of the IP_IER register.
saxpy_hw.register_map.IP_IER.CHAN0_INT_EN = 1

In [24]:
# Read the register map back to check the GIER and IP_IER fields just written.
saxpy_hw.register_map

RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0),
  GIER = Register(Enable=1, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=1, CHAN1_INT_EN=0, RESERVED=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),
  x_1 = Register(x=write-only),
  x_2 = Register(x=write-only),
  y_1 = Register(y=write-only),
  y_2 = Register(y=write-only),
  a = Register(a=write-only)
}

In [25]:
class saxpy_overlay(Overlay):
    """Overlay subclass that wraps the ``saxpy_0`` IP behind a single ``run`` call."""

    def set_interrupt(self):
        """Write 1 to GIER and set IP_IER.CHAN0_INT_EN on this overlay's saxpy IP."""
        saxpy_ip = self.saxpy_0
        saxpy_ip.register_map.GIER = 1
        saxpy_ip.register_map.IP_IER.CHAN0_INT_EN = 1

    def run(self,X,Y,a):
        """Run the kernel once on 10-element inputs and return the result buffer.

        Args:
            X: values copied into the 10-element float32 x buffer.
            Y: values copied into the 10-element float32 y buffer, which the
               kernel is pointed at through the y_1 register.
            a: scalar, written to the ``a`` register as a float bit pattern.

        Returns:
            The allocated y buffer after the IP reports idle. The caller owns
            it and should release it with ``freebuffer()`` when done.
        """
        saxpy_ip = self.saxpy_0

        self.set_interrupt()

        xbuf = allocate(shape=(10,), dtype='float32')
        try:
            ybuf = allocate(shape=(10,), dtype='float32')
            abuf = python_float_to_uint(a)
            np.copyto(xbuf,X)
            np.copyto(ybuf,Y)

            saxpy_ip.register_map.x_1 = xbuf.device_address
            saxpy_ip.register_map.y_1 = ybuf.device_address
            saxpy_ip.write(saxpy_ip.register_map.a.address,abuf)

            # Use this overlay's own IP handle. The earlier code went through
            # the notebook-level `saxpy_hw`, which belongs to a different
            # Overlay object and does not exist on a fresh kernel.
            saxpy_ip.register_map.CTRL.AP_START = 1

            # Busy-wait until the IP reports idle again.
            while saxpy_ip.register_map.CTRL.AP_IDLE != 1:
                continue
        finally:
            # The x buffer is only needed during the run; release it so that
            # repeated calls do not exhaust the contiguous-memory pool.
            xbuf.freebuffer()

        return ybuf

In [26]:
# Instantiate the wrapper class on the same saxpy.bit bitstream.
SAXPY = saxpy_overlay("saxpy.bit")


In [27]:
# Run the kernel on the current X, Y and a, and display the returned result buffer.
SAXPY.run(X,Y,a)

PynqBuffer([ 0.     ,  1.15625,  2.3125 ,  3.46875,  4.625  ,  5.78125,
             6.9375 ,  8.09375,  9.25   , 10.40625], dtype=float32)

In [28]:
def tc_gen(len=10):
    """Return a random test vector of shape ``(1, len)`` drawn from NumPy's global RNG."""
    shape = (1, len)
    return np.random.random_sample(shape)

In [29]:
def sw_saxpy_test(X,Y,a, rtol=1e-5, atol=1e-6):
    """Compare the hardware result with a NumPy float32 reference of ``Y + a*X``.

    Changes from the earlier version:
    - Inputs are flattened, so both ``(1, n)`` arrays and plain 1-D lists work.
      The old ``Y_sw[0]`` indexing only worked for 2-D inputs.
    - The comparison covers every element instead of a hard-coded ``range(10)``.
    - Results are compared within a tolerance, because exact float equality
      between hardware and software arithmetic is fragile.

    Args:
        X, Y: input vectors, as accepted by ``SAXPY.run``.
        a: scalar multiplier.
        rtol, atol: tolerances forwarded to ``np.allclose``.

    Returns:
        bool: True when both results have the same length and every element
        matches within tolerance.
    """
    Y_hw = SAXPY.run(X,Y,a)
    y_ref = np.asarray(Y, dtype='float32').ravel()
    x_ref = np.asarray(X, dtype='float32').ravel()
    # Keep the whole reference computation in single precision, like the kernel.
    Y_sw = y_ref + np.float32(a) * x_ref
    print('Y_hw: ',Y_hw)
    print('Y_sw: ',Y_sw)
    hw_flat = np.asarray(Y_hw).ravel()
    if hw_flat.shape != Y_sw.shape:
        return False
    return bool(np.allclose(hw_flat, Y_sw, rtol=rtol, atol=atol))

In [30]:
# Random test case: two (1, 10) vectors and one scalar. This rebinds the X, Y and a defined earlier.
# NOTE(review): no random seed is set, so this cell's inputs differ on every run.
X = tc_gen()
Y = tc_gen()
a = tc_gen(1).item()
sw_saxpy_test(X,Y,a)

Y_hw:  [0.40879497 0.23080269 0.35910225 0.7982199  0.68597424 1.147953
 0.30933586 0.6696626  0.93188953 1.0193346 ]
Y_sw:  [0.40879497 0.23080269 0.35910225 0.7982199  0.68597424 1.147953
 0.30933586 0.6696626  0.93188953 1.0193346 ]


True

In [31]:
## The receive-wait pattern that was used in the FFT example
import asyncio
# https://pynq.readthedocs.io/en/v2.7.0/overlay_design_methodology/pynq_and_asyncio.html
# https://www.xilinx.com/htmldocs/xilinx2017_4/sdaccel_doc/fde1504034360078.html
## DAXPY = Double-precision A*X Plus Y
## SAXPY = Single-precision A*X Plus Y --> floating point (float32)

# Goal: implement SAXPY
# Conceptually Z = a*X + Y (all in float32, 10 elements),
# but the kernel takes only two addresses, so it accumulates in place: Y += a*X

'''
void saxpy_hw(float* x, float* y, float a){
    
}
'''

'''
void saxpy(float* x, float* y, float a){
	for(int i=0;i<SIZE;++i){
		y[i] += a*x[i];
	}
}

'''

'\nvoid saxpy(float* x, float* y, float a){\n\tfor(int i=0;i<SIZE;++i){\n\t\ty[i] += a*x[i];\n\t}\n}\n\n'