# Welcome to PYNQ

## Getting Started

To get started using PYNQ, try running the example notebooks in the folders described below. 

* **getting_started**: includes an introduction to using Jupyter notebook with PYNQ, the Python environment, and how to use some basic features of the current platform. 

* **common**: contains example notebooks on how to download an overlay, how to set the Zynq clocks, how to execute Linux shell commands, and how to use USB devices.

If other overlays or packages are installed, other folders with example notebooks may also be available in this directory.  


## Documentation

Please see the latest <a href="http://pynq.readthedocs.io">PYNQ Documentation on readthedocs</a>.  


## Support

For questions or support, go to the forum on the <a href="http://www.pynq.io">PYNQ project webpage</a>.


## Project webpage

You can find details on the <a href="http://www.pynq.io">PYNQ project webpage</a>.


## GitHub

The PYNQ repository is hosted on GitHub: <a href="https://github.com/Xilinx/PYNQ">PYNQ GitHub Repository</a>.

In [16]:
import os

In [17]:
# Show the kernel's current working directory.
os.getcwd()

'/home/xilinx/jupyter_notebooks'

In [18]:
from pynq import Overlay, Clocks

# Print the CPU and FCLK0 clock frequencies reported by pynq.Clocks, in MHz.
print(f'CPU:   {Clocks.cpu_mhz:.6f}MHz')
print(f'FCLK0: {Clocks.fclk0_mhz:.6f}MHz')


CPU:   1199.988000MHz
FCLK0: 99.999000MHz


In [229]:
# Bitstream for the PL, located relative to the current working directory
bitstream_path = os.getcwd() + "/smart_parking_complex/dpu_ip/dpu.bit"
overlay = Overlay(bitstream_path)

In [230]:
# IPython introspection: display help for the loaded overlay object
overlay?

In [231]:
# DPU IP: handle to the overlay's `dpu_ip` attribute
dpu_ip = overlay.dpu_ip
# IPython introspection: display help for the DPU IP object
dpu_ip?

In [232]:
# DMA IP: handle to the overlay's `axi_dma` attribute, used for all stream transfers below
dma = overlay.axi_dma
# IPython introspection: display help for the DMA object
dma?

In [233]:
# Timer IP: handle to the overlay's `axi_timer` attribute, used for the HW timing measurement
hw_timer = overlay.axi_timer
# IPython introspection: display help for the timer object
hw_timer?

In [234]:
# Show the registers we need to access. They can be accessed by name through
# `register_map` or via direct memory access; access by name is easier.
dpu_ip.register_map


RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),
  max_size = Register(max_size=0),
  max_size_ctrl = Register(max_size_ap_vld=0, RESERVED=0)
}

In [235]:
# Smart Parking Stream: start the DPU IP by writing its CTRL register fields.

# AUTO_RESTART is the first signal to set high: it ensures the AP_START signal
# does not go low after one cycle. With AXI-Stream, setting only AP_START
# enables computation for a single stream.
dpu_ip.register_map.CTRL.AUTO_RESTART = 1
# Computations occur while AP_START is high
dpu_ip.register_map.CTRL.AP_START = 1

In [236]:
# Display the register map again to check that the control signals were set
# and that the read-only registers now hold values.
dpu_ip.register_map

RegisterMap {
  CTRL = Register(AP_START=1, AP_DONE=1, AP_IDLE=0, AP_READY=0, RESERVED_1=0, AUTO_RESTART=1, RESERVED_2=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),
  max_size = Register(max_size=4294967295),
  max_size_ctrl = Register(max_size_ap_vld=1, RESERVED=0)
}

In [237]:
# Maximum number of elements the DPU accepts in an input stream, read from the
# IP's `max_size` register.
## This limit is for the finite-size for-loop implementation.
## A while-loop implementation can accept a stream of unlimited size.
max_size = int(dpu_ip.register_map.max_size)
print(max_size)

4294967295


In [238]:
################# Test 1: Feed a single stream ##################

In [239]:

def createInOutStreams(size):
    """Build the flat input feature list and the zero-filled prediction list.

    Each record is ``row_length`` (2) consecutive values: a time value followed
    by a car park ID. Time values start at 1587568890 (presumably a Unix
    timestamp in seconds) and advance by 60 per record; the car park ID is
    always 1.

    Args:
        size: Requested number of feature values (not records). When it is
            not a multiple of 2 the final record is still emitted whole, so
            the feature list is rounded up to the next full record.

    Returns:
        Tuple ``(featuresList, predictionsList)``. ``predictionsList`` holds
        exactly one ``0`` placeholder per record in ``featuresList``.
    """
    row_length = 2
    start_time = 1587568890
    time_step = 60
    carpark_id = 1

    # One record per `row_length` requested values, rounding up. Sizing the
    # predictions from this count keeps both lists consistent for every size;
    # previously an odd or non-positive size produced one prediction too many.
    record_count = len(range(0, size, row_length))

    featuresList = []
    for record in range(record_count):
        featuresList.append(start_time + record * time_step)
        featuresList.append(carpark_id)

    predictionsList = [0] * record_count

    print("featuresList Size: ", len(featuresList))
    print("predictionsList Size: ", len(predictionsList))

    return featuresList, predictionsList

In [240]:
# Test 1 input: 10 feature values, i.e. 5 records of 2 values each
size = 10
featuresList, predictionsList = createInOutStreams(size)

featuresList Size:  10
predictionsList Size:  5


In [241]:
import numpy as np
from pynq import allocate


# Input buffer: one uint32 slot per feature value
feature_count = len(featuresList)
inStream = allocate(shape=(feature_count,), dtype=np.uint32)

# Fill the buffer from the Python list, then print it to confirm the copy
inStream[:] = featuresList
print(inStream)

# Output buffer: one uint32 slot per expected prediction
prediction_count = len(predictionsList)
outStream = allocate(shape=(prediction_count,), dtype=np.uint32)

[1587568890          1 1587568950          1 1587569010          1
 1587569070          1 1587569130          1]


In [242]:
# Hand inStream to the DMA send channel; completion is awaited in a later cell via wait()
dma.sendchannel.transfer(inStream)
print("Completed transfer inStream")

Completed transfer inStream


In [243]:
# Hand outStream to the DMA receive channel; completion is awaited in a later cell via wait()
dma.recvchannel.transfer(outStream)
print("Completed transfer outStream")

Completed transfer outStream


In [244]:
# Wait on the send channel for the transfer issued above.
# Original author's note: call if TLAST is not set high in HLS code
dma.sendchannel.wait()
print("Completed send Channel wait")

Completed send Channel wait


In [245]:
# Wait on the receive channel for the transfer issued above.
# Original author's note: call if TLAST is not set high in HLS code
dma.recvchannel.wait()
print("Completed recv Channel wait")

Completed recv Channel wait


In [246]:
# Print the contents of the output buffer after the receive transfer.
# (An explicit flush is left disabled.)
# outStream.flush()
print(outStream)

[20 20 20 20 20]


In [247]:
# Release the Test 1 input buffer
inStream.freebuffer()
print("Freed inStream buffer")


Freed inStream buffer


In [248]:
# Release the Test 1 output buffer
outStream.freebuffer()
print("Freed outStream buffer")

Freed outStream buffer


In [249]:

################# Test 2: Feed continuous streams ##################

In [268]:
# Create a large batch: `size` counts feature values, so records = size // 2
size = 2048

# Number of batches streamed through the DMA in the timing loops
streams = 10000

featuresList, predictionsList = createInOutStreams(size)

featuresList Size:  2048
predictionsList Size:  1024


In [269]:
import numpy as np
from pynq import allocate


import numpy as np
from pynq import allocate

# Buffers are only allocated when the batch fits within the IP's max_size limit
if max_size < size:
    print("Use a lower batch size than {0} or implement alternate design that removes batch size limit".format(max_size))
else:
    # Input buffer, filled with the feature values
    inStream = allocate(shape=(len(featuresList),), dtype=np.uint32)
    inStream[:] = featuresList

    # Output buffer with one slot per prediction
    outStream = allocate(shape=(len(predictionsList),), dtype=np.uint32)

In [270]:
########## Timer Functions ################
def init_timer():
    """Configure timer 0 of ``hw_timer`` through its TCSR0 control register.

    Writes MDT0=0 (generate mode), UDT0=1 (down counter) and ARHT0=0 (don't
    overwrite the load value), in that order.
    """
    timer0_settings = (
        ("MDT0", 0),   # generate mode
        ("UDT0", 1),   # down counter
        ("ARHT0", 0),  # don't overwrite load value
    )
    for field_name, field_value in timer0_settings:
        setattr(hw_timer.register_map.TCSR0, field_name, field_value)
    
# Reset/Load Counter registers
## Note:- Run this block each time before measuring time
def reset_timer():
    """Reload timer 0's counter from TLR0 with the maximum 32-bit count.

    Run this each time before taking a measurement.
    """
    # MAX_COUNT goes into the load register
    hw_timer.register_map.TLR0 = 0xFFFFFFFF
    # Pulse LOAD0: 1 loads the value from TLR0, 0 clears the load bit again so
    # that the timer can be enabled
    for load_bit in (1, 0):
        hw_timer.register_map.TCSR0.LOAD0 = load_bit
    
def start_timer():
    """Set the ENT0 bit of TCSR0 to 1, enabling timer 0."""
    control = hw_timer.register_map.TCSR0
    control.ENT0 = 1
    
def stop_timer():
    """Clear the ENT0 bit of TCSR0 to 0, disabling timer 0."""
    control = hw_timer.register_map.TCSR0
    control.ENT0 = 0

In [271]:
# Configure the hardware timer and load its counter before the first measurement
init_timer()
reset_timer()

In [272]:
########## HW Timer Measurement ###########

## The HW timer can only measure the total time or the DMA block time at any
## given point. As written, this cell measures the total time for all batches;
## the commented-out lines show the DMA-block-time variant.
import time

listOut = []

# Reload the counter so the measurement starts from MAX_COUNT
reset_timer()

start_timer()
for i in range(streams):

    # Issue the input and output transfers for this batch
    dma.sendchannel.transfer(inStream)

    dma.recvchannel.transfer(outStream)

#     if i == (streams - 1):
#         hw_timer.register_map.TCSR0.ENT0 = 1

    # Wait on both channels before starting the next batch
    dma.sendchannel.wait()

    dma.recvchannel.wait()


# To calculate the time which the DMA blocks for (last batch only)

#     if i == (streams - 1):
#         hw_timer.register_map.TCSR0.ENT0 = 0
#         process_count = int(hw_timer.register_map.TCR0)
#         max_count = int(hw_timer.register_map.TLR0)
#         time = (max_count - process_count) * 1/(Clocks.fclk0_mhz * 10**6)
#         print("DMA Block Time: {0}".format(time))


    #listOut.append(outStream)

stop_timer()
# NOTE: both buffers are released here, so any later cell that transfers
# inStream/outStream has to allocate them again first.
inStream.freebuffer()
outStream.freebuffer()
print("Completed transfer")

Completed transfer


In [273]:
# The timer counts down from the load value, so ticks elapsed = load value - current count
process_count = int(hw_timer.register_map.TCR0)
max_count = int(hw_timer.register_map.TLR0)
# Convert ticks to seconds using the FCLK0 frequency (MHz -> Hz); this assumes the
# timer is clocked by FCLK0. The result is stored as `elapsed_s` rather than `time`
# so that the imported `time` module is not overwritten.
elapsed_s = (max_count - process_count) / (Clocks.fclk0_mhz * 10**6)
# Records per batch are taken from predictionsList (the list outStream was sized
# from) instead of reading an attribute of the already-freed outStream buffer.
records_per_batch = len(predictionsList)
print("HW timer measurement for {0} batches of {1} records in s: {2}".format(streams, size//2, elapsed_s))
print("Time taken for 1 records in s: {0}".format(elapsed_s / (records_per_batch * streams)))


HW timer measurement for 10000 batches of 1024 records in s: 3.1984434144341445
Time taken for 1 records in s: 3.1234798969083443e-07


In [135]:

# Dump the timer's register map to inspect its control bits and counter value
print(hw_timer.register_map)

RegisterMap {
  TCSR0 = Register(MDT0=0, UDT0=1, GENT0=0, CAPT0=0, ARHT0=0, LOAD0=0, ENIT0=0, ENT0=0, T0INT=0, PWMA0=0, ENALL=0, CASC=0),
  TLR0 = Register(TCLR0=4294967295),
  TCR0 = Register(TCR0=3980286318),
  TCSR1 = Register(MDT1=0, UDT1=0, GENT1=0, CAPT1=0, ARHT1=0, LOAD1=0, ENIT1=0, ENT1=0, T1INT=0, PWMA1=0, ENALL=0),
  TLR1 = Register(TCLR1=0),
  TCR1 = Register(TCR1=0)
}


In [750]:
# Check records
# for i in range(streams):
#     print(listOut[i])

In [751]:
########## SW Timer Measurement ###########
## SW timer can measure total time and dma block time together

import time

import numpy as np
from pynq import allocate

streams = 1000

listOut = []

# The HW timer cell frees inStream/outStream when it finishes, so allocate
# fresh buffers here instead of transferring buffers that were already released.
# Allocation happens before the timer starts, so it is not part of the measurement.
inStream = allocate(shape=(len(featuresList),), dtype=np.uint32)
inStream[:] = featuresList
outStream = allocate(shape=(len(predictionsList),), dtype=np.uint32)

# Start SW_TIMER. perf_counter() is a monotonic, high-resolution clock intended
# for measuring intervals; time.time() can jump if the system clock is adjusted.
startTime = time.perf_counter()

for i in range(streams):

    dma.sendchannel.transfer(inStream)

    dma.recvchannel.transfer(outStream)

#     if i == (streams - 1):
#         dmaStartTime = time.perf_counter()

    dma.sendchannel.wait()

    dma.recvchannel.wait()

#     if i == (streams - 1):
#         dmaEndTime = time.perf_counter()
#         print("DMA Block Time: {0}".format(dmaEndTime - dmaStartTime))

    # Not sure if flush is needed after IP writes to outStream
    # outStream.flush()

#     listOut.append(outStream)

# STOP SW_TIMER
endTime = time.perf_counter()

inStream.freebuffer()
outStream.freebuffer()
print("Completed transfer")

Completed transfer


In [423]:
# Report the software-timed total (endTime - startTime) for the batches streamed in the SW timer cell
print("Time for {0} streams of {1} records in s: {2}".format(streams, size//2,  endTime - startTime))

Time for 1000 streams of 50000 records in s: 2.2971248626708984
