# Welcome to PYNQ

## Getting Started

To get started using PYNQ, try running the example notebooks in the folders described below. 

* **getting_started**: includes an introduction to using Jupyter notebook with PYNQ, the Python environment, and how to use some basic features of the current platform. 

* **common**: contains example notebooks on how to download an overlay, how to set the Zynq clocks, how to execute Linux shell commands, and how to use USB devices.

If other overlays or packages are installed, other folders with example notebooks may also be available in this directory.  


## Documentation

Please see the latest <a href="http://pynq.readthedocs.io">PYNQ Documentation on readthedocs</a>.  


## Support

For questions or support, go to the forum on the <a href="http://www.pynq.io">PYNQ project webpage</a>.


## Project webpage

You can find details on the <a href="http://www.pynq.io">PYNQ project webpage</a>.


## GitHub

The PYNQ repository is hosted on GitHub: <a href="https://github.com/Xilinx/PYNQ">PYNQ GitHub Repository</a>.

In [1]:
# pyspark is imported in the next cell, after findspark has run.
# NOTE(review): findspark.init() is expected to locate the Spark installation
# and make `pyspark` importable -- confirm against the findspark docs.
import findspark
findspark.init()


In [2]:

import pyspark

In [3]:
from pyspark.sql import SparkSession

In [5]:
import os

In [6]:
# Show the working directory; the bitstream path used later is built from it.
os.getcwd()

'/home/xilinx/jupyter_notebooks'

In [7]:
from pynq import Overlay, Clocks

# Report the current CPU and FCLK0 clock rates in MHz.
# FCLK0 is used later to convert hardware-timer ticks into seconds.
print(f'CPU:   {Clocks.cpu_mhz:.6f}MHz')
print(f'FCLK0: {Clocks.fclk0_mhz:.6f}MHz')


CPU:   650.000000MHz
FCLK0: 76.923077MHz


In [354]:
# Load the DPU bitstream for the programmable logic (PL).
# The path is resolved relative to the current working directory, so the
# nn3_stream/ folder must sit next to this notebook.
overlay = Overlay(os.getcwd() + "/nn3_stream/dpu_ip/dpu.bit")

In [355]:
# IPython help syntax: show the docstring of the loaded overlay
# (development aid; safe to remove from a finished notebook).
overlay?

In [356]:
# DPU IP: handle to the `dpu_ip` block exposed by the overlay.
dpu_ip = overlay.dpu_ip
# IPython help syntax: show the docstring for this handle (development aid).
dpu_ip?

In [357]:
# DMA IP: handle to the `axi_dma` block; its sendchannel/recvchannel are used
# below to stream buffers to and from the DPU.
dma = overlay.axi_dma
# IPython help syntax: show the docstring for this handle (development aid).
dma?

In [358]:
# Timer IP: handle to the `axi_timer` block, used later for the hardware
# timing measurement.
hw_timer = overlay.axi_timer
# IPython help syntax: show the docstring for this handle (development aid).
hw_timer?

In [359]:
# Show the DPU registers we need to access. They can be accessed by name
# through register_map or by direct memory access; by name is easier.
dpu_ip.register_map


RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),
  size = Register(size=0, RESERVED=0),
  size_ctrl = Register(size_ap_vld=0, RESERVED=0)
}

In [360]:
# NN3 Stream: start the DPU so it keeps processing incoming streams.
# The order of the two writes matters: AUTO_RESTART first, then AP_START.

# Set AUTO_RESTART first so AP_START does not go low again after one cycle.
# With AXI-Stream, setting only AP_START enables computation for a single stream.
dpu_ip.register_map.CTRL.AUTO_RESTART = 1
# Computation runs while AP_START is high.
dpu_ip.register_map.CTRL.AP_START = 1

In [334]:
# Verify that AP_START / AUTO_RESTART were set and that the read-only
# registers (e.g. `size`) now hold values.
dpu_ip.register_map

RegisterMap {
  CTRL = Register(AP_START=1, AP_DONE=0, AP_IDLE=0, AP_READY=0, RESERVED_1=0, AUTO_RESTART=1, RESERVED_2=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),
  size = Register(size=10, RESERVED=0),
  size_ctrl = Register(size_ap_vld=1, RESERVED=0)
}

In [361]:
# Read the DPU's `size` register; it is used below as the number of uint32
# words in each input stream.
size = int(dpu_ip.register_map.size)
print(size)

10


In [161]:
################# Test 1: Feed a single stream ##################
# Allocate DMA buffers: `size` input words and one output word per input row.

import numpy as np
from pynq import allocate

row_length = 2
inStream = allocate(shape=(size,), dtype=np.uint32)
outStream = allocate(shape=(size//row_length,), dtype=np.uint32)

# Every row is the same two-word test pattern: (1587568890, 1).
for first_word in range(0, size, row_length):
    second_word = first_word + 1
    inStream[first_word] = 1587568890
    inStream[second_word] = 1

# Push the CPU-side writes out before the DMA reads the buffer.
inStream.flush()
print("InStream: ", inStream)
print("OutStream: ", outStream)

InStream:  [1587568890          1 1587568890          1 1587568890          1
 1587568890          1 1587568890          1]
OutStream:  [0 0 0 0 0]


In [162]:
# Start the DMA transfer of inStream towards the DPU.
# NOTE(review): the message below is printed right after transfer() returns;
# completion is only awaited by sendchannel.wait() in a later cell.
dma.sendchannel.transfer(inStream)
print("Completed transfer inStream")

Completed transfer inStream


In [163]:
# Start the DMA transfer that receives the DPU results into outStream.
# NOTE(review): the message below is printed right after transfer() returns;
# completion is only awaited by recvchannel.wait() in a later cell.
dma.recvchannel.transfer(outStream)
print("Completed transfer outStream")

Completed transfer outStream


In [164]:
import time
# Measure how long the processor blocks waiting for both DMA channels.
# Original note: call if TLAST is not set high in HLS code
# (NOTE(review): confirm -- wait() is what blocks until the channel is done).
startTime = time.time()
dma.sendchannel.wait()
# Original note: call if TLAST is not set high in HLS code (see review note above).
dma.recvchannel.wait()
endTime = time.time()

In [165]:
# Wall-clock seconds spent in the two wait() calls of the previous cell.
print("Processor waits on dma for: ", endTime-startTime)

Processor waits on dma for:  0.0011169910430908203


In [166]:
# outStream was written by the DMA, so discard any stale CPU-cached copy
# before reading it. flush() is for the opposite direction (CPU writes that
# the device is about to read) and could push stale cache lines over the
# freshly received data on a cacheable buffer.
outStream.invalidate()
print(outStream)

[105 105 105 105 105]


In [167]:
# Release the input buffer; inStream must not be used again until it is
# re-allocated (Test 2 allocates a fresh one).
inStream.freebuffer()
print("Freed inStream buffer")


Freed inStream buffer


In [168]:
# Release the output buffer; outStream must not be used again until it is
# re-allocated (Test 2 allocates a fresh one).
outStream.freebuffer()
print("Freed outStream buffer")

Freed outStream buffer


In [362]:
################# Test 2: Feed continuous streams ##################
# Re-allocate the DMA buffers (Test 1 freed its own) for the repeated-stream
# timing runs below.

import numpy as np
from pynq import allocate

row_length = 2
inStream = allocate(shape=(size,), dtype=np.uint32)
outStream = allocate(shape=(size//row_length,), dtype=np.uint32)

# Fill the input with the same two-word row used in Test 1: (1587568890, 1).
for row_start in range(0, size, row_length):
    row_end = row_start + 1
    inStream[row_start] = 1587568890
    inStream[row_end] = 1

# Push the CPU-side writes out before the DMA reads the buffer.
inStream.flush()
print("InStream: ", inStream)
print("OutStream: ", outStream)


InStream:  [1587568890          1 1587568890          1 1587568890          1
 1587568890          1 1587568890          1]
OutStream:  [0 0 0 0 0]


In [363]:
# Show the AXI timer registers before configuring them.
hw_timer.register_map

RegisterMap {
  TCSR0 = Register(MDT0=0, UDT0=0, GENT0=0, CAPT0=0, ARHT0=0, LOAD0=0, ENIT0=0, ENT0=0, T0INT=0, PWMA0=0, ENALL=0, CASC=0),
  TLR0 = Register(TCLR0=0),
  TCR0 = Register(TCR0=0),
  TCSR1 = Register(MDT1=0, UDT1=0, GENT1=0, CAPT1=0, ARHT1=0, LOAD1=0, ENIT1=0, ENT1=0, T1INT=0, PWMA1=0, ENALL=0),
  TLR1 = Register(TCLR1=0),
  TCR1 = Register(TCR1=0)
}

In [364]:
# # Enable Cascaded Mode (64 bit timer)
# ## -> TCSR1 is invalid in this mode
# hw_timer.register_map.TCSR0.CASC = 1

In [379]:
# MDT0 = 0 selects generate mode for timer 0 (as opposed to capture mode).
hw_timer.register_map.TCSR0.MDT0 = 0

In [380]:
# UDT0 = 1 makes timer 0 count DOWN from the loaded value, so elapsed ticks
# are later computed as (load value - current count).
hw_timer.register_map.TCSR0.UDT0 = 1

In [381]:
# ARHT0 = 0: hold -- do not auto-reload the counter from TLR0 when it expires.
# NOTE(review): wording per the AXI Timer ARHT bit description; confirm against
# the product guide.
hw_timer.register_map.TCSR0.ARHT0 = 0

In [382]:
# Reset/Load registers for Cascaded 64 bit mode

## MAX_COUNT
# hw_timer.register_map.TLR1 = 0xFFFFFFFF

## MAX_COUNT
# hw_timer.register_map.TLR0 =  0xFFFFFFFF

## Load value from TLR0 and TLR1 in Cascaded Mode
# hw_timer.register_map.TCSR0.LOAD0 = 1
## Disable load bit so that timer can be enabled
# hw_timer.register_map.TCSR0.LOAD0 = 0

In [454]:
# Reset/Load registers: preload timer 0 with the maximum 32-bit count.
# Re-run this cell before each hardware-timed measurement to restart the count.

## MAX_COUNT: start value for the down counter
hw_timer.register_map.TLR0 = 0xFFFFFFFF
## Load the value from TLR0 into the counter
hw_timer.register_map.TCSR0.LOAD0 = 1
## Clear the load bit again so that the timer can be enabled
hw_timer.register_map.TCSR0.LOAD0 = 0

In [444]:
# Check that the mode bits were set and that the counter (TCR0) now holds the
# value loaded from TLR0.
hw_timer.register_map

RegisterMap {
  TCSR0 = Register(MDT0=0, UDT0=1, GENT0=0, CAPT0=0, ARHT0=0, LOAD0=0, ENIT0=0, ENT0=0, T0INT=0, PWMA0=0, ENALL=0, CASC=0),
  TLR0 = Register(TCLR0=4294967295),
  TCR0 = Register(TCR0=4294967295),
  TCSR1 = Register(MDT1=0, UDT1=0, GENT1=0, CAPT1=0, ARHT1=0, LOAD1=0, ENIT1=0, ENT1=0, T1INT=0, PWMA1=0, ENALL=0),
  TLR1 = Register(TCLR1=0),
  TCR1 = Register(TCR1=0)
}

In [455]:
########## HW Timer Measurement ###########
## Times only the blocking DMA wait of the final stream with the AXI timer.
## The HW timer can measure either the total time or the DMA block time in a
## given run, not both.
import time

streams = 100
listOut = []

# To time the whole loop instead, start the HW timer here:
# hw_timer.register_map.TCSR0.ENT0 = 1
for i in range(streams):
    is_last_stream = i == (streams - 1)

    dma.sendchannel.transfer(inStream)
    dma.recvchannel.transfer(outStream)

    if is_last_stream:
        # Enable the counter right before blocking on the DMA.
        hw_timer.register_map.TCSR0.ENT0 = 1

    dma.sendchannel.wait()
    dma.recvchannel.wait()

    if is_last_stream:
        # Stop the counter, then convert elapsed ticks to seconds.
        hw_timer.register_map.TCSR0.ENT0 = 0
        process_count = int(hw_timer.register_map.TCR0)
        max_count = int(hw_timer.register_map.TLR0)
        # Down counter: elapsed ticks = load value - current count.
        # Assumes the timer is clocked by FCLK0 -- TODO confirm in the block design.
        # Named elapsed_s (not `time`) so the imported `time` module is not shadowed.
        elapsed_s = (max_count - process_count) / (Clocks.fclk0_mhz * 10**6)
        print("DMA Block Time: {0}".format(elapsed_s))

    # listOut.append(outStream)

# ...and stop the HW timer here when timing the whole loop:
# hw_timer.register_map.TCSR0.ENT0 = 0

# inStream/outStream are intentionally NOT freed here: the SW timer cell
# below still transfers them and frees them when it is done. Freeing them
# here would make that cell operate on released memory.
print("Completed transfer")

DMA Block Time: 0.000415193999584806
Completed transfer


In [446]:
# Convert the ticks elapsed on the down counter into seconds.
# Elapsed ticks = load value (TLR0) - current count (TCR0).
# Assumes the timer is clocked by FCLK0 -- TODO confirm in the block design.
process_count = int(hw_timer.register_map.TCR0)
max_count = int(hw_timer.register_map.TLR0)
# Named elapsed_s (not `time`) so the `time` module stays usable in later cells.
elapsed_s = (max_count - process_count) / (Clocks.fclk0_mhz * 10**6)
print("HW timer measurement in seconds is: {}".format(elapsed_s))

hw_timer.register_map

HW timer measurement in seconds is: 0.0374846029625154


RegisterMap {
  TCSR0 = Register(MDT0=0, UDT0=1, GENT0=0, CAPT0=0, ARHT0=0, LOAD0=0, ENIT0=0, ENT0=0, T0INT=0, PWMA0=0, ENALL=0, CASC=0),
  TLR0 = Register(TCLR0=4294967295),
  TCR0 = Register(TCR0=4292083864),
  TCSR1 = Register(MDT1=0, UDT1=0, GENT1=0, CAPT1=0, ARHT1=0, LOAD1=0, ENIT1=0, ENT1=0, T1INT=0, PWMA1=0, ENALL=0),
  TLR1 = Register(TCLR1=0),
  TCR1 = Register(TCR1=0)
}

In [456]:
########## SW Timer Measurement ###########
## The SW timer can measure the total loop time and the DMA block time of the
## final stream in the same run.
## inStream/outStream must still be allocated when this cell runs.

import time

import numpy as np

streams = 100
listOut = []
# Start SW_TIMER
startTime = time.time()

for i in range(streams):
    is_last_stream = i == (streams - 1)

    dma.sendchannel.transfer(inStream)
    dma.recvchannel.transfer(outStream)

    if is_last_stream:
        dmaStartTime = time.time()

    dma.sendchannel.wait()
    dma.recvchannel.wait()

    if is_last_stream:
        dmaEndTime = time.time()
        print("DMA Block Time: {0}".format(dmaEndTime - dmaStartTime))

    # The DMA wrote outStream, so invalidate (not flush) any CPU-cached copy
    # before reading the results.
    outStream.invalidate()

    # Store a plain ndarray copy: appending outStream itself would store the
    # same buffer object every iteration, and that buffer is freed below.
    # The copy is tiny but is included in the total time measured.
    listOut.append(np.array(outStream))

# STOP SW_TIMER
endTime = time.time()

inStream.freebuffer()
outStream.freebuffer()
print("Completed transfer")

DMA Block Time: 0.00012230873107910156
Completed transfer


In [448]:
# Total wall-clock time of the SW-timed loop (startTime/endTime come from the
# SW timer cell), for `streams` streams of `size` words each.
print("Time for {0} streams of {1} records in s: {2}".format(streams, size,  endTime - startTime))

Time for 100 streams of 10 records in s: 0.03750967979431152


In [96]:
# Check records
# for i in range(streams):
#     print(listOut[i])