## Configure hardware

Load the bitstream overlay to program the accelerator hardware.

In [1]:
from pynq import Overlay
from pynq import allocate

In [2]:
# Location of the accelerator bitstream on the board. Kept in one named
# constant so a different build can be selected without editing the call below.
BITSTREAM_PATH = '/home/root/jupyter_notebooks/sha256accelerator.bit'

# Load the overlay described by the bitstream file.
overlay = Overlay(BITSTREAM_PATH)

# Handles to the hashing IP block (exposed by the overlay as `hash_0`): its
# memory-mapped I/O window and its named register map.
ip = overlay.hash_0
mmio = ip.mmio
register_map = ip.register_map

# Register descriptions keyed by register name.
# NOTE(review): this reads a private (underscore-prefixed) attribute of the
# register map, so it may not be stable across pynq releases — confirm.
registers = register_map._register_classes

## Memory mapping

List the available registers. Allocate a buffer so the PL can read the text input from the PS. Map the memory location where the PS reads the results produced by the PL.

In [3]:
# Show all registers available
for entry in registers.items():
    # Each entry is a (name, description) pair; unpacking prints both fields
    # separated by a space.
    print(*entry)

CTRL (<class 'pynq.registers.RegisterCTRL'>, 0, 32, None, None, 'read-write')
GIER (<class 'pynq.registers.RegisterGIER'>, 4, 32, None, None, 'read-write')
IP_IER (<class 'pynq.registers.RegisterIP_IER'>, 8, 32, None, None, 'read-write')
IP_ISR (<class 'pynq.registers.RegisterIP_ISR'>, 12, 32, None, None, 'read-write')
ap_return (<class 'pynq.registers.Registerap_return'>, 16, 32, None, None, 'read-only')
text_length (<class 'pynq.registers.Registertext_length'>, 24, 32, None, None, 'write-only')
text_input_1 (<class 'pynq.registers.Registertext_input_1'>, 32, 32, None, None, 'write-only')
text_input_2 (<class 'pynq.registers.Registertext_input_2'>, 36, 32, None, None, 'write-only')
Memory_result (<class 'pynq.registers.RegisterMemory_result'>, 64, 32, None, None, 'read-write')


In [4]:
# Register location (axilite)
# The register offset is in bytes, while mmio.array is sliced per 4-byte word,
# so byte quantities are converted to word counts by dividing by WORD_BYTES.
WORD_BYTES = 4
SHA256_DIGEST_BYTES = 32  # a SHA-256 digest is 256 bits
result_addr = register_map.Memory_result.address // WORD_BYTES
# Number of words that hold the digest (8). Derived from the digest size rather
# than from the register's bit width: 32 // 4 also gives 8, but only by
# coincidence.
result_size = SHA256_DIGEST_BYTES // WORD_BYTES
# Slice of the MMIO window covering the result words; read later to fetch the digest.
result_val = mmio.array[result_addr:result_addr+result_size]

# Allocated buffer (m_axi)
input_buffer_size = 1024
# dtype 'u1' is one unsigned byte per element, so the buffer holds up to
# input_buffer_size bytes of message text.
input_buffer = allocate(shape=(input_buffer_size,), dtype='u1', cacheable=False)
# Tell the IP where the buffer lives. Only text_input_1 is written here.
# NOTE(review): text_input_2 presumably carries the upper address bits —
# confirm it can stay at its default for this platform's address range.
register_map.text_input_1.text_input = input_buffer.device_address

## Hashing functions

Define the hardware-accelerated function and the software library function.

In [5]:
import hashlib
import array
import math
import numpy as np

# Hardware accelerated function
def SHA256_hardware(bstr):
    """Hash ``bstr`` on the accelerator and return the digest as a hex string.

    Args:
        bstr: Bytes-like message. It must fit in ``input_buffer``.

    Returns:
        The contents of the result registers as a lowercase hex string.

    Raises:
        ValueError: If ``bstr`` is longer than ``input_buffer``.
    """
    length = len(bstr)

    # Reject oversized input before touching the hardware. Without this check
    # the length register would already be written when the buffer copy fails,
    # and the failure would surface as an opaque array-shape error.
    capacity = len(input_buffer)
    if length > capacity:
        raise ValueError(
            f"input is {length} bytes but the input buffer holds only {capacity}"
        )

    # Set text length
    register_map.text_length = length

    # Write to input buffer
    # The buffer is allocated with cacheable=False.
    # NOTE(review): presumably that is why no explicit flush is issued — confirm.
    input_buffer[:length] = bytearray(bstr)

    # Send start signal
    register_map.CTRL.AP_START = 1

    # Wait until algorithm has completed
    # Busy-wait with no timeout: this spins forever if AP_DONE is never raised.
    while register_map.CTRL.AP_DONE == 0:
        pass

    # Retrieve result as a hex string
    digest = bytearray(result_val)
    return digest.hex()

# Software library function
def SHA256_software(bstr):
    """Return the SHA-256 digest of ``bstr`` as a hex string, computed with ``hashlib``."""
    hasher = hashlib.sha256()
    hasher.update(bstr)
    return hasher.hexdigest()

## Test cases

Print some known SHA-256 test vectors. Then assert that the hardware and software digests match for random byte arrays of every length from 0 to 512 bytes.

In [6]:
import secrets

# Known test vectors
# For each vector the hardware digest is printed first, then the software one.
for vector in (b'', b'abc'):
    print(SHA256_hardware(vector))
    print(SHA256_software(vector))

# Test random lengths of bytes
# Every length from 0 to 512 inclusive is exercised once with fresh random data.
for length in range(512 + 1):
    payload = secrets.token_bytes(length)
    hw_digest = SHA256_hardware(payload)
    sw_digest = SHA256_software(payload)
    # On mismatch, report the failing length together with both digests.
    assert hw_digest == sw_digest, (length, hw_digest, sw_digest)

print("All tests passed")

e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad
ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad
All tests passed


## Benchmark

Compare the hardware and software implementations. Find out the performance gains.

In [7]:
# Time both implementations on 16-byte inputs: 5 runs of 1000 loops each.
# -o makes %timeit return its result object so a later cell can read the averages.
# Note: secrets.token_bytes(16) is part of each timed statement, so both
# figures include the cost of generating the random input.
sw_time = %timeit -n 1000 -r 5 -o SHA256_software(secrets.token_bytes(16))
hw_time = %timeit -n 1000 -r 5 -o SHA256_hardware(secrets.token_bytes(16))

157 µs ± 4.93 µs per loop (mean ± std. dev. of 5 runs, 1000 loops each)
11.1 µs ± 3.27 µs per loop (mean ± std. dev. of 5 runs, 1000 loops each)


In [8]:
# Ratio of the average software time to the average hardware time.
speedup = sw_time.average / hw_time.average
print('Performance gain:', speedup)

Performance gain: 14.140803085604233
