In [1]:
#!/usr/bin/env python3
# Copyright 2022 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0

import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F
from torchvision import transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
import argparse
import pathlib
import hjson
import random
import os

In [2]:
# Seed NumPy and PyTorch so the random data generated below is reproducible.
# (The former module-level `global verbose` statement was removed: at module
# scope a `global` declaration has no effect and defines no variable.)
np.random.seed(42)
torch.manual_seed(42)

## Random Data

In [15]:
# Re-seed here so this cell yields the same data when it is re-run on its own.
np.random.seed(42)
torch.manual_seed(42)

# Layer dimensions.
# IN_CH is the flattened image size (channels * height * width). Sizes noted
# per precision: FP64: 1 x 28 x 28, FP32: 1 x 40 x 40, FP16: 3 x 32 x 32, FP8: 3 x 48 x 48
IN_CH = 1 * 28 * 28  # 1 channel, 28x28 pixels
OUT_CH = 16  # 16 classes
# lower / upper bound passed to uniform_() for parameters and gradients
r1 = 0
r2 = 0.5

data_type = torch.float64

# random input vector with shape (IN_CH,)
# NOTE: `input` shadows the Python builtin; the name is kept because the
# export cells further down read it.
input = torch.randn(IN_CH, dtype=data_type)
print(input.shape)

# random weights with shape (OUT_CH, IN_CH), drawn as float32 and then cast
weights = torch.FloatTensor(OUT_CH, IN_CH).uniform_(r1, r2).to(data_type)
print(weights.shape)

# random weight gradients with shape (OUT_CH, IN_CH)
weight_grads = torch.FloatTensor(OUT_CH, IN_CH).uniform_(r1, r2).to(data_type)
print(weight_grads.shape)

# random bias with shape (OUT_CH,)
bias = torch.FloatTensor(OUT_CH).uniform_(r1, r2).to(data_type)
print(bias.shape)

# random bias gradients with shape (OUT_CH,)
bias_grads = torch.FloatTensor(OUT_CH).uniform_(r1, r2).to(data_type)
print(bias_grads.shape)

# activations of the linear layer: input @ W^T + b, shape (OUT_CH,)
activations = input @ weights.t() + bias
print(activations.shape)

# random class label in [0, OUT_CH); tied to OUT_CH so that it stays a valid
# class index when the number of classes changes
label = torch.randint(0, OUT_CH, (1,))
print(label)

torch.Size([784])
torch.Size([16, 784])
torch.Size([16, 784])
torch.Size([16])
torch.Size([16])
torch.Size([16])
tensor([9])


#### Conversion helper functions for FP8

In [None]:
# function to convert float32 to binary representation
import struct

def float32_to_bin(value):
    """Return the 32-character bit string of `value` encoded as an IEEE-754 float32.

    The value is packed big-endian ('!f'), so the string reads sign bit first,
    followed by the 8 exponent bits and the 23 mantissa bits.
    """
    packed = struct.pack('!f', value)
    as_int = int.from_bytes(packed, 'big')
    return format(as_int, '032b')

In [None]:
"""
We have to handle denormalized numbers:
    
    +INF will be represented in FP8 as 0 11111 00 
    -INF will be represented in FP8 as 1 11111 00
    +0 will be represented in FP8 as 0 00000 00
    -0 will be represented in FP8 as 1 00000 00
    NaN will be represented in FP8 as X 11111 MM (at least one of the MM bits is set, sign bit is don't care)

According to https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8556098 denormalized transprecision numbers
will be represented by their high precision counterparts. In these cases we have to make sure that we do not adjust
the exponent. In the other cases we adjust the exponent and cut the mantissa.
"""

from numpy import binary_repr

# this function returns the FP8 encoding of a number as a '0b'-prefixed string
# of 8 binary digits (10 characters in total)
def float32_to_fp8(value):
    """Convert a number to an FP8 bit string (1 sign, 5 exponent, 2 mantissa bits).

    The value is first encoded as an IEEE-754 float32; the exponent is then
    re-biased from the float32 bias (127) to the FP8 bias (15) and the mantissa
    is truncated (not rounded) to its two most significant bits.

    Special cases:
        * +/-Inf          -> S 11111 00
        * NaN             -> S 11111 01
        * +/-0            -> S 00000 00
        * exponent too large for FP8 -> saturates to +/-Inf
        * exponent too small for FP8 (incl. float32 subnormals) -> +/-0

    Args:
        value: number to convert (anything accepted by ``struct.pack('!f', ...)``).

    Returns:
        str: ``'0b'`` followed by exactly 8 binary digits.

    Raises:
        OverflowError: raised by ``struct.pack`` when a finite `value` does not
            fit into a float32.
    """
    import struct  # local import so the helper does not depend on other cells

    exp_bits_fp32, man_bits_fp32 = 8, 23
    exp_bits_fp8, man_bits_fp8 = 5, 2
    max_exp_fp32 = (1 << exp_bits_fp32) - 1
    max_exp_fp8 = (1 << exp_bits_fp8) - 1
    exp_bias_fp32 = 2 ** (exp_bits_fp32 - 1) - 1
    exp_bias_fp8 = 2 ** (exp_bits_fp8 - 1) - 1

    # split the float32 bit pattern into sign, exponent and mantissa fields
    raw = int.from_bytes(struct.pack('!f', value), 'big')
    sign = raw >> (exp_bits_fp32 + man_bits_fp32)
    exponent = (raw >> man_bits_fp32) & max_exp_fp32
    mantissa = raw & ((1 << man_bits_fp32) - 1)

    def pack(exp_field, man_field):
        # assemble the '0b' + sign + 5 exponent bits + 2 mantissa bits string
        return '0b{:b}{:05b}{:02b}'.format(sign, exp_field, man_field)

    # all exponent bits set: +/-Inf (mantissa zero) or NaN (mantissa non-zero).
    # The fields are compared as integers; parsing the bit string as a decimal
    # number would never equal 255 and would let Inf/NaN fall through.
    if exponent == max_exp_fp32:
        return pack(max_exp_fp8, 0 if mantissa == 0 else 1)

    # exponent and mantissa both zero: +/-0
    if exponent == 0 and mantissa == 0:
        return pack(0, 0)

    # regular case: re-bias the exponent and cut the mantissa
    exponent_fp8 = exponent - exp_bias_fp32 + exp_bias_fp8
    if exponent_fp8 >= max_exp_fp8:
        # magnitude not representable: saturate to infinity instead of
        # emitting an exponent that does not fit into 5 bits
        return pack(max_exp_fp8, 0)
    if exponent_fp8 < 0:
        # magnitude too small for the FP8 exponent range: flush to signed zero
        return pack(0, 0)
    return pack(exponent_fp8, mantissa >> (man_bits_fp32 - man_bits_fp8))


In [None]:
# exponent is stored in two's complement
def twos_comp(val, bits):
    """Interpret the unsigned integer `val` as a `bits`-wide two's-complement number.

    Values whose top bit (bit ``bits - 1``) is set are mapped to the
    corresponding negative number; all other values are returned unchanged.
    """
    sign_mask = 1 << (bits - 1)
    if val & sign_mask:
        # top bit set (e.g. 128-255 for 8 bits): wrap around into the negatives
        return val - (1 << bits)
    return val

In [None]:
def convert_to_fp8_decimal(binstr):
    """Decode an FP8 bit string (1 sign, 5 exponent, 2 mantissa bits) to a number.

    The exponent field is a biased exponent (bias 15), matching the encoding
    produced by ``float32_to_fp8``; it must not be read as a two's-complement
    number, otherwise every field >= 16 (i.e. every magnitude >= 2.0) decodes
    to a tiny value.

    Args:
        binstr: 8 binary digits, optionally prefixed with ``'0b'`` so that the
            output of ``float32_to_fp8`` can be passed in directly.

    Returns:
        The decoded value: ``0`` for +0, ``-0.0`` for -0, ``+/-inf`` for an
        all-ones exponent with zero mantissa, ``nan`` for an all-ones exponent
        with non-zero mantissa, and
        ``(-1)**sign * (1 + mantissa / 4) * 2**(exponent - 15)`` otherwise.
        An exponent field of zero with a non-zero mantissa is decoded with the
        same formula (implicit leading one), mirroring how ``float32_to_fp8``
        encodes values in that range.
    """
    # tolerate the '0b' prefix emitted by float32_to_fp8
    if binstr[:2] in ('0b', '0B'):
        binstr = binstr[2:]

    # extract sign, exponent and mantissa bits
    sign = binstr[0]
    exponent = binstr[1:6]
    num_exp_bits = len(exponent)
    mantissa = binstr[6:]
    num_mant_bits = len(mantissa)

    sign_factor = (-1) ** int(sign, 2)
    exp_field = int(exponent, 2)
    mant_field = int(mantissa, 2)
    exp_bias_fp8 = 2 ** (5 - 1) - 1

    # signed zero
    if exp_field == 0 and mant_field == 0:
        return 0 if sign_factor > 0 else -0.0

    # all exponent bits set: infinity or NaN
    if exp_field == 2 ** num_exp_bits - 1:
        if mant_field == 0:
            return sign_factor * float('inf')
        return float('nan')

    # regular number with implicit leading one and biased exponent
    return sign_factor * (1 + mant_field / (2 ** num_mant_bits)) * 2 ** (exp_field - exp_bias_fp8)
    

In [None]:
# for FP8 conversion
# Replaces weights, bias and input by flat lists of FP8 bit strings. Run this
# cell only when FP8 data should be exported.
def _encode_fp8(values):
    """Return `values` flattened and encoded element-wise with float32_to_fp8.

    A plain list is returned unchanged: it is the result of an earlier run of
    this cell, and converting it again would fail because lists have no
    ``flatten()``. This keeps the cell safe to re-run.
    """
    if isinstance(values, list):
        return values
    return [float32_to_fp8(x) for x in values.flatten().tolist()]

# convert weights to FP8
weights = _encode_fp8(weights)
# convert bias to FP8
bias = _encode_fp8(bias)
# convert input to FP8
input = _encode_fp8(input)

#### Get the memory requirements for given data

In [4]:
# calculate the memory requirements
# The element width is taken from the first bias entry: float64 / float32 /
# float16 tensors count as 64 / 32 / 16 bits, everything else (any other dtype
# or a non-tensor entry) is reported as FP8, i.e. 8 bits per element.
_first_bias = bias[0]
_elem_bits = 8
if torch.is_tensor(_first_bias):
    _elem_bits = {torch.float64: 64, torch.float32: 32, torch.float16: 16}.get(_first_bias.dtype, 8)

print(f"FP{_elem_bits} memory reguirements:")
# one line per buffer: (name shown in the report, number of elements)
for _region, _n_elems in (
    ('Input', IN_CH),
    ('Weights', OUT_CH * IN_CH),
    ('Bias', OUT_CH),
    ('Output', OUT_CH),
):
    print(f'{_region} size: {_n_elems * _elem_bits / 8 / 1024} KB')
# the total counts input, weights and one OUT_CH-sized buffer
print(f'\nTotal size: {(IN_CH + OUT_CH * IN_CH + OUT_CH) * _elem_bits / 8 / 1024} KB')

FP64 memory reguirements:
Input size: 6.125 KB
Weights size: 98.0 KB
Bias size: 0.125 KB
Output size: 0.125 KB

Total size: 104.25 KB


#### Helper functions for exporting the data

In [5]:
def array_to_cstr(a):
    """Format the elements of `a` as a C brace initializer, e.g. ``'{1, 2, 3}'``.

    Args:
        a: a ``torch.Tensor``, a ``numpy.ndarray`` or any other iterable.
           Tensors and arrays are flattened, so multi-dimensional data is
           emitted as one flat list; other iterables are formatted element by
           element as they are.

    Returns:
        str: the elements formatted with ``'{}'.format`` and joined by ``', '``
        inside curly braces. Empty input yields ``'{}'``.
    """
    if isinstance(a, torch.Tensor):
        # detach()/cpu() so that tensors which track gradients or live on an
        # accelerator can be converted to NumPy as well
        a = a.detach().cpu().numpy()
    if isinstance(a, np.ndarray):
        a = a.flat
    # join() handles the empty case and avoids repeated string concatenation
    return '{' + ', '.join('{}'.format(el) for el in a) + '}'

In [19]:
def emit_mnist_data(name='mnist', **kwargs):
    """Build the C source text describing one network layer and its data.

    The text consists of an ``#include "network.h"`` line, a
    ``network_single_cluster_t`` initializer and six static arrays: weights,
    weight gradients, biases, bias gradients, images and labels.

    Args:
        name: prefix used for all emitted C identifiers.
        **kwargs: must contain 'IN_CH', 'OUT_CH', 'DATASET_SIZE', 'INPUT',
            'LABELS', 'WEIGHTS', 'WEIGHT_GRADS', 'BIASES', 'BIAS_GRADS' and
            'prec' (one of 64, 32, 16, 'B16', 8).

    Returns:
        str: the generated C source.

    Raises:
        KeyError: if a required key is missing or 'prec' is not supported.
    """
    # layer dimensions
    n_in = kwargs['IN_CH']
    n_out = kwargs['OUT_CH']
    n_samples = kwargs['DATASET_SIZE']

    # sample data
    images = kwargs['INPUT']
    labels = kwargs['LABELS']

    # initial parameters and gradients from the golden model
    weights_mat = kwargs['WEIGHTS']
    weight_grads_mat = kwargs['WEIGHT_GRADS']
    biases_mat = kwargs['BIASES']
    bias_grads_mat = kwargs['BIAS_GRADS']

    # struct describing the layer
    header = (
        '#include "network.h"\n\n'
        f'network_single_cluster_t {name}_t = {{\n'
        f'\t.IN_CH = {n_in},\n'
        f'\t.OUT_CH = {n_out},\n'
        f'\t.dtype = FP{kwargs["prec"]}\n'
        '};\n\n\n'
    )

    # C element type for the requested precision
    c_type = {
        '64': 'double',
        '32': 'float',
        '16': '__fp16',
        'B16': '__bf16',
        '8': 'char',
    }[str(kwargs['prec'])]

    # (element type, identifier suffix, rows, columns, values) per array, in
    # emission order: network initialization first, then the input data
    arrays = (
        (c_type, 'weights', n_out, n_in, weights_mat),
        (c_type, 'weight_grads', n_out, n_in, weight_grads_mat),
        (c_type, 'biases', n_out, 1, biases_mat),
        (c_type, 'bias_grads', n_out, 1, bias_grads_mat),
        (c_type, 'images', n_samples * n_in, 1, images),
        ('uint32_t', 'labels', n_samples, 1, labels),
    )
    declarations = [
        f'static {elem_type} {name}_{suffix}_dram [{rows}][{cols}] = ' + array_to_cstr(values) + ';\n\n\n'
        for elem_type, suffix, rows, cols, values in arrays
    ]

    return header + ''.join(declarations)


In [20]:
def emit_mnist_header_file(layer_type: str,
                           out_dir: str = '/scratch/msc22f11/msc22f11/snitch/sw/applications/data/',
                           **kwargs):
    """Write the generated C header for `layer_type` to disk.

    Args:
        layer_type: kind of data to emit; only 'mnist' is supported.
        out_dir: directory the header is written to. Defaults to the location
            that was previously hard-coded in this function.
        **kwargs: forwarded unchanged to ``emit_mnist_data``.

    Raises:
        ValueError: if `layer_type` is not supported. This is checked before
            any data is generated or any file is opened; previously such a
            call failed with an unbound local variable when opening the file.

    NOTE(review): the file name is always 'data_fp64_all_mnist.h', whatever
    precision is passed in kwargs['prec'] -- confirm that this is intended.
    """
    if layer_type != 'mnist':
        raise ValueError(f"unsupported layer_type {layer_type!r}; expected 'mnist'")

    emit_str = "// Copyright 2022 ETH Zurich and University of Bologna.\n" + \
               "// Licensed under the Apache License, Version 2.0, see LICENSE for details.\n" + \
               "// SPDX-License-Identifier: Apache-2.0\n\n"
    emit_str += emit_mnist_data(**kwargs)

    # os.path.join copes with `out_dir` given with or without a trailing slash
    file = os.path.join(out_dir, 'data_fp64_all_mnist.h')

    with open(file, 'w') as f:
        f.write(emit_str)

    print("File written to: " + file)


In [21]:
# Arguments for the header export: layer dimensions, a single sample with its
# label, the parameters with their gradients, and the precision (in bits) that
# selects the C element type.
kwargs = dict(
    IN_CH=IN_CH,
    OUT_CH=OUT_CH,
    DATASET_SIZE=1,
    INPUT=input,
    WEIGHTS=weights,
    WEIGHT_GRADS=weight_grads,
    BIASES=bias,
    BIAS_GRADS=bias_grads,
    LABELS=label,
    prec=64,
)

In [22]:
# Generate the C header from the data collected in `kwargs` and write it to disk.
emit_mnist_header_file('mnist', **kwargs)

File written to: /scratch/msc22f11/msc22f11/snitch/sw/applications/data/data_fp64_all_mnist.h
