In [3]:
#!/usr/bin/env python3
# Copyright 2022 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0

import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F
from torchvision import transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
import argparse
import pathlib
import hjson
import random
import os

In [5]:
# Seed every RNG the notebook draws from so that reruns are reproducible.
# Python's `random` is seeded as well because clear_special_values() picks its
# replacement values with random.choice().
np.random.seed(42)
torch.manual_seed(42)
random.seed(42)

In [6]:
def array_to_cstr(a):
    """Format the elements of ``a`` as a C brace initializer, e.g. ``{1, 2, 3}``.

    Args:
        a: A numpy array or torch tensor (visited in ``ndarray.flat`` order),
            or any other iterable of elements.

    Returns:
        str: ``'{e0, e1, ...}'`` with every element rendered through
        ``str.format``; ``'{}'`` when ``a`` has no elements.
    """
    if isinstance(a, torch.Tensor):
        # detach() allows tensors that track gradients (e.g. nn.Parameter) to
        # be exported; cpu() covers tensors that live on another device.
        a = a.detach().cpu().numpy()
    if isinstance(a, np.ndarray):
        a = a.flat
    # join() yields a well-formed '{}' for empty input instead of slicing the
    # opening brace away.
    return '{' + ', '.join('{}'.format(el) for el in a) + '}'

In [47]:
def emit_mnist_data(name='mnist', **kwargs):
    """Build the C source text declaring the network struct and its initial parameters.

    Args:
        name: Prefix used for the emitted C identifiers.
        **kwargs: Must provide ``IN_CH1``, ``IN_CH2``, ``OUT_CH``,
            ``DATASET_SIZE``, ``WEIGHTS``, ``BIASES`` and ``prec``. ``prec``
            selects the C element type (one of 64, 32, 16, 'B16', 8) and is
            also written into the struct's ``dtype`` field.

    Returns:
        str: The ``#include``, the network struct initializer and the static
        weight and bias arrays.

    Note:
        Only weights and biases are emitted; emission of the image/label
        arrays and of the gradient arrays is currently disabled.
        NOTE(review): the struct type is always ``network_fp64_t`` regardless
        of ``prec`` - confirm against network.h.
    """
    rows = kwargs['IN_CH1']
    cols = kwargs['IN_CH2']
    n_out = kwargs['OUT_CH']
    # Looked up so that a missing key still fails here; the value itself is
    # unused while emission of the dataset arrays is disabled.
    kwargs['DATASET_SIZE']

    # network init parameters from the golden model
    weight_values = kwargs['WEIGHTS']
    bias_values = kwargs['BIASES']

    n_in = rows * cols

    struct_parts = [
        '#include "network.h"\n\n',
        f'network_fp64_t {name}_t = {{\n',
        f'\t.IN_CH1 = {rows},\n',
        f'\t.IN_CH2 = {cols},\n',
        f'\t.OUT_CH = {n_out},\n',
        f'\t.dtype = FP{kwargs["prec"]}\n',
        '};\n\n\n',
    ]

    # C element type for each supported precision key
    c_type_for_prec = {
        '64': 'double',
        '32': 'float',
        '16': '__fp16',
        'B16': '__bf16',
        '8': 'char'
    }
    c_type = c_type_for_prec[str(kwargs['prec'])]

    # network initialization arrays
    array_parts = [
        f'static {c_type} {name}_weights_dram [{n_out}][{n_in}] = ' + array_to_cstr(weight_values) + ';\n\n\n',
        f'static {c_type} {name}_biases_dram [{n_out}][{1}] = ' + array_to_cstr(bias_values) + ';\n\n\n',
    ]

    return ''.join(struct_parts + array_parts)


In [55]:
def emit_mnist_header_file(layer_type: str, data_type: str, *,
                           file_path: str = '/scratch1/msc22f11/snitch/sw/applications/data/',
                           **kwargs):
    """Generate the MNIST data header and write it to disk.

    Args:
        layer_type: Kind of layer to emit; only ``'mnist'`` is supported.
        data_type: One of ``'FP64'``, ``'FP32'``, ``'FP16'``, ``'BF16'`` or
            ``'FP8'``. It only selects the output file name; the element type
            written into the header is taken from ``kwargs['prec']`` by
            ``emit_mnist_data``.
        file_path: Directory that receives the header. Defaults to the
            location that was previously hard-coded.
        **kwargs: Forwarded unchanged to ``emit_mnist_data``.

    Raises:
        ValueError: If the ``layer_type`` / ``data_type`` combination is not
            supported. Checked before anything is generated or written.
    """
    # Header file name per supported (layer_type, data_type) combination.
    header_names = {
        ('mnist', 'FP64'): 'data_fp64_mnist.h',
        ('mnist', 'FP32'): 'data_fp32_mnist.h',
        ('mnist', 'FP16'): 'data_fp16_test_mnist.h',
        ('mnist', 'BF16'): 'data_bf16_mnist.h',
        ('mnist', 'FP8'): 'data_fp8_test_mnist.h',
    }
    try:
        header_name = header_names[(layer_type, data_type)]
    except KeyError:
        supported = ', '.join(f'{lt}/{dt}' for lt, dt in header_names)
        raise ValueError(
            f'Unsupported layer/data type {layer_type!r}/{data_type!r}; '
            f'supported combinations: {supported}'
        ) from None

    file = os.path.join(file_path, header_name)

    emit_str = "// Copyright 2022 ETH Zurich and University of Bologna.\n" + \
               "// Licensed under the Apache License, Version 2.0, see LICENSE for details.\n" + \
               "// SPDX-License-Identifier: Apache-2.0\n\n"
    emit_str += emit_mnist_data(**kwargs)

    # Mode 'w' creates the file if needed and replaces any existing content.
    with open(file, 'w') as f:
        f.write(emit_str)

    print(f'Wrote {file}')


In [9]:
# Fetch the MNIST training split (download=True) into PATH_DATASETS, which
# falls back to the current directory when the environment variable is unset.
PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")
transform = transforms.Compose([transforms.ToTensor()])
mnist_dataset = MNIST(
    PATH_DATASETS,
    train=True,
    transform=transform,
    download=True,
)

# Dedicated generator with a fixed seed, for reproducibility.
g = torch.Generator().manual_seed(42)

def seed_worker(worker_id):
    """Seed numpy's and Python's RNGs from torch's initial seed.

    Intended as a DataLoader ``worker_init_fn``. The seed is reduced modulo
    2**32 before being handed to ``np.random.seed`` and ``random.seed``.

    Args:
        worker_id: Worker index supplied by the caller; not used.
    """
    # torch.initial_seed is a function and has to be called; applying `%` to
    # the function object itself raises TypeError.
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

# Loader over the MNIST training set; seed_worker and the seeded generator `g`
# are passed in for reproducibility.
mnist_dl = DataLoader(
    mnist_dataset,
    worker_init_fn=seed_worker,
    generator=g,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz


2.4%

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


30.9%IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

82.6%IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100.0%


Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz


112.7%

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw






In [10]:
# Layer dimensions: a flattened 28x28 input and 10 outputs.
in_ch = 28 * 28
out_ch = 10


class LinLayer(nn.Module):
    """A single float64 linear layer mapping ``in_ch`` inputs to ``out_ch`` outputs."""

    def __init__(self):
        super().__init__()
        # Seed right before creating the layer so that every instance starts
        # from the same initial parameters.
        torch.manual_seed(42)
        self.lin = nn.Linear(in_ch, out_ch, dtype=torch.float64)

    def forward(self, x):
        """Flatten each sample of ``x`` and apply the linear layer."""
        # NOTE(review): this reseeds torch's global RNG on every forward call;
        # confirm that is intended, since the layer itself draws no random
        # numbers here.
        torch.manual_seed(42)
        batch_size = x.size(0)
        flattened = x.view(batch_size, -1)
        return self.lin(flattened)

In [11]:
# import optimizers
import torch.optim as optim
# Instantiate the golden model and keep handles to its initial parameters.
torch.manual_seed(42)
net = LinLayer()
weights, biases = net.lin.weight, net.lin.bias

print("initial weights shape: {}".format(weights.shape))
print("initial biases shape: {}".format(biases.shape))

initial weights shape: torch.Size([10, 784])
initial biases shape: torch.Size([10])


In [13]:
# special handling for FP8 type where we set up the conversion first
import pathlib
import ctypes

from ctypes import c_uint8, c_double, c_float
from ctypes import byref, Structure


class flexfloat_desc_t(Structure):
    """ctypes layout of a flexfloat format descriptor: exponent and fraction widths in bits."""
    _fields_ = [
        ("exp_bits", c_uint8),
        ("frac_bits", c_uint8),
    ]


class flexfloat_t(Structure):
    """ctypes layout of a flexfloat number: a double-typed value plus its format descriptor."""
    _fields_ = [
        ("value", c_double),
        ("desc", flexfloat_desc_t),
    ]


# Format descriptors as (exponent bits, fraction bits). Together with the sign
# bit each pair adds up to the width in its name.
fp64_desc = flexfloat_desc_t(11, 52)
fp32_desc = flexfloat_desc_t(8, 23)
fp16_desc = flexfloat_desc_t(5, 10)  # binary16 has 10 fraction bits: 1 + 5 + 10 = 16
fp16alt_desc = flexfloat_desc_t(8, 7)
fp8_desc = flexfloat_desc_t(5, 2)
fp8alt_desc = flexfloat_desc_t(4, 3)

# Location of the flexfloat shared library. Set FLEXFLOAT_LIB to override the
# default path, in the same way PATH_DATASETS is resolved for the dataset.
lib_path = os.environ.get(
    "FLEXFLOAT_LIB",
    "/usr/scratch/badile31/msc22f11/msc22f11/PlayGround/flexfloat/src/libflexfloat.so",
)
ff_lib = ctypes.CDLL(lib_path)

# Declare the C float return type; ctypes otherwise assumes the function
# returns an int.
ff_get_float = ff_lib.ff_get_float
ff_get_float.restype = c_float


class ff:
    """Python-side handle for a flexfloat number backed by ``ff_lib``.

    Attributes (set in ``__init__``):
        desc: Format descriptor of the number.
        value: The Python value the number was created from.
        a: The ``flexfloat_t`` structure passed to the C library.
    """

    def __init__(self, value: float, desc: flexfloat_desc_t = fp64_desc):
        self.desc = desc
        self.value = value
        self.a = flexfloat_t(value, desc)
        # The library initialises self.a in place; the value is passed as a
        # C float.
        ff_lib.ff_init_float(byref(self.a), c_float(value), desc)

    def __add__(self, b):
        """Return ``self + b`` as a new ``ff`` carrying this operand's descriptor.

        Returns ``NotImplemented`` when ``b`` is not an ``ff``.
        """
        if not isinstance(b, ff):
            return NotImplemented
        ff_res = flexfloat_t(0.0, self.desc)
        ff_lib.ff_add(byref(ff_res), byref(self.a), byref(b.a))
        # Wrap the C result without going through __init__, which would run
        # ff_init_float on it again.
        result = ff.__new__(ff)
        result.desc = self.desc
        # NOTE(review): assumes ff_add leaves the sum in the struct's `value`
        # field - confirm against flexfloat.h.
        result.value = ff_res.value
        result.a = ff_res
        return result

In [14]:
# function to convert float32 to binary representation
import struct

def float32_to_bin(value):
    """Return the 32-character bit string of ``value`` packed as a big-endian float32."""
    # Reinterpret the four packed bytes as one unsigned 32-bit integer and
    # render it zero-padded to 32 binary digits.
    (raw_bits,) = struct.unpack('!I', struct.pack('!f', value))
    return format(raw_bits, '032b')

In [15]:
"""
We have to handle denormalized numbers:
    
    +INF will be represented in FP8 as 0 11111 00 
    -INF will be represented in FP8 as 1 11111 00
    +0 will be represented in FP8 as 0 00000 00
    -0 will be represented in FP8 as 1 00000 00
    NaN will be represented in FP8 as X 11111 MM (at least one of the MM bits is set; the sign bit X is a don't-care)

According to https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8556098 denormalized transprecision numbers
will be represented by their high precision counterparts. In these cases we have to make sure that we do not adjust
the exponent. In the other cases we adjust the exponent and cut the mantissa.
"""

from numpy import binary_repr

# Returns the FP8 encoding (1 sign, 5 exponent, 2 mantissa bits) of a float32
# value as a '0b'-prefixed string of binary digits.
def float32_to_fp8(value):
    """Convert ``value`` (treated as float32) to an FP8 bit string.

    Args:
        value: Number accepted by ``float32_to_bin``.

    Returns:
        str: ``'0b'`` followed by sign, 5 exponent bits and 2 mantissa bits.
        +/-INF map to ``S 11111 00``, NaN to ``S 11111 01`` and +/-0 to
        ``S 00000 00``. Every other value gets its exponent rebased from the
        float32 bias (127) to the FP8 bias (15) and its mantissa truncated to
        the two leading bits.

    NOTE(review): rebased exponents outside the 5-bit range are not clamped;
    they are passed to ``binary_repr(..., width=5)`` as they are.
    """
    max_exp_fp32 = 0b11111111
    min_exp_fp32 = 0b00000000
    exp_bias_fp32 = 2 ** (8 - 1) - 1
    exp_bias_fp8 = 2 ** (5 - 1) - 1

    # split the float32 bit pattern into sign, exponent and mantissa fields
    binstr = float32_to_bin(value)
    sign = binstr[0]
    exponent = binstr[1:9]
    mantissa = binstr[9:]

    # The fields are binary digit strings, so they have to be parsed in base 2;
    # read as decimal, '11111111' would never equal 255.
    exponent_val = int(exponent, 2)
    mantissa_val = int(mantissa, 2)

    if exponent_val == max_exp_fp32:
        # all exponent bits set: zero mantissa is +/-INF, anything else NaN
        if mantissa_val == 0:
            return '0b' + sign + exponent[:5] + mantissa[:2]
        return '0b' + sign + exponent[:5] + '01'

    if exponent_val == min_exp_fp32 and mantissa_val == 0:
        # +/-0
        return '0b' + sign + exponent[:5] + mantissa[:2]

    # ordinary value: rebase the exponent and cut the mantissa
    exponent_fp8 = binary_repr(exponent_val - exp_bias_fp32 + exp_bias_fp8, width=5)
    mantissa_fp8 = mantissa[:2]
    return '0b' + sign + exponent_fp8 + mantissa_fp8


In [16]:
# exponent is stored in two's complement
def twos_comp(val, bits):
    """Interpret the ``bits``-wide unsigned integer ``val`` as a two's-complement signed value."""
    sign_mask = 1 << (bits - 1)
    if val & sign_mask:
        # sign bit set (e.g. 128-255 for 8 bits): shift into the negative range
        return val - (1 << bits)
    return val

In [17]:
def convert_to_fp8_decimal(binstr):
    """Decode an FP8 bit string (1 sign, 5 exponent, 2 mantissa bits) into a number.

    Args:
        binstr: Eight binary digits, optionally prefixed with ``'0b'`` as
            produced by ``float32_to_fp8``.

    Returns:
        ``0`` for ``0 00000 00``, ``-0.0`` for ``1 00000 00``, otherwise
        ``(-1)**sign * (1 + mantissa / 4) * 2**(twos_comp(exponent, 5) - 15)``.

    NOTE(review): the exponent field is passed through ``twos_comp`` before
    the bias is subtracted, and INF/NaN patterns are decoded like ordinary
    numbers - confirm both are intended.
    """
    # accept the '0b'-prefixed strings returned by float32_to_fp8 as well
    if binstr[:2] in ('0b', '0B'):
        binstr = binstr[2:]

    # extract sign, exponent and mantissa bits
    sign = int(binstr[0], 2)
    exponent = binstr[1:6]
    mantissa = binstr[6:]
    num_exp_bits = len(exponent)
    num_mant_bits = len(mantissa)
    exponent_val = int(exponent, 2)
    mantissa_val = int(mantissa, 2)

    # Both signed zeros decode to zero; S 00000 00 is the zero encoding for
    # either sign.
    if exponent_val == 0 and mantissa_val == 0:
        return -0.0 if sign else 0

    exp_bias_fp8 = 2 ** (5 - 1) - 1
    return (-1) ** sign * (1 + mantissa_val / (2 ** num_mant_bits)) * 2 ** (twos_comp(exponent_val, num_exp_bits) - exp_bias_fp8)
    

In [18]:
# Bit patterns of the special FP8 values: the exponent field is all ones, with
# a non-zero mantissa for the NaNs and a zero mantissa for the infinities.
fp8_nans = ['01111101', '01111110', '01111111', '11111101', '11111110', '11111111']
fp8_pinf = '01111100'
fp8_ninf = '11111100'

# Show every pattern next to its integer code.
for nan in fp8_nans:
    print(f' NaN: {nan} = {int(nan, 2)}')
print(f'+INF: {fp8_pinf} = {int(fp8_pinf, 2)}')
print(f'-INF: {fp8_ninf} = {int(fp8_ninf, 2)}')

 NaN: 01111101 = 125
 NaN: 01111110 = 126
 NaN: 01111111 = 127
 NaN: 11111101 = 253
 NaN: 11111110 = 254
 NaN: 11111111 = 255
+INF: 01111100 = 124
-INF: 11111100 = 252


In [19]:
# All 8-bit codes except the NaN and +/-INF encodings.
fp8_int_values = [
    code
    for code in range(0, 256)
    if code not in {int(bits, 2) for bits in [*fp8_nans, fp8_pinf, fp8_ninf]}
]

In [29]:
import random
def clear_special_values(data_np):
    """Replace FP8 NaN and INF encodings in ``data_np`` with random ordinary encodings.

    Elements may be integer codes or binary digit strings, with or without a
    ``'0b'`` prefix (the form returned by ``float32_to_fp8``). A replacement
    is written back in the same representation as the element it replaces.

    Args:
        data_np: Indexable sequence of FP8 encodings. It is modified in place.

    Returns:
        The same ``data_np`` object. Each replacement and the final NaN/INF
        counts are printed.
    """
    # Integer codes of the special values, computed once rather than per element.
    nan_codes = [int(nan, 2) for nan in fp8_nans]
    inf_codes = [int(fp8_pinf, 2), int(fp8_ninf, 2)]

    def _replacement_for(old):
        """Pick a random non-special code, formatted like ``old``."""
        new_code = random.choice(fp8_int_values)
        if isinstance(old, str):
            prefix = old[:2] if old[:2] in ('0b', '0B') else ''
            return prefix + format(new_code, '08b')
        return new_code

    nan_cnt = 0
    inf_cnt = 0
    for idx, val in enumerate(data_np):
        # Bit strings have to be parsed before they can be compared with the
        # integer codes; a string never equals an int.
        code = int(val, 2) if isinstance(val, str) else val
        if code in nan_codes:
            print(f'Found NaN at index {idx}')
            data_np[idx] = _replacement_for(val)
            print(f'Randomly replaced NaN with {data_np[idx]}')
            nan_cnt += 1
        elif code in inf_codes:
            print(f'Found INF at index {idx}')
            data_np[idx] = _replacement_for(val)
            print(f'Randomly replaced INF with {data_np[idx]}')
            inf_cnt += 1

    print(f'NaN count: {nan_cnt}')
    print(f'INF count: {inf_cnt}')

    return data_np

In [31]:
def convert_to_target_format(data_np, prec):
    """Convert a numpy array to the representation used for ``prec``-bit data.

    Args:
        data_np: Array of values to convert.
        prec: Target precision in bits: 64, 32, 16 or 8.

    Returns:
        For 64/32/16, ``data_np`` cast to the matching numpy float dtype. For
        8, an array of FP8 bit strings produced by ``float32_to_fp8`` and then
        passed through ``clear_special_values``.

    Raises:
        ValueError: If ``prec`` is not a supported precision.
    """
    float_dtypes = {64: np.float64, 32: np.float32, 16: np.float16}
    if prec in float_dtypes:
        return data_np.astype(float_dtypes[prec])
    if prec == 8:
        data_fp32 = data_np.astype(np.float32)
        # convert to FP8
        data_fp8 = np.array([float32_to_fp8(val) for val in data_fp32])
        # clear special values
        return clear_special_values(data_fp8)
    # Fail loudly instead of silently handing None back to the caller.
    raise ValueError(f'Unsupported precision {prec!r}; expected 64, 32, 16 or 8')

In [33]:
# Inspect the initial weights as a flat numpy array, detached from autograd.
weights.detach().numpy().flatten()

array([-0.03156039, -0.0312207 , -0.02688671, ...,  0.03123467,
        0.00186213,  0.0296352 ])

In [40]:
# Flatten the initial parameters once; every conversion below starts from
# these two arrays.
weights_np = weights.detach().numpy().flatten()
biases_np = biases.detach().numpy().flatten()

# print data type and shape
print(f'Original data type: {weights_np.dtype}')
print(f'Original data shape: {weights_np.shape}')


def _convert_and_report(prec):
    """Convert weights and biases to ``prec`` bits and print dtype and shape of both."""
    converted_weights = convert_to_target_format(weights_np, prec)
    converted_biases = convert_to_target_format(biases_np, prec)
    print(f'Weights - FP{prec} data type: {converted_weights.dtype}')
    print(f'Weights - FP{prec} data shape: {converted_weights.shape}')
    print(f'Biases - FP{prec} data type: {converted_biases.dtype}')
    print(f'Biases - FP{prec} data shape: {converted_biases.shape}')
    return converted_weights, converted_biases


# convert to each target format
weights_fp64, biases_fp64 = _convert_and_report(64)
weights_fp32, biases_fp32 = _convert_and_report(32)
weights_fp16, biases_fp16 = _convert_and_report(16)
weights_fp8, biases_fp8 = _convert_and_report(8)

Original data type: float64
Original data shape: (7840,)
Weights - FP64 data type: float64
Weights - FP64 data shape: (7840,)
Biases - FP64 data type: float64
Biases - FP64 data shape: (10,)
Weights - FP32 data type: float32
Biases - FP32 data shape: (7840,)
Biases - FP32 data type: float32
Biases - FP32 data shape: (10,)
Weights - FP16 data type: float16
Weights - FP16 data shape: (7840,)
Biases - FP16 data type: float16
Biases - FP16 data shape: (10,)
NaN count: 0
INF count: 0
NaN count: 0
INF count: 0
Weights - FP8 data type: <U10
Weights - FP8 data shape: (7840,)
Biases - FP8 data type: <U10
Biases - FP8 data shape: (10,)


In [56]:
# Arguments for the FP16 header: layer geometry plus the FP16 parameters.
kwargs = dict(
    IN_CH1=28,
    IN_CH2=28,
    OUT_CH=10,
    DATASET_SIZE=5,
    WEIGHTS=weights_fp16,
    BIASES=biases_fp16,
    prec=16,
)

In [57]:
# Write the FP16 header; the parameters and 'prec' come from the kwargs dict.
emit_mnist_header_file('mnist', 'FP16', **kwargs)

Wrote /scratch1/msc22f11/snitch/sw/applications/data/data_fp16_test_mnist.h
