In [1]:
#!/usr/bin/env python3
# Copyright 2022 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0

import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F
from torchvision import transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
import argparse
import pathlib
import hjson
import random
import os

In [2]:
np.random.seed(42)
torch.manual_seed(42)
global verbose

In [3]:
def array_to_cstr(a):
    out = '{'
    if isinstance(a, np.ndarray):
        a = a.flat
    if isinstance(a, torch.Tensor):
        a = a.numpy().flat
    for el in a:
        out += '{}, '.format(el)
    out = out[:-2] + '}'
    return out

In [23]:
def emit_mnist_data(name='mini_mnist', **kwargs):
    
    # constants
    IN_CH1 = kwargs['IN_CH1']
    IN_CH2 = kwargs['IN_CH2']
    OUT_CH = kwargs['OUT_CH']
    DATASET_SIZE = kwargs['DATASET_SIZE']
    
    # data
    MAT_INPUT = kwargs['INPUT']
    MAT_LABELS = kwargs['LABELS']

    # network init parameters from golden model
    MAT_WEIGHTS = kwargs['WEIGHTS']
    MAT_BIASES = kwargs['BIASES']
    MAT_WEIGHT_GRADS = kwargs['WEIGHT GRADIENTS']
    MAT_BIAS_GRADS = kwargs['BIAS GRADIENTS'] 

    IN_CH = IN_CH1*IN_CH2
    
    layer_str = ''
    layer_str += '#include "network.h"\n\n'
    layer_str += f'network_t {name}_t = {{\n'
    layer_str += f'\t.IN_CH1 = {IN_CH1},\n'
    layer_str += f'\t.IN_CH2 = {IN_CH2},\n'
    layer_str += f'\t.OUT_CH = {OUT_CH},\n'
    layer_str += f'\t.dtype = FP{kwargs["prec"]}\n'
    layer_str += '};\n\n\n'

    ctypes = {
        '64': 'double',
        '32': 'float',
        '16': '__fp16',
        'B16': '__bf16',
        '8': 'char'
    }

    dtype = ctypes[str(kwargs['prec'])]

    # network initialization
    layer_str += f'static {dtype} {name}_weights_dram [{OUT_CH}][{IN_CH}] = ' + array_to_cstr(MAT_WEIGHTS) + ';\n\n\n'
    layer_str += f'static {dtype} {name}_biases_dram [{OUT_CH}][{1}] = ' + array_to_cstr(MAT_BIASES) + ';\n\n\n'
    layer_str += f'static {dtype} {name}_weight_grads_dram [{OUT_CH}][{IN_CH}] = ' + array_to_cstr(MAT_WEIGHT_GRADS) + ';\n\n\n'
    layer_str += f'static {dtype} {name}_bias_grads_dram [{OUT_CH}][{1}] = ' + array_to_cstr(MAT_BIAS_GRADS) + ';\n\n\n'


    # input data
    layer_str += f'static {dtype} {name}_images_dram [{DATASET_SIZE*IN_CH}][{1}] = ' + array_to_cstr(MAT_INPUT) + ';\n\n\n'
    layer_str += f'static uint32_t {name}_labels_dram [{DATASET_SIZE}][{1}] = ' + array_to_cstr(MAT_LABELS) + ';\n\n\n'
    #layer_str += f'static {dtype} {name}_images_dram [{IN_CH}][{1}] = ' + array_to_cstr(MAT_INPUT) + ';\n\n\n'
    #layer_str += f'static uint32_t {name}_labels_dram[{1}] = ' + array_to_cstr(MAT_LABELS) + ';\n\n\n'

    return layer_str


In [24]:
def emit_mnist_header_file(layer_type: str, **kwargs):

    file_path = '/scratch/msc22f11/msc22f11/snitch/sw/applications/data/'
    emit_str = "// Copyright 2022 ETH Zurich and University of Bologna.\n" + \
               "// Licensed under the Apache License, Version 2.0, see LICENSE for details.\n" + \
               "// SPDX-License-Identifier: Apache-2.0\n\n"

    if(layer_type == 'mini_mnist'):
        file = file_path + 'data_fp16_mnist.h'
        emit_str += emit_mnist_data(**kwargs)

    with open(file, 'w') as f:
        f.write(emit_str)


In [4]:
# download MNIST dataset using DataLoader

transform = transforms.Compose(
    [
        transforms.ToTensor()
    ]
)

PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")
mnist_dataset = MNIST(PATH_DATASETS, train=True, transform=transform, download=True)

# set seeds for reproducability 
g = torch.Generator()
g.manual_seed(42)

def seed_worker(worker_id):
    worker_seed = torch.initial_seed % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

mnist_dl = DataLoader(mnist_dataset, worker_init_fn=seed_worker, generator=g)

In [37]:
"""
Now we iterate through the dataset 
to retrieve the image data with their
respective labels
"""

data_iterator = iter(mnist_dl)

for i in range(0, len(mnist_dl)):
    image, label = data_iterator.next()
    np_image = image.numpy().flatten()
    np_label = label.numpy().flatten()
    if(i==0):
        images = np.array(np_image.tolist())
        labels = np.array(np_label.tolist())
    else:
        images = np.append(images, np_image)
        labels = np.append(labels, np_label)

KeyboardInterrupt: 

In [4]:
"""
Now we iterate through a smaller subset of the dataset 
to retrieve the image data with their
respective labels
"""

data_iterator = iter(mnist_dl)

for i in range(0, 5):
    s_image, s_label = data_iterator.next()
    np_s_image = s_image.numpy().flatten()
    np_s_label = s_label.numpy().flatten()
    if(i==0):
        s_images = np.array(np_s_image.tolist())
        s_labels = np.array(np_s_label.tolist())
    else:
        s_images = np.append(s_images, np_s_image)
        s_labels = np.append(s_labels, np_s_label)

In [14]:
in_ch = 28*28
out_ch = 10

class LinLayer(nn.Module):
    def __init__(self):
        super(LinLayer, self).__init__()
        torch.manual_seed(42)
        self.lin = nn.Linear(in_ch, out_ch, dtype=torch.float32)

    def forward(self, x):
        torch.manual_seed(42)
        out = self.lin(x.view(x.size(0), -1))
        return out

In [6]:
first_im, first_label = next(iter(mnist_dl))

In [22]:
it = iter(mnist_dl) 
for i in range(10):
    image, label = next(it)
    flattened_image = image.numpy().flatten()
    # iterate over each pixel
    for j in range(len(flattened_image)):
        print("image[%u][%u] = %f " %(i, j, flattened_image[j]))

image[0][0] = 0.000000 
image[0][1] = 0.000000 
image[0][2] = 0.000000 
image[0][3] = 0.000000 
image[0][4] = 0.000000 
image[0][5] = 0.000000 
image[0][6] = 0.000000 
image[0][7] = 0.000000 
image[0][8] = 0.000000 
image[0][9] = 0.000000 
image[0][10] = 0.000000 
image[0][11] = 0.000000 
image[0][12] = 0.000000 
image[0][13] = 0.000000 
image[0][14] = 0.000000 
image[0][15] = 0.000000 
image[0][16] = 0.000000 
image[0][17] = 0.000000 
image[0][18] = 0.000000 
image[0][19] = 0.000000 
image[0][20] = 0.000000 
image[0][21] = 0.000000 
image[0][22] = 0.000000 
image[0][23] = 0.000000 
image[0][24] = 0.000000 
image[0][25] = 0.000000 
image[0][26] = 0.000000 
image[0][27] = 0.000000 
image[0][28] = 0.000000 
image[0][29] = 0.000000 
image[0][30] = 0.000000 
image[0][31] = 0.000000 
image[0][32] = 0.000000 
image[0][33] = 0.000000 
image[0][34] = 0.000000 
image[0][35] = 0.000000 
image[0][36] = 0.000000 
image[0][37] = 0.000000 
image[0][38] = 0.000000 
image[0][39] = 0.000000 
image[0][4

In [16]:
torch.manual_seed(42)

net = LinLayer()
weights = net.lin.weight
biases = net.lin.bias

criterion = nn.CrossEntropyLoss()

it = iter(mnist_dl) 
correct = 0

for i in range(256):
    net.zero_grad
    image, label = next(it)
    # print(label[0].numpy())
    l = label[0].numpy()
    # print("image: ", image.flatten().numpy())
    output = net(image.to(torch.float64))
    print("output: ", output)
    pred = torch.argmax(output).numpy()
    print("Pred: ", pred, "Label: ", l)
    if(l == pred):
        correct += 1
    # print("acc = ", (correct/256) * 100)
    # print(torch.argmax(output).numpy())
    # loss = criterion(output.to(torch.float32), first_label)
    # loss.backward()
    # weight_grads = net.lin.weight.grad
    # bias_grads = net.lin.bias.grad


image:  [0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0. 

In [16]:
output

tensor([[-0.1598, -0.0484,  0.0685,  0.4418,  0.0252, -0.2573,  0.2446, -0.1718,
          0.2287,  0.0333]], grad_fn=<AddmmBackward0>)

In [17]:
weight_grads.dtype

torch.float32

In [18]:
weights.dtype

torch.float32

In [56]:
biases.dtype

torch.float32

In [20]:
bias_grads

tensor([ 0.0801,  0.0895,  0.1006,  0.1462,  0.0964, -0.9273,  0.1200,  0.0791,
         0.1181,  0.0972])

In [11]:
(s_images.astype(np.float16)).dtype

dtype('float16')

In [31]:
kwargs = {
            'IN_CH1': 28,
            'IN_CH2': 28,
            'OUT_CH': 10,
            'DATASET_SIZE': 5,
            'INPUT': s_images.astype(np.float16),
            'LABELS': s_labels,
            'WEIGHTS': weights.detach(),
            'BIASES': biases.detach(),
            'WEIGHT GRADIENTS': weight_grads,
            'BIAS GRADIENTS': bias_grads,
            'prec': 16
}

In [33]:
emit_mnist_header_file('mini_mnist', **kwargs)

In [52]:
weights.dtype

torch.float32

In [54]:
weights.flatten()[0]

tensor(0.0273, grad_fn=<SelectBackward0>)

In [80]:
print(output)
sm = nn.Softmax(dim=1)
sm(output)

tensor([[-0.1598, -0.0484,  0.0685,  0.4418,  0.0252, -0.2573,  0.2446, -0.1718,
          0.2287,  0.0333]], grad_fn=<AddmmBackward0>)


tensor([[0.0801, 0.0895, 0.1006, 0.1462, 0.0964, 0.0727, 0.1200, 0.0791, 0.1181,
         0.0972]], grad_fn=<SoftmaxBackward0>)

In [36]:
torch.max(output)

tensor(0.4418, grad_fn=<MaxBackward1>)

In [19]:
np.where(weight_grads.flatten() != 0)

(array([ 152,  153,  154, ..., 7737, 7738, 7739]),)

In [22]:
weight_grads.flatten()[153]

tensor(0.0057)

In [24]:
bias_grads

tensor([ 0.0801,  0.0895,  0.1006,  0.1462,  0.0964, -0.9273,  0.1200,  0.0791,
         0.1181,  0.0972])

In [1]:
import pycparser as pyc
from pyclibrary import CParser


parser = CParser('/home/msc22f11/snitch/sw/applications/data/data_full_mnist.h')

print(parser)

In [None]:
parser.defs['variables']

In [12]:
parser.defs['variables']['mini_mnist_weights_dram'][0]

[0.02730494923889637,
 0.029643140733242035,
 -0.008366874419152737,
 0.032807547599077225,
 -0.007825127802789211,
 0.007206810638308525,
 -0.01738768070936203,
 0.020974380895495415,
 0.03148367628455162,
 -0.026201006025075912,
 0.03104272112250328,
 0.006684260908514261,
 0.02638603188097477,
 0.004836806561797857,
 0.017221003770828247,
 -0.005042536184191704,
 0.027531638741493225,
 0.005278890021145344,
 -0.016672855243086815,
 0.009103511460125446,
 -0.01645488105714321,
 -0.004188316408544779,
 -0.014505655504763126,
 0.023691821843385696,
 -0.028191793709993362,
 -0.016464656218886375,
 -0.01008481252938509,
 -0.021474016830325127,
 0.0033708258997648954,
 -0.03527425602078438,
 0.032253898680210114,
 -0.03033815324306488,
 0.027572408318519592,
 0.005943541415035725,
 -0.011596591211855412,
 0.022069642320275307,
 0.005566099192947149,
 0.028855836018919945,
 0.0039042746648192406,
 -0.011263328604400158,
 0.009595845825970173,
 -0.009684979915618896,
 0.015030627138912678,


## Determine FP8 from F32

Special values:
1. exponent and fractional part all zeros --> zero
1. exponent all ones and fractional all zeros --> INF
1. exponent all ones and fractional non-zero --> NaN

FP8 format:  | 5 bit exponent | 2 bit mantissa | <br/>
FP8ALT format: | 4 bit exponent | 3 bit mantissa | <br/>

Example below is for FP8 format. TODO: Discuss with GIM.

In [66]:
import struct

def floatToBinary32(value):
    return ''.join(f'{c:0>8b}' for c in struct.pack('!f', value))

# float to binary
fl0 = biases.detach().numpy()[0]
binstr = floatToBinary32(fl0)
print(f'Binary equivalent of {fl0}: {binstr}')

print(f'\nSign     ( 1 bit ) = {binstr[0]}\nExponent ( 8 bits) = {binstr[1:9]}\nMantissa (23 bits) = {binstr[9:]}')


Binary equivalent of -0.009314311668276787: 10111100000110001001101100001110

Sign     ( 1 bit ) = 1
Exponent ( 8 bits) = 01111000
Mantissa (23 bits) = 00110001001101100001110


In [70]:
# calculate exponent bias of an FP32
exp_bias_fp32 = 2 ** (8 - 1) - 1
exp_bias_fp32 

127

In [72]:
# calculate unbiased exponent
exponent_fp32 = int('01111000', 2) - exp_bias_fp32
exponent_fp32

-7

In [73]:
# calculate exponent bias of an FP8
exp_bias_fp8 = 2 ** (5 - 1) - 1
exp_bias_fp8

15

In [75]:
# determine the new exponent in the new FP format
# minimum exponent: E_min = b'00001 - b'01111 = -14 
# maximum exponent: E_max = b'01111 - b'00000 = 15
exp_8 = exponent_fp32 + exp_bias_fp8
exp_8

8

In [80]:
# determine the binary representation of the new exponent
exp_8_bin = "{0:b}".format(exp_8)
exp_8_bin

'1000'

In [81]:
# TODO: check this with GIM
# determine the new mantissa in the new FP format
man_8_bin = binstr[9:11]
man_8_bin

'00'

In [83]:
# 1 sign it + 5 exponent bits + 2 mantissa bits
fp_8 = binstr[0] + exp_8_bin + man_8_bin
fp_8

'1100000'