In [1]:
#!/usr/bin/env python3
# Copyright 2022 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0

import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F
from torchvision import transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
import argparse
import pathlib
import hjson
import random
import os

In [2]:
# Seed every RNG this notebook imports so that a fresh "Restart & Run All"
# reproduces the same random draws: Python's `random`, NumPy and PyTorch.
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

In [3]:
def array_to_cstr(a):
    """Render a numeric container as a C brace initializer string.

    Args:
        a: A ``numpy.ndarray``, a ``torch.Tensor`` or any iterable of scalars.
            Arrays and tensors are flattened before being written out.

    Returns:
        A string such as ``'{1, 2, 3}'``. An empty input yields ``'{}'``.
    """
    if isinstance(a, torch.Tensor):
        # detach()/cpu() make tensors that track gradients or live on another
        # device convertible; for plain CPU tensors they change nothing.
        a = a.detach().cpu().numpy()
    if isinstance(a, np.ndarray):
        a = a.flat
    # join() avoids repeated string concatenation and, unlike slicing off a
    # trailing ', ', keeps the opening brace when there are no elements.
    return '{' + ', '.join('{}'.format(el) for el in a) + '}'

In [56]:
def emit_mnist_data(name='mnist', **kwargs):
    """Build the C source text describing one benchmark network and its data.

    Args:
        name: Prefix used for every emitted C identifier.
        **kwargs: Must provide ``IN_CH``, ``OUT_CH``, ``DATASET_SIZE``,
            ``INPUT``, ``LABELS``, ``WEIGHTS``, ``BIASES`` and ``prec``.
            ``prec`` selects the C element type and must be one of
            64, 32, 16, 'B16' or 8 (compared by its string form).

    Returns:
        The generated C code as a single string.

    Raises:
        KeyError: If a required key is missing or ``prec`` is not supported.
    """
    # Layer dimensions and number of samples.
    in_ch = kwargs['IN_CH']
    out_ch = kwargs['OUT_CH']
    n_samples = kwargs['DATASET_SIZE']

    # Sample data, then the initial parameters taken from the golden model.
    images = kwargs['INPUT']
    labels = kwargs['LABELS']
    weights = kwargs['WEIGHTS']
    biases = kwargs['BIASES']

    # Benchmark descriptor struct.
    chunks = [
        '#include "network.h"\n\n',
        f'network_benchmark_t {name}_t = {{\n',
        f'\t.IN_CH = {in_ch},\n',
        f'\t.OUT_CH = {out_ch},\n',
        f'\t.dtype = FP{kwargs["prec"]}\n',
        '};\n\n\n',
    ]

    # Translate the precision tag into the C element type of the arrays.
    c_type_by_prec = {
        '64': 'double',
        '32': 'float',
        '16': '__fp16',
        'B16': '__bf16',
        '8': 'char',
    }
    elem_type = c_type_by_prec[str(kwargs['prec'])]

    def c_array(c_type, suffix, rows, values):
        # One `static <type> <name>_<suffix>_dram [rows][cols] = {...};` definition.
        return (f'static {c_type} {name}_{suffix}_dram [{rows[0]}][{rows[1]}] = '
                + array_to_cstr(values) + ';\n\n\n')

    # Network parameters first, then the input images and their labels.
    chunks.append(c_array(elem_type, 'weights', (out_ch, in_ch), weights))
    chunks.append(c_array(elem_type, 'biases', (out_ch, 1), biases))
    chunks.append(c_array(elem_type, 'images', (n_samples * in_ch, 1), images))
    chunks.append(c_array('uint32_t', 'labels', (n_samples, 1), labels))

    return ''.join(chunks)


In [54]:
def emit_mnist_header_file(layer_type: str, *,
                           output_dir: str = '/scratch/msc22f11/msc22f11/snitch/sw/applications/data/',
                           **kwargs):
    """Write the generated benchmark data into a C header file.

    Args:
        layer_type: Kind of data to emit; only ``'mnist'`` is supported.
        output_dir: Directory the header is written to. Defaults to the
            location that was previously hard-coded.
        **kwargs: Forwarded unchanged to ``emit_mnist_data``.

    Raises:
        ValueError: If ``layer_type`` is not supported. Previously this case
            failed later on an unbound local variable.
    """
    # Reject unknown layer types up front, before any text is generated.
    if layer_type != 'mnist':
        raise ValueError(
            f"unsupported layer_type {layer_type!r}; expected 'mnist'")

    emit_str = "// Copyright 2022 ETH Zurich and University of Bologna.\n" + \
               "// Licensed under the Apache License, Version 2.0, see LICENSE for details.\n" + \
               "// SPDX-License-Identifier: Apache-2.0\n\n"
    emit_str += emit_mnist_data(**kwargs)

    # The file name is fixed; it does not depend on the `prec` entry in kwargs.
    file = os.path.join(output_dir, 'data_fp16_benchmark.h')
    with open(file, 'w') as f:
        f.write(emit_str)


In [7]:
def Linear(input, weights, bias, **kwargs):
    """Apply a fully connected layer: ``input @ weights.T + bias``.

    Args:
        input: Tensor whose last dimension matches the second dimension of
            ``weights``.
        weights: 2-D tensor; it is transposed before the matrix product.
        bias: Tensor added to the product, or ``None`` to skip the addition.
        **kwargs: Accepted for call compatibility; not used.

    Returns:
        The layer output tensor.
    """
    # Matrix product rather than an element-wise multiply, matching the
    # activation computation used elsewhere in this notebook.
    out = torch.matmul(input, weights.t())
    if bias is not None:
        out = out + bias
    return out

In [8]:
# Fetch the MNIST training split; ToTensor is the only transform applied.

transform = transforms.Compose([transforms.ToTensor()])

# The dataset root can be overridden through the PATH_DATASETS environment
# variable; it falls back to the current directory.
PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")
mnist_dataset = MNIST(
    root=PATH_DATASETS,
    train=True,
    transform=transform,
    download=True,
)

# Dedicated generator with a fixed seed, for reproducibility.
g = torch.Generator().manual_seed(42)

def seed_worker(worker_id):
    """Seed NumPy and Python's ``random`` from the current PyTorch seed.

    Intended for use as a ``DataLoader`` ``worker_init_fn``.

    Args:
        worker_id: Index passed in by the caller; not used.
    """
    # torch.initial_seed is a function and must be called. The modulo keeps
    # the value below 2**32, which np.random.seed requires.
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

# Loader over the MNIST dataset; batch size and worker count are left at the
# DataLoader defaults, sampling uses the seeded generator `g`.
mnist_dl = DataLoader(
    dataset=mnist_dataset,
    worker_init_fn=seed_worker,
    generator=g,
)

In [9]:
# Take the first (image, label) batch produced by the loader.
first_batch = next(iter(mnist_dl))
first_im, first_label = first_batch

In [23]:
np.random.seed(42)
torch.manual_seed(42)
# layer dimensions
IN_CH = 1 * 28 * 28 # 1 channel, 28x28 pixels, flattened
OUT_CH = 16 # number of outputs of the linear layer
# lower / upper bound of the uniform distribution used for the parameters
r1 = 0
r2 = 0.5

# cast the image batch to fp16 once, then flatten every image into one row
# NOTE: `input` shadows the builtin; the name is kept because later cells use it
images_fp16 = first_im.to(torch.float16)
input = images_fp16.view(images_fp16.size(0), -1)
print(input.shape)

# get random weights with shape (OUT_CH, IN_CH)
weights = torch.FloatTensor(OUT_CH, IN_CH).uniform_(r1, r2).to(torch.float16)
print(weights.shape)

# get random bias with shape (OUT_CH,)
bias = torch.FloatTensor(OUT_CH).uniform_(r1, r2).to(torch.float16)
print(bias.shape)

# calculate the activations of the linear layer
activations = input @ weights.t() + bias
print(activations.shape)
# draw a random label in [0, OUT_CH); the upper bound is exclusive
# NOTE: this is a random value, not the dataset label `first_label`
label = torch.randint(0, OUT_CH, (1,))
print(label)

torch.Size([1, 784])
torch.Size([16, 784])
torch.Size([16])
torch.Size([1, 16])
tensor([7])


In [24]:
# Show the layer output followed by its element type, one per line.
print(activations, activations.dtype, sep='\n')

tensor([[26.2812, 26.8750, 27.7812, 30.3125, 27.2812, 25.2812, 28.9375, 26.1094,
         28.7188, 27.1562, 25.7969, 28.0938, 27.6719, 25.0000, 27.7500, 29.6406]],
       dtype=torch.float16)
torch.float16


In [25]:
# Largest activation value.
activations.numpy().max()

30.31

In [38]:
# Display the activations upcast to float32.
activations.float()

tensor([[26.2812, 26.8750, 27.7812, 30.3125, 27.2812, 25.2812, 28.9375, 26.1094,
         28.7188, 27.1562, 25.7969, 28.0938, 27.6719, 25.0000, 27.7500, 29.6406]])

In [39]:
# Exponential of the first activation, evaluated in float32.
np.exp(activations.float()[0, 0])

tensor(2.5930e+11)

In [43]:
# Softmax along dim 1: the activations are upcast to float32 for the
# computation and the result is cast back down to float16.
softmax = nn.Softmax(dim=1)
ff_out = softmax(activations.float()).half()
for shown in (ff_out, ff_out.shape, ff_out.dtype):
    print(shown)

tensor([[0.0072, 0.0130, 0.0321, 0.4031, 0.0194, 0.0026, 0.1019, 0.0060, 0.0818,
         0.0172, 0.0044, 0.0438, 0.0287, 0.0020, 0.0311, 0.2058]],
       dtype=torch.float16)
torch.Size([1, 16])
torch.float16


In [61]:
# Softmax outputs of the single sample as a plain Python list.
ff_out_l = ff_out[0].tolist()
# Subtract 1 from the entry whose index equals the label.
target = int(label)
ff_out_l[target] -= 1
print(ff_out_l)
# Pack the result into a (1, N) float16 tensor: the bias gradient.
bias_gradients = torch.tensor(ff_out_l, dtype=torch.float32).view(1, -1).half()
for shown in (bias_gradients, bias_gradients.shape, bias_gradients.dtype):
    print(shown)

[0.007152557373046875, 0.0129547119140625, 0.032073974609375, 0.403076171875, 0.019439697265625, 0.00263214111328125, 0.10186767578125, -0.9939765930175781, 0.08184814453125, 0.0171661376953125, 0.004405975341796875, 0.0438232421875, 0.0287322998046875, 0.0019855499267578125, 0.03106689453125, 0.205810546875]
tensor([[ 0.0072,  0.0130,  0.0321,  0.4031,  0.0194,  0.0026,  0.1019, -0.9941,
          0.0818,  0.0172,  0.0044,  0.0438,  0.0287,  0.0020,  0.0311,  0.2058]],
       dtype=torch.float16)
torch.Size([1, 16])
torch.float16


In [53]:
# Weight gradient: broadcast product of the transposed input with the bias
# gradient, transposed back afterwards.
weight_gradients = (input.t() * bias_gradients).t()
print(weight_gradients.shape)
# Checksum: sum over dim 1, giving one value per row of the gradient matrix.
weight_gradients_checksum = weight_gradients.sum(dim=1)
for shown in (weight_gradients_checksum,
              weight_gradients_checksum.shape,
              weight_gradients_checksum.dtype):
    print(shown)

torch.Size([16, 784])
tensor([   0.7720,    1.3984,    3.4629,   43.5000,    2.0977,    0.2842,
          10.9922, -107.3125,    8.8359,    1.8525,    0.4756,    4.7305,
           3.1016,    0.2144,    3.3535,   22.2188], dtype=torch.float16)
torch.Size([16])
torch.float16


In [51]:
# One training step: subtract half of each gradient from its parameter.
bias_update = bias - bias_gradients * 0.5
print("bias_update = ", bias_update)
for shown in (bias_update.shape, bias_update.dtype):
    print(shown)
weight_update = weights - weight_gradients * 0.5
# Sum over dim 1 of the updated weights, used as a checksum.
weight_update_checksum = weight_update.sum(dim=1)
print("\nweight_update_checksum = ", weight_update_checksum)
for shown in (weight_update_checksum.shape, weight_update_checksum.dtype):
    print(shown)

bias_update =  tensor([[ 0.3965,  0.4460,  0.3086,  0.2477,  0.1212,  0.0851, -0.0017,  0.7075,
          0.1843,  0.0993,  0.1624,  0.1475,  0.4768,  0.2312,  0.4370,  0.1713]],
       dtype=torch.float16)
torch.Size([1, 16])
torch.float16

weight_update_checksum =  tensor([188.3750, 197.7500, 191.1250, 176.7500, 190.5000, 195.2500, 197.7500,
        248.8750, 197.3750, 191.1250, 198.1250, 197.1250, 193.0000, 189.7500,
        194.1250, 180.6250], dtype=torch.float16)
torch.Size([16])
torch.float16


In [52]:
# Report the memory footprint of input, weights, bias and output in KB.
# The element width in bits is looked up from the dtype of the activations;
# nothing is printed for a dtype that is not listed.
bits_per_element = {
    torch.float64: 64,
    torch.float32: 32,
    torch.float16: 16,
}.get(activations.dtype)

if bits_per_element is not None:
    print(f'Input size: {IN_CH * bits_per_element / 8 / 1024} KB')
    print(f'Weights size: {OUT_CH * IN_CH * bits_per_element / 8 / 1024} KB')
    print(f'Bias size: {OUT_CH * bits_per_element / 8 / 1024} KB')
    print(f'Output size: {OUT_CH * bits_per_element / 8 / 1024} KB')
    print(f'\nTotal size: {(IN_CH + OUT_CH * IN_CH + OUT_CH) * bits_per_element / 8 / 1024} KB')

Input size: 1.53125 KB
Weights size: 24.5 KB
Bias size: 0.03125 KB
Output size: 0.03125 KB

Total size: 26.0625 KB


In [59]:
# Arguments for the header emitter: layer dimensions, fp16 data and
# parameters, the label tensor and the precision tag.
kwargs = dict(
    IN_CH=IN_CH,
    OUT_CH=OUT_CH,
    DATASET_SIZE=1,
    INPUT=input.half(),
    WEIGHTS=weights.detach().half(),
    BIASES=bias.detach().half(),
    LABELS=label,
    prec=16,
)

In [60]:
# Generate the C header from the values collected in `kwargs`.
emit_mnist_header_file(layer_type='mnist', **kwargs)