In [1]:
#!/usr/bin/env python3
# Copyright 2022 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0

import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F
from torchvision import transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
import argparse
import pathlib
import hjson
import random
import os

In [2]:
# Seed every random number generator this notebook touches so a fresh
# "Restart & Run All" is reproducible. Python's `random` module is seeded here
# too: it is imported above and otherwise only gets seeded inside DataLoader
# workers. The former module-level `global verbose` statement was removed; a
# `global` declaration outside a function has no effect.
np.random.seed(42)
torch.manual_seed(42)
random.seed(42)

In [3]:
def array_to_cstr(a):
    """Render a collection of values as a C brace-initializer string.

    Args:
        a: A ``numpy.ndarray``, a ``torch.Tensor`` or any other iterable.
            Arrays and tensors are flattened first; any other iterable is
            walked as-is (its elements are formatted with ``str.format``).

    Returns:
        A string such as ``'{1, 2, 3}'``. An empty input yields ``'{}'``.
    """
    if isinstance(a, torch.Tensor):
        # detach() is required for parameters / tensors that track gradients
        # (calling .numpy() on them raises); cpu() makes the conversion work
        # regardless of the device the tensor lives on.
        a = a.detach().cpu().numpy()
    if isinstance(a, np.ndarray):
        a = a.flat
    # join() handles the empty case naturally, instead of slicing off a
    # trailing ", " that may not exist.
    return '{' + ', '.join('{}'.format(el) for el in a) + '}'

In [None]:
# TODO: check what is missing for CNN
def emit_mnist_data(name='mnist_cnn', **kwargs):
    """Build the text of a C source file describing the network and its data.

    The generated text contains, in this order: an include of ``network.h``,
    a ``network_t`` struct initializer, the weight / bias / weight-gradient /
    bias-gradient arrays, and finally the image and label arrays.

    Args:
        name: Prefix used for every emitted C identifier.
        **kwargs: Required keys are ``IN_CH1``, ``IN_CH2``, ``OUT_CH``,
            ``DATASET_SIZE``, ``INPUT``, ``LABELS``, ``WEIGHTS``, ``BIASES``,
            ``WEIGHT GRADIENTS``, ``BIAS GRADIENTS`` and ``prec``. ``prec``
            must map (via ``str``) to one of ``64``, ``32``, ``16``, ``B16``
            or ``8``.

    Returns:
        The complete C source as a single string.

    Raises:
        KeyError: If a required key is missing or ``prec`` is not supported.
    """
    # Scalar dimensions.
    in_ch1 = kwargs['IN_CH1']
    in_ch2 = kwargs['IN_CH2']
    out_ch = kwargs['OUT_CH']
    dataset_size = kwargs['DATASET_SIZE']

    # Dataset samples and their labels.
    images = kwargs['INPUT']
    labels = kwargs['LABELS']

    # Initial parameters and gradients taken from the golden model.
    weights = kwargs['WEIGHTS']
    biases = kwargs['BIASES']
    weight_grads = kwargs['WEIGHT GRADIENTS']
    bias_grads = kwargs['BIAS GRADIENTS']

    # Flattened input width used for the second dimension of the weights.
    in_ch = in_ch1 * in_ch2

    header = ''.join([
        '#include "network.h"\n\n',
        'network_t {}_t = {{\n'.format(name),
        '\t.IN_CH1 = {},\n'.format(in_ch1),
        '\t.IN_CH2 = {},\n'.format(in_ch2),
        '\t.OUT_CH = {},\n'.format(out_ch),
        '\t.dtype = FP{}\n'.format(kwargs["prec"]),
        '};\n\n\n',
    ])

    # Precision identifier -> C element type.
    c_type_by_prec = {
        '64': 'double',
        '32': 'float',
        '16': '__fp16',
        'B16': '__bf16',
        '8': 'char'
    }
    c_type = c_type_by_prec[str(kwargs['prec'])]

    def c_array(elem_type, suffix, rows, cols, values):
        """Format one ``static <type> <name>_<suffix>_dram [rows][cols]`` definition."""
        return 'static {} {}_{}_dram [{}][{}] = {};\n\n\n'.format(
            elem_type, name, suffix, rows, cols, array_to_cstr(values)
        )

    sections = [
        header,
        # network initialization
        c_array(c_type, 'weights', out_ch, in_ch, weights),
        c_array(c_type, 'biases', out_ch, 1, biases),
        c_array(c_type, 'weight_grads', out_ch, in_ch, weight_grads),
        c_array(c_type, 'bias_grads', out_ch, 1, bias_grads),
        # input data
        c_array(c_type, 'images', dataset_size * in_ch, 1, images),
        c_array('uint32_t', 'labels', dataset_size, 1, labels),
    ]
    return ''.join(sections)


In [4]:
# Download the MNIST training split (download=True) and wrap it as a dataset.

# ToTensor is the only transform applied to the images.
transform = transforms.Compose(
    [
        transforms.ToTensor()
    ]
)

# The dataset root is read from the PATH_DATASETS environment variable and
# falls back to the current directory when it is not set.
PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")
mnist_dataset = MNIST(PATH_DATASETS, train=True, transform=transform, download=True)

# Dedicated, fixed-seed generator for reproducibility; it is passed to the
# DataLoader created further down in this cell.
g = torch.Generator()
g.manual_seed(42)

def seed_worker(worker_id):
    """Seed NumPy and Python's ``random`` from the current torch seed.

    Intended to be passed as ``worker_init_fn`` to a ``DataLoader``, following
    the reproducibility recipe in the PyTorch documentation.

    Args:
        worker_id: Index of the worker; accepted for interface compatibility
            with ``worker_init_fn`` but not used.
    """
    # initial_seed is a function and must be called. The modulo keeps the
    # value inside the 32-bit range accepted by np.random.seed.
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

# Only worker_init_fn and generator are set explicitly; batch size, shuffling
# and worker count are left at the DataLoader defaults.
mnist_dl = DataLoader(mnist_dataset, worker_init_fn=seed_worker, generator=g)

In [5]:
print(mnist_dataset)

Dataset MNIST
    Number of datapoints: 60000
    Root location: .
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
           )


In [6]:
class CNN(nn.Module):
    """Two convolutional stages followed by a 10-way linear classifier.

    Each stage is ``Conv2d(kernel 5, stride 1, padding 2) -> ReLU ->
    MaxPool2d(2)``. The first stage maps 1 -> 16 channels and the second
    16 -> 32. The linear layer expects ``32 * 7 * 7`` flattened features.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 16 channels.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Stage 2: 16 -> 32 channels.
        self.conv2 = nn.Sequential(
            nn.Conv2d(
                in_channels=16,
                out_channels=32,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Classifier head producing one score per class (10 classes).
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Run the network.

        Returns:
            A tuple ``(scores, features)``: the output of the linear layer
            and the flattened activations that were fed into it (returned
            for visualization).
        """
        features = self.conv2(self.conv1(x))
        # Collapse everything except the batch dimension.
        flat = features.view(features.size(0), -1)
        scores = self.out(flat)
        return scores, flat

In [11]:
# Instantiate the model defined above and print its layer structure.
net = CNN()
print(net)

CNN(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (out): Linear(in_features=1568, out_features=10, bias=True)
)


In [9]:
"""
Now we iterate through a smaller subset of the dataset 
to retrieve the image data with their
respective labels
"""

# Number of batches pulled from the loader for the exported subset.
NUM_SAMPLES = 5

data_iterator = iter(mnist_dl)

# Collect the flattened batches in lists and concatenate once at the end,
# instead of growing arrays with np.append on every iteration.
image_chunks = []
label_chunks = []
for i in range(NUM_SAMPLES):
    # Use the built-in next(); DataLoader iterators are not guaranteed to
    # expose a Python-2 style .next() method.
    s_image, s_label = next(data_iterator)
    np_s_image = s_image.numpy().flatten()
    np_s_label = s_label.numpy().flatten()
    image_chunks.append(np_s_image)
    label_chunks.append(np_s_label)

# 1-D arrays holding all images / labels back to back. The explicit dtypes
# match what the previous list-based construction produced (float64 / int64).
s_images = np.concatenate(image_chunks).astype(np.float64)
s_labels = np.concatenate(label_chunks).astype(np.int64)

In [10]:
# Take the first batch from a fresh iterator over the loader; it is the input
# used for the forward/backward pass in the gradient cell.
first_im, first_label = next(iter(mnist_dl))

In [24]:
# NOTE(review): in the visible notebook `output` is only assigned in the later
# gradient-computation cell, so on a fresh "Restart & Run All" this cell would
# presumably raise NameError -- confirm and move it below that cell.
output[0]

tensor([[-0.0137,  0.1058, -0.0069,  0.0514, -0.0232,  0.0712,  0.0820,  0.0368,
          0.0433,  0.0888]], grad_fn=<AddmmBackward0>)

In [27]:
torch.manual_seed(42)

# Handles to the convolution parameters of both stages (index 0 of each
# Sequential is the Conv2d layer).
weights_conv1 = net.conv1[0].weight
biases_conv1 = net.conv1[0].bias
weights_conv2 = net.conv2[0].weight
biases_conv2 = net.conv2[0].bias

criterion = nn.CrossEntropyLoss()

for i in range(1):
    # zero_grad must be *called*. A bare `net.zero_grad` is a no-op attribute
    # access, which lets gradients accumulate whenever this cell is re-run.
    net.zero_grad()
    # The model returns (scores, features); only the scores feed the loss.
    output = net(first_im)[0]
    loss = criterion(output, first_label)
    loss.backward()
    # Gradients of the loss w.r.t. the convolution parameters.
    weight_grads_conv1 = net.conv1[0].weight.grad
    bias_grads_conv1 = net.conv1[0].bias.grad
    weight_grads_conv2 = net.conv2[0].weight.grad
    bias_grads_conv2 = net.conv2[0].bias.grad


In [30]:
# NOTE(review): these imports live in a late cell rather than the import cell
# at the top, and `models` is not used anywhere in the visible notebook.
from torchvision import models
from torchsummary import summary
# Print a per-layer table of output shapes and parameter counts for a
# single-channel 28x28 input.
summary(net, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 28, 28]             416
              ReLU-2           [-1, 16, 28, 28]               0
         MaxPool2d-3           [-1, 16, 14, 14]               0
            Conv2d-4           [-1, 32, 14, 14]          12,832
              ReLU-5           [-1, 32, 14, 14]               0
         MaxPool2d-6             [-1, 32, 7, 7]               0
            Linear-7                   [-1, 10]          15,690
Total params: 28,938
Trainable params: 28,938
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.32
Params size (MB): 0.11
Estimated Total Size (MB): 0.44
----------------------------------------------------------------
