In [1]:
import os
import numpy as np
import tensorflow as tf 
import tensorflow.keras.backend as K
from  tensorflow.keras import models, activations, layers

## Constants 

In [9]:
batch_size = 128
num_classes = 10
epochs = 10

## Load data 

In [2]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

In [3]:
print("Loaded data;\nx_train shape \t{}\ny_train shape \t{}\nx_test shape \t{}\ny_test shape \t{}".format(
    x_train.shape, y_train.shape, 
    x_test.shape, y_test.shape))

Loaded data;
x_train shape 	(60000, 28, 28)
y_train shape 	(60000,)
x_test shape 	(10000, 28, 28)
y_test shape 	(10000,)


## Export Training and test data 

In [4]:
x_train.tofile(os.path.join('exports', "x_train.data"))
y_train.tofile(os.path.join('exports', "y_train.data"))

In [5]:
x_test.tofile(os.path.join('exports', "x_test.data"))
y_test.tofile(os.path.join('exports', "y_test.data"))

In [6]:
# input image dimensions
img_rows, img_cols = 28, 28

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)
    
print("Loaded data;\nx_train shape \t{}\ny_train shape \t{}\nx_test shape \t{}\ny_test shape \t{}".format(
    x_train.shape, y_train.shape, 
    x_test.shape, y_test.shape))

Loaded data;
x_train shape 	(60000, 28, 28, 1)
y_train shape 	(60000,)
x_test shape 	(10000, 28, 28, 1)
y_test shape 	(10000,)


## Prepare data 

In [7]:
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

In [10]:
# convert class vectors to binary class matrices
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

## Shallow network 

In [20]:
model = models.Sequential()
model.add(layers.Flatten(input_shape=input_shape))
model.add(layers.Dense(num_classes, name="fc_1"))
model.add(layers.Activation(activations.softmax, name="output"))

In [21]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 784)               0         
_________________________________________________________________
fc_1 (Dense)                 (None, 10)                7850      
_________________________________________________________________
output (Activation)          (None, 10)                0         
Total params: 7,850
Trainable params: 7,850
Non-trainable params: 0
_________________________________________________________________


In [22]:
model.compile(loss=tf.keras.losses.categorical_crossentropy,
              optimizer=tf.keras.optimizers.Adadelta(),
              metrics=['accuracy'])

### Train

In [23]:
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x115940350>

### Evaluate 

In [24]:
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

('Test loss:', 0.27310981670320034)
('Test accuracy:', 0.9242)


### Export weights 

The type of each entry in array is given by -dataType. The number of entries is equal to:

*inputFeatureChannels * outputFeatureChannels * kernelHeight * kernelWidth*

The layout of filter weight is as a 4D tensor (array)
weight[ outputChannels ][ kernelHeight ][ kernelWidth ][ inputChannels / groups ]

*Note: For binary-convolutions the layout of the weights are:
weight[ outputChannels ][ kernelHeight ][ kernelWidth ][ floor((inputChannels/groups)+31) / 32 ]
with each 32 sub input feature channel index specified in machine byte order, so that for example
the 13th feature channel bit can be extracted using bitmask = (1U << 13).*

In [25]:
def export_dense_weights(name, wts_coef, bias_coef, kernel_width, kernel_height):
    """
    A fully connected layer in a Convolutional Neural Network (CNN) is one where every input channel is connected 
    to every output channel. The kernel width is equal to the width of the source image, and the 
    kernel height is equal to the height of the source image. The width and height of the output is 1 x 1.
    
    A fully connected layer takes an MPSImage object with dimensions 
    source.width x source.height x Ni, convolves it with Weights[No][source.width][source.height][Ni], 
    and produces a 1 x 1 x No output.
    
    Thus, the following conditions must be true:
    - kernelWidth == source.width
    - kernelHeight == source.height
    - clipRect.size.width == 1
    - clipRect.size.height == 1
    
    You can think of a fully connected layer as a matrix multiplication where the image is 
    flattened into a vector of length source.width*source.height*Ni, and the weights are arranged in a 
    matrix of dimension No x (source.width*source.height*Ni) to produce an output vector of length No.
    
    The value of the strideInPixelsX, strideInPixelsY, and groups properties must be 1. 
    The offset property is not applicable and it is ignored. Because the clip rectangle is 
    clamped to the destination image bounds, if the destination is 1 x 1, you do not need to set the 
    clipRect property.
    """
    print("Exporting weights for {}\n\t{}\n\t{}".format(name, 
          os.path.join('exports', "{}_conv_wts.data".format(name)), 
          os.path.join('exports', "{}_bias_wts.data".format(name))))
        
    input_feature_channels = int(wts_coef.shape[0] / kernel_width / kernel_height) 
    output_feature_channels = wts_coef.shape[-1]            
    
    print("\tOriginal weights shape {}".format(wts_coef.shape))    
    wts_coef = np.reshape(wts_coef, [kernel_width, kernel_height, -1, output_feature_channels])    
        
    if bias_coef is not None:
        # [output_feature_channels]
        print("\tOriginal bias shape {}".format(bias_coef.shape))
    
    # [output_feature_channels, kernel_width, kernel_height, input_feature_channels]
    wts_coef = wts_coef.transpose(3, 0, 1, 2)
    print("\tReshaped weights shape {}".format(wts_coef.shape))    
    wts_coef.tofile(os.path.join('exports', "shallow_{}_wts.data".format(name)))    
    
    if bias_coef is not None:
        bias_coef = np.squeeze(bias_coef)
        print("\tReshaped bias_coef shape {}".format(bias_coef.shape))    
        bias_coef.tofile(os.path.join('exports', "shallow_{}_bias_terms.data".format(name)))   
        
    print("\n")

In [26]:
flatted_input_kernel_width = None
flatted_input_kernel_height = None

for layer in model.layers:        
    if "flatten" in layer.name:
        flatted_input_kernel_width = layer.input_shape[1] # None, 14, 14, 64
        flatted_input_kernel_height = layer.input_shape[2] # None, 14, 14, 64
        
    if len(layer.get_weights()) > 0:        
        name = layer.name         
        wts = layer.get_weights()
        
        export_dense_weights(layer.name, wts[0], wts[1] if len(wts) == 2 else None, 
                                flatted_input_kernel_width, flatted_input_kernel_height)        
        # after the initial pass (from cnn to fcn); flattern the kernel down to 1x1 
        # i.e. update the flatted_input_kernel_DIM to have the kernel width and height of 1 
        flatted_input_kernel_width, flatted_input_kernel_height = 1, 1 

Exporting weights for fc_1
	exports/fc_1_conv_wts.data
	exports/fc_1_bias_wts.data
	Original weights shape (784, 10)
	Original bias shape (10,)
	Reshaped weights shape (10, 28, 28, 1)
	Reshaped bias_coef shape (10,)




## Deeper network (1 hidden layer)

In [None]:
model = models.Sequential()
model.add(layers.Flatten(input_shape=input_shape))
model.add(layers.Dense(32, name="fc_1"))
mode.add(layers.Activation())
model.add(layers.Dense(num_classes, name="fc_2"))
model.add(layers.Activation(activations.softmax, name="output"))