In [16]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer
from tensorflow.keras import activations
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, BatchNormalization
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.python.keras.utils import conv_utils

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

# Set the matplotlib default settings
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

In [2]:
import os
import sys

# Directory (relative to the working directory) that holds layer_utils.py.
# os.path.join uses the platform's separator, so the path resolves on
# Windows ('..\\cifar10_resnet') as well as on POSIX systems.
layer_utils_dir = os.path.join('..', 'cifar10_resnet')
if layer_utils_dir not in sys.path:  # re-running the cell must not stack duplicates
    sys.path.append(layer_utils_dir)

# NOTE(review): the wildcard import hides where names come from. This notebook
# appears to rely on it for quantize_unsigned, quantize_rescale, conv2d_noise,
# activation_quant and dense_noise -- confirm and switch to explicit imports.
from layer_utils import *

In [4]:
# Image geometry and number of target classes
img_rows, img_cols = 28, 28
num_classes = 10

# Fetch MNIST, already divided into train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Per-sample shape: the single channel axis goes first or last depending on
# the backend's image data format
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

# Add the channel axis, convert to float32 and divide by 255
x_train = x_train.reshape((x_train.shape[0],) + input_shape).astype('float32') / 255
x_test = x_test.reshape((x_test.shape[0],) + input_shape).astype('float32') / 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Turn the integer class vectors into binary (one-hot) class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [9]:
def _quantize_input(images):
    """Run quantize_unsigned and then quantize_rescale on `images`, both with arguments (3, 1.0).

    NOTE(review): presumably 3 is the bit width and 1.0 the value range --
    confirm against layer_utils.
    """
    quantized = quantize_unsigned(images, 3, 1.0)
    return quantize_rescale(quantized, 3, 1.0)


x_train_3bit = _quantize_input(x_train)
x_test_3bit = _quantize_input(x_test)

In [18]:
# Hold out `split` randomly chosen training samples as a validation set.
split = 10000
split_seed = 0  # fixed seed: the same samples are held out on every run

total_index = np.arange(x_train.shape[0])
# A dedicated, seeded generator keeps the split reproducible without touching
# NumPy's global random state.
split_rng = np.random.RandomState(split_seed)
val_index = split_rng.choice(total_index, split, replace=False)
# total_index[i] == i, so deleting the *positions* in val_index removes exactly
# the held-out sample indices and leaves the rest in ascending order.
train_index = np.delete(total_index, val_index)

# Sanity check: the two index sets partition the training data.
assert len(train_index) + len(val_index) == len(total_index)

x_val_div = x_train[val_index]
x_train_div = x_train[train_index]
y_val_div = y_train[val_index]
y_train_div = y_train[train_index]

In [19]:
# Settings forwarded to the noisy / quantized layers from layer_utils
weight_noise_train = 0.0
weight_noise_test = 0.0
weight_bits = None
activation_bits = 3

K.clear_session()

# Keyword arguments shared by every conv2d_noise / dense_noise layer
noise_kwargs = dict(noise_train=weight_noise_train,
                    noise_test=weight_noise_test,
                    num_bits=weight_bits)

# Three stages of (conv -> quantized activation) x 2 followed by 2x2 max
# pooling, with 8, 16 and 32 filters respectively, then a softmax classifier.
# (A plain layers.Activation(activations.relu) was the alternative to
# activation_quant in the original cell.)
model = Sequential()
for stage, filters in enumerate((8, 16, 32)):
    for conv_idx in range(2):
        # Only the very first layer of the network declares the input shape
        is_first_layer = (stage == 0 and conv_idx == 0)
        first_layer_kwargs = {'input_shape': input_shape} if is_first_layer else {}
        model.add(conv2d_noise(filters, padding='same', **noise_kwargs, **first_layer_kwargs))
        model.add(activation_quant(activation_bits, 3))
    model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
model.add(Flatten())
model.add(dense_noise(num_classes, activation='softmax', **noise_kwargs, name='dense'))

model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=tf.keras.optimizers.Adam(1e-3),
    metrics=['accuracy'],
)
model.build(input_shape)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_noise (conv2d_noise)  (None, 28, 28, 8)         80        
_________________________________________________________________
activation (Activation)      (None, 28, 28, 8)         0         
_________________________________________________________________
conv2d_noise_1 (conv2d_noise (None, 28, 28, 8)         584       
_________________________________________________________________
activation_1 (Activation)    (None, 28, 28, 8)         0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 8)         0         
_________________________________________________________________
conv2d_noise_2 (conv2d_noise (None, 14, 14, 16)        1168      
_________________________________________________________________
activation_2 (Activation)    (None, 14, 14, 16)        0

In [None]:
# A shallow version: two noisy conv layers, one max-pool and a softmax classifier.
# Running this cell replaces the `model` built by the deeper cell above.

K.clear_session()

# NOTE(review): the layers below are configured with `noise_magnitude`, whereas
# the deeper model in this notebook passes `noise_train` / `noise_test` /
# `num_bits` -- confirm which keywords layer_utils currently accepts.
model = Sequential()
model.add(conv2d_noise(8, kernel_size=(3, 3), activation='relu', noise_magnitude=0.1, name='conv2d', input_shape=input_shape))
model.add(activation_quant(3, 3.7))
# model.add(Dropout(0.25))
# model.add(Conv2D(12, (3, 3), padding='valid', activation='relu', name='conv2d_1'))
model.add(conv2d_noise(12, (3, 3), padding='valid', activation='relu', noise_magnitude=0.1, name='conv2d_1'))
# model.add(Dropout(0.25))
model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
# model.add(Dropout(0.25))
model.add(Flatten())
model.add(activation_quant(3, 10.0))
# model.add(Dense(num_classes, activation='softmax', name='dense'))
model.add(dense_noise(num_classes, activation='softmax', noise_magnitude=0.1, name='dense'))

# tf.train.AdamOptimizer is a TF1-only symbol; use the Keras optimizer, as the
# deeper model in this notebook does, so the cell runs under the TF2 API.
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=tf.keras.optimizers.Adam(2e-4),
              metrics=['accuracy'])
model.build(input_shape)
model.summary()

In [22]:
# Train on the reduced training set and validate on the held-out samples.
batch_size = 128
epochs = 50

# Checkpoint prefix; the trailing separator makes TensorFlow write its
# checkpoint files inside this directory.
save_dir = 'mnist_cnn_7layer_input_64bit_wnoise0.00_val\\'
# load_dir = save_dir
# model.load_weights(load_dir)

# With save_best_only=True the checkpoint at `save_dir` is rewritten only when
# val_accuracy improves, so it always holds the best weights seen so far.
ckpt_cbk = keras.callbacks.ModelCheckpoint(
        filepath=save_dir,
        save_weights_only=True,
        save_best_only=True,
        monitor='val_accuracy',
        verbose=1)

model.fit(x_train_div, y_train_div,
          batch_size=batch_size,
          epochs=epochs,
          callbacks=[ckpt_cbk],
          verbose=1,
          validation_data=(x_val_div, y_val_div))

# Store the last-epoch weights under their own prefix. Saving them to
# `save_dir` itself would overwrite the best-val_accuracy checkpoint that the
# callback kept there.
model.save_weights(save_dir + 'final')

Train on 50000 samples, validate on 10000 samples
Epoch 1/50
Epoch 00001: val_accuracy improved from -inf to 0.99000, saving model to mnist_cnn_7layer_input_64bit_wnoise0.00_val\
Epoch 2/50
Epoch 00002: val_accuracy did not improve from 0.99000
Epoch 3/50
Epoch 00003: val_accuracy improved from 0.99000 to 0.99050, saving model to mnist_cnn_7layer_input_64bit_wnoise0.00_val\
Epoch 4/50
Epoch 00004: val_accuracy did not improve from 0.99050
Epoch 5/50
Epoch 00005: val_accuracy did not improve from 0.99050
Epoch 6/50
Epoch 00006: val_accuracy did not improve from 0.99050
Epoch 7/50
Epoch 00007: val_accuracy did not improve from 0.99050
Epoch 8/50
Epoch 00008: val_accuracy did not improve from 0.99050
Epoch 9/50
Epoch 00009: val_accuracy did not improve from 0.99050
Epoch 10/50
Epoch 00010: val_accuracy improved from 0.99050 to 0.99090, saving model to mnist_cnn_7layer_input_64bit_wnoise0.00_val\
Epoch 11/50
Epoch 00011: val_accuracy did not improve from 0.99090
Epoch 12/50
Epoch 00012: va

Epoch 28/50
Epoch 00028: val_accuracy did not improve from 0.99090
Epoch 29/50
Epoch 00029: val_accuracy did not improve from 0.99090
Epoch 30/50
Epoch 00030: val_accuracy did not improve from 0.99090
Epoch 31/50
Epoch 00031: val_accuracy did not improve from 0.99090
Epoch 32/50
Epoch 00032: val_accuracy did not improve from 0.99090
Epoch 33/50
Epoch 00033: val_accuracy did not improve from 0.99090
Epoch 34/50
Epoch 00034: val_accuracy improved from 0.99090 to 0.99140, saving model to mnist_cnn_7layer_input_64bit_wnoise0.00_val\
Epoch 35/50
Epoch 00035: val_accuracy did not improve from 0.99140
Epoch 36/50
Epoch 00036: val_accuracy did not improve from 0.99140
Epoch 37/50
Epoch 00037: val_accuracy did not improve from 0.99140
Epoch 38/50
Epoch 00038: val_accuracy did not improve from 0.99140
Epoch 39/50
Epoch 00039: val_accuracy did not improve from 0.99140
Epoch 40/50
Epoch 00040: val_accuracy did not improve from 0.99140
Epoch 41/50
Epoch 00041: val_accuracy did not improve from 0.99

In [19]:
# Snapshot the current parameters of every layer, keyed by layer name
weights = {layer.name: layer.get_weights() for layer in model.layers}

In [89]:
# Copy the parameters stored in `weights` into the layers of the current model
for klayer in model.layers:
    if klayer.name not in weights:
        continue  # nothing was stored under this layer's name

    layer_params = weights[klayer.name]
    is_conv_or_dense = any(tag in klayer.name for tag in ('conv2d', 'dense'))
    if is_conv_or_dense and weight_bits is not None:
        # Append the largest absolute value of the first and second parameter
        # arrays as two extra entries.
        # NOTE(review): presumably the quantized layers keep these ranges as
        # additional weights -- confirm against layer_utils.
        weight_range = np.abs(layer_params[0]).max()
        bias_range = np.abs(layer_params[1]).max()
        layer_params = layer_params + [weight_range, bias_range]
    klayer.set_weights(layer_params)

In [12]:
# Point both the save and the load location at this checkpoint prefix, then
# restore the model weights from it
load_dir = save_dir = 'mnist_cnn_7layer_input_3bit_wnoise0.15\\checkpoint'
model.load_weights(load_dir)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x1a98ccec0b8>

In [21]:
# Evaluate on the unquantized test set; the first two entries of `score` are
# reported as loss and accuracy
score = model.evaluate(x_test, y_test, verbose=0)
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

Test loss: 0.04687360113807137
Test accuracy: 0.9898


In [13]:
# Evaluate the model ITERATION times on the 3-bit test inputs and report the
# mean of the second value returned by evaluate (the accuracy metric)
ITERATION = 10
accuracy = np.array(
    [model.evaluate(x_test_3bit, y_test, verbose=1)[1] for _ in range(ITERATION)],
    dtype=np.float64,
)

print(accuracy.mean())

0.9914399921894074
