In [1]:
from __future__ import print_function
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model, load_model, save_model
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.layers import BatchNormalization, AveragePooling2D, Input, Flatten
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import os
import sys
from imp import reload
from layer_utils import activation_quant, conv2d_noise, dense_noise
from resnet_model import resnet_v1, resnet_v2
from layers_numpy import *
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

# Set the matplotlib default settings
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

In [2]:
# Display the installed TensorFlow version (the recorded output is '2.1.0').
tf.__version__

'2.1.0'

In [3]:
# Number of target classes (used for one-hot encoding of the labels).
num_classes = 10

# Subtracting pixel mean improves accuracy
subtract_pixel_mean = False

# Model size parameter; the network depth is derived from it below.
n = 3

# Model version
# Orig paper: version = 1 (ResNet v1), Improved ResNet: version = 2 (ResNet v2)
version = 1

# Computed depth from supplied model parameter n
if version == 1:
    depth = n * 6 + 2
elif version == 2:
    depth = n * 9 + 2
else:
    # Fail here rather than with a NameError on `depth` in a later cell.
    raise ValueError('Unsupported ResNet version: %r (expected 1 or 2)' % (version,))


In [4]:
# Fetch the CIFAR10 train and test splits (images and class labels).
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Per-sample input dimensions: everything after the leading sample axis.
input_shape = x_train.shape[1:]

# Normalize data: cast to float32 and divide by 255.
x_train, x_test = (
    images.astype('float32') / 255 for images in (x_train, x_test)
)

# Optionally centre both splits on the per-pixel mean of the training split.
if subtract_pixel_mean:
    x_train_mean = np.mean(x_train, axis=0)
    x_train -= x_train_mean
    x_test -= x_train_mean

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print('y_train shape:', y_train.shape)

# Convert class vectors to binary (one-hot) class matrices.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples
y_train shape: (50000, 1)


In [5]:
# Quantize both image splits with quantize_unsigned(..., 4, 0.95) and map them
# back with quantize_rescale(..., 4, 0.95).
# NOTE(review): the meaning of 0.95 is defined in layers_numpy; the model name
# below carries the same value as 'input0.95' - confirm before changing it.
x_train_4bit, x_test_4bit = (
    quantize_rescale(quantize_unsigned(images, 4, 0.95), 4, 0.95)
    for images in (x_train, x_test)
)
# A 3-bit variant was previously produced the same way (bits=3, same 0.95):
# x_train_3bit = quantize_rescale(quantize_unsigned(x_train, 3, 0.95), 3, 0.95)
# x_test_3bit = quantize_rescale(quantize_unsigned(x_test, 3, 0.95), 3, 0.95)

In [26]:
# Settings that identify which trained checkpoint this notebook works with.
activation_bits = 3
weight_noise = 0.20
weight_bits = None
model_name = 'act{:d}b_wnoise{:.2f}_input0.95'.format(activation_bits, weight_noise)

# Full model identifier: depth, version and the settings above.
model_type = 'ResNet{:d}v{:d}_filter16_{}'.format(depth, version, model_name)
print(model_type)

ResNet20v1_filter16_act3b_wnoise0.20_input0.95


In [27]:
# Locate the checkpoint directory for this model configuration.
save_dir = os.path.join(os.getcwd(), 'model_checkpoint', model_type)
model_full_name = 'cifar10_{}_model'.format(model_type)
# model_full_name = 'fwd_finetune_all_val'
filepath = os.path.join(save_dir, model_full_name)

# Reset the Keras backend state, then restore the saved model from disk.
K.clear_session()
model = load_model(filepath)

In [11]:
# Print the layer-by-layer summary of the current `model`.
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
conv2d_noise (conv2d_noise)  multiple                  448       
_________________________________________________________________
batch_normalization (BatchNo multiple                  64        
_________________________________________________________________
activation_quant (activation multiple                  1         
_________________________________________________________________
conv2d_noise_1 (conv2d_noise multiple                  2320      
_________________________________________________________________
batch_normalization_1 (Batch multiple                  64        
_________________________________________________________________
activation_quant_1 (activati multiple                  1     

In [28]:
# Snapshot every layer's weights, keyed by layer name, so they can be copied
# into a freshly built model later on.
weights = {olayer.name: olayer.get_weights() for olayer in model.layers}

In [None]:
# Save a 50-bin histogram of the first weight array of every layer whose name
# contains 'conv2d' or 'dense' (presumably the kernel - confirm in layer_utils).
hist_dir = './weight_distribution/noise_0.2/'
# savefig does not create missing directories, so make sure the target exists;
# exist_ok keeps the cell re-runnable.
os.makedirs(hist_dir, exist_ok=True)
for layer in model.layers:
    if 'conv2d' in layer.name or 'dense' in layer.name:
        fig = plt.figure()
        plt.hist(layer.get_weights()[0].flatten(), 50)
        plt.savefig(os.path.join(hist_dir, '%s.png' % layer.name))
        # Close each figure so repeated plotting does not accumulate open figures.
        plt.close(fig)

In [29]:
# Reset the Keras backend state before building a fresh network.
K.clear_session()

# Build the architecture selected by `version`. Both branches take the
# activation/weight bit widths from the configuration variables so the network
# always matches the settings encoded in `model_type`.
if version == 2:
    model = resnet_v2(input_shape=input_shape,
                      depth=depth,
                      activation_bits=activation_bits,
                      weight_noise_train=0.00,
                      weight_noise_test=0.10,
                      weight_bits=weight_bits)
else:
    model = resnet_v1(input_shape=input_shape,
                      depth=depth,
                      activation_bits=activation_bits,
                      relu_decay=1e-3,
                      weight_noise_train=0.00,
                      weight_noise_test=0.00,
                      weight_bits=weight_bits,
                      trainable_conv=True,
                      trainable_dense=True)


model.compile(loss='categorical_crossentropy',
              optimizer=Adam(1e-4),
              metrics=['accuracy'])

model.summary()
print(model_type)

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
conv2d_noise (conv2d_noise)     (None, 32, 32, 16)   448         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 32, 32, 16)   64          conv2d_noise[0][0]               
__________________________________________________________________________________________________
activation_quant (activation_qu (None, 32, 32, 16)   1           batch_normalization[0][0]        
______________________________________________________________________________________________

In [30]:
# Copy the snapshotted weights into every layer of the rebuilt model that has
# an entry (matched by layer name); layers without an entry are left as built.
for target_layer in model.layers:
    saved_values = weights.get(target_layer.name)
    if saved_values is not None:
        target_layer.set_weights(saved_values)

In [31]:
# Evaluate on the 4-bit-quantized test images. With the compile settings used
# in this notebook (one loss, metrics=['accuracy']) the result is [loss, accuracy].
model.evaluate(x_test_4bit, y_test, verbose=1)



[0.9386348009109498, 0.8753]

In [None]:
# Score trained model (mean of noisy inference).
# Both loss and accuracy are averaged over all runs; reporting only the last
# run's loss next to the mean accuracy would mix two different statistics.
iteration = 10
loss = np.zeros(iteration)
accuracy = np.zeros(iteration)
for i in range(iteration):
    scores = model.evaluate(x_test_4bit, y_test, verbose=1)
    loss[i] = scores[0]
    accuracy[i] = scores[1]
print('Test loss:', loss.mean())
print('Test accuracy:', accuracy.mean())

# Intermediate finetuned models

In [None]:
def _indexed_names(base, count):
    """Yield `count` layer names: ``base``, ``base_1``, ..., ``base_<count-1>``."""
    yield base
    for index in range(1, count):
        yield '%s_%d' % (base, index)


def resnet20_finetune(activation_bits, weight_noise_train, weight_noise_test, decay=0):
    """Build and compile the ResNet-20-style network used for finetuning.

    The network takes a (32, 32, 3) input and consists of a stem
    (conv -> batch norm -> quantized activation), three stacks of three
    residual blocks with 16, 32 and 64 filters, average pooling, flatten, a
    quantized activation and a 10-way softmax `dense_noise` layer.

    Layers are named explicitly (`conv2d_noise`, `conv2d_noise_1`, ...,
    `batch_normalization_18`, `activation_quant_18`, `dense_noise`) so that
    weights stored under those names can be copied in by layer name.

    Args:
        activation_bits: passed as `num_bits` to every `activation_quant`.
        weight_noise_train: passed as `noise_train` to every `conv2d_noise`
            and to the final `dense_noise`.
        weight_noise_test: passed as `noise_test` to the same layers.
        decay: passed as `decay` to the FIRST `activation_quant` only.
            NOTE(review): all other activations use the layer's own default -
            confirm whether that is intentional.

    Returns:
        The compiled Keras `Model` (categorical cross-entropy, Adam(1e-4),
        accuracy metric). `model.summary()` is printed as a side effect.
    """
    # Names are consumed strictly in layer-creation order.
    conv_names = _indexed_names('conv2d_noise', 21)
    bn_names = _indexed_names('batch_normalization', 19)
    act_names = _indexed_names('activation_quant', 19)

    def conv(tensor, filters, strides=1, **extra):
        # `extra` carries kernel_size for the 1x1 shortcut convolutions; all
        # other convolutions rely on conv2d_noise's default kernel size.
        return conv2d_noise(filters, strides=strides, padding='same',
                            noise_train=weight_noise_train,
                            noise_test=weight_noise_test,
                            name=next(conv_names), **extra)(tensor)

    def batch_norm(tensor):
        return BatchNormalization(name=next(bn_names))(tensor)

    def quantize(tensor, **extra):
        return activation_quant(num_bits=activation_bits, max_value=3,
                                name=next(act_names), **extra)(tensor)

    # A new input tensor to be able to feed the desired layer.
    layer_input_0 = Input(shape=(32, 32, 3))

    # Stem
    y = conv(layer_input_0, 16)
    y = batch_norm(y)
    y = quantize(y, decay=decay)

    # (filters, stride of the first block) for each stack.
    stacks = ((16, 1), (32, 2), (64, 2))
    blocks_per_stack = 3
    last_position = (len(stacks) - 1, blocks_per_stack - 1)

    for stack_index, (filters, first_stride) in enumerate(stacks):
        for block_index in range(blocks_per_stack):
            strides = first_stride if block_index == 0 else 1
            # Only the first block of stacks 2 and 3 changes the tensor shape
            # and therefore needs a projection on the shortcut path.
            needs_projection = stack_index > 0 and block_index == 0

            x = conv(y, filters, strides=strides)
            x = batch_norm(x)
            x = quantize(x)
            x = conv(x, filters)
            if needs_projection:
                # Created before the block's second batch norm so the conv
                # numbering matches the hand-written layer names.
                y = conv(y, filters, strides=strides, kernel_size=1)
            x = batch_norm(x)
            y = keras.layers.add([y, x])

            # The final block is quantized only after pooling and flattening.
            if (stack_index, block_index) != last_position:
                y = quantize(y)

    y = AveragePooling2D(pool_size=8)(y)
    y = Flatten()(y)
    y = quantize(y)

    outputs = dense_noise(10, activation='softmax',
                          noise_train=weight_noise_train,
                          noise_test=weight_noise_test,
                          name='dense_noise')(y)

    # Create and compile the model.
    model = Model([layer_input_0], outputs)
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(1e-4),
                  metrics=['accuracy'])
    model.summary()
    return model


def build_model(activation_bits, weight_noise_train, weight_noise_test, weights_map):
    """Build the finetuning network and initialise it from stored weights.

    Args:
        activation_bits: forwarded to `resnet20_finetune`.
        weight_noise_train: forwarded to `resnet20_finetune`.
        weight_noise_test: forwarded to `resnet20_finetune`.
        weights_map: mapping from layer name to the list accepted by that
            layer's `set_weights`; layers whose name is absent are untouched.

    Returns:
        The model returned by `resnet20_finetune`, with matching layers loaded.
    """
    # K.clear_session() is deliberately not called here.
    model = resnet20_finetune(activation_bits, weight_noise_train, weight_noise_test)
    named_layers = (candidate for candidate in model.layers
                    if candidate.name in weights_map)
    for matched in named_layers:
        matched.set_weights(weights_map[matched.name])
    return model

In [None]:
# Build the finetuning network (3-bit activations, 0.2 train-time and 0.0
# test-time weight noise) and initialise it from the snapshotted weights.
model = build_model(activation_bits=3,
                    weight_noise_train=0.2,
                    weight_noise_test=0.0,
                    weights_map=weights)

In [None]:
finetune_previous_layer = 'conv2d_noise'
finetune_current_layer = 'conv2d_noise_1'
num_segment_previous = 1
num_segment_current = 2
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
DATA_PATH = '/scratch/users/weierwan/forward_finetune/'


def _load_rescaled_sum(name_pattern, layer_name, num_segments):
    """Sum the rescaled `out_chip` arrays of all segment files of one layer.

    For every segment index in ``range(num_segments)`` the file
    ``name_pattern % (layer_name, index)`` under DATA_PATH is read and
    ``(out_chip - intercept) / slope`` is added to the running total.

    Returns the accumulated array, or the integer 0 when `num_segments` is 0.
    """
    total = 0
    for segment in range(num_segments):
        path = os.path.join(DATA_PATH, name_pattern % (layer_name, segment))
        # Use the archive as a context manager so its file handle is closed.
        with np.load(path) as archive:
            total += (archive['out_chip'] - archive['intercept']) / archive['slope']
    return total


# NOTE(review): the previous-layer files carry a '_2' suffix while the
# current-layer files do not - confirm this naming difference is intended.
train_previous_inputs = _load_rescaled_sum(
    '%s_%d_train_2.npz', finetune_previous_layer, num_segment_previous)
test_previous_inputs = _load_rescaled_sum(
    '%s_%d_test_2.npz', finetune_previous_layer, num_segment_previous)
train_current_inputs = _load_rescaled_sum(
    '%s_%d_train.npz', finetune_current_layer, num_segment_current)
test_current_inputs = _load_rescaled_sum(
    '%s_%d_test.npz', finetune_current_layer, num_segment_current)

In [None]:
# Evaluate the current `model` on the loaded previous/current layer inputs and
# keep only the accuracy (index 1 of the list returned by evaluate).
# NOTE(review): two input arrays are passed here, but resnet20_finetune in this
# notebook builds a model with a single Input of shape (32, 32, 3) - confirm
# which model this cell is meant to run against.
accuracy_train = model.evaluate([train_previous_inputs, train_current_inputs], y_train, verbose=1)[1]
accuracy_test = model.evaluate([test_previous_inputs, test_current_inputs], y_test, verbose=1)[1]
print('Noise-free train accuracy:', accuracy_train)
print('Noise-free test accuracy:', accuracy_test)

In [None]:
# Replace `model` with the checkpoint stored as 'relu_decay_1e-3' under save_dir.
# Note that this also rebinds `filepath`.
filepath = os.path.join(save_dir, 'relu_decay_1e-3')
model = load_model(filepath)

In [None]:
# Evaluate the train and test splits ITERATION times each, printing the loss
# and accuracy of every run, then print the mean accuracy per split.
ITERATION = 2
accuracy_train = np.zeros(ITERATION)
accuracy_test = np.zeros(ITERATION)

# (label, model inputs, targets, per-run accuracy store) for each split.
eval_sets = (
    ('Train', [train_previous_inputs, train_current_inputs], y_train, accuracy_train),
    ('Test', [test_previous_inputs, test_current_inputs], y_test, accuracy_test),
)

for i in range(ITERATION):
    for split, inputs, targets, history in eval_sets:
        scores = model.evaluate(inputs, targets, verbose=0)
        print('%s loss:' % split, scores[0])
        print('%s accuracy:' % split, scores[1])
        history[i] = scores[1]

print(accuracy_train.mean())
print(accuracy_test.mean())