# Construct the ResNet with Keras
This notebook is identical to the regular ResNet in Construct_model.ipynb, except that every residual block is replaced with a bottleneck block.

In [None]:
import numpy as np
from keras import layers
from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
from keras.models import Model, load_model
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.optimizers import Adam
from keras.utils.data_utils import get_file
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau
from keras.applications.imagenet_utils import preprocess_input
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import plot_model
from keras.initializers import glorot_uniform
import scipy.misc
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
%matplotlib inline
import pickle
import keras.backend as K
import os
from keras import regularizers
import utilities
import read_file

In [None]:
## Hyper-parameters and data location
# CIFAR-10 can be downloaded from: https://www.cs.toronto.edu/~kriz/cifar.html

img_path = './cifar-10-batches-py/'  # directory holding the training & testing data
batch_size = 128
epochs = 200
regu = 0.0003  # weight of the L2 kernel regularizer
# Requested network depth. Only depths of the form 9*n + 2 (n an integer) can
# be built; any other value is truncated to the closest such depth below it.
num_layer = 30
n = int((num_layer - 2) / 9)  # value passed to ResNet as the number of blocks per stage
print(f'The input num_layer is: {num_layer}, The actual num_layer is: {9 * n + 2}')

In [None]:
def identity_block(X, filters, f=3):
    """Bottleneck residual block that leaves H and W unchanged.

    The main path is a 1x1 convolution, an f x f convolution and another 1x1
    convolution, each followed by batch normalization. The first 1x1
    convolution reduces the number of channels so that the f x f convolution
    needs fewer parameters and fewer computations.

    Note that the shortcut is not a plain copy of the input: it always goes
    through its own 1x1 convolution with F3 filters plus batch normalization,
    so its channel count matches the main path before the two are added.

    Inputs:
    X -- input tensor (m, H_prev, W_prev, C_prev)
    filters -- the number of filters (F1, F2, F3) of the three main-path
               convolutions. It is in the pattern (k, k, 4k)
    f -- kernel size (f, f) of the middle convolution

    Output:
    X -- output tensor with the same H and W as the input and F3 channels
    """

    n_reduce, n_spatial, n_expand = filters

    def conv_bn(tensor, n_filters, kernel, padding):
        # Stride-1, L2-regularized convolution followed by batch norm over the channel axis.
        out = Conv2D(filters=n_filters, kernel_size=kernel, strides=(1, 1), padding=padding,
                     kernel_initializer=glorot_uniform(),
                     kernel_regularizer=regularizers.l2(regu))(tensor)
        return BatchNormalization(axis=3)(out)

    # Projection shortcut (built first so layer creation order is unchanged)
    shortcut = conv_bn(X, n_expand, (1, 1), 'valid')

    # Main path, layer 1: 1x1 channel reduction
    main = conv_bn(X, n_reduce, (1, 1), 'valid')
    main = Activation('relu')(main)

    # Main path, layer 2: f x f convolution, 'same' padding keeps H and W
    main = conv_bn(main, n_spatial, (f, f), 'same')
    main = Activation('relu')(main)

    # Main path, layer 3: 1x1 channel expansion (ReLU is applied after the add)
    main = conv_bn(main, n_expand, (1, 1), 'valid')

    # Merge the shortcut with the main path
    merged = Add()([shortcut, main])
    return Activation('relu')(merged)

In [None]:
def convolutional_block(X, filters, s = 2, f = 3):
    """
    Bottleneck residual block that downsamples H and W by the stride s.

    With the default s=2: H=H_prev/2, W=W_prev/2.

    The main path is 1x1 conv (stride s) -> f x f conv -> 1x1 conv, each
    followed by batch normalization. The shortcut is a 1x1 conv with stride s
    and F3 filters plus batch normalization, so that it has the same shape as
    the main path when the two are added.

    Inputs:
    X -- input tensor (m, H_prev, W_prev, C_prev)
    filters -- the number of filters (F1, F2, F3) for each layer in the main path
    s -- stride of the first main-path convolution and of the shortcut convolution
    f -- kernel size (f, f) of the middle convolution

    Outputs:
    X -- output of the convolutional block, tensor of shape (n_H, n_W, n_C)
    """

    n_reduce, n_spatial, n_expand = filters
    block_input = X

    def conv_bn(tensor, n_filters, kernel, stride, padding='valid'):
        # L2-regularized convolution followed by batch norm over the channel axis.
        out = Conv2D(n_filters, kernel, strides=(stride, stride), padding=padding,
                     kernel_initializer=glorot_uniform(),
                     kernel_regularizer=regularizers.l2(regu))(tensor)
        return BatchNormalization(axis=3)(out)

    # Main path, layer 1: strided 1x1 convolution does the downsampling
    main = conv_bn(block_input, n_reduce, (1, 1), s)
    main = Activation('relu')(main)

    # Main path, layer 2: f x f convolution, 'same' padding keeps H and W
    main = conv_bn(main, n_spatial, (f, f), 1, padding='same')
    main = Activation('relu')(main)

    # Main path, layer 3: 1x1 channel expansion (ReLU is applied after the add)
    main = conv_bn(main, n_expand, (1, 1), 1)

    # Shortcut: strided 1x1 projection of the block input
    shortcut = conv_bn(block_input, n_expand, (1, 1), s)

    # Merge the shortcut with the main path
    merged = Add()([shortcut, main])
    return Activation('relu')(merged)

In [None]:
def ResNet(input_shape = (32, 32, 3), classes = 10, n = 3):
    """
    Assemble the bottleneck ResNet as a Keras Model.

    Layout: a 3x3 stem convolution, then three stages of bottleneck blocks,
    then average pooling and a softmax classifier. Stage 2 consists of
    identity blocks only; stages 3 and 4 each open with a convolutional block
    (which halves H and W) followed by identity blocks.

    The fixed 8x8 average pool matches the 8x8 feature map that a 32x32 input
    produces after the two downsampling stages. The stem convolution and the
    Dense layer carry no L2 kernel regularizer, unlike the convolutions inside
    the blocks.

    Inputs:
    input_shape -- shape of the images of the dataset (H=32, W=32, C=3)
    classes -- number of classes
    n -- number of blocks in each stage (at least one block is built per stage)

    Outputs:
    model -- a Model instance in Keras
    """

    # Input tensor with shape input_shape
    X_input = Input(input_shape)

    # Stage 1: pad by one pixel so the unpadded 3x3 convolution keeps 32 by 32; 16 filters
    X = ZeroPadding2D((1, 1))(X_input)
    X = Conv2D(16, (3, 3), strides=(1, 1), kernel_initializer=glorot_uniform())(X)
    X = BatchNormalization(axis=3)(X)
    X = Activation('relu')(X)

    # Stage 2: output size 32 by 32, 64 output channels
    stage2_filters = [16, 16, 64]
    X = identity_block(X, filters=stage2_filters)
    for _ in range(n - 1):
        X = identity_block(X, filters=stage2_filters)

    # Stage 3 (16 by 16, 128 output channels) and stage 4 (8 by 8, 256 output channels)
    for stage_filters in ([32, 32, 128], [64, 64, 256]):
        X = convolutional_block(X, filters=stage_filters)
        for _ in range(n - 1):
            X = identity_block(X, filters=stage_filters)

    # Pool the final feature map, then classify
    X = AveragePooling2D(pool_size=(8, 8), strides=None, name="avg_pool")(X)
    X = Flatten()(X)
    X = Dense(classes, activation='softmax', kernel_initializer=glorot_uniform(seed=0))(X)

    return Model(inputs=X_input, outputs=X, name='ResNet')

In [None]:
# Build the bottleneck ResNet for 32x32 RGB images and 10 classes,
# with n blocks per stage as derived from num_layer.
model = ResNet(
    input_shape=(32, 32, 3),
    classes=10,
    n=n,
)

In [None]:
# Print the layer-by-layer architecture and parameter counts of the model.
model.summary()

In [None]:
# Adam's initial learning rate is whatever utilities.lr_decay returns for 0
# (presumably the schedule's value at epoch 0 -- confirm in utilities).
optimizer = Adam(lr=utilities.lr_decay(0))
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Load the training and test arrays from img_path.
(X_train, Y_train), (X_test, Y_test) = read_file.img_read(img_path)

# Data augmentation: random horizontal/vertical shifts of up to 15% of the
# image size and random horizontal flips.
# No dataset statistics are needed: featurewise_center,
# featurewise_std_normalization and zca_whitening are all left off, so there
# is no datagen.fit(X_train) call (it would only copy the training set).
# Add datagen.fit(X_train) back if any of those options is ever enabled.
datagen = ImageDataGenerator(
    width_shift_range=0.15,
    height_shift_range=0.15,
    horizontal_flip=True,
)

# Name handed to the callbacks. It uses the requested num_layer, which can
# differ from the depth actually built (9*n + 2).
modelname = "ResNet_bottle_l" + str(num_layer)

# NOTE(review): the test split doubles as validation data here, so any callback
# that reacts to validation metrics is tuned on the test set -- confirm this is
# intended.
model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size),
                    validation_data=(X_test, Y_test),
                    epochs=epochs, verbose=1,
                    callbacks=utilities.lr_callbacks(modelname))

# Evaluate the trained model; scores follows the compile order (loss, then metrics).
scores = model.evaluate(X_test, Y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])