# Construct the ResNet with Keras
## notes:
1. The model is trained on Kaggle with an NVIDIA Tesla K80. Training typically takes a few hours (200 epochs, batch size = 128). The trained model is saved in .h5 format. Open test_script.ipynb to play with the trained models.
2. L2 regularization improves the accuracy by 0.5% ~ 1% 
3. Although it is sometimes claimed that a ResNet with bottleneck layers is better than the regular ResNet, the bottleneck variant is actually slightly worse than the regular ResNet in my case. Nevertheless, adding the bottleneck ResNet to the ensemble model improves the final results, as expected.
4. contact info: fengjc1214@gmail.com

In [None]:
import numpy as np
from keras import layers
from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
from keras.models import Model, load_model
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.optimizers import Adam
from keras.utils.data_utils import get_file
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau
from keras.applications.imagenet_utils import preprocess_input
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import plot_model
from keras.initializers import glorot_uniform
import scipy.misc
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
%matplotlib inline
import pickle
import keras.backend as K
import os
from keras import regularizers
import utilities
import read_file

In [None]:
# ---- Parameter list ----
# The CIFAR-10 data set can be downloaded from:
# https://www.cs.toronto.edu/~kriz/cifar.html

# Directory that holds the training and testing data files.
img_path = './cifar-10-batches-py/'

# Mini-batch size and number of training epochs.
batch_size, epochs = 128, 200

# Weight of the L2 penalty handed to the kernel regularizers.
regu = 0.0003

# Requested network depth. A valid depth has the form 6*n + 2 with integer n;
# any other value (>= 2) is reduced to the largest depth of that form that
# does not exceed it.
num_layer = 30

# Number of residual blocks per stage, derived from the requested depth.
n = int((num_layer - 2) / 6)

print(
    'The input num_layer is: ' + str(num_layer)
    + ', The actual num_layer is: ' + str(6 * n + 2)
)

In [None]:
def identity_block(X, filters, f=3):
    """Residual block whose shortcut is the unmodified input.

    The main path is conv -> batch norm -> ReLU -> conv -> batch norm. Both
    convolutions use stride 1 and 'same' padding, so height and width are
    unchanged. The input is then added to the main path and a final ReLU is
    applied. Because the shortcut is added as-is, the input is expected to
    already have `filters` channels.

    Inputs:
    X -- input tensor (m, H_prev, W_prev, C_prev)
    filters -- number of filters of both convolutions
    f -- the kernel size is (f, f)

    Output:
    tensor produced by the final ReLU, same height and width as X
    """

    def _conv():
        # Every call builds a fresh layer; the L2 weight comes from the
        # module-level `regu`.
        return Conv2D(
            filters=filters,
            kernel_size=(f, f),
            strides=(1, 1),
            padding='same',
            kernel_regularizer=regularizers.l2(regu),
            kernel_initializer=glorot_uniform(),
        )

    # Keep a handle on the input for the skip connection.
    shortcut = X

    # Main path, first layer.
    main = _conv()(X)
    main = BatchNormalization(axis=3)(main)
    main = Activation('relu')(main)

    # Main path, second layer (its ReLU comes after the merge).
    main = _conv()(main)
    main = BatchNormalization(axis=3)(main)

    # Merge the skip connection with the main path.
    merged = Add()([shortcut, main])
    return Activation('relu')(merged)

In [None]:
def convolutional_block(X, filters, f = 3, s= 2):
    """
    Residual block with a projection shortcut.

    The first convolution of the main path and the 1x1 convolution of the
    shortcut both use stride s with 'same' padding, so with the default s=2
    the height and width are halved (H=H_prev/2, W=W_prev/2). The 1x1
    shortcut convolution also maps the input to `filters` channels so that
    it can be added to the main path.

    Every convolution in this block, including the shortcut projection,
    carries the same L2 kernel regularizer (weight `regu`).

    Inputs:
    X -- input tensor (m, H_prev, W_prev, C_prev)
    filters -- the number of filters
    f -- filter size of the main-path convolutions is (f, f)
    s -- stride of the first main-path convolution and of the shortcut
    Outputs:
    X -- output of the convolutional block, tensor of shape (n_H, n_W, n_C)
    """

    # Save the input value for the shortcut path
    X_shortcut = X

    # First layer of main path (performs the down-sampling)
    X = Conv2D(filters, (f, f), strides = (s,s), padding = 'same', kernel_regularizer=regularizers.l2(regu), kernel_initializer = glorot_uniform())(X)
    X = BatchNormalization(axis = 3)(X)
    X = Activation('relu')(X)

    # Second layer of main path (its ReLU is applied after the merge)
    X = Conv2D(filters, (f, f), strides = (1,1), padding = 'same', kernel_regularizer=regularizers.l2(regu), kernel_initializer = glorot_uniform())(X)
    X = BatchNormalization(axis = 3)(X)

    # Shortcut path: 1x1 projection with the same stride as the main path.
    # It is regularized like every other weighted layer of the network.
    X_shortcut = Conv2D(filters, (1, 1), strides = (s,s), padding = 'same', kernel_regularizer=regularizers.l2(regu), kernel_initializer = glorot_uniform())(X_shortcut)
    X_shortcut = BatchNormalization(axis = 3)(X_shortcut)

    # Add shortcut value to main path
    X = Add()([X_shortcut, X])
    X = Activation('relu')(X)

    return X

In [None]:
def ResNet(input_shape = (32, 32, 3), classes = 10, n=3):
    """
    Build a ResNet classifier with 6*n + 2 weighted layers.

    Layout: an initial 3x3 convolution (16 filters), three stages of n
    residual blocks each (16, 32 and 64 filters; stages 3 and 4 start with a
    stride-2 convolutional block), average pooling over the remaining
    feature map, and a softmax dense layer.

    Inputs:
    input_shape -- shape of the images of the dataset, (H, W, C);
                   the size comments below assume the default (32, 32, 3)
    classes -- number of classes
    n -- number of blocks in each stage

    Outputs:
    model -- a Model instance in Keras
    """

    # Construct an input tensor with shape input_shape
    X_input = Input(input_shape)

    # Pad by one pixel so the un-padded 3x3 convolution below keeps H and W
    X = ZeroPadding2D((1, 1))(X_input)

    # Stage 1: output size 32 by 32, 16 filters
    X = Conv2D(16, (3, 3), strides = (1, 1), name = 'conv1', kernel_regularizer=regularizers.l2(regu), kernel_initializer = glorot_uniform())(X)
    X = BatchNormalization(axis = 3)(X)
    X = Activation('relu')(X)

    # Stage 2: output size 32 by 32, 16 filters
    X = identity_block(X, filters = 16)
    for _ in range(n-1):
        X = identity_block(X, filters = 16)

    # Stage 3: output size 16 by 16, 32 filters
    X = convolutional_block(X, filters = 32)
    for _ in range(n-1):
        X = identity_block(X, filters = 32)

    # Stage 4: output size 8 by 8, 64 filters
    X = convolutional_block(X, filters = 64)
    for _ in range(n-1):
        X = identity_block(X, filters = 64)

    # Stage 5: average pooling over the whole feature map, output (1, 1, 64).
    # The window is taken from the static shape of the feature map so that
    # input sizes other than 32x32 are pooled globally as well; for 32x32
    # input this is (8, 8). If a spatial dimension is unknown at build time,
    # fall back to the 8x8 window.
    pool_size = K.int_shape(X)[1:3]
    if None in pool_size:
        pool_size = (8, 8)
    X = AveragePooling2D(pool_size=pool_size, strides=None, name="avg_pool")(X)

    # Output layer
    X = Flatten()(X)
    X = Dense(classes, activation='softmax', kernel_regularizer=regularizers.l2(regu), kernel_initializer = glorot_uniform())(X)

    model = Model(inputs = X_input, outputs = X, name='ResNet')

    return model

In [None]:
# Build the network for 32x32 RGB images and 10 classes, with n blocks per stage.
model = ResNet(
    input_shape=(32, 32, 3),
    classes=10,
    n=n,
)

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy as the reported metric. The initial learning rate is whatever
# utilities.lr_decay returns for argument 0 (presumably epoch 0 of the decay
# schedule -- confirm in utilities).
model.compile(
    optimizer=Adam(lr=utilities.lr_decay(0)),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Load the training and test sets from img_path.
(X_train, Y_train), (X_test, Y_test) = read_file.img_read(img_path)

# Name handed to the callbacks built by utilities.lr_callbacks.
# NOTE(review): this uses the requested num_layer, not the effective depth
# 6*n + 2, so the two can differ (e.g. 30 vs. 26) -- confirm this is intended.
modelname = "ResNet_l" + str(num_layer)

# Data augmentation: random horizontal flips and shifts of up to 15% of the
# image width/height.
# NOTE(review): zca_epsilon is only relevant when ZCA whitening is enabled,
# which it is not here.
datagen = ImageDataGenerator(
    horizontal_flip=True,
    width_shift_range=0.15,
    height_shift_range=0.15,
    zca_epsilon=1e-06,
)

# NOTE(review): per the Keras docs, fit() is only required for feature-wise
# normalization or ZCA whitening; neither is enabled, so this call looks
# redundant -- confirm before removing.
datagen.fit(X_train)

# Train on augmented batches; the test set doubles as the validation set.
model.fit_generator(
    datagen.flow(X_train, Y_train, batch_size=batch_size),
    epochs=epochs,
    validation_data=(X_test, Y_test),
    callbacks=utilities.lr_callbacks(modelname),
    verbose=1,
)

# Evaluate the trained model on the test set and report loss and accuracy.
scores = model.evaluate(X_test, Y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])