# Implementation of ResNet-50, 2015

In [10]:
from keras.models import Sequential
from keras.layers import Activation, Conv2D, MaxPool2D, Dense
from keras.layers import BatchNormalization, Dropout, Flatten, AveragePooling2D, MaxPooling2D, Input
from keras.callbacks import ReduceLROnPlateau, LearningRateScheduler
from keras.optimizers import SGD
from keras import initializers, Model
from keras.layers import concatenate, Add
import keras
import numpy as np

### Bottleneck Residual Block

In [19]:
def bottleneck_residual_block(X, kernel_size, filters, reduce=False, s=2):
    """Build one bottleneck residual block (1x1 -> kxk -> 1x1 conv) on top of X.

    Args:
        X: input tensor; batch normalization is applied on axis 3.
        kernel_size: kernel size of the middle convolution.
        filters: sequence of three filter counts, one per convolution, in order.
        reduce: when True, the shortcut goes through a 1x1 convolution plus
            batch normalization, and both it and the first main-path
            convolution use stride ``s``. When False, the shortcut is the
            unmodified input and every convolution uses stride 1.
        s: stride used by the shortcut and the first convolution; only read
            when ``reduce`` is True.

    Returns:
        The tensor obtained by adding the shortcut to the main path and
        applying a ReLU activation.
    """
    squeeze_filters, middle_filters, expand_filters = filters

    # By default the shortcut is the identity and the block keeps its stride.
    shortcut = X
    entry_strides = (1, 1)

    if reduce:
        # Project the shortcut with a 1x1 convolution; the first main-path
        # convolution must use the same stride so both paths can be added.
        entry_strides = (s, s)
        shortcut = Conv2D(filters=expand_filters, kernel_size=(1, 1), strides=entry_strides)(shortcut)
        shortcut = BatchNormalization(axis=3)(shortcut)

    # Main path, first component: 1x1 convolution.
    main = Conv2D(filters=squeeze_filters, kernel_size=(1, 1), strides=entry_strides, padding='valid')(X)
    main = BatchNormalization(axis=3)(main)
    main = Activation('relu')(main)

    # Main path, second component: kernel_size convolution with 'same' padding.
    main = Conv2D(filters=middle_filters, kernel_size=kernel_size, strides=(1, 1), padding='same')(main)
    main = BatchNormalization(axis=3)(main)
    main = Activation('relu')(main)

    # Main path, third component: 1x1 convolution, no activation before the add.
    main = Conv2D(filters=expand_filters, kernel_size=(1, 1), strides=(1, 1), padding='valid')(main)
    main = BatchNormalization(axis=3)(main)

    # Merge the shortcut back into the main path, then apply the final ReLU.
    merged = Add()([main, shortcut])
    return Activation('relu')(merged)

In [23]:
def ResNet50(input_shape, classes):
    """Assemble the ResNet-50 classifier as a Keras ``Model``.

    Args:
        input_shape: shape passed to ``Input`` (the notebook uses (224, 224, 3)).
        classes: number of units in the final softmax layer.

    Returns:
        A ``Model`` named 'ResNet50' mapping the input tensor to the softmax
        output of the 'fc<classes>' dense layer.
    """
    X_input = Input(input_shape)

    # Stage 1: 7x7 stride-2 convolution, batch norm, ReLU, 3x3 stride-2 max pool.
    X = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(X_input)
    X = BatchNormalization(axis=3, name='bn_conv1')(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((3, 3), strides=(2, 2))(X)

    # Stages 2-5. Each entry is
    # (filters, stride of the opening reduce block, number of identity blocks).
    stage_specs = (
        ([64, 64, 256], 1, 2),
        ([128, 128, 512], 2, 3),
        ([256, 256, 1024], 2, 5),
        ([512, 512, 2048], 2, 2),
    )
    for stage_filters, opening_stride, identity_blocks in stage_specs:
        # Opening block uses the projection shortcut.
        X = bottleneck_residual_block(X, 3, stage_filters, reduce=True, s=opening_stride)
        # Remaining blocks of the stage keep the identity shortcut.
        for _ in range(identity_blocks):
            X = bottleneck_residual_block(X, 3, stage_filters)

    # Head: 7x7 average pool, flatten, softmax classifier.
    # NOTE(review): the fixed 7x7 pool window presumably targets 224x224 inputs - confirm
    # before using other input sizes.
    pooled = AveragePooling2D((7, 7), strides=1)(X)
    flattened = Flatten()(pooled)
    predictions = Dense(classes, activation='softmax', name='fc' + str(classes))(flattened)

    return Model(inputs=X_input, outputs=predictions, name='ResNet50')

In [24]:
# Build the network for 224x224 RGB inputs and 1000 output classes.
model = ResNet50(input_shape=(224, 224, 3), classes=1000)

In [25]:
# Print the layer-by-layer summary (output shapes, parameter counts, connections).
model.summary()

Model: "ResNet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 109, 109, 64) 9472        input_6[0][0]                    
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 109, 109, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation_104 (Activation)     (None, 109, 109, 64) 0           bn_conv1[0][0]                   
___________________________________________________________________________________________

### Training

In [29]:
from keras.callbacks import ReduceLROnPlateau
  
# Training hyper-parameters.
initial_lrate = 0.1
epochs = 200
batch_size = 256

# SGD with plain (non-Nesterov) momentum.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases warn about or reject.
sgd = SGD(learning_rate=initial_lrate, momentum=0.9, nesterov=False)

# Multiply the learning rate by sqrt(0.1) whenever val_loss has not improved
# for 5 epochs.
# factor: factor by which the learning rate will be reduced
# min_lr: lower bound on the learning rate
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=np.sqrt(0.1), patience=5, min_lr=0.5e-6)

# Compile the model with categorical cross-entropy loss and accuracy as the metric.
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

In [31]:
# Train the model; the reduce_lr callback adjusts the learning rate during training.
# NOTE(review): X_train, Y_train, X_test and Y_test are not defined in the visible
# cells - they presumably have to be loaded before this cell runs.
model.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[reduce_lr],
)

NameError: name 'X_train' is not defined