# Residual Networks

**After discovering Residual Networks and their ability to deal with vanishing gradients, which makes it possible to train much deeper networks, I decided to implement ResNet50 entirely from scratch.
In this notebook, I'm going to:**

- Implement the basic building blocks of ResNets in a deep neural network using Keras
- Implement the architecture of the popular ResNet50

<img src="we need to go deeper(ResNet).png">


In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.models import Model


# IPython magic: render matplotlib figures inline in the notebook.
%matplotlib inline
# Seed the NumPy and TensorFlow random number generators.
np.random.seed(1)
tf.random.set_seed(2)


### 1 - identity_block


In [2]:
def identity_block(X, f, filters, initializer=glorot_uniform):
    """
    Residual block whose shortcut is the unmodified input tensor.

    Main path: CONV(1x1) -> BN -> RELU -> CONV(fxf, 'same') -> BN -> RELU -> CONV(1x1) -> BN,
    then the input is added back and a final RELU is applied.

    Arguments:
    X -- input tensor
    f -- integer, kernel size of the middle CONV of the main path
    filters -- python list of three integers, number of filters of the three CONV layers of the main path
    initializer -- callable invoked as initializer(seed=0) to build each CONV's kernel initializer

    Returns:
    X -- output tensor of the block
    """
    n_first, n_middle, n_last = filters
    skip = X

    # One row per main-path CONV: (filters, kernel_size, padding, followed by RELU?)
    main_path = (
        (n_first, 1, 'valid', True),
        (n_middle, f, 'same', True),
        (n_last, 1, 'valid', False),
    )
    for n_filters, kernel, pad, with_relu in main_path:
        X = Conv2D(filters=n_filters, kernel_size=kernel, strides=(1, 1), padding=pad,
                   kernel_initializer=initializer(seed=0))(X)
        X = BatchNormalization(axis=3)(X)
        if with_relu:
            X = Activation('relu')(X)

    # The input is added back unchanged, so n_last has to match its channel count.
    merged = Add()([X, skip])
    return Activation('relu')(merged)


### 2 - convolutional_block

In [6]:
def convolutional_block(X, f, filters, s = 2, initializer=glorot_uniform):
    """
    Residual block whose shortcut goes through its own CONV(1x1) -> BN projection.

    Main path: CONV(1x1, stride s) -> BN -> RELU -> CONV(fxf, 'same') -> BN -> RELU -> CONV(1x1) -> BN.
    The projected shortcut is added to the main path and a final RELU is applied.

    Arguments:
    X -- input tensor
    f -- integer, kernel size of the middle CONV of the main path
    filters -- python list of three integers, number of filters of the three CONV layers of the main path
    s -- integer, stride used by the first main-path CONV and by the shortcut CONV
    initializer -- callable invoked as initializer(seed=0) to build each CONV's kernel initializer

    Returns:
    X -- output tensor of the block
    """
    n_first, n_middle, n_last = filters
    skip = X

    # One row per main-path CONV: (filters, kernel_size, strides, padding, followed by RELU?)
    main_path = (
        (n_first, 1, (s, s), 'valid', True),
        (n_middle, f, 1, 'same', True),
        (n_last, 1, (1, 1), 'valid', False),
    )
    for n_filters, kernel, stride, pad, with_relu in main_path:
        X = Conv2D(filters=n_filters, kernel_size=kernel, strides=stride, padding=pad,
                   kernel_initializer=initializer(seed=0))(X)
        X = BatchNormalization(axis=3)(X)
        if with_relu:
            X = Activation('relu')(X)

    # Project the shortcut with the same stride and n_last filters so both Add() inputs line up.
    skip = Conv2D(filters=n_last, kernel_size=1, strides=s, padding='valid',
                  kernel_initializer=initializer(seed=0))(skip)
    skip = BatchNormalization(axis=3)(skip)

    merged = Add()([X, skip])
    return Activation('relu')(merged)


### 3 - ResNet50

In [8]:
def ResNet50(input_shape = (64, 64, 3), classes = 6):
    """
    Build the ResNet50 architecture as a Keras Model:
    CONV2D -> BATCHNORM -> RELU -> MAXPOOL -> CONVBLOCK -> IDBLOCK*2 -> CONVBLOCK -> IDBLOCK*3
    -> CONVBLOCK -> IDBLOCK*5 -> CONVBLOCK -> IDBLOCK*2 -> AVGPOOL -> FLATTEN -> DENSE

    Arguments:
    input_shape -- shape passed to Input() (no batch dimension)
    classes -- integer, number of units of the final softmax Dense layer

    Returns:
    model -- a Model() instance mapping the input tensor to the softmax output
    """
    # Stages 2-5: (filters, stride of the convolutional block, number of identity blocks after it)
    stage_specs = [
        ([64, 64, 256], 1, 2),
        ([128, 128, 512], 2, 3),
        ([256, 256, 1024], 2, 5),
        ([512, 512, 2048], 2, 2),
    ]

    img_input = Input(input_shape)

    # Stage 1
    net = ZeroPadding2D((3, 3))(img_input)
    net = Conv2D(64, (7, 7), strides=(2, 2), kernel_initializer=glorot_uniform(seed=0))(net)
    net = BatchNormalization(axis=3)(net)
    net = Activation('relu')(net)
    net = MaxPooling2D((3, 3), strides=(2, 2))(net)

    # Stages 2-5: one convolutional block followed by its identity blocks
    for stage_filters, stride, n_identity in stage_specs:
        net = convolutional_block(net, f=3, filters=stage_filters, s=stride)
        for _ in range(n_identity):
            net = identity_block(net, f=3, filters=stage_filters)

    # Classification head
    net = AveragePooling2D()(net)
    net = Flatten()(net)
    predictions = Dense(classes, activation='softmax', kernel_initializer=glorot_uniform(seed=0))(net)

    return Model(inputs=img_input, outputs=predictions)

In [9]:
# Build the 6-class ResNet50 for 64x64x3 inputs and print its layer table.
model = ResNet50(input_shape = (64, 64, 3), classes = 6)
# summary() prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" line after the table.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 64, 64, 3)]  0           []                               
                                                                                                  
 zero_padding2d (ZeroPadding2D)  (None, 70, 70, 3)   0           ['input_1[0][0]']                
                                                                                                  
 conv2d_24 (Conv2D)             (None, 32, 32, 64)   9472        ['zero_padding2d[0][0]']         
                                                                                                  
 batch_normalization_24 (BatchN  (None, 32, 32, 64)  256         ['conv2d_24[0][0]']              
 ormalization)                                                                                

 conv2d_34 (Conv2D)             (None, 15, 15, 256)  16640       ['activation_28[0][0]']          
                                                                                                  
 batch_normalization_34 (BatchN  (None, 15, 15, 256)  1024       ['conv2d_34[0][0]']              
 ormalization)                                                                                    
                                                                                                  
 add_9 (Add)                    (None, 15, 15, 256)  0           ['batch_normalization_34[0][0]', 
                                                                  'activation_26[0][0]']          
                                                                                                  
 activation_29 (Activation)     (None, 15, 15, 256)  0           ['add_9[0][0]']                  
                                                                                                  
 conv2d_35

                                                                  'activation_35[0][0]']          
                                                                                                  
 activation_38 (Activation)     (None, 8, 8, 512)    0           ['add_12[0][0]']                 
                                                                                                  
 conv2d_45 (Conv2D)             (None, 8, 8, 128)    65664       ['activation_38[0][0]']          
                                                                                                  
 batch_normalization_45 (BatchN  (None, 8, 8, 128)   512         ['conv2d_45[0][0]']              
 ormalization)                                                                                    
                                                                                                  
 activation_39 (Activation)     (None, 8, 8, 128)    0           ['batch_normalization_45[0][0]'] 
          

 batch_normalization_55 (BatchN  (None, 4, 4, 256)   1024        ['conv2d_55[0][0]']              
 ormalization)                                                                                    
                                                                                                  
 activation_48 (Activation)     (None, 4, 4, 256)    0           ['batch_normalization_55[0][0]'] 
                                                                                                  
 conv2d_56 (Conv2D)             (None, 4, 4, 256)    590080      ['activation_48[0][0]']          
                                                                                                  
 batch_normalization_56 (BatchN  (None, 4, 4, 256)   1024        ['conv2d_56[0][0]']              
 ormalization)                                                                                    
                                                                                                  
 activatio

                                                                                                  
 conv2d_66 (Conv2D)             (None, 4, 4, 1024)   263168      ['activation_58[0][0]']          
                                                                                                  
 batch_normalization_66 (BatchN  (None, 4, 4, 1024)  4096        ['conv2d_66[0][0]']              
 ormalization)                                                                                    
                                                                                                  
 add_19 (Add)                   (None, 4, 4, 1024)   0           ['batch_normalization_66[0][0]', 
                                                                  'activation_56[0][0]']          
                                                                                                  
 activation_59 (Activation)     (None, 4, 4, 1024)   0           ['add_19[0][0]']                 
          

 add_22 (Add)                   (None, 2, 2, 2048)   0           ['batch_normalization_76[0][0]', 
                                                                  'activation_65[0][0]']          
                                                                                                  
 activation_68 (Activation)     (None, 2, 2, 2048)   0           ['add_22[0][0]']                 
                                                                                                  
 average_pooling2d (AveragePool  (None, 1, 1, 2048)  0           ['activation_68[0][0]']          
 ing2D)                                                                                           
                                                                                                  
 flatten (Flatten)              (None, 2048)         0           ['average_pooling2d[0][0]']      
                                                                                                  
 dense (De

In [11]:
# Reseed the NumPy and TensorFlow random number generators before configuring training.
np.random.seed(1)
tf.random.set_seed(2)
# Adam optimizer with an explicit learning rate of 1.5e-4.
opt = tf.keras.optimizers.Adam(learning_rate=0.00015)
# NOTE(review): 'categorical_crossentropy' presumably expects one-hot labels matching
# the softmax output of the model — confirm against the data-loading cell.
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])