In [2]:
import keras
import numpy as np
from keras import layers
from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D,BatchNormalization
from keras.layers import Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
from keras.models import Model, load_model
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras.applications.imagenet_utils import preprocess_input
import pydot
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model
from resnets_utils import *
from keras.initializers import glorot_uniform
import scipy.misc
from matplotlib.pyplot import imshow
from keras.datasets import cifar10
%matplotlib inline

import keras.backend as K
K.set_image_data_format('channels_last')
K.set_learning_phase(1)

Using TensorFlow backend.


In [3]:
def identity_block(X, f, filters, stage, block):
    """
    Implementation of the identity block as defined in Figure 3
    
    Arguments:
    X -- input tensor of shape (m, n_H_prev, n_W_prev, n_C_prev)
    f -- integer, specifying the shape of the middle CONV's window for the main path
    filters -- python list of integers, defining the number of filters in the CONV layers of the main path
    stage -- integer, used to name the layers, depending on their position in the network
    block -- string/character, used to name the layers, depending on their position in the network
    
    Returns:
    X -- output of the identity block, tensor of shape (n_H, n_W, n_C)
    """
    
    # defining name basis
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    
    # Retrieve Filters
    F1, F2 = filters
    
    # Save the input value. later to add back to the main path. 
    X_shortcut = X
    
    # First component of main path
    X = Conv2D(filters = F1, kernel_size = (f, f), strides = (1,1), padding = "same", 
               name = conv_name_base + '2a', kernel_initializer = glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + '2a')(X)
    X = Activation('relu')(X)
    
    # Second component of main path 
    X = Conv2D(filters = F2, kernel_size = (f, f), strides = (1, 1), padding = "same", 
               name = conv_name_base + "2b", kernel_initializer = glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + '2b')(X)

    # Final step: Add shortcut value to main path, and pass it through a RELU activation
    X = Add()([X, X_shortcut])
    X = Activation("relu")(X)
    
    return X

In [4]:
def convolutional_block(X, f, filters, stage, block, s = 2):
    """
    Implementation of the convolutional block as defined in Figure 4
    
    Arguments:
    X -- input tensor of shape (m, n_H_prev, n_W_prev, n_C_prev)
    f -- integer, specifying the shape of the middle CONV's window for the main path
    filters -- python list of integers, defining the number of filters in the CONV layers of the main path
    stage -- integer, used to name the layers, depending on their position in the network
    block -- string/character, used to name the layers,depending on their position in the network
    s -- Integer, specifying the stride to be used
    
    Returns:
    X -- output of the convolutional block, tensor of shape (n_H, n_W, n_C)
    """
    
    # defining name basis
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    
    # Retrieve Filters
    F1, F2 = filters
    
    # Save the input value
    X_shortcut = X

    # First component of main path 
    X = Conv2D(F1, (f, f), strides = (s,s), name = conv_name_base + '2a', kernel_initializer = glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + '2a')(X)
    X = Activation('relu')(X)

    # Second component of main path 
    X = Conv2D(F2, (f, f), strides = (1,1), padding = "same",
               name = conv_name_base + '2b',kernel_initializer = glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + '2b')(X)

    X_shortcut = Conv2D(F2, (f, f), strides = (s,s), 
                        name = conv_name_base + '1', kernel_initializer = glorot_uniform(seed=0))(X_shortcut)
    X_shortcut = BatchNormalization(axis = 3, name = bn_name_base + '1')(X_shortcut)

    # Final step: Add shortcut value to main path, and pass it through a RELU activation
    X = Add()([X, X_shortcut])
    X = Activation("relu")(X)
    
    return X

In [7]:
def ResNet56(input_shape = (32, 32, 3), classes = 10):
    """
    Implementation of the popular ResNet50 the following architecture:
    CONV2D -> BATCHNORM -> RELU -> MAXPOOL -> CONVBLOCK 
    -> IDBLOCK*2 -> CONVBLOCK -> IDBLOCK*3
    -> CONVBLOCK -> IDBLOCK*5 -> CONVBLOCK -> IDBLOCK*2 
    -> AVGPOOL -> TOPLAYER

    Arguments:
    input_shape -- shape of the images of the dataset
    classes -- integer, number of classes

    Returns:
    model -- a Model() instance in Keras
    """
    
    # Define the input as a tensor with shape input_shape
    X_input = Input(input_shape)

    
    # Zero-Padding
    
    # Stage 1
    X = Conv2D(16, (3, 3), strides = (1, 1), padding = "same", name = 'conv1', kernel_initializer = glorot_uniform(seed=0))(X_input)
    X = BatchNormalization(axis = 1, name = 'bn_conv1')(X)
    X = Activation('relu')(X)
    print(X.shape)

    # Stage 2
    X = identity_block(X, 3, [16, 16], stage=2, block='a')
    X = identity_block(X, 3, [16, 16], stage=2, block='b')
    X = identity_block(X, 3, [16, 16], stage=2, block='c')
    X = identity_block(X, 3, [16, 16], stage=2, block='d')
    X = identity_block(X, 3, [16, 16], stage=2, block='e')
    X = identity_block(X, 3, [16, 16], stage=2, block='f')
    X = identity_block(X, 3, [16, 16], stage=2, block='g')
    X = identity_block(X, 3, [16, 16], stage=2, block='h')
    X = identity_block(X, 3, [16, 16], stage=2, block='i')
    print(X.shape)
    X = ZeroPadding2D((1, 1))(X)
    # Stage 3 
    X = convolutional_block(X, f = 3, filters = [32, 32], stage = 3, block='a', s = 2)
    X = identity_block(X, 3, [32, 32], stage=3, block='b')
    X = identity_block(X, 3, [32, 32], stage=3, block='c')
    X = identity_block(X, 3, [32, 32], stage=3, block='d')
    X = identity_block(X, 3, [32, 32], stage=3, block='e')
    X = identity_block(X, 3, [32, 32], stage=3, block='f')
    X = identity_block(X, 3, [32, 32], stage=3, block='g')
    X = identity_block(X, 3, [32, 32], stage=3, block='h')
    X = identity_block(X, 3, [32, 32], stage=3, block='i')
    print(X.shape)
    X = ZeroPadding2D((1, 1))(X)
    # Stage 4 
    X = convolutional_block(X, f = 3, filters = [64, 64], stage = 4, block='a', s = 2)
    X = identity_block(X, 3, [64, 64], stage=4, block='b')
    X = identity_block(X, 3, [64, 64], stage=4, block='c')
    X = identity_block(X, 3, [64, 64], stage=4, block='d')
    X = identity_block(X, 3, [64, 64], stage=4, block='e')
    X = identity_block(X, 3, [64, 64], stage=4, block='f')
    X = identity_block(X, 3, [64, 64], stage=4, block='g')
    X = identity_block(X, 3, [64, 64], stage=4, block='h')
    X = identity_block(X, 3, [64, 64], stage=4, block='i')
    print(X.shape)
    # AVGPOOL. Use "X = AveragePooling2D(...)(X)"
    X = AveragePooling2D(pool_size=(8, 8))(X)
    
    # output layer
    X = Flatten()(X)
    X = Dense(classes, activation='softmax', name='fc' + str(classes), kernel_initializer = glorot_uniform(seed=0))(X)
    
    
    # Create model
    model = Model(inputs = X_input, outputs = X, name='ResNet56')

    return model

In [8]:
model = ResNet56(input_shape = (32, 32, 3), classes = 10)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
(x_train,y_train),(x_test,y_test) = cifar10.load_data()
x_train = x_train/255
x_test = x_test/255
y_train = keras.utils.to_categorical(y_train,10)
y_test = keras.utils.to_categorical(y_test,10)

(?, 32, 32, 16)
(?, 32, 32, 16)
(?, 16, 16, 32)
(?, 8, 8, 64)


In [17]:
model.fit(x_train, y_train, epochs = 50, batch_size = 128)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x18d0e5deeb8>

In [18]:
preds = model.evaluate(x_test, y_test, batch_size = 64)
print ("Loss = " + str(preds[0]))
print ("Test Accuracy = " + str(preds[1]))

Loss = 1.1632175745
Test Accuracy = 0.8007


In [12]:
model.fit(x_train, y_train, epochs = 10, batch_size = 128)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x17c88889390>

In [13]:
preds = model.evaluate(x_test, y_test, batch_size = 64)
print ("Loss = " + str(preds[0]))
print ("Test Accuracy = " + str(preds[1]))

Loss = 0.978442318726
Test Accuracy = 0.807
