In [8]:
from keras.models import Sequential
from keras.layers import Activation, Conv2D, MaxPool2D, Dense
from keras.layers import BatchNormalization, Dropout, Flatten
from keras.regularizers import l2
from keras.callbacks import ReduceLROnPlateau
import keras
import numpy as np

This implementation uses Batch Normalization (BN) instead of Local Response Normalization (LRN).

In [9]:
# Build an AlexNet-style network as a Keras Sequential model:
# five convolutional stages followed by three fully connected layers.
model = Sequential()

# 1st layer (conv + pool + batchnorm)
# With a 224x224x3 input, the 11x11 / stride-4 'valid' convolution yields a 54x54x96 map.
model.add(Conv2D(filters=96, kernel_size=(11, 11), strides=(4, 4), padding='valid',
                 input_shape=(224, 224, 3)))
model.add(Activation('relu'))
model.add(MaxPool2D(pool_size=(3, 3), strides=(2, 2)))
model.add(BatchNormalization())

# 2nd layer (conv + pool + batchnorm)
model.add(Conv2D(filters=256, kernel_size=(5, 5), strides=(1, 1), padding='same',
                 kernel_regularizer=l2(5e-4)))
model.add(Activation('relu'))
model.add(MaxPool2D(pool_size=(3, 3), strides=(2, 2)))
model.add(BatchNormalization())

# 3rd layer (conv + batchnorm)      <--- note that the authors did not add a POOL layer here
model.add(Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), padding='same',
                 kernel_regularizer=l2(5e-4)))
model.add(Activation('relu'))
model.add(BatchNormalization())

# 4th layer (conv + batchnorm)      <--- same structure as layer 3
model.add(Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), padding='same',
                 kernel_regularizer=l2(5e-4)))
model.add(Activation('relu'))
model.add(BatchNormalization())

# 5th layer (conv + batchnorm + pool)
model.add(Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same',
                 kernel_regularizer=l2(5e-4)))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(3, 3), strides=(2, 2)))

# Flatten the CNN output so it can be fed to the fully connected layers
model.add(Flatten())

# 6th layer (dense + dropout)
model.add(Dense(units=4096, activation='relu'))
model.add(Dropout(0.5))

# 7th layer (dense + dropout)
model.add(Dense(units=4096, activation='relu'))
model.add(Dropout(0.5))

# 8th layer (softmax output layer over 1000 classes)
model.add(Dense(units=1000, activation='softmax'))

# Print the model summary
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_11 (Conv2D)           (None, 54, 54, 96)        34944     
_________________________________________________________________
activation_11 (Activation)   (None, 54, 54, 96)        0         
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 26, 26, 96)        0         
_________________________________________________________________
batch_normalization_5 (Batch (None, 26, 26, 96)        384       
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 26, 26, 256)       614656    
_________________________________________________________________
activation_12 (Activation)   (None, 26, 26, 256)       0         
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 12, 12, 256)      

In [10]:
# Multiply the learning rate by sqrt(0.1) (~0.316) whenever the validation loss plateaus;
# two consecutive reductions therefore amount to a 10x decrease.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=np.sqrt(0.1))

# SGD optimizer with a learning rate of 0.01 and momentum of 0.9.
# The `SGD` class with the `learning_rate` keyword is used because newer Keras releases
# no longer provide the lowercase `sgd` alias or the `lr` keyword.
optimizer = keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Train the model, passing reduce_lr through the `callbacks` argument.
# NOTE: X_train, y_train, X_test and y_test are not defined in the cells shown here; a
# data-loading cell must run first, otherwise this call raises NameError.
# The returned History object is kept so the training curves can be inspected afterwards.
history = model.fit(X_train, y_train, batch_size=128, epochs=90,
                    validation_data=(X_test, y_test),
                    verbose=2, callbacks=[reduce_lr])

NameError: name 'X_train' is not defined