https://github.com/keras-team/keras/blob/master/examples/mnist_transfer_cnn.py

**Transfer learning toy example**

- Train a simple convnet on the first 5 digits [0...4] of the MNIST dataset.
- Freeze the convolutional layers and fine-tune the dense/output layers to classify the remaining digits [5...9].

Get to 99.8% test accuracy after 5 epochs for the first five digits classifier and 99.2% for the last five digits after transfer + fine-tuning.

In [1]:
from __future__ import print_function 

import datetime
import keras 
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten 
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
now = datetime.datetime.now  # callable returning the current timestamp

batch_size = 128  # passed to model.fit as the batch size
num_classes = 5   # each task (digits 0-4 and digits 5-9) has five classes
epochs = 5        # passed to model.fit as the number of epochs

In [3]:
# Size of one input image (rows x columns).
img_rows = 28
img_cols = 28

# Convolution settings: number of filters and (square) kernel size.
filters = 32
kernel_size = 3

# Size of the pooling area for max pooling.
pool_size = 2


In [4]:
# Place the single grayscale channel where the active Keras data format
# expects it: first for 'channels_first', last otherwise (the TensorFlow
# backend reports 'channels_last').
input_shape = (
    (1, img_rows, img_cols)
    if K.image_data_format() == 'channels_first'
    else (img_rows, img_cols, 1)
)

In [5]:
def train_model(model, train, test, num_classes):
    """Compile ``model``, fit it on ``train`` and report metrics on ``test``.

    Args:
        model: Keras model; it is compiled and trained in place.
        train: ``(images, labels)`` pair used for fitting.
        test: ``(images, labels)`` pair used as validation data during
            fitting and for the final evaluation.
        num_classes: Number of classes for the one-hot label encoding.

    Reads the module-level ``input_shape``, ``batch_size``, ``epochs`` and
    ``now``. Returns nothing; the data shapes, the training time and the
    test score/accuracy are printed.
    """
    # Reshape every image to ``input_shape``, cast to float32 and divide the
    # pixel values by 255.
    train_images = train[0].reshape((train[0].shape[0],) + input_shape)
    test_images = test[0].reshape((test[0].shape[0],) + input_shape)
    train_images = train_images.astype('float32') / 255
    test_images = test_images.astype('float32') / 255

    print('x_train shape:', train_images.shape)
    print(train_images.shape[0], 'train samples')
    print(test_images.shape[0], 'test samples')

    # One-hot encode the integer class labels.
    train_targets = keras.utils.to_categorical(train[1], num_classes)
    test_targets = keras.utils.to_categorical(test[1], num_classes)

    # The model is (re)compiled on every call, so a model passed in again
    # after its layers were modified is compiled afresh before fitting.
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adadelta',
        metrics=['accuracy'],
    )

    started = now()
    model.fit(
        train_images,
        train_targets,
        batch_size=batch_size,
        epochs=epochs,
        verbose=1,
        validation_data=(test_images, test_targets),
    )
    print('Training time: %s' % (now() - started))

    # evaluate() returns the loss first, followed by the compiled metrics.
    results = model.evaluate(test_images, test_targets, verbose=0)
    print('Test score:', results[0])
    print('Test accuracy:', results[1])

In [6]:
# Load the MNIST train/test splits, then print the shape of the training
# image array and of a single training image.

(x_train, y_train), (x_test, y_test) = mnist.load_data()

print(x_train.shape)
print(x_train[0].shape)

(60000, 28, 28)
(28, 28)


In [7]:
# Split MNIST into two five-class tasks: digits below 5 and digits 5 and up.
# Labels of the second task are shifted down by 5 so they start at 0.

x_train_lt5, y_train_lt5 = x_train[y_train < 5], y_train[y_train < 5]
x_test_lt5, y_test_lt5 = x_test[y_test < 5], y_test[y_test < 5]

x_train_gte5, y_train_gte5 = x_train[y_train >= 5], y_train[y_train >= 5] - 5
x_test_gte5, y_test_gte5 = x_test[y_test >= 5], y_test[y_test >= 5] - 5

print(x_train_lt5.shape)
print(x_train_lt5[0].shape)

(30596, 28, 28)
(28, 28)


In [56]:
# Two groups of layers: the convolutional feature extractor and the dense
# classifier stacked on top of it. They are kept in separate lists so the
# feature group can be addressed on its own later.

feature_layers = [
    Conv2D(filters, kernel_size, padding='valid', input_shape=input_shape),
    Activation('relu'),
    Conv2D(filters, kernel_size),
    Activation('relu'),
    MaxPooling2D(pool_size=pool_size),
    Dropout(0.25),
    Flatten(),
]

classification_layers = [
    Dense(128),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
]

In [57]:
# Create the complete model: the feature layers followed by the
# classification layers, stacked sequentially.

model = Sequential(feature_layers + classification_layers)

In [76]:
# train the model on the 5-class classification task, digits [0...4]

train_model(model, (x_train_lt5, y_train_lt5), (x_test_lt5, y_test_lt5), num_classes)

x_train shape: (30596, 28, 28, 1)
30596 train samples
5139 test samples
Train on 30596 samples, validate on 5139 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training time: 0:03:53.497734
Test score: 0.0038775279982246635
Test accuracy: 0.9984432769021211


In [35]:
# Mark every feature (convolutional) layer as non-trainable before the model
# is trained on the second classification task, digits [5...9].

for feature_layer in feature_layers:
    feature_layer.trainable = False

In [36]:
# transfer: train the model on the new classification task, digits [5...9], using the new task's data

train_model(model, (x_train_gte5, y_train_gte5), (x_test_gte5, y_test_gte5), num_classes)

x_train shape: (29404, 28, 28, 1)
29404 train samples
4861 test samples
Train on 29404 samples, validate on 4861 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training time: 0:01:29.605572
Test score: 0.0218465242016587
Test accuracy: 0.9932112732901786


## transfer learning with VGG and MNIST

- **retrain the output dense layer only / use VGG as a feature extractor (bottleneck features)** The output layer of VGG16 is a softmax activation with 1000 categories. Remove this layer and replace it with a softmax layer of 10 categories. The VGG architecture is changed in this case.

- **freeze the weights of the first few layers** The first few layers capture universal features like curves and edges, which are also relevant to the new problem. The VGG architecture is not changed in this case, but the image size is **different** from the one (224 x 224) VGG was trained on. It is easier to retrain the VGG16 whose Dense layer has been replaced.

## VGG16 as feature extractor & retrain the replaced output Dense layer ONLY

### VGG16 has a requirement on minimum input size, which cannot be met by the MNIST input

In [104]:
# retrain output dense layer only

from keras.applications.vgg16 import VGG16
import numpy as np

import matplotlib.pyplot as plt
% matplotlib inline

In [135]:
# Load the MNIST train/test splits again for the VGG16 experiment.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [172]:
# Settings for the VGG16 experiment. These rebind the values used by the
# five-digit experiments earlier in the notebook.
img_rows = 28
img_cols = 28

batch_size = 64
num_classes = 10
epochs = 20

In [80]:
# Load VGG16 with ImageNet weights and without its top (classifier) layers,
# then print the layer summary. This rebinds `model`, replacing the small
# convnet built earlier in the notebook.
# NOTE(review): no input_shape is passed, so the summary shows unspecified
# spatial dimensions. 28x28 MNIST images may be too small for VGG16's
# pooling stages -- confirm the minimum input size in the Keras docs.

model = VGG16(weights = 'imagenet', include_top = False)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
__________

In [150]:
# Pick the per-image shape that matches the active Keras image data format,
# then reshape both image arrays to (num_samples,) + input_shape.

if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

In [167]:
# VGG is trained on color images, so the single grayscale channel is repeated
# three times for both the training and the testing images.
#
# The channel axis depends on the Keras image data format: for
# 'channels_first' the batch is (samples, 1, rows, cols) and the channel is
# axis 1; otherwise the channel is the last axis. Repeating along the last
# axis unconditionally would widen the images instead of adding channels in
# the 'channels_first' case.
#
# np.repeat is applied to the whole batch at once instead of image by image.

channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

train_color_channel = np.repeat(x_train, 3, axis=channel_axis)
test_color_channel = np.repeat(x_test, 3, axis=channel_axis)

In [169]:
# Convert the three-channel images to float32 arrays and divide the pixel
# values by 255.

x_train = np.array(train_color_channel, dtype='float32') / 255
x_test = np.array(test_color_channel, dtype='float32') / 255

print('x_train shape:', x_train.shape)
print('number of train samples: ', x_train.shape[0])
print('number of test samples: ', x_test.shape[0])

# One-hot encode the integer class labels.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train shape: (60000, 28, 28, 3)
number of train samples:  60000
number of test samples:  10000


In [None]:
# Run the training and testing images through the pretrained VGG16 (loaded
# without its top layers) and time the feature extraction.

now = datetime.datetime.now
t = now()

feature_train, feature_test = model.predict(x_train), model.predict(x_test)

print('Feature extracting time: %s' % (now() - t))

print(feature_train.shape)
print(feature_test.shape)

In [None]:
# Classification head: a single softmax layer trained on MNIST data. Its
# width follows `num_classes`, the same value used to one-hot encode the
# labels, rather than a separately hard-coded 10.
#
# The VGG16 base is deliberately not added to this model (an earlier version
# had `new_model.add(model)`); the head is fed feature arrays instead.

new_model = Sequential()
new_model.add(Dense(num_classes, activation='softmax'))

# Freeze every layer except the last one. With the softmax layer as the only
# layer this loop does nothing; it only takes effect if layers are added in
# front of it.
layers = new_model.layers
for layer in layers[:-1]:
    layer.trainable = False


In [None]:
# Reshape the extracted features of the training and testing data into 2-D
# arrays of shape (num_examples, num_features) so they can be fed to the
# Dense layer. Using -1 flattens all remaining axes, which gives the same
# result as before when the spatial dimensions are 1x1 and keeps working for
# any other spatial size.

feature_train = feature_train.reshape(feature_train.shape[0], -1)
feature_test = feature_test.reshape(feature_test.shape[0], -1)

# Compile the model.
new_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fit the model.
# - For the full convolutional model, x_train has to be of shape
#   (num_examples, nrows, ncols, nchannels).
# - Here only the Dense layer is trained, so the inputs are the flattened
#   feature arrays instead.

new_model.fit(
    feature_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(feature_test, y_test),
)


In [None]:
# Write the trained weights to disk, read them back from the same file, and
# evaluate the model on the test features.

weights_file = 'vgg_feature_extractor.h5'

new_model.save_weights(weights_file)
new_model.load_weights(weights_file)

new_model.evaluate(feature_test, y_test)