# Transfer Learning

## Imports and Preprocess

Import libraries

In [1]:
import numpy as np
import pandas as pd
import pickle
from src.sampler import class_weighter, train_test_bootstrapper
from src.model_api import getPretrainedModel

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from imblearn.over_sampling import SMOTE

from tensorflow import set_random_seed
from keras.applications.resnet50 import ResNet50
from keras.applications.densenet import DenseNet201
from keras.applications.mobilenet_v2 import MobileNetV2
from keras.applications.nasnet import NASNetMobile
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint

Using TensorFlow backend.


Import data

In [2]:
with open('data/derived/data_train.pickle', 'rb') as file:
    data_train = pickle.load(file)
with open('data/derived/labels_train.pickle', 'rb') as file:
    labels_train = pickle.load(file)
print('Shape of data_train:', data_train.shape)
print('Length of labels_train:', len(labels_train))

Shape of data_train: (25361, 224, 224, 3)
Length of labels_train: 25361


Convert labels to integers for model training

In [3]:
label_encoder = LabelEncoder()
labels_train_encoded = label_encoder.fit_transform(labels_train)
print('Original labels: {}'.format(labels_train[:5]))
print('Encoded labels: {}'.format(labels_train_encoded[:5]))
print('Label encoder classes: {}'.format(label_encoder.classes_[:5]))

Original labels: ['w_025911c', 'new_whale', 'new_whale', 'new_whale', 'new_whale']
Encoded labels: [44  0  0  0  0]
Label encoder classes: ['new_whale' 'w_0003639' 'w_0003c59' 'w_0027efa' 'w_00289b1']


Compute number of classes

In [4]:
num_classes = len(set(labels_train_encoded))
print('Number of clases:', num_classes)

Number of clases: 5005


Obtain class weights

In [5]:
class_weight_dict = class_weighter(labels_train_encoded)
print(class_weight_dict[0], class_weight_dict[1], class_weight_dict[2])

7.760761589403974e-05 0.75 0.75


## Determine performance of pre-trained models

ResNet50

In [5]:
filepath = 'models/weights-resnet50-{epoch:02d}-{val_loss:.3f}.hdf5'
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', save_best_only=True, save_weights_only=True,
                             mode='min')
early_stop = EarlyStopping(patience=2, monitor='val_loss')

model_resnet50 = getPretrainedModel(ResNet50(include_top=False, weights='imagenet', pooling='max'),
                                   num_classes=len(set(labels_train_encoded)),
                                   num_layers_freeze=0)

batch_size = 32
image_gen = ImageDataGenerator(rotation_range=20,
                               width_shift_range=.1,
                               height_shift_range=.1,
                               shear_range=0.5,
                               zoom_range=(0.9, 1.1),
                               fill_mode='constant',
                               horizontal_flip=True)

In [7]:
val_acc_list = []
for loop_no in range(5):
    
    X_train, X_test, y_train, y_test = train_test_bootstrapper(data_train, labels_train_encoded,
                                                               bootstrapper_size=2000,
                                                               random_state=2018+loop_no)
    
    y_train = to_categorical(y_train, num_classes=num_classes)
    y_test = to_categorical(y_test, num_classes=num_classes)
    image_generator_samples = image_gen.flow(X_train,
                                         y_train,
                                         batch_size=batch_size,
                                         seed=2018+loop_no)
    np.random.seed(2018+loop_no)
    set_random_seed(2018+loop_no)
    train_size, epochs = X_train.shape[0], 10
    hist = model_resnet50.fit_generator(image_generator_samples,
                                        steps_per_epoch=train_size//epochs,
                                        validation_data=(X_test, y_test),
                                        epochs=epochs,
                                        shuffle=True,
                                        callbacks=[checkpoint, early_stop])
    val_acc_list.append(max(hist.history['val_acc']))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 1/10
Epoch 2/10
Epoch 3/10


In [8]:
print('Mean of accuracy scores:', np.mean(val_acc_list))
print('SD of accuracy scores:', np.std(val_acc_list))

Mean of accuracy scores: 0.39530605764757765
SD of accuracy scores: 0.0007033052258070625


DenseNet201