# Transfer Learning using ResNet50

## Imports and Preprocessing

Import libraries

In [1]:
import numpy as np
import pandas as pd

import pickle
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow import set_random_seed
from keras.applications.resnet50 import ResNet50, preprocess_input as resnet50_preprocess
#from keras.applications.densenet import DenseNet201
#from keras.applications.mobilenet_v2 import MobileNetV2
#from keras.applications.nasnet import NASNetMobile
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint

from src.sampler import train_test_bootstrapper
from src.model_api import getPretrainedModel

Using TensorFlow backend.


Load data

In [2]:
# Load the pickled training array and its labels from data/derived.
# NOTE: unpickling can execute arbitrary code -- only open files produced by this project.
with open('data/derived/data_train.pickle', 'rb') as data_handle:
    data_train = pickle.load(data_handle)
with open('data/derived/labels_train.pickle', 'rb') as labels_handle:
    labels_train = pickle.load(labels_handle)

# Quick sanity check: the first axis of the array should match the label count.
print('Shape of data_train:', data_train.shape)
print('Length of labels_train:', len(labels_train))

Shape of data_train: (25361, 224, 224, 3)
Length of labels_train: 25361


Convert labels to integers for model training

In [3]:
# Fit an encoder on the raw string labels, then map every label to its integer
# class index. The fitted encoder stays available as `label_encoder`.
label_encoder = LabelEncoder().fit(labels_train)
labels_train_encoded = label_encoder.transform(labels_train)

# Show the first five raw labels, their encoded values, and the first five
# classes known to the encoder.
print('Original labels: {}'.format(labels_train[0:5]))
print('Encoded labels: {}'.format(labels_train_encoded[0:5]))
print('Label encoder classes: {}'.format(label_encoder.classes_[0:5]))

Original labels: ['w_025911c', 'new_whale', 'new_whale', 'new_whale', 'new_whale']
Encoded labels: [44  0  0  0  0]
Label encoder classes: ['new_whale' 'w_0003639' 'w_0003c59' 'w_0027efa' 'w_00289b1']


Compute sample size and number of classes

In [4]:
# Total number of training samples (length of the encoded label array).
sample_size = labels_train_encoded.shape[0]
# Number of distinct class indices present in the encoded labels.
num_classes = np.unique(labels_train_encoded).size
print('Sample size:', sample_size)
print('Number of clases:', num_classes)

Sample size: 25361
Number of clases: 5005


## Train ResNet50

ResNet Model 1 (Random Seed = 1, assuming no class imbalance)

In [5]:
# --- ResNet50 run 1: seeds, callbacks, model and augmentation ---------------
# Seed NumPy and TensorFlow *before* the network is built. TensorFlow reads the
# graph-level seed when random ops are created, so seeding only after model
# construction leaves any randomly initialised weights unseeded.
np.random.seed(1)
set_random_seed(1)

# Save the weights whenever val_loss improves. The run id is part of the file
# name so checkpoints from the different runs in this notebook stay
# distinguishable and cannot overwrite each other.
filepath = 'models/weights_resnet50_1-{epoch:02d}-{val_loss:.3f}.hdf5'
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', save_best_only=True, save_weights_only=True,
                             mode='min')
# Stop training after 2 epochs without an improvement in val_loss.
early_stop = EarlyStopping(patience=2, monitor='val_loss')

# ImageNet-pretrained ResNet50 without its top classifier, with global max pooling.
# getPretrainedModel comes from src.model_api; presumably it adds a
# `num_classes`-way head and freezes a fraction of the layers -- confirm there.
model = getPretrainedModel(ResNet50(include_top=False, weights='imagenet', pooling='max'),
                           num_classes=num_classes,
                           percent_layers_freeze=0.1)

batch_size = 32
# Augmentation applied on the fly to the training batches only.
image_gen = ImageDataGenerator(rotation_range=20,
                               width_shift_range=.1,
                               height_shift_range=.1,
                               shear_range=0.5,
                               zoom_range=(0.9, 1.1),
                               fill_mode='constant',
                               horizontal_flip=True)

In [6]:
# --- ResNet50 run 1: split, train, export weights ---------------------------
seed = 1  # single source for every seed used in this run

# Train/validation split produced by the project's bootstrapper (src.sampler).
X_train, X_test, y_train, y_test = train_test_bootstrapper(data_train, labels_train_encoded,
                                                           bootstrapper_size=sample_size,
                                                           class_imbalance=False, random_state=seed)

# ResNet50 input preprocessing for the images, one-hot encoding for the targets.
# NOTE(review): preprocess_input may modify float arrays in place; confirm the
# bootstrapper returns copies so `data_train` is not altered between runs.
X_train = resnet50_preprocess(X_train)
X_test = resnet50_preprocess(X_test)
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

# Endless stream of augmented training batches.
image_generator_samples = image_gen.flow(X_train, y_train, batch_size=batch_size, seed=seed)

np.random.seed(seed)
set_random_seed(seed)
train_size, epochs = X_train.shape[0], 10
# NOTE(review): steps_per_epoch counts batches, yet this divides by `epochs`
# rather than `batch_size`; confirm the intended number of passes per epoch.
hist = model.fit_generator(image_generator_samples, steps_per_epoch=2*train_size//epochs,
                           validation_data=(X_test, y_test), epochs=epochs, shuffle=True,
                           callbacks=[checkpoint, early_stop])

# Persist the weights of the model trained above.
model.save_weights('models/weights_resnet50_1.hdf5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


ResNet Model 2 (Random Seed = 2, assuming no class imbalance)

In [7]:
# --- ResNet50 run 2: seeds, callbacks, model and augmentation ---------------
# Seed NumPy and TensorFlow *before* the network is built. TensorFlow reads the
# graph-level seed when random ops are created, so seeding only after model
# construction leaves any randomly initialised weights unseeded.
np.random.seed(2)
set_random_seed(2)

# Save the weights whenever val_loss improves. The run id is part of the file
# name so checkpoints from the different runs in this notebook stay
# distinguishable and cannot overwrite each other.
filepath = 'models/weights_resnet50_2-{epoch:02d}-{val_loss:.3f}.hdf5'
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', save_best_only=True, save_weights_only=True,
                             mode='min')
# Stop training after 2 epochs without an improvement in val_loss.
early_stop = EarlyStopping(patience=2, monitor='val_loss')

# ImageNet-pretrained ResNet50 without its top classifier, with global max pooling.
# getPretrainedModel comes from src.model_api; presumably it adds a
# `num_classes`-way head and freezes a fraction of the layers -- confirm there.
model = getPretrainedModel(ResNet50(include_top=False, weights='imagenet', pooling='max'),
                           num_classes=num_classes,
                           percent_layers_freeze=0.1)

batch_size = 32
# Augmentation applied on the fly to the training batches only.
image_gen = ImageDataGenerator(rotation_range=20,
                               width_shift_range=.1,
                               height_shift_range=.1,
                               shear_range=0.5,
                               zoom_range=(0.9, 1.1),
                               fill_mode='constant',
                               horizontal_flip=True)

In [8]:
# --- ResNet50 run 2: split, train, export weights ---------------------------
seed = 2  # single source for every seed used in this run

# Train/validation split produced by the project's bootstrapper (src.sampler).
X_train, X_test, y_train, y_test = train_test_bootstrapper(data_train, labels_train_encoded,
                                                           bootstrapper_size=sample_size,
                                                           class_imbalance=False, random_state=seed)

# ResNet50 input preprocessing for the images, one-hot encoding for the targets.
# NOTE(review): preprocess_input may modify float arrays in place; confirm the
# bootstrapper returns copies so `data_train` is not altered between runs.
X_train = resnet50_preprocess(X_train)
X_test = resnet50_preprocess(X_test)
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

# Endless stream of augmented training batches.
image_generator_samples = image_gen.flow(X_train, y_train, batch_size=batch_size, seed=seed)

np.random.seed(seed)
set_random_seed(seed)
train_size, epochs = X_train.shape[0], 10
# NOTE(review): steps_per_epoch counts batches, yet this divides by `epochs`
# rather than `batch_size`; confirm the intended number of passes per epoch.
hist = model.fit_generator(image_generator_samples, steps_per_epoch=2*train_size//epochs,
                           validation_data=(X_test, y_test), epochs=epochs, shuffle=True,
                           callbacks=[checkpoint, early_stop])

# Persist the weights of the model trained above.
model.save_weights('models/weights_resnet50_2.hdf5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10


ResNet Model 3 (Random Seed = 1, considering class imbalance)

In [9]:
# --- ResNet50 run 3: seeds, callbacks, model and augmentation ---------------
# Seed NumPy and TensorFlow *before* the network is built. TensorFlow reads the
# graph-level seed when random ops are created, so seeding only after model
# construction leaves any randomly initialised weights unseeded.
np.random.seed(1)
set_random_seed(1)

# Save the weights whenever val_loss improves. The run id is part of the file
# name so checkpoints from the different runs in this notebook stay
# distinguishable and cannot overwrite each other.
filepath = 'models/weights_resnet50_3-{epoch:02d}-{val_loss:.3f}.hdf5'
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', save_best_only=True, save_weights_only=True,
                             mode='min')
# Stop training after 2 epochs without an improvement in val_loss.
early_stop = EarlyStopping(patience=2, monitor='val_loss')

# ImageNet-pretrained ResNet50 without its top classifier, with global max pooling.
# getPretrainedModel comes from src.model_api; presumably it adds a
# `num_classes`-way head and freezes a fraction of the layers -- confirm there.
model = getPretrainedModel(ResNet50(include_top=False, weights='imagenet', pooling='max'),
                           num_classes=num_classes,
                           percent_layers_freeze=0.1)

batch_size = 32
# Augmentation applied on the fly to the training batches only.
image_gen = ImageDataGenerator(rotation_range=20,
                               width_shift_range=.1,
                               height_shift_range=.1,
                               shear_range=0.5,
                               zoom_range=(0.9, 1.1),
                               fill_mode='constant',
                               horizontal_flip=True)

In [10]:
# --- ResNet50 run 3: split, train, export weights ---------------------------
seed = 1  # single source for every seed used in this run

# Train/validation split produced by the project's bootstrapper (src.sampler),
# this time with class_imbalance=True.
X_train, X_test, y_train, y_test = train_test_bootstrapper(data_train, labels_train_encoded,
                                                           bootstrapper_size=sample_size,
                                                           class_imbalance=True, random_state=seed)

# ResNet50 input preprocessing for the images, one-hot encoding for the targets.
# NOTE(review): preprocess_input may modify float arrays in place; confirm the
# bootstrapper returns copies so `data_train` is not altered between runs.
X_train = resnet50_preprocess(X_train)
X_test = resnet50_preprocess(X_test)
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

# Endless stream of augmented training batches.
image_generator_samples = image_gen.flow(X_train, y_train, batch_size=batch_size, seed=seed)

np.random.seed(seed)
set_random_seed(seed)
train_size, epochs = X_train.shape[0], 10
# NOTE(review): steps_per_epoch counts batches, yet this divides by `epochs`
# rather than `batch_size`; confirm the intended number of passes per epoch.
hist = model.fit_generator(image_generator_samples, steps_per_epoch=2*train_size//epochs,
                           validation_data=(X_test, y_test), epochs=epochs, shuffle=True,
                           callbacks=[checkpoint, early_stop])

# Persist the weights of the model trained above.
model.save_weights('models/weights_resnet50_3.hdf5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10


ResNet Model 4 (Random Seed = 2, considering class imbalance)

In [11]:
# --- ResNet50 run 4: seeds, callbacks, model and augmentation ---------------
# Seed NumPy and TensorFlow *before* the network is built. TensorFlow reads the
# graph-level seed when random ops are created, so seeding only after model
# construction leaves any randomly initialised weights unseeded.
np.random.seed(2)
set_random_seed(2)

# Save the weights whenever val_loss improves. The run id is part of the file
# name so checkpoints from the different runs in this notebook stay
# distinguishable and cannot overwrite each other.
filepath = 'models/weights_resnet50_4-{epoch:02d}-{val_loss:.3f}.hdf5'
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', save_best_only=True, save_weights_only=True,
                             mode='min')
# Stop training after 2 epochs without an improvement in val_loss.
early_stop = EarlyStopping(patience=2, monitor='val_loss')

# ImageNet-pretrained ResNet50 without its top classifier, with global max pooling.
# getPretrainedModel comes from src.model_api; presumably it adds a
# `num_classes`-way head and freezes a fraction of the layers -- confirm there.
model = getPretrainedModel(ResNet50(include_top=False, weights='imagenet', pooling='max'),
                           num_classes=num_classes,
                           percent_layers_freeze=0.1)

batch_size = 32
# Augmentation applied on the fly to the training batches only.
image_gen = ImageDataGenerator(rotation_range=20,
                               width_shift_range=.1,
                               height_shift_range=.1,
                               shear_range=0.5,
                               zoom_range=(0.9, 1.1),
                               fill_mode='constant',
                               horizontal_flip=True)

In [12]:
# --- ResNet50 run 4: split, train, export weights ---------------------------
seed = 2  # single source for every seed used in this run

# Train/validation split produced by the project's bootstrapper (src.sampler),
# this time with class_imbalance=True.
X_train, X_test, y_train, y_test = train_test_bootstrapper(data_train, labels_train_encoded,
                                                           bootstrapper_size=sample_size,
                                                           class_imbalance=True, random_state=seed)

# ResNet50 input preprocessing for the images, one-hot encoding for the targets.
# NOTE(review): preprocess_input may modify float arrays in place; confirm the
# bootstrapper returns copies so `data_train` is not altered between runs.
X_train = resnet50_preprocess(X_train)
X_test = resnet50_preprocess(X_test)
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

# Endless stream of augmented training batches.
image_generator_samples = image_gen.flow(X_train, y_train, batch_size=batch_size, seed=seed)

np.random.seed(seed)
set_random_seed(seed)
train_size, epochs = X_train.shape[0], 10
# NOTE(review): steps_per_epoch counts batches, yet this divides by `epochs`
# rather than `batch_size`; confirm the intended number of passes per epoch.
hist = model.fit_generator(image_generator_samples, steps_per_epoch=2*train_size//epochs,
                           validation_data=(X_test, y_test), epochs=epochs, shuffle=True,
                           callbacks=[checkpoint, early_stop])

# Persist the weights of the model trained above.
model.save_weights('models/weights_resnet50_4.hdf5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
