# Ensemble Version 1

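Sum ensemble of four ResNet50 and four DenseNet201 models: the per-class softmax outputs of all models are added together, and the five highest-scoring labels per image are submitted.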

## Imports

Import libraries

In [12]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd

import pickle
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.applications.resnet50 import ResNet50, preprocess_input as resnet50_preprocess
from keras.applications.densenet import DenseNet201, preprocess_input as densenet201_preprocess

from src.model_api import getPretrainedModel
from src.prediction import array_filter, array_to_string

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Import data

In [11]:
def _read_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


labels_train = _read_pickle('data/derived/labels_train.pickle')
data_test = _read_pickle('data/derived/data_test.pickle')
filenames_test = _read_pickle('data/derived/test_file_names.pickle')

# Quick sanity check on the size of everything that was just loaded
print('Length of labels_train:', len(labels_train))
print('Shape of data_test:', data_test.shape)
print('Length of filename_test:', len(filenames_test))

Length of labels_train: 25361
Shape of data_test: (7960, 224, 224, 3)
Length of filename_test: 7960


Convert labels to integers for model training

In [3]:
# Fit the encoder on the training labels and translate them to integer codes in one pass
label_encoder = LabelEncoder()
labels_train_encoded = label_encoder.fit_transform(labels_train)

# Preview the first five raw labels, their codes, and the encoder's class list
for template, values in (
    ('Original labels: {}', labels_train),
    ('Encoded labels: {}', labels_train_encoded),
    ('Label encoder classes: {}', label_encoder.classes_),
):
    print(template.format(values[:5]))

Original labels: ['w_025911c', 'new_whale', 'new_whale', 'new_whale', 'new_whale']
Encoded labels: [44  0  0  0  0]
Label encoder classes: ['new_whale' 'w_0003639' 'w_0003c59' 'w_0027efa' 'w_00289b1']


Compute sample size and number of classes

In [4]:
# One encoded label per training sample
sample_size = len(labels_train_encoded)
# Distinct encoded values give the number of target classes
num_classes = len(np.unique(labels_train_encoded))

print('Sample size:', sample_size)
print('Number of clases:', num_classes)

Sample size: 25361
Number of clases: 5005


## Create Base Models

Instantiate model architectures

In [5]:
def _build_finetune_models(architecture, n_models, n_classes, percent_layers_freeze=0.1):
    """Create `n_models` separate classifiers from one pretrained architecture.

    Parameters
    ----------
    architecture : callable
        Keras application constructor (here ``ResNet50`` or ``DenseNet201``).
        It is called once per model with ``include_top=False``,
        ``weights='imagenet'`` and ``pooling='max'``, so every model gets its
        own base network instance.
    n_models : int
        How many models to create.
    n_classes : int
        Forwarded to ``getPretrainedModel`` as ``num_classes``.
    percent_layers_freeze : float, optional
        Forwarded unchanged to ``getPretrainedModel``.

    Returns
    -------
    list
        The objects returned by ``getPretrainedModel``, in creation order.
    """
    return [
        getPretrainedModel(architecture(include_top=False, weights='imagenet', pooling='max'),
                           num_classes=n_classes,
                           percent_layers_freeze=percent_layers_freeze)
        for _ in range(n_models)
    ]


# Four instances per architecture, created in the same order as before
# (ResNet50 first, then DenseNet201). `num_classes` was computed earlier as
# len(set(labels_train_encoded)), so it is reused instead of being recomputed per model.
(resnet50_model_1, resnet50_model_2,
 resnet50_model_3, resnet50_model_4) = _build_finetune_models(ResNet50, 4, num_classes)
(densenet201_model_1, densenet201_model_2,
 densenet201_model_3, densenet201_model_4) = _build_finetune_models(DenseNet201, 4, num_classes)

Load model weights

In [6]:
# Pair every model with its own weights file, then load them in the listed order
_weight_files = (
    (resnet50_model_1, 'models/weights_resnet50_1.hdf5'),
    (resnet50_model_2, 'models/weights_resnet50_2.hdf5'),
    (resnet50_model_3, 'models/weights_resnet50_3.hdf5'),
    (resnet50_model_4, 'models/weights_resnet50_4.hdf5'),
    (densenet201_model_1, 'models/weights_densenet201_1.hdf5'),
    (densenet201_model_2, 'models/weights_densenet201_2.hdf5'),
    (densenet201_model_3, 'models/weights_densenet201_3.hdf5'),
    (densenet201_model_4, 'models/weights_densenet201_4.hdf5'),
)
for _model, _weights_path in _weight_files:
    _model.load_weights(_weights_path)

## Ensemble v1.1

All models' predictions are given equal weight.

Add model predictions

In [7]:
# Preprocess the test images ONCE per architecture and share the result across
# that architecture's four models, instead of preprocessing the same array for
# every predict() call.
# A copy of data_test is passed because Keras' preprocess_input may overwrite a
# NumPy input in place when its dtype is compatible. Calling it repeatedly on
# data_test itself could therefore feed later models repeatedly preprocessed
# images and leave data_test modified for the rest of the notebook.
# NOTE(review): whether in-place overwrite actually occurred depends on the
# dtype of data_test, which is not visible here - confirm.
resnet50_inputs = resnet50_preprocess(np.copy(data_test))
resnet50_preds_1 = resnet50_model_1.predict(resnet50_inputs)
resnet50_preds_2 = resnet50_model_2.predict(resnet50_inputs)
resnet50_preds_3 = resnet50_model_3.predict(resnet50_inputs)
resnet50_preds_4 = resnet50_model_4.predict(resnet50_inputs)
del resnet50_inputs  # release the preprocessed copy before building the next one

densenet201_inputs = densenet201_preprocess(np.copy(data_test))
densenet201_preds_1 = densenet201_model_1.predict(densenet201_inputs)
densenet201_preds_2 = densenet201_model_2.predict(densenet201_inputs)
densenet201_preds_3 = densenet201_model_3.predict(densenet201_inputs)
densenet201_preds_4 = densenet201_model_4.predict(densenet201_inputs)
del densenet201_inputs

# Unweighted ensemble: element-wise sum of all eight prediction arrays
overall_preds = (
    resnet50_preds_1 + resnet50_preds_2 + resnet50_preds_3 + resnet50_preds_4
    + densenet201_preds_1 + densenet201_preds_2 + densenet201_preds_3 + densenet201_preds_4
)

For each row of the summed predictions, take the labels with the 5 highest scores and join them into a single space-separated string

In [16]:
# Class names in encoder order, handed to array_filter as its `labels` argument
class_labels = label_encoder.classes_

# Row by row: array_filter reduces each score vector (n_top=5), then
# array_to_string collapses each filtered row into one value
top5_indices = np.apply_along_axis(array_filter, 1, overall_preds, n_top=5, labels=class_labels)
predictions_array = np.apply_along_axis(array_to_string, 1, top5_indices)

Create submission DataFrame and export as CSV file

In [19]:
# One row per test image: its file name and the predicted label string
submission_columns = {'Image': filenames_test, 'Id': predictions_array}
submission_df = pd.DataFrame(submission_columns)

# Write the submission file without the index column, then preview it
submission_df.to_csv('submission_v1.csv', index=False)
print(submission_df.head())

           Image                                                 Id
0  c303faac6.jpg  new_whale w_17b0d3a w_789c969 w_67a9841 w_a9304b9
1  96c2b7290.jpg  new_whale w_af367c3 w_8c25681 w_6822dbc w_f765256
2  69f6cd44f.jpg  new_whale w_23a388d w_d405854 w_5773c71 w_03670aa
3  a965dea33.jpg  new_whale w_1f0cf0a w_3de579a w_985d205 w_cd4cb49
4  9a225e056.jpg  new_whale w_5a2634c w_700ebb4 w_0a155b9 w_23a388d


Kaggle score: 0.286

## Ensemble v1.2

Each model's predictions are weighted by the accuracy score that model achieved during the training and validation phase.

Add model predictions

In [20]:
# The per-model predictions depend only on the loaded models and data_test, so
# the arrays produced by the v1.1 prediction cell are reused here. Re-running
# all eight networks over the whole test set would only reproduce them.
# This cell therefore requires the v1.1 prediction cell to have been executed.
model_preds = [
    resnet50_preds_1, resnet50_preds_2, resnet50_preds_3, resnet50_preds_4,
    densenet201_preds_1, densenet201_preds_2, densenet201_preds_3, densenet201_preds_4,
]

# Per-model weights, in the same order as `model_preds`.
# NOTE(review): the DenseNet201 weights mirror the ResNet50 weights exactly -
# confirm these are the intended per-model accuracy scores.
model_weights = [0.38, 0.38, 0.58, 0.57,
                 0.38, 0.38, 0.58, 0.57]

# Weighted ensemble: sum of weight * prediction, accumulated left to right
overall_preds = sum(weight * preds for weight, preds in zip(model_weights, model_preds))

For each row of the weighted summed predictions, take the labels with the 5 highest scores and join them into a single space-separated string

In [21]:
# Class names in encoder order, handed to array_filter as its `labels` argument
class_labels = label_encoder.classes_

# Apply array_filter (n_top=5) to every row of the weighted scores, then
# array_to_string to every filtered row
top5_indices = np.apply_along_axis(array_filter, 1, overall_preds, n_top=5, labels=class_labels)
predictions_array = np.apply_along_axis(array_to_string, 1, top5_indices)

Create submission DataFrame and export as CSV file

In [22]:
# One row per test image: its file name and the predicted label string
submission_columns = {'Image': filenames_test, 'Id': predictions_array}
submission_df = pd.DataFrame(submission_columns)

# Write the weighted-ensemble submission without the index column, then preview it
submission_df.to_csv('submission_v2.csv', index=False)
print(submission_df.head())

           Image                                                 Id
0  c303faac6.jpg  new_whale w_17b0d3a w_67a9841 w_a9304b9 w_789c969
1  96c2b7290.jpg  new_whale w_af367c3 w_8c25681 w_6822dbc w_f765256
2  69f6cd44f.jpg  new_whale w_d405854 w_23a388d w_5773c71 w_fd3e556
3  a965dea33.jpg  new_whale w_1f0cf0a w_cd4cb49 w_3de579a w_343f088
4  9a225e056.jpg  new_whale w_5a2634c w_700ebb4 w_0a155b9 w_17b0d3a


Kaggle score: 0.286