In [1]:
# for garbage collection
import gc

# for warnings
import warnings
warnings.filterwarnings("ignore")

# utility libraries
import os
import copy
import tqdm
import numpy as np 
import pandas as pd 
import cv2, random, time, shutil, csv
import tensorflow as tf
import math

# keras libraries
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.layers import BatchNormalization, Dense, GlobalAveragePooling2D, Lambda, Dropout, InputLayer, Input
from keras.utils import to_categorical
from keras import backend as K

In [2]:
# set image size here
img_size = 331

# Dataset root. Defaults to the original local path, but can be redirected with
# the DOG_BREED_DATA_DIR environment variable without editing the notebook.
data_dir = os.environ.get(
    'DOG_BREED_DATA_DIR',
    '/Users/mac/Documents/Personal Material/cv/project/dog-breed-prediction/')

# labels.csv maps every training image id to its breed
data_df = pd.read_csv(os.path.join(data_dir, 'labels.csv'))
class_names = sorted(data_df['breed'].unique())
print(f"No. of classes read - {len(class_names)}")

# Training image file names in sorted order. Hidden files (e.g. macOS
# .DS_Store) are skipped because they have no row in labels.csv.
images_list = sorted(
    name for name in os.listdir(os.path.join(data_dir, 'train'))
    if not name.startswith('.'))


No. of classes read - 120


In [34]:
import pickle

# Persist the sorted breed names so a prediction index can be mapped back to
# its label outside this notebook.
with open('class_names.pkl', 'wb') as class_names_file:
    pickle.dump(class_names, class_names_file)

In [22]:
# Sanity check: breed recorded in labels.csv for the first training image
first_image_id = images_list[0].split('.')[0]
data_df[data_df['id'] == first_image_id].iloc[0, 1]

'boston_bull'

In [23]:
# Build both lookups once instead of scanning data_df and class_names for
# every image. keep='first' mirrors the previous `.iloc[0, 1]` behaviour
# should an id ever appear more than once.
unique_labels = data_df.drop_duplicates(subset='id', keep='first')
breed_by_id = dict(zip(unique_labels['id'], unique_labels['breed']))
class_index_by_name = {name: idx for idx, name in enumerate(class_names)}

X = []  # resized RGB images
Y = []  # integer class index for each image, aligned with X
for image in tqdm.tqdm(images_list):
    # The image id is the file name without its extension
    image_id = os.path.splitext(image)[0]
    cls_index = class_index_by_name[breed_by_id[image_id]]

    # Reading RGB Images
    image_path = os.path.join(data_dir, 'train', image)
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        # cv2.imread reports failure by returning None instead of raising
        raise IOError(f"Could not read training image: {image_path}")
    orig_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    res_image = cv2.resize(orig_image, (img_size, img_size))
    X.append(res_image)
    Y.append(cls_index)

  0%|          | 0/10222 [00:00<?, ?it/s]

100%|██████████| 10222/10222 [00:23<00:00, 441.36it/s]


In [24]:
# Converting to arrays
print(len(X), len(Y))
Xarr = np.array(X)
Yarr = np.array(Y).reshape(-1,1)

del(X)
print(Xarr.shape, Yarr.shape)
gc.collect()

10222 10222
(10222, 331, 331, 3) (10222, 1)


418

In [25]:
# converting labels to one hot
# Pin the width to the number of known breeds so the encoding always lines up
# with class_names, even if the highest class index were missing from Y.
Yarr_hot = to_categorical(Y, num_classes=len(class_names))
print(Xarr.shape, Yarr_hot.shape)

(10222, 331, 331, 3) (10222, 120)


In [6]:
# FEATURE EXTRACTION OF TRAINING ARRAYS
AUTO = tf.data.experimental.AUTOTUNE
def get_features(model_name, data_preprocessor, data, batch_size=64):
    '''
    Extract globally average-pooled backbone features from augmented images.

    Each image is randomly flipped left/right and has its brightness randomly
    shifted before extraction. No seed is set here, so repeated calls on the
    same data can return different features.

    Args:
        model_name: model constructor, called with `weights='imagenet'`,
            `include_top=False` and `input_shape`.
        data_preprocessor: callable wrapped in a Lambda layer and applied to
            the raw input before the backbone.
        data: array of images; `data.shape[1:]` is used as the model input
            shape.
        batch_size: number of images per prediction batch. Defaults to 64,
            the value that used to be hard-coded.

    Returns:
        The array produced by `predict` on the pooled backbone output. Only
        the features are returned; the extractor model is discarded.
    '''
    dataset = tf.data.Dataset.from_tensor_slices(data)

    def preprocess(x):
        # Random augmentation applied independently to every image
        x = tf.image.random_flip_left_right(x)
        x = tf.image.random_brightness(x, 0.5)
        return x

    ds = dataset.map(preprocess, num_parallel_calls=AUTO).batch(batch_size)

    input_size = data.shape[1:]
    #Prepare pipeline.
    input_layer = Input(input_size)
    preprocessor = Lambda(data_preprocessor)(input_layer)

    base_model = model_name(weights='imagenet', include_top=False,
                                input_shape=input_size)(preprocessor)

    # Collapse the spatial dimensions so each image yields one feature vector
    avg = GlobalAveragePooling2D()(base_model)
    feature_extractor = Model(inputs = input_layer, outputs = avg)


    #Extract feature.
    feature_maps = feature_extractor.predict(ds, verbose=1)
    print('Feature maps shape: ', feature_maps.shape)

    # Drop the references to the model and dataset and collect garbage before
    # the next backbone is built
    del(feature_extractor, base_model, preprocessor, dataset, ds)
    gc.collect()
    return feature_maps

In [8]:
# FEATURE EXTRACTION OF VALIDATION AND TESTING ARRAYS
def get_valfeatures(model_name, data_preprocessor, data, batch_size=64):
    '''
    Extract globally average-pooled backbone features without augmentation.

    Same pipeline as the training feature extractor, except that no image
    augmentation is applied. Used for validation and test data.

    Args:
        model_name: model constructor, called with `weights='imagenet'`,
            `include_top=False` and `input_shape`.
        data_preprocessor: callable wrapped in a Lambda layer and applied to
            the raw input before the backbone.
        data: array of images; `data.shape[1:]` is used as the model input
            shape.
        batch_size: number of images per prediction batch. Defaults to 64,
            the value that used to be hard-coded.

    Returns:
        The array produced by `predict` on the pooled backbone output.
    '''

    ds = tf.data.Dataset.from_tensor_slices(data).batch(batch_size)

    input_size = data.shape[1:]
    #Prepare pipeline.
    input_layer = Input(input_size)
    preprocessor = Lambda(data_preprocessor)(input_layer)

    base_model = model_name(weights='imagenet', include_top=False,
                                input_shape=input_size)(preprocessor)

    # Collapse the spatial dimensions so each image yields one feature vector
    avg = GlobalAveragePooling2D()(base_model)
    feature_extractor = Model(inputs = input_layer, outputs = avg)
    #Extract feature.
    feature_maps = feature_extractor.predict(ds, verbose=1)
    print('Feature maps shape: ', feature_maps.shape)

    # Release the backbone before the next one is built, as the training
    # extractor does
    del(feature_extractor, base_model, preprocessor, ds)
    gc.collect()
    return feature_maps

In [9]:
# RETURNING CONCATENATED FEATURES USING MODELS AND PREPROCESSORS
def get_concat_features(feat_func, models, preprocs, array):
    '''
    Run `feat_func` once per (model, preprocessor) pair and join the results.

    Args:
        feat_func: callable invoked as `feat_func(model, preproc, array)`;
            must return an array whose last axis holds the features.
        models: sequence of model constructors (each needs a `__name__`).
        preprocs: sequence of preprocessing callables, index-aligned with
            `models` (each needs a `__name__`).
        array: input data handed unchanged to every `feat_func` call.

    Returns:
        The per-model feature arrays concatenated along the last axis, in the
        order of `models`.

    Raises:
        ValueError: if `models` and `preprocs` differ in length. This is
            checked up front so a mismatch does not surface only after some
            extractions have already run.
    '''
    if len(models) != len(preprocs):
        raise ValueError(
            "models and preprocs must have the same length, "
            f"got {len(models)} and {len(preprocs)}")

    print(f"Beggining extraction with {feat_func.__name__}\n")
    feats_list = []

    for model, preproc in zip(models, preprocs):

        print(f"\nStarting feature extraction with {model.__name__} using {preproc.__name__}\n")
        # applying the extraction function and storing in list
        feats_list.append(feat_func(model, preproc, array))

    # features concatenating
    final_feats = np.concatenate(feats_list, axis=-1)
    # memory saving: the per-model arrays are now duplicated in final_feats
    del feats_list
    gc.collect()

    return final_feats

In [12]:
# DEFINING models and preprocessors imports
#
# Each of the four modules imported below exposes its preprocessing function
# under the same name, `preprocess_input`, so every import rebinds that name.
# The alias on the line after each import must be taken before the next import
# runs; reordering these statements would point an alias at the wrong function.

from keras.applications.inception_v3 import InceptionV3, preprocess_input
inception_preprocessor = preprocess_input

from keras.applications.xception import Xception, preprocess_input
xception_preprocessor = preprocess_input

from keras.applications.nasnet import NASNetLarge, preprocess_input
nasnet_preprocessor = preprocess_input

from keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input
inc_resnet_preprocessor = preprocess_input

# Backbones and their preprocessors, index-aligned: models[i] is paired with
# preprocs[i] by get_concat_features.
models = [InceptionV3,  InceptionResNetV2, Xception, NASNetLarge]
preprocs = [inception_preprocessor,  inc_resnet_preprocessor, 
            xception_preprocessor, nasnet_preprocessor]


In [13]:
# calculating features of the data
# Augmented extraction with every backbone, concatenated per image
final_train_features = get_concat_features(
    feat_func=get_features,
    models=models,
    preprocs=preprocs,
    array=Xarr,
)

print('Final feature maps shape', final_train_features.shape)

Beggining extraction with get_features


Starting feature extraction with InceptionV3 using preprocess_input

Feature maps shape:  (10222, 2048)

Starting feature extraction with InceptionResNetV2 using preprocess_input

Feature maps shape:  (10222, 1536)

Starting feature extraction with Xception using preprocess_input

Feature maps shape:  (10222, 2048)

Starting feature extraction with NASNetLarge using preprocess_input

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/nasnet/NASNet-large-no-top.h5
Feature maps shape:  (10222, 4032)
Final feature maps shape (10222, 9664)


# Model Training

In [14]:
from keras.callbacks import EarlyStopping

# Early stopping on validation accuracy with patience 10; the best weights
# seen during training are restored when it triggers.
EarlyStop_callback = EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True,
    verbose=0,
)

my_callback = [EarlyStop_callback]

In [26]:
from sklearn.model_selection import StratifiedKFold

# 3-fold stratified split on the integer labels; fixed seed for repeatable folds
splits = list(StratifiedKFold(n_splits=3, shuffle=True, random_state=10).split(final_train_features, Y))

trained_models = []  # one classifier head per fold, reused later for ensembling
accuracy = []
losses = []

# Size the output layer from the one-hot labels instead of hard-coding 120
num_classes = Yarr_hot.shape[1]

# Make sure the output directory exists before the first model is saved
os.makedirs('models', exist_ok=True)

#Prepare And Train DNN model

for i, (train_idx, valid_idx) in enumerate(splits):

    print(f"\nStarting fold {i+1}\n")
    x_train_fold = final_train_features[train_idx, :]
    y_train_fold = Yarr_hot[train_idx, :]
    x_val_fold = final_train_features[valid_idx]
    y_val_fold = Yarr_hot[valid_idx, :]

    # Classifier head on top of the concatenated backbone features
    dnn = keras.models.Sequential([
        InputLayer(final_train_features.shape[1:]),
        Dropout(0.7),
        Dense(num_classes, activation='softmax')
    ])

    dnn.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

    print("Training...")
    #Train simple DNN on extracted features.
    h = dnn.fit(x_train_fold, y_train_fold,
                batch_size=128,
                epochs=80,
                verbose=0,
                validation_data = (x_val_fold, y_val_fold),
                callbacks=my_callback)  # max 95.07

    # NOTE(review): early stopping monitors this same validation fold, so the
    # score reported below is measured on data used for model selection.
    print("Evaluating model ...")
    model_res = dnn.evaluate(x_val_fold, y_val_fold)

    # evaluate returns [loss, accuracy] for the metrics compiled above
    accuracy.append(model_res[1])
    losses.append(model_res[0])
    trained_models.append(dnn)

    # Save the trained models
    model_name = f'model_fold_{i+1}.h5'
    model_path = os.path.join('models', model_name)
    dnn.save(model_path)
    print(f"Saved model {model_name} at {model_path}")

print('\n CV Score -')
print(f"\nAccuracy - {sum(accuracy)/len(accuracy)}")
print(f"\nLoss - {sum(losses)/len(losses)}")


Starting fold 1

Training...
Evaluating model ...
Saved model model_fold_1.h5 at models/model_fold_1.h5

Starting fold 2

Training...
Evaluating model ...
Saved model model_fold_2.h5 at models/model_fold_2.h5

Starting fold 3

Training...
Evaluating model ...
Saved model model_fold_3.h5 at models/model_fold_3.h5

 CV Score -

Accuracy - 0.934259295463562

Loss - 0.22888554632663727


In [32]:
# Save the trained models
# Writes one HDF5 file per fold under models/, numbered from 1
for fold_no, fold_model in enumerate(trained_models, start=1):
    fold_model.save(os.path.join('models', f'trained_model_fold_{fold_no}.h5'))

In [33]:
# Save the list of model names
# One file name per trained fold model, numbered from 1, written one per line
model_names = []
for fold_no in range(1, len(trained_models) + 1):
    model_names.append(f'model_fold_{fold_no}.h5')

with open('model_names.txt', 'w') as names_file:
    names_file.write('\n'.join(model_names))

In [46]:
# SAVING RAM
# The training features, labels and raw images are not used after this point

del final_train_features, Y, Yarr_hot, Xarr
gc.collect()

4527

In [47]:
# TEST IMAGES
# Sorted test file names; hidden files (e.g. macOS .DS_Store) are not images
test_images_list = sorted(
    name for name in os.listdir(os.path.join(data_dir, 'test'))
    if not name.startswith('.'))
X = []
for image in tqdm.tqdm(test_images_list):

    image_path = os.path.join(data_dir, 'test', image)
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        # cv2.imread reports failure by returning None instead of raising
        raise IOError(f"Could not read test image: {image_path}")
    orig_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    res_image = cv2.resize(orig_image, (img_size, img_size))
    X.append(res_image)

# Stack into one batch array, row i corresponding to test_images_list[i]
Xtesarr = np.array(X)

del(X)
gc.collect()

Xtesarr.shape

100%|██████████| 10357/10357 [00:16<00:00, 642.11it/s]


(10357, 331, 331, 3)

In [49]:
# FEATURE EXTRACTION OF TEST IMAGES
# NOTE(review): the output recorded for this cell reports 5632 features
# (three backbones), while the training run reports 9664 (four). The fold
# classifiers need the same feature width they were trained on, so re-run
# this cell with the same `models` list used for training.
test_features = get_concat_features(
    feat_func=get_valfeatures,
    models=models,
    preprocs=preprocs,
    array=Xtesarr,
)

# The raw test images are not needed once their features exist
del Xtesarr
gc.collect()
print('Final feature maps shape', test_features.shape)

Beggining extraction with get_valfeatures


Starting feature extraction with InceptionV3 using preprocess_input

Feature maps shape:  (10357, 2048)

Starting feature extraction with InceptionResNetV2 using preprocess_input

Feature maps shape:  (10357, 1536)

Starting feature extraction with Xception using preprocess_input

Feature maps shape:  (10357, 2048)
Final feature maps shape (10357, 5632)


In [50]:
# Average the class probabilities predicted by every fold classifier.
# The divisor follows the number of trained models instead of a hard-coded 3,
# so the result stays a true mean if the number of folds changes.
n_models = len(trained_models)
y_pred_norm = trained_models[0].predict(test_features, batch_size=128) / n_models
for dnn in trained_models[1:]:
    y_pred_norm += dnn.predict(test_features, batch_size=128) / n_models

y_pred_norm.shape



(10357, 120)

In [74]:
# Pick a uniformly random position in the test image list
random_index = random.randrange(len(test_images_list))


In [29]:
import random

# NOTE(review): `plt` is used below but no matplotlib import is visible in
# this notebook — confirm `import matplotlib.pyplot as plt` runs beforehand.

# Select a random test image index
random_index = random.randint(0, len(test_images_list) - 1)

# Retrieve the corresponding test image and its predicted probabilities
image_path = os.path.join(data_dir, 'test', test_images_list[random_index])
predicted_probs = y_pred_norm[random_index]

# Get the predicted breed label (class with the highest averaged probability)
predicted_label_index = predicted_probs.argmax()
predicted_label = class_names[predicted_label_index]

# Display the image
image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
fig, ax = plt.subplots()
ax.imshow(image)
ax.axis('off')
ax.set_title(f'Predicted Label: {predicted_label}')
plt.show()

# Plot the predicted probabilities
fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(class_names, predicted_probs)
ax.tick_params(axis='x', labelrotation=90)
ax.set_xlabel('Dog Breed')
ax.set_ylabel('Probability')
ax.set_title('Predicted Probabilities')
plt.show()



NameError: name 'test_images_list' is not defined

In [82]:
# Look up one image id in the training labels; an empty frame means no row matches
data_df.loc[data_df['id'].eq('bb0c7d7af4bdc0d3646afaf1339a15f2')]

Unnamed: 0,id,breed


In [84]:
# Inspect the columns, dtypes and non-null counts of the labels table
data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10222 entries, 0 to 10221
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      10222 non-null  object
 1   breed   10222 non-null  object
dtypes: object(2)
memory usage: 159.8+ KB


 Ensemble learning is a machine learning technique that combines multiple individual models to create a more powerful and accurate model. The idea behind ensemble learning is that by combining the predictions of multiple models, you can leverage the strengths and compensate for the weaknesses of each individual model, leading to improved overall performance.

In this notebook, four ImageNet-pretrained networks (InceptionV3, InceptionResNetV2, Xception and NASNetLarge) are used as fixed feature extractors; they are not trained further. The features extracted by each network are concatenated into a single feature vector, which is the input to a small classifier made of a Dropout layer followed by a Dense softmax layer. One such classifier is trained per cross-validation fold, and during prediction the outputs of the trained classifiers are combined by taking the average of their predictions.

By combining multiple models, each with its own unique approach or architecture, ensemble learning can help improve the accuracy and generalization of the final model. It can also enhance the model's ability to handle different types of inputs or capture diverse patterns in the data.

Ensemble learning is a powerful technique that has been widely used in various machine learning tasks, including classification, regression, and anomaly detection. It offers a way to leverage the benefits of different models and improve overall performance.

In [3]:
#Test on new image
image_path = '/Users/mac/Desktop/German-Shepherd-dog-Alsatian.jpg'
bgr_image = cv2.imread(image_path)
if bgr_image is None:
    # cv2.imread reports failure by returning None instead of raising
    raise FileNotFoundError(f"Could not read image: {image_path}")
orig_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
res_image = cv2.resize(orig_image, (img_size, img_size))

# Batch holding the single resized image
Xtesarr = np.array([res_image])

# FEATURE EXTRACTION OF TEST IMAGES
test_features = get_concat_features(get_valfeatures, models, preprocs, Xtesarr)

# Average the class probabilities over all trained fold classifiers; the
# divisor follows the number of models instead of a hard-coded 3.
n_models = len(trained_models)
y_pred_norm = trained_models[0].predict(test_features, batch_size=128) / n_models
for dnn in trained_models[1:]:
    y_pred_norm += dnn.predict(test_features, batch_size=128) / n_models

NameError: name 'cv2' is not defined

In [2]:
# NOTE(review): `plt` is used below but no matplotlib import is visible in
# this notebook — confirm `import matplotlib.pyplot as plt` runs beforehand.

# Probabilities for the first (only) image in the batch
predicted_probs = y_pred_norm[0]

# Get the predicted breed label (class with the highest averaged probability)
predicted_label_index = predicted_probs.argmax()
predicted_label = class_names[predicted_label_index]

# Display the image
image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
fig, ax = plt.subplots()
ax.imshow(image)
ax.axis('off')
ax.set_title(f'Predicted Label: {predicted_label}')
plt.show()

# Plot the predicted probabilities
fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(class_names, predicted_probs)
ax.tick_params(axis='x', labelrotation=90)
ax.set_xlabel('Dog Breed')
ax.set_ylabel('Probability')
ax.set_title('Predicted Probabilities')
plt.show()

NameError: name 'y_pred_norm' is not defined

In [27]:
def predict_image(image_path):
    """Predict the breed name for a single image file.

    The image is read, converted from BGR to RGB, resized to
    `img_size` x `img_size`, passed through every backbone in `models`
    without augmentation, and classified by each model in `trained_models`.
    The class probabilities are averaged and the name of the most probable
    class is returned.

    Args:
        image_path: path of the image file to classify.

    Returns:
        The entry of `class_names` with the highest averaged probability.

    Raises:
        FileNotFoundError: if the image cannot be read from `image_path`.
    """
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        # cv2.imread reports failure by returning None instead of raising
        raise FileNotFoundError(f"Could not read image: {image_path}")
    orig_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    res_image = cv2.resize(orig_image, (img_size, img_size))

    # Batch holding the single resized image
    X_test = np.array([res_image])

    test_features = get_concat_features(get_valfeatures, models, preprocs, X_test)

    # Mean of the per-model probabilities; the divisor follows the number of
    # trained models instead of a hard-coded 3.
    n_models = len(trained_models)
    y_pred_norm = trained_models[0].predict(test_features, batch_size=128) / n_models
    for dnn in trained_models[1:]:
        y_pred_norm += dnn.predict(test_features, batch_size=128) / n_models
    predicted_probs = y_pred_norm[0]

    # Get the predicted breed label
    predicted_label_index = np.argmax(predicted_probs)
    predicted_label = class_names[predicted_label_index]

    return predicted_label


In [28]:
# Run the single-image prediction on a local sample photo
predict_image('/Users/mac/Desktop/German-Shepherd-dog-Alsatian.jpg')

Beggining extraction with get_valfeatures


Starting feature extraction with InceptionV3 using preprocess_input

Feature maps shape:  (1, 2048)

Starting feature extraction with InceptionResNetV2 using preprocess_input

Feature maps shape:  (1, 1536)

Starting feature extraction with Xception using preprocess_input

Feature maps shape:  (1, 2048)

Starting feature extraction with NASNetLarge using preprocess_input

Feature maps shape:  (1, 4032)


'german_shepherd'