In [1]:
import os
import shutil
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from tqdm import tqdm
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

from keras.preprocessing import image as image_prepocessor
from keras.preprocessing.image import ImageDataGenerator

from keras.models import Sequential  
from keras.models import Model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D 
from keras.regularizers import l2

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# --- Run configuration -------------------------------------------------------
# Root folder of the dataset. The previous (Linux) location is kept for reference:
# data_path = "/home/paperspace/data/dogBreed/"
data_path = r"E:\DogBreed"

# Seed used for the NumPy train/validation split and for generator shuffling.
_seed = 1

# Training-loop settings shared by the head training and the fine-tuning stage.
epochs = 50
batch_size = 12

# Side length (pixels) images are resized to by the directory generators.
# NOTE(review): 10 px looks like a quick-debug value rather than a real
# training resolution -- confirm before a full run.
im_size = 10

In [4]:
from keras.applications.resnet50 import ResNet50, preprocess_input

# Convolutional backbone: ResNet50 requested without its top
# (include_top=False) and initialised with the ImageNet weights.
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
)

In [10]:
# Locations of the raw dataset files, all resolved against `data_path`.
# Image folders first ...
_training_image_loc, _testing_image_loc = (
    os.path.join(data_path, folder) for folder in ("train", "test")
)
# ... then the CSV files (labels for the training images, sample submission).
_training_label_loc, _sample_sub_loc = (
    os.path.join(data_path, csv_name)
    for csv_name in ("labels.csv", "sample_submission.csv")
)

In [11]:
# Scratch folder for cached arrays and saved weights.
temp_dir = "temp"
# makedirs(exist_ok=True) is idempotent: no separate existence check is needed
# and re-running the cell (or a concurrent run) cannot fail on an existing folder.
os.makedirs(temp_dir, exist_ok=True)

# Cached bottleneck features. The file names embed the image size and batch
# size so that changing either setting does not pick up a stale cache.
bottle_ft_train_path = os.path.join(temp_dir, 'btn_ft_train_{}_{}.npy'.format(im_size, batch_size))
bottle_ft_val_path = os.path.join(temp_dir, 'btn_ft_val_{}_{}.npy'.format(im_size, batch_size))

# Destination for the weights saved after training the classification head.
base_modal_weight_loc = os.path.join(temp_dir, 'base_modal_weight_loc.h5')

In [12]:
def split_images_into_breed(dataframe, new_dir_name):
    """Copy images into one sub-folder per breed.

    Creates ``<_training_image_loc>/<new_dir_name>/<breed>/`` for every breed in
    ``dataframe`` and copies ``<id>.jpg`` from ``_training_image_loc`` into it.

    Args:
        dataframe: Table with an ``id`` column (image file name without the
            ``.jpg`` extension) and a ``breed`` column.
        new_dir_name: Name of the folder to create under ``_training_image_loc``.

    Returns:
        Path of the per-breed folder tree. If that folder already exists it is
        returned as-is and nothing is copied.
    """
    sub_path = os.path.join(_training_image_loc, new_dir_name)

    if os.path.exists(sub_path):
        print("Folder {} exists.".format(sub_path))
        return sub_path

    # Build the tree under a staging name and rename it only once every copy
    # has finished. Otherwise an interrupted run would leave a half-filled
    # folder that the early return above would treat as complete forever after.
    staging_path = sub_path + ".partial"
    if os.path.exists(staging_path):
        # Left over from an earlier interrupted run: start from scratch.
        shutil.rmtree(staging_path)
    os.mkdir(staging_path)

    missing_count = 0
    for name, group in tqdm(dataframe.groupby("breed")):
        breed_dir = os.path.join(staging_path, name)
        os.makedirs(breed_dir, exist_ok=True)
        for file_name in group["id"]:
            file_path = os.path.join(_training_image_loc, file_name + ".jpg")
            if os.path.exists(file_path):
                shutil.copy(file_path, breed_dir)
            else:
                missing_count += 1

    if missing_count:
        # Still best-effort (missing files are skipped), but no longer silent.
        print("Warning: {} image(s) listed in the dataframe were not found in {}.".format(
            missing_count, _training_image_loc))

    os.rename(staging_path, sub_path)
    return sub_path

In [13]:
# Label table read from labels.csv.
df = pd.read_csv(_training_label_loc)

# Reproducible random ~80/20 split: one uniform draw per row, seeded with _seed.
np.random.seed(seed=_seed)
rnd = np.random.random(df.shape[0])
train_idx = rnd < 0.8
valid_idx = ~train_idx  # complement of the training mask, i.e. rnd >= 0.8

traing_df, validation_df = df[train_idx], df[valid_idx]

# Materialise each subset as a folder-per-breed tree (skipped when the folder
# already exists) so that flow_from_directory can read it.
train_data_loc = split_images_into_breed(traing_df, "train")
val_data_loc = split_images_into_breed(validation_df, "valid")

Folder E:\DogBreed\train\train exists.
Folder E:\DogBreed\train\valid exists.


In [10]:
# # Generator for fitting
# data_generator = ImageDataGenerator(preprocessing_function=preprocess_input, rescale=None)

# train_generator = data_generator.flow_from_directory(train_data_loc,
#                                                      target_size=(im_size, im_size), 
#                                                      batch_size=batch_size,
#                                                      class_mode='categorical',
#                                                      shuffle=False)
# num_classes = len(train_generator.class_indices)
# num_of_training_samples = len(train_generator.filenames)
# train_steps = int(math.ceil(num_of_training_samples / batch_size))  

# val_generator = data_generator.flow_from_directory(val_data_loc,
#                                                    target_size=(im_size, im_size), 
#                                                    batch_size=batch_size,
#                                                    class_mode=None,
#                                                    shuffle=False)
# num_of_val_samples = len(val_generator.filenames)
# val_steps = int(math.ceil(num_of_val_samples / batch_size))  


Found 8221 images belonging to 120 classes.
Found 2001 images belonging to 120 classes.


In [None]:
# x = base_model.output
# x = GlobalAveragePooling2D()(x)
# x = Dense(1024, activation="relu")(x)
# x = Dropout(0.8)(x)
# predictions = Dense(num_classes, activation='softmax')(x)

# model = Model(inputs=base_model.input, outputs=predictions)

# for layer in base_model.layers:
#     layer.trainable = False

# model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
# model.fit_generator(train_generator, steps_per_epoch=train_steps, epochs=epochs, 
#                     validation_data=val_generator, validation_steps=val_steps)

In [None]:
# # def read_img(img_id, training, size):
# #     if training == 'train':
# #         file_path = _training_image_loc
# #     else:
# #         file_path = _testing_image_loc
# #     img = image_prepocessor.load_img(os.path.join(file_path, '%s.jpg' % img_id), target_size=size)
# #     img = image_prepocessor.img_to_array(img)
# #     img = preprocess_input(img)
# #     return img

# # test = base_model.predict(np.array([read_img("000bec180eb18c7604dcecc8fe0dba07", "train", (150, 150))]))

# img_path = r"E:\DogBreed\train\000bec180eb18c7604dcecc8fe0dba07.jpg"
# img = image_prepocessor.load_img(img_path, target_size=(150, 150))
# x = image_prepocessor.img_to_array(img)
# x = np.expand_dims(x, axis=0)
# x = preprocess_input(x)

# preds = base_model.predict(x)


In [7]:
# base_model.output_shape

(None, None, None, 2048)

In [9]:
# base_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, None, 3 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, None, None, 3 0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, None, None, 6 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, None, None, 6 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

__________________________________________________________________________________________________
add_7 (Add)                     (None, None, None, 5 0           bn3d_branch2c[0][0]              
                                                                 activation_19[0][0]              
__________________________________________________________________________________________________
activation_22 (Activation)      (None, None, None, 5 0           add_7[0][0]                      
__________________________________________________________________________________________________
res4a_branch2a (Conv2D)         (None, None, None, 2 131328      activation_22[0][0]              
__________________________________________________________________________________________________
bn4a_branch2a (BatchNormalizati (None, None, None, 2 1024        res4a_branch2a[0][0]             
__________________________________________________________________________________________________
activation

__________________________________________________________________________________________________
activation_44 (Activation)      (None, None, None, 5 0           bn5b_branch2a[0][0]              
__________________________________________________________________________________________________
res5b_branch2b (Conv2D)         (None, None, None, 5 2359808     activation_44[0][0]              
__________________________________________________________________________________________________
bn5b_branch2b (BatchNormalizati (None, None, None, 5 2048        res5b_branch2b[0][0]             
__________________________________________________________________________________________________
activation_45 (Activation)      (None, None, None, 5 0           bn5b_branch2b[0][0]              
__________________________________________________________________________________________________
res5b_branch2c (Conv2D)         (None, None, None, 2 1050624     activation_45[0][0]              
__________

In [None]:
# Plain generator: only the ResNet50 preprocessing function, no rescaling.
data_generator = ImageDataGenerator(rescale=None, preprocessing_function=preprocess_input)

# Compute the training bottleneck features only when no cached copy exists.
if os.path.exists(bottle_ft_train_path):
    print("Skipping saving of train data.")
else:
    # class_mode=None + shuffle=False: yield images only, in directory order.
    train_generator = data_generator.flow_from_directory(
        train_data_loc,
        target_size=(im_size, im_size),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False,
    )
    num_classes = len(train_generator.class_indices)
    num_of_training_samples = len(train_generator.filenames)
    # Enough batches to cover every sample, including a final partial batch.
    steps = int(math.ceil(num_of_training_samples / batch_size))

    bottleneck_features_train = base_model.predict_generator(train_generator, steps)
    np.save(bottle_ft_train_path, bottleneck_features_train)

Found 8221 images belonging to 120 classes.


In [None]:
# Compute the validation bottleneck features only when no cached copy exists.
if not os.path.exists(bottle_ft_val_path):
    # class_mode=None + shuffle=False: yield images only, in directory order.
    val_generator = data_generator.flow_from_directory(val_data_loc,
                                                       target_size=(im_size, im_size),
                                                       batch_size=batch_size,
                                                       class_mode=None,
                                                       shuffle=False)
    num_of_val_samples = len(val_generator.filenames)
    # Enough batches to cover every sample, including a final partial batch.
    steps = int(math.ceil(num_of_val_samples / batch_size))

    # Pass the step count explicitly, exactly as the training-feature cell does;
    # it was previously computed here but never handed to predict_generator.
    bottleneck_features_val = base_model.predict_generator(val_generator, steps)
    np.save(bottle_ft_val_path, bottleneck_features_val)
else:
    print("Skipping saving of validation data.")

In [None]:
# Generator with images and Labels
train_generator = data_generator.flow_from_directory(train_data_loc,
                                                     target_size=(im_size, im_size), 
                                                     batch_size=batch_size,
                                                     class_mode='categorical',
                                                     shuffle=False)
num_of_training_samples = len(train_generator.filenames)
num_classes = len(train_generator.class_indices)

train_data = np.load(bottle_ft_train_path)
train_labels = train_generator.classes.reshape(-1, 1)
encoder = OneHotEncoder()
train_labels = encoder.fit_transform(train_labels)

In [None]:
# Unshuffled generator over the validation folders; only used here to read the
# class index of every file.
val_generator = data_generator.flow_from_directory(val_data_loc,
                                                   target_size=(im_size, im_size),
                                                   batch_size=batch_size,
                                                   class_mode=None,
                                                   shuffle=False)
num_of_val_samples = len(val_generator.filenames)
# Cached bottleneck features computed for the validation images.
validation_data = np.load(bottle_ft_val_path)
validation_labels = val_generator.classes.reshape(-1, 1)
# Reuse `encoder`, already fitted on the training labels, instead of fitting a
# second OneHotEncoder on the validation labels. Refitting derives the columns
# from whatever classes happen to be present in the validation split, so a
# missing class would silently misalign the label columns with training.
validation_labels = encoder.transform(validation_labels)

In [9]:
# Classification head trained on the cached bottleneck features:
# global average pooling -> Dense(1024, relu) -> Dropout -> softmax.
top_model = Sequential()
top_model.add(GlobalAveragePooling2D(input_shape=base_model.output_shape[1:]))
top_model.add(Dense(1024, activation="relu"))
top_model.add(Dropout(0.8))
# model.add(Dense(256, activation="relu"))
# model.add(Dropout(0.5))
# Output width follows `num_classes` (number of class folders found by the
# labelled training generator) instead of a hard-coded 120, so the head stays
# consistent with the one-hot labels if the set of breeds changes.
top_model.add(Dense(num_classes, activation='softmax'))

top_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
history = top_model.fit(train_data, train_labels, epochs=epochs, batch_size=batch_size,
                        validation_data=(validation_data, validation_labels))

# Persist the trained head weights.
top_model.save_weights(base_modal_weight_loc)

In [None]:
plt.figure(1)

# One stacked panel per metric: accuracy on top, loss below. In each panel the
# training curve is drawn first and the validation curve second; the legend
# labels the validation curve 'test'.
panels = (
    (211, 'acc', 'val_acc', 'model accuracy', 'accuracy'),
    (212, 'loss', 'val_loss', 'model loss', 'loss'),
)
for position, train_key, val_key, panel_title, y_label in panels:
    plt.subplot(position)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')

plt.show()

# FINE TUNING

In [10]:
# Full network for fine-tuning: the ResNet50 backbone followed by the
# classification head, stacked as two layers of one Sequential model.
final_model = Sequential([base_model, top_model])

In [12]:
from keras.optimizers import SGD

# Index of the first backbone layer that is allowed to train.
# NOTE(review): 163 presumably marks the start of the last ResNet50 stage --
# confirm against base_model.summary().
trainable_from = 163

# final_model.layers[0] is the ResNet50 backbone: freeze every layer below the
# cut-off and unfreeze the rest.
for position, layer in enumerate(final_model.layers[0].layers):
    layer.trainable = position >= trainable_from

# final_model.load_weights("final_modal_weights_FINAL.h5")

# SGD with momentum and a small learning rate for the fine-tuning stage.
final_model.compile(
    optimizer=SGD(lr=0.0001, momentum=0.9),
    loss='categorical_crossentropy',
    metrics=["accuracy"],
)

In [5]:
# Augmentation settings for fine-tuning: small random rotations, shifts, shear,
# zoom, channel shifts and horizontal flips, plus the ResNet50 preprocessing.
augmentation_options = dict(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.15,
    zoom_range=0.1,
    channel_shift_range=10.0,
    horizontal_flip=True,
    preprocessing_function=preprocess_input,
)
generator = ImageDataGenerator(**augmentation_options)

In [19]:
# Augmented, shuffled stream of labelled training images for fine-tuning.
train_generator = generator.flow_from_directory(train_data_loc,
                                                target_size=(im_size, im_size),
                                                batch_size=batch_size,
                                                class_mode='categorical',
                                                shuffle=True,
                                                seed=_seed)

num_of_training_samples = len(train_generator.filenames)
num_classes = len(train_generator.class_indices)
# 10% additional data using Data augmentation. The result is rounded up to a
# whole number of batches (it used to be a float) and stored under its own
# name, so the validation step count below no longer overwrites it.
train_steps = int(math.ceil(num_of_training_samples / batch_size * 1.1))

# NOTE(review): validation images go through the same augmenting generator as
# the training images -- confirm that augmented validation data is intended.
val_generator = generator.flow_from_directory(val_data_loc,
                                              target_size=(im_size, im_size),
                                              batch_size=batch_size,
                                              class_mode='categorical',
                                              shuffle=True,
                                              seed=_seed)

num_of_val_samples = len(val_generator.filenames)
val_steps = int(math.ceil(num_of_val_samples / batch_size))

# `steps` keeps the value it had at the end of this cell before (the
# validation step count) so that code still reading it behaves as it did.
steps = val_steps

Found 8221 images belonging to 120 classes.


In [None]:
# Derive the step counts from the generators themselves. The shared `steps`
# variable holds only the validation step count by the time this cell runs, so
# using it for steps_per_epoch made every epoch cover just a fraction of the
# training images.
# Training: one pass over the training files plus 10% extra augmented batches.
train_steps = int(math.ceil(len(train_generator.filenames) / batch_size * 1.1))
# Validation: exactly one pass over the validation files.
val_steps = int(math.ceil(len(val_generator.filenames) / batch_size))

final_model.fit_generator(train_generator,
                          steps_per_epoch=train_steps,
                          epochs=epochs,
                          validation_data=val_generator,
                          validation_steps=val_steps)
# `os.ppath` was a typo that raised AttributeError only after the whole
# training run had finished, losing the fine-tuned weights.
final_model.save_weights(os.path.join(temp_dir, "final_modal_weight.h5"))