# ***In this notebook , We will not follow common way of predicting from just pretrained model , instead we will take features of image by predicting image with second last layer of pretrained model and then fit it again with xgboost and get final predictions.***

In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_csv('../input/hackereath-holiday-season-deep-learning-contest/dataset/train.csv')
df.head()

Unnamed: 0,Image,Class
0,image3476.jpg,Miscellaneous
1,image5198.jpg,Candle
2,image4183.jpg,Snowman
3,image1806.jpg,Miscellaneous
4,image7831.jpg,Miscellaneous


In [3]:
from keras.applications.inception_v3 import InceptionV3,preprocess_input

In [4]:
from keras.preprocessing.image import ImageDataGenerator

In [5]:
datagen = ImageDataGenerator(
        preprocessing_function=preprocess_input)

# ***Using flow from dataframe method for mapping dataframe and directory both.***

In [6]:
train_generator = datagen.flow_from_dataframe(
    df,
    directory='../input/hackereath-holiday-season-deep-learning-contest/dataset/train',
    x_col = 'Image',
    y_col = 'Class',
    target_size=(299,299),
    class_mode = 'categorical',
    batch_size=32)

Found 6469 validated image filenames belonging to 6 classes.


# ***Using InceptionV3 pretrained model. You can try with others also.***

In [7]:
base_model = InceptionV3(include_top=False,weights='imagenet',input_shape=(299,299,3))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [8]:
base_model.trainable = False

In [9]:
from keras import layers,models

# ***Adding some extra layers over pretrained model.***

In [10]:
model = models.Sequential()
model.add(base_model)
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dense(512,activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(256,activation='relu'))
model.add(layers.Dense(6,activation='softmax'))

In [11]:
model.compile(
    optimizer='adam',
    loss = 'categorical_crossentropy',
    metrics = ['accuracy'])

In [12]:
train_steps = np.ceil(train_generator.n/train_generator.batch_size)

In [13]:
model.fit(
    train_generator,
    epochs=12,
    batch_size=32,
    steps_per_epoch=train_steps)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<tensorflow.python.keras.callbacks.History at 0x7f8ae04e7150>

# ***Crucial step : Generated features of imagesby predicting it by removing the last layer of the model.***

In [14]:
import keras
new_train_x = []
new_train_y = []
model2 = keras.Model(model.input, model.layers[-5].output)
count = 0
while count < 200:
    x_batch,y_batch = next(train_generator)
    pred = model2.predict(x_batch)
    new_train_x.extend(pred)
    new_train_y.extend(y_batch)
    count += 1


In [15]:
new_train_y = np.argmax(new_train_y,axis=1)
print(new_train_y.shape)

(6400,)


In [16]:
new_train_x = np.array(new_train_x)
new_train_y = np.array(new_train_y)
print(new_train_x.shape)
print(new_train_y.shape)

(6400, 2048)
(6400,)


# ***Fitting new_train_x and new_train_y with xgboost.***

In [17]:
from xgboost import XGBClassifier
clf = XGBClassifier(max_depth=7, objective='multi:softmax', n_estimators=1000, 
                        num_classes=6)
clf.fit(new_train_x,new_train_y)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=10,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=1000, n_jobs=0, num_parallel_tree=1,
              objective='multi:softprob', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=None, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [18]:
test_df = pd.DataFrame()
test_images = os.listdir('../input/hackereath-holiday-season-deep-learning-contest/dataset/test')
test_df['Image']=test_images
test_df.head()

Unnamed: 0,Image
0,image7761.jpg
1,image3202.jpg
2,image688.jpg
3,image233.jpg
4,image4332.jpg


# ***Preparing test generator***

In [19]:
test_generator = datagen.flow_from_dataframe(
    test_df,
    directory='../input/hackereath-holiday-season-deep-learning-contest/dataset/test',
    x_col = 'Image',
    y_col = None,
    target_size=(299,299),
    class_mode = None,
    batch_size=32,
    shuffle = False)

Found 3489 validated image filenames.


# ***Predicting on test_generator***

In [20]:
new_test_x = model2.predict(test_generator)
new_test_x = np.array(new_test_x)
predictions_xgb = clf.predict(new_test_x)

In [21]:
predictions_xgb

array([4, 4, 5, ..., 4, 4, 4])

In [22]:
test_df['Class']=predictions_xgb

In [23]:
num_to_class = dict((y,x) for (x,y) in train_generator.class_indices.items())
num_to_class

{0: 'Airplane',
 1: 'Candle',
 2: 'Christmas_Tree',
 3: 'Jacket',
 4: 'Miscellaneous',
 5: 'Snowman'}

In [24]:
test_df['Class']=test_df['Class'].map(num_to_class)
test_df.head()

Unnamed: 0,Image,Class
0,image7761.jpg,Miscellaneous
1,image3202.jpg,Miscellaneous
2,image688.jpg,Snowman
3,image233.jpg,Candle
4,image4332.jpg,Christmas_Tree


In [25]:
test_df.to_csv('pred.csv',index=False)

# ***If you have any doubts in above code , please ask in the comment section. I will surely revert back as soon as possible***

# ***If you find the notebook informative , please drop a like***