# **Flower Classification CNN**

This was a HackerEarth challenge.

Given a large number of flower classes (102, to be precise), build a flower classification model that discriminates between classes while correctly classifying all flower images belonging to the same class. There are a total of 20549 (train + test) images of flowers. Predict the category of each flower image in the test folder with good accuracy.


Data modelling: https://github.com/tsaideepak7/flower-classification 

# **Model Building and Training**

In [None]:
import keras
from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout
from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras import models

import pandas as pd
import numpy as np
import time
import datetime
import matplotlib.pyplot as plt

In [None]:
# Dataset sizes and batching parameters shared by the generators and the
# step-count computation below.
train_images = 40591     # number of training images
val_images = 10101       # number of validation images
train_batchsize = 50     # training images per batch
val_batchsize = 50       # validation images per batch
img_shape = (128, 128)   # size every image is resized to

In [None]:
# The dataset is huge, so batches are streamed from disk through generators
# instead of loading every image into memory.

# Training stream: pixel values are rescaled by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
x_train = train_datagen.flow_from_directory(
    directory=r'../input/flower-datatree/datatree/train/',  # location of the training images
    target_size=img_shape,
    batch_size=train_batchsize,
    class_mode="categorical",  # classification labels
    seed=42,       # seed for the shuffle
    shuffle=True,  # shuffle the training images
)

# Validation stream: same preprocessing and options as the training stream.
validation_datagen = ImageDataGenerator(rescale=1./255)
x_validation = validation_datagen.flow_from_directory(
    directory=r'../input/flower-datatree/datatree/validation/',  # location of the validation images
    target_size=img_shape,
    batch_size=val_batchsize,
    class_mode="categorical",  # classification labels
    seed=42,       # seed for the shuffle
    shuffle=True,  # shuffle the validation images
)


In [None]:
# Build the model architecture: three conv -> max-pool -> dropout stages
# followed by a softmax classifier with 102 output units (one per category).
model = Sequential()

# Stage 1. The input shape is derived from img_shape (plus 3 channels) so the
# network always matches the target_size used by the data generators.
model.add(Conv2D(16, (5, 5), activation='relu', input_shape=img_shape + (3,)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.1))

# Stage 2
model.add(Conv2D(20, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Dropout(0.1))

# Stage 3
model.add(Conv2D(20, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.1))

# Classifier head
model.add(Flatten())
model.add(Dropout(0.1))
model.add(Dense(102, activation='softmax'))

# The metric is registered as 'acc' (not 'accuracy') so that it is logged as
# 'acc' / 'val_acc' on every Keras version: the checkpoint monitor and the
# history plots in this notebook look the metric up under exactly those names.
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['acc'])

model.summary()

In [None]:
# Number of batches needed to see every image once per epoch.
# True division followed by ceil keeps the final, partially filled batch;
# the previous floor division (//) made np.ceil a no-op and dropped it.
train_steps = int(np.ceil(train_images / train_batchsize))  # steps for training the model
val_steps = int(np.ceil(val_images / val_batchsize))  # steps for validating the model
print(train_steps, val_steps)

In [None]:
# Stop training once the validation loss has stopped improving.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='auto',
    min_delta=0.0001,
    patience=3,
    verbose=1,
)
# Reduce the learning rate when the validation loss is not improving.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    mode='auto',
    factor=0.1,
    patience=2,
    min_delta=0.0004,
    min_lr=1e-6,
    verbose=1,
)


In [None]:
# Save the model only when the validation accuracy reaches a new maximum.
savepath = "flowermodel.hdf5"
checkpoint = ModelCheckpoint(
    filepath=savepath,
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Train from the generators and report the wall-clock training time.
start = time.time()
cnn = model.fit_generator(
    x_train,
    steps_per_epoch=train_steps,
    epochs=80,
    validation_data=x_validation,
    validation_steps=val_steps,
    callbacks=[early_stop, reduce_lr, checkpoint],
    verbose=1,
)
end = time.time()

print('training time: ' + str(datetime.timedelta(seconds=(end - start))))

In [None]:
# Accuracy per epoch. The second curve is the validation accuracy
# ('val_acc'), so it is labelled as validation rather than test data.
# Each artist carries its own label so the legend cannot drift out of sync.
print(cnn.history.keys())
plt.plot(cnn.history['acc'], label='Training set')
plt.plot(cnn.history['val_acc'], label='Validation set')
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
# Red cross: epoch with the highest validation accuracy.
plt.plot(np.argmax(cnn.history["val_acc"]), np.max(cnn.history["val_acc"]), marker="x", color="r", label="best model")
plt.legend(loc='upper left')
plt.show()

# Loss per epoch for the training and validation data.
plt.plot(cnn.history['loss'], label='Training set')
plt.plot(cnn.history['val_loss'], label='Validation set')
plt.title('Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(loc='upper left')
plt.show()

# **Visualizing intermediate activation layers of the model**
> > 

In [None]:
# How many of the model's leading layers to inspect.
no_layers = 9

In [None]:
# Collect the output tensors of the first `no_layers` layers ...
layer_outputs = [inspected.output for inspected in model.layers[:no_layers]]
# ... and wrap them in a model that returns all of them for a given model input.
activation_model = models.Model(outputs=layer_outputs, inputs=model.input)

In [None]:
# Load one image, resize it to img_shape and turn it into a batch of one.
img_path = r'../input/flower-datatree/datatree/test/19791.jpg'  # image to inspect
img = image.load_img(img_path, target_size=img_shape)
img_tensor = np.expand_dims(image.img_to_array(img), axis=0)  # add the leading batch axis
img_tensor /= 255.  # same 1/255 rescaling as the data generators

# Show the preprocessed image and its tensor shape.
plt.imshow(img_tensor[0])
plt.show()
print(img_tensor.shape)

In [None]:
# One NumPy array per inspected layer, in the order the layer outputs were collected.
activations = activation_model.predict(x=img_tensor)

In [None]:
# Inspect the first layer: print its activation shape and draw the output of
# one filter (channel index 1) for the single image in the batch.
first_layer_activation = activations[0]
print(np.shape(first_layer_activation))
plt.matshow(first_layer_activation[0, :, :, 1], cmap='viridis')

In [None]:

# Names of the inspected layers, used as titles of the feature-map plots.
layer_names = [layer.name for layer in model.layers[:no_layers]]

images_per_row = 16  # number of feature maps shown per grid row

# Draw one figure per layer, tiling every channel of its activation in a grid.
for layer_name, layer_activation in zip(layer_names, activations):
    n_features = layer_activation.shape[-1]  # number of channels in the feature map
    size = layer_activation.shape[1]  # the feature map has shape (1, size, size, n_features)
    # Round up so that layers whose channel count is not a multiple of
    # images_per_row (e.g. the 20-filter conv layers) still show every
    # channel; unused tiles in the last grid row simply stay zero.
    n_rows = -(-n_features // images_per_row)
    display_grid = np.zeros((size * n_rows, images_per_row * size))
    for index in range(n_features):
        grid_row, grid_col = divmod(index, images_per_row)
        # Work on a copy so the normalisation does not modify `activations`.
        channel_image = layer_activation[0, :, :, index].copy()
        # Post-process the channel to make it visually palatable.
        channel_image -= channel_image.mean()
        channel_std = channel_image.std()
        if channel_std > 0:  # a constant channel would otherwise divide by zero
            channel_image /= channel_std
        channel_image *= 64
        channel_image += 128
        channel_image = np.clip(channel_image, 0, 255).astype('uint8')
        display_grid[grid_row * size:(grid_row + 1) * size,
                     grid_col * size:(grid_col + 1) * size] = channel_image
    # Scale the figure so each tile keeps the same on-screen size.
    scale = 1. / size
    plt.figure(figsize=(scale * display_grid.shape[1], scale * display_grid.shape[0]))
    plt.title(layer_name)
    plt.grid(False)
    plt.imshow(display_grid, aspect='auto', cmap='viridis')

# **Predicting test data using the trained model**

In [None]:
# Stream the test images one at a time and in a fixed (unshuffled) order so
# that the predictions can be matched to the images afterwards.
test_datagen = ImageDataGenerator(rescale=1./255)
x_test = test_datagen.flow_from_directory(
    directory=r'../input/flower-datatree/datatree/',
    classes=['test'],  # only read the 'test' sub-folder
    target_size=img_shape,
    shuffle=False,
    batch_size=1,
)

In [None]:
test_images = 2009  # number of test images

# The generator yields one image per batch, so one step is run per test image.
test_stepsize = test_images
x_test.reset()  # restart the generator from its first image
predict = model.predict_generator(generator=x_test, verbose=1, steps=test_stepsize)
print(predict)

In [None]:
# Display the shape of the prediction array as the cell output: one row per
# prediction step and one column per unit of the model's final Dense layer.
predict.shape

In [None]:
# For every test image keep the position of the highest score, shifted to be 1-based.
predictions = [np.argmax(scores) + 1 for scores in predict]

In [None]:
# Undo the sorting of the categories caused by ImageDataGenerator.
# This step is essential: the category names 1..102 are sorted as strings
# here, and each 1-based position in that sorted order is mapped back to the
# category number found at that position.
actual = list(map(str, range(1, 103)))
gen = sorted(actual)

labels = {position: int(category) for position, category in enumerate(gen, start=1)}

# Translate every predicted position into its real category number.
n_predictions = [labels[predicted] for predicted in predictions]

predictions = n_predictions

In [None]:
from collections import Counter
# Count how many test images were assigned to each category.
freq = Counter(predictions)

In [None]:
import matplotlib.pylab as plt

# Bar chart of the number of test images predicted for each category.
lists = sorted(freq.items())  # (category, count) pairs ordered by category
x, y = zip(*lists)  # split the pairs into categories and counts

plt.figure(figsize=(20, 5))
plt.bar(x, y)
plt.title("test results")
plt.xlabel('category')
plt.ylabel('number of images')
plt.show()

In [None]:
# Build the submission table (image_id, category) and write it to output.csv.
names = pd.Series(list(range(18540, 20549)), name="image_id")
results = pd.Series(predictions, name="category")
submission = pd.concat([names, results], axis=1)
submission.to_csv("output.csv", index=False)