## Import packages

In [1]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
import itertools

from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Dropout, concatenate, Input, Conv2D, MaxPooling2D
from keras.optimizers import Adam, Adadelta
from keras.layers.advanced_activations import LeakyReLU
from keras.utils.np_utils import to_categorical

## Load Data

In [2]:
#Define Image Directory
train_dir = '/kaggle/input/plant-seedlings-classification/train/'
test_dir = '/kaggle/input/plant-seedlings-classification/test/'
sample_submission = pd.read_csv('/kaggle/input/plant-seedlings-classification/sample_submission.csv')

In [3]:
SPECIES = ['Black-grass',
           'Charlock',
           'Cleavers',
           'Common Chickweed',
           'Common wheat',
           'Fat Hen',
           'Loose Silky-bent',
           'Maize',
           'Scentless Mayweed',
           'Shepherds Purse',
           'Small-flowered Cranesbill',
           'Sugar beet']

## Training Data

In [4]:
train = []

for species_num, species in enumerate(SPECIES):
    for file in os.listdir(os.path.join(train_dir, species)):
        train.append(['../input/plant-seedlings-classification/train/{}/{}'.format(species, file), species_num, species])
        
train = pd.DataFrame(train, columns=['file', 'species_num', 'species'])

print('Training Data: ',train.shape)

Training Data:  (4750, 3)


## Image Pre-processing (Masking/Segmentation/Sharpening)

In [5]:
def create_mask_for_plant(image):
    image_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    sensitivity = 35
    lower_hsv = np.array([60 - sensitivity, 100, 50])
    upper_hsv = np.array([60 + sensitivity, 255, 255])

    mask = cv2.inRange(image_hsv, lower_hsv, upper_hsv)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11,11))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
    
    return mask

def segment_plant(image):
    mask = create_mask_for_plant(image)
    output = cv2.bitwise_and(image, image, mask = mask)
    return output

def sharpen_image(image):
    image_blurred = cv2.GaussianBlur(image, (0, 0), 3)
    image_sharp = cv2.addWeighted(image, 1.5, image_blurred, -0.5, 0)
    return image_sharp

In [6]:
train_data = []

for i in range(len(train)):
    img = cv2.imread(train['file'][i])
    img = cv2.resize(img,dsize=(256,256))
    img_stack = segment_plant(img)
    img_stack = sharpen_image(img_stack)
    img_stack = cv2.cvtColor(img_stack, cv2.COLOR_RGB2GRAY )
    img_stack = np.reshape(img_stack,(256,256,1))
    train_data.append(np.concatenate((np.array(img),np.array(img_stack)),axis=2))

train_data = np.array(train_data)

## Label Encoding

In [7]:
labels = train['species_num']
labels = to_categorical(labels, num_classes = len(SPECIES))

## Train/Test Split

In [8]:
x_train, x_val, y_train, y_val = train_test_split(train_data, labels, test_size = 0.1, random_state=10)

### Input Image shape

In [9]:
input_shape = x_train[1].shape
print('Input Shape is :', input_shape)

Input Shape is : (256, 256, 4)


## Build CNN Model

In [10]:
def fire_incept(x, fire=16, intercept=64):
    x = Conv2D(fire, (5,5), strides=(2,2))(x)
    x = LeakyReLU(alpha=0.15)(x)
    
    left = Conv2D(intercept, (3,3), padding='same')(x)
    left = LeakyReLU(alpha=0.15)(left)
    
    right = Conv2D(intercept, (5,5), padding='same')(x)
    right = LeakyReLU(alpha=0.15)(right)
    
    x = concatenate([left, right], axis=3)
    return x

def fire_squeeze(x, fire=16, intercept=64):
    x = Conv2D(fire, (1,1))(x)
    x = LeakyReLU(alpha=0.15)(x)
    
    left = Conv2D(intercept, (1,1))(x)
    left = LeakyReLU(alpha=0.15)(left)
    
    right = Conv2D(intercept, (3,3), padding='same')(x)
    right = LeakyReLU(alpha=0.15)(right)
    
    x = concatenate([left, right], axis=3)
    return x

In [11]:
image_input=Input(shape=input_shape)

x = fire_incept((image_input), fire=16, intercept=16)

x = fire_incept(x, fire=32, intercept=32)
x = fire_squeeze(x, fire=32, intercept=32)

x = fire_incept(x, fire=64, intercept=64)
x = fire_squeeze(x, fire=64, intercept=64)

x = fire_incept(x, fire=64, intercept=64)
x = fire_squeeze(x, fire=64, intercept=64)

x = Conv2D(64, (3,3))(x)
x = LeakyReLU(alpha=0.1)(x)

x = Flatten()(x)

x = Dense(512)(x)
x = LeakyReLU(alpha=0.1)(x)
x = Dropout(0.1)(x)

out = Dense(len(SPECIES), activation='softmax')(x)

model_new = Model(image_input, out)
model_new.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 256, 256, 4) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 126, 126, 16) 1616        input_1[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu (LeakyReLU)         (None, 126, 126, 16) 0           conv2d[0][0]                     
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 126, 126, 16) 2320        leaky_re_lu[0][0]                
_______________________________________________________________________________________

In [12]:
model_new.compile(optimizer = Adam(lr=.00025) , loss = 'categorical_crossentropy', metrics=['accuracy'])

In [13]:
learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', patience=3, verbose=1, 
                                            factor=0.5, min_lr=0.00001)

In [14]:
datagen = ImageDataGenerator(rotation_range=40, 
                             zoom_range = 0.2, 
                             width_shift_range=0.2, 
                             height_shift_range=0.2,
                             horizontal_flip=True, 
                             vertical_flip=True)
datagen.fit(x_train)

## Fit Model

In [15]:
batch_size = 32
epochs = 40

In [16]:
model_new.fit(datagen.flow(x_train,y_train, batch_size=batch_size), 
              epochs = epochs,
              validation_data = (x_val,y_val), 
              verbose = 1, 
              steps_per_epoch=x_train.shape[0] // batch_size, 
              callbacks=[learning_rate_reduction])
model_new.save_weights('/kaggle/working/pretrainedweight.h5')

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [17]:
x_test = []

for file in sample_submission['file']:
    img = cv2.imread(os.path.join(test_dir,file))
    img = cv2.resize(img,dsize=(256,256))
    img_stack = segment_plant(img)
    img_stack = sharpen_image(img_stack)
    img_stack = cv2.cvtColor(img_stack, cv2.COLOR_RGB2GRAY )
    img_stack = np.reshape(img_stack,(256,256,1))
    x_test.append(np.concatenate((np.array(img),np.array(img_stack)),axis=2))

x_test = np.array(x_test)

In [18]:
score = model_new.evaluate(x_val,y_val)
print('Accuracy on Validation Set',score[1])

Accuracy on Validation Set 0.9073684215545654


# Applying XGBoost

### Extracting Features from last Layer

In [19]:
model_feat = Model(inputs=model_new.input,outputs=model_new.get_layer('dense_1').output)

feat_train = model_feat.predict(x_train)
print(feat_train.shape)

feat_val = model_feat.predict(x_val)
print(feat_val.shape)

feat_test = model_feat.predict(x_test)
print(feat_test.shape)

(4275, 12)
(475, 12)
(794, 12)


### Applying XGBoost

In [20]:
import xgboost as xgb

xb = xgb.XGBClassifier()

xb.fit(feat_train,np.argmax(y_train,axis=1))

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=0, num_parallel_tree=1,
              objective='multi:softprob', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=None, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [21]:
# Prediction Score [Training Features]
xb.score(feat_train,np.argmax(y_train,axis=1))

1.0

In [22]:
# Prediction Score [Validation Features]
xb.score(feat_val,np.argmax(y_val,axis=1))

0.9263157894736842

### Predict on Test dataset

In [23]:
Pred_labels = xb.predict(feat_test)

In [24]:
Pred_labels = pd.DataFrame(Pred_labels,index =None,columns=['species_num'])

test_id = []
for file in os.listdir(test_dir):
    test_id.append(['{}'.format(file)])

test_id = pd.DataFrame(test_id, columns=['file'])

### Save file for Submission

In [25]:
test_df = pd.DataFrame()
test_df['species_num'] = Pred_labels['species_num']
test_df['file'] = test_id['file']
test_df['species'] = [SPECIES[i] for i in Pred_labels['species_num']]

submission = pd.merge(left=sample_submission, right=test_df[['file', 'species']], on="file", how="right")
submission.drop(['species_x'], axis = 1, inplace = True)
submission.columns = ['file','species'] 
print(submission.head())

submission.to_csv('/kaggle/working/submission_xgb.csv', index=False)

            file                    species
0  8916793ce.png  Small-flowered Cranesbill
1  2df78338c.png                    Fat Hen
2  c4ed8ed38.png                 Sugar beet
3  8ece6efec.png           Common Chickweed
4  1f290e016.png                 Sugar beet


## Test model on New Test Dataset (images downloaded from Google)

In [26]:
test_new_dir = '/kaggle/input/plant-seedlings-test-new/Test_new/'
Test_new = pd.read_csv('/kaggle/input/plant-seedlings-test-new/Test_new_actual.csv')

In [30]:
x_test_new = []

for file in Test_new['file']:
    img = cv2.imread(os.path.join(test_new_dir,file))
    img = cv2.resize(img,dsize=(256,256))
    img_stack = segment_plant(img)
    img_stack = sharpen_image(img_stack)
    img_stack = cv2.cvtColor(img_stack, cv2.COLOR_RGB2GRAY )
    img_stack = np.reshape(img_stack,(256,256,1))
    x_test_new.append(np.concatenate((np.array(img),np.array(img_stack)),axis=2))

x_test_new = np.array(x_test_new)

In [31]:
feat_test_new = model_feat.predict(x_test_new)
print(feat_test_new.shape)

(20, 12)


In [37]:
Pred_labels_new = xb.predict(feat_test_new)
Pred_labels_new = pd.DataFrame(Pred_labels_new,index =None,columns=['species_num'])
Test_new['prediction'] = [SPECIES[i] for i in Pred_labels_new['species_num']]
Test_new

Unnamed: 0,file,species,prediction
0,3c75e7d79.png,Charlock,Charlock
1,456.jpg,Fat Hen,Fat Hen
2,789.jpg,Charlock,Charlock
3,123.jpg,Fat Hen,Fat Hen
4,131415.jpg,Small-flowered Cranesbill,Charlock
5,101112.jpg,Charlock,Common wheat
6,161718.jpg,Chickweed,Fat Hen
7,da38229c7.png,Cleavers,Cleavers
8,202122.jpg,Cleavers,Charlock
9,0ebf8f2f4.png,Maize,Maize


### The XGBoost Model get 12/20 correct

In [38]:
Test_new.to_csv('/kaggle/working/test_new_result.csv',index=False)