In [2]:
#Import Libraries
import imageio
import os
import pandas as pd
import numpy as np
from skimage.transform import resize
import tensorflow as tf
from sklearn.model_selection import train_test_split
import random
from scipy import ndarray
import skimage as sk
from skimage import transform
from skimage import util

In [4]:
#REading all images folders of plant seedling data set
folders = os.listdir('train')

# Label encoding
class_label = {}
index = 0
for foldername in folders:
    class_label[foldername] = index
    index += 1
class_label

{'Black-grass': 0,
 'Charlock': 1,
 'Cleavers': 2,
 'Common Chickweed': 3,
 'Common wheat': 4,
 'Fat Hen': 5,
 'Loose Silky-bent': 6,
 'Maize': 7,
 'Scentless Mayweed': 8,
 'Shepherds Purse': 9,
 'Small-flowered Cranesbill': 10,
 'Sugar beet': 11}

In [5]:
# Count of number of files in each plant folder
for foldername in folders:
    files = os.listdir(os.path.join('train', foldername))
    print(len(files),foldername)

263 Black-grass
390 Charlock
287 Cleavers
611 Common Chickweed
221 Common wheat
475 Fat Hen
654 Loose Silky-bent
221 Maize
516 Scentless Mayweed
231 Shepherds Purse
496 Small-flowered Cranesbill
385 Sugar beet


In [6]:
#Assinging maximum images as 750 in order to perform data augumentation 

MAX_DATA = 750

img_df = {'image': [],'label': []}

for foldername in folders:
    plantname = foldername
    foldername = os.path.join('train', foldername)
    nooffiles = len(os.listdir(foldername))  
    nooffiles_togen = MAX_DATA - nooffiles
    gen_files = 0
    for filename in os.listdir(foldername):
        filename = os.path.join(foldername, filename)
        im = imageio.imread(filename)
        img = resize(im, (64, 64, 3))
        img_df['image'].append(img)
        img_df['label'].append(class_label[plantname])
        if gen_files < nooffiles_togen:
            #Randon Rotation
            random_degree = random.uniform(-25, 25)
            transformed_image = sk.transform.rotate(img, random_degree)
            img_df['image'].append(transformed_image)
            img_df['label'].append(class_label[plantname])
            gen_files += 1
            #Horizontal Flip
            transformed_image = img[:, ::-1]
            img_df['image'].append(transformed_image)
            img_df['label'].append(class_label[plantname])
            gen_files += 1

  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


In [7]:
pd.DataFrame(img_df['label']).groupby(by=0).agg({0:'count'})

Unnamed: 0_level_0,0
0,Unnamed: 1_level_1
0,751
1,750
2,751
3,751
4,663
5,751
6,750
7,663
8,750
9,693


In [8]:
print('Classes', np.unique(img_df['label']))

Classes [ 0  1  2  3  4  5  6  7  8  9 10 11]


In [9]:
print('No of Images', len(img_df['image']))
print('No of Label', len(img_df['label']))

No of Images 8774
No of Label 8774


In [10]:
X = np.array(img_df['image'])
Y = img_df['label']

In [11]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.20, random_state=1)

In [12]:
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dense, Activation, Dropout, Flatten, Reshape
tf.reset_default_graph()

Using TensorFlow backend.


In [13]:
from keras.preprocessing.image import ImageDataGenerator

# This will do preprocessing and realtime data augmentation:
datagen = ImageDataGenerator(
    featurewise_center=False,  
    samplewise_center=False,  
    featurewise_std_normalization=False, 
    rotation_range=360,  
    height_shift_range=0.3,  
    width_shift_range=0.3, 
    vertical_flip=True, 
    horizonatal_flip=True) 

In [15]:
# Define model
model = Sequential()

# 1st Conv Layer
model.add(Conv2D(64, (3, 3), input_shape=(64, 64, 3)))
model.add(Activation('relu'))

# 2nd Conv Layer
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(3, 3)))

# 3rd Conv Layer
model.add(Conv2D(128, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(3, 3)))

# 4th Conv Layer
model.add(Conv2D(128, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(3, 3)))

# Fully Connected Layer
model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))

#Batch Normalisation
model.add(keras.layers.BatchNormalization())

# Prediction Layer
model.add(Dense(output_dim=12, init='he_normal', bias=True))
model.add(Activation('softmax'))

# Loss and Optimizer
model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
    
# Store Training Results
early_stopping = keras.callbacks.EarlyStopping(monitor='val_acc', patience=5, verbose=1, mode='auto')
callback_list = [early_stopping]



In [16]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 62, 62, 64)        1792      
_________________________________________________________________
activation_4 (Activation)    (None, 62, 62, 64)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 60, 60, 64)        36928     
_________________________________________________________________
activation_5 (Activation)    (None, 60, 60, 64)        0         
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 20, 20, 64)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 18, 18, 128)       73856     
_________________________________________________________________
activation_6 (Activation)    (None, 18, 18, 128)       0         
__________

In [18]:
model.fit_generator(datagen.flow(x_train, y_train, batch_size=100),
               steps_per_epoch=5*len(x_train)/100,
               epochs=5,
               verbose=1,
               shuffle=True,
               validation_data=(x_test, y_test),
               callbacks=callback_list)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1f880047be0>

In [19]:
y_pred = model.predict(x_test)
y_pred_lst = np.argmax(y_pred, axis=1)
y_pred_df = pd.DataFrame(y_pred_lst)
y_pred_df.head(5)

Unnamed: 0,0
0,11
1,1
2,11
3,0
4,5


In [20]:
from sklearn.metrics import classification_report  
print(classification_report(y_test, y_pred_df))

              precision    recall  f1-score   support

           0       0.59      0.67      0.63       140
           1       0.64      0.99      0.77       144
           2       0.98      0.62      0.76       144
           3       0.95      0.88      0.92       156
           4       0.82      0.97      0.89       128
           5       0.91      0.87      0.89       158
           6       0.86      0.51      0.64       164
           7       0.78      0.99      0.87       132
           8       0.70      0.94      0.80       162
           9       0.97      0.57      0.72       131
          10       0.99      0.88      0.93       146
          11       0.90      0.87      0.89       150

   micro avg       0.81      0.81      0.81      1755
   macro avg       0.84      0.81      0.81      1755
weighted avg       0.84      0.81      0.81      1755



### Improvements done through many iterations:
1. Data augumentation for images for each class helped in increasing the accuracy.
2. Increasing conv layers helped in better accuracy
3. By varying hyper -paremeters accuracy increased upto 80%