In [None]:
# Project: Tomato Leaf Classification

# Import the libraries

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import cv2
from pathlib import Path
from skimage.io import imread, imsave
from skimage.transform import resize
import tensorflow as tf
from tensorflow import keras
from keras.models import Model
from keras import layers as L
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import RMSprop, Adam
from keras.applications import vgg16

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read from this Colab session
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Loading the data

!unzip '/content/drive/MyDrive/Adv. Ai by Sundaram- 9th Sep 2023/14th Oct 2023/tomato_leaf_images.zip'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: tomato_leaf_images/train/MagnesiumDeficiency/IMG_20190407_111224_256.jpg  
  inflating: __MACOSX/tomato_leaf_images/train/MagnesiumDeficiency/._IMG_20190407_111224_256.jpg  
  inflating: tomato_leaf_images/train/MagnesiumDeficiency/IMG_20190407_115252_BURST001_COVER_256.jpg  
  inflating: __MACOSX/tomato_leaf_images/train/MagnesiumDeficiency/._IMG_20190407_115252_BURST001_COVER_256.jpg  
  inflating: tomato_leaf_images/train/MagnesiumDeficiency/IMG_20190407_105309_256.jpg  
  inflating: __MACOSX/tomato_leaf_images/train/MagnesiumDeficiency/._IMG_20190407_105309_256.jpg  
  inflating: tomato_leaf_images/train/MagnesiumDeficiency/IMG_20190407_102538_Bokeh_256.jpg  
  inflating: __MACOSX/tomato_leaf_images/train/MagnesiumDeficiency/._IMG_20190407_102538_Bokeh_256.jpg  
  inflating: tomato_leaf_images/train/MagnesiumDeficiency/IMG_20190407_115828_256.jpg  
  inflating: __MACOSX/tomato_leaf_images/train/MagnesiumD

In [None]:
# Locations of the extracted training and validation image folders

training_data, validation_data = (
    Path('/content/tomato_leaf_images/train'),
    Path('/content/tomato_leaf_images/val'),
)

In [None]:
# Show the training and validation directories defined above
print(training_data, validation_data)

/content/tomato_leaf_images/train /content/tomato_leaf_images/val


In [None]:
# Map each class folder name to the integer label used for training
labels_dict = {
    'AmericanLeafMiner': 0,
    'Healthy': 1,
    'MagnesiumDeficiency': 2,
    'SerpentineLeafMiner': 3,
}

In [None]:
# Display the class-name -> integer-label mapping
print(labels_dict)

{'AmericanLeafMiner': 0, 'Healthy': 1, 'MagnesiumDeficiency': 2, 'SerpentineLeafMiner': 3}


In [None]:
# creating a dataframe for the training dataset

# (image path, integer label) pairs, collected class folder by class folder
train_records = []

# sorted() makes the traversal order independent of the filesystem
for folder in sorted(os.listdir(training_data)):
  imgs_path = training_data / folder

  # skip stray files (e.g. .DS_Store) that are not class folders
  if not imgs_path.is_dir():
    continue

  # integer label for this class; an unknown folder name raises KeyError
  label = labels_dict[folder]

  # store each image path and corresponding label
  train_records.extend((str(img_name), label)
                       for img_name in sorted(imgs_path.glob('*.jpg')))

# fail early: an empty dataframe would leave the batch generator with nothing to yield
if not train_records:
  raise ValueError(f'No .jpg images found under {training_data}')

# build the dataframe and shuffle it; the fixed random_state keeps the
# shuffled order reproducible across runs
train_df = (
    pd.DataFrame(train_records, columns=['image', 'label'])
    .sample(frac=1., random_state=1234)
    .reset_index(drop=True)
)


In [None]:
# Preview the shuffled training dataframe (columns: image path, integer label)
train_df

Unnamed: 0,image,label
0,/content/tomato_leaf_images/train/MagnesiumDef...,2
1,/content/tomato_leaf_images/train/SerpentineLe...,3
2,/content/tomato_leaf_images/train/AmericanLeaf...,0
3,/content/tomato_leaf_images/train/Healthy/resi...,1
4,/content/tomato_leaf_images/train/Healthy/IMG_...,1
...,...,...
4249,/content/tomato_leaf_images/train/AmericanLeaf...,0
4250,/content/tomato_leaf_images/train/MagnesiumDef...,2
4251,/content/tomato_leaf_images/train/AmericanLeaf...,0
4252,/content/tomato_leaf_images/train/MagnesiumDef...,2


In [None]:
# creating a dataframe for the validation dataset

# (image path, integer label) pairs, collected class folder by class folder
valid_records = []

# sorted() makes the traversal order independent of the filesystem
for folder in sorted(os.listdir(validation_data)):
  imgs_path = validation_data / folder

  # skip stray files (e.g. .DS_Store) that are not class folders
  if not imgs_path.is_dir():
    continue

  # integer label for this class; an unknown folder name raises KeyError
  label = labels_dict[folder]

  # store each image path and corresponding label
  valid_records.extend((str(img_name), label)
                       for img_name in sorted(imgs_path.glob('*.jpg')))

# fail early: an empty dataframe would leave the batch generator with nothing to yield
if not valid_records:
  raise ValueError(f'No .jpg images found under {validation_data}')

# build the dataframe and shuffle it; the fixed random_state keeps the
# shuffled order reproducible across runs
valid_df = (
    pd.DataFrame(valid_records, columns=['image', 'label'])
    .sample(frac=1., random_state=1234)
    .reset_index(drop=True)
)


In [None]:
# Preview the shuffled validation dataframe (columns: image path, integer label)
valid_df

Unnamed: 0,image,label
0,/content/tomato_leaf_images/val/AmericanLeafMi...,0
1,/content/tomato_leaf_images/val/AmericanLeafMi...,0
2,/content/tomato_leaf_images/val/SerpentineLeaf...,3
3,/content/tomato_leaf_images/val/MagnesiumDefic...,2
4,/content/tomato_leaf_images/val/MagnesiumDefic...,2
...,...,...
466,/content/tomato_leaf_images/val/AmericanLeafMi...,0
467,/content/tomato_leaf_images/val/SerpentineLeaf...,3
468,/content/tomato_leaf_images/val/SerpentineLeaf...,3
469,/content/tomato_leaf_images/val/MagnesiumDefic...,2


In [None]:
# Configuration

# image size used for the batches and the model input: rows, columns, channels
img_rows = 224
img_cols = 224
img_channels = 3

# number of samples per batch
batch_size = 8

# number of target classes
nb_classes = 4

In [None]:
# Data Augmentation

import imgaug as ia
from imgaug import augmenters as iaa

# seed imgaug's random number generator
seed = 1234
ia.seed(seed)

# Augmentation sequence: OneOf picks exactly one of the listed augmenters
# per image (horizontal flip, 20-degree rotation, or brightness scaling)
seq = iaa.OneOf(
    [
        iaa.Fliplr(),
        iaa.Affine(rotate=20),
        iaa.Multiply((1.2, 1.5)),
    ]
)

In [None]:
# Data Generator

def data_generator(data, batch_size, preprocessing_fn = None, is_validation_data=False):
  """Endlessly yield (images, one-hot labels) batches built from a dataframe.

  Args:
    data: DataFrame with an 'image' column (file path) and a 'label' column
      (integer class id in [0, nb_classes)).
    batch_size: maximum number of samples per batch; the last batch of each
      pass over the data may be smaller.
    preprocessing_fn: optional callable applied once to every complete batch
      of float32 RGB images.
    is_validation_data: when True the row order is kept fixed and no
      augmentation is applied.

  Yields:
    (batch_data, batch_labels): float32 arrays of shape
    (n, img_rows, img_cols, img_channels) and (n, nb_classes).

  Raises:
    ValueError: if `data` is empty (the loop would otherwise never yield).
    FileNotFoundError: if an image file cannot be read.
  """
  n = len(data)
  if n == 0:
    raise ValueError('data_generator received an empty dataframe')

  nb_batches = int(np.ceil(n/batch_size))
  indices = np.arange(n)

  while True:
    # reshuffle the sample order at the start of every pass (training only)
    if not is_validation_data:
      np.random.shuffle(indices)

    for i in range(nb_batches):
      next_batch_indices = indices[i*batch_size:(i+1)*batch_size]
      nb_examples = len(next_batch_indices)

      # fresh arrays for every batch, so a preprocessing_fn that works
      # in place cannot corrupt a batch that was already yielded
      batch_data = np.zeros((nb_examples, img_rows, img_cols, img_channels), dtype =np.float32)
      batch_labels = np.zeros((nb_examples, nb_classes), dtype = np.float32)

      # fill the whole batch first ...
      for j, idx in enumerate(next_batch_indices):
        row = data.iloc[idx]

        img = cv2.imread(row['image'])
        if img is None:
          # cv2.imread signals a missing/corrupt file by returning None
          raise FileNotFoundError(f"Could not read image: {row['image']}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if not is_validation_data:
          img = seq.augment_image(img)

        # cv2.resize takes dsize as (width, height), i.e. (cols, rows)
        img = cv2.resize(img, (img_cols, img_rows)).astype(np.float32)
        batch_data[j] = img

        # one-hot encode the label directly in the preallocated zero row
        batch_labels[j, int(row['label'])] = 1.0

      # ... then preprocess and yield exactly once per batch. Doing this
      # inside the per-image loop would emit partially filled batches and
      # preprocess earlier images repeatedly.
      if preprocessing_fn is not None:
        batch_data = preprocessing_fn(batch_data)

      yield batch_data, batch_labels

In [None]:
# VGG16 model

# input preprocessing function handed to both generators
preprocessing_fn = vgg16.preprocess_input

# generator over the training dataframe
train_data_gen = data_generator(
    data=train_df,
    batch_size=batch_size,
    preprocessing_fn=preprocessing_fn,
)

# generator over the validation dataframe, flagged as validation data
valid_data_gen = data_generator(
    data=valid_df,
    batch_size=batch_size,
    preprocessing_fn=preprocessing_fn,
    is_validation_data=True,
)

# Transfer Learning with Fine Tuning

In [None]:
def get_base_model():
  """Build VGG16 with ImageNet weights, keeping its fully connected top.

  The input shape is taken from the notebook-level img_rows, img_cols and
  img_channels settings.
  """
  input_shape = (img_rows, img_cols, img_channels)
  return vgg16.VGG16(weights="imagenet", include_top=True, input_shape=input_shape)

In [None]:
# Instantiate the pretrained network
base_model = get_base_model()

# Take the output of the second-to-last layer, i.e. drop the base model's final layer
base_model_output = base_model.layers[-2].output

# New head: dropout followed by a softmax layer with one unit per class
drop2 = L.Dropout(rate=0.5, name='drop2')
fc3 = L.Dense(units=nb_classes, activation='softmax', name='fc3')

x = drop2(base_model_output)
output = fc3(x)

# Model mapping the base model's input to the new head
model = Model(inputs=base_model.input, outputs=output)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5


In [None]:
# Freeze all the base model layers
# NOTE: the loop below is wrapped in a triple-quoted string, so it is NOT
# executed -- this cell freezes nothing and leaves each layer's `trainable`
# flag untouched. Remove the surrounding quotes to actually freeze the
# base model layers.
"""
for layer in base_model.layers[:-1]:
  layer.trainable = False
"""

In [None]:
# Compile the model and print its layer summary

optimizer = RMSprop(learning_rate=0.001)
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [None]:
# Model Training

# number of epochs
nb_epochs = 50

# Early stopping on the validation loss. The patience must be smaller than
# nb_epochs, otherwise the callback can never fire (the previous value of
# 100 exceeded the 50 training epochs).
es = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Checkpoint that keeps only the model with the best validation loss
chkpt = ModelCheckpoint(filepath = 'model_checkpoint.h5', monitor='val_loss',
                        save_best_only=True)

# number of batches needed to cover the training / validation dataframes once
nb_train_steps = int(np.ceil(len(train_df)/batch_size))
nb_valid_steps = int(np.ceil(len(valid_df)/batch_size))

In [None]:
# train the model
# Model.fit accepts Python generators directly; fit_generator is deprecated
# (see the warning it emits) and is no longer available in recent Keras releases.
history = model.fit(train_data_gen, epochs=nb_epochs,
                    steps_per_epoch=nb_train_steps, validation_data=valid_data_gen,
                    validation_steps=nb_valid_steps, callbacks=[es, chkpt])

Epoch 1/50


  history = model.fit_generator(train_data_gen, epochs=nb_epochs,




  saving_api.save_model(


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
