<a href="https://colab.research.google.com/github/yvkrishna/vrushak/blob/main/paddy_classifer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://github.com/yvkrishna/paddy_disease_classification.git

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os 
from os import path
from PIL import Image, ImageFilter, ImageDraw
from tqdm import tqdm
import pandas as pd
import shutil
from sklearn.model_selection import train_test_split

In [None]:
from tensorflow.keras.applications import InceptionResNetV2
import tensorflow as tf

In [None]:
# Start from /content so the relative paths used by the following cells resolve.
os.chdir('/content')

In [None]:
# Enter the dataset folder; each sub-directory is one disease class.
os.chdir('paddy_disease_classification/rice_leaf_diseases')
# Named dataset_root (not `path`) so the `from os import path` import is not
# shadowed by a plain string.
dataset_root = os.getcwd()
# The context manager releases the directory handle as soon as the names are read.
with os.scandir(dataset_root) as entries:
  training_classes = [entry.name for entry in entries if entry.is_dir()]
print(training_classes)

In [None]:
def applyMedian(folder):
  '''
    Applies a median filter (kernel size 5) to every image in the given
    folder, overwriting each file in place.

    Files that OpenCV cannot decode are reported and skipped.

    Args :
      folder : (str). : Image directory.
  '''
  previous_path = os.getcwd()
  os.chdir(folder)
  try:
    for image in tqdm(os.listdir()):
      img = cv2.imread(image)
      if img is None:
        # cv2.imread returns None (it does not raise) for unreadable files.
        print(f"Skipping unreadable file: {image}")
        continue
      # applies median filter to the image.
      median = cv2.medianBlur(img, 5)
      # Save with OpenCV: the array is in BGR order, and writing it through
      # PIL's Image.fromarray would store it as RGB with red and blue swapped.
      cv2.imwrite(image, median)
  finally:
    # Restore the working directory even if an image fails to process.
    os.chdir(previous_path)

In [None]:
# Median-filter every class folder in place; the folder names are resolved
# against the current working directory.
for folder in training_classes:
  applyMedian(folder)

In [None]:
# Return to /content so the relative paths below resolve from there.
os.chdir('/content')

In [None]:
# Repository folder and the folder holding one sub-directory per class
# (both relative to the current working directory).
base_dir = 'paddy_disease_classification'
train_dir = os.path.join(base_dir, 'rice_leaf_diseases')

In [None]:
# NOTE: despite the "smut" in its name, the first variable points at the
# 'Bacterial leaf blight' class folder.
train_bact_leaf_smut_dir = os.path.join(train_dir, 'Bacterial leaf blight')  # directory with the Bacterial leaf blight images
train_brown_spot_dir = os.path.join(train_dir, 'Brown spot')  # directory with the Brown spot images
train_leaf_smut_dir = os.path.join(train_dir, 'Leaf smut')  # directory with the Leaf smut images

In [None]:
# Number of files currently in each class folder.
num_bact_leaf_smut_tr = len(os.listdir(train_bact_leaf_smut_dir))
num_brown_spot_tr = len(os.listdir(train_brown_spot_dir))
num_leaf_smut_tr = len(os.listdir(train_leaf_smut_dir))

# Total across the three classes (the stray trailing line-continuation
# backslash has been removed).
total_train = num_bact_leaf_smut_tr + num_brown_spot_tr + num_leaf_smut_tr

In [None]:
# Report the per-class and overall file counts measured before augmentation.
count_rows = (
  ('total training Bacterial leaf blight images:', num_bact_leaf_smut_tr),
  ('total training Brown spot images:', num_brown_spot_tr),
  ('total training Leaf smut images:', num_leaf_smut_tr),
)

print('Data before Data augumentation')
print("--")
for caption, count in count_rows:
  print(caption, count)
print("--")
print("Total training images:", total_train)

In [None]:
# Mini-batch size.
# NOTE(review): the fit() call later in this notebook passes batch_size=10
# and does not read this constant — confirm which value is intended.
BATCH_SIZE = 20
# Side length in pixels of the square images; used to size the image arrays.
IMG_SHAPE  = 299

In [None]:
def rotateImages(rotationAmt,folder):
  '''
    rotateImages is one of the image augmentation techniques used to
    enlarge the dataset.

    For every file in `folder` whose name does not contain "rot", a rotated
    copy is saved next to it as "<name>rot<rotationAmt>.jpg", where <name> is
    the part of the file name before the first dot. The number of copies
    written is printed at the end.

    Args:
      rotationAmt : int. The rotation angle handed to PIL's Image.rotate.
      folder : str. Directory holding the images.
  '''
  previous_path = os.getcwd()
  os.chdir(folder)
  total_images = 0
  try:
    for image in tqdm(os.listdir()):
      # skip files that are already rotated copies.
      if "rot" in image:
        continue
      # get the image name
      img_name = image.split(".")[0]
      # The context manager closes the file even if rotate/save raises.
      with Image.open(image) as img:
        rotimg = img.rotate(rotationAmt)
        # saving the image with its rotation information as well.
        rotimg.save(img_name+"rot"+str(rotationAmt)+".jpg")
      total_images+=1
    print(total_images)
  finally:
    # Restore the working directory even if an image fails to process.
    os.chdir(previous_path)

In [None]:
def addBlur(folder):
  '''
    Adds Blur to the images.

    For every file in `folder` whose name does not contain "rot", a copy
    filtered with ImageFilter.BLUR is saved in the same folder as
    "<name>blur.jpg", where <name> is the part of the file name before the
    first dot. The number of copies written is printed at the end.

    Args:
      folder : str. Directory holding the images.
  '''
  previous_path = os.getcwd()
  os.chdir(folder)
  total_images = 0
  try:
    # for each image in the current directory
    for image in tqdm(os.listdir()):
      # rotated copies are not blurred.
      if "rot" in image:
        continue
      # get the image name
      img_name = image.split(".")[0]
      # The context manager closes the source file; the original left it open.
      with Image.open(image) as img:
        # adds blur to the image using ImageFilter.Blur
        blured_image = img.filter(ImageFilter.BLUR)
        # saving the image by adding the blur feature.
        blured_image.save(img_name+"blur.jpg")
      total_images+=1
    print(total_images)
  finally:
    # Restore the working directory even if an image fails to process.
    os.chdir(previous_path)


In [None]:
def horizontalFlip(folder):
  '''
    Adds horizontally flipped copies of the images.

    For every file in `folder` whose name contains neither "rot" nor "blur",
    a copy mirrored around the vertical axis (cv2.flip with flip code 1) is
    saved in the same folder as "<name>flip.jpg", where <name> is the part of
    the file name before the first dot. Files that OpenCV cannot decode are
    reported and skipped. The number of copies written is printed at the end.

    Args:
      folder : str. Directory holding the images.
  '''
  previous_path = os.getcwd()
  os.chdir(folder)
  total_images = 0
  try:
    # for each image in the current directory
    for image in tqdm(os.listdir()):
      # rotated and blurred copies are not flipped.
      if "rot" in image or "blur" in image:
        continue
      img = cv2.imread(image)
      if img is None:
        # cv2.imread returns None (it does not raise) for unreadable files.
        print(f"Skipping unreadable file: {image}")
        continue
      # Flips the image
      flip = cv2.flip(img, 1)
      # get the image name
      img_name = image.split(".")[0]
      # saving the image by adding the flip feature.
      cv2.imwrite(img_name+"flip.jpg",flip)
      total_images+=1
    print(total_images)
  finally:
    # Restore the working directory even if an image fails to process.
    os.chdir(previous_path)


In [None]:
# Augment the images under paddy_disease_classification/testing, one class
# folder at a time. Each helper writes its copies next to the originals.
os.chdir('paddy_disease_classification/testing')
for folder in training_classes:
  # Save a copy of each image rotated by 90 degrees.
  rotateImages(90,folder)

  # Save a blurred copy of each image that is not a rotated copy.
  addBlur(folder)

  # # Uniform-noise augmentation is disabled (addUniformNoise is not defined
  # # in the visible part of this notebook).
  # addUniformNoise(folder)

  # Save a horizontally flipped copy of each image that is neither rotated nor blurred.
  horizontalFlip(folder)

  print()
os.chdir('/content')

In [None]:
# Augment the images under paddy_disease_classification/rice_leaf_diseases,
# one class folder at a time. Each helper writes its copies next to the originals.
os.chdir('paddy_disease_classification/rice_leaf_diseases')
for folder in training_classes:
  # Save a copy of each image rotated by 90 degrees.
  rotateImages(90,folder)

  # Save a blurred copy of each image that is not a rotated copy.
  addBlur(folder)

  # # Uniform-noise augmentation is disabled (addUniformNoise is not defined
  # # in the visible part of this notebook).
  # addUniformNoise(folder)

  # Save a horizontally flipped copy of each image that is neither rotated nor blurred.
  horizontalFlip(folder)

  print()
os.chdir('/content')

In [None]:
# Same folders as before augmentation; redefined so this cell can run on its own.
base_dir = 'paddy_disease_classification'
train_dir = os.path.join(base_dir, 'rice_leaf_diseases')

In [None]:
# NOTE: despite the "smut" in its name, the first variable points at the
# 'Bacterial leaf blight' class folder.
train_bact_leaf_smut_dir = os.path.join(train_dir, 'Bacterial leaf blight')  # directory with the Bacterial leaf blight images
train_brown_spot_dir = os.path.join(train_dir, 'Brown spot')  # directory with the Brown spot images
train_leaf_smut_dir = os.path.join(train_dir, 'Leaf smut')  # directory with the Leaf smut images

In [None]:
# Number of files in each class folder after augmentation.
num_bact_leaf_smut_tr = len(os.listdir(train_bact_leaf_smut_dir))
num_brown_spot_tr = len(os.listdir(train_brown_spot_dir))
num_leaf_smut_tr = len(os.listdir(train_leaf_smut_dir))

# Total across the three classes (the stray trailing line-continuation
# backslash has been removed).
total_train = num_bact_leaf_smut_tr + num_brown_spot_tr + num_leaf_smut_tr

In [None]:
# Report the per-class and overall file counts measured after augmentation.
count_rows = (
  ('total training Bacterial leaf blight images:', num_bact_leaf_smut_tr),
  ('total training Brown spot images:', num_brown_spot_tr),
  ('total training Leaf smut images:', num_leaf_smut_tr),
)

print('Data after Data augumentation')
print("--")
for caption, count in count_rows:
  print(caption, count)
print("--")
print("Total training images:", total_train)

In [None]:
# Create the flat directory that create_dataset moves every image into.
# os.makedirs with exist_ok=True replaces the IPython-only `!mkdir train`
# and does not fail when the directory already exists.
os.makedirs('/content/paddy_disease_classification/train', exist_ok=True)
os.chdir('/content')

In [None]:
def create_dataset(folder):
  '''
    Moves every file in `folder` into /content/paddy_disease_classification/train
    and records its one-hot label in the module-level `img_label` dict
    (file name -> float32 array of length 3).

    Label layout: 'Bacterial leaf blight' -> [1,0,0], 'Brown spot' -> [0,0,1],
    any other folder name -> [0,1,0].

    Args:
      folder : str. Class directory; its name selects the label.
  '''
  # The label depends only on the folder, so pick it once.
  if folder == 'Bacterial leaf blight':
    one_hot = [1,0,0]
  elif folder == 'Brown spot':
    one_hot = [0,0,1]
  else:
    one_hot = [0,1,0]

  previous_path = os.getcwd()
  os.chdir(folder)
  try:
    # for each image in the current directory
    for image in tqdm(os.listdir()):
      shutil.move(image, "/content/paddy_disease_classification/train")
      # A fresh array per image so entries never share memory.
      img_label[image] = np.asarray(one_hot, dtype=np.float32)
  finally:
    # Restore the working directory even if a move fails.
    os.chdir(previous_path)

In [None]:
os.chdir('paddy_disease_classification/rice_leaf_diseases')

# Reuse the labels from an earlier run if there are any: create_dataset moves
# the files out of the class folders, so on a re-run the folders are empty
# and starting from a fresh dict would lose every label.
img_label = globals().get('img_label', {})

for folder in training_classes:
  create_dataset(folder)

os.chdir('/content')

In [None]:
# Inspect the file name -> one-hot label mapping.
print(img_label)

In [None]:

def load_image(image_path):
    '''
      Reads a JPEG file, resizes it to 299x299 with 3 channels and applies
      the inception_v3 preprocess_input normalisation.

      Args :
      image_path : str. Image path for processing the image

      Returns :
      (image tensor, image_path) : the processed image and the path it came from
    '''
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (299, 299))
    # Normalizing the image
    return tf.keras.applications.inception_v3.preprocess_input(resized), image_path

In [None]:
# Test code for checking the image shape and max value of image.
# The file name is hard-coded; presumably it is one of the "<name>blur.jpg"
# copies written by addBlur — confirm it exists in the train folder.
os.chdir('paddy_disease_classification/train')
# NOTE: this rebinds the module-level name `path` to the returned image path.
image, path = load_image('DSC_0112blur.jpg')
print(f"image has a shape of {image.shape}")
print(tf.reduce_max(image))
os.chdir('/content')

In [None]:
# Hold out 10% of the labelled images for validation (seeded, so the split
# is the same on every run).
split = train_test_split(
  list(img_label),
  list(img_label.values()),
  test_size=0.1,
  random_state=0,
)
img_name_train, img_name_val, output_label_train, output_label_val = split

print(f'Length of training images = {len(img_name_train)}')
print(f'Length of training labels = {len(output_label_train)}')
print(f"Length of validating images = {len(img_name_val)}")
print(f'Length of validating labels = {len(output_label_val)}')

In [None]:
os.chdir('paddy_disease_classification/train')

channels = 3

def _load_split(image_names, labels):
  '''Loads the named images from the current directory into one float32 array and stacks their labels.'''
  images = np.empty(shape=(len(image_names), IMG_SHAPE, IMG_SHAPE, channels), dtype=np.float32)
  targets = np.empty(shape=(len(labels), 3), dtype=np.float32)
  for index, name in enumerate(tqdm(image_names)):
    # load_image also returns the path, which is not needed here.
    tensor, _ = load_image(name)
    images[index] = tensor
    targets[index] = np.asarray(labels[index])
  return images, targets

try:
  train_images, train_labels = _load_split(img_name_train, output_label_train)
  val_images, val_labels = _load_split(img_name_val, output_label_val)
finally:
  # Always return to /content, even if an image fails to load.
  os.chdir('/content')

In [None]:
# Sanity check: shapes of the training image and label arrays.
train_images.shape, train_labels.shape

In [None]:
# Make sure the flat training directory exists. os.makedirs with
# exist_ok=True replaces the IPython-only `!mkdir train`, which reports an
# error when the directory is already there.
os.makedirs('/content/paddy_disease_classification/train', exist_ok=True)
os.chdir('/content')

In [None]:
def create_dataset(folder):
  '''
    Moves every file in `folder` into /content/paddy_disease_classification/train
    and records its one-hot label in the module-level `img_label` dict
    (file name -> float32 array of length 3).

    Label layout: 'Bacterial leaf blight' -> [1,0,0], 'Brown spot' -> [0,0,1],
    any other folder name -> [0,1,0].

    Args:
      folder : str. Class directory; its name selects the label.
  '''
  # The label depends only on the folder, so pick it once.
  if folder == 'Bacterial leaf blight':
    one_hot = [1,0,0]
  elif folder == 'Brown spot':
    one_hot = [0,0,1]
  else:
    one_hot = [0,1,0]

  previous_path = os.getcwd()
  os.chdir(folder)
  try:
    # for each image in the current directory
    for image in tqdm(os.listdir()):
      shutil.move(image, "/content/paddy_disease_classification/train")
      # A fresh array per image so entries never share memory.
      img_label[image] = np.asarray(one_hot, dtype=np.float32)
  finally:
    # Restore the working directory even if a move fails.
    os.chdir(previous_path)

In [None]:
os.chdir('paddy_disease_classification/rice_leaf_diseases')

# Reuse the labels from an earlier run if there are any: create_dataset moves
# the files out of the class folders, so when this cell runs after they have
# been moved the folders are empty and a fresh dict would lose every label.
img_label = globals().get('img_label', {})

for folder in training_classes:
  create_dataset(folder)

os.chdir('/content')

In [None]:
# Inspect the file name -> one-hot label mapping.
print(img_label)

In [None]:

def load_image(image_path):
    '''
      Reads a JPEG file, resizes it to 299x299 with 3 channels and applies
      the inception_v3 preprocess_input normalisation.

      Args :
      image_path : str. Image path for processing the image

      Returns :
      (image tensor, image_path) : the processed image and the path it came from
    '''
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (299, 299))
    # Normalizing the image
    return tf.keras.applications.inception_v3.preprocess_input(resized), image_path

In [None]:
# Test code for checking the image shape and max value of image.
# The file name is hard-coded; presumably it is one of the "<name>blur.jpg"
# copies written by addBlur — confirm it exists in the train folder.
os.chdir('paddy_disease_classification/train')
# NOTE: this rebinds the module-level name `path` to the returned image path.
image, path = load_image('DSC_0112blur.jpg')
print(f"image has a shape of {image.shape}")
print(tf.reduce_max(image))
os.chdir('/content')

In [None]:
# differentiating the complete dataset into training and validating datasets.
# random_state is fixed (matching the earlier split cell) so the validation
# set is the same on every run and results are reproducible.
img_name_train, img_name_val, output_label_train, output_label_val = train_test_split(
                                                                    list(img_label.keys()),
                                                                    list(img_label.values()),
                                                                    test_size=0.1,
                                                                    random_state=0)

print(f'Length of training images = {len(img_name_train)}')
print(f'Length of training labels = {len(output_label_train)}')
print(f"Length of validating images = {len(img_name_val)}")
print(f'Length of validating labels = {len(output_label_val)}')

In [None]:
os.chdir('paddy_disease_classification/train')

channels = 3

def _load_split(image_names, labels):
  '''Loads the named images from the current directory into one float32 array and stacks their labels.'''
  images = np.empty(shape=(len(image_names), IMG_SHAPE, IMG_SHAPE, channels), dtype=np.float32)
  targets = np.empty(shape=(len(labels), 3), dtype=np.float32)
  for index, name in enumerate(tqdm(image_names)):
    # load_image also returns the path, which is not needed here.
    tensor, _ = load_image(name)
    images[index] = tensor
    targets[index] = np.asarray(labels[index])
  return images, targets

try:
  train_images, train_labels = _load_split(img_name_train, output_label_train)
  val_images, val_labels = _load_split(img_name_val, output_label_val)
finally:
  # Always return to /content, even if an image fails to load.
  os.chdir('/content')

In [None]:
# Sanity check: shapes of the training image and label arrays.
train_images.shape, train_labels.shape

In [None]:
# Pre trained model
# Reached through tf.keras.applications because only InceptionResNetV2 is
# imported by name at the top of the notebook; a bare `InceptionV3` raises
# NameError.
# NOTE(review): include_top=True keeps the ImageNet classification layer, so
# the 3-class Dense head added below is stacked on that layer's output —
# confirm this is intended rather than include_top=False with pooling.
inception_V3_pre_trained = tf.keras.applications.InceptionV3(include_top=True, weights='imagenet')

In [None]:
# Leave every layer of the pre-trained network trainable.
for layer in inception_V3_pre_trained.layers:
    layer.trainable = True

# 3-way softmax head on top of the pre-trained model's output. Dense and
# Model are reached through tf.keras because neither name is imported in
# this notebook.
predictions = tf.keras.layers.Dense(3, activation='softmax')(inception_V3_pre_trained.output)
inception_V3 = tf.keras.Model(inputs=inception_V3_pre_trained.input, outputs=predictions)

In [None]:
EPOCHS = 20

# Adam optimizer with AMSGrad enabled and a learning rate of 1e-5.
adam = tf.keras.optimizers.Adam(
    learning_rate=0.00001,
    beta_1=0.9,
    beta_2=0.9999,
    epsilon=1e-08,
    amsgrad=True,
    name='Adam',
)

# Track accuracy and AUC alongside the categorical cross-entropy loss.
tracked_metrics = ['accuracy', tf.keras.metrics.AUC()]
inception_V3.compile(optimizer=adam, loss='categorical_crossentropy', metrics=tracked_metrics)

# Train on the in-memory arrays and evaluate on the validation arrays after each epoch.
history_inception_V3 = inception_V3.fit(
    train_images,
    train_labels,
    validation_data=(val_images, val_labels),
    batch_size=10,
    epochs=EPOCHS,
)

In [None]:
# Pull the per-epoch curves out of the Keras history object.
history = history_inception_V3.history
acc, val_acc = history['accuracy'], history['val_accuracy']
loss, val_loss = history['loss'], history['val_loss']

epochs_range = range(EPOCHS)
plt.figure(num=None, figsize=(20,20), dpi=40, facecolor='w', edgecolor='k')

# One panel per metric: (subplot position, training curve, validation curve,
# metric name, legend location).
panels = [
  (1, acc, val_acc, 'Accuracy', 'lower right'),
  (2, loss, val_loss, 'Loss', 'upper right'),
]
for position, train_curve, val_curve, metric, legend_loc in panels:
  plt.subplot(1, 2, position)
  plt.plot(epochs_range, train_curve, label='Training ' + metric)
  plt.plot(epochs_range, val_curve, label = 'validation ' + metric)
  plt.legend(loc=legend_loc,fontsize=20)
  plt.title(metric + ' Plot',fontsize=30)
  plt.xlabel('Number of Epochs',fontsize=30)
  plt.ylabel(metric,fontsize=30)
  plt.xticks(fontsize=20)
  plt.yticks(fontsize=20)
plt.show()

In [None]:
def Get_Results_Inception_V3(folder):
  '''
    Generates results for the Inception_V3 model.

    Runs the module-level `inception_V3` model on every file in `folder` and
    appends, per image, one entry to each list of the module-level
    `result_inception_v3` dict: the file name ('image_names'), the predicted
    probability vector ('predictions') and the one-hot ground truth
    ('ground_truths'), so the three lists stay aligned index by index.

    Ground-truth layout: 'Bacterial leaf blight' -> [1,0,0],
    'Brown spot' -> [0,0,1], any other folder name -> [0,1,0].

    Args :
      folder : (str). : Image_directory
  '''
  # The ground truth depends only on the folder, so pick it once.
  if folder == 'Bacterial leaf blight':
    truth = [1,0,0]
  elif folder == 'Brown spot':
    truth = [0,0,1]
  else:
    truth = [0,1,0]

  previous_path = os.getcwd()
  os.chdir(folder)
  try:
    for image in tqdm(os.listdir()):
      # NOTE(review): the median filter that applyMedian applies to the
      # training images was commented out here, so test images are not
      # filtered — confirm that is intended.

      # tf.keras is used because a bare `keras` name is not imported in this
      # notebook; target_size is (height, width) only.
      img = tf.keras.preprocessing.image.load_img(image, target_size=(299, 299))
      img = tf.keras.preprocessing.image.img_to_array(img)
      img = np.expand_dims(img, axis=0)
      img = tf.keras.applications.inception_v3.preprocess_input(img)

      result_inception_v3['image_names'].append(image)
      # One vector per image (not one nested list per folder).
      result_inception_v3['predictions'].append(inception_V3.predict(img)[0])
      result_inception_v3['ground_truths'].append(np.asarray(truth))
  finally:
    # Restore the working directory even if a prediction fails.
    os.chdir(previous_path)

In [None]:
result_inception_v3 = {'predictions':[],'image_names':[],'ground_truths':[]}
os.chdir('paddy_disease_classification/testing')

try:
  for folder in training_classes:
    Get_Results_Inception_V3(folder)
finally:
  # Always return to /content, even if evaluation fails part-way.
  os.chdir('/content')

# Flatten to one row of 3 scores per image. Each entry is reshaped on its own,
# so this works whether 'predictions' holds one list per folder or one vector
# per image, for folders of different sizes, and for any number of test
# images (the size was previously hard-coded to 60).
result_inception_v3['predictions'] = np.concatenate(
  [np.asarray(p, dtype=np.float32).reshape(-1, 3) for p in result_inception_v3['predictions']])

In [None]:
# Show the ground-truth and prediction matrices, then their shapes.
truth_matrix = np.asarray(result_inception_v3['ground_truths'])
score_matrix = np.asarray(result_inception_v3['predictions'])
print(truth_matrix)
print(score_matrix)
print(truth_matrix.shape)
print(score_matrix.shape)

In [None]:
# Index of the highest score in each prediction row...
class_ids = np.argmax(np.asarray(result_inception_v3['predictions']), axis=1).reshape(-1)
# ...turned into one-hot rows by picking rows of an identity matrix.
pred = np.asarray(np.eye(len(training_classes),dtype=int)[class_ids])
print(pred)
print(pred.shape)

In [None]:
# roc_curve and auc are not imported anywhere else in this notebook.
from sklearn.metrics import auc, roc_curve

# Rows are images, columns are positions in the one-hot label encoding.
y_true = np.asarray(result_inception_v3['ground_truths'])
y_score = np.asarray(result_inception_v3['predictions'])

fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(len(training_classes)):
    # Column i gives the one-vs-rest problem for label position i; indexing
    # with [i] alone would pick the i-th image instead of the i-th class.
    fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Compute micro-average ROC curve and ROC area
fpr["micro"], tpr["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

In [None]:
# cycle and auc are not imported anywhere else in this notebook; np.interp
# replaces the bare `interp` name, which was never defined.
from itertools import cycle
from sklearn.metrics import auc

# First aggregate all false positive rates
all_fpr = np.unique(np.concatenate([fpr[i] for i in range(len(training_classes))]))

# Then interpolate all ROC curves at this points
mean_tpr = np.zeros_like(all_fpr)
for i in range(len(training_classes)):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])

# Finally average it and compute AUC
mean_tpr /= len(training_classes)

fpr["macro"] = all_fpr
tpr["macro"] = mean_tpr
roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

# Plot all ROC curves
lw = 2
plt.figure()
plt.plot(fpr["micro"], tpr["micro"],
         label='micro-average ROC curve (area = {0:0.2f})'
               ''.format(roc_auc["micro"]),
         color='deeppink', linestyle=':', linewidth=4)

plt.plot(fpr["macro"], tpr["macro"],
         label='macro-average ROC curve (area = {0:0.2f})'
               ''.format(roc_auc["macro"]),
         color='navy', linestyle=':', linewidth=4)

# One solid curve per class, cycling through the three colours.
colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
for i, color in zip(range(len(training_classes)), colors):
    plt.plot(fpr[i], tpr[i], color=color, lw=lw,
             label='ROC curve of class {0} (area = {1:0.2f})'
             ''.format(i, roc_auc[i]))

# Diagonal reference line.
plt.plot([0, 1], [0, 1], 'k--', lw=lw)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic using Inception-V3')
plt.legend(loc="lower right")
plt.show()


In [None]:
# Convert the one-hot rows back to class indices (position of the 1).
data = {'y_Actual': [np.where(r==1)[0][0] for r in np.asarray(result_inception_v3['ground_truths'])],
        'y_Predicted': [np.where(r==1)[0][0] for r in pred]
        }

df = pd.DataFrame(data, columns=['y_Actual','y_Predicted'])
# Rows are actual classes, columns are predicted classes.
confusion_matrix = pd.crosstab(df['y_Actual'], df['y_Predicted'], rownames=['Actual'], colnames=['Predicted'])

# The annotated heat map is drawn with matplotlib: the `sn` (seaborn) name is
# not imported anywhere in the visible notebook, so sn.heatmap raises NameError.
counts = confusion_matrix.values
fig, ax = plt.subplots()
heatmap = ax.imshow(counts)
fig.colorbar(heatmap, ax=ax)
ax.set_xticks(range(counts.shape[1]))
ax.set_xticklabels(confusion_matrix.columns)
ax.set_yticks(range(counts.shape[0]))
ax.set_yticklabels(confusion_matrix.index)
ax.set_xlabel(confusion_matrix.columns.name)
ax.set_ylabel(confusion_matrix.index.name)
# Write each count in its cell; dark text on the bright (high-count) cells.
bright_from = counts.max() / 2
for (row, col), count in np.ndenumerate(counts):
  ax.text(col, row, str(count), ha='center', va='center',
          color='black' if count > bright_from else 'white')
plt.show()