In [1]:
import os # file/directory operations
import numpy as np #Numerical computations and array
import matplotlib.pyplot as plt #Visualization
import cv2  #Image loading, resizing , filtering , etc
import tensorflow as tf
from tensorflow.keras.models import Sequential # for building CNN Model
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, Flatten , Dense , Dropout,BatchNormalization) # Model layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator #Augmentation
from tensorflow.keras.utils import to_categorical # For one-hot endcoding
#Data Splitting and Evaluation
from sklearn.model_selection import train_test_split # To split into train/val
from sklearn.metrics import classification_report # for performance evaluation
from tensorflow.keras.preprocessing import image

2025-05-18 04:55:57.634632: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747544157.865825      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747544157.933633      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
def preprocess_image(img_path, target_size):
    img = image.load_img(img_path, target_size=target_size)
    img_array = image.img_to_array(img)
    img_array = img_array / 255.0  # Normalize to [0,1]
    return img_array

In [3]:
# def load_dataset(dataset_path, target_size=(128,128), test_size = 0.2):
#     """
#     Load images from dataset directory, preproocess , and split into train and val sets.
#     Args:
#          dataset_path (str): Path to PlantVillage dataset folder (with subfolders per class)
#          target_size (tuple): Image size to resize to.
#          test_size (float): Fraction of dataset for validation.

#     Returns:
#             X_train, X_val, y_train, y_val (np.arrays): Preprocessed and split data.
#             class_names (list) : List of class labels.
#     """
#     images = []
#     labels = []
#     class_names = sorted(os.listdir(dataset_path)) # Folder names = class names

#     for idx, class_name in enumerate(class_names):
#         class_folder = os.path.join(dataset_path,class_name)
#         if not os.path.isdir(class_folder):
#             continue
#         for img_file in os.listdir(class_folder):
#             img_path = os.path.join(class_folder, img_file)
#             try:
#                 img = preprocess_image(img_path, target_size) # call our preprocessing fn
#                 images.append(img)
#                 labels.append(idx)
#             except Exception as e:
#                 print(f"Error loading {img_path}:{e}")
#     images = np.array(images)
#     labels = np.array(labels)
#     #One-hot encode labels 
#     labels = tf.keras.utils.to_categorical(labels, num_classes=len(class_names))
#     #split dataset
#     X_train, X_val , y_train , y_val = train_test_split(images, labels , test_size = test_size, random_state = 42, stratify = labels)
#     return X_train , X_val , y_train , y_val , class_names

In [4]:
def load_dataset(dataset_path, target_size=(64, 64), batch_size=32):
    """
    Efficiently load PlantVillage dataset using ImageDataGenerator
    Returns training & validation generators + class names.
    """

    datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

    X_train = datagen.flow_from_directory(
        dataset_path,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical',
        subset='training',
        shuffle=True
    )

    X_val = datagen.flow_from_directory(
        dataset_path,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical',
        subset='validation',
        shuffle=False
    )

    # Dummy vars to match your expected return signature
    y_train = X_train.classes
    y_val = X_val.classes
    class_names = list(X_train.class_indices.keys())

    return X_train, X_val, y_train, y_val, class_names

In [5]:
dataset_path = "/kaggle/input/plantvillage-dataset/color"
X_train, X_val , y_train , y_val , class_names = load_dataset(dataset_path)
print(f"\nLoaded {len(X_train)} training images and {len(X_val)} validation images")
print("Classes:", class_names)


Found 43456 images belonging to 38 classes.
Found 10849 images belonging to 38 classes.

Loaded 1358 training images and 340 validation images
Classes: ['Apple___Apple_scab', 'Apple___Black_rot', 'Apple___Cedar_apple_rust', 'Apple___healthy', 'Blueberry___healthy', 'Cherry_(including_sour)___Powdery_mildew', 'Cherry_(including_sour)___healthy', 'Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot', 'Corn_(maize)___Common_rust_', 'Corn_(maize)___Northern_Leaf_Blight', 'Corn_(maize)___healthy', 'Grape___Black_rot', 'Grape___Esca_(Black_Measles)', 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)', 'Grape___healthy', 'Orange___Haunglongbing_(Citrus_greening)', 'Peach___Bacterial_spot', 'Peach___healthy', 'Pepper,_bell___Bacterial_spot', 'Pepper,_bell___healthy', 'Potato___Early_blight', 'Potato___Late_blight', 'Potato___healthy', 'Raspberry___healthy', 'Soybean___healthy', 'Squash___Powdery_mildew', 'Strawberry___Leaf_scorch', 'Strawberry___healthy', 'Tomato___Bacterial_spot', 'Tomato___Early_bl

In [6]:
def build_cnn_model(input_shape, num_classes):
    model = Sequential()
    #convolution Block 1
    model.add(Conv2D(32,(3,3),activation = 'relu',input_shape = input_shape))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2,2)))
    #Convolution Block 2
    model.add(Conv2D(64,(3,3),activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2,2)))
    #Convolution Block 3
    model.add(Conv2D(128,(3,3),activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2,2)))
    #flatten + Dense
    model.add(Flatten())
    model.add(Dense(128,activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    return model

In [7]:
input_shape = (64,64,3)
num_classes = len(class_names)
model = build_cnn_model(input_shape,num_classes)
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics = ['accuracy'])
model.summary()
              

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1747544199.515242      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


In [9]:
history = model.fit(
    X_train,
    validation_data=X_val,
    epochs=15
)

  self._warn_if_super_not_called()


Epoch 1/15


I0000 00:00:1747544268.385100     112 service.cc:148] XLA service 0x7b2bd8013ee0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1747544268.386224     112 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1747544268.837608     112 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m   1/1358[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:52:08[0m 8s/step - accuracy: 0.0312 - loss: 5.3711

I0000 00:00:1747544272.055952     112 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1358/1358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m328s[0m 236ms/step - accuracy: 0.4368 - loss: 2.2311 - val_accuracy: 0.6692 - val_loss: 1.1506
Epoch 2/15
[1m1358/1358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 55ms/step - accuracy: 0.6741 - loss: 1.1203 - val_accuracy: 0.7469 - val_loss: 0.8431
Epoch 3/15
[1m1358/1358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 55ms/step - accuracy: 0.7460 - loss: 0.8296 - val_accuracy: 0.7753 - val_loss: 0.8583
Epoch 4/15
[1m1358/1358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 54ms/step - accuracy: 0.8013 - loss: 0.6416 - val_accuracy: 0.8141 - val_loss: 0.6757
Epoch 5/15
[1m1358/1358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 56ms/step - accuracy: 0.8336 - loss: 0.5324 - val_accuracy: 0.8800 - val_loss: 0.4002
Epoch 6/15
[1m1358/1358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 56ms/step - accuracy: 0.8569 - loss: 0.4517 - val_accuracy: 0.8983 - val_loss: 0.3312
Epoch 7/15
[