In [2]:
import os
# import zipfile
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models

import warnings
warnings.filterwarnings("ignore")


In [None]:
# EDA for Images dataset
# Lists the class folders under `data_dir`, counts the files in each one and
# draws a bar chart of the class distribution.

data_dir = r"D:\project\RiceLeaf-disease-detection\project\Data"
if not os.path.isdir(data_dir):
    # Later cells rely on `classes`; stop here with a clear message instead of
    # letting them fail with a NameError.
    raise FileNotFoundError(f"Dataset directory not found: {data_dir}")

# Only sub-directories are classes; stray files in the dataset root would make
# the per-class os.listdir() below raise. os.listdir() returns entries in
# arbitrary order, so sort to keep the class order deterministic.
classes = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)
class_count = {cls: len(os.listdir(os.path.join(data_dir, cls))) for cls in classes}
print("Classes found:", classes)
print("Number of images per class:", class_count)

# Visualize the number of images per class
sns.barplot(x=list(class_count.keys()), y=list(class_count.values()))
plt.title("Number of images per class")
plt.xlabel("Classes")
plt.ylabel("Number of images")
plt.show()

In [None]:
# Collect the (width, height) of every readable image, class by class.
image_size = []
for cls in classes:
    cls_dir = os.path.join(data_dir, cls)
    for img_file in os.listdir(cls_dir):
        img_path = os.path.join(cls_dir, img_file)
        try:
            # The context manager closes the underlying file right away, so
            # scanning the dataset does not accumulate open file handles.
            with Image.open(img_path) as img:
                image_size.append(img.size)
        except Exception as e:
            # Unreadable entries are reported and skipped rather than aborting the scan.
            print(f"Error loading image {img_path}: {e}")
print(f"total images:{len(image_size)}")

# Bar chart of the five most frequent image sizes
pd.Series(image_size).value_counts().head(5).plot(kind='bar')
plt.title("Sample image sizes")
plt.xlabel("Image Size")
plt.ylabel("Count")
plt.show()


In [16]:
# Data Preparation
target_size = (128, 128)  # every image is resized to this (height, width)
batch_size = 16           # images per batch

# One generator object serves both subsets: it rescales pixel values by 1/255
# and reserves 20% of the images for validation.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Arguments common to the training and validation iterators
flow_kwargs = dict(
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
)
train_generator = datagen.flow_from_directory(data_dir, subset='training', **flow_kwargs)
validation_generator = datagen.flow_from_directory(data_dir, subset='validation', **flow_kwargs)


Found 96 images belonging to 3 classes.
Found 23 images belonging to 3 classes.


In [17]:
# Build the CNN model
# Input shape and number of outputs are taken from the data pipeline so the
# network always agrees with what the generators yield, instead of relying on
# a hard-coded shape and a raw directory listing.
input_shape = (*target_size, 3)            # resized image plus 3 colour channels
num_classes = train_generator.num_classes  # classes discovered by flow_from_directory

model = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(num_classes, activation='softmax'),  # Output layer for multi-class classification
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',  # matches class_mode='categorical' (one-hot labels)
    metrics=['accuracy'],
)
model.summary()

# Train the model
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10
)

Epoch 1/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 423ms/step - accuracy: 0.3451 - loss: 3.8247 - val_accuracy: 0.3043 - val_loss: 1.4384
Epoch 2/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 272ms/step - accuracy: 0.3613 - loss: 1.2837 - val_accuracy: 0.3478 - val_loss: 1.0726
Epoch 3/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 309ms/step - accuracy: 0.5091 - loss: 0.9866 - val_accuracy: 0.3913 - val_loss: 1.1046
Epoch 4/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 369ms/step - accuracy: 0.5787 - loss: 0.9340 - val_accuracy: 0.5652 - val_loss: 1.2448
Epoch 5/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 415ms/step - accuracy: 0.5829 - loss: 0.9138 - val_accuracy: 0.6957 - val_loss: 0.7993
Epoch 6/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 344ms/step - accuracy: 0.5818 - loss: 0.8212 - val_accuracy: 0.6522 - val_loss: 0.8214
Epoch 7/10
[1m6/6[0m [32m━━━━━━━━━━━━

In [20]:
# Data augmentation
# Random transforms are applied to the training subset only.
aug_datagen = ImageDataGenerator(
    rescale=1./255,          # Normalize pixel values (0–1)
    rotation_range=20,       # Randomly rotate images up to ±20°
    zoom_range=0.2,          # Randomly zoom in or out by up to 20%
    horizontal_flip=True,    # Randomly flip some images left-right
    validation_split=0.2     # Use 20% of data for validation
)
# Validation images must not be augmented, otherwise val_loss/val_accuracy are
# measured on randomly distorted inputs. This generator only rescales; it uses
# the same validation_split so both generators partition the directory the same way.
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Create augmented data generators
aug_train_generator = aug_datagen.flow_from_directory(
    data_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'
)
aug_validation_generator = val_datagen.flow_from_directory(
    data_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'
)
# NOTE: `model` already holds the weights from the previous fit, so this run
# continues training on augmented data rather than starting from scratch.
history_aug = model.fit(
    aug_train_generator,
    validation_data=aug_validation_generator,
    epochs=10
)

Found 96 images belonging to 3 classes.
Found 23 images belonging to 3 classes.
Epoch 1/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 389ms/step - accuracy: 0.7140 - loss: 0.7291 - val_accuracy: 0.6957 - val_loss: 0.6003
Epoch 2/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 350ms/step - accuracy: 0.7756 - loss: 0.7161 - val_accuracy: 0.6957 - val_loss: 0.7971
Epoch 3/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 340ms/step - accuracy: 0.7692 - loss: 0.5883 - val_accuracy: 0.6522 - val_loss: 1.0410
Epoch 4/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 367ms/step - accuracy: 0.8052 - loss: 0.6922 - val_accuracy: 0.8261 - val_loss: 0.6800
Epoch 5/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 392ms/step - accuracy: 0.7472 - loss: 0.5855 - val_accuracy: 0.5652 - val_loss: 1.0552
Epoch 6/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 401ms/step - accuracy: 0.8315 - loss: 0.5428 - 

In [None]:
# Before Augmentation
# Training vs. validation accuracy per epoch for the first training run.
for metric in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric], label=metric)
plt.title('Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# After Augmentation
# Training vs. validation accuracy per epoch for the run on augmented data.
aug_metrics = history_aug.history
ax = plt.gca()
ax.plot(aug_metrics['accuracy'], label='accuracy')
ax.plot(aug_metrics['val_accuracy'], label='val_accuracy')
ax.legend()
ax.set_title('Model Accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
plt.show()

In [None]:
# Evaluate the model
# `validation_generator` was created without shuffle=False, so the order in
# which it yields images does not match `validation_generator.classes`.
# Labels and predictions are therefore collected batch by batch, which keeps
# every prediction paired with the label of the image it was computed from.
validation_generator.reset()
prob_batches, label_batches = [], []
for batch_idx in range(len(validation_generator)):
    images, one_hot_labels = validation_generator[batch_idx]
    prob_batches.append(model.predict_on_batch(images))
    label_batches.append(one_hot_labels)

predictions = np.concatenate(prob_batches)
y_pred = np.argmax(predictions, axis=1)
y_true = np.argmax(np.concatenate(label_batches), axis=1)

# Class names ordered by their integer index, plus the matching index list.
# Passing `labels` explicitly keeps the report and matrix aligned with the
# names even when a class is missing from this small validation subset.
class_indices = validation_generator.class_indices
class_names = sorted(class_indices, key=class_indices.get)
label_ids = list(range(len(class_names)))

print(classification_report(y_true, y_pred, labels=label_ids,
                            target_names=class_names, zero_division=0))
cm = confusion_matrix(y_true, y_pred, labels=label_ids)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()

In [22]:
# Save the model
# Writes the trained network to an .h5 file; the path is relative, so it is
# resolved against the current working directory.
model_path = 'rice_leaf_disease_model1.h5'
model.save(model_path)

