1. EDA

In [None]:
#1.1 class distribution for whole dataset
import os
import cv2
import numpy as np
from tqdm import tqdm  # For progress bar
import matplotlib.pyplot as plt
import random
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

dataset_path = "Plant Village Dataset/data/data with aug/"

# Collect the class folders first. os.listdir() returns entries in arbitrary
# order and may contain stray files; numbering every entry (as before) left
# gaps in the "classN" labels and made the bar order filesystem-dependent.
class_dirs = sorted(
    entry for entry in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
)

# Count entries per class, keyed by a short "classN" label
# (N follows the sorted folder order).
class_counts = {}
class_labels = []
for idx, class_dir in enumerate(class_dirs, start=1):
    class_path = os.path.join(dataset_path, class_dir)
    label = f'class{idx}'
    class_labels.append(label)
    class_counts[label] = len(os.listdir(class_path))

# Plot class distribution
plt.figure(figsize=(12, 6))
plt.bar(list(class_counts.keys()), list(class_counts.values()), color='skyblue')
plt.xlabel("Classes")
plt.ylabel("Number of Images")
plt.title("Class Distribution")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# 1.2 Display one random image from each of (up to) 4 randomly chosen classes
# Only sub-directories are classes; stray files would make os.listdir() fail below.
sample_class_dirs = [
    entry for entry in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
]

# random.sample() raises ValueError when asked for more items than exist,
# so cap the request at the number of classes actually present.
for class_dir in random.sample(sample_class_dirs, min(4, len(sample_class_dirs))):
    class_path = os.path.join(dataset_path, class_dir)
    image_files = os.listdir(class_path)
    if not image_files:  # empty class folder: nothing to show
        continue
    image_file = random.choice(image_files)
    # Context manager closes the underlying file once the figure has been drawn.
    with Image.open(os.path.join(class_path, image_file)) as image:
        plt.figure()
        plt.imshow(image)
        plt.title(class_dir)
        plt.axis("off")
        plt.show()

In [None]:
# 1.3 sample augmented images

import os
from PIL import Image
import matplotlib.pyplot as plt
import random

data_dir = "Plant Village Dataset/data/data with aug/"

# Extensions are compared in lower case so files such as "leaf.JPG" are not skipped.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

# Set the random seed for reproducibility
random.seed(51)

# Get a mapping of class directories to image paths
class_images = {}
for root, dirs, files in os.walk(data_dir):
    # Sorting in place fixes the traversal order; together with the sorted
    # image lists below this is what makes the seeded choice repeatable
    # (os.walk / os.listdir return entries in arbitrary order).
    dirs.sort()
    for dir_name in dirs:  # Loop through each class folder
        class_dir = os.path.join(root, dir_name)
        images = sorted(
            os.path.join(class_dir, file)
            for file in os.listdir(class_dir)
            if file.lower().endswith(IMAGE_EXTENSIONS)
        )
        if images:  # Ensure the class has images
            class_images[dir_name] = images

# Select one random image from each class
selected_images = [random.choice(images) for images in class_images.values()]

# Display up to 5 sample images from different classes
fig, axes = plt.subplots(1, 5, figsize=(15, 5))
for i, ax in enumerate(axes):
    ax.axis('off')  # also hides the frame of panels that stay empty
    if i < len(selected_images):
        img = Image.open(selected_images[i])  # Open image without resizing
        ax.imshow(img)
plt.suptitle("Sample Images From Different Classes")
plt.show()

In [None]:
# 1.4 all of Try1 eda.py

import os
import cv2
import numpy as np
np.bool = bool  # Temporary fix for deprecated alias

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from tqdm import tqdm
import plotly.express as px
from collections import Counter

# Set paths
IMAGE_PATH = "Plant Village Dataset/data/data with aug/"  # Path to training images (adjust as needed)


# Function to load an image
def load_image(image_id):
    """Load ``<IMAGE_PATH>/<image_id>.jpg`` and return it as an RGB array.

    Args:
        image_id: File name without the ``.jpg`` extension, relative to IMAGE_PATH.

    Returns:
        The image converted from OpenCV's BGR channel order to RGB.

    Raises:
        FileNotFoundError: If the file is missing or cannot be decoded.
    """
    file_path = os.path.join(IMAGE_PATH, image_id + ".jpg")
    image = cv2.imread(file_path)
    # cv2.imread signals failure by returning None instead of raising; check it
    # here so the caller gets the offending path rather than a cvtColor error.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {file_path}")
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Visualize one leaf image
def visualize_one_leaf(image):
    """Show *image* in a Plotly figure after resizing it with ``cv2.resize(image, (205, 136))``."""
    thumbnail = cv2.resize(image, (205, 136))
    figure = px.imshow(thumbnail)
    # Hide the colour scale and give the figure its title.
    figure.update_layout(title="Leaf Image", coloraxis_showscale=False)
    figure.show()

# Visualize channel distributions
def visualize_channel_distributions(image):
    """Overlay histograms (with KDE curves) of the three colour planes of *image*.

    Planes 0, 1 and 2 of the last axis are labelled Red, Green and Blue and
    drawn in the matching colour.
    """
    plt.figure(figsize=(12, 6))
    for plane, channel in enumerate(("Red", "Green", "Blue")):
        pixel_values = image[:, :, plane].flatten()
        sns.histplot(pixel_values, bins=50, kde=True, label=channel, color=channel.lower())
    plt.title("Channel Intensity Distributions")
    plt.xlabel("Pixel Intensity")
    plt.ylabel("Frequency")
    plt.legend()
    plt.show()

# Visualize sample leaves from different categories
def visualize_sample_leaves(image_path, categories, num_samples=5):
    """Draw a grid with up to *num_samples* images for each category.

    Args:
        image_path: Directory that contains one sub-directory per category.
        categories: Names of the sub-directories to show, one grid row each.
        num_samples: Maximum number of images (grid columns) per category.
    """
    if not categories:
        return  # nothing to draw; a zero-height figure would be invalid

    plt.figure(figsize=(15, len(categories) * 3))
    for i, category in enumerate(categories):
        category_path = os.path.join(image_path, category)
        # Sorted so the same files are shown on every run.
        images = sorted(os.listdir(category_path))[:num_samples]

        row_labelled = False
        for j, img_name in enumerate(images):
            img = cv2.imread(os.path.join(category_path, img_name))
            if img is None:  # unreadable / non-image file: leave the cell empty
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            ax = plt.subplot(len(categories), num_samples, i * num_samples + j + 1)
            ax.imshow(img)
            # Hide ticks and frame individually. The previous plt.axis("off")
            # also hid the y-label, so the category name was never displayed.
            ax.set_xticks([])
            ax.set_yticks([])
            for spine in ax.spines.values():
                spine.set_visible(False)
            if not row_labelled:
                ax.set_ylabel(category, fontsize=12)
                row_labelled = True
    plt.suptitle("Sample Leaves from Each Category", fontsize=16)
    plt.tight_layout()
    plt.show()

# Visualize target distributions
def visualize_targets(image_path):
    """Bar-plot how many entries each category folder under *image_path* contains."""
    names = []
    counts = []
    for entry in os.listdir(image_path):
        folder = os.path.join(image_path, entry)
        if not os.path.isdir(folder):
            continue  # only sub-directories count as categories
        names.append(entry)
        counts.append(len(os.listdir(folder)))

    plt.figure(figsize=(12, 6))
    sns.barplot(x=names, y=counts)
    plt.xticks(rotation=90)
    plt.title("Target Distribution")
    plt.xlabel("Category")
    plt.ylabel("Number of Images")
    plt.tight_layout()
    plt.show()

# Example Usage
if __name__ == "__main__":
    # Every sub-directory of IMAGE_PATH is treated as one category.
    categories = []
    for entry in os.listdir(IMAGE_PATH):
        if os.path.isdir(os.path.join(IMAGE_PATH, entry)):
            categories.append(entry)

    # Use the first listed file of the first listed category as the demo image.
    sample_category = categories[0]
    sample_dir = os.path.join(IMAGE_PATH, sample_category)
    sample_image_path = os.path.join(sample_dir, os.listdir(sample_dir)[0])
    sample_image = cv2.cvtColor(cv2.imread(sample_image_path), cv2.COLOR_BGR2RGB)

    # Run the four visualisations in order: single leaf, channel histograms,
    # per-category samples, category counts.
    visualize_one_leaf(sample_image)
    visualize_channel_distributions(sample_image)
    visualize_sample_leaves(IMAGE_PATH, categories, num_samples=5)
    visualize_targets(IMAGE_PATH)


2. Data Preprocessing

In [None]:
import os
import shutil
import random
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Define directories
original_dir = "Plant Village Dataset/data/data with aug/"  # Source dataset: one sub-directory per class (input of split_data)
split_dir = "split_dataset/"  # Output root for the train/val/test copies
preprocessed_dir = "preprocessed_dataset/"  # Output root for the resized copies of each split

# Parameters
IMG_SIZE = (224, 224)  # Target size handed to load_img when resizing
BATCH_SIZE = 32  # NOTE(review): not referenced by the preprocessing steps in this cell
TEST_SIZE = 0.2  # Fraction of each class held out as the test split
VAL_SIZE = 0.1  # Fraction of each class (of the whole class, not of the remainder) used for validation

# Step 1: Split data into train, val, and test
def split_data(original_dir, split_dir, test_size, val_size):
    """Copy each class folder of *original_dir* into train/val/test folders under *split_dir*.

    Args:
        original_dir: Directory with one sub-directory per class.
        split_dir: Output root; files are copied to ``<split_dir>/<split>/<class>/``.
        test_size: Fraction of each class that goes to the test split.
        val_size: Fraction of each class (relative to the whole class) that
            goes to the validation split.
    """
    os.makedirs(split_dir, exist_ok=True)

    # Validation is carved out of what is left after the test split, so its
    # fraction has to be re-expressed relative to that remainder.
    val_fraction = val_size / (1 - test_size)

    for class_name in sorted(os.listdir(original_dir)):
        class_dir = os.path.join(original_dir, class_name)
        if not os.path.isdir(class_dir):
            continue

        # Sort the file names: os.listdir() order is arbitrary, so without this
        # random_state=42 would not reproduce the same split across runs/machines.
        # Sub-directories are skipped because shutil.copy only handles files.
        images = sorted(
            name for name in os.listdir(class_dir)
            if os.path.isfile(os.path.join(class_dir, name))
        )
        images_train, images_test = train_test_split(images, test_size=test_size, random_state=42)
        images_train, images_val = train_test_split(images_train, test_size=val_fraction, random_state=42)

        # Save split data
        splits = {"train": images_train, "val": images_val, "test": images_test}
        for split_name, split_images in splits.items():
            split_class_dir = os.path.join(split_dir, split_name, class_name)
            os.makedirs(split_class_dir, exist_ok=True)
            for img in split_images:
                shutil.copy(os.path.join(class_dir, img), os.path.join(split_class_dir, img))


split_data(original_dir, split_dir, TEST_SIZE, VAL_SIZE)

# Step 2: Perform undersampling on the training set
def undersample_data(train_dir, high_threshold=3500, target_high=1400, seed=None):
    """Randomly delete files from over-represented class folders in *train_dir*.

    Every class sub-directory that holds more than *high_threshold* files is
    reduced to *target_high* files; all other classes are left untouched.
    Files are removed from disk permanently.

    Args:
        train_dir: Directory with one sub-directory per class.
        high_threshold: A class is undersampled only if it has more files than this.
        target_high: Number of files kept in an undersampled class.
        seed: Optional seed for a private random generator, making the choice
            of deleted files repeatable. ``None`` uses the global ``random`` state.
    """
    rng = random.Random(seed) if seed is not None else random

    for class_name in os.listdir(train_dir):
        class_dir = os.path.join(train_dir, class_name)
        if not os.path.isdir(class_dir):
            continue

        # Only regular files are candidates (os.remove cannot delete folders);
        # sorted so that a given seed always selects the same files.
        images = sorted(
            name for name in os.listdir(class_dir)
            if os.path.isfile(os.path.join(class_dir, name))
        )
        num_images = len(images)
        if num_images <= high_threshold:
            continue  # class is small enough: keep every image

        excess = num_images - target_high
        if excess <= 0:  # target not below the current size: nothing to delete
            continue
        for img in rng.sample(images, excess):
            os.remove(os.path.join(class_dir, img))

# Undersampling is run on the "train" split only; "val" and "test" are not passed to it.
train_dir = os.path.join(split_dir, "train")
undersample_data(train_dir)

# Step 3: Resize and normalize images, and save them in the preprocessed directory
def preprocess_and_save_data(split_dir, preprocessed_dir, img_size):
    """Write a resized copy of every image of every split into *preprocessed_dir*.

    Args:
        split_dir: Root containing ``train``, ``val`` and ``test`` folders,
            each with one sub-directory per class.
        preprocessed_dir: Output root; the same folder layout is recreated here.
        img_size: Target size passed to ``load_img``.

    Pixel values are stored unchanged in the 0-255 range. The previous version
    divided by 255 and then called ``save_img`` with its default ``scale=True``,
    which min-max stretches each image back to 0-255: the division had no
    lasting effect and the contrast of every image was altered. Scaling to
    [0, 1] is left to the ``rescale=1/255`` step applied when the images are loaded.
    """
    os.makedirs(preprocessed_dir, exist_ok=True)

    for split in ["train", "val", "test"]:
        split_path = os.path.join(split_dir, split)
        preprocessed_split_path = os.path.join(preprocessed_dir, split)
        os.makedirs(preprocessed_split_path, exist_ok=True)

        for class_name in os.listdir(split_path):
            class_dir = os.path.join(split_path, class_name)
            if not os.path.isdir(class_dir):
                continue  # stray file next to the class folders
            preprocessed_class_dir = os.path.join(preprocessed_split_path, class_name)
            os.makedirs(preprocessed_class_dir, exist_ok=True)

            for img_name in os.listdir(class_dir):  # Only iterate over existing images
                img_path = os.path.join(class_dir, img_name)
                img = tf.keras.preprocessing.image.load_img(img_path, target_size=img_size)
                img_array = tf.keras.preprocessing.image.img_to_array(img)
                preprocessed_img_path = os.path.join(preprocessed_class_dir, img_name)
                # scale=False: write the pixel values as they are, no per-image rescaling.
                tf.keras.preprocessing.image.save_img(preprocessed_img_path, img_array, scale=False)


# Preprocess and save the final dataset
preprocess_and_save_data(split_dir, preprocessed_dir, IMG_SIZE)

EDA again (repeated on the split / preprocessed training data)

In [None]:
#1.1 class distribution for training dataset
import os
import cv2
import numpy as np
from tqdm import tqdm  # For progress bar
import matplotlib.pyplot as plt
import random
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

dataset_path = "preprocessed_dataset/train/"

# Collect the class folders first. os.listdir() returns entries in arbitrary
# order and may contain stray files; numbering every entry (as before) left
# gaps in the "classN" labels and made the bar order filesystem-dependent.
class_dirs = sorted(
    entry for entry in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
)

# Count entries per class, keyed by a short "classN" label
# (N follows the sorted folder order).
class_counts = {}
class_labels = []
for idx, class_dir in enumerate(class_dirs, start=1):
    class_path = os.path.join(dataset_path, class_dir)
    label = f'class{idx}'
    class_labels.append(label)
    class_counts[label] = len(os.listdir(class_path))

# Plot class distribution
plt.figure(figsize=(12, 6))
plt.bar(list(class_counts.keys()), list(class_counts.values()), color='skyblue')
plt.xlabel("Classes")
plt.ylabel("Number of Images")
plt.title("Class Distribution")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# 2.5 Display sample augmented images
data_dir = "preprocessed_dataset/train/"
img_height, img_width = 224, 224
batch_size = 32

# Generator used for this EDA step only: it rescales pixels by 1/255 and
# reserves 20% of the files as a validation subset. No augmentation
# transforms are configured on it.
data_gen = ImageDataGenerator(validation_split=0.2, rescale=1.0/255.0)

# Iterate over the "training" subset of the directory.
train_gen = data_gen.flow_from_directory(
    data_dir,
    subset='training',
    class_mode='categorical',
    batch_size=batch_size,
    target_size=(img_height, img_width),
)

# Pull a single batch and show its first five images side by side.
sample_images, _ = next(train_gen)
fig, axes = plt.subplots(1, 5, figsize=(15, 5))
for position in range(min(5, len(sample_images))):
    axes[position].imshow(sample_images[position])
    axes[position].axis('off')
plt.suptitle("Sample Augmented Images")
plt.show()

In [None]:
# 1.4 all of Try1 eda.py

import os
import cv2
import numpy as np
np.bool = bool  # Temporary fix for deprecated alias

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from tqdm import tqdm
import plotly.express as px
from collections import Counter

# Set paths
IMAGE_PATH = "split_dataset/train/"  # Path to training images (adjust as needed)


# Function to load an image
def load_image(image_id):
    """Load ``<IMAGE_PATH>/<image_id>.jpg`` and return it as an RGB array.

    Args:
        image_id: File name without the ``.jpg`` extension, relative to IMAGE_PATH.

    Returns:
        The image converted from OpenCV's BGR channel order to RGB.

    Raises:
        FileNotFoundError: If the file is missing or cannot be decoded.
    """
    file_path = os.path.join(IMAGE_PATH, image_id + ".jpg")
    image = cv2.imread(file_path)
    # cv2.imread signals failure by returning None instead of raising; check it
    # here so the caller gets the offending path rather than a cvtColor error.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {file_path}")
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Visualize one leaf image
def visualize_one_leaf(image):
    """Show *image* in a Plotly figure after resizing it with ``cv2.resize(image, (205, 136))``."""
    thumbnail = cv2.resize(image, (205, 136))
    figure = px.imshow(thumbnail)
    # Hide the colour scale and give the figure its title.
    figure.update_layout(title="Leaf Image", coloraxis_showscale=False)
    figure.show()

# Visualize channel distributions
def visualize_channel_distributions(image):
    """Overlay histograms (with KDE curves) of the three colour planes of *image*.

    Planes 0, 1 and 2 of the last axis are labelled Red, Green and Blue and
    drawn in the matching colour.
    """
    plt.figure(figsize=(12, 6))
    for plane, channel in enumerate(("Red", "Green", "Blue")):
        pixel_values = image[:, :, plane].flatten()
        sns.histplot(pixel_values, bins=50, kde=True, label=channel, color=channel.lower())
    plt.title("Channel Intensity Distributions")
    plt.xlabel("Pixel Intensity")
    plt.ylabel("Frequency")
    plt.legend()
    plt.show()

# Visualize sample leaves from different categories
def visualize_sample_leaves(image_path, categories, num_samples=5):
    """Draw a grid with up to *num_samples* images for each category.

    Args:
        image_path: Directory that contains one sub-directory per category.
        categories: Names of the sub-directories to show, one grid row each.
        num_samples: Maximum number of images (grid columns) per category.
    """
    if not categories:
        return  # nothing to draw; a zero-height figure would be invalid

    plt.figure(figsize=(15, len(categories) * 3))
    for i, category in enumerate(categories):
        category_path = os.path.join(image_path, category)
        # Sorted so the same files are shown on every run.
        images = sorted(os.listdir(category_path))[:num_samples]

        row_labelled = False
        for j, img_name in enumerate(images):
            img = cv2.imread(os.path.join(category_path, img_name))
            if img is None:  # unreadable / non-image file: leave the cell empty
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            ax = plt.subplot(len(categories), num_samples, i * num_samples + j + 1)
            ax.imshow(img)
            # Hide ticks and frame individually. The previous plt.axis("off")
            # also hid the y-label, so the category name was never displayed.
            ax.set_xticks([])
            ax.set_yticks([])
            for spine in ax.spines.values():
                spine.set_visible(False)
            if not row_labelled:
                ax.set_ylabel(category, fontsize=12)
                row_labelled = True
    plt.suptitle("Sample Leaves from Each Category", fontsize=16)
    plt.tight_layout()
    plt.show()

# Visualize target distributions
def visualize_targets(image_path):
    """Bar-plot how many entries each category folder under *image_path* contains."""
    names = []
    counts = []
    for entry in os.listdir(image_path):
        folder = os.path.join(image_path, entry)
        if not os.path.isdir(folder):
            continue  # only sub-directories count as categories
        names.append(entry)
        counts.append(len(os.listdir(folder)))

    plt.figure(figsize=(12, 6))
    sns.barplot(x=names, y=counts)
    plt.xticks(rotation=90)
    plt.title("Target Distribution")
    plt.xlabel("Category")
    plt.ylabel("Number of Images")
    plt.tight_layout()
    plt.show()

# Example Usage
if __name__ == "__main__":
    # Every sub-directory of IMAGE_PATH is treated as one category.
    categories = []
    for entry in os.listdir(IMAGE_PATH):
        if os.path.isdir(os.path.join(IMAGE_PATH, entry)):
            categories.append(entry)

    # Use the first listed file of the first listed category as the demo image.
    sample_category = categories[0]
    sample_dir = os.path.join(IMAGE_PATH, sample_category)
    sample_image_path = os.path.join(sample_dir, os.listdir(sample_dir)[0])
    sample_image = cv2.cvtColor(cv2.imread(sample_image_path), cv2.COLOR_BGR2RGB)

    # Run the four visualisations in order: single leaf, channel histograms,
    # per-category samples, category counts.
    visualize_one_leaf(sample_image)
    visualize_channel_distributions(sample_image)
    visualize_sample_leaves(IMAGE_PATH, categories, num_samples=5)
    visualize_targets(IMAGE_PATH)

3. Model Selection and Training

In [None]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from prettytable import PrettyTable

preprocessed_dir = "preprocessed_dataset/"  # Directory for preprocessed data

# Directories (adjust these paths as necessary)
train_dir = os.path.join(preprocessed_dir, "train")
val_dir = os.path.join(preprocessed_dir, "val")
test_dir = os.path.join(preprocessed_dir, "test")

# Parameters
img_height, img_width = 224, 224
batch_size = 32

# Data augmentation and preprocessing (training images only)
# NOTE(review): inputs are scaled to [0, 1] here, while the ImageNet ResNet50
# weights used below are normally paired with
# tf.keras.applications.resnet50.preprocess_input. Confirm this is intended;
# any change must be mirrored in the evaluation and deployment code.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest"
)

# Validation and test images are only rescaled, never augmented.
val_test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

# Generate training, validation, and test data
train_gen = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode="categorical"
)

val_gen = val_test_datagen.flow_from_directory(
    val_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode="categorical"
)

test_gen = val_test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode="categorical",
    shuffle=False  # keep file order fixed so predictions line up with test_gen.classes
)

# Take the class count from the generator rather than from os.listdir():
# the directory listing also counts stray files and hidden folders, which
# would make the output layer disagree with the one-hot labels.
num_classes = len(train_gen.class_indices)

# Frozen ImageNet ResNet50 backbone (classification head removed)
base_model = ResNet50(include_top=False, weights="imagenet", input_shape=(img_height, img_width, 3))
base_model.trainable = False

# Classification head stacked on the backbone: pooling -> dropout -> softmax
model = Sequential()
model.add(base_model)
model.add(GlobalAveragePooling2D())
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation="softmax"))

# Adam optimiser with categorical cross-entropy, tracking accuracy
model.compile(
    loss="categorical_crossentropy",
    optimizer=Adam(learning_rate=0.001),
    metrics=["accuracy"],
)

# Fit on the training generator, validating on val_gen after every epoch
epochs = 10
history = model.fit(train_gen, epochs=epochs, validation_data=val_gen)

# Persist the trained model to disk
model.save("saved_models/model_resnet50.h5")

# Loss / accuracy on the held-out test generator
test_loss, test_acc = model.evaluate(test_gen)
print(f"\nTest Accuracy: {test_acc:.2f}")

# Ground truth, predicted class ids, and class names for the reports below
y_true = test_gen.classes
y_pred = np.argmax(model.predict(test_gen), axis=1)
class_labels = list(test_gen.class_indices)

# Per-class precision / recall / F1 / support rendered as a PrettyTable
classification_rep = classification_report(y_true, y_pred, target_names=class_labels, output_dict=True)
table = PrettyTable()
table.field_names = ["Class", "Precision", "Recall", "F1-Score", "Support"]
aggregate_rows = ("accuracy", "macro avg", "weighted avg")
for class_name, metrics in classification_rep.items():
    if class_name in aggregate_rows:
        continue  # only per-class rows go into the table
    table.add_row([
        class_name,
        metrics['precision'],
        metrics['recall'],
        metrics['f1-score'],
        metrics['support'],
    ])

print("\nClassification Report:")
print(table)

# Confusion matrix drawn as an annotated heatmap
conf_matrix = confusion_matrix(y_true, y_pred)

plt.figure(figsize=(10, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()

# Training curves: accuracy in the left panel, loss in the right panel
plt.figure(figsize=(12, 4))

for panel, (metric, pretty) in enumerate((("accuracy", "Accuracy"), ("loss", "Loss")), start=1):
    plt.subplot(1, 2, panel)
    # Training series in blue, validation series ("val_<metric>") in orange
    plt.plot(history.history[metric], label=f'Train {pretty}', color='blue')
    plt.plot(history.history[f'val_{metric}'], label=f'Validation {pretty}', color='orange')
    plt.title(f'Model {pretty}')
    plt.xlabel('Epochs')
    plt.ylabel(pretty)
    plt.legend()

plt.show()

4. Model Evaluation

In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import matplotlib.pyplot as plt
from prettytable import PrettyTable

# Paths
MODEL_SAVE_PATH = "saved_models/model_resnet50.h5"  # Path to the saved model
TEST_DIR = "preprocessed_dataset/test/"  # Path to the test dataset

# Configuration
IMG_SIZE = (224, 224)  # Image size (must match training input size)
BATCH_SIZE = 32  # Batch size for test data

# Class names in the order of the model's output indices.
# flow_from_directory numbers classes by the alphanumeric (case-sensitive)
# order of the sub-directory names, so upper-case letters sort before
# lower-case ones (e.g. "Cherry__Powdery_mildew" before "Cherry__healthy").
# The hand-written list was in case-insensitive order; sorted() restores the
# ordering the generator uses. Assumes these strings match the directory
# names exactly -- TODO confirm against the dataset folders.
CLASS_LABELS = sorted([
    'Apple__Apple_scab', 'Apple__Black_rot', 'Apple__Cedar_apple_rust', 'Apple__healthy',
    'Blueberry__healthy', 'Cherry__healthy', 'Cherry__Powdery_mildew', 'Corn__Cercospora_leaf_spot Grat_leaf_spot',
    'Corn__Common_rust', 'Corn__healthy', 'Corn__Northern_Leaf_Blight', 'Grape__Black_rot', 'Grape__Esca_(Black_Measles)',
    'Grape__healthy', 'Grape__Leaf_blight_(Isariopsis_Leaf_Spot)', 'Orange__Haunglongbing_(Citrus_greening)',
    'Peach__Bacterial_spot', 'Peach__healthy', 'Pepper,_bell__Bacterial_spot', 'Pepper,_bell__healthy',
    'Potato__Early_blight', 'Potato__healthy', 'Potato__Late_blight', 'Raspberry__healthy', 'Soybean__healthy',
    'Squash__Powdery_mildew', 'Strawberry__healthy', 'Strawberry__Leaf_scorch', 'Tomato__Bacterial_spot',
    'Tomato__Early_blight', 'Tomato__healthy', 'Tomato__Late_blight', 'Tomato__Leaf_Mold', 'Tomato__Septoria_leaf_spot',
    'Tomato__Spider_mites Two-spotted_spider_mite', 'Tomato__Target_Spot', 'Tomato__Tomato_mosaic_virus',
    'Tomato__Tomato_Yellow_Leaf_Curl_Virus',
])

# 1. Data Preprocessing for Test Data
def load_test_data(test_dir, img_size, batch_size):
    """Return a directory iterator over *test_dir* with pixels rescaled by 1/255.

    Images are resized to *img_size*, batched in groups of *batch_size*, and
    read without shuffling.
    """
    rescale_only = ImageDataGenerator(rescale=1.0 / 255)
    return rescale_only.flow_from_directory(
        test_dir,
        shuffle=False,  # fixed order so predictions can be compared with .classes
        class_mode="categorical",
        batch_size=batch_size,
        target_size=img_size,
    )

# 2. Load the Saved Model
def load_trained_model(model_path):
    """Load the Keras model stored at *model_path*, print a confirmation, and return it."""
    trained = load_model(model_path)
    print("Model loaded successfully!")
    return trained

# 3. Evaluate the Model
def evaluate_model(model, test_generator):
    """Predict on *test_generator* and print a metrics table plus a confusion matrix.

    Args:
        model: Trained model exposing ``predict``.
        test_generator: Non-shuffling directory iterator providing ``classes``
            (true label ids) and ``class_indices`` (name -> label id).

    Prints a per-class precision/recall/F1/support table with overall rows,
    shows the confusion matrix as a heatmap with numeric tick labels, and
    prints the numeric-id -> class-name mapping.
    """
    # Class names ordered by their numeric id, and the ids themselves.
    class_indices = test_generator.class_indices
    class_labels = sorted(class_indices, key=class_indices.get)
    label_ids = [class_indices[name] for name in class_labels]

    # Predict on test data
    y_pred = model.predict(test_generator)
    y_pred_classes = np.argmax(y_pred, axis=1)  # Convert predictions to class indices
    y_true = test_generator.classes  # True labels

    # Classification report. Passing labels= keeps target_names aligned even
    # when a class never occurs in y_true / y_pred.
    print("\nClassification Report:")
    report = classification_report(
        y_true,
        y_pred_classes,
        labels=label_ids,
        target_names=class_labels,
        output_dict=True,
    )
    # With an explicit label list the report may expose "micro avg" instead of
    # "accuracy", so accuracy is computed directly.
    overall_accuracy = accuracy_score(y_true, y_pred_classes)

    # PrettyTable for classification report
    table = PrettyTable()
    table.field_names = ["Class", "Precision", "Recall", "F1-Score", "Support"]

    aggregate_keys = ("accuracy", "micro avg", "macro avg", "weighted avg")
    for class_name, metrics in report.items():
        if class_name in aggregate_keys:  # Skip aggregate metrics
            continue
        table.add_row([
            class_name,
            f"{metrics['precision']:.2f}",
            f"{metrics['recall']:.2f}",
            f"{metrics['f1-score']:.2f}",
            int(metrics['support'])
        ])

    # Add overall metrics. Support of the overall row is the number of test
    # samples; the previous sum(y_true) added up label ids instead.
    table.add_row(["Overall (Accuracy)", "-", "-", f"{overall_accuracy:.2f}", len(y_true)])
    table.add_row(["Macro Avg", f"{report['macro avg']['precision']:.2f}", f"{report['macro avg']['recall']:.2f}",
                   f"{report['macro avg']['f1-score']:.2f}", "-"])
    table.add_row(["Weighted Avg", f"{report['weighted avg']['precision']:.2f}", f"{report['weighted avg']['recall']:.2f}",
                   f"{report['weighted avg']['f1-score']:.2f}", "-"])

    print(table)

    # Confusion matrix over the full label set so its shape always matches the ticks.
    cm = confusion_matrix(y_true, y_pred_classes, labels=label_ids)

    # Plot the confusion matrix as a heatmap; numeric ids are used as tick
    # labels because the full class names would not fit.
    plt.figure(figsize=(12, 10))  # Adjust figure size
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=label_ids, yticklabels=label_ids,
                annot_kws={"size": 8})  # Decrease font size of annotations
    plt.xticks(ha='right')  # Right-align x-axis tick labels
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.tight_layout()  # Automatically adjust layout to avoid cutoff
    plt.show()

    # Legend for the numeric tick labels
    print("Class mapping:")
    for numeric_label, class_label in zip(label_ids, class_labels):
        print(f"{numeric_label}: {class_label}")


# 4. Main Workflow
if __name__ == "__main__":
    # Build the test iterator from the configured directory, size and batch size.
    test_generator = load_test_data(test_dir=TEST_DIR, img_size=IMG_SIZE, batch_size=BATCH_SIZE)

    # Restore the trained network from disk.
    model = load_trained_model(model_path=MODEL_SAVE_PATH)

    # Report metrics and the confusion matrix for the test data.
    print("Evaluating model on test data...")
    evaluate_model(model=model, test_generator=test_generator)

5. Deployment

In [None]:
import tkinter as tk
from tkinter import filedialog, messagebox
from PIL import Image, ImageTk
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# --- Paths and Configuration ---
MODEL_SAVE_PATH = "saved_models/model_resnet50.h5"
IMG_SIZE = (224, 224)

# Class names indexed by the model's output position.
# The model was trained on labels from flow_from_directory, which numbers
# classes by the alphanumeric (case-sensitive) order of the sub-directory
# names: upper-case letters sort before lower-case ones, so for example
# "Cherry__Powdery_mildew" comes before "Cherry__healthy". The hand-written
# list was in case-insensitive order, which attached the wrong name to many
# predictions; sorted() restores the ordering the generator uses.
# Assumes these strings match the training directory names exactly -- TODO confirm.
CLASS_LABELS = sorted([
    'Apple__Apple_scab', 'Apple__Black_rot', 'Apple__Cedar_apple_rust', 'Apple__healthy',
    'Blueberry__healthy', 'Cherry__healthy', 'Cherry__Powdery_mildew',
    'Corn__Cercospora_leaf_spot Grat_leaf_spot', 'Corn__Common_rust', 'Corn__healthy',
    'Corn__Northern_Leaf_Blight', 'Grape__Black_rot', 'Grape__Esca_(Black_Measles)',
    'Grape__healthy', 'Grape__Leaf_blight_(Isariopsis_Leaf_Spot)',
    'Orange__Haunglongbing_(Citrus_greening)', 'Peach__Bacterial_spot', 'Peach__healthy',
    'Pepper,_bell__Bacterial_spot', 'Pepper,_bell__healthy', 'Potato__Early_blight',
    'Potato__healthy', 'Potato__Late_blight', 'Raspberry__healthy', 'Soybean__healthy',
    'Squash__Powdery_mildew', 'Strawberry__healthy', 'Strawberry__Leaf_scorch',
    'Tomato__Bacterial_spot', 'Tomato__Early_blight', 'Tomato__healthy', 'Tomato__Late_blight',
    'Tomato__Leaf_Mold', 'Tomato__Septoria_leaf_spot', 'Tomato__Spider_mites Two-spotted_spider_mite',
    'Tomato__Target_Spot', 'Tomato__Tomato_mosaic_virus', 'Tomato__Tomato_Yellow_Leaf_Curl_Virus',
])

# --- Load Model ---
def load_trained_model(model_path):
    """Load the Keras model stored at *model_path*, print a confirmation, and return it."""
    trained = load_model(model_path)
    print("Model loaded successfully!")
    return trained

# --- Image Preprocessing ---
def preprocess_image(image_path):
    """Turn the image file at *image_path* into a single-item batch for prediction.

    The image is loaded at IMG_SIZE, converted to an array, given a leading
    batch axis, and divided by 255.
    """
    resized = load_img(image_path, target_size=IMG_SIZE)
    batch = np.expand_dims(img_to_array(resized), axis=0)
    return batch / 255.0

# --- GUI Code ---
def launch_gui(model):
    """Open a Tk window where the user picks an image and sees the model's prediction.

    The window shows a preview of the chosen image, the plant species and
    disease taken from the predicted class label, and the confidence score.
    Blocks in the Tk main loop until the window is closed.
    """

    def show_image(image_path):
        """Put a 250x250 preview of *image_path* into the image label."""
        preview = ImageTk.PhotoImage(Image.open(image_path).resize((250, 250)))
        img_label.config(image=preview)
        # Hold a reference on the widget (the usual Tkinter idiom to keep the
        # image object from being garbage-collected).
        img_label.image = preview

    def predict_image():
        """Ask for an image file, classify it, and update the result labels."""
        image_path = filedialog.askopenfilename(
            title="Select an Image",
            filetypes=[("Image Files", "*.jpg *.png *.jpeg")],
        )
        if not image_path:
            return  # no file chosen

        try:
            predictions = model.predict(preprocess_image(image_path))
            predicted_class_index = np.argmax(predictions)
            confidence_score = predictions[0][predicted_class_index]

            # Labels look like "<species>__<disease>"; split on the first "__".
            plant_species, disease = CLASS_LABELS[predicted_class_index].split("__", 1)

            show_image(image_path)
            species_label.config(text=f"Plant Species: {plant_species}")
            result_label.config(text=f"Leaf Disease: {disease}")
            confidence_label.config(text=f"Confidence: {confidence_score:.4f}")
        except Exception as e:
            # Any failure is reported in a dialog instead of ending the GUI.
            messagebox.showerror("Error", f"Failed to process image: {str(e)}")

    # Main window
    window = tk.Tk()
    window.title("Plant Disease Detection")
    window.geometry("600x600")

    def add_label(**options):
        """Create a label in the window, pack it with 10px vertical padding, return it."""
        widget = tk.Label(window, **options)
        widget.pack(pady=10)
        return widget

    # Widgets, packed top to bottom: preview, species, disease, confidence, button
    img_label = add_label()
    species_label = add_label(text="Plant Species: ", font=('Helvetica', 14, 'bold'), fg="green")
    result_label = add_label(text="Leaf Disease: ", font=('Helvetica', 14, 'bold'), fg="red")
    confidence_label = add_label(text="Confidence: ", font=('Helvetica', 12, 'italic'), fg="blue")

    predict_button = tk.Button(window, text="Select Image to Predict", command=predict_image, font=('Helvetica', 14))
    predict_button.pack(pady=20)

    window.mainloop()

# --- Main Execution ---
if __name__ == "__main__":
    # Restore the trained network, then hand it to the GUI.
    model = load_trained_model(model_path=MODEL_SAVE_PATH)
    launch_gui(model=model)