# GreenClassify: Deep Learning-Based Vegetable Image Classification

This notebook demonstrates the complete pipeline for building a CNN-based vegetable image classification system.

## Project Overview
- **Objective**: Classify vegetable images into 15 categories
- **Model**: Convolutional Neural Network (CNN)
- **Framework**: TensorFlow/Keras

## 1. Data Collection

Download the Vegetable Image Dataset from Kaggle. The dataset contains:
- Train: 15,000 images
- Test: 3,000 images
- Validation: 3,000 images

Each of these three folders (train, test, validation) contains one subfolder per vegetable category, 15 in total.

In [None]:
# Setup Kaggle API (for Google Colab)
# Upload your kaggle.json file first
# The commands below expect kaggle.json in the current working directory.

# Create the Kaggle config directory, copy the credentials file into it,
# and restrict the file to owner read/write only.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the dataset
# Fetches the misrakahmed/vegetable-image-dataset archive with the Kaggle CLI.
!kaggle datasets download -d misrakahmed/vegetable-image-dataset

In [None]:
# Unzip the dataset
# Extracts into the current working directory; the path cell further down
# expects the result under "/content/Vegetable Images".
!unzip vegetable-image-dataset.zip

## 2. Import Required Libraries

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping
from keras.utils import load_img, img_to_array

# Report the installed TensorFlow version
print(f"TensorFlow Version: {tf.__version__}")

## 3. Data Analysis and Visualization

In [None]:
# Define data paths
train_path = "/content/Vegetable Images/train"
validation_path = "/content/Vegetable Images/validation"
test_path = "/content/Vegetable Images/test"

# Get image categories: one sub-directory of the training folder per class.
# Plain files are skipped so they are not counted as categories, and the
# names are sorted because os.listdir returns entries in arbitrary order
# (sorted order also mirrors how flow_from_directory numbers its classes).
image_categories = sorted(
    entry
    for entry in os.listdir(train_path)
    if os.path.isdir(os.path.join(train_path, entry))
)
print(f"Number of categories: {len(image_categories)}")
print(f"Categories: {image_categories}")

In [None]:
# Function to plot sample images from each category
def plot_images(image_categories, base_path):
    """Show one sample image per category in a grid with five columns.

    The number of rows is derived from the number of categories, so the
    function works for any category count (15 categories give a 3x5 grid).

    Args:
        image_categories: Names of the category sub-folders of ``base_path``.
        base_path: Directory that holds one sub-folder per category.
    """
    n_cols = 5
    # Ceiling division: always enough rows for every category (at least one)
    n_rows = max(1, (len(image_categories) + n_cols - 1) // n_cols)
    plt.figure(figsize=(15, 4 * n_rows))

    for i, cat in enumerate(image_categories):
        category_dir = os.path.join(base_path, cat)
        # Sort the listing so the same sample is shown on every run
        first_image_name = sorted(os.listdir(category_dir))[0]
        img = load_img(os.path.join(category_dir, first_image_name))
        # Scale pixels to [0, 1] so imshow renders the float array correctly
        img_arr = img_to_array(img) / 255.0

        plt.subplot(n_rows, n_cols, i + 1)
        plt.imshow(img_arr)
        plt.title(cat)
        plt.axis('off')

    plt.suptitle('Sample Images from Each Category', fontsize=16)
    plt.tight_layout()
    plt.show()

plot_images(image_categories, train_path)

In [None]:
# Count images per category
def count_images_per_category(base_path):
    """Return a mapping of category name to the number of entries in its folder.

    Only sub-directories of ``base_path`` are treated as categories, so stray
    files next to the class folders (archives, OS metadata, ...) are ignored
    instead of raising ``NotADirectoryError``.

    Args:
        base_path: Directory containing one sub-directory per category.

    Returns:
        dict mapping each sub-directory name (in sorted order) to the number
        of entries found directly inside it.
    """
    counts = {}
    for cat in sorted(os.listdir(base_path)):
        cat_path = os.path.join(base_path, cat)
        if not os.path.isdir(cat_path):
            continue  # not a category folder
        counts[cat] = len(os.listdir(cat_path))
    return counts

# Tally the training split and print one line per category plus the total
train_counts = count_images_per_category(train_path)
print("Training images per category:")
for category in train_counts:
    print(f"  {category}: {train_counts[category]}")
total_training_images = sum(train_counts.values())
print(f"\nTotal training images: {total_training_images}")

## 4. Data Pre-Processing

Configure ImageDataGenerator for data augmentation and preprocessing.

In [None]:
# Input resolution fed to the network and the mini-batch size
IMG_HEIGHT = 150
IMG_WIDTH = 150
BATCH_SIZE = 32

# Validation and test pipeline: pixel rescaling only, no augmentation
val_test_datagen = ImageDataGenerator(rescale=1./255)

# Training pipeline: same rescaling plus random geometric augmentation
train_datagen = ImageDataGenerator(
    fill_mode='nearest',
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    height_shift_range=0.2,
    width_shift_range=0.2,
    rotation_range=20,
    rescale=1./255,
)

In [None]:
# Options shared by the three directory iterators
_flow_options = dict(
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

# Augmented batches for training
train_image_generator = train_datagen.flow_from_directory(train_path, **_flow_options)

# Rescaled-only batches for validation
val_image_generator = val_test_datagen.flow_from_directory(validation_path, **_flow_options)

# Rescaled-only batches for testing; shuffling is disabled for this iterator
test_image_generator = val_test_datagen.flow_from_directory(
    test_path, shuffle=False, **_flow_options
)

In [None]:
# Invert the generator's label encoding (name -> index) into index -> name
class_map = {
    index: label
    for label, index in train_image_generator.class_indices.items()
}
print("Class Mapping:")
print(class_map)

## 5. Model Building

Build a Convolutional Neural Network (CNN) for image classification.

In [None]:
# Define the number of classes from the training generator's label encoding,
# so the output layer always matches the one-hot labels the generator yields
# (a raw directory listing could also contain non-class entries).
NUM_CLASSES = len(train_image_generator.class_indices)
print(f"Number of classes: {NUM_CLASSES}")

# Build the CNN model
model = Sequential()  # Model object

# Add Convolutional Layers.
# The input shape follows IMG_HEIGHT / IMG_WIDTH so it stays in sync with the
# target_size used by the data generators (3 = RGB channels).
model.add(Conv2D(filters=32, kernel_size=3, strides=1, padding='same', activation='relu',
                 input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)))
model.add(MaxPooling2D(2))

model.add(Conv2D(filters=64, kernel_size=3, strides=1, padding='same', activation='relu'))
model.add(MaxPooling2D(2))

# Flatten the feature map
model.add(Flatten())

# Add the fully connected layers
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(128, activation='relu'))
# Softmax over the classes, matching the categorical (one-hot) labels
model.add(Dense(NUM_CLASSES, activation='softmax'))

# Print the model summary
model.summary()

In [None]:
# Render the layer graph (with layer names and tensor shapes) to a PNG file
from tensorflow.keras.utils import plot_model

plot_model(
    model,
    show_layer_names=True,
    show_shapes=True,
    to_file='model_architecture.png',
)

## 6. Model Training

In [None]:
# Compile with Adam and categorical cross-entropy; track accuracy
model.compile(
    metrics=['accuracy'],
    loss='categorical_crossentropy',
    optimizer='adam',
)

# Early stopping: watch the validation loss with a patience of 5 epochs
# and restore the best weights when training stops
early_stopping = EarlyStopping(
    verbose=1,
    restore_best_weights=True,
    patience=5,
    monitor='val_loss',
)

print("Model compiled successfully!")

In [None]:
# Train the model
EPOCHS = 100  # upper bound; early stopping may end training sooner

# Take the step counts from the generators instead of hard-coded dataset
# sizes: len(generator) is the number of batches in one full pass, including
# the final partial batch, so no images are skipped and the values stay
# correct if the dataset or BATCH_SIZE changes.
STEPS_PER_EPOCH = len(train_image_generator)
VALIDATION_STEPS = len(val_image_generator)

hist = model.fit(
    train_image_generator,
    epochs=EPOCHS,
    verbose=1,
    validation_data=val_image_generator,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_steps=VALIDATION_STEPS,
    callbacks=[early_stopping]
)

## 7. Training Visualization

In [None]:
# Draw loss and accuracy curves side by side from the recorded history
h = hist.history

plt.style.use('ggplot')
plt.figure(figsize=(12, 5))

# (training key, validation key, line colour, metric label) for each panel
_panels = (
    ('loss', 'val_loss', 'red', "Loss"),
    ('accuracy', 'val_accuracy', 'blue', "Accuracy"),
)

for _position, (_train_key, _val_key, _colour, _metric) in enumerate(_panels, start=1):
    plt.subplot(1, 2, _position)
    # Solid line for the training curve, dashed line for the validation curve
    plt.plot(h[_train_key], c=_colour, label=f"Training {_metric}")
    plt.plot(h[_val_key], c=_colour, linestyle='--', label=f"Validation {_metric}")
    plt.xlabel("Number of Epochs")
    plt.ylabel(_metric)
    plt.title(f"Training and Validation {_metric}")
    plt.legend(loc='best')

plt.tight_layout()
plt.savefig('training_history.png')
plt.show()

## 8. Model Evaluation

In [None]:
# Score the trained model on the test generator (returns loss, then accuracy)
test_metrics = model.evaluate(test_image_generator)
test_loss, test_accuracy = test_metrics
test_accuracy_percent = test_accuracy * 100

print(f"\nTest Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f} ({test_accuracy_percent:.2f}%)")

In [None]:
# Class probabilities for every test image, reduced to the most likely index
predictions = model.predict(test_image_generator)
predicted_classes = predictions.argmax(axis=1)

# Integer labels recorded by the test generator
true_classes = test_image_generator.classes

print(f"Number of predictions: {len(predicted_classes)}")

In [None]:
# Per-class precision / recall / F1 on the test predictions
from sklearn.metrics import classification_report, confusion_matrix

# Label names taken from the keys of the generator's class_indices mapping
class_names = list(train_image_generator.class_indices)

print("Classification Report:")
report_text = classification_report(
    true_classes,
    predicted_classes,
    target_names=class_names,
)
print(report_text)

In [None]:
# Heatmap of true vs. predicted labels on the test split
import seaborn as sns

cm = confusion_matrix(true_classes, predicted_classes)

# Annotate each cell with its integer count and label both axes by class name
_heatmap_options = dict(
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)

plt.figure(figsize=(15, 12))
sns.heatmap(cm, **_heatmap_options)
plt.title('Confusion Matrix')
plt.ylabel('True')
plt.xlabel('Predicted')
plt.tight_layout()
plt.savefig('confusion_matrix.png')
plt.show()

## 9. Save the Model

In [None]:
# Persist the trained model twice: first as a single H5 file, then under a
# path without extension (SavedModel format)
_save_targets = (
    ('vegetable_classification.h5', "Model saved as 'vegetable_classification.h5'"),
    ('vegetable_classification_savedmodel', "Model saved in SavedModel format"),
)

for _target, _message in _save_targets:
    model.save(_target)
    print(_message)

## 10. Test the Model with Sample Images

In [None]:
# Reload the H5 file written earlier; compile=False skips restoring the
# training configuration, which is not needed for inference
from keras.models import load_model

loaded_model = load_model(
    'vegetable_classification.h5',
    compile=False,
)
print("Model loaded successfully!")

In [None]:
# Function to predict vegetable from image
def predict_vegetable(image_path, model, class_names=None, target_size=(150, 150)):
    """Classify a single image file, display it, and return the predicted label.

    Args:
        image_path: Path of the image file to classify.
        model: Trained Keras model whose ``predict`` returns class scores.
        class_names: Optional mapping (dict or sequence) from class index to
            label. Defaults to the 15 vegetable labels in alphabetical order.
        target_size: (height, width) the image is resized to before
            prediction; must match the size the model was trained on.

    Returns:
        The label of the highest-scoring class.
    """
    if class_names is None:
        # Default class mapping
        class_names = {
            0: 'Bean',
            1: 'Bitter_Gourd',
            2: 'Bottle_Gourd',
            3: 'Brinjal',
            4: 'Broccoli',
            5: 'Cabbage',
            6: 'Capsicum',
            7: 'Carrot',
            8: 'Cauliflower',
            9: 'Cucumber',
            10: 'Papaya',
            11: 'Potato',
            12: 'Pumpkin',
            13: 'Radish',
            14: 'Tomato',
        }

    # Load and preprocess image
    img = load_img(image_path, target_size=target_size)
    # Scale pixels to [0, 1]: the training, validation and test generators all
    # use rescale=1./255, so raw 0-255 values would not match what the model
    # was trained on.
    img_arr = img_to_array(img) / 255.0
    img_input = np.expand_dims(img_arr, axis=0)  # add the batch dimension

    # Make prediction
    pred = int(np.argmax(model.predict(img_input)))
    label = class_names[pred]

    # Display results
    plt.imshow(img)
    plt.title(f"Predicted: {label}")
    plt.axis('off')
    plt.show()

    return label

# Run the predictor on the first file listed in the Tomato test folder
_tomato_dir = test_path + "/Tomato"
sample_image = _tomato_dir + "/" + os.listdir(_tomato_dir)[0]
result = predict_vegetable(sample_image, loaded_model)
print(f"Prediction: {result}")

In [None]:
# Test multiple random images
import random

plt.figure(figsize=(15, 10))

for i in range(9):
    # Select random category and image
    random_cat = random.choice(image_categories)
    cat_path = os.path.join(test_path, random_cat)
    random_image = random.choice(os.listdir(cat_path))
    image_path = os.path.join(cat_path, random_image)

    # Load and predict
    img = load_img(image_path, target_size=(150, 150))
    # Scale pixels to [0, 1] to match the rescale=1./255 used by the data
    # generators; raw 0-255 values would not match the training input.
    img_arr = img_to_array(img) / 255.0
    img_input = np.expand_dims(img_arr, axis=0)  # add the batch dimension
    pred = int(np.argmax(loaded_model.predict(img_input, verbose=0)))

    # Display
    plt.subplot(3, 3, i + 1)
    plt.imshow(img)
    plt.title(f"True: {random_cat}\nPred: {class_map[pred]}")
    plt.axis('off')

plt.tight_layout()
plt.savefig('sample_predictions.png')
plt.show()

## Summary

This notebook demonstrated:
1. **Data Collection**: Downloaded vegetable image dataset from Kaggle
2. **Data Analysis**: Explored the dataset structure and visualized samples
3. **Data Preprocessing**: Applied image augmentation using ImageDataGenerator
4. **Model Building**: Built a CNN with Conv2D, MaxPooling2D, Flatten, Dense, and Dropout layers
5. **Model Training**: Trained with early stopping callback
6. **Model Evaluation**: Evaluated on test data with classification report
7. **Model Saving**: Saved in H5 format (for Flask deployment) and in SavedModel format

The saved model (`vegetable_classification.h5`) can now be used in the Flask web application for real-time vegetable classification.