In [1]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from skimage import exposure, color
from skimage.filters import gaussian, median
from skimage.transform import resize
from skimage.segmentation import slic
from keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report



In [2]:
dataset_path = '/kaggle/input/crop-dataset-augmented/dataset-augmented2'

In [3]:
def load_dataset(dataset_path):
    data = []
    labels = []

    # Iterate through each class in the dataset
    for class_folder in os.listdir(dataset_path):
        class_path = os.path.join(dataset_path, class_folder)

        # Check if the item is a directory
        if os.path.isdir(class_path):
            # Iterate through each image in the class folder
            for img_file in os.listdir(class_path):
                img_path = os.path.join(class_path, img_file)
                img = cv2.imread(img_path)

                # Resize the image to a common size (e.g., 224x224)
                img = cv2.resize(img, (224, 224))

                data.append(img)
                labels.append(class_folder)

    return np.array(data), np.array(labels)


In [4]:

data, labels = load_dataset(dataset_path)

In [5]:
# Encode labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)


In [6]:
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(data, encoded_labels, test_size=0.2, random_state=42)


In [7]:
# Flatten the images
X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)


In [8]:
# Hyperparameter Tuning using RandomizedSearchCV
param_dist = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

In [9]:
# Initialize RandomizedSearchCV
random_search = RandomizedSearchCV(
    RandomForestClassifier(random_state=42),
    param_distributions=param_dist,
    n_iter=10,  # Adjust the number of iterations as needed
    cv=5,
    random_state=42,
    n_jobs=-1  # Utilize multiple cores
)


In [10]:
# Fit the model
random_search.fit(X_train_flat, y_train)


In [11]:
# Get the best parameters
best_params_random = random_search.best_params_
print(f"Best Parameters (RandomizedSearchCV): {best_params_random}")


Best Parameters (RandomizedSearchCV): {'n_estimators': 150, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 30}


In [12]:
# Use the best model for predictions
best_model_random = random_search.best_estimator_
y_pred_tuned_random = best_model_random.predict(X_test_flat)


In [13]:
# Evaluate the model
accuracy_tuned_random = accuracy_score(y_test, y_pred_tuned_random)
print(f"Accuracy after hyperparameter tuning (RandomizedSearchCV): {accuracy_tuned_random}")


Accuracy after hyperparameter tuning (RandomizedSearchCV): 0.38333333333333336


In [14]:
# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred_tuned_random)
print("Confusion Matrix:\n", conf_matrix)


Confusion Matrix:
 [[3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0]
 [2 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0]
 [2 0 2 3 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 4 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 1 2 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 1 0 2 0 2 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
 [1 1 0 3 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 2]
 [0 0 0 1 0 0 0 0 2 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0]
 [0 0 0 0 0 0 0 0 1 1 0 0 1 1 0 0 0 1 0 1 0 1 1 2 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 1 0 0 1 0 1 1 0 0 1 0 0]
 [1 1 0 0 0 0 0 1 0 0 0 0 2 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 1]
 [1 0 0 0 0 0 0 0 0 1 0 0 0 5 0 2 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 2 0 0 0 0 0 0 0 5 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1]
 [1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 

In [15]:
# Classification Report
class_report = classification_report(y_test, y_pred_tuned_random)
print("Classification Report:\n", class_report)

Classification Report:
               precision    recall  f1-score   support

           0       0.23      0.60      0.33         5
           1       0.40      0.50      0.44         8
           2       0.67      0.18      0.29        11
           3       0.21      0.57      0.31         7
           4       0.25      0.14      0.18         7
           5       0.40      0.29      0.33         7
           6       0.54      0.88      0.67         8
           7       0.00      0.00      0.00        11
           8       0.25      0.25      0.25         8
           9       0.50      0.10      0.17        10
          10       0.50      0.86      0.63         7
          11       0.40      0.40      0.40        10
          12       0.33      0.22      0.27         9
          13       0.71      0.50      0.59        10
          14       0.83      0.56      0.67         9
          15       0.12      0.25      0.17         4
          16       1.00      0.18      0.31        11
   