In [23]:
import numpy as np
import os
import time
from datasets import load_dataset
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb

# TensorFlow / Keras for MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from PIL import Image

# --- 1. SETTINGS ---
# Fruit classes to keep; examples with any other label are filtered out
# before feature extraction.
TARGET_CLASSES = [
    "Apple",
    "Banana",
    "Orange",
    "Mango",
    "Grapes",
    "Pineapple",
    "Watermelon",
    "Pomegranate",
    "Strawberry",
    "Lemon",
]

# Input resolution fed to MobileNetV2. The network needs at least 32x32;
# 224x224 is the standard size, while 96x96 or 128x128 trades some detail
# for speed.
IMG_SIZE = (96, 96)
BATCH_SIZE = 32

def get_data_and_extract_features():
    """Load the fruit dataset and convert every image to a MobileNetV2 feature vector.

    Downloads ``ysif9/fruit-recognition`` from the Hugging Face Hub, keeps only
    the examples whose label is one of ``TARGET_CLASSES``, and runs the images
    of the ``train`` and ``test`` splits through an ImageNet-pretrained
    MobileNetV2 with its classification head removed.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``. Each ``X`` is a 2-D
        array with one row of globally average-pooled MobileNetV2 features per
        image; each ``y`` is a 1-D array of the dataset's original label ids
        (they are not re-indexed here).
    """
    print("Loading Dataset from Hugging Face...")
    dataset = load_dataset("ysif9/fruit-recognition")

    # Translate the requested class names into the dataset's label ids.
    all_class_names = dataset['train'].features['label'].names
    missing = [name for name in TARGET_CLASSES if name not in all_class_names]
    if missing:
        # Previously these were dropped silently; surface them so a typo in
        # TARGET_CLASSES does not quietly shrink the problem.
        print(f"Warning: target classes not present in dataset and skipped: {missing}")
    target_ids = {all_class_names.index(name) for name in TARGET_CLASSES if name in all_class_names}

    # Hand only the label column to the predicate so the filter does not need
    # to materialise each example's image just to inspect its label.
    dataset = dataset.filter(lambda label: label in target_ids, input_columns='label')

    # --- SETUP MOBILENETV2 ---
    print("Loading MobileNetV2 (Pre-trained on ImageNet)...")
    # include_top=False removes the final classification layer, giving us raw features
    # pooling='avg' averages the spatial feature map into one vector per image
    base_model = MobileNetV2(
        weights='imagenet',
        include_top=False,
        input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3),
        pooling='avg',
    )

    # Number of preprocessed images buffered before each forward pass; bounds
    # how many full-size image arrays are held in memory at once.
    chunk_size = 500

    def process_and_extract(split_name):
        """Return ``(features, labels)`` arrays for one split of the filtered dataset."""
        print(f"Processing {split_name} data...")
        ds_split = dataset[split_name]

        feature_chunks = []  # one (chunk, n_features) array per flushed chunk
        labels = []
        pending = []         # preprocessed images waiting for the next forward pass

        def flush_pending():
            """Run the buffered images through the network and empty the buffer."""
            batch_arr = np.array(pending)
            feature_chunks.append(
                base_model.predict(batch_arr, batch_size=BATCH_SIZE, verbose=0)
            )
            pending.clear()

        for i, example in enumerate(ds_split):
            img = example['image']
            if img.mode != 'RGB':
                img = img.convert('RGB')
            img = img.resize(IMG_SIZE)

            # Convert to array and preprocess for MobileNet
            img_array = preprocess_input(img_to_array(img))  # scales to [-1, 1]

            pending.append(img_array)
            labels.append(example['label'])

            if len(pending) >= chunk_size:
                flush_pending()
                print(f"  Processed {i+1}/{len(ds_split)} images...", end='\r')

        # Whatever is left over after the last full chunk.
        if pending:
            flush_pending()

        if feature_chunks:
            X = np.vstack(feature_chunks)
        else:
            # Empty split: np.vstack([]) would raise, so return a correctly
            # shaped empty feature matrix instead.
            X = np.empty((0, base_model.output_shape[-1]), dtype=np.float32)
        y = np.array(labels)
        print(f"\nFinished {split_name}. Shape: {X.shape}")
        return X, y

    X_train, y_train = process_and_extract('train')
    X_test, y_test = process_and_extract('test')

    return X_train, y_train, X_test, y_test

# --- MAIN EXECUTION ---
# Run the download + feature-extraction pipeline once; the four returned
# arrays are the inputs to every model cell that follows.
X_train, y_train, X_test, y_test = get_data_and_extract_features()

Loading Dataset from Hugging Face...


Filter:   0%|          | 0/25659 [00:00<?, ? examples/s]

Filter:   0%|          | 0/6821 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7070 [00:00<?, ? examples/s]

Loading MobileNetV2 (Pre-trained on ImageNet)...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_96_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step
Processing train data...




  Processed 17000/17203 images...
Finished train. Shape: (17203, 1280)
Processing test data...
  Processed 5000/5077 images...
Finished test. Shape: (5077, 1280)


In [24]:
# Re-index the raw dataset label ids as consecutive integers (fit on train,
# then apply the same mapping to test).
le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

# Look up the human-readable class names. A one-example slice is loaded only
# to read the label feature's name list; le.classes_ holds the original ids
# in encoded order.
temp_ds = load_dataset("ysif9/fruit-recognition", split="train[:1]")
all_names = temp_ds.features['label'].names
encoded_target_names = [all_names[class_id] for class_id in le.classes_]

# Summary banner showing the dimensionality of the extracted features.
print(
    "\n---------------------------------------------------",
    f"New Feature Shape: {X_train.shape}",
    "(Notice it is now (N, 1280) instead of (N, 12288) or similar pixels)",
    "---------------------------------------------------\n",
    sep="\n",
)


---------------------------------------------------
New Feature Shape: (17203, 1280)
(Notice it is now (N, 1280) instead of (N, 12288) or similar pixels)
---------------------------------------------------



In [25]:
from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Baseline classifiers trained on the MobileNetV2 features, each scored on
# the held-out test split. KNeighborsClassifier and DecisionTreeClassifier
# are imported here because no earlier cell brings them into scope.
baseline_models = [
    # gamma is not used by the linear kernel, so it is left at its default.
    ("SVM", SVC(kernel='linear')),
    ("K-NN", KNeighborsClassifier(n_neighbors=5)),
    # Seeded like the other models in this notebook so the score is repeatable.
    ("Decision Tree", DecisionTreeClassifier(random_state=42)),
]

for clf_name, model in baseline_models:
    model.fit(X_train, y_train_encoded)
    y_pred = model.predict(X_test)
    # NOTE: despite the variable name this is accuracy (in percent), not
    # precision; the name is kept for compatibility with existing cells.
    precision = metrics.accuracy_score(y_test_encoded, y_pred) * 100
    print("Accuracy with {0}: {1:.2f}%".format(clf_name, precision))

Accuracy with SVM: 84.60%
Accuracy with K-NN: 80.74%
Accuracy with Decision Tree: 60.35%


In [26]:
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score

# Linear SVM fitted by stochastic gradient descent: hinge loss with an L2
# penalty. The seed fixes the shuffling so the run is repeatable.
sgd_model = SGDClassifier(
    loss='hinge',
    penalty='l2',
    random_state=42,
    n_jobs=-1,  # use every available core
)

print("Training SGD (Linear SVM approximation)...")
# Train on the label-encoded targets.
sgd_model.fit(X_train, y_train_encoded)

y_pred = sgd_model.predict(X_test)
acc = 100 * accuracy_score(y_test_encoded, y_pred)
print(f"Accuracy with SGD-SVM: {acc:.2f}%")

Training SGD (Linear SVM approximation)...
Accuracy with SGD-SVM: 83.28%


In [27]:
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score

# Pipeline: PCA first, then an RBF-kernel SVM.
# The inputs here are the 1280-dimensional MobileNetV2 feature vectors (not
# raw pixels); n_components=100 projects them down to 100 components, which
# reduces the cost of the kernel computations. whiten=True rescales the
# projected components before they reach the SVM.
pca = PCA(n_components=100, whiten=True, random_state=42)
# class_weight='balanced' reweights classes to offset the uneven class sizes.
# C and gamma are hand-picked values, not the result of a search.
svc = SVC(kernel='rbf', class_weight='balanced', C=10, gamma=0.01)

svc_pca_model = make_pipeline(pca, svc)

print("Training PCA + SVM...")
# PCA is fitted on the training features only; the same projection is then
# applied to the test features inside predict().
svc_pca_model.fit(X_train, y_train_encoded)

y_pred = svc_pca_model.predict(X_test)
acc = accuracy_score(y_test_encoded, y_pred) * 100
print(f"Accuracy with PCA+SVM: {acc:.2f}%")

Training PCA + SVM...
Accuracy with PCA+SVM: 87.71%


In [29]:
# Gradient-boosted trees on the extracted features, scored with multi-class
# log loss during training.
xgb_cls = xgb.XGBClassifier(
    max_depth=5,
    n_estimators=100,
    learning_rate=0.1,
    eval_metric='mlogloss',
    random_state=42,
    n_jobs=-1,
)

print("Training XGBoost...")
xgb_cls.fit(X_train, y_train_encoded)

y_pred = xgb_cls.predict(X_test)
acc = 100 * accuracy_score(y_test_encoded, y_pred)
print(f"Accuracy with XGBoost: {acc:.2f}%")

Training XGBoost...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Accuracy with XGBoost: 83.75%


In [30]:
from sklearn.model_selection import GridSearchCV

# --- 5. GRID SEARCH CV FOR RANDOM FOREST ---
print("\n=== GridSearchCV (Random Forest) ===")
print("Starting Grid Search... this may take a while depending on your CPU.")

# Parameter grid: 3 x 3 x 2 = 18 candidates. Kept deliberately small to
# bound run time; widen the lists for a more thorough search.
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5]
}

# NOTE(review): both the forest and the grid search request all cores
# (n_jobs=-1). Nested parallelism like this can oversubscribe the CPU;
# consider parallelising at only one level if the search is slow.
rf = RandomForestClassifier(random_state=42, n_jobs=-1)

# cv=3 -> 3-fold cross-validation, i.e. 18 candidates x 3 folds = 54 fits,
# followed by a refit of the best candidate on the full training set.
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=3,
    scoring='accuracy',
    verbose=2,
    n_jobs=-1  # Use all available cores
)

# perf_counter() is a monotonic clock, so the measured duration cannot be
# distorted by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
grid_search.fit(X_train, y_train_encoded)
time_taken = time.perf_counter() - start_time

print(f"\nGrid Search complete in {time_taken:.2f} seconds.")
print(f"Best Parameters found: {grid_search.best_params_}")
print(f"Best Cross-Val Accuracy: {grid_search.best_score_:.4f}")

# Evaluate the refitted best model on the held-out test split.
best_rf = grid_search.best_estimator_
y_pred_rf = best_rf.predict(X_test)
final_acc = accuracy_score(y_test_encoded, y_pred_rf)

print(f"\nTest Set Accuracy (Best RF): {final_acc:.4f}")
print(classification_report(y_test_encoded, y_pred_rf, target_names=encoded_target_names, zero_division=0))


=== GridSearchCV (Random Forest) ===
Starting Grid Search... this may take a while depending on your CPU.
Fitting 3 folds for each of 18 candidates, totalling 54 fits

Grid Search complete in 129.41 seconds.
Best Parameters found: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Best Cross-Val Accuracy: 0.8363

Test Set Accuracy (Best RF): 0.8202
              precision    recall  f1-score   support

       Apple       0.81      0.70      0.75       435
      Banana       0.83      0.91      0.87       484
      Grapes       0.82      0.89      0.85       426
       Lemon       0.78      0.56      0.65       408
       Mango       0.77      0.62      0.69       346
      Orange       0.72      0.87      0.79       872
   Pineapple       0.86      0.86      0.86       373
 Pomegranate       0.86      0.89      0.87       787
  Strawberry       0.93      0.85      0.89       419
  Watermelon       0.91      0.87      0.89       527

    accuracy                          