In [None]:
import numpy as np
import pandas as pd
import keras
import matplotlib.pyplot as plt

from skimage.feature import hog
from sklearn.preprocessing import MinMaxScaler

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from keras import layers, models

from sklearn.metrics import accuracy_score, f1_score, precision_score , recall_score
from tabulate import tabulate

In [None]:
from keras.datasets import mnist

# Load the MNIST digits as (images, labels) pairs for the train and test splits.
(X_train , y_train) , (X_test , y_test) = mnist.load_data()

# Only the final expression of a cell is rendered, so a single preview is kept:
# the training labels.
y_train


array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [None]:
# Show the number of training images, the number of test images and the shape
# of the training array as ONE displayed value. Separate bare expressions would
# be discarded, because a cell only renders its last expression.
len(X_train), len(X_test), X_train.shape

(60000, 28, 28)

In [None]:
# Function to extract raw pixel features and scale them
def extract_raw_pixel_features(data, max_value=255.0):
    """Flatten images into feature vectors and scale them by a fixed constant.

    The scaling is data-independent (a plain division by ``max_value``), so
    every split passed to this function -- training or test -- goes through
    exactly the same transformation. Fitting a new min/max scaler on each
    call would instead scale the test split with its own statistics.

    Parameters
    ----------
    data : array-like
        Either a stack of images with shape ``(n_samples, ...)`` or an
        already flattened ``(n_samples, n_features)`` matrix.
    max_value : float, optional
        Largest possible raw pixel value; defaults to 255 (8-bit images).
        Inputs in ``[0, max_value]`` are mapped to ``[0, 1]``.

    Returns
    -------
    numpy.ndarray
        Float64 array of shape ``(n_samples, n_features)``.

    Raises
    ------
    ValueError
        If ``max_value`` is not strictly positive.
    """
    if max_value <= 0:
        raise ValueError("max_value must be positive")

    data = np.asarray(data, dtype=np.float64)

    # Reshape the data if it's in image format (3D array or higher).
    if data.ndim > 2:
        # The feature count is computed explicitly (rather than using -1) so
        # that an empty batch can still be flattened.
        n_features = int(np.prod(data.shape[1:]))
        data = data.reshape(data.shape[0], n_features)

    # Scale the pixel values to range [0, 1]
    return data / max_value


# Build the flattened, scaled pixel matrices: first for the training split,
# then for the test split.
X_train_raw_scaled, X_test_raw_scaled = map(extract_raw_pixel_features, (X_train, X_test))

# Preview the scaled test matrix as the cell output.
X_test_raw_scaled

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [None]:
# Function to extract HOG features
def extract_hog_features(data):
    """Compute a HOG descriptor for every image in ``data``.

    Each item is reshaped to a 28x28 grayscale image and described with
    9 orientation bins, 8x8-pixel cells and 2x2-cell blocks.

    Parameters
    ----------
    data : iterable of numpy.ndarray
        Images (or flattened images) holding 28 * 28 values each.

    Returns
    -------
    numpy.ndarray
        One descriptor per row; an empty array when ``data`` is empty.
    """
    features = [
        # The deprecated ``multichannel`` keyword is not passed: a 2D input is
        # treated as grayscale by default. ``visualize`` is left off because
        # the rendered HOG image was never used, so only the descriptor is
        # computed and returned.
        hog(img.reshape((28, 28)), orientations=9, pixels_per_cell=(8, 8),
            cells_per_block=(2, 2), visualize=False)
        for img in data
    ]
    return np.array(features)


# Compute the HOG descriptors: first for the training split, then for the
# test split.
X_train_hog, X_test_hog = (extract_hog_features(images) for images in (X_train, X_test))

# Preview the test descriptors as the cell output.
X_test_hog

  fd, hog_image = hog(img.reshape((28, 28)), orientations=9, pixels_per_cell=(8, 8),


array([[0.04600394, 0.        , 0.15826409, ..., 0.        , 0.        ,
        0.        ],
       [0.16635561, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.09783391,
        0.04775092],
       [0.        , 0.        , 0.        , ..., 0.01602295, 0.        ,
        0.        ]])

In [None]:
# Reshape data for CNN: add a trailing channel axis (N, 28, 28, 1) and scale
# the 8-bit pixel values to float32 in [0, 1]. Feeding raw 0-255 integers would
# give the CNN differently scaled inputs than the [0, 1] features used by the
# other models.
X_train_cnn = X_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
X_test_cnn = X_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0

In [None]:
# Define models for comparison.
# NOTE: this dict is named `models`, which rebinds the name imported by
# `from keras import layers, models`. The CNN is therefore built through the
# fully qualified `keras.models.Sequential`, so this cell can be re-run after
# the rebinding without failing on `dict` having no attribute `Sequential`.
models = {
    'SVM': SVC(),
    'Decision Tree': DecisionTreeClassifier(max_depth=20, min_samples_split=10, min_samples_leaf=5),
    'Random Forest': RandomForestClassifier(),
    # Small convolutional network: three conv layers (the first two followed
    # by max pooling) and a dense head with a 10-way softmax output.
    'CNN': keras.models.Sequential([
                layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
                layers.MaxPooling2D((2, 2)),
                layers.Conv2D(64, (3, 3), activation='relu'),
                layers.MaxPooling2D((2, 2)),
                layers.Conv2D(64, (3, 3), activation='relu'),
                layers.Flatten(),
                layers.Dense(64, activation='relu'),
                layers.Dense(10, activation='softmax')
            ])
}

In [None]:
# Maps an experiment label (model name, optionally suffixed) to its metric dict.
results = dict()

In [None]:
# Fit every model on the raw-pixel representation and score it on the test split.
print("\nTraining models with Raw Pixel Features:")
for model_name, model in models.items():
    print(f"Training {model_name}...")

    if model_name != 'CNN':
        # Classical estimators work on the flattened, scaled pixel vectors.
        model.fit(X_train_raw_scaled, y_train)
        y_pred = model.predict(X_test_raw_scaled)
    else:
        # The CNN works on image tensors and outputs class probabilities,
        # which are turned into labels with an argmax.
        model.compile(
            optimizer='adam',
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )
        model.fit(
            X_train_cnn,
            y_train,
            epochs=5,
            batch_size=64,
            validation_data=(X_test_cnn, y_test),
        )
        class_probabilities = model.predict(X_test_cnn)
        y_pred = np.argmax(class_probabilities, axis=-1)

    # Score the predictions; the per-class metrics are support-weighted averages.
    accuracy, f1, precision, recall = (
        accuracy_score(y_test, y_pred),
        f1_score(y_test, y_pred, average='weighted'),
        precision_score(y_test, y_pred, average='weighted'),
        recall_score(y_test, y_pred, average='weighted'),
    )

    results[model_name] = dict(
        accuracy=accuracy,
        f1_score=f1,
        precision=precision,
        recall=recall,
    )


Training models with Raw Pixel Features:
Training SVM...
Training Decision Tree...
Training Random Forest...
Training CNN...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Train and evaluate models with HOG features
print("\nTraining models with HOG Features:")
for model_name, model in models.items():
    if model_name == 'CNN':
        # The CNN takes 28x28x1 images as input and cannot consume HOG
        # descriptors. Fitting it on X_train_cnn here would only keep training
        # the same network on raw pixels and record the score under a
        # misleading 'CNN_HOG' label, so it is left out of this experiment.
        print(f"Skipping {model_name}: it is trained on raw images, not HOG features.")
        continue

    print(f"Training {model_name}...")
    model.fit(X_train_hog, y_train)
    y_pred = model.predict(X_test_hog)

    # Evaluate model (per-class metrics are support-weighted averages)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')

    # The '_HOG' suffix keeps these rows distinct from the raw-pixel results.
    results[model_name + '_HOG'] = {
        'accuracy': accuracy,
        'f1_score': f1,
        'precision': precision,
        'recall': recall
    }


Training models with HOG Features:
Training SVM...
Training Decision Tree...
Training Random Forest...
Training CNN...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Render every stored experiment as one row of a grid-formatted text table.
print("\nResults:")
headers = ['Model', 'Accuracy', 'F1 Score', 'Precision', 'Recall']
table = []
for model_name, metrics in results.items():
    # Column order matches `headers`: label first, then the four metrics.
    row = [model_name] + [metrics[key] for key in ('accuracy', 'f1_score', 'precision', 'recall')]
    table.append(row)

print(tabulate(table, headers=headers, tablefmt='grid'))


Results:
+-------------------+------------+------------+-------------+----------+
| Model             |   Accuracy |   F1 Score |   Precision |   Recall |
| SVM               |     0.9793 |   0.979286 |    0.979302 |   0.9793 |
+-------------------+------------+------------+-------------+----------+
| Decision Tree     |     0.8798 |   0.879668 |    0.879819 |   0.8798 |
+-------------------+------------+------------+-------------+----------+
| Random Forest     |     0.9702 |   0.970182 |    0.970193 |   0.9702 |
+-------------------+------------+------------+-------------+----------+
| CNN               |     0.9872 |   0.987197 |    0.987247 |   0.9872 |
+-------------------+------------+------------+-------------+----------+
| SVM_HOG           |     0.9724 |   0.972379 |    0.972423 |   0.9724 |
+-------------------+------------+------------+-------------+----------+
| Decision Tree_HOG |     0.8399 |   0.839584 |    0.839837 |   0.8399 |
+-------------------+------------+-------