# Running the Benchmark Model

In [1]:
!python benchmark.py

****************************************
*        RGB Benchmark Solution        *
****************************************
Loading data...done.
****************************************
*        LDA Benchmark Solution        *
****************************************
Fitting LDA model...done.
Test accuracy: 0.3713

****************************************
*        QDA Benchmark Solution        *
****************************************
Fitting QDA model...done.
Test accuracy: 0.3623

****************************************
*     Naive Bayes Benchmark Solution    *
****************************************
Fitting Naive Bayes model...done.
Test accuracy: 0.2976

****************************************
*     Grayscale Benchmark Solution     *
****************************************
Loading data...done.
****************************************
*        LDA Benchmark Solution        *
****************************************
Fitting LDA model...done.
Test accuracy: 0.2739

***************

# Load and prepare the data 

In [2]:
from utils import load_and_prepare_data

# Load and prepare the data
X_train, y_train, X_test, y_test = load_and_prepare_data()

# Now you have your training and test data ready for further processing
# X_train: Training images
# y_train: Training labels
# X_test: Test images
# y_test: Test labels


In [3]:
X_train

array([[[[ 59,  62,  63],
         [ 43,  46,  45],
         [ 50,  48,  43],
         ...,
         [158, 132, 108],
         [152, 125, 102],
         [148, 124, 103]],

        [[ 16,  20,  20],
         [  0,   0,   0],
         [ 18,   8,   0],
         ...,
         [123,  88,  55],
         [119,  83,  50],
         [122,  87,  57]],

        [[ 25,  24,  21],
         [ 16,   7,   0],
         [ 49,  27,   8],
         ...,
         [118,  84,  50],
         [120,  84,  50],
         [109,  73,  42]],

        ...,

        [[208, 170,  96],
         [201, 153,  34],
         [198, 161,  26],
         ...,
         [160, 133,  70],
         [ 56,  31,   7],
         [ 53,  34,  20]],

        [[180, 139,  96],
         [173, 123,  42],
         [186, 144,  30],
         ...,
         [184, 148,  94],
         [ 97,  62,  34],
         [ 83,  53,  34]],

        [[177, 144, 116],
         [168, 129,  94],
         [179, 142,  87],
         ...,
         [216, 184, 140],
        

# 1. Linear Discriminant Analysis

In [4]:
import numpy as np

In [5]:
import numpy as np

class LDAModel:
    def fit(self, X, y):
        # Compute class means
        class_means = np.array([np.mean(X[y == c], axis=0) for c in np.unique(y)])

        # Compute shared covariance matrix
        N = len(X)
        shared_covariance_matrix = np.zeros((X.shape[1], X.shape[1]))
        for c in np.unique(y):
            class_samples = X[y == c]
            class_mean_centered = class_samples - class_means[c]
            shared_covariance_matrix += np.dot(class_mean_centered.T, class_mean_centered)
        shared_covariance_matrix /= N

        self.class_means = class_means
        self.shared_covariance_matrix = shared_covariance_matrix

    def predict(self, X):
        # Calculate discriminant functions
        discriminant_values = np.dot(X, np.linalg.inv(self.shared_covariance_matrix)) @ self.class_means.T
        
        # Predict the class with the highest discriminant value
        predictions = np.argmax(discriminant_values, axis=1)
        return predictions


In [6]:
from sklearn.metrics import accuracy_score

# Load and prepare the data
X_train, y_train, X_test, y_test = load_and_prepare_data()

# Initialize the LDA model
lda = LDAModel()

# Fit the model to the training data
lda.fit(X_train.reshape(len(X_train), -1), y_train)

# Make predictions on the test data
test_preds = lda.predict(X_test.reshape(len(X_test), -1))

# Calculate accuracy
test_acc = accuracy_score(y_test, test_preds)
print("LDA Test Accuracy:", test_acc)


LDA Test Accuracy: 0.3027


# 2. Quadratic Discriminant Analysis

In [7]:
import numpy as np

class QDAModel:
    def __init__(self):
        self.class_means = None
        self.class_covariance_matrices = None

    def fit(self, X_train, y_train):
        # Compute class means
        self.class_means = np.array([np.mean(X_train[y_train == c], axis=0) for c in np.unique(y_train)])

        # Compute class covariance matrices
        self.class_covariance_matrices = []
        for c in np.unique(y_train):
            class_samples = X_train[y_train == c]
            class_mean_centered = class_samples - self.class_means[c]
            class_covariance_matrix = np.dot(class_mean_centered.T, class_mean_centered) / len(class_samples)
            self.class_covariance_matrices.append(class_covariance_matrix)

    def predict(self, X_test):
        # Calculate discriminant values
        discriminant_values = []
        for i in range(len(self.class_means)):
            class_mean = self.class_means[i]
            class_covariance_matrix = self.class_covariance_matrices[i]
            class_mean_centered = X_test - class_mean
            discriminant_value = -0.5 * np.sum(class_mean_centered.dot(np.linalg.inv(class_covariance_matrix)) * class_mean_centered, axis=1)
            discriminant_values.append(discriminant_value)

        # Predict the class with the highest discriminant value
        predictions = np.argmax(discriminant_values, axis=0)
        return predictions


In [8]:

from utils import load_and_prepare_data
from sklearn.metrics import accuracy_score

# Load and prepare the data
X_train, y_train, X_test, y_test = load_and_prepare_data()

# Initialize QDA model
qda = QDAModel()

# Fit the model to the training data
qda.fit(X_train.reshape(len(X_train), -1), y_train)

# Make predictions on the test data
test_preds = qda.predict(X_test.reshape(len(X_test), -1))

# Calculate accuracy
test_acc = accuracy_score(y_test, test_preds)
print("QDA Test Accuracy:", test_acc)


QDA Test Accuracy: 0.2364


# 3. Gaussian Naive Bayes

In [9]:
import numpy as np

class GaussianNaiveBayesModel:
    def __init__(self):
        self.class_means = None
        self.class_variances = None
        self.class_priors = None

    def fit(self, X_train, y_train):
        # Compute class means and variances
        self.class_means = np.array([np.mean(X_train[y_train == c], axis=0) for c in np.unique(y_train)])
        self.class_variances = np.array([np.var(X_train[y_train == c], axis=0) for c in np.unique(y_train)])

        # Compute class priors
        total_samples = len(X_train)
        self.class_priors = np.array([np.sum(y_train == c) / total_samples for c in np.unique(y_train)])

    def predict(self, X_test):
        # Calculate class conditional probabilities
        class_conditional_probs = []
        for i in range(len(self.class_means)):
            class_mean = self.class_means[i]
            class_variance = self.class_variances[i]
            class_conditional_prob = np.exp(-(X_test - class_mean) ** 2 / (2 * class_variance)) / np.sqrt(2 * np.pi * class_variance)
            class_conditional_probs.append(class_conditional_prob)

        # Calculate posterior probabilities
        posterior_probs = np.prod(class_conditional_probs, axis=2) * self.class_priors.reshape(-1, 1)

        # Predict the class with the highest posterior probability
        predictions = np.argmax(posterior_probs, axis=0)
        return predictions


In [10]:
from utils import load_and_prepare_data
from sklearn.metrics import accuracy_score

# Load and prepare the data
X_train, y_train, X_test, y_test = load_and_prepare_data()

# Initialize Gaussian Naive Bayes model
gnb = GaussianNaiveBayesModel()

# Fit the model to the training data
gnb.fit(X_train.reshape(len(X_train), -1), y_train)

# Make predictions on the test data
test_preds = gnb.predict(X_test.reshape(len(X_test), -1))

# Calculate accuracy
test_acc = accuracy_score(y_test, test_preds)
print("Gaussian Naive Bayes Test Accuracy:", test_acc)


Gaussian Naive Bayes Test Accuracy: 0.1


# Test Acuuracy Result for RGB & Greyscale datasets

In [12]:
from utils import load_and_prepare_data
from sklearn.metrics import accuracy_score

# Load and prepare the data for RGB
X_train_rgb, y_train_rgb, X_test_rgb, y_test_rgb = load_and_prepare_data(as_grayscale=False)

# Initialize and fit models for RGB data
lda_rgb = LDAModel()
lda_rgb.fit(X_train_rgb.reshape(len(X_train_rgb), -1), y_train_rgb)

qda_rgb = QDAModel()
qda_rgb.fit(X_train_rgb.reshape(len(X_train_rgb), -1), y_train_rgb)

gnb_rgb = GaussianNaiveBayesModel()
gnb_rgb.fit(X_train_rgb.reshape(len(X_train_rgb), -1), y_train_rgb)

# Make predictions and evaluate accuracy for RGB data
lda_preds_rgb = lda_rgb.predict(X_test_rgb.reshape(len(X_test_rgb), -1))
lda_acc_rgb = accuracy_score(y_test_rgb, lda_preds_rgb)
print("LDA Test Accuracy (RGB):", lda_acc_rgb)

qda_preds_rgb = qda_rgb.predict(X_test_rgb.reshape(len(X_test_rgb), -1))
qda_acc_rgb = accuracy_score(y_test_rgb, qda_preds_rgb)
print("QDA Test Accuracy (RGB):", qda_acc_rgb)

gnb_preds_rgb = gnb_rgb.predict(X_test_rgb.reshape(len(X_test_rgb), -1))
gnb_acc_rgb = accuracy_score(y_test_rgb, gnb_preds_rgb)
print("Gaussian Naive Bayes Test Accuracy (RGB):", gnb_acc_rgb)

# Load and prepare the data for Grayscale
X_train_gray, y_train_gray, X_test_gray, y_test_gray = load_and_prepare_data(as_grayscale=True)

# Initialize and fit models for Grayscale data
lda_gray = LDAModel()
lda_gray.fit(X_train_gray.reshape(len(X_train_gray), -1), y_train_gray)

qda_gray = QDAModel()
qda_gray.fit(X_train_gray.reshape(len(X_train_gray), -1), y_train_gray)

gnb_gray = GaussianNaiveBayesModel()
gnb_gray.fit(X_train_gray.reshape(len(X_train_gray), -1), y_train_gray)

# Make predictions and evaluate accuracy for Grayscale data
lda_preds_gray = lda_gray.predict(X_test_gray.reshape(len(X_test_gray), -1))
lda_acc_gray = accuracy_score(y_test_gray, lda_preds_gray)
print("LDA Test Accuracy (Grayscale):", lda_acc_gray)

qda_preds_gray = qda_gray.predict(X_test_gray.reshape(len(X_test_gray), -1))
qda_acc_gray = accuracy_score(y_test_gray, qda_preds_gray)
print("QDA Test Accuracy (Grayscale):", qda_acc_gray)

gnb_preds_gray = gnb_gray.predict(X_test_gray.reshape(len(X_test_gray), -1))
gnb_acc_gray = accuracy_score(y_test_gray, gnb_preds_gray)
print("Gaussian Naive Bayes Test Accuracy (Grayscale):", gnb_acc_gray)


LDA Test Accuracy (RGB): 0.3027
QDA Test Accuracy (RGB): 0.2364
Gaussian Naive Bayes Test Accuracy (RGB): 0.1
LDA Test Accuracy (Grayscale): 0.1754
QDA Test Accuracy (Grayscale): 0.2602
Gaussian Naive Bayes Test Accuracy (Grayscale): 0.1
