1.(Gaussian Na誰ve Bayes Classifier) Implement Gaussian Na誰ve Bayes Classifier on the Iris dataset from sklearn.datasets using 
 
(i)Step-by-step implementation 
(ii)In-built function 

In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import math
import warnings
warnings.filterwarnings("ignore")

iris = load_iris()
X = iris.data
y = iris.target
target_names = iris.target_names

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

class GaussianNaiveBayes:
    def fit(self, X, y):
        """Compute mean, variance, and prior probabilities for each class"""
        self.classes = np.unique(y)
        self.mean = {}
        self.var = {}
        self.priors = {}

        for c in self.classes:
            X_c = X[y == c]
            self.mean[c] = X_c.mean(axis=0)
            self.var[c] = X_c.var(axis=0)
            self.priors[c] = X_c.shape[0] / X.shape[0]

    def gaussian_probability(self, class_idx, x):
        """Calculate Gaussian probability for a feature value"""
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        numerator = np.exp(-((x - mean) ** 2) / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

    def predict(self, X):
        """Predict class labels"""
        y_pred = []
        for x in X:
            posteriors = []

            for c in self.classes:
                prior = np.log(self.priors[c])
                class_conditional = np.sum(np.log(self.gaussian_probability(c, x)))
                posterior = prior + class_conditional
                posteriors.append(posterior)

            y_pred.append(self.classes[np.argmax(posteriors)])
        return np.array(y_pred)

gnb_manual = GaussianNaiveBayes()
gnb_manual.fit(X_train, y_train)

y_pred_manual = gnb_manual.predict(X_test)

print("(i) Step-by-step Gaussian Na誰ve Bayes")
print("Accuracy:", accuracy_score(y_test, y_pred_manual))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_manual))
print("Classification Report:\n", classification_report(y_test, y_pred_manual, target_names=target_names))

from sklearn.naive_bayes import GaussianNB

gnb_builtin = GaussianNB()
gnb_builtin.fit(X_train, y_train)
y_pred_builtin = gnb_builtin.predict(X_test)

print("\n(ii) In-built GaussianNB")
print("Accuracy:", accuracy_score(y_test, y_pred_builtin))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_builtin))
print("Classification Report:\n", classification_report(y_test, y_pred_builtin, target_names=target_names))


(i) Step-by-step Gaussian Na誰ve Bayes
Accuracy: 0.9777777777777777
Confusion Matrix:
 [[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]
Classification Report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        19
  versicolor       1.00      0.92      0.96        13
   virginica       0.93      1.00      0.96        13

    accuracy                           0.98        45
   macro avg       0.98      0.97      0.97        45
weighted avg       0.98      0.98      0.98        45


(ii) In-built GaussianNB
Accuracy: 0.9777777777777777
Confusion Matrix:
 [[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]
Classification Report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        19
  versicolor       1.00      0.92      0.96        13
   virginica       0.93      1.00      0.96        13

    accuracy                           0.98        45
   macro avg       0.98      0.97      0.97        45
weighted av

2 Explore about GridSearchCV toot in scikit-learn. This is a tool that is often used for tuning hyperparameters of machine learning models. Use this tool to find the best value of K for K-NN Classifier using any dataset. 

In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

iris = load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

knn = KNeighborsClassifier()

param_grid = {
    'n_neighbors': [1, 3, 5, 7, 9, 11, 13, 15],
    'weights': ['uniform', 'distance']
}

grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1
)

grid_search.fit(X_train, y_train)

print("Best Parameters Found:", grid_search.best_params_)
print("Best Cross-Validation Accuracy:", grid_search.best_score_)

best_knn = grid_search.best_estimator_
y_pred = best_knn.predict(X_test)

print("\n Test Set Evaluation")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred, target_names=iris.target_names))

results = pd.DataFrame(grid_search.cv_results_)
print("\nAll Grid Search Results (Top 5):")
print(results[['param_n_neighbors', 'param_weights', 'mean_test_score']].head())


Best Parameters Found: {'n_neighbors': 3, 'weights': 'uniform'}
Best Cross-Validation Accuracy: 0.9428571428571428

 Test Set Evaluation
Accuracy: 1.0
Confusion Matrix:
 [[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]
Classification Report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        19
  versicolor       1.00      1.00      1.00        13
   virginica       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45


All Grid Search Results (Top 5):
   param_n_neighbors param_weights  mean_test_score
0                  1       uniform         0.923810
1                  1      distance         0.923810
2                  3       uniform         0.942857
3                  3      distance         0.942857
4                  5       uniform         0.914286
