In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
import pickle
import numpy as np

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the dataset from a CSV file located next to the notebook (relative path).
df = pd.read_csv('./diabetes.csv')

In [None]:
# Preview the first rows to sanity-check the column names and value ranges.
df.head()

Unnamed: 0,Glucose,Insulin,BMI,Age,Outcome
0,148,0,33.6,50,1
1,85,0,26.6,31,0
2,183,0,23.3,32,1
3,89,94,28.1,21,0
4,137,168,43.1,33,1


In [None]:
# (rows, columns) of the loaded frame.
df.shape

(768, 5)

In [None]:
# Predictors are every column except the label; the label is the 'Outcome' column.
X = df.drop('Outcome', axis=1)
y = df.loc[:, 'Outcome']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=65,
)

print(f"Training data shape: {X_train.shape}, {y_train.shape}")
print(f"Testing data shape: {X_test.shape}, {y_test.shape}")

Training data shape: (614, 4), (614,)
Testing data shape: (154, 4), (154,)


In [None]:
from sklearn.preprocessing import MinMaxScaler

# Learn the per-feature min/max from the training split only, so that no
# information from the test split leaks into the preprocessing step.
scaler = MinMaxScaler().fit(X_train)

# Apply the same (training-derived) scaling to both splits.
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
class CustomPerceptron:
    """Single-layer perceptron for binary (0/1) classification.

    The model is trained with the classic online perceptron rule: samples are
    visited one at a time and, whenever a sample is misclassified, the weights
    and bias are nudged towards the correct side of the decision boundary.

    Attributes:
        learning_rate: Step size applied to every weight/bias correction.
        epochs: Number of full passes over the training data.
        weights: 1-D array with one weight per feature (``None`` until trained).
        bias: Scalar offset of the decision boundary (``None`` until trained).
    """

    def __init__(self, learning_rate: float = 0.1, epochs: int = 1000):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights = None
        self.bias = None

    # Heaviside step function (not ReLU): the output is a hard 0/1 decision.
    def activation_fn(self, x: float) -> int:
        """Return 1 when ``x`` is non-negative, otherwise 0."""
        return 1 if x >= 0 else 0

    def train(self, X: np.ndarray, y: np.ndarray) -> None:
        """Fit the weights and bias on the given samples.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of n_samples labels, each 0 or 1.

        Raises:
            ValueError: If ``X`` and ``y`` contain a different number of samples.
        """
        # Coerce to plain arrays so positional indexing below is well defined
        # even when pandas objects with a non-default index are passed in.
        X = np.asarray(X)
        y = np.asarray(y).ravel()
        N, D = X.shape
        if y.shape[0] != N:
            raise ValueError(
                f"X has {N} samples but y has {y.shape[0]} labels"
            )

        # A local generator seeded with 68 draws exactly the same initial
        # weights as np.random.seed(68) + np.random.randn(D) would, but without
        # reseeding the process-wide NumPy RNG as a side effect.
        rng = np.random.RandomState(68)
        self.weights = rng.randn(D)
        self.bias = 0.01

        for _ in range(self.epochs):
            for i in range(N):
                linear_output = np.dot(X[i], self.weights) + self.bias
                y_pred = self.activation_fn(linear_output)

                # The error term is 0 for a correct prediction, so only
                # misclassified samples move the decision boundary.
                update = self.learning_rate * (y[i] - y_pred)
                self.weights += update * X[i]
                self.bias += update

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict a 0/1 label for every row of ``X``.

        Args:
            X: Array-like of shape (n_samples, n_features); a single 1-D
                sample is treated as one row.

        Returns:
            1-D integer array with one 0/1 label per sample.
        """
        samples = np.atleast_2d(np.asarray(X))
        linear_output = np.dot(samples, self.weights) + self.bias
        # Vectorised step function; same >= 0 threshold as activation_fn.
        return np.where(linear_output >= 0, 1, 0)

In [None]:
# Gaussian Naive Bayes baseline: fit on the training split, then label the test split.
naive_bayes_model = GaussianNB().fit(X_train, y_train)
y_pred_nb = naive_bayes_model.predict(X_test)

In [None]:
# scikit-learn's Perceptron with a fixed seed so the run is reproducible.
perceptron_model = Perceptron(
    max_iter=1000,
    tol=1e-3,
    random_state=42,
).fit(X_train, y_train)
y_pred_perceptron = perceptron_model.predict(X_test)

In [None]:
# Hand-written perceptron: it expects NumPy arrays, so convert the splits explicitly.
custom_perceptron = CustomPerceptron()
custom_perceptron.train(X=np.array(X_train), y=np.array(y_train))
y_pred_csm = custom_perceptron.predict(X=np.array(X_test))

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


def _holdout_scores(y_true, y_pred):
    """Return (accuracy, precision, recall, f1) for one set of predictions."""
    return (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred),
        recall_score(y_true, y_pred),
        f1_score(y_true, y_pred),
    )


# Score every model on the same held-out test labels.
accuracy_nb, precision_nb, recall_nb, f1_nb = _holdout_scores(y_test, y_pred_nb)
accuracy_perceptron, precision_perceptron, recall_perceptron, f1_perceptron = _holdout_scores(
    y_test, y_pred_perceptron
)
accuracy_csm, precision_csm, recall_csm, f1_csm = _holdout_scores(y_test, y_pred_csm)

# One identically formatted report per model.
for title, (acc, prec, rec, f1) in (
    ("Naive Bayes Model:", (accuracy_nb, precision_nb, recall_nb, f1_nb)),
    ("Perceptron Model:", (accuracy_perceptron, precision_perceptron, recall_perceptron, f1_perceptron)),
    ("Custom Perceptron Model:", (accuracy_csm, precision_csm, recall_csm, f1_csm)),
):
    print(title)
    print(f"Accuracy: {acc:.2f}")
    print(f"Precision: {prec:.2f}")
    print(f"Recall: {rec:.2f}")
    print(f"F1 Score: {f1:.2f}\n")

Naive Bayes Model:
Accuracy: 0.75
Precision: 0.74
Recall: 0.51
F1 Score: 0.60

Perceptron Model:
Accuracy: 0.76
Precision: 0.69
Recall: 0.63
F1 Score: 0.66

Custom Perceptron Model:
Accuracy: 0.70
Precision: 1.00
Recall: 0.19
F1 Score: 0.32



In [None]:
import pickle

# Everything needed to reproduce a prediction outside this notebook.
# The fitted MinMaxScaler is persisted alongside the models: all three models
# were trained on MinMax-scaled features, so any consumer of the .pkl files
# must apply the same transform to its inputs before calling predict().
#
# NOTE: pickle stores classes by reference (module + name), so loading
# 'custom_perceptron_model.pkl' requires a CustomPerceptron class to be
# importable under the same qualified name in the loading process.
artifacts = {
    'naive_bayes_model.pkl': naive_bayes_model,
    'perceptron_model.pkl': perceptron_model,
    'custom_perceptron_model.pkl': custom_perceptron,
    'scaler.pkl': scaler,
}

for filename, artifact in artifacts.items():
    with open(filename, 'wb') as file:
        pickle.dump(artifact, file)

print("Models have been saved successfully.")

Models have been saved successfully.


Steps 7 and 8 are shared on GitHub.

The Perceptron and Naive Bayes models were added to the Flask app.

In [None]:
from sklearn.base import clone
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
import numpy as np

kf = KFold(n_splits=10, shuffle=True, random_state=68)

# NOTE(review): this re-split rebinds the global X_train/X_test/y_train/y_test
# to *unscaled* data with a different seed than the earlier hold-out split.
# evaluate_model() does not use these globals; the statement is kept only
# because other cells may rely on the rebinding.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=68)

def evaluate_model(model, X, y, cv=None, scale=True):
    """Estimate classification metrics for ``model`` with K-fold cross-validation.

    A fresh, unfitted copy of ``model`` is trained on every fold, so the
    estimator passed in is left untouched. When ``scale`` is true, a
    ``MinMaxScaler`` is fitted on each fold's training part only and applied to
    that fold's test part, which mirrors the preprocessing used for the
    hold-out evaluation and avoids leaking test-fold statistics.

    Args:
        model: scikit-learn compatible classifier.
        X: pandas DataFrame of features (indexed positionally via ``.iloc``).
        y: pandas Series of binary labels aligned with ``X``.
        cv: Splitter exposing ``split(X)``; defaults to the notebook-level ``kf``.
        scale: Whether to MinMax-scale the features inside every fold.

    Returns:
        dict with the mean 'accuracy', 'precision', 'recall' and 'f1_score'
        over all folds.
    """
    splitter = kf if cv is None else cv
    fold_scores = {'accuracy': [], 'precision': [], 'recall': [], 'f1_score': []}

    for train_index, test_index in splitter.split(X):
        fold_X_train, fold_X_test = X.iloc[train_index], X.iloc[test_index]
        fold_y_train, fold_y_test = y.iloc[train_index], y.iloc[test_index]

        # clone() gives an unfitted estimator with identical hyper-parameters,
        # so no state is carried over between folds or back to the caller.
        estimator = clone(model)
        if scale:
            estimator = make_pipeline(MinMaxScaler(), estimator)

        estimator.fit(fold_X_train, fold_y_train)
        y_pred = estimator.predict(fold_X_test)

        # zero_division=0 scores a fold without predicted/actual positives as 0
        # (the value the default already falls back to) without the warning.
        fold_scores['accuracy'].append(accuracy_score(fold_y_test, y_pred))
        fold_scores['precision'].append(precision_score(fold_y_test, y_pred, zero_division=0))
        fold_scores['recall'].append(recall_score(fold_y_test, y_pred, zero_division=0))
        fold_scores['f1_score'].append(f1_score(fold_y_test, y_pred, zero_division=0))

    return {metric: np.mean(values) for metric, values in fold_scores.items()}

naive_bayes_results = evaluate_model(naive_bayes_model, X, y)
perceptron_results = evaluate_model(perceptron_model, X, y)

for title, results in (
    ("Naive Bayes Model (K-Fold CV):", naive_bayes_results),
    ("Perceptron Model (K-Fold CV):", perceptron_results),
):
    print(title)
    for metric, value in results.items():
        print(f"{metric.capitalize()}: {value:.2f}")
    print()

# NOTE(review): these final refits use the raw, unscaled X (unlike the hold-out
# models) and run after the pickle files were written, so the saved models are
# not affected by them. Confirm whether a scaled refit is intended.
naive_bayes_model.fit(X, y)
perceptron_model.fit(X, y)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Naive Bayes Model (K-Fold CV):
Accuracy: 0.76
Precision: 0.71
Recall: 0.55
F1_score: 0.61

Perceptron Model (K-Fold CV):
Accuracy: 0.56
Precision: 0.20
Recall: 0.32
F1_score: 0.22



# Comparison and Analysis of Naive Bayes and Perceptron

We see that while Naive Bayes and the custom Perceptron show broadly similar accuracy on the hold-out split, Naive Bayes has a much higher recall and F1 score. On the other hand, the custom Perceptron has a perfect precision score.
This tells us that the custom Perceptron produces no false positives but is too conservative, resulting in many missed positives (low recall).

However, when we analyse the performance metrics of both models under K-fold cross-validation, we see that Naive Bayes clearly outperforms the Perceptron. This suggests that the Perceptron generalises poorly across different splits of the data.

This can be due to the following reasons :

1.   The Perceptron assumes that the data are linearly separable. This can result in poor performance when the classes do not have a clear linear boundary.
2.   Naive Bayes works reasonably well with limited data, whereas the Perceptron generally needs a larger amount.



