In [1]:
# Execute Model_2.ipynb in this kernel so the names it defines are usable below.
# NOTE(review): GaussianNaiveBayes, instantiated later in this notebook, is neither
# defined nor imported in the visible cells -- presumably it comes from Model_2.ipynb; confirm.
%run Model_2.ipynb

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load the raw table. Columns that are empty for every row are discarded right
# away: the row-wise dropna() further down removes any row containing a NaN, so
# a single all-NaN column (e.g. one created by a trailing delimiter in the CSV)
# would otherwise wipe out the entire dataset. When no such column exists this
# step is a no-op.
df = pd.read_csv("../data/data.csv").dropna(axis=1, how="all")

# Encode the target as integers: "M" -> 1, "B" -> 0.
# Any other label becomes NaN here and its row is removed by dropna() below.
df["diagnosis"] = df["diagnosis"].map({"M": 1, "B": 0})
df = df.dropna()

# Feature matrix = every remaining column except the identifier and the target.
X = df.drop(columns=["id", "diagnosis"]).values.astype(float)
y = df["diagnosis"].values

# Stratified 80/20 split with a fixed seed so reruns give the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Classifier under comparison (GaussianNaiveBayes is not defined in the visible
# cells): train it, then collect hard labels and class probabilities for the
# held-out samples. fit() is kept as its own statement because its return value
# is not visible from here.
my_nb = GaussianNaiveBayes()
my_nb.fit(X_train, y_train)
y_pred_my, proba_my = my_nb.predict(X_test), my_nb.predict_proba(X_test)

# scikit-learn reference model, trained and queried on exactly the same arrays.
sk_nb = GaussianNB(var_smoothing=1e-9).fit(X_train, y_train)
y_pred_sk, proba_sk = sk_nb.predict(X_test), sk_nb.predict_proba(X_test)

# Accuracy of each model on the held-out split.
print("My accuracy:", accuracy_score(y_test, y_pred_my))
print("sklearn accuracy:", accuracy_score(y_test, y_pred_sk))

# Do the two models emit identical hard labels for every test sample?
same_labels = np.array_equal(y_pred_my, y_pred_sk)
print("Class predictions exactly equal:", same_labels)

print("\nMy classes_     :", my_nb.classes_)
print("sklearn classes_:", sk_nb.classes_)

# Reorder the probability columns by ascending class label on both sides, so
# that column j refers to the same class in each matrix before comparing.
order_my, order_sk = np.argsort(my_nb.classes_), np.argsort(sk_nb.classes_)
proba_my_aligned, proba_sk_aligned = proba_my[:, order_my], proba_sk[:, order_sk]

# Element-wise comparison with tight relative and absolute tolerances.
close_probs = np.allclose(proba_my_aligned, proba_sk_aligned, rtol=1e-8, atol=1e-10)
print("Probabilities numerically equal (up to tolerance):", close_probs)


My accuracy: 0.9385964912280702
sklearn accuracy: 0.9385964912280702
Class predictions exactly equal: True

My classes_     : [0 1]
sklearn classes_: [0 1]
Probabilities numerically equal (up to tolerance): True
