# Implement Naïve-Bayes – Gaussian using sklearn

In [7]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report,
)
import matplotlib.pyplot as plt
import seaborn as sns

# Pull the feature matrix and the integer class labels out of the bundled
# iris dataset.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold back 20% of the rows for evaluation; the fixed seed makes the split
# repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit Gaussian Naive Bayes on the training rows (fit() returns the estimator).
gnb = GaussianNB().fit(X_train, y_train)

# Score the held-out rows and tabulate true vs. predicted classes.
y_pred = gnb.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

# Accuracy is a single overall figure; the remaining three metrics are
# macro-averaged, i.e. computed per class and then averaged unweighted.
print("Accuracy :", accuracy_score(y_test, y_pred))
for _label, _metric in (
    ("Precision :", precision_score),
    ("Recall :", recall_score),
    ("F1-score :", f1_score),
):
    print(_label, _metric(y_test, y_pred, average='macro'))

# Two hand-written measurement rows, each wrapped in an outer list because
# predict() expects a 2-D (samples x features) input.
sample1 = [[5.1, 3.5, 1.4, 0.2]]   # expected setosa
sample2 = [[6.7, 3.1, 4.4, 1.5]]   # expected versicolor
for _tag, _sample in (
    ("Sample1 Prediction:", sample1),
    ("Sample2 Prediction:", sample2),
):
    # predict() returns an array of class indices; map the first to its name.
    print(_tag, iris.target_names[gnb.predict(_sample)[0]])


Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Accuracy : 1.0
Precision : 1.0
Recall : 1.0
F1-score : 1.0
Sample1 Prediction: setosa
Sample2 Prediction: versicolor


# Implement Naïve-Bayes – Multivariate Bernoulli using sklearn

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# Load the e-mail corpus. The cell reads two columns from it: 'text' (the
# message body) and 'spam' (the label; presumably 0 = ham, 1 = spam -- TODO
# confirm against the CSV).
path = "emails.csv"
data = pd.read_csv(path)

X = data['text']
y = data['spam']

# 80/20 split with a fixed seed; stratify=y keeps the class proportions the
# same in the training and test portions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

# Multivariate Bernoulli NB models only whether a term is present or absent.
# BernoulliNB binarizes its input at threshold 0.0 by default, so any TF-IDF
# weighting would be computed and then thrown away. Binary term-presence
# features from CountVectorizer(binary=True) express the model's real input
# directly and produce the same 0/1 matrix after binarization.
# The vocabulary is learned from the training split only, then reused for the
# test split to avoid leaking test-set terms into the model.
vectorizer = CountVectorizer(stop_words='english', binary=True)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec  = vectorizer.transform(X_test)

model = BernoulliNB()
model.fit(X_train_vec, y_train)

y_pred = model.predict(X_test_vec)
# Confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

# Precision, recall and F1 are macro-averaged: computed per class and then
# averaged with equal weight, regardless of class frequency.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision :", precision_score(y_test,y_pred, average='macro'))
print("Recall :", recall_score(y_test,y_pred, average='macro'))
print("F1-score :", f1_score(y_test,y_pred, average='macro'))




Confusion Matrix:
[[869   3]
 [ 16 258]]
Accuracy: 0.9834205933682374
Precision : 0.9852133255406195
Recall : 0.9690827362217906
F1-score : 0.97683604700025


# Implement Naïve-Bayes – Multinomial using sklearn

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report,
)
import matplotlib.pyplot as plt
import seaborn as sns

# Read the e-mail corpus and pick out the message text and its label column.
path = "emails.csv"
data = pd.read_csv(path)
X, y = data['text'], data['spam']

# 80/20 split with a fixed seed; stratify=y keeps the class proportions the
# same in both portions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# TF-IDF features with English stop words removed. binary=False keeps real
# term frequencies, which is what the multinomial model weighs.
# The vocabulary and IDF weights come from the training split only.
vectorizer = TfidfVectorizer(stop_words='english', binary=False)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Fit Multinomial Naive Bayes (fit() returns the estimator itself).
model = MultinomialNB().fit(X_train_vec, y_train)

# Evaluate on the held-out messages.
y_pred = model.predict(X_test_vec)
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

# Accuracy is overall; the other three metrics are macro-averaged (per-class
# scores averaged with equal weight).
print("Accuracy:", accuracy_score(y_test, y_pred))
for _label, _metric in (
    ("Precision :", precision_score),
    ("Recall :", recall_score),
    ("F1-score :", f1_score),
):
    print(_label, _metric(y_test, y_pred, average='macro'))




Confusion Matrix:
[[872   0]
 [105 169]]
Accuracy: 0.9083769633507853
Precision : 0.9462640736949847
Recall : 0.8083941605839415
F1-score : 0.8530961156478946
