In [None]:
# Task 1: Bernoulli Naive Bayes on Binary Text Data
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import accuracy_score, confusion_matrix

# Step 1: Load dataset
# The file is tab-separated with no header row, so the two columns are named
# explicitly: the class label and the raw message text.
url = "https://raw.githubusercontent.com/justmarkham/pycon-2016-tutorial/master/data/sms.tsv"
df = pd.read_csv(url, sep="\t", header=None, names=["label", "message"])

# Step 2: Encode labels (spam=1, ham=0)
df["label_num"] = df["label"].map({"ham": 0, "spam": 1})
y = df["label_num"]

# Step 3: Train/Test split
# Split the raw messages *before* vectorizing so the vocabulary is learned
# from the training portion only. Fitting the vectorizer on every message
# would leak test-set words into the model's feature space.
msg_train, msg_test, y_train, y_test = train_test_split(
    df["message"], y, test_size=0.3, random_state=42
)

# Step 4: Binary features
# binary=True records presence/absence of each word rather than its count,
# which matches the per-feature on/off model of BernoulliNB. The test
# messages are only transformed, so words never seen in training are ignored.
vectorizer = CountVectorizer(binary=True)
X_train = vectorizer.fit_transform(msg_train)
X_test = vectorizer.transform(msg_test)

# Step 5: Train BernoulliNB
bnb = BernoulliNB()
bnb.fit(X_train, y_train)

# Step 6: Evaluation
y_pred = bnb.predict(X_test)
print("=== Task 1 Results ===")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


# Task 2: Gaussian Naive Bayes on Iris Dataset

from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report

# Step 1: Load Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Step 2: Train/Test split
# Hold out 30% of the samples; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Step 3: Train GaussianNB
# fit() returns the estimator itself, so construction and training chain.
gnb = GaussianNB().fit(X_train, y_train)

# Step 4: Evaluation
# Compute the metrics first, then print them under the task banner.
y_pred = gnb.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("\n=== Task 2 Results ===")
print("Accuracy:", accuracy)
print("Classification Report:\n", report)


# Task 3: Multinomial Naive Bayes on Word Frequencies

from sklearn.naive_bayes import MultinomialNB

# Step 1: Reuse SMS Spam dataset
# Work on a copy so this task cannot modify the frame loaded earlier.
df2 = df.copy()
y2 = df2["label_num"]

# Step 2: Train/Test split
# Split the raw messages *before* vectorizing so the vocabulary (and hence
# the number of features used in the model's smoothing) comes from the
# training portion only; fitting on every message would leak test-set words.
msg2_train, msg2_test, y2_train, y2_test = train_test_split(
    df2["message"], y2, test_size=0.3, random_state=42
)

# Step 3: Word frequency features
# Default CountVectorizer keeps per-message word counts, the input that
# MultinomialNB models. The test messages are only transformed, so words
# never seen in training are ignored.
vectorizer2 = CountVectorizer()
X2_train = vectorizer2.fit_transform(msg2_train)
X2_test = vectorizer2.transform(msg2_test)

# Step 4: Train MultinomialNB
mnb = MultinomialNB()
mnb.fit(X2_train, y2_train)

# Step 5: Evaluation
y2_pred = mnb.predict(X2_test)
print("\n=== Task 3 Results ===")
print("Accuracy:", accuracy_score(y2_test, y2_pred))
print("Confusion Matrix:\n", confusion_matrix(y2_test, y2_pred))


=== Task 1 Results ===
Accuracy: 0.9814593301435407
Confusion Matrix:
 [[1445    3]
 [  28  196]]

=== Task 2 Results ===
Accuracy: 0.9777777777777777
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.92      0.96        13
           2       0.93      1.00      0.96        13

    accuracy                           0.98        45
   macro avg       0.98      0.97      0.97        45
weighted avg       0.98      0.98      0.98        45


=== Task 3 Results ===
Accuracy: 0.9850478468899522
Confusion Matrix:
 [[1432   16]
 [   9  215]]
