In [None]:
# --- Import Libraries ---
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import warnings

# --- Schema expected by the spam pipeline ---
# Alternative header spellings mapped onto the column names used below.
COLUMN_ALIASES = {'label': 'Category', 'message': 'Message'}
REQUIRED_COLUMNS = ('Category', 'Message')
# Numeric encoding of the class labels (1 = spam, 0 = ham).
LABEL_CODES = {'spam': 1, 'ham': 0}


def prepare_spam_frame(raw: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of *raw* ready for bag-of-words classification.

    The input is not modified. 'label'/'message' headers are renamed to
    'Category'/'Message', rows lacking either value are dropped, labels are
    compared case-insensitively with surrounding whitespace ignored, and
    'Category' is encoded as 1 for spam and 0 for ham.

    Raises:
        ValueError: if a required column is absent (for example when a
            purely numeric table is loaded instead of a spam corpus), if a
            label is neither 'spam' nor 'ham', or if no usable row remains.
    """
    # Only rename when the target name is free, so a frame that already has
    # 'Category'/'Message' never ends up with duplicate column labels.
    renames = {
        old: new
        for old, new in COLUMN_ALIASES.items()
        if old in raw.columns and new not in raw.columns
    }
    frame = raw.rename(columns=renames)

    missing = [name for name in REQUIRED_COLUMNS if name not in frame.columns]
    if missing:
        raise ValueError(
            f"Dataset is missing required column(s) {missing}; "
            f"found columns {list(frame.columns)}. Expected a spam dataset "
            "with 'label'/'message' (or 'Category'/'Message') columns."
        )

    frame = frame.dropna(subset=list(REQUIRED_COLUMNS))
    if frame.empty:
        raise ValueError(
            "Dataset contains no rows with both a label and a message."
        )

    labels = frame['Category'].astype(str).str.strip().str.lower()
    unknown = sorted(set(labels) - set(LABEL_CODES))
    if unknown:
        # Fail here rather than letting unmapped labels turn into NaN and
        # surface later as an obscure error inside the classifier.
        raise ValueError(
            f"Unrecognised label value(s) {unknown}; "
            f"expected one of {sorted(LABEL_CODES)}."
        )

    return frame.assign(
        Category=labels.map(LABEL_CODES).astype(int),
        Message=frame['Message'].astype(str),
    )


if __name__ == "__main__":
    # --- (a) Load Dataset ---
    # NOTE: this silences every warning for the rest of the session.
    warnings.filterwarnings('ignore')
    df = pd.read_csv("NaiveBayes.csv")  # or spam.csv, sms_spam.csv, etc.

    print("--- Data Head ---")
    print(df.head())

    # --- (b) Data Preprocessing ---
    # Validates the schema and converts 'spam'/'ham' to 1/0.
    df = prepare_spam_frame(df)
    y = df['Category']

    # --- (c) Split Data ---
    # Split the raw text first so the vocabulary is learned from the
    # training messages only.
    msg_train, msg_test, y_train, y_test = train_test_split(
        df['Message'], y, test_size=0.2, random_state=42
    )

    # --- (d) Feature Extraction using Bag of Words ---
    cv = CountVectorizer(stop_words='english')
    X_train = cv.fit_transform(msg_train)
    X_test = cv.transform(msg_test)
    # Full-corpus matrix under its original name, encoded with the
    # training vocabulary.
    X = cv.transform(df['Message'])

    # --- (e) Apply Naïve Bayes Classifier ---
    nb_clf = MultinomialNB()
    nb_clf.fit(X_train, y_train)

    # --- (f) Evaluate the Model ---
    y_pred = nb_clf.predict(X_test)

    print("\n--- Model Evaluation ---")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))


--- Data Head ---
   Age  Salary  Purchased
0   19   19000          0
1   35   20000          0
2   26   43000          0
3   27   57000          0
4   19   76000          0


KeyError: 'Category'