In [1]:
# --- Import Libraries ---
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import warnings

# --- (a) Load Dataset ---
warnings.filterwarnings('ignore')
df = pd.read_csv("Social_Network_Ads.csv")

print("--- Data Head ---")
print(df.head())


# --- (b) Data Preprocessing ---
# If 'Gender' column exists (categorical), encode it to numeric form
if 'Gender' in df.columns:
    le = LabelEncoder()
    df['Gender'] = le.fit_transform(df['Gender'])

# Define features (X) and target (y)
# Assuming 'Purchased' is the target column
X = df.drop(columns=['Purchased'])
y = df['Purchased']

# --- (c) Split Data ---
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- (d) Apply Na√Øve Bayes Classifier ---
nb_clf = GaussianNB()
nb_clf.fit(X_train, y_train)

# --- (e) Evaluate the Model ---
y_pred = nb_clf.predict(X_test)

print("\n--- Model Evaluation ---")
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


--- Data Head ---
    User ID  Gender  Age  EstimatedSalary  Purchased
0  15624510    Male   19            19000          0
1  15810944    Male   35            20000          0
2  15668575  Female   26            43000          0
3  15603246  Female   27            57000          0
4  15804002    Male   19            76000          0

--- Model Evaluation ---
Accuracy: 0.93

Confusion Matrix:
[[51  1]
 [ 5 23]]

Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.98      0.94        52
           1       0.96      0.82      0.88        28

    accuracy                           0.93        80
   macro avg       0.93      0.90      0.91        80
weighted avg       0.93      0.93      0.92        80

