# 📧 Email Spam Classifier using Logistic Regression
This notebook builds a simple spam classifier using logistic regression.

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [None]:
# Load the data set. 'spam.csv' must sit in the same directory as this notebook.
# Only the columns 'v1' and 'v2' are kept; they are renamed to 'label' and
# 'message' respectively.
df = (
    pd.read_csv("spam.csv", encoding='latin-1')[['v1', 'v2']]
    .rename(columns={'v1': 'label', 'v2': 'message'})
)
# Encode the target numerically: ham -> 0, spam -> 1.
label_codes = {'ham': 0, 'spam': 1}
df['label'] = df['label'].map(label_codes)
# Last expression of the cell: shows the first rows as a quick sanity check.
df.head()


In [None]:
# Exploratory check: bar chart of how many rows carry each label value.
label_axes = sns.countplot(data=df, x='label')
label_axes.set_title("Distribution of Spam and Ham")
plt.show()


In [None]:
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['message'])
y = df['label']


In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# Fit a logistic-regression classifier (library defaults) on the training
# split, then predict labels for the held-out test split.
model = LogisticRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)


In [None]:
# Overall fraction of test messages whose predicted label matches the truth.
accuracy = accuracy_score(y_test, y_pred)
print("✅ Accuracy:", accuracy)
# Per-class metrics, printed under a header line.
report = classification_report(y_test, y_pred)
print("\n📊 Classification Report:", report, sep="\n")
