# Text Classification 

- Email (Spam/Ham)

### Step 1: Import Necessary Libraries

In [1]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score

### Step 2: Load the Dataset

In [2]:
data = pd.read_csv("spam.csv")

### Step 3: Data Preprocessing

In [3]:
# Download and prepare stop words
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# Tokenization and text cleaning
data['Message'] = data['Message'].apply(lambda x: ' '.join(word.lower() for word in word_tokenize(x) if word.isalpha()))

# Stop words removal
data['Message'] = data['Message'].apply(lambda x: ' '.join(word for word in x.split() if word not in stop_words))

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\kavit\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Step 4: Feature Extraction

In [4]:
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(data['Message'])

### Step 5: Split the Data into Training and Testing Sets

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, data['Category'], test_size=0.2, random_state=42)

### Step 6: Build and Train the Model

In [6]:
model = MultinomialNB()
model.fit(X_train, y_train)

### Step 7: Model Evaluation

In [7]:
y_pred = model.predict(X_test)

### Step 8: Print the Results

In [8]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.9713004484304932
              precision    recall  f1-score   support

         ham       0.97      1.00      0.98       966
        spam       1.00      0.79      0.88       149

    accuracy                           0.97      1115
   macro avg       0.98      0.89      0.93      1115
weighted avg       0.97      0.97      0.97      1115

