# SMS Classifier

## Import necessary libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

- Load the SMS dataset
- Make sure your dataset has two columns: 'sms' (the message text) and 'label' (0 for ham, 1 for spam)
- Replace the path passed to `pd.read_csv` below with the actual filename or URL of your dataset

In [2]:
# Location of the training CSV. This is a hard-coded absolute path that looks
# like a Google Drive mount inside Colab -- TODO confirm the drive is mounted
# before this cell runs, or point DATA_PATH at your own copy of the file.
DATA_PATH = '/content/drive/MyDrive/Internship/Bharat Intern/train.csv'

# Read the whole file into a DataFrame; later cells use its 'sms' and 'label' columns.
df = pd.read_csv(DATA_PATH)

## Explore the dataset

In [3]:
# Show the first five rows as plain text to check the column names and label values.
preview = df.head(n=5)
print(preview)

                                                 sms  label
0  Go until jurong point, crazy.. Available only ...      0
1                    Ok lar... Joking wif u oni...\n      0
2  Free entry in 2 a wkly comp to win FA Cup fina...      1
3  U dun say so early hor... U c already then say...      0
4  Nah I don't think he goes to usf, he lives aro...      0


## Split the dataset into training and testing sets

In [5]:
# Inputs to the model: raw message text and its class label.
messages = df['sms']
labels = df['label']

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore every metric reported below) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    messages,
    labels,
    test_size=0.2,
    random_state=42,
)

## Feature extraction: Convert text into numerical features using CountVectorizer

In [6]:
# Learn the token vocabulary from the training messages only, so the test set
# has no influence on the feature space.
vectorizer = CountVectorizer().fit(X_train)

# Turn both splits into token-count matrices using that same fitted vocabulary.
X_train_vectorized = vectorizer.transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

## Build a Naive Bayes classifier

In [7]:
# Multinomial Naive Bayes with default hyperparameters, trained on the
# token-count features of the training split.
classifier = MultinomialNB()
# Kept as the cell's final expression so the fitted estimator is still displayed.
classifier.fit(X=X_train_vectorized, y=y_train)

## Make predictions on the test set

In [8]:
# Predicted class label for every held-out message, in the same order as y_test.
predictions = classifier.predict(X=X_test_vectorized)

## Evaluate the model

In [9]:
# Compare the held-out labels with the model's predictions.
# Fraction of test messages classified correctly.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
# Count table: rows are the true labels, columns are the predicted labels.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=predictions)
# Text table of per-class precision, recall, F1 and support.
classification_rep = classification_report(y_true=y_test, y_pred=predictions)

In [10]:
# Emit the three evaluation results as one newline-separated report.
print(
    f'Accuracy: {accuracy}',
    f'Confusion Matrix:\n{conf_matrix}',
    f'Classification Report:\n{classification_rep}',
    sep='\n',
)

Accuracy: 0.9856502242152466
Confusion Matrix:
[[949   5]
 [ 11 150]]
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       954
           1       0.97      0.93      0.95       161

    accuracy                           0.99      1115
   macro avg       0.98      0.96      0.97      1115
weighted avg       0.99      0.99      0.99      1115

