Mount Google Drive to access files

In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem at this location.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Import necessary libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

Load the dataset

In [None]:
# Path of the labelled SMS dataset (CSV whose first line is a header row).
file_path = '/content/train.csv'
try:
    # header=0 tells pandas that the first line of the file is a header and that
    # `names` should replace it. Without it the header line is parsed as a
    # regular data row, which injects a bogus "sms"/"label" sample, adds a
    # spurious third class, and forces the label column to string dtype.
    data = pd.read_csv(file_path, header=0, names=['sms', 'label'])
except FileNotFoundError:
    print(f"File not found at: {file_path}")
    print("Please double-check the file path and make sure the file exists.")
    # Re-raise so execution stops here; otherwise later cells would fail with a
    # confusing NameError or silently reuse a stale `data` from a previous run.
    raise

Explore the dataset

In [None]:
# Show the first five rows as plain text to sanity-check the loaded columns.
print(data.head(n=5))

                                                 sms  label
0                                                sms  label
1  Go until jurong point, crazy.. Available only ...      0
2                    Ok lar... Joking wif u oni...\n      0
3  Free entry in 2 a wkly comp to win FA Cup fina...      1
4  U dun say so early hor... U c already then say...      0


Split the dataset into training and testing sets

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) reproducible between runs.
messages = data['sms']
labels = data['label']
X_train, X_test, y_train, y_test = train_test_split(
    messages,
    labels,
    test_size=0.2,
    random_state=42,
)

Text vectorization using CountVectorizer

In [None]:
# Bag-of-words features: the vocabulary is learned from the training messages
# only, and the same fitted vectorizer is then applied to the test messages.
vectorizer = CountVectorizer()
X_train_vectorized = vectorizer.fit_transform(raw_documents=X_train)
X_test_vectorized = vectorizer.transform(raw_documents=X_test)

Build a Naive Bayes classifier

In [None]:
# Multinomial Naive Bayes with default settings, trained on the vectorized
# training messages and their labels.
classifier = MultinomialNB()
classifier.fit(X=X_train_vectorized, y=y_train)

Make predictions on the test set

In [None]:
# Predict a label for every held-out message using the fitted classifier.
predictions = classifier.predict(X=X_test_vectorized)

Evaluate the model

In [None]:
# Compare the predictions with the true test labels using three complementary
# views: the confusion matrix, the per-class report, and the overall accuracy.
conf_matrix = confusion_matrix(y_test, predictions)
classification_rep = classification_report(y_test, predictions)
accuracy = accuracy_score(y_test, predictions)

# One print call with a newline separator emits the same three sections, in
# the same order, as three consecutive print statements would.
print(
    f'Accuracy: {accuracy}',
    f'Confusion Matrix:\n{conf_matrix}',
    f'Classification Report:\n{classification_rep}',
    sep='\n',
)

Accuracy: 0.979372197309417
Confusion Matrix:
[[950   7]
 [ 16 142]]
Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.99      0.99       957
           1       0.95      0.90      0.93       158

    accuracy                           0.98      1115
   macro avg       0.97      0.95      0.96      1115
weighted avg       0.98      0.98      0.98      1115

