Aim: Demonstrate the text classifier using Naïve bayes classifier algorithm.
Program: Write a program to implement the naive Bayesian classifier for a sample training data set stored as a .CSV file. Compute the accuracy of the classifier, considering few test data sets.

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

In [None]:
# Load the dataset with the first row as headers
msg = pd.read_csv('training_dataset.csv', names=['message', 'label'], header=0)

In [12]:
# Print the dimensions of the dataset
print('The dimensions of the dataset:', msg.shape)

# Map labels to numerical values
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})

# Separate features and labels
X = msg.message
y = msg.labelnum

# Print features and labels
print(X)
print(y)

The dimensions of the dataset: (10, 3)
0                    I love this product
1      This is the worst experience ever
2       I am very happy with the service
3    The item was broken when it arrived
4        Great quality and fast shipping
5              Terrible customer support
6              Excellent value for money
7                     Very disappointing
8                     Amazing experience
9                    Not worth the price
Name: message, dtype: object
0    1
1    0
2    1
3    0
4    1
5    0
6    1
7    0
8    1
9    0
Name: labelnum, dtype: int64


In [8]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Print the shapes of the train and test sets
print('Training set shape:', X_train.shape)
print('Test set shape:', X_test.shape)
print('Training labels shape:', y_train.shape)
print('Test labels shape:', y_test.shape)

Training set shape: (8,)
Test set shape: (2,)
Training labels shape: (8,)
Test labels shape: (2,)


In [9]:
# Transform the text data to feature vectors
count_vect = CountVectorizer()
X_train_dtm = count_vect.fit_transform(X_train)
X_test_dtm = count_vect.transform(X_test)

In [10]:
# Train the Naive Bayes classifier
clf = MultinomialNB().fit(X_train_dtm, y_train)

In [11]:
# Predict the labels for the test set
predicted = clf.predict(X_test_dtm)

# Evaluate the classifier
print('Accuracy metrics')
print('Accuracy of the classifier:', metrics.accuracy_score(y_test, predicted))
print('Confusion matrix:\n', metrics.confusion_matrix(y_test, predicted))
print('Recall:', metrics.recall_score(y_test, predicted))
print('Precision:', metrics.precision_score(y_test, predicted))


Accuracy metrics
Accuracy of the classifier: 0.0
Confusion matrix:
 [[0 1]
 [1 0]]
Recall: 0.0
Precision: 0.0
