In [17]:
import numpy as np
import pandas as pd
import csv
import random

In [87]:
# Load (review text, sentiment label) pairs from c.csv.
# Column 0 is used as the review text (lower-cased); column 1 is mapped to
# 1 when it reads "positive" (case/whitespace-insensitive) and to 0 for
# any other value.
reviews, sentiments = [], []
# newline="" hands newline handling to the csv module, as its documentation
# requires, so quoted fields containing line breaks are parsed correctly.
with open("c.csv", "r", encoding="utf-8", newline="") as f:
    reader = csv.reader(f)
    # Skip the first row (presumably the header); the default argument keeps
    # an empty file from raising StopIteration.
    next(reader, None)
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; indexing it would raise IndexError.
            continue
        reviews.append(row[0].lower())
        sentiments.append(1 if row[1].strip().lower() == "positive" else 0)

# Labels become an ndarray so later cells can build boolean masks from them.
sentiments = np.array(sentiments)

In [88]:
# Positional train/test split: the leading TRAIN_FRACTION of rows is used for
# training and the remainder for testing.
# NOTE(review): rows are not shuffled, so this relies on c.csv not being
# ordered by label — confirm.
TRAIN_FRACTION = 0.8

# Both sequences are cut at the same index, so they must be the same length.
assert len(reviews) == len(sentiments), "reviews and sentiments are misaligned"
split_idx = int(TRAIN_FRACTION * len(reviews))

train_reviews = reviews[:split_idx]
train_sentiments = sentiments[:split_idx]

test_reviews = reviews[split_idx:]
test_sentiments = sentiments[split_idx:]


In [None]:
# create a function to train the model

def train_naive_bayes(train_vectors, train_sentiments):
    """Fit a two-class multinomial Naive Bayes model with add-one smoothing.

    Parameters
    ----------
    train_vectors : array-like, shape (num_docs, num_words)
        One row per document, one column per vocabulary word.
        Presumably word counts — the code only requires non-negative numbers.
    train_sentiments : array-like, shape (num_docs,)
        Class label per document; rows labelled 1 form the positive class
        and rows labelled 0 the negative class.

    Returns
    -------
    phi_y : float
        Prior of class 1 (sum of the labels divided by num_docs).
    phi_y0 : float
        Prior of class 0, computed as ``1 - phi_y``.
    phi_x_y1 : numpy.ndarray, shape (num_words,)
        Smoothed per-word probabilities for class 1.
    phi_x_y0 : numpy.ndarray, shape (num_words,)
        Smoothed per-word probabilities for class 0.

    Raises
    ------
    ValueError
        If ``train_vectors`` is not 2-D, is empty, or ``train_sentiments``
        does not hold exactly one label per row.
    """
    # Coerce to ndarrays: with a plain Python list, `labels == 1` evaluates to
    # the scalar False and the boolean-mask selection below silently picks
    # nothing instead of the positive rows.
    train_vectors = np.asarray(train_vectors)
    train_sentiments = np.asarray(train_sentiments)

    if train_vectors.ndim != 2:
        raise ValueError("train_vectors must be 2-D (num_docs, num_words)")
    num_docs, num_words = train_vectors.shape
    if num_docs == 0:
        raise ValueError("train_vectors must contain at least one document")
    if train_sentiments.shape != (num_docs,):
        raise ValueError("train_sentiments must hold one label per document")

    # calculate the prior probabilities
    phi_y = np.sum(train_sentiments) / num_docs
    phi_y0 = 1 - phi_y

    # Select each class's documents once and reuse the subset for both the
    # per-word totals (numerator) and the grand total (denominator).
    pos_docs = train_vectors[train_sentiments == 1]
    neg_docs = train_vectors[train_sentiments == 0]

    # calculate the conditional probabilities (add-one smoothing: +1 per word
    # in the numerator, +num_words in the denominator)
    phi_x_y1 = (np.sum(pos_docs, axis=0) + 1) / (np.sum(pos_docs) + num_words)
    phi_x_y0 = (np.sum(neg_docs, axis=0) + 1) / (np.sum(neg_docs) + num_words)

    return phi_y, phi_y0, phi_x_y1, phi_x_y0

# Fit the model on the training split and keep the four parameters as
# notebook-level names for the prediction cells.
# NOTE(review): `train_vectors` is not created by any cell visible here; it
# presumably comes from a vectorisation cell — confirm the notebook still
# runs after Restart & Run All.
phi_y, phi_y0, phi_x_y1, phi_x_y0 = train_naive_bayes(
    train_vectors=train_vectors,
    train_sentiments=train_sentiments,
)

# create a function to predict the sentiment of a review

def predict_naive_bayes(test_vectors, phi_y, phi_y0, phi_x_y1, phi_x_y0):
    """Classify each document vector as 1 or 0 by comparing class log-scores.

    Parameters
    ----------
    test_vectors : iterable of array-like, each of length num_words
        One vector per document, aligned with ``phi_x_y1`` / ``phi_x_y0``.
    phi_y, phi_y0 : float
        Priors of class 1 and class 0.
    phi_x_y1, phi_x_y0 : array-like, shape (num_words,)
        Per-word probabilities for class 1 and class 0.

    Returns
    -------
    list of int
        1 where the class-1 score is strictly greater than the class-0
        score, otherwise 0 (ties go to 0). Empty input yields ``[]``.
    """
    # The log-parameters do not depend on the document, so compute them once
    # instead of once per loop iteration.
    log_phi_x_y1 = np.log(phi_x_y1)
    log_phi_x_y0 = np.log(phi_x_y0)
    log_phi_y = np.log(phi_y)
    log_phi_y0 = np.log(phi_y0)

    y_pred = []
    for vector in test_vectors:
        # Log-space score: sum over words of value * log P(word | class),
        # plus the log prior.
        p_y1 = np.sum(log_phi_x_y1 * vector) + log_phi_y
        p_y0 = np.sum(log_phi_x_y0 * vector) + log_phi_y0
        y_pred.append(1 if p_y1 > p_y0 else 0)
    return y_pred

# Predict a label for every held-out document.
# NOTE(review): `test_vectors` is not created by any cell visible here —
# confirm where it is defined.
y_pred = predict_naive_bayes(
    test_vectors,
    phi_y=phi_y,
    phi_y0=phi_y0,
    phi_x_y1=phi_x_y1,
    phi_x_y0=phi_x_y0,
)

# Accuracy is the fraction of predictions that equal the true label
# (element-wise comparison, then the mean of the resulting booleans).
accuracy = np.mean(np.equal(y_pred, test_sentiments))

print(f"Accuracy: {accuracy:.2f}")

In [None]:
# predict the sentiment of a new review

def predict_review(review, phi_y, phi_y0, phi_x_y1, phi_x_y0, word2index):
    """Classify one raw review string as "Positive" or "Negative".

    Parameters
    ----------
    review : str
        Raw review text.
    phi_y, phi_y0, phi_x_y1, phi_x_y0
        Model parameters, passed unchanged to ``predict_naive_bayes``.
    word2index
        Vocabulary lookup passed unchanged to ``review_to_vector``
        (presumably a word -> column-index mapping; verify against its
        definition).

    Returns
    -------
    str
        "Positive" when the predicted label is 1, otherwise "Negative".
    """
    # The training reviews are lower-cased when they are loaded, so the new
    # review must be lower-cased as well; otherwise capitalised tokens such
    # as "This" cannot match the casing the model was trained on.
    vector = review_to_vector(review.lower(), word2index)
    # predict_naive_bayes expects a collection of vectors, so wrap the single one.
    y_pred = predict_naive_bayes([vector], phi_y, phi_y0, phi_x_y1, phi_x_y0)
    return "Positive" if y_pred[0] == 1 else "Negative"

# Run the classifier on two hand-written sample reviews and print each
# review followed by its predicted sentiment.
demo_reviews = [
    "This is a great movie. I loved it.",
    "This is a terrible movie. I hated it.",
]

for review in demo_reviews:
    print(f"Review: {review}")
    sentiment = predict_review(review, phi_y, phi_y0, phi_x_y1, phi_x_y0, word2index)
    print(f"Sentiment: {sentiment}")