In [1]:
# 1.1 Import Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score

In [2]:
# 1.2 Load Data
# Read the review CSV via a path relative to the notebook's working
# directory; later cells use its 'text_' and 'label' columns.
data = pd.read_csv("fake reviews dataset1.csv")

In [3]:
# Keep only the rows where both the review text ('text_') and its 'label'
# are present; a row missing either value is dropped.
data = data.dropna(axis="index", how="any", subset=["text_", "label"])

In [4]:
# 1.3 Split the dataset into features and labels
# X is the review text column, y the matching label column.
X, y = data["text_"], data["label"]

In [5]:
# 1.4 Split the data into training and testing sets
# 20% of the rows are held out for testing; the fixed random_state keeps the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [6]:
# 2.1 Create Pipelines for Naive Bayes and Random Forest
# Naive Bayes pipeline: token counts -> TF-IDF weights -> multinomial NB.
# NOTE(review): class_prior is hard-coded to [0.6, 0.4] instead of being
# derived from the data; presumably the values follow the sorted class
# labels - adjust them to the real class distribution and confirm the order.
nb_pipeline = Pipeline(
    steps=[
        ("vect", CountVectorizer()),
        ("tfidf", TfidfTransformer()),
        ("clf", MultinomialNB(class_prior=[0.6, 0.4])),
    ]
)

In [7]:
# Random Forest pipeline: token counts -> TF-IDF weights -> a 100-tree
# random forest with a fixed seed.
rf_pipeline = Pipeline(
    steps=[
        ("vect", CountVectorizer()),
        ("tfidf", TfidfTransformer()),
        ("clf", RandomForestClassifier(random_state=42, n_estimators=100)),
    ]
)

In [8]:
# 2.2 Train the Naive Bayes Model
# Fits every step of the pipeline on the training split.
nb_pipeline.fit(X=X_train, y=y_train)

In [10]:
# 2.3 Train the Random Forest Model
# Fits every step of the pipeline on the training split.
rf_pipeline.fit(X=X_train, y=y_train)

In [11]:
def evaluate_model(model, X_test, y_test):
    """Score a fitted model on a held-out set.

    Parameters
    ----------
    model : object
        Fitted estimator or pipeline exposing ``predict``.
    X_test : sequence
        Inputs to predict on.
    y_test : sequence
        True labels for ``X_test``.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1)``; precision, recall and F1 are
        computed with ``average='weighted'``.
    """
    predicted = model.predict(X_test)
    # The three per-class metrics all use the same averaging mode.
    weighted = {"average": "weighted"}
    return (
        accuracy_score(y_test, predicted),
        precision_score(y_test, predicted, **weighted),
        recall_score(y_test, predicted, **weighted),
        f1_score(y_test, predicted, **weighted),
    )

In [12]:
# Score both fitted pipelines on the same held-out test split.
(nb_accuracy, nb_precision, nb_recall, nb_f1) = evaluate_model(
    model=nb_pipeline, X_test=X_test, y_test=y_test
)
(rf_accuracy, rf_precision, rf_recall, rf_f1) = evaluate_model(
    model=rf_pipeline, X_test=X_test, y_test=y_test
)


In [13]:
# Report the Naive Bayes test metrics, one per line.
print(
    "Naive Bayes Model Performance:",
    f"Accuracy: {nb_accuracy}",
    f"Precision: {nb_precision}",
    f"Recall: {nb_recall}",
    f"F1 Score: {nb_f1}",
    sep="\n",
)

Naive Bayes Model Performance:
Accuracy: 0.854506919336741
Precision: 0.8550128383187435
Recall: 0.854506919336741
F1 Score: 0.8544234294446058


In [14]:
# Report the Random Forest test metrics, one per line (the heading keeps
# its leading blank line).
print(
    "\nRandom Forest Model Performance:",
    f"Accuracy: {rf_accuracy}",
    f"Precision: {rf_precision}",
    f"Recall: {rf_recall}",
    f"F1 Score: {rf_f1}",
    sep="\n",
)


Random Forest Model Performance:
Accuracy: 0.8622366288492707
Precision: 0.8630320708562169
Recall: 0.8622366288492707
F1 Score: 0.8621947632027056


In [15]:
# 3.2 Predict Reviews
# NOTE: this notebook defines predict_reviews again in a later cell; the
# later definition is the one in effect when the example cells run.
def predict_reviews(reviews, model):
    """Print a Fake/Genuine verdict for each review using a fitted model.

    Parameters
    ----------
    reviews : str or iterable of str
        A single review, or any iterable of reviews (list, tuple, generator).
    model : object
        Anything exposing ``predict(list_of_texts)`` that returns one label
        per text.

    Returns
    -------
    None
        Results are printed, one line per review.
    """
    # Ensure the input is a list of reviews
    if isinstance(reviews, str):
        reviews = [reviews]
    else:
        # Materialise the input: a one-shot iterable would be consumed by
        # ``model.predict`` and leave nothing to pair with the predictions.
        reviews = list(reviews)

    # Nothing to classify, so do not call the model with an empty batch.
    if not reviews:
        return

    # Predict whether the reviews are fake or genuine
    predictions = model.predict(reviews)
    for review, label in zip(reviews, predictions):
        # Label 1 is printed as "Fake", any other label as "Genuine".
        # NOTE(review): assumes fake reviews are encoded as 1 - confirm.
        print(f'Review: "{review}" - Label: {"Fake" if label == 1 else "Genuine"}')

In [17]:
# 3.2 Predict Reviews
def predict_reviews(reviews, model):
    """Print a Fake/Genuine verdict for each review using a fitted model.

    Parameters
    ----------
    reviews : str or iterable of str
        A single review, or any iterable of reviews (list, tuple, generator).
    model : object
        Anything exposing ``predict(list_of_texts)`` that returns one label
        per text, e.g. one of the fitted pipelines in this notebook.

    Returns
    -------
    None
        Results are printed, one line per review.
    """
    # Ensure the input is a list of reviews
    if isinstance(reviews, str):
        reviews = [reviews]
    else:
        # Materialise the input: a one-shot iterable (generator, map, ...)
        # would be consumed by ``model.predict`` and leave nothing to pair
        # with the predictions in the loop below.
        reviews = list(reviews)

    # Nothing to classify, so do not call the model with an empty batch.
    if not reviews:
        return

    # Predict whether the reviews are fake or genuine
    predictions = model.predict(reviews)
    for review, label in zip(reviews, predictions):
        # Label 1 is printed as "Fake", any other label as "Genuine".
        # NOTE(review): assumes fake reviews are encoded as 1 - confirm.
        print(f'Review: "{review}" - Label: {"Fake" if label == 1 else "Genuine"}')

# Example usage
# Read one review from the user and show each pipeline's verdict on it.
new_review = input("Enter the review: ")
for _banner, _fitted_model in (
    ("\nNaive Bayes Prediction:", nb_pipeline),
    ("\nRandom Forest Prediction:", rf_pipeline),
):
    print(_banner)
    predict_reviews(new_review, _fitted_model)

Enter the review:  "Good quality"



Naive Bayes Prediction:
Review: ""Good quality"" - Label: Fake

Random Forest Prediction:
Review: ""Good quality"" - Label: Fake


In [18]:
# Read one review from the user and show each pipeline's verdict on it.
new_review = input("Enter the review: ")
for _banner, _fitted_model in (
    ("\nNaive Bayes Prediction:", nb_pipeline),
    ("\nRandom Forest Prediction:", rf_pipeline),
):
    print(_banner)
    predict_reviews(new_review, _fitted_model)

Enter the review:  "This product is same as my expectations."



Naive Bayes Prediction:
Review: ""This product is same as my expectations."" - Label: Genuine

Random Forest Prediction:
Review: ""This product is same as my expectations."" - Label: Genuine


In [19]:
# Read one review from the user and show each pipeline's verdict on it.
new_review = input("Enter the review: ")
for _banner, _fitted_model in (
    ("\nNaive Bayes Prediction:", nb_pipeline),
    ("\nRandom Forest Prediction:", rf_pipeline),
):
    print(_banner)
    predict_reviews(new_review, _fitted_model)

Enter the review:  "This pillow saved my back. I love the look and feel of this pillow."



Naive Bayes Prediction:
Review: ""This pillow saved my back. I love the look and feel of this pillow."" - Label: Fake

Random Forest Prediction:
Review: ""This pillow saved my back. I love the look and feel of this pillow."" - Label: Fake


In [20]:
# Read one review from the user and show each pipeline's verdict on it.
new_review = input("Enter the review: ")
for _banner, _fitted_model in (
    ("\nNaive Bayes Prediction:", nb_pipeline),
    ("\nRandom Forest Prediction:", rf_pipeline),
):
    print(_banner)
    predict_reviews(new_review, _fitted_model)

Enter the review:  "I love the dinner set. I highly recommend this product."



Naive Bayes Prediction:
Review: ""I love the dinner set. I highly recommend this product."" - Label: Fake

Random Forest Prediction:
Review: ""I love the dinner set. I highly recommend this product."" - Label: Fake


In [21]:
# Read one review from the user and show each pipeline's verdict on it.
new_review = input("Enter the review: ")
for _banner, _fitted_model in (
    ("\nNaive Bayes Prediction:", nb_pipeline),
    ("\nRandom Forest Prediction:", rf_pipeline),
):
    print(_banner)
    predict_reviews(new_review, _fitted_model)

Enter the review:  "Worst product ever."



Naive Bayes Prediction:
Review: ""Worst product ever."" - Label: Genuine

Random Forest Prediction:
Review: ""Worst product ever."" - Label: Genuine
