<a href="https://colab.research.google.com/github/zmohaghegh/Customer-Sentiment-Analysis-NLP/blob/main/NLP_Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# --- Configuration -----------------------------------------------------------
# Every value a reader might want to tune lives here rather than being buried
# in the calls below.
url = 'https://raw.githubusercontent.com/Ankit152/IMDB-sentiment-analysis/master/IMDB-Dataset.csv'
N_REVIEWS = 5000       # use only the first N reviews for faster processing
TEST_SIZE = 0.2        # 80% train, 20% test
RANDOM_STATE = 42      # fixes the train/test split so the run is reproducible
MAX_FEATURES = 2000    # vocabulary size kept by the TF-IDF vectorizer
LABEL_TO_INT = {'positive': 1, 'negative': 0}

# --- Load the dataset --------------------------------------------------------
# `nrows` limits parsing to the first N_REVIEWS rows; this yields the same frame
# as reading everything and calling .head(N_REVIEWS), without parsing the rest.
df = pd.read_csv(url, nrows=N_REVIEWS)

print("Dataset loaded successfully!")
print(df.head())

# --- Encode labels -----------------------------------------------------------
# Convert sentiment labels (positive/negative) to numerical values (1/0).
# Series.map leaves NaN for any label missing from LABEL_TO_INT, which would
# otherwise only surface later as an unrelated error inside model.fit, so the
# mapping is validated before it is written back to the frame.
sentiment_codes = df['sentiment'].map(LABEL_TO_INT)
if sentiment_codes.isna().any():
    unexpected_labels = sorted(
        df.loc[sentiment_codes.isna(), 'sentiment'].astype(str).unique()
    )
    raise ValueError(f"Unexpected sentiment labels in dataset: {unexpected_labels}")
df['sentiment'] = sentiment_codes

# --- Train/test split --------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    df['review'],
    df['sentiment'],
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

# --- Vectorize ---------------------------------------------------------------
# Convert text data to numbers using TF-IDF. The vectorizer is fitted on the
# training split only and then reused, unchanged, to transform the test split.
vectorizer = TfidfVectorizer(stop_words='english', max_features=MAX_FEATURES)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# --- Train -------------------------------------------------------------------
# Logistic Regression with scikit-learn's default settings.
model = LogisticRegression()
model.fit(X_train_vec, y_train)

# --- Evaluate ----------------------------------------------------------------
# Accuracy on the held-out test split.
y_pred = model.predict(X_test_vec)
accuracy = accuracy_score(y_test, y_pred)

print(f"Model Accuracy: {accuracy * 100:.2f}%")



def predict_sentiment(text, *, vec=None, clf=None):
    """Predict the sentiment of a single piece of text.

    Args:
        text: The raw text to classify.
        vec: Optional fitted vectorizer exposing ``transform``. When omitted,
            the module-level ``vectorizer`` is used.
        clf: Optional fitted classifier exposing ``predict``. When omitted,
            the module-level ``model`` is used.

    Returns:
        ``"Positive "`` followed by the smiling-face emoji (U+1F60A) when the
        classifier predicts label 1; otherwise ``"Negative "`` followed by the
        disappointed-face emoji (U+1F61E).
    """
    # Fall back to the notebook's globals only when no override is supplied.
    vec = vectorizer if vec is None else vec
    clf = model if clf is None else clf

    # transform() takes an iterable of documents, so wrap the single text.
    text_vec = vec.transform([text])
    prediction = clf.predict(text_vec)

    # The emoji are spelled as escape sequences so the labels cannot be
    # corrupted if this file is ever re-saved or read with the wrong encoding.
    return "Positive \U0001F60A" if prediction[0] == 1 else "Negative \U0001F61E"

# Quick smoke test: run one hand-written review through the trained pipeline
# and show both the input and the predicted label.
sample_review = "I really enjoyed this project, the results are amazing!"
print(f"Review: {sample_review}")
sample_prediction = predict_sentiment(sample_review)
print(f"Prediction: {sample_prediction}")

Dataset loaded successfully!
                                              review sentiment
0  One of the other reviewers has mentioned that ...  positive
1  A wonderful little production. <br /><br />The...  positive
2  I thought this was a wonderful way to spend ti...  positive
3  Basically there's a family where a little boy ...  negative
4  Petter Mattei's "Love in the Time of Money" is...  positive
Model Accuracy: 85.10%
Review: I really enjoyed this project, the results are amazing!
Prediction: Positive 😊
