In [8]:
# Import libraries
import pandas as pd
import re
import nltk

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

nltk.download('stopwords')
from nltk.corpus import stopwords

# Load dataset
data = pd.read_csv("review.csv")

# Clean text
stop_words = set(stopwords.words("english"))

def clean_text(text):
    text = text.lower()
    text = re.sub(r"[^a-z\s]", "", text)
    words = text.split()
    words = [word for word in words if word not in stop_words]
    return " ".join(words)

data["clean_review"] = data["review"].apply(clean_text)

# Convert text to numbers (IMPROVED)
vectorizer = TfidfVectorizer(
    ngram_range=(1, 2),
    max_features=5000
)

X = vectorizer.fit_transform(data["clean_review"])
y = data["sentiment"]

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Train model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Evaluate model
predictions = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, predictions))
print("\nDetailed Report:\n")
print(classification_report(y_test, predictions))

# Test custom input
def predict_sentiment(text):
    cleaned = clean_text(text)
    vector = vectorizer.transform([cleaned])
    return model.predict(vector)[0]

print("\nTest Predictions:")
print("Input: This phone is very good →", predict_sentiment("This phone is very good"))
print("Input: I hate this product →", predict_sentiment("I hate this product"))


Accuracy: 0.6

Detailed Report:

              precision    recall  f1-score   support

    negative       1.00      0.33      0.50         3
    positive       0.50      1.00      0.67         2

    accuracy                           0.60         5
   macro avg       0.75      0.67      0.58         5
weighted avg       0.80      0.60      0.57         5


Test Predictions:
Input: This phone is very good → positive
Input: I hate this product → positive


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
