In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import re
import string


In [None]:

# Toy customer-review corpus. Each entry pairs the review text with its label
# so a review and its sentiment cannot drift out of alignment.
# Label convention: 1 = Positive, 0 = Negative
labelled_reviews = [
    ('I love this product! It works great.', 1),
    ('Terrible service, I will never come back.', 0),
    ('Excellent quality and fast shipping!', 1),
    ('Very disappointed with the purchase.', 0),
    ('Amazing value for the price.', 1),
    ('Not what I expected, quite bad.', 0),
    ('Best purchase I have made this year!', 1),
    ('Worst experience ever. Do not recommend.', 0),
    ('I am happy with the results.', 1),
    ('It was a waste of money.', 0),
]

# Column-oriented dict with the same keys and ordering the DataFrame is built from.
data = {
    'review': [review_text for review_text, _label in labelled_reviews],
    'sentiment': [label for _review_text, label in labelled_reviews],
}
df = pd.DataFrame(data)
df.head()


In [None]:

def preprocess_text(text):
    """Normalize a raw review string before vectorization.

    The text is lowercased, every character in ``string.punctuation`` is
    deleted, every run of digits is deleted, and leading/trailing whitespace
    is trimmed. Interior whitespace is left as-is, so removing a token that
    sat between two spaces leaves a double space behind.

    Parameters
    ----------
    text : str or None or float
        The raw review. ``None`` and float NaN (what pandas stores for an
        empty cell) are treated as a missing review.

    Returns
    -------
    str
        The cleaned text, or ``""`` when the input is missing.
    """
    # Missing values have no .lower(); return an empty document instead of
    # raising AttributeError in the middle of a column-wide .apply().
    # (NaN is the only float that is not equal to itself.)
    if text is None or (isinstance(text, float) and text != text):
        return ""

    text = text.lower()
    # re.escape keeps characters such as ']' and '\' literal inside the class.
    text = re.sub(f"[{re.escape(string.punctuation)}]", "", text)
    text = re.sub(r"\d+", "", text)
    return text.strip()

# Store the normalized text in a new column; the raw 'review' column is kept
# untouched so both versions can be compared side by side.
df['cleaned_review'] = df['review'].map(preprocess_text)
df.head()


In [None]:

# Target labels are taken directly from the 'sentiment' column.
y = df['sentiment']

# Fit a default TF-IDF vectorizer on the cleaned reviews and build the
# feature matrix in a single step.
# NOTE(review): this fits on every row of df, including rows that are later
# held out for testing.
tfidf = TfidfVectorizer()
X = tfidf.fit_transform(df['cleaned_review'])


In [None]:

# Split the cleaned text rather than the pre-computed matrix X, so the
# vectorizer can be fitted on the training reviews only. Fitting TF-IDF on the
# full corpus before splitting lets test-set vocabulary and document
# frequencies leak into the training features and biases the evaluation.
# test_size and random_state are unchanged from the previous split.
text_train, text_test, y_train, y_test = train_test_split(
    df['cleaned_review'], y, test_size=0.3, random_state=42
)

# Re-fit the existing vectorizer on the training split, then apply that same
# fitted vocabulary/IDF to the held-out reviews (transform only, no fitting).
# After this point `tfidf` reflects the training split, not the full-corpus X.
X_train = tfidf.fit_transform(text_train)
X_test = tfidf.transform(text_test)


In [None]:

# Train a logistic-regression classifier with default settings on the
# training split; fit() returns the estimator itself, so it can be chained.
model = LogisticRegression().fit(X_train, y_train)

# Predicted sentiment labels for the held-out reviews.
y_pred = model.predict(X_test)


In [None]:

# Report each evaluation metric for the held-out predictions, in order:
# overall accuracy, per-class precision/recall/F1, then the confusion matrix.
for heading, metric in (
    ("Accuracy:", accuracy_score),
    ("\nClassification Report:\n", classification_report),
    ("\nConfusion Matrix:\n", confusion_matrix),
):
    print(heading, metric(y_test, y_pred))
