# 🛍️ Product Review Analysis Pipeline (Notebook)
An end-to-end pipeline that cleans product reviews, performs sentiment analysis, extracts features, and stores results using SQL.

In [None]:
import pandas as pd
import re
# Load the raw reviews and keep only the first row for each distinct
# review_text value (the original index labels are preserved).
raw_reviews = pd.read_csv('../data/sample_reviews.csv')
df = raw_reviews.drop_duplicates(subset='review_text')
def clean_text(text):
    """Normalize a raw review into lowercase ASCII alphanumeric words.

    The text is lowercased, ``http...`` URLs are removed, every character
    that is not an ASCII letter, digit or whitespace is dropped, and runs
    of whitespace are collapsed to a single space with the ends trimmed.

    Args:
        text: The raw review. Missing values (``None``, ``NaN``,
            ``pd.NA``) yield ``''``; any other non-string value (for
            example a numeric cell) is converted with ``str()`` first.

    Returns:
        The cleaned review string, possibly empty.
    """
    if not isinstance(text, str):
        if pd.isna(text):
            return ''
        # A non-string cell has no .lower(); coerce it instead of letting
        # one odd row abort the whole .apply() pass.
        text = str(text)
    text = text.lower()
    # Strip URLs before punctuation so their remains are not glued
    # together into a meaningless pseudo-word.
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    return re.sub(r'\s+', ' ', text).strip()
# Run every raw review through clean_text and store the result alongside it.
raw_review_column = df['review_text']
df['cleaned_review'] = raw_review_column.apply(clean_text)
# Last expression of the cell: shows the first rows as the cell output.
df.head()

In [None]:
from textblob import TextBlob
def get_sentiment(text):
    """Label *text* as ``'positive'``, ``'negative'`` or ``'neutral'``.

    The label follows the sign of the polarity score TextBlob reports for
    the text: above zero is positive, below zero is negative, and anything
    else is neutral.
    """
    score = TextBlob(text).sentiment.polarity
    if score > 0:
        return 'positive'
    if score < 0:
        return 'negative'
    return 'neutral'
# Classify each cleaned review and keep the label in its own column.
cleaned_review_column = df['cleaned_review']
df['sentiment'] = cleaned_review_column.apply(get_sentiment)
# Last expression of the cell: preview the raw text next to its label.
preview_columns = ['review_text', 'sentiment']
df[preview_columns].head()