# Task 2: Sentiment and Thematic Analysis

### Sentiment Analysis

In [8]:
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd
from tqdm import tqdm

def hybrid_sentiment(text):
    """Combine TextBlob and VADER for robust analysis"""
    text = str(text)
    
    # VADER (better for informal text)
    vader = SentimentIntensityAnalyzer()
    vader_score = vader.polarity_scores(text)['compound']
    
    # TextBlob (better for formal text)
    tb_score = TextBlob(text).sentiment.polarity
    
    # Combined score (weighted average)
    combined_score = (vader_score * 0.6) + (tb_score * 0.4)
    
    # Determine label
    if combined_score > 0.05:
        return "positive", combined_score
    elif combined_score < -0.05:
        return "negative", combined_score
    return "neutral", combined_score

# Load data
df = pd.read_csv("Dataset/ETHbanks_mobileApp_reviews_clean.csv")

# Process with progress bar
tqdm.pandas()
df[['sentiment_label', 'sentiment_score']] = df['review'].progress_apply(
    lambda x: pd.Series(hybrid_sentiment(x)))

# Save results
df.to_csv("Dataset/reviews_with_sentiment.csv", index=False)
print(f"Saved {len(df)} analyzed reviews")

100%|██████████| 6817/6817 [03:20<00:00, 34.00it/s]


Saved 6817 analyzed reviews
