# Social Media Sentiment Analysis

This notebook performs sentiment analysis on social media data to understand public opinion and attitudes towards specific topics or brands.

In [5]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from textblob import TextBlob
import nltk
from wordcloud import WordCloud
import plotly.express as px
import plotly.graph_objects as go

# Set style for visualizations
plt.style.use('seaborn')
sns.set_palette('husl')

OSError: 'seaborn' is not a valid package style, path of style file, URL of style file, or library style name (library styles are listed in `style.available`)

## 1. Data Loading and Preprocessing

In [None]:
# Load the dataset
df = pd.read_csv('../data/twitter_entity_sentiment.csv')

# Display basic information about the dataset
print("Dataset Info:")
df.info()

print("\nFirst few rows:")
df.head()

## 2. Text Preprocessing Functions

In [None]:
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

# Download required NLTK data
nltk.download('punkt')
nltk.download('stopwords')

def clean_text(text):
    # Convert to lowercase
    text = str(text).lower()
    
    # Remove URLs
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    
    # Remove user mentions
    text = re.sub(r'@\w+', '', text)
    
    # Remove hashtags
    text = re.sub(r'#\w+', '', text)
    
    # Remove punctuation
    text = re.sub(r'[^\w\s]', '', text)
    
    return text.strip()

# Apply text cleaning
df['cleaned_text'] = df['text'].apply(clean_text)

## 3. Sentiment Analysis

In [None]:
def get_sentiment(text):
    return TextBlob(text).sentiment.polarity

# Calculate sentiment scores
df['sentiment_score'] = df['cleaned_text'].apply(get_sentiment)

# Categorize sentiment
df['sentiment_category'] = pd.cut(df['sentiment_score'],
                                 bins=[-1, -0.1, 0.1, 1],
                                 labels=['Negative', 'Neutral', 'Positive'])

## 4. Visualization

In [None]:
# 1. Overall Sentiment Distribution
plt.figure(figsize=(10, 6))
sns.countplot(data=df, x='sentiment_category')
plt.title('Distribution of Sentiment Categories')
plt.show()

# 2. Sentiment Distribution by Entity
entity_sentiment = df.groupby('entity')['sentiment_score'].mean().sort_values()
plt.figure(figsize=(12, 6))
entity_sentiment.plot(kind='bar')
plt.title('Average Sentiment Score by Entity')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

## 5. Word Cloud Generation

In [None]:
# Generate word cloud for positive sentiment
positive_text = ' '.join(df[df['sentiment_score'] > 0]['cleaned_text'])
wordcloud_positive = WordCloud(width=800, height=400, background_color='white').generate(positive_text)

plt.figure(figsize=(10, 5))
plt.imshow(wordcloud_positive, interpolation='bilinear')
plt.axis('off')
plt.title('Word Cloud - Positive Sentiment')
plt.show()

# Generate word cloud for negative sentiment
negative_text = ' '.join(df[df['sentiment_score'] < 0]['cleaned_text'])
wordcloud_negative = WordCloud(width=800, height=400, background_color='white').generate(negative_text)

plt.figure(figsize=(10, 5))
plt.imshow(wordcloud_negative, interpolation='bilinear')
plt.axis('off')
plt.title('Word Cloud - Negative Sentiment')
plt.show()

## 6. Interactive Visualizations with Plotly

In [None]:
# Create interactive sentiment distribution plot
fig = px.histogram(df, x='sentiment_score', nbins=50,
                  title='Distribution of Sentiment Scores',
                  labels={'sentiment_score': 'Sentiment Score', 'count': 'Frequency'})
fig.show()

# Create interactive entity sentiment plot
entity_sentiment_df = df.groupby('entity').agg({
    'sentiment_score': ['mean', 'count']
}).reset_index()

fig = px.scatter(entity_sentiment_df, 
                x=('sentiment_score', 'mean'),
                y=('sentiment_score', 'count'),
                text='entity',
                title='Entity Sentiment Analysis',
                labels={'sentiment_score': 'Average Sentiment Score',
                       'count': 'Number of Mentions'})
fig.show()