In [None]:

class SentimentAnalyzer:
    """Score text with VADER, TextBlob and, when it can be loaded, a transformer model.

    Attributes:
        vader: The VADER ``SentimentIntensityAnalyzer`` instance.
        transformer_sentiment: The ``sentiment-analysis`` pipeline, or ``None``
            when the model could not be loaded.
        has_transformer: ``True`` only if the transformer pipeline loaded.
    """

    def __init__(self):
        """Create the VADER analyzer and try to load the transformer pipeline.

        A failure to load the transformer is reported with a printed warning
        rather than raised, so the VADER and TextBlob scorers remain usable.
        """
        self.vader = SentimentIntensityAnalyzer()
        # Always define the attribute so it exists even when loading fails.
        self.transformer_sentiment = None
        try:
            self.transformer_sentiment = pipeline(
                'sentiment-analysis',
                model='cardiffnlp/twitter-roberta-base-sentiment-latest'
            )
            self.has_transformer = True
        except Exception as e:
            print(f"Warning: Could not load transformer model: {e}")
            self.has_transformer = False

    def analyze_vader_sentiment(self, text):
        """Return the VADER polarity scores for ``text``.

        Args:
            text: The text to score; coerced with ``str()`` before scoring.

        Returns:
            The dict produced by ``polarity_scores``. Missing or empty text
            yields a fixed neutral result
            (``compound=0, pos=0, neu=1, neg=0``) without calling VADER.
        """
        if pd.isna(text) or text == '':
            return {'compound': 0, 'pos': 0, 'neu': 1, 'neg': 0}
        return self.vader.polarity_scores(str(text))

    def analyze_textblob_sentiment(self, text):
        """Return the TextBlob polarity for ``text``.

        Args:
            text: The text to score; coerced with ``str()`` before scoring.

        Returns:
            ``TextBlob(...).sentiment.polarity``, or ``0.0`` for missing or
            empty text.
        """
        if pd.isna(text) or text == '':
            return 0.0
        return TextBlob(str(text)).sentiment.polarity

    def analyze_transformer_sentiment(self, text):
        """Return the transformer model's label and score for ``text``.

        Args:
            text: The text to classify; coerced with ``str()`` and cut to its
                first 512 characters before being passed to the pipeline.

        Returns:
            ``{'label': ..., 'score': ...}`` taken from the first pipeline
            result. ``{'label': 'NEUTRAL', 'score': 0.0}`` is returned when the
            transformer is unavailable or the text is missing/empty, and
            ``{'label': 'ERROR', 'score': 0.0}`` when the pipeline call fails.
        """
        if not self.has_transformer or pd.isna(text) or text == '':
            return {'label': 'NEUTRAL', 'score': 0.0}
        try:
            # Coerce to str (as the other scorers do) so non-string values are
            # classified instead of failing on the slice.
            # NOTE(review): the cut is by characters, not tokens — confirm this
            # is enough to stay within the model's input limit.
            result = self.transformer_sentiment(str(text)[:512])[0]
            return {'label': result['label'], 'score': result['score']}
        except Exception:
            # Best-effort per-row scoring: ordinary failures become an ERROR
            # row, while KeyboardInterrupt/SystemExit still propagate.
            return {'label': 'ERROR', 'score': 0.0}

# Create the analyzer (its constructor also attempts to load the transformer pipeline)
sentiment = SentimentAnalyzer()

# Score each platform's posts in turn; a platform with no rows is skipped entirely,
# so its frame gets no 'vader' / 'vader_compound' / 'textblob' columns.
for progress_message, posts in (
    ("Analyzing Twitter sentiment...", twitter_df),
    ("Analyzing Reddit sentiment...", reddit_df),
):
    if posts.empty:
        continue
    print(progress_message)
    cleaned = posts['cleaned_text']
    # Full VADER score dict per row, then just its 'compound' value as a scalar column
    posts['vader'] = cleaned.apply(sentiment.analyze_vader_sentiment)
    posts['vader_compound'] = posts['vader'].apply(lambda scores: scores['compound'])
    posts['textblob'] = cleaned.apply(sentiment.analyze_textblob_sentiment)
        

## 6. Visualization & Correlation Analysis

In [None]:

# Combine Twitter and Reddit data
COMBINED_COLUMNS = ['created_at', 'cleaned_text', 'vader_compound', 'textblob', 'platform']

twitter_df['platform'] = 'Twitter'
reddit_df['platform'] = 'Reddit'

# The sentiment columns are only added to non-empty frames, so selecting them from
# an empty frame would raise KeyError; leave empty platforms out of the concat.
platform_frames = [
    posts[COMBINED_COLUMNS] for posts in (twitter_df, reddit_df) if not posts.empty
]
if platform_frames:
    combined_df = pd.concat(platform_frames, ignore_index=True)
else:
    # Keep `combined_df` defined (with the expected columns) even when there is no data.
    combined_df = pd.DataFrame(columns=COMBINED_COLUMNS)

# Convert created_at to datetime if not already
# NOTE(review): assumes both platforms' timestamps parse to the same
# timezone-awareness (all naive or all aware) — confirm against the loaders.
combined_df['created_at'] = pd.to_datetime(combined_df['created_at'])

# Plot sentiment over time
if combined_df.empty:
    print("No sentiment data available to plot.")
else:
    # Resample once and reuse the result for both sentiment measures.
    daily_sentiment = (
        combined_df
        .set_index('created_at')[['vader_compound', 'textblob']]
        .resample('D')
        .mean()
    )
    fig, ax = plt.subplots(figsize=(12, 6))
    daily_sentiment['vader_compound'].plot(ax=ax, label='VADER Sentiment')
    daily_sentiment['textblob'].plot(ax=ax, label='TextBlob Sentiment')
    ax.set_title('Daily Average Sentiment Over Time')
    ax.set_ylabel('Sentiment Score')
    ax.legend()
    ax.grid(True)
    plt.show()
        

In [None]:

# Correlate with economic indicators
def _daily_mean(data):
    """Return the per-calendar-day mean of `data` on a timezone-naive index."""
    if getattr(data.index, 'tz', None) is not None:
        # Drop the timezone but keep the local wall-clock time, so that a tz-aware
        # series can be joined with a tz-naive one on calendar dates.
        data = data.tz_localize(None)
    return data.resample('D').mean()


def plot_correlation_with_sentiment(sentiment_series, economic_df, title):
    """Plot daily mean sentiment against an indicator's daily mean 'Close'.

    Both inputs are averaged per day, inner-joined on the date, and rows with
    missing values are dropped. The correlation of the two resulting columns is
    shown in the figure title, with sentiment on the primary y-axis and the
    indicator on the secondary y-axis.

    Args:
        sentiment_series: Sentiment scores indexed by a DatetimeIndex.
        economic_df: DataFrame indexed by a DatetimeIndex with a 'Close' column.
        title: Figure title; the correlation is appended to it.

    Returns:
        None. The figure is displayed with ``fig.show()``. If the two inputs
        share no days, a message is printed and nothing is plotted.
    """
    combined = pd.DataFrame({'sentiment': _daily_mean(sentiment_series)})
    combined = combined.join(_daily_mean(economic_df['Close']), how='inner')
    combined.columns = ['Sentiment', 'Economic Indicator']
    combined = combined.dropna()

    if combined.empty:
        # An empty overlap would otherwise render a blank figure titled "Corr: nan".
        print(f"No overlapping daily data for '{title}'; skipping plot.")
        return

    corr = combined['Sentiment'].corr(combined['Economic Indicator'])

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(go.Scatter(x=combined.index, y=combined['Sentiment'], name='Sentiment'), secondary_y=False)
    fig.add_trace(go.Scatter(x=combined.index, y=combined['Economic Indicator'], name='Economic'), secondary_y=True)
    fig.update_layout(title=f'{title} (Corr: {corr:.2f})', xaxis_title='Date')
    # Label both y-axes so the dual-axis figure can be read on its own.
    fig.update_yaxes(title_text='Sentiment', secondary_y=False)
    fig.update_yaxes(title_text='Economic Indicator', secondary_y=True)
    fig.show()

# Convert to datetime index
# Guarded so that re-running this cell does not raise KeyError once 'created_at'
# has already been moved from a column into the index.
if 'created_at' in combined_df.columns:
    combined_df = combined_df.set_index('created_at')

# Plot correlations: one figure per economic indicator, all against VADER compound
INDICATOR_TITLES = {
    'USDZAR': 'Sentiment vs USD/ZAR',
    'JSE': 'Sentiment vs JSE Index',
    'EZA': 'Sentiment vs EZA ETF',
}
for indicator_key, plot_title in INDICATOR_TITLES.items():
    plot_correlation_with_sentiment(
        combined_df['vader_compound'],
        economic_data[indicator_key],
        plot_title,
    )
        