# Bank Earnings: Regulatory Topic & Sentiment Analysis

In [None]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the transcript rows and attach a combined "<quarter> <year>" label
# to each one; later cells group on that label.
df = pd.read_csv("transcript_sentences.csv").assign(
    quarter_year=lambda frame: frame['quarter'] + " " + frame['year'].astype(str)
)


## 1. What are the most common topics (financial and regulatory)?

In [None]:

# For every "Mentions ..." column, count the rows flagged "Yes" and chart the
# counts from the most- to the least-mentioned theme.
reg_topics = [name for name in df.columns if name.startswith("Mentions ")]
yes_counts = df[reg_topics].eq("Yes").sum()
reg_summary = yes_counts.sort_values(ascending=False)

ax = reg_summary.plot(
    kind='bar',
    figsize=(12, 4),
    title='Mentions of Prudential Regulatory Themes',
)
ax.set_ylabel("Number of Sentences")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


## 2. What do executives say when they mention Prudential Regulation Themes?

In [None]:

# Keep every row in which at least one "Mentions ..." column equals "Yes".
mention_cols = [name for name in df.columns if name.startswith("Mentions ")]
mentions_mask = (df[mention_cols] == "Yes").any(axis="columns")
reg_df = df.loc[mentions_mask].copy()
print(f"Total rows where regulation themes are mentioned: {len(reg_df)}")

# Preview: the identifying fields plus only the first three theme columns.
preview_cols = ['quarter', 'year', 'speaker', 'sentence'] + mention_cols[:3]
reg_df[preview_cols].head(10)


## 3. What is the sentiment trend around regulatory themes by quarter?

In [None]:

import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
# Make the VADER lexicon available before constructing the analyzer.
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

# Score every sentence with VADER's compound polarity; str() guards against
# non-string cells (e.g. NaN) in the 'sentence' column.
reg_df['sentiment_score'] = reg_df['sentence'].apply(lambda x: sia.polarity_scores(str(x))['compound'])
# Bucket the scores: above 0.2 is positive, below -0.2 is negative, the rest neutral.
reg_df['sentiment_label'] = reg_df['sentiment_score'].apply(lambda x: 'positive' if x > 0.2 else 'negative' if x < -0.2 else 'neutral')

sentiment_summary = reg_df.groupby(['quarter_year', 'sentiment_label']).size().unstack(fill_value=0)

# groupby sorts the "<quarter> <year>" labels alphabetically, which interleaves
# years (e.g. "Q1 2021", "Q1 2022", "Q2 2021"). Reorder the rows by
# (year, quarter) so the x-axis reads chronologically.
# NOTE(review): assumes 'quarter' values sort in calendar order within a year
# (e.g. "Q1" < "Q2" < "Q3" < "Q4") - confirm against the data.
quarter_order = (
    reg_df[['year', 'quarter', 'quarter_year']]
    .dropna(subset=['quarter_year'])  # groupby drops missing keys, so skip them here too
    .drop_duplicates()
    .sort_values(['year', 'quarter'])['quarter_year']
    .unique()
)
sentiment_summary = sentiment_summary.reindex(pd.Index(quarter_order, name='quarter_year'))

sentiment_summary.plot(kind='bar', stacked=True, figsize=(12, 6), colormap='coolwarm', title='Sentiment of Regulatory Sentences per Quarter')
plt.ylabel("Sentence Count")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


## 4. Are executives complaining, complying or preparing? (Manual hints based on keywords)

In [None]:

# Keyword groups are checked in this order; the first group with a hit wins.
TONE_KEYWORDS = (
    ('Complaining', ('concern', 'challenge', 'problem', 'headwind')),
    ('Complying', ('comply', 'compliance', 'met requirement', 'within range')),
    ('Preparing', ('prepare', 'planning', 'readiness', 'scenario')),
)


def classify_tone(text):
    """Label a sentence's tone using case-insensitive keyword matching.

    Keywords are matched as substrings of the lower-cased text, so
    'concern' also matches 'concerned'. When keywords from several groups
    appear, the earliest group wins: Complaining, then Complying, then
    Preparing.

    Args:
        text: The sentence to classify. Non-string values (such as NaN or
            None from a missing cell) are accepted and treated as having
            no detectable tone.

    Returns:
        One of 'Complaining', 'Complying', 'Preparing' or 'Unclear'.
    """
    # Missing cells arrive as float NaN / None and have no .lower(); report
    # them as 'Unclear' instead of raising AttributeError mid-apply.
    if not isinstance(text, str):
        return 'Unclear'

    lowered = text.lower()
    for tone, keywords in TONE_KEYWORDS:
        if any(keyword in lowered for keyword in keywords):
            return tone
    return 'Unclear'

# Tag every regulatory sentence with a keyword-based tone, then count the
# tones per "<quarter> <year>" label.
reg_df['reg_tone'] = reg_df['sentence'].apply(classify_tone)
tone_summary = reg_df.groupby(['quarter_year', 'reg_tone']).size().unstack(fill_value=0)

# groupby sorts the "<quarter> <year>" labels alphabetically, which interleaves
# years (e.g. "Q1 2021", "Q1 2022", "Q2 2021"). Reorder the rows by
# (year, quarter) so the "over time" axis is chronological.
# NOTE(review): assumes 'quarter' values sort in calendar order within a year
# (e.g. "Q1" < "Q2" < "Q3" < "Q4") - confirm against the data.
quarter_order = (
    reg_df[['year', 'quarter', 'quarter_year']]
    .dropna(subset=['quarter_year'])  # groupby drops missing keys, so skip them here too
    .drop_duplicates()
    .sort_values(['year', 'quarter'])['quarter_year']
    .unique()
)
tone_summary = tone_summary.reindex(pd.Index(quarter_order, name='quarter_year'))

tone_summary.plot(kind='bar', stacked=True, figsize=(12,6), title='Regulatory Tone Classification Over Time')
plt.ylabel("Sentence Count")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
