# Explore Tweets by Keyword

This notebook allows you to select a keyword and view Pro Ruling and Pro Opposition tweets for all stances (favor, against, neutral) with **all CSV columns displayed**.

In [3]:
import pandas as pd
from IPython.display import display, HTML
import ipywidgets as widgets

# Read the combined stance results into a single DataFrame.
# NOTE: DATA_CSV is an absolute, machine-specific path; adjust it if the file lives elsewhere.
DATA_CSV = "/scratch/ziv_baretto/Research_X/Partisan-Discourse-on-X-English-/final_results+visualisations_folder/combined_stance_results.csv"
df = pd.read_csv(DATA_CSV, low_memory=False)

# Report the row count, the distinct keywords, and a numbered listing of the columns.
print(f"Loaded {len(df):,} tweets")
print(f"Available keywords: {df['keyword'].nunique()}")
print(f"\nKeywords: {sorted(df['keyword'].unique())}")
print(f"\nColumns in CSV ({df.shape[1]}):")
print("\n".join(f"  {i}. {col}" for i, col in enumerate(df.columns, start=1)))

Loaded 544,679 tweets
Available keywords: 37

Keywords: ['aatmanirbhar', 'ayodhya', 'balochistan', 'bhakts', 'caa', 'china', 'congress', 'democracy', 'demonetisation', 'dictatorship', 'farm laws', 'farmers protests', 'gdp', 'hathras', 'hindu', 'hindutva', 'inflation', 'islamists', 'kashmir', 'kashmiri pandits', 'lynching', 'mahotsav', 'minorities', 'modi', 'msp', 'muslim', 'new parliament', 'rahulgandhi', 'ram mandir', 'ratetvdebate', 'sangh', 'shaheen bagh', 'sharia', 'spyware', 'suicides', 'ucc', 'unemployment']

Columns in CSV (14):
  1. source_row
  2. tweet
  3. original_author
  4. tweet_label
  5. _label_norm
  6. keyword
  7. subjects_scored
  8. stance_gold
  9. fewshot_raw
  10. fewshot_label
  11. fewshot_label_for_against
  12. fewshot_reason
  13. fewshot_shots_json
  14. language


In [4]:
# Keyword choices offered by the dropdown, in sorted order
keywords = sorted(df['keyword'].unique())

# Dropdown for picking the keyword to explore; starts on the first sorted keyword
keyword_dropdown = widgets.Dropdown(
    description='Keyword:',
    options=keywords,
    value=keywords[0],
    layout=widgets.Layout(width='50%'),
    style={'description_width': 'initial'},
)

# Slider for how many tweets to sample per alignment/stance category (1-10, default 3)
num_tweets_slider = widgets.IntSlider(
    description='Tweets per category:',
    value=3,
    min=1,
    max=10,
    step=1,
    layout=widgets.Layout(width='50%'),
    style={'description_width': 'initial'},
)

# Output area that display_tweets() clears and prints into
output = widgets.Output()

def display_tweets(keyword, num_tweets):
    """Print sampled tweets for one keyword into the shared ``output`` widget.

    The output area is cleared first. Rows of the module-level ``df`` whose
    'keyword' equals ``keyword`` are then grouped by alignment ('Pro Ruling',
    'Pro OPP') and stance ('favor', 'against', 'neutral'). For every group a
    fixed-seed sample of at most ``num_tweets`` rows is printed, one
    "column: value" line per column of ``df``.

    Parameters
    ----------
    keyword : str
        Value matched exactly against the 'keyword' column.
    num_tweets : int
        Maximum number of tweets to print per alignment/stance group.
    """
    max_chars = 200  # longer values are cut to this length and suffixed with '...'
    heavy_rule = '=' * 100
    hash_rule = '#' * 100
    light_rule = '-' * 80

    output.clear_output()
    with output:
        # Restrict to the selected keyword
        keyword_df = df[df['keyword'] == keyword].copy()

        print(f"\n{heavy_rule}")
        print(f"KEYWORD: {keyword.upper()}")
        print(f"Total tweets: {len(keyword_df):,}")
        print(f"{heavy_rule}\n")

        for alignment in ('Pro Ruling', 'Pro OPP'):
            print(f"\n{hash_rule}")
            print(f"### {alignment.upper()} TWEETS ###")
            print(hash_rule)

            alignment_df = keyword_df[keyword_df['tweet_label'] == alignment]
            print(f"Total {alignment} tweets: {len(alignment_df):,}")

            for stance in ('favor', 'against', 'neutral'):
                stance_df = alignment_df[alignment_df['fewshot_label'] == stance]
                n_available = len(stance_df)

                print(f"\n{light_rule}")
                print(f"STANCE: {stance.upper()} ({n_available:,} tweets)")
                print(light_rule)

                if not n_available:
                    print("  No tweets found for this combination.")
                    continue

                # Fixed seed so the same tweets are shown on every click
                sample_df = stance_df.sample(min(num_tweets, n_available), random_state=42)

                for position, (_, row) in enumerate(sample_df.iterrows(), start=1):
                    print(f"\n  ===== TWEET {position} =====")
                    for col in df.columns:
                        text = str(row[col])
                        # Truncate long values for readability
                        shown = text if len(text) <= max_chars else text[:max_chars] + '...'
                        print(f"  {col}: {shown}")

def on_button_click(b):
    """Click handler: re-render the tweet listing from the current widget values.

    The argument ``b`` is accepted to fit the click-callback signature but is
    not used.
    """
    selected_keyword = keyword_dropdown.value
    tweets_per_category = num_tweets_slider.value
    display_tweets(selected_keyword, tweets_per_category)

# Button that triggers a refresh of the tweet listing
button = widgets.Button(
    description='Show Tweets',
    layout=widgets.Layout(width='200px'),
    button_style='primary',
)
button.on_click(on_button_click)

# Stack the heading, the controls and the output area vertically and render them
display(
    widgets.VBox(
        children=[
            widgets.HTML(value="<h3>Select a Keyword and Number of Tweets to Display</h3>"),
            keyword_dropdown,
            num_tweets_slider,
            button,
            output,
        ]
    )
)

VBox(children=(HTML(value='<h3>Select a Keyword and Number of Tweets to Display</h3>'), Dropdown(description='…

## Alternative: Direct Function Call

If widgets don't work in your environment, use the function below:

In [None]:
def show_tweets_for_keyword(keyword, num_tweets=3, show_as_table=True):
    """
    Display Pro Ruling and Pro Opposition tweets for all stances with ALL columns.

    Rows of the module-level ``df`` whose 'keyword' equals ``keyword`` are
    grouped by alignment ('Pro Ruling', 'Pro OPP') and stance ('favor',
    'against', 'neutral'). For each group a fixed-seed sample
    (``random_state=42``) of at most ``num_tweets`` rows is shown.

    Parameters:
    -----------
    keyword : str
        The keyword to filter tweets by (exact match on the 'keyword' column)
    num_tweets : int
        Number of tweets to display per category (default: 3)
    show_as_table : bool
        If True, display each tweet as a one-row pandas DataFrame table
        (default: True). If False, print one "column: value" line per column.

    Returns:
    --------
    None
        Everything is printed or displayed. If no row matches ``keyword``, a
        message listing the available keywords is printed instead.
    """
    # Longest cell value shown in full; applied to both the table and text modes.
    max_chars = 300

    keyword_df = df[df['keyword'] == keyword]

    if len(keyword_df) == 0:
        print(f"No tweets found for keyword: {keyword}")
        print(f"Available keywords: {sorted(df['keyword'].unique())}")
        return

    print(f"\n{'='*100}")
    print(f"KEYWORD: {keyword.upper()}")
    print(f"Total tweets: {len(keyword_df):,}")
    print(f"{'='*100}\n")

    alignments = ['Pro Ruling', 'Pro OPP']
    stances = ['favor', 'against', 'neutral']

    for alignment in alignments:
        print(f"\n{'#'*100}")
        print(f"### {alignment.upper()} TWEETS ###")
        print(f"{'#'*100}")

        alignment_df = keyword_df[keyword_df['tweet_label'] == alignment]
        print(f"Total {alignment} tweets: {len(alignment_df):,}")

        for stance in stances:
            stance_df = alignment_df[alignment_df['fewshot_label'] == stance]

            print(f"\n{'-'*80}")
            print(f"STANCE: {stance.upper()} ({len(stance_df):,} tweets)")
            print(f"{'-'*80}")

            if len(stance_df) == 0:
                print("  No tweets found for this combination.")
                continue

            # Fixed seed so repeated calls show the same tweets
            sample_df = stance_df.sample(min(num_tweets, len(stance_df)), random_state=42)

            if show_as_table:
                # pandas' default display options cut cell text at 50 characters and
                # may elide columns; lift both limits while rendering so every column
                # and the tweet text stay readable. Settings are restored on exit.
                with pd.option_context(
                    'display.max_columns', None,
                    'display.max_colwidth', max_chars,
                ):
                    for idx, (_, row) in enumerate(sample_df.iterrows(), 1):
                        print(f"\n  ===== TWEET {idx} =====")
                        # One-row table: the row Series transposed into a frame
                        display(row.to_frame().T)
            else:
                for idx, (_, row) in enumerate(sample_df.iterrows(), 1):
                    print(f"\n  ===== TWEET {idx} =====")
                    for col in df.columns:
                        val_str = str(row[col])
                        # Truncate long values for readability
                        if len(val_str) > max_chars:
                            val_str = val_str[:max_chars] + '...'
                        print(f"  {col}: {val_str}")

# Example usage:
# show_tweets_for_keyword('modi', num_tweets=2, show_as_table=True)

In [None]:
# Count tweets per (keyword, alignment) row, with one column per stance label;
# combinations that never occur are filled with 0.
summary = (
    df.groupby(['keyword', 'tweet_label', 'fewshot_label'])
    .size()
    .unstack(fill_value=0)
)
print("\nTweet counts by Keyword, Political Alignment, and Stance:")
print("=" * 80)
display(summary)

In [None]:
# Example: show up to two sampled tweets per category for the 'modi' keyword, as tables
show_tweets_for_keyword(keyword='modi', num_tweets=2, show_as_table=True)