In [None]:
# Classification of headlines mentioning "Obama" using (1) the Support model and (2) the Performance model
# Base model: mlburnham/Political_DEBATE_DeBERTa_large_v1.1 on Hugging Face
# Further trained on a sample of candidate-related headlines


# Support model: zkava01/AuthorSupport_Oct29 on Hugging Face
# Performance model: zkava01/AuthorPerformance_Oct29 on Hugging Face

In [1]:
import os
import pandas as pd
import re
from transformers import pipeline
from tqdm import tqdm  

In [2]:
# Location of the headline CSV (expanduser lets a "~/..." style path be used here)
dropbox_path = os.path.expanduser("FILEHERE.csv")

# Read the whole file in one pass; low_memory=False has pandas infer each
# column's dtype from the full column rather than piece by piece
df = pd.read_csv(dropbox_path, low_memory=False)

# Parse "date" into datetimes; values that cannot be parsed become NaT
df["date"] = pd.to_datetime(df["date"], errors="coerce")

# Date window for the election period(s) of interest -- edit these two bounds
# to study a different period. Here: 2007-11-01 through 2016-11-15.
# Both bounds are inclusive; rows whose date is NaT fall outside the window
# and are dropped.
start_date = "2007-11-01"
end_date = "2016-11-15"
in_window = df["date"].between(start_date, end_date)
df = df[in_window]

# Political figures we can look for, keyed by full name. Each value is the
# list of surname aliases used to detect that person in a headline.
# NOTE(review): detection is surname-only, so a bare "Clinton", "Bush" or
# "Johnson" is attributed to the person listed here even when the headline is
# about someone else with that surname -- confirm this is acceptable.
figures_full = {
    "Harry Truman": ["Truman"],
    "Thomas Dewey": ["Dewey"],
    "Strom Thurmond": ["Thurmond"],
    "Dwight Eisenhower": ["Eisenhower"],
    "Adlai Stevenson": ["Stevenson"],
    "John F. Kennedy": ["Kennedy"],
    "Richard Nixon": ["Nixon"],
    "Lyndon B. Johnson": ["Johnson"],
    "Barry Goldwater": ["Goldwater"],
    "Hubert Humphrey": ["Humphrey"],
    "George Wallace": ["Wallace"],
    "George McGovern": ["McGovern"],
    "Jimmy Carter": ["Carter"],
    "Gerald Ford": ["Ford"],
    "Ronald Reagan": ["Reagan"],
    "John B. Anderson": ["Anderson"],
    "Walter Mondale": ["Mondale"],
    "George H.W Bush": ["Bush"],
    "Michael Dukakis": ["Dukakis"],
    "Bill Clinton": ["Clinton"],
    "Ross Perot": ["Perot"],
    "Bob Dole": ["Dole"],
    "Al Gore": ["Gore"],
    "John Kerry": ["Kerry"],
    "Barack Obama": ["Obama"],
    "John McCain": ["McCain"],
    "Mitt Romney": ["Romney"],
    "Donald Trump": ["Trump"],
    "Joe Biden": ["Biden"],
    "Kamala Harris": ["Harris"]
}

# Reverse lookup: lowercased surname alias -> full figure name
last_name_to_full = {}
for full_name, surnames in figures_full.items():
    for surname in surnames:
        last_name_to_full[surname.lower()] = full_name

# Whole-word regex over every surname alias (surnames only, not full names).
# The aliases are stored lowercased, so apply the pattern with re.IGNORECASE.
surname_alternatives = '|'.join(map(re.escape, last_name_to_full))
figure_pattern = r'\b(?:' + surname_alternatives + r')\b'


In [3]:

# Rebuild the regex pattern from the surname lookup so this cell can be re-run
# on its own, and compile it once instead of once per headline
figure_pattern = r'\b(?:' + '|'.join(re.escape(name) for name in last_name_to_full.keys()) + r')\b'
figure_regex = re.compile(figure_pattern, flags=re.IGNORECASE)

# Choose the figures you're interested in
key_figures = ["Barack Obama"]  # or ["Donald Trump", "Joe Biden", "Kamala Harris", ...]


def find_key_figures(title):
    """Return the key figures named in a headline.

    Args:
        title: Headline value. It is converted with str() before searching,
            so a missing title (NaN) is searched as the text "nan".

    Returns:
        list[str]: Full names from ``key_figures`` whose surname occurs in the
        headline, ordered by first appearance and without repeats.
    """
    mentioned = []
    for hit in figure_regex.findall(str(title)):
        full_name = last_name_to_full[hit.lower()]
        # De-duplicate on the resolved name, not the raw matched text:
        # "Obama" and "OBAMA" in one headline are the same figure and must
        # produce a single row.
        if full_name in key_figures and full_name not in mentioned:
            mentioned.append(full_name)
    return mentioned


# Expand into long format: one row per (headline, matched figure).
# - assign() adds (or overwrites) the "figure" column with a list per headline
# - explode() repeats a headline once per listed figure, keeping its original
#   row label; headlines with an empty list get NaN and are dropped
# Row order is the input order, then order of first mention, so repeated runs
# give identical output. When nothing matches, the result is empty but still
# has every input column plus "figure".
df_filtered = (
    df.assign(figure=df["title"].map(find_key_figures))
    .explode("figure")
    .dropna(subset=["figure"])
)


In [4]:
#### Updated loop with new models - Support and Performance ####

# Load both classifiers
support_classifier = pipeline(
    "zero-shot-classification",
    model="zkava01/AuthorSupport_Oct29",
    tokenizer="zkava01/AuthorSupport_Oct29"
)

performance_classifier = pipeline(
    "zero-shot-classification",
    model="zkava01/AuthorPerformance_Oct29",
    tokenizer="zkava01/AuthorPerformance_Oct29"
)

# Hypothesis templates, each as [positive stance, negative stance];
# {} is filled with the figure's full name
support_hypotheses_template = [
    "The author of this text supports {}",
    "The author of this text does not support {}"
]

performance_hypotheses_template = [
    "The author of this text believes {} is performing/performed/will perform well",
    "The author of this text believes {} is performing/performed/will perform poorly"
]

# Score a hypothesis must exceed before its label is assigned.
# NOTE(review): the two models use very different cut-offs (0.99 vs 0.5) --
# confirm this asymmetry is intended.
support_threshold = 0.99
performance_threshold = 0.5


def classify_stance(classifier, text, templates, figure, threshold):
    """Score one headline against a positive/negative hypothesis pair.

    Args:
        classifier: Zero-shot classification pipeline to call.
        text: Headline to classify.
        templates: Two format strings, [positive, negative], each with one {}
            placeholder for the figure's name.
        figure: Full name substituted into the templates.
        threshold: Score a hypothesis must exceed to be selected.

    Returns:
        int: 1 if the positive hypothesis scores above ``threshold``; otherwise
        -1 if the negative hypothesis does; otherwise 0. The positive
        hypothesis is checked first, so it wins when both exceed the threshold.
    """
    positive, negative = (template.format(figure) for template in templates)
    # NOTE(review): no hypothesis_template is passed, so the pipeline applies
    # its own default template to each candidate label. The labels here are
    # already complete sentences -- check whether hypothesis_template="{}" was
    # intended. Left unchanged because altering it would change the scores.
    result = classifier(text, [positive, negative], multi_label=True)
    scores = dict(zip(result["labels"], result["scores"]))

    if scores.get(positive, 0) > threshold:
        return 1
    if scores.get(negative, 0) > threshold:
        return -1
    return 0


# Chunk setup
chunk_size = 500
# Ceiling division: an exact multiple of chunk_size must not create an extra,
# empty trailing chunk. At least one chunk is always processed so the final
# merge below still has something to concatenate when there are no rows.
num_chunks = max(1, (len(df_filtered) + chunk_size - 1) // chunk_size)
output_dir = "OUTPUTHERE"
chunk_dir = os.path.join(output_dir, "chunks")
output_base = "Obama"
os.makedirs(chunk_dir, exist_ok=True)

merged_chunks = []

print(f"Running support and performance classification in {num_chunks} chunks...")

for i in range(num_chunks):
    start_idx = i * chunk_size
    end_idx = min((i + 1) * chunk_size, len(df_filtered))
    chunk = df_filtered.iloc[start_idx:end_idx].copy()

    # Results are collected by row position and attached as whole columns.
    # Writing by index label instead would hit every row sharing that label,
    # and df_filtered repeats a label when one headline matches several figures.
    support_labels = []
    performance_labels = []

    for _, row in tqdm(chunk.iterrows(), total=len(chunk), desc=f"Chunk {i+1}/{num_chunks}"):
        title = row["title"]
        figure = row["figure"]

        if figure in key_figures:
            # Author Support
            support_labels.append(
                classify_stance(
                    support_classifier, title, support_hypotheses_template,
                    figure, support_threshold,
                )
            )
            # Author Performance
            performance_labels.append(
                classify_stance(
                    performance_classifier, title, performance_hypotheses_template,
                    figure, performance_threshold,
                )
            )
        else:
            # Rows for figures outside key_figures are left unclassified
            support_labels.append("NA")
            performance_labels.append("NA")

    # Add output columns
    chunk["debate_support"] = support_labels
    chunk["debate_performance"] = performance_labels

    # Save chunk
    chunk_filename = f"{output_base}_chunk{i+1}.csv"
    chunk_path = os.path.join(chunk_dir, chunk_filename)
    chunk.to_csv(chunk_path, index=False)
    print(f"Saved {chunk_filename}")
    merged_chunks.append(chunk)

# Merge and save all chunks
final_df = pd.concat(merged_chunks, ignore_index=True)
final_output_path = os.path.join(output_dir, f"{output_base}Headlines.csv")
final_df.to_csv(final_output_path, index=False)
print(f"Saved final combined file: {final_output_path}")


Device set to use mps:0
Device set to use mps:0


Running support and performance classification in 109 chunks...


Chunk 1/109: 100%|██████████| 500/500 [01:35<00:00,  5.26it/s]


Saved Obama_chunk1.csv


Chunk 2/109: 100%|██████████| 500/500 [01:58<00:00,  4.22it/s]


Saved Obama_chunk2.csv


Chunk 3/109: 100%|██████████| 500/500 [01:59<00:00,  4.18it/s]


Saved Obama_chunk3.csv


Chunk 4/109: 100%|██████████| 500/500 [01:59<00:00,  4.18it/s]


Saved Obama_chunk4.csv


Chunk 5/109: 100%|██████████| 500/500 [01:59<00:00,  4.18it/s]


Saved Obama_chunk5.csv


Chunk 6/109: 100%|██████████| 500/500 [02:00<00:00,  4.14it/s]


Saved Obama_chunk6.csv


Chunk 7/109: 100%|██████████| 500/500 [01:58<00:00,  4.21it/s]


Saved Obama_chunk7.csv


Chunk 8/109: 100%|██████████| 500/500 [02:00<00:00,  4.16it/s]


Saved Obama_chunk8.csv


Chunk 9/109: 100%|██████████| 500/500 [01:59<00:00,  4.17it/s]


Saved Obama_chunk9.csv


Chunk 10/109: 100%|██████████| 500/500 [01:59<00:00,  4.17it/s]


Saved Obama_chunk10.csv


Chunk 11/109: 100%|██████████| 500/500 [02:00<00:00,  4.15it/s]


Saved Obama_chunk11.csv


Chunk 12/109: 100%|██████████| 500/500 [01:59<00:00,  4.17it/s]


Saved Obama_chunk12.csv


Chunk 13/109: 100%|██████████| 500/500 [01:58<00:00,  4.21it/s]


Saved Obama_chunk13.csv


Chunk 14/109: 100%|██████████| 500/500 [01:57<00:00,  4.26it/s]


Saved Obama_chunk14.csv


Chunk 15/109: 100%|██████████| 500/500 [01:57<00:00,  4.26it/s]


Saved Obama_chunk15.csv


Chunk 16/109: 100%|██████████| 500/500 [01:58<00:00,  4.22it/s]


Saved Obama_chunk16.csv


Chunk 17/109: 100%|██████████| 500/500 [01:59<00:00,  4.18it/s]


Saved Obama_chunk17.csv


Chunk 18/109: 100%|██████████| 500/500 [01:59<00:00,  4.20it/s]


Saved Obama_chunk18.csv


Chunk 19/109: 100%|██████████| 500/500 [01:57<00:00,  4.25it/s]


Saved Obama_chunk19.csv


Chunk 20/109: 100%|██████████| 500/500 [01:56<00:00,  4.31it/s]


Saved Obama_chunk20.csv


Chunk 21/109: 100%|██████████| 500/500 [01:55<00:00,  4.31it/s]


Saved Obama_chunk21.csv


Chunk 22/109: 100%|██████████| 500/500 [01:57<00:00,  4.26it/s]


Saved Obama_chunk22.csv


Chunk 23/109: 100%|██████████| 500/500 [02:00<00:00,  4.16it/s]


Saved Obama_chunk23.csv


Chunk 24/109: 100%|██████████| 500/500 [01:59<00:00,  4.18it/s]


Saved Obama_chunk24.csv


Chunk 25/109: 100%|██████████| 500/500 [02:02<00:00,  4.09it/s]


Saved Obama_chunk25.csv


Chunk 26/109: 100%|██████████| 500/500 [02:00<00:00,  4.14it/s]


Saved Obama_chunk26.csv


Chunk 27/109: 100%|██████████| 500/500 [02:01<00:00,  4.10it/s]


Saved Obama_chunk27.csv


Chunk 28/109: 100%|██████████| 500/500 [01:59<00:00,  4.20it/s]


Saved Obama_chunk28.csv


Chunk 29/109: 100%|██████████| 500/500 [01:56<00:00,  4.29it/s]


Saved Obama_chunk29.csv


Chunk 30/109: 100%|██████████| 500/500 [01:56<00:00,  4.29it/s]


Saved Obama_chunk30.csv


Chunk 31/109: 100%|██████████| 500/500 [01:57<00:00,  4.27it/s]


Saved Obama_chunk31.csv


Chunk 32/109: 100%|██████████| 500/500 [01:58<00:00,  4.23it/s]


Saved Obama_chunk32.csv


Chunk 33/109: 100%|██████████| 500/500 [01:50<00:00,  4.52it/s]


Saved Obama_chunk33.csv


Chunk 34/109: 100%|██████████| 500/500 [01:22<00:00,  6.05it/s]


Saved Obama_chunk34.csv


Chunk 35/109: 100%|██████████| 500/500 [01:22<00:00,  6.05it/s]


Saved Obama_chunk35.csv


Chunk 36/109: 100%|██████████| 500/500 [01:22<00:00,  6.06it/s]


Saved Obama_chunk36.csv


Chunk 37/109: 100%|██████████| 500/500 [01:22<00:00,  6.03it/s]


Saved Obama_chunk37.csv


Chunk 38/109: 100%|██████████| 500/500 [01:22<00:00,  6.03it/s]


Saved Obama_chunk38.csv


Chunk 39/109: 100%|██████████| 500/500 [01:21<00:00,  6.11it/s]


Saved Obama_chunk39.csv


Chunk 40/109: 100%|██████████| 500/500 [01:21<00:00,  6.14it/s]


Saved Obama_chunk40.csv


Chunk 41/109: 100%|██████████| 500/500 [01:21<00:00,  6.11it/s]


Saved Obama_chunk41.csv


Chunk 42/109: 100%|██████████| 500/500 [01:21<00:00,  6.15it/s]


Saved Obama_chunk42.csv


Chunk 43/109: 100%|██████████| 500/500 [01:21<00:00,  6.10it/s]


Saved Obama_chunk43.csv


Chunk 44/109: 100%|██████████| 500/500 [01:21<00:00,  6.12it/s]


Saved Obama_chunk44.csv


Chunk 45/109: 100%|██████████| 500/500 [01:22<00:00,  6.08it/s]


Saved Obama_chunk45.csv


Chunk 46/109: 100%|██████████| 500/500 [01:21<00:00,  6.11it/s]


Saved Obama_chunk46.csv


Chunk 47/109: 100%|██████████| 500/500 [01:23<00:00,  6.00it/s]


Saved Obama_chunk47.csv


Chunk 48/109: 100%|██████████| 500/500 [01:22<00:00,  6.07it/s]


Saved Obama_chunk48.csv


Chunk 49/109: 100%|██████████| 500/500 [01:24<00:00,  5.95it/s]


Saved Obama_chunk49.csv


Chunk 50/109: 100%|██████████| 500/500 [01:21<00:00,  6.12it/s]


Saved Obama_chunk50.csv


Chunk 51/109: 100%|██████████| 500/500 [01:22<00:00,  6.09it/s]


Saved Obama_chunk51.csv


Chunk 52/109: 100%|██████████| 500/500 [01:21<00:00,  6.12it/s]


Saved Obama_chunk52.csv


Chunk 53/109: 100%|██████████| 500/500 [01:21<00:00,  6.16it/s]


Saved Obama_chunk53.csv


Chunk 54/109: 100%|██████████| 500/500 [01:21<00:00,  6.15it/s]


Saved Obama_chunk54.csv


Chunk 55/109: 100%|██████████| 500/500 [01:22<00:00,  6.08it/s]


Saved Obama_chunk55.csv


Chunk 56/109: 100%|██████████| 500/500 [01:22<00:00,  6.09it/s]


Saved Obama_chunk56.csv


Chunk 57/109: 100%|██████████| 500/500 [01:21<00:00,  6.11it/s]


Saved Obama_chunk57.csv


Chunk 58/109: 100%|██████████| 500/500 [01:21<00:00,  6.17it/s]


Saved Obama_chunk58.csv


Chunk 59/109: 100%|██████████| 500/500 [01:21<00:00,  6.15it/s]


Saved Obama_chunk59.csv


Chunk 60/109: 100%|██████████| 500/500 [01:21<00:00,  6.15it/s]


Saved Obama_chunk60.csv


Chunk 61/109: 100%|██████████| 500/500 [01:21<00:00,  6.12it/s]


Saved Obama_chunk61.csv


Chunk 62/109: 100%|██████████| 500/500 [01:20<00:00,  6.19it/s]


Saved Obama_chunk62.csv


Chunk 63/109: 100%|██████████| 500/500 [01:21<00:00,  6.16it/s]


Saved Obama_chunk63.csv


Chunk 64/109: 100%|██████████| 500/500 [01:21<00:00,  6.11it/s]


Saved Obama_chunk64.csv


Chunk 65/109: 100%|██████████| 500/500 [01:21<00:00,  6.13it/s]


Saved Obama_chunk65.csv


Chunk 66/109: 100%|██████████| 500/500 [01:22<00:00,  6.07it/s]


Saved Obama_chunk66.csv


Chunk 67/109: 100%|██████████| 500/500 [01:21<00:00,  6.13it/s]


Saved Obama_chunk67.csv


Chunk 68/109: 100%|██████████| 500/500 [01:21<00:00,  6.11it/s]


Saved Obama_chunk68.csv


Chunk 69/109: 100%|██████████| 500/500 [01:23<00:00,  6.01it/s]


Saved Obama_chunk69.csv


Chunk 70/109: 100%|██████████| 500/500 [01:21<00:00,  6.13it/s]


Saved Obama_chunk70.csv


Chunk 71/109: 100%|██████████| 500/500 [01:22<00:00,  6.09it/s]


Saved Obama_chunk71.csv


Chunk 72/109: 100%|██████████| 500/500 [01:22<00:00,  6.09it/s]


Saved Obama_chunk72.csv


Chunk 73/109: 100%|██████████| 500/500 [01:21<00:00,  6.13it/s]


Saved Obama_chunk73.csv


Chunk 74/109: 100%|██████████| 500/500 [01:21<00:00,  6.10it/s]


Saved Obama_chunk74.csv


Chunk 75/109: 100%|██████████| 500/500 [01:21<00:00,  6.17it/s]


Saved Obama_chunk75.csv


Chunk 76/109: 100%|██████████| 500/500 [01:20<00:00,  6.20it/s]


Saved Obama_chunk76.csv


Chunk 77/109: 100%|██████████| 500/500 [01:20<00:00,  6.22it/s]


Saved Obama_chunk77.csv


Chunk 78/109: 100%|██████████| 500/500 [01:19<00:00,  6.26it/s]


Saved Obama_chunk78.csv


Chunk 79/109: 100%|██████████| 500/500 [01:20<00:00,  6.19it/s]


Saved Obama_chunk79.csv


Chunk 80/109: 100%|██████████| 500/500 [01:20<00:00,  6.23it/s]


Saved Obama_chunk80.csv


Chunk 81/109: 100%|██████████| 500/500 [01:20<00:00,  6.20it/s]


Saved Obama_chunk81.csv


Chunk 82/109: 100%|██████████| 500/500 [01:20<00:00,  6.18it/s]


Saved Obama_chunk82.csv


Chunk 83/109: 100%|██████████| 500/500 [01:20<00:00,  6.25it/s]


Saved Obama_chunk83.csv


Chunk 84/109: 100%|██████████| 500/500 [01:20<00:00,  6.21it/s]


Saved Obama_chunk84.csv


Chunk 85/109: 100%|██████████| 500/500 [01:20<00:00,  6.19it/s]


Saved Obama_chunk85.csv


Chunk 86/109: 100%|██████████| 500/500 [01:21<00:00,  6.13it/s]


Saved Obama_chunk86.csv


Chunk 87/109: 100%|██████████| 500/500 [01:22<00:00,  6.08it/s]


Saved Obama_chunk87.csv


Chunk 88/109: 100%|██████████| 500/500 [01:20<00:00,  6.18it/s]


Saved Obama_chunk88.csv


Chunk 89/109: 100%|██████████| 500/500 [01:21<00:00,  6.13it/s]


Saved Obama_chunk89.csv


Chunk 90/109: 100%|██████████| 500/500 [01:20<00:00,  6.18it/s]


Saved Obama_chunk90.csv


Chunk 91/109: 100%|██████████| 500/500 [01:21<00:00,  6.10it/s]


Saved Obama_chunk91.csv


Chunk 92/109: 100%|██████████| 500/500 [01:21<00:00,  6.16it/s]


Saved Obama_chunk92.csv


Chunk 93/109: 100%|██████████| 500/500 [01:21<00:00,  6.10it/s]


Saved Obama_chunk93.csv


Chunk 94/109: 100%|██████████| 500/500 [01:21<00:00,  6.12it/s]


Saved Obama_chunk94.csv


Chunk 95/109: 100%|██████████| 500/500 [01:21<00:00,  6.12it/s]


Saved Obama_chunk95.csv


Chunk 96/109: 100%|██████████| 500/500 [01:22<00:00,  6.07it/s]


Saved Obama_chunk96.csv


Chunk 97/109: 100%|██████████| 500/500 [01:22<00:00,  6.04it/s]


Saved Obama_chunk97.csv


Chunk 98/109: 100%|██████████| 500/500 [01:20<00:00,  6.25it/s]


Saved Obama_chunk98.csv


Chunk 99/109: 100%|██████████| 500/500 [01:20<00:00,  6.25it/s]


Saved Obama_chunk99.csv


Chunk 100/109: 100%|██████████| 500/500 [01:20<00:00,  6.24it/s]


Saved Obama_chunk100.csv


Chunk 101/109: 100%|██████████| 500/500 [01:20<00:00,  6.20it/s]


Saved Obama_chunk101.csv


Chunk 102/109: 100%|██████████| 500/500 [01:19<00:00,  6.25it/s]


Saved Obama_chunk102.csv


Chunk 103/109: 100%|██████████| 500/500 [01:19<00:00,  6.29it/s]


Saved Obama_chunk103.csv


Chunk 104/109: 100%|██████████| 500/500 [01:19<00:00,  6.29it/s]


Saved Obama_chunk104.csv


Chunk 105/109: 100%|██████████| 500/500 [01:19<00:00,  6.29it/s]


Saved Obama_chunk105.csv


Chunk 106/109: 100%|██████████| 500/500 [01:19<00:00,  6.29it/s]


Saved Obama_chunk106.csv


Chunk 107/109: 100%|██████████| 500/500 [01:19<00:00,  6.25it/s]


Saved Obama_chunk107.csv


Chunk 108/109: 100%|██████████| 500/500 [01:20<00:00,  6.25it/s]


Saved Obama_chunk108.csv


Chunk 109/109: 100%|██████████| 395/395 [01:03<00:00,  6.22it/s]


Saved Obama_chunk109.csv
Saved final combined file: /Users/kd769/Library/CloudStorage/Dropbox/Yale/Research/Election Media Coverage/shortpaper/code/python/results/ObamaHeadlines.csv
