## Presidential results (D%, R%, two-party %)

Difficulty: Easy

Sources: MIT Election Data & Science Lab (county & state returns), state SOS

MIT Election Lab
Quality issues: late-certified adjustments are rare, but check each state's certification date. Third-party votes need explicit handling: decide how to treat them when computing the two-party share.
Estimated time: hours to 1 day using MIT Election Lab CSVs.

https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/42MVDX

In [None]:
#!pip install pandas

In [None]:
# pip install lxml

In [None]:
#!pip install pyjanitor

In [None]:
#!pip install matplotlib

In [None]:
#!pip install seaborn

In [None]:
#!pip install plotly

In [None]:
#!pip install --upgrade kaleido

In [None]:
#!pip install duckdb

In [None]:
import pandas as pd

# Quick look at the raw MIT Election Lab state-level returns.
# The same CSV is read again and reshaped in the "Load 1976-2020 Data" cell.
df = pd.read_csv(filepath_or_buffer="data/POTUS/1976-2020-president.csv")

In [None]:
#print(df)

# Load 1976-2020 Data from MIT

- Source: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/42MVDX
- File: `1976-2020-president.csv`

In [None]:
import pandas as pd
from datetime import datetime

# Build the 1976-2020 state-level table: one row per (year, state) holding the
# Dem / Rep / Other vote counts, each group's share of total votes, and the
# Dem-Rep margins in both directions.


def _group_party(label):
    """Collapse a party_detailed label into "dem", "rep" or "other".

    Matching is a case-insensitive substring test, so any label containing
    "democrat" counts as dem (checked first) and any label containing
    "republican" counts as rep. Non-string values (e.g. NaN) fall into "other".
    """
    name = str(label).lower()
    if "democrat" in name:
        return "dem"
    if "republican" in name:
        return "rep"
    return "other"


# === 1. Load the data ===
df = pd.read_csv("data/POTUS/1976-2020-president.csv")

# Normalize column names FIRST (some datasets differ in case) so that every
# later lookup, including "state_fips" just below, sees lower-case names.
df.columns = df.columns.str.lower()

# Store FIPS codes as strings left-padded with zeros to at least 2 characters
df["state_fips"] = df["state_fips"].astype(str).str.zfill(2)

# === 2. Keep only relevant columns ===
cols = ["year", "state", "state_po", "state_fips", "party_detailed", "candidatevotes", "totalvotes"]
df = df[cols].rename(
    columns={"candidatevotes": "candidate_votes", "totalvotes": "total_votes"}
)

# === 3. Group parties into Dem, Rep, or Other ===
df["party_grouped"] = df["party_detailed"].apply(_group_party)

# === 4. Aggregate votes by year, state, and party group ===
# unstack turns the party groups into columns; a group with no rows gets 0
pivot_df = (
    df.groupby(["year", "state", "state_po", "state_fips", "party_grouped"])["candidate_votes"]
    .sum()
    .unstack(fill_value=0)
    .reset_index()
)

# === 5. Merge in total votes (max should be same per state-year) ===
total_votes = df.groupby(["year", "state", "state_po", "state_fips"])["total_votes"].max().reset_index()

merged = pivot_df.merge(total_votes, on=["year", "state", "state_po", "state_fips"], how="left")

# === 6. Compute party vote percentages (share of ALL votes, not two-party share) ===
for party in ["dem", "rep", "other"]:
    merged[f"{party}_pct"] = (merged[party]*100 / merged["total_votes"]).round(2)

# === 7. Compute Dem–Rep difference (percentage points, both sign conventions) ===
merged["d_r_diff"] = (merged["dem_pct"] - merged["rep_pct"]).round(2)
merged["r_d_diff"] = (merged["rep_pct"] - merged["dem_pct"]).round(2)

# === 8. sort and inspect ===
df19762020 = merged.sort_values(["year", "state_po"]).reset_index(drop=True)

In [None]:
# Show the sorted 1976-2020 state-level table built in the previous cell
df19762020

# 2024 POTUS election data

Source: FEC.gov [Excel file](https://view.officeapps.live.com/op/view.aspx?src=https%3A%2F%2Fwww.fec.gov%2Fresources%2Fcms-content%2Fdocuments%2F2024presgeresults.xlsx&wdOrigin=BROWSELINK)

In [None]:
import pandas as pd
import janitor

# Read the first 51 data rows of the FEC 2024 workbook and let pyjanitor
# clean the column headers.
df24 = pd.read_excel("data/POTUS/2024presgeresults.xlsx", header=0, nrows=51).clean_names()

# Keep only the columns needed and rename them to match the 1976-2020 table
keep_cols = [
    "state",
    "electoral_votes",
    "electoral_vote_trump_r_",
    "electoral_vote_harris_d_",
    "harris",
    "trump",
    "total_votes",
]
new_names = {
    "state": "state_po",
    "electoral_votes": "ev_total",
    "electoral_vote_trump_r_": "ev_rep",
    "electoral_vote_harris_d_": "ev_dem",
    "harris": "dem",
    "trump": "rep",
}
df24 = df24[keep_cols].rename(columns=new_names)

# Coerce every column except the state abbreviation to int;
# cells that cannot be parsed (or are empty) become 0
num_cols = df24.columns.drop("state_po").tolist()
df24[num_cols] = (
    df24[num_cols]
    .apply(pd.to_numeric, errors="coerce")
    .fillna(0)
    .astype(int)
)

# Votes that went to neither major-party candidate
df24["other"] = df24["total_votes"] - df24["dem"] - df24["rep"]

# Share of total votes per party group, in percent
df24["dem_pct"] = (df24["dem"] * 100 / df24["total_votes"]).round(2)
df24["rep_pct"] = (df24["rep"] * 100 / df24["total_votes"]).round(2)
df24["other_pct"] = (df24["other"] * 100 / df24["total_votes"]).round(2)

# Margins in both sign conventions
df24["d_r_diff"] = df24["dem_pct"].sub(df24["rep_pct"]).round(2)
df24["r_d_diff"] = df24["rep_pct"].sub(df24["dem_pct"]).round(2)

# Put the election year in the first column
df24.insert(0, "year", 2024)

In [None]:
# Show the 2024 table built in the previous cell
display(df24)

# Combine 1976-2020 and 2024 data

In [None]:
# Stack the 2024 rows under the 1976-2020 rows, taking from df24 only the
# columns that the historical table also has.
shared_cols = df19762020.columns.intersection(df24.columns)
df19762024 = pd.concat(
    [df19762020, df24[shared_cols]],
    ignore_index=True,
)


In [None]:
# Show the combined 1976-2024 table built in the previous cell
df19762024

# Fill in missing state and state_fips for 2024

In [None]:
# Update/Set/From/Where operation to populate state and state_fips for 2024 from the values from 2020,
# then attach the electoral-college votes and export the state-level CSV.

# Imported here so this cell also runs on a fresh kernel (Restart & Run All)
from datetime import datetime as dt

# Create 2020 lookup: state_po -> state name and FIPS code
df_2020 = df19762024.query("year == 2020")[["state_po", "state", "state_fips"]]

# Left join the lookup back and overwrite state / state_fips on the 2024 rows only.
# validate="many_to_one" fails loudly if the lookup ever holds a duplicate
# state_po, which would otherwise silently duplicate rows.
df19762024 = (
    df19762024
    .merge(df_2020, on="state_po", how="left", suffixes=("", "_2020"), validate="many_to_one")
    .assign(
        state=lambda d: d["state"].where(d["year"] != 2024, d["state_2020"]),
        state_fips=lambda d: d["state_fips"].where(d["year"] != 2024, d["state_fips_2020"]),
    )
    .drop(columns=["state_2020", "state_fips_2020"])
)

# Load electoral college vote data (see ElectoralCollege notebook)
df_ev = pd.read_csv("data/electoral_college_votes_by_state_year.csv")
# Bring the join keys into the same form as the vote table:
# zero-padded string FIPS codes and upper-case state names
df_ev["state_fips"] = pd.to_numeric(df_ev["state_fips"], errors="coerce").astype("Int64").astype(str).str.zfill(2)
df_ev["state"] = df_ev["state"].str.upper()
num_cols = ["state_ev_total", "ev_dem", "ev_rep"]
df_ev[num_cols] = df_ev[num_cols].apply(pd.to_numeric, errors="coerce").fillna(0).astype(int)

# Left join keeps every vote row even when no electoral-college row matches
df_final = df19762024.merge(df_ev, on=["year", "state", "state_po", "state_fips"], how="left")
df_final.to_csv("data/potus_votes_bystate.csv", index=False)
# Single quotes inside the f-string keep this valid on Python versions before 3.12
print(f"{dt.now().strftime('%Y-%m-%d %H:%M:%S')} - Fill in missing state cell complete")

# Summarize to National numbers per election year

In [None]:
from datetime import datetime as dt

# Roll the state-level table up to one row per election year:
# sum the vote counts and electoral votes, then recompute the percentages
# and margins from the national totals.
nation = (
    df_final.groupby("year", as_index=False)
      .agg({
          "dem": "sum",
          "rep": "sum",
          "other": "sum",
          "total_votes": "sum",
          "ev_dem": "sum",
          "ev_rep": "sum"
      })
      .assign(
          dem_pct=lambda d: (100 * d.dem / d.total_votes).round(2),
          rep_pct=lambda d: (100 * d.rep / d.total_votes).round(2),
          other_pct=lambda d: (100 * d.other / d.total_votes).round(2),
          # the margin lambdas read the *_pct columns created just above in this same assign
          d_r_diff=lambda d: (d.dem_pct - d.rep_pct).round(2),
          r_d_diff=lambda d: (d.rep_pct - d.dem_pct).round(2)
      )
)

nation.to_csv("data/potus_votes_national.csv", index=False)
# Single quotes inside the f-string keep this valid on Python versions before 3.12
print(f"{dt.now().strftime('%Y-%m-%d %H:%M:%S')} - nation cell complete")

# Plot Heatmap

In [None]:
# Jupyter notebook rendering fix for plotly:
# make "notebook_connected" the default renderer for figures shown in this notebook
import plotly.io as pio
pio.renderers.default = "notebook_connected"

In [None]:
import pandas as pd
import plotly.express as px
import plotly.io as pio

# ============================================================
#  Load and prepare data
# ============================================================
# Relative path: the same file the "Fill in missing state" cell writes,
# so the notebook does not depend on one machine's directory layout
df = pd.read_csv("data/potus_votes_bystate.csv")

# Proper casing — but do NOT reverse here, we’ll keep natural order (Alabama first)
df["state"] = df["state"].str.title()

# Use alphabetical order so Alabama appears first (top)
state_order = sorted(df["state"].unique())
df["state"] = pd.Categorical(df["state"], categories=state_order, ordered=True)

# Pivot: rows = state, columns = year
# heat_df drives the colours (Rep − Dem margin); text_df holds the number
# printed in each cell (the state's electoral votes). Both are sorted the
# same way so the overlay lines up cell for cell.
heat_df = df.pivot(index="state", columns="year", values="r_d_diff")
heat_df = heat_df.sort_index(axis=1)
text_df = df.pivot(index="state", columns="year", values="state_ev_total")
text_df = text_df.sort_index(axis=1)

# Symmetric color range so 0 maps to the white midpoint
max_abs = df["r_d_diff"].abs().max()

# ============================================================
#  Interactive heatmap
# ============================================================
# r_d_diff is rep_pct − dem_pct: positive (red) = Republican lead,
# negative (blue) = Democratic lead — hence the "Rep − Dem" label.
fig = px.imshow(
    heat_df,
    color_continuous_scale=[(0.0, "blue"), (0.5, "white"), (1.0, "red")],
    zmin=-max_abs, zmax=max_abs,
    aspect="auto",
    labels=dict(x="", y="", color="Rep − Dem diff"),
    title="POTUS: State Partisan Lean by Year",
)
# Add electoral votes as text overlay
fig.update_traces(
    text=text_df.values,
    texttemplate="%{text}",   # just show the number
    textfont={"size": 10, "color": "black"},
)
# DO NOT reverse — data order already puts Alabama at top
fig.update_yaxes(tickmode="array", tickvals=list(heat_df.index))

# Years at bottom, all shown
fig.update_xaxes(side="bottom", tickangle=45,
                 tickmode="array", tickvals=list(heat_df.columns))

# Explicit figure pixel size (1200 × 900)
fig.update_layout(
    width=1200,
    height=900,
    title=dict(x=0.5, xanchor="center"),
    coloraxis_colorbar=dict(title="Rep − Dem diff"),
    margin=dict(l=80, r=60, t=80, b=80),
    font=dict(size=12),
    annotations=[
        dict(
            text="Data: MIT Election Lab<br>X@bdill, bsky@wbdill",
            showarrow=False,
            xref="paper", yref="paper",
            x=0, y=-0.10, xanchor="left", yanchor="top",
            font=dict(size=11, color="gray")
        )
    ]
)

# Static export defaults.
# NOTE(review): pio.defaults is only present in recent plotly releases — confirm the installed version
pio.defaults.default_format = "png"
pio.defaults.default_width  = 1200
pio.defaults.default_height = 900
fig.write_image("data/potus_heatmap.png")

#fig.write_image("data/potus_heatmap.png", width=1200, height=900, scale=1)

fig.show()


# Non-interactive plot of heatmap

Fallback in case you can't get plotly to render correctly

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

#  Load and prepare data
# Relative path: the same file the "Fill in missing state" cell writes,
# so the notebook does not depend on one machine's directory layout
df = pd.read_csv("data/potus_votes_bystate.csv")

# Format state names and reverse order (Alabama at top)
# The category order is reversed here and the y-axis is inverted again in the
# styling section below; the two steps belong together.
df["state"] = df["state"].str.title()
state_order = list(df["state"].unique()[::-1])  # reverse
df["state"] = pd.Categorical(df["state"], categories=state_order, ordered=True)

# Pivot for heatmap: rows = state, columns = year
heat_df = df.pivot(index="state", columns="year", values="r_d_diff")

# Determine symmetric color scale
max_abs = df["r_d_diff"].abs().max()

# Define smooth blue→white→red colormap
cmap = LinearSegmentedColormap.from_list("blue_white_red", ["blue", "white", "red"])

# ============================================================
#  Figure setup (1800×900 px canvas at 100 dpi)
# ============================================================
width_px, height_px = 1800, 900
dpi = 100
figsize = (width_px / dpi, height_px / dpi)
fig, ax = plt.subplots(figsize=figsize, dpi=dpi)

# ============================================================
#  Heatmap
# ============================================================
# r_d_diff is rep_pct − dem_pct: positive (red) = Republican lead,
# negative (blue) = Democratic lead — hence the "Rep − Dem" label.
sns.heatmap(
    heat_df,
    cmap=cmap,
    vmin=-max_abs,
    vmax=max_abs,
    center=0,
    linewidths=0.5,
    linecolor="white",
    cbar_kws={"label": "Rep − Dem diff"},
    ax=ax
)

# ============================================================
#  Styling
# ============================================================
ax.set_title("POTUS: State Partisan Lean by Year", fontsize=16, pad=16)
ax.set_xlabel("")
ax.set_ylabel("")
plt.xticks(rotation=45, ha="right")
ax.invert_yaxis()             # Alabama at top
ax.figure.tight_layout()

# Add caption below chart (y < 0 places it just under the figure area)
plt.figtext(
    0.02, -0.02,
    "Data: MIT Election Lab | X@bdill, bsky@wbdill",
    ha="left", fontsize=10, color="gray"
)

# ============================================================
#  Export and show inline
# ============================================================
# Save BEFORE plt.show(): depending on the backend, the figure may already be
# released once it has been shown, which can leave the exported file blank.
# bbox_inches="tight" crops/expands to the drawn content (including the
# caption), so the PNG size can differ slightly from the canvas size.
fig.savefig(
    "data/potus_heatmap.png",
    dpi=dpi,
    bbox_inches="tight",
    facecolor="white"
)

plt.show()


In [None]:
import pandas as pd
import plotly.express as px

# ------------------------------------------------------------
# Toy data: two states x two election years
# ------------------------------------------------------------
df = pd.DataFrame(
    [
        ("Alabama", 2020, 25, 9),
        ("Alabama", 2024, 22, 9),
        ("Alaska", 2020, 10, 3),
        ("Alaska", 2024, 15, 3),
    ],
    columns=["state", "year", "r_d_diff", "state_ev_total"],
)

# Make both axes ordered categoricals (sorted ascending)
state_order, year_order = (sorted(df[name].unique()) for name in ("state", "year"))
df = df.assign(
    state=pd.Categorical(df["state"], categories=state_order, ordered=True),
    year=pd.Categorical(df["year"], categories=year_order, ordered=True),
)

# One state x year matrix for the colours, one for the cell labels
heat_df, text_df = (
    df.pivot(index="state", columns="year", values=value_col)
    for value_col in ("r_d_diff", "state_ev_total")
)

# Colour limits are symmetric around zero
max_abs = df["r_d_diff"].abs().max()

# ------------------------------------------------------------
# Interactive heatmap with text labels
# ------------------------------------------------------------
blue_white_red = [(0.0, "blue"), (0.5, "white"), (1.0, "red")]
fig = px.imshow(
    heat_df,
    color_continuous_scale=blue_white_red,
    zmin=-max_abs,
    zmax=max_abs,
    text_auto=False,   # cell text comes from the custom array set below
)

# Print the electoral votes inside each cell
overlay_font = {"size": 10, "color": "black"}
fig.update_traces(
    text=text_df.values,
    texttemplate="%{text}",
    textfont=overlay_font,
)

# ------------------------------------------------------------
# Formatting
# ------------------------------------------------------------
fig.update_yaxes(title=None, autorange="reversed")  # keeps Alabama at the top

fig.update_layout(
    title="POTUS Election: Republican vs Democratic Margin by State and Year",
    coloraxis_colorbar_title="R–D Difference (%)",
    width=1200,
    height=900,
)

fig.show()
