In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Show every column when a DataFrame is displayed (None removes the column limit).
pd.set_option('display.max_columns', None)

In [None]:
# Data from https://www.ncsbe.gov/results-data/voter-registration-data
# Downloaded 11/7/25

## Load the Watauga and NC datasets

In [None]:
file = 'ncvoter95.txt'

# Read only the columns the analysis uses from the tab-separated, latin-1 encoded export.
wat_usecols = [
    'zip_code',
    'registr_dt',
    'race_code',
    'ethnic_code',
    'party_cd',
    'gender_code',
    'birth_year',
    'status_cd',
    'precinct_desc',
]
wat_df = pd.read_csv(file, sep="\t", usecols=wat_usecols, encoding="latin-1")

# Keep only rows with status code 'A' (presumably "active" registrations -- confirm against the
# NCSBE file layout), then show the non-null count per column.
wat_df = wat_df.loc[wat_df['status_cd'].eq('A')]
wat_df.count()

In [None]:
# List the distinct precinct_desc values present in the Watauga frame.
wat_df['precinct_desc'].unique()

In [None]:
# Count Watauga rows whose precinct_desc is missing (NaN) or an empty string.
wat_precinct = wat_df['precinct_desc']
wat_precinct_blank = wat_precinct.isna() | wat_precinct.eq('')
wat_empty = wat_precinct_blank.sum()
wat_empty.sum()

In [None]:
file = 'ncvoter_Statewide.txt'

# Read only the columns the analysis uses from the tab-separated, latin-1 encoded export.
nc_usecols = [
    'zip_code',
    'registr_dt',
    'race_code',
    'ethnic_code',
    'party_cd',
    'gender_code',
    'birth_year',
    'status_cd',
    'precinct_desc',
]
nc_df = pd.read_csv(file, sep="\t", usecols=nc_usecols, encoding="latin-1")

# Keep only rows with status code 'A' (presumably "active" registrations -- confirm against the
# NCSBE file layout), then show the non-null count per column.
nc_df = nc_df.loc[nc_df['status_cd'].eq('A')]
nc_df.count()

In [None]:
# List the distinct precinct_desc values present in the statewide frame.
nc_df['precinct_desc'].unique()

In [None]:
# Report, separately, how many statewide rows have a missing (NaN) precinct_desc
# and how many have an empty-string precinct_desc.
nc_precinct = nc_df['precinct_desc']
nc_na = nc_precinct.isna()
nc_empty = nc_precinct.eq('').sum()
print(nc_na.sum())
print(nc_empty.sum())

## Clean and filter Watauga dataset

In [None]:
# Drop rows whose registration date is the '##/##/####' placeholder, parse the remaining dates,
# and make the date the index.
# - Built as a single chain with .assign(), so no column is assigned on a filtered slice
#   (the original pattern triggers pandas' SettingWithCopyWarning).
# - Guarded on the column still being present, so re-running this cell after 'registr_dt' has
#   already become the index is a no-op instead of a KeyError.
if 'registr_dt' in wat_df.columns:
    wat_df = (
        wat_df.loc[wat_df['registr_dt'] != '##/##/####']
        .assign(registr_dt=lambda frame: pd.to_datetime(frame['registr_dt']))
        .set_index('registr_dt')
    )

# Chronological order, so the date-range .loc slices used later operate on a sorted index.
wat_df_asc = wat_df.sort_index(ascending=True)

In [None]:
# Preview the first rows of the date-indexed, chronologically sorted Watauga frame.
wat_df_asc.head()

In [None]:
# Registration windows: 2023-08-01 through 2023-11-07, and everything from 2025-08-01 onward.
wat_past = wat_df_asc.loc['2023-08-01':'2023-11-07']
wat_current = wat_df_asc.loc['2025-08-01':]

# Fractional drop from the 2023 window to the 2025 window (positive when 2025 is smaller).
n_wat_past = len(wat_past)
n_wat_current = len(wat_current)
percent = (n_wat_past - n_wat_current) / n_wat_past
print(
    f'The 2023 Watauga election had a registration count of {n_wat_past}, while the 2025 election cycle '
    f'had a registration count of {n_wat_current}; a {percent:.1%} decrease.'
)

## Clean and filter NC dataset

In [None]:
# Drop rows whose registration date is the '##/##/####' placeholder, parse the remaining dates,
# and make the date the index.
# - Built as a single chain with .assign(), so no column is assigned on a filtered slice
#   (the original pattern triggers pandas' SettingWithCopyWarning).
# - Guarded on the column still being present, so re-running this cell after 'registr_dt' has
#   already become the index is a no-op instead of a KeyError.
if 'registr_dt' in nc_df.columns:
    nc_df = (
        nc_df.loc[nc_df['registr_dt'] != '##/##/####']
        .assign(registr_dt=lambda frame: pd.to_datetime(frame['registr_dt']))
        .set_index('registr_dt')
    )

# Chronological order, so the date-range .loc slices used later operate on a sorted index.
nc_df_asc = nc_df.sort_index(ascending=True)

In [None]:
# Registration windows: 2023-08-01 through 2023-11-07, and everything from 2025-08-01 onward.
nc_past = nc_df_asc.loc['2023-08-01':'2023-11-07']
nc_current = nc_df_asc.loc['2025-08-01':]

# Fractional drop from the 2023 window to the 2025 window (positive when 2025 is smaller).
n_nc_past = len(nc_past)
n_nc_current = len(nc_current)
percent = (n_nc_past - n_nc_current) / n_nc_past
print(
    f'The 2023 NC election had a new registration count of {n_nc_past}, while the 2025 election cycle '
    f'had a registration count of {n_nc_current}; a {percent:.1%} decrease.'
)

## Group-by-precinct Watauga Dataset

In [None]:
# pd.set_option('display.max_rows', None)


# wat_df = wat_current[['precinct_desc', 'race_code']]

# wat_pre = wat_df.groupby(['precinct_desc', 'race_code']).size()

# wat_df = pd.DataFrame(wat_pre)
# wat_df

In [None]:
# NOTE: this rebinds wat_df to a two-column view of the 2025 window.
wat_df = wat_current.loc[:, ['precinct_desc', 'race_code']]

# One row per (precinct, race) pair with the number of registrations in a 'count' column.
wat_pre = (
    wat_df
    .groupby(['precinct_desc', 'race_code'])
    .size()
    .reset_index(name='count')
)

wat_pre

# wat_df.groupby('precinct_desc').sum() >= 100

In [None]:
# Preview the first rows of the 2025 Watauga window.
wat_current.head()

In [None]:
# Stacked bars: one bar per precinct, one segment per race code, for the 2025 Watauga window.
race_by_precinct = (
    wat_current
    .groupby(['precinct_desc', 'race_code'])
    .size()
    .unstack('race_code')
)
ax = race_by_precinct.plot.bar(stacked=True, ylim=(0, 90), figsize=(10, 6))

# Place the legend outside the plotting area, to the right of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')


In [None]:
# Registration counts for the 2025 Watauga window: precincts as rows, race codes as columns.
wat_current.groupby(['precinct_desc','race_code']).size().unstack('race_code')

## Party, gender, and race plots

In [None]:
# Party registration counts in a 2x2 grid: NC on the top row, Watauga on the bottom row;
# the 2023 window on the left, the 2025 window on the right.
fig, axes = plt.subplots(2, 2, figsize=(10, 6))

# Colors are applied by bar position (value_counts() order, most frequent first),
# not looked up by party code.
nc_party_colors = ['grey', 'red', 'blue', 'green']
wat_party_colors = ['grey', 'blue', 'red', 'green']

party_panels = [
    (nc_past, axes[0, 0], '2023 NC Party', nc_party_colors, 70000),
    (nc_current, axes[0, 1], '2025 NC Party', nc_party_colors, 70000),
    (wat_past, axes[1, 0], '2023 WAT Party', wat_party_colors, 1200),
    # Title corrected: this panel plots wat_current (the 2025 window) but was labelled 2023.
    (wat_current, axes[1, 1], '2025 WAT Party', wat_party_colors, 1200),
]
for panel_df, panel_ax, panel_title, panel_colors, y_max in party_panels:
    panel_df['party_cd'].value_counts().plot.bar(
        ax=panel_ax, ylim=(0, y_max), color=panel_colors, title=panel_title, rot=0
    )

plt.tight_layout(h_pad=3.0)


In [None]:
# Gender registration counts in a 2x2 grid: NC on the top row, Watauga on the bottom row;
# the 2023 window on the left, the 2025 window on the right.
fig, axes = plt.subplots(2, 2, figsize=(10, 6))

# Colors are applied by bar position (value_counts() order), not looked up by gender code.
gender_colors = ['hotpink', 'blue', 'grey']

gender_panels = [
    (nc_past, axes[0, 0], '2023 NC Gender', 61000),
    (nc_current, axes[0, 1], '2025 NC Gender', 61000),
    (wat_past, axes[1, 0], '2023 WAT Gender', 1100),
    (wat_current, axes[1, 1], '2025 WAT Gender', 1100),
]
for panel_df, panel_ax, panel_title, y_max in gender_panels:
    panel_df['gender_code'].value_counts().plot.bar(
        ax=panel_ax, ylim=(0, y_max), color=gender_colors, title=panel_title, rot=0
    )

plt.tight_layout(h_pad=3.0)


In [None]:
# Race registration counts in a 2x2 grid: NC on the top row, Watauga on the bottom row;
# the 2023 window on the left, the 2025 window on the right.
fig, axes = plt.subplots(2, 2, figsize=(10, 6))

# Colors are applied by bar position (value_counts() order), not looked up by race code.
race_colors = ['whitesmoke', 'grey', 'black', 'green']
# NOTE(review): the 2023 NC panel swaps 'black' and 'grey' relative to the other three panels.
# Kept exactly as written; confirm it matches that panel's bar order.
race_colors_nc_2023 = ['whitesmoke', 'black', 'grey', 'green']

race_panels = [
    (nc_past, axes[0, 0], '2023 NC Race', race_colors_nc_2023, 80000),
    (nc_current, axes[0, 1], '2025 NC Race', race_colors, 80000),
    (wat_past, axes[1, 0], '2023 WAT Race', race_colors, 1600),
    (wat_current, axes[1, 1], '2025 WAT Race', race_colors, 1600),
]
for panel_df, panel_ax, panel_title, panel_colors, y_max in race_panels:
    panel_df['race_code'].value_counts().plot.bar(
        ax=panel_ax, ylim=(0, y_max), color=panel_colors, edgecolor='black', title=panel_title, rot=0
    )

plt.tight_layout(h_pad=3.0)


In [None]:
# Explore UNA data: restrict each window to rows whose party_cd is 'UNA'.

# Statewide
una_nc_past = nc_past.loc[nc_past['party_cd'].eq('UNA')]
una_nc_current = nc_current.loc[nc_current['party_cd'].eq('UNA')]

# Watauga
una_wat_past = wat_past.loc[wat_past['party_cd'].eq('UNA')]
una_wat_current = wat_current.loc[wat_current['party_cd'].eq('UNA')]


## Reusable comparison functions

In [None]:
# Rebuild the statewide windows (same ranges as in the "Clean and filter NC dataset" section):
# 2023-08-01 through 2023-11-07, and everything from 2025-08-01 onward.
nc_past = nc_df_asc.loc['2023-08-01':'2023-11-07']
nc_current = nc_df_asc.loc['2025-08-01':]

# Fractional drop from the 2023 window to the 2025 window (positive when 2025 is smaller).
n_nc_past = len(nc_past)
n_nc_current = len(nc_current)
percent = (n_nc_past - n_nc_current) / n_nc_past
print(
    f'The 2023 NC election had a new registration count of {n_nc_past}, while the 2025 election cycle '
    f'had a registration count of {n_nc_current}; a {percent:.1%} decrease.'
)

In [None]:
# Registration counts per precinct (rows) and party (columns) for each Watauga window.
precinct_party_keys = ['precinct_desc', 'party_cd']
past_df = wat_past.groupby(precinct_party_keys).size().unstack('party_cd')
current_df = wat_current.groupby(precinct_party_keys).size().unstack('party_cd')

# Percent drop from the 2023 window to the 2025 window: positive = fewer registrations in 2025.
# Cells are NaN where a precinct/party pair is absent from either window.
pct_chg_df = (past_df - current_df).div(past_df).mul(100)

pct_chg_df

# past_pct_chg = pd.DataFrame()

# thresh = 20

# Input data from above filtered df into the empty pct_chg df to get the pct_chg from two different time periods
# for col in past_pct_chg:
#     pct_chg[col] = past_pct_chg[col].pct_change() * 100

# party_df = pd.DataFrame(data_df)
# party = party_df.T

# # Create an empty pct_chg df for analysis
# pct_chg = pd.DataFrame()

# thresh = 20
# # Input data from above filtered df into the empty pct_chg df to get the pct_chg from two different time periods
# for col in party:
#     pct_chg[col] = party[col].pct_change() * 100
    
# # Adds the pct_chg values to the dataset for analysis
# party.loc['pct_change'] = pct_chg.iloc[-1]

# party = party.T
# party

# fil_df = party[abs(party['pct_change']) > thresh]

# # print(f"The 2025 election had a {fil_df['pct_change'].iloc[0]:.2f}% change in {fil_df.index[0]}, " +
# #     f"compared to 2023.")
# statements = []
# for index, row in fil_df.iterrows():
#     statement = f"The {current.index.year.unique().item()} election had a {row['pct_change']:.2f}% change in {row.name}, compared to {past.index.year.unique().item()}."
#     statements.append(statement)

# for s in statements:
#     print(s)
# # print(f"The {current.index.year.unique().item()} election had a {row['pct_change']:.2f}% change in {row.index[0]}, " +
# #     f"compared to {past.index.year.unique().item()}.")

In [None]:
from datetime import date

def _window_year(window, which):
    """Return the single calendar year covered by ``window``'s datetime index.

    Raises:
        ValueError: if the window is empty or spans more than one calendar year,
            since the printed sentence names exactly one year per window.
    """
    years = window.index.year.unique()
    if len(years) != 1:
        raise ValueError(
            f"The {which} date window must contain rows from exactly one calendar year; "
            f"found {years.tolist()}."
        )
    return years[0]


def compare_df(df, col, start1, end1, start2, end2, thresh):
    """Print the categories of ``col`` whose row count changed by more than ``thresh`` percent.

    The change is measured from the first window (``start1``..``end1``, the baseline)
    to the second window (``start2``..``end2``): ``(second - first) / first * 100``.

    Args:
        df: DataFrame indexed by a sorted datetime index (the function slices with
            ``df.loc[start:end]`` and reads ``.index.year``).
        col: Name of the categorical column to count (e.g. ``'gender_code'``).
        start1, end1: Bounds of the baseline window, passed to ``df.loc``.
        start2, end2: Bounds of the comparison window, passed to ``df.loc``.
        thresh: Only categories with ``abs(percent change) > thresh`` are printed.

    Returns:
        None. One sentence per reported category is written to stdout.

    Raises:
        ValueError: if either window is empty or spans more than one calendar year.

    Notes:
        Categories that are missing from either window have an undefined (NaN)
        percent change and are not reported.
    """
    baseline = df.loc[start1:end1]
    comparison = df.loc[start2:end2]
    baseline_year = _window_year(baseline, 'first')
    comparison_year = _window_year(comparison, 'second')

    # Align the two count series on category. Fixed internal column names are used (rather
    # than the year labels) so two windows inside the same year do not collide on one key.
    counts = pd.DataFrame({
        'baseline': baseline[col].value_counts(),
        'comparison': comparison[col].value_counts(),
    })

    # Computed directly instead of via pct_change(), whose implicit forward-fill of
    # missing values is deprecated in recent pandas releases.
    pct_change = (counts['comparison'] - counts['baseline']) / counts['baseline'] * 100

    flagged = pct_change[pct_change.abs() > thresh]
    for category, change in flagged.items():
        print(
            f"The {comparison_year} election had a {change:.1f}% change in {category}, "
            f"compared to {baseline_year}."
        )
    

In [None]:
# Compare gender_code counts between the Aug 1 - Nov 7 windows of 2023 (first window)
# and 2025 (second window), reporting categories whose change exceeds 1 percent.
compare_df(wat_df_asc, 'gender_code', date(2023, 8, 1), date(2023, 11, 7), date(2025, 8, 1), date(2025, 11, 7), 1)



In [None]:
def percent_chg_df(df, group, col, past1, past2, current1, current2):
    """Draw a heatmap of the percent drop in row counts between two date windows.

    Rows of ``df`` are counted per (``group``, ``col``) pair in each window and the
    heatmap shows ``(past - current) / past * 100``: positive values mean fewer
    rows in the current window than in the past window.

    Args:
        df: DataFrame with a sorted datetime index, sliced with ``df.loc[start:end]``.
        group: Column whose values become the heatmap rows.
        col: Column whose values become the heatmap columns.
        past1, past2: Bounds of the past window, passed to ``df.loc``.
        current1, current2: Bounds of the current window, passed to ``df.loc``.

    Returns:
        The matplotlib Axes returned by ``seaborn.heatmap``.

    Notes:
        Cells are NaN where a (group, col) pair is absent from either window.
    """
    # Imported here because the notebook only imports seaborn in a later cell, after this
    # function is first called; a local import keeps Restart & Run All working.
    import seaborn as sns

    # Use the windows cut from the `df` argument. The previous version computed these
    # slices and then ignored them, reading the notebook globals wat_past / wat_current.
    past = df.loc[past1:past2]
    current = df.loc[current1:current2]

    past_counts = past.groupby([group, col]).size().unstack(col)
    current_counts = current.groupby([group, col]).size().unstack(col)

    pct_chg_df = ((past_counts - current_counts) / past_counts) * 100

    ax = sns.heatmap(pct_chg_df, annot=True, cmap='viridis', fmt=".1f", linewidths=.5)

    # Put the column labels on top and drop the axis titles.
    ax.xaxis.tick_top()
    ax.set(xlabel="", ylabel="")

    return ax

In [None]:
# Heatmap call with group='party_cd' and col='race_code', passing the Aug 1 - Nov 7 ranges
# of 2023 and 2025 as the past and current windows.
percent_chg_df(wat_df_asc, 'party_cd', 'race_code', date(2023, 8, 1), date(2023, 11, 7), date(2025, 8, 1), date(2025, 11, 7))

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Demo: render a small DataFrame of random values as an annotated seaborn heatmap.
# A seeded Generator is used so the figure is identical on every Restart & Run All.
rng = np.random.default_rng(42)

# Create a sample DataFrame (three columns on different scales, five labelled rows)
data = {
    'Column_A': rng.random(5),
    'Column_B': rng.random(5) * 10,
    'Column_C': rng.random(5) * 100
}
df = pd.DataFrame(data, index=['Row_1', 'Row_2', 'Row_3', 'Row_4', 'Row_5'])

# Create the heatmap
plt.figure(figsize=(8, 6)) # Adjust figure size as needed
sns.heatmap(df, annot=True, cmap='viridis', fmt=".2f", linewidths=.5)

# Add a title to the heatmap
plt.title('Heatmap of DataFrame Values')

# Display the plot
plt.show()
