In [1]:
import pandas as pd
import os
import sys
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display

# Location of the repository root, added to sys.path so the project's `utils`
# package can be imported from this notebook.
# The default is the original author's local checkout; set the
# CLIMATE_CABINET_REPO environment variable to point at your own clone instead
# of editing this cell.
module_directory = os.environ.get(
    "CLIMATE_CABINET_REPO",
    "/Users/necabotheking/Documents/Github/2023-fall-clinic-climate-cabinet",
)
# Guard the append so re-running this cell does not pile up duplicate entries.
if module_directory not in sys.path:
    sys.path.append(module_directory)

from utils.preprocess_mi_campaign_data import read_expenditure_data
from utils.constants import EXP_FILEPATH, MI_EXPENDITURE_COLUMNS

### Michigan Campaign Expenditure Data 2018 - 2023 Exploratory Data Analysis

#### Task 1: Read in the datasets and merge them into one pandas DataFrame

In [2]:
# Read every expenditure file found in EXP_FILEPATH into its own DataFrame.
# - sorted(): os.listdir returns entries in arbitrary order, so sorting keeps
#   campaign_dataframe_lst[0] (previewed below) stable across runs/machines.
# - dotfiles (e.g. macOS .DS_Store) are skipped; they are not data files.
# - os.path.join: works whether or not EXP_FILEPATH ends with a separator.
campaign_dataframe_lst = [
    read_expenditure_data(
        os.path.join(EXP_FILEPATH, file), MI_EXPENDITURE_COLUMNS
    )
    for file in sorted(os.listdir(EXP_FILEPATH))
    if not file.startswith(".")
]

In [3]:
# Allow up to 100 columns in rich output so the wide table is not truncated.
pd.set_option("display.max_columns", 100)

# Open question: gub_account_type and gub_elec_type look blank in this
# preview -- are they missing for these records? TODO confirm.
campaign_dataframe_lst[0].head()

Unnamed: 0,doc_seq_no,expenditure_type,gub_account_type,gub_elec_type,page_no,expense_id,detail_id,doc_stmnt_year,doc_type_desc,com_legal_name,common_name,cfr_com_id,com_type,schedule_desc,exp_desc,purpose,extra_desc,f_name,lname_or_org,address,city,state,zip,exp_date,amount,state_loc,supp_opp,can_or_ballot,county,debt_payment,vend_name,vend_addr,vend_city,vend_state,vend_zip,gotv_ink_ind,fundraiser
0,488507,1B,,,0,4100,0,2020,ANNUAL CS,COMMITTEE TO ELECT CHARIS LEE FOR STATE REPRES...,COMMITTEE TO ELECT CHARIS LEE FOR ST,519354.0,CAN,DIRECT,COMPUTER COSTS,CHUNIQ INPOWER GOSQ.COM,,,COMMITTEE TO ELECT CHARIS LEE,1133 FAIRFAX ST,FLINT,MI,48505-0000,11/15/2019,354.28,,,,,,,,,,,,
1,488507,1B,,,0,4101,0,2020,ANNUAL CS,COMMITTEE TO ELECT CHARIS LEE FOR STATE REPRES...,COMMITTEE TO ELECT CHARIS LEE FOR ST,519354.0,CAN,DIRECT,PRINT ADVERTISING,SAWICKI & SON 313-962-2725,,,COMMITTEE TO ELECT CHARIS LEE,1133 FAIRFAX ST,FLINT,MI,48505-0000,12/02/2019,500.0,,,,,,,,,,,,
2,488507,1B,,,0,4108,0,2020,ANNUAL CS,COMMITTEE TO ELECT CHARIS LEE FOR STATE REPRES...,COMMITTEE TO ELECT CHARIS LEE FOR ST,519354.0,CAN,DIRECT,"MAILING,POSTAGE,BULK RATE",STAPLES,,,COMMITTEE TO ELECT CHARIS LEE,1133 FAIRFAX ST,FLINT,MI,48505-0000,11/26/2019,101.61,,,,,,,,,,,,
3,488507,1B,,,0,4109,0,2020,ANNUAL CS,COMMITTEE TO ELECT CHARIS LEE FOR STATE REPRES...,COMMITTEE TO ELECT CHARIS LEE FOR ST,519354.0,CAN,DIRECT,"MAILING,POSTAGE,BULK RATE",STAPLES,,,COMMITTEE TO ELECT CHARIS LEE,1133 FAIRFAX ST,FLINT,MI,48505-0000,11/26/2019,131.18,,,,,,,,,,,,X
4,488507,1B,,,0,4110,0,2020,ANNUAL CS,COMMITTEE TO ELECT CHARIS LEE FOR STATE REPRES...,COMMITTEE TO ELECT CHARIS LEE FOR ST,519354.0,CAN,DIRECT,"CONSULTATION, RESEARCH",FIELD OPERATIONS,,,COMMITTEE TO ELECT CHARIS LEE,1133 FAIRFAX ST,FLINT,MI,48505-0000,12/03/2019,100.65,,,,,,,,,,,,


In [4]:
# Stack the per-file frames into one DataFrame.
# ignore_index=True gives the result a fresh 0..n-1 index; without it each
# file's row labels are kept, so the merged frame has duplicate index labels.
merged_campaign_df = pd.concat(campaign_dataframe_lst, ignore_index=True)

In [5]:
# Inspect the dtype pandas inferred for each column of the merged frame.
merged_campaign_df.dtypes

doc_seq_no            int64
expenditure_type     object
gub_account_type     object
gub_elec_type        object
page_no               int64
expense_id            int64
detail_id             int64
doc_stmnt_year        int64
doc_type_desc        object
com_legal_name       object
common_name          object
cfr_com_id          float64
com_type             object
schedule_desc        object
exp_desc             object
purpose              object
extra_desc           object
f_name               object
lname_or_org         object
address              object
city                 object
state                object
zip                  object
exp_date             object
amount               object
state_loc            object
supp_opp            float64
can_or_ballot        object
county               object
debt_payment         object
vend_name            object
vend_addr            object
vend_city            object
vend_state           object
vend_zip             object
gotv_ink_ind        

In [6]:
# Convert `amount` to a numeric dtype. errors="coerce" turns any value that
# cannot be parsed as a number into NaN instead of raising.
merged_campaign_df["amount"] = merged_campaign_df["amount"].pipe(
    pd.to_numeric, errors="coerce"
)

In [7]:
# Re-inspect the dtypes to confirm the result of the `amount` conversion.
merged_campaign_df.dtypes

doc_seq_no            int64
expenditure_type     object
gub_account_type     object
gub_elec_type        object
page_no               int64
expense_id            int64
detail_id             int64
doc_stmnt_year        int64
doc_type_desc        object
com_legal_name       object
common_name          object
cfr_com_id          float64
com_type             object
schedule_desc        object
exp_desc             object
purpose              object
extra_desc           object
f_name               object
lname_or_org         object
address              object
city                 object
state                object
zip                  object
exp_date             object
amount              float64
state_loc            object
supp_opp            float64
can_or_ballot        object
county               object
debt_payment         object
vend_name            object
vend_addr            object
vend_city            object
vend_state           object
vend_zip             object
gotv_ink_ind        

In [8]:
# Share of missing (NaN) values per column, expressed as a percentage.
# isna().mean() is the fraction of NaN entries in each column.
null_percentage = (
    merged_campaign_df.isna()
    .mean()
    .mul(100)
    .rename_axis("Column Name")
    .reset_index(name="Missing Percentage")
)

# One row per column of the merged frame.
display(null_percentage)

Unnamed: 0,Column Name,Missing Percentage
0,doc_seq_no,0.0
1,expenditure_type,0.0
2,gub_account_type,0.0
3,gub_elec_type,0.0
4,page_no,0.0
5,expense_id,0.0
6,detail_id,0.0
7,doc_stmnt_year,0.0
8,doc_type_desc,0.0
9,com_legal_name,0.010585


#### Top 10 recipients

In [13]:
# Top 10 individual recipients of expenditures SUPPORTING a candidate or
# ballot issue.
#
# An "individual" here is a row with both f_name and lname_or_org populated.
# The supp_opp == 1.0 filter restricts the ranking to supporting expenditures,
# mirroring the supp_opp == 2.0 filter in the opposing cell; without it,
# opposing rows were ranked as well.
# NOTE(review): 1.0 = support / 2.0 = oppose is taken from this notebook's own
# labelling -- confirm against the data dictionary.
top_10_individual_recipients = (
    merged_campaign_df.loc[
        lambda df: df["lname_or_org"].notna()
        & df["f_name"].notna()
        & (df["supp_opp"] == 1.0)
    ]
    # Total per (recipient, stance, candidate/ballot issue) combination.
    .groupby(["f_name", "lname_or_org", "supp_opp", "can_or_ballot"])["amount"]
    .sum()
    .reset_index()
    .sort_values(by="amount", ascending=False)
    .head(10)
)
display(top_10_individual_recipients)

Unnamed: 0,f_name,lname_or_org,supp_opp,can_or_ballot,amount
202,HEATHER,RICKETTS,1.0,VOTERS NOT POLITICIANS,78985.66
156,DENNIS,LENNOX,1.0,YES ON NAT'L POPULAR VOTE,63105.23
283,KATHRYN,FAHEY,1.0,VOTERS NOT POLITICIANS,60235.15
210,JACK,JENNINGS,1.0,VOTERS NOT POLITICIANS,58293.37
436,SCOTT,DREXEL,1.0,YES ON NAT'L POPULAR VOTE,51488.27
219,JAMES,LANCASTER,1.0,VOTERS NOT POLITICIANS,45817.0
316,LENORE,GOLDMAN,1.0,VOTERS NOT POLITICIANS,39219.42
451,TALYCE,MURRAY,1.0,2022 PROP 2,34147.19
295,KEVIN,OGLESBY,1.0,REPEAL 1945PA302,32400.0
416,ROBERT,DRAHEIM,1.0,VOTERS NOT POLITICIANS,31871.04


In [16]:
# Top 10 individual recipients of expenditures opposing an office or ballot
# issue (supp_opp == 2.0).
#
# Amounts are summed within each (recipient, stance, candidate/ballot issue)
# group, so the ranking reflects totals rather than the largest single payment.
top_10_individual_recipients_opposing = (
    merged_campaign_df.loc[
        lambda df: df["lname_or_org"].notna()
        & df["f_name"].notna()
        & (df["supp_opp"] == 2.0)
    ]
    .groupby(["f_name", "lname_or_org", "supp_opp", "can_or_ballot"])["amount"]
    .sum()
    .reset_index()
    .sort_values(by="amount", ascending=False)
    .head(10)
)
display(top_10_individual_recipients_opposing)

Unnamed: 0,f_name,lname_or_org,supp_opp,can_or_ballot,amount
48,MICHAEL,GILMORE ESQ.,2.0,SECURE MI VOTE,18532.26
45,LINDA,KOJIRO,2.0,MICHIGAN PROPOSAL 3,5550.0
4,BRADLEY,OCONNER,2.0,STEPHEN RANZINI,500.0
37,JAY,ANDERSON,2.0,MIKE DUGGAN,430.0
3,BLAKE,EDMUNDS,2.0,GRETCHEN WHITMER,293.3
8,CLAUDIA,RODRIGUEZ,2.0,BRIAN MEAKIN,284.1
18,CLAUDIA,RODRIGUEZ,2.0,ROGER HAUCK,227.28
13,CLAUDIA,RODRIGUEZ,2.0,GREG VANWOERKOM,227.28
17,CLAUDIA,RODRIGUEZ,2.0,LARRY INMAN,227.28
16,CLAUDIA,RODRIGUEZ,2.0,JOSEPH BELLINO,227.28


In [12]:
# Top 10 organization recipients of expenditures.
#
# Rows with no f_name are treated as organizations. Totals are computed per
# (lname_or_org, purpose) pair, so one organization can appear more than once
# if it was paid under different purposes.
top_10_org_recipients = (
    merged_campaign_df.loc[lambda df: df["f_name"].isna()]
    .groupby(["lname_or_org", "purpose"])["amount"]
    .sum()
    .reset_index()
    .sort_values(by="amount", ascending=False)
    .head(10)
)
display(top_10_org_recipients)

Unnamed: 0,lname_or_org,purpose,amount
68303,KELLY SCOTT & MADISON INC,MEDIA AD PLACEMENT,17453108.0
54063,GMMB,MEDIA BUY,16954706.0
86686,NATIONAL PETITION MANAGEMENT,PETITIONS; SIGNATURE GATHERING,9901099.24
82390,MICHIGAN DEMOCRATIC STATE CENTRAL CO,CONTRIBUTION,8157143.18
54071,GMMB,TV ADVERTISING,7477029.48
46123,FIELD WORKS LLC,PETITION GATHERING EXPENSES,7340198.76
69753,KNOW-HOW STRATEGIES,MEDIA BUY,6671420.08
111062,TARGETED PLATFORM MEDIA LLC,ADVERTISING,6551748.27
109018,STRATEGIC MEDIA PLACEMENT INC,PLACED MEDIA,5224802.0
46151,FIELDWORKS LLC,SIGNATURE GATHERING,5209286.84
