# FEC Campaign Finance Analysis

## Candidate-Committee Linkages

In [1]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [2]:
year = '2020'

In [3]:
# read in candidate-committee linkage data
ccl = pd.read_csv(f'data/ccl{year[2:]}.zip', sep = '|', 
                  error_bad_lines = False, header = None)

# read in column headers
headers = pd.read_csv('data/ccl_header_file.csv')

ccl.columns = [x.lower() for x in headers.columns]
print('original length:', len(ccl))

cand = pd.read_csv('data/01/cand_clean_01a.csv')
cands = list(set(cand['cand_id']))

ccl = ccl[ccl['cand_id'].apply(lambda x: x in cands)]
print(f'{year} cand_id:', len(ccl))

ccl = ccl[ccl['fec_election_yr'] == int(year)]
print('fec election year:', len(ccl))

ccl.drop(['fec_election_yr', 'linkage_id'], axis = 1, inplace = True)
cand_dict = dict(zip(cand['cand_id'], cand['cand_name']))
ccl['cand_name'] = [cand_dict[x] for x in ccl['cand_id']]

ccl.head()

original length: 5961
2020 cand_id: 1761
fec election year: 1761


Unnamed: 0,cand_id,cand_election_yr,cmte_id,cmte_tp,cmte_dsgn,cand_name
2,H0AL01055,2020,C00697789,H,P,"CARL, JERRY LEE, JR"
3,H0AL01063,2020,C00701557,H,P,"LAMBERT, DOUGLAS WESTLEY III"
4,H0AL01071,2020,C00701409,H,P,"PRINGLE, CHRISTOPHER PAUL"
5,H0AL01089,2020,C00703066,H,P,"HIGHTOWER, BILL"
6,H0AL01097,2020,C00708867,H,P,"AVERHART, JAMES"


In [4]:
# confirm all entries in cand_id column are candidate IDs
ccl['cand_id'].apply(lambda x: x[0]).value_counts()

H    1509
S     233
P      19
Name: cand_id, dtype: int64

In [5]:
# check for nulls
ccl.isnull().sum()[ccl.isnull().sum() > 0].sort_values()

Series([], dtype: int64)

In [6]:
# 21 cand_ids appear twice in ccl dataframe
ccl.loc[
    ccl.duplicated(['cand_id', 'cmte_id', 'cmte_tp', 'cmte_dsgn'], keep = False), 
    'cand_id', 
].value_counts().value_counts()

2    17
Name: cand_id, dtype: int64

In [7]:
# display pairs of rows sharing a cand_id
dupes = list(ccl.loc[
    ccl.duplicated(['cand_id', 'cmte_id', 'cmte_tp', 'cmte_dsgn'], keep = False), 
    'cand_id', 
].value_counts()[
    ccl.loc[
    ccl.duplicated(['cand_id', 'cmte_id', 'cmte_tp', 'cmte_dsgn'], keep = False), 
    'cand_id', 
].value_counts() > 1
].index)

for dupe in dupes:
    print(
        dupe, 
        cand.loc[cand['cand_id'] == dupe, 'cand_name'].values[0], 
        cand.loc[cand['cand_id'] == dupe, 'cand_ici'].values[0],
        cand.loc[cand['cand_id'] == dupe, 'contest'].values[0],
        cand.loc[cand['cand_id'] == dupe, 'cand_pty_affiliation'].values[0],
    )
    print(ccl.loc[ccl['cand_id'] == dupe, [
        'cand_election_yr', 
        'cmte_id', 
        'cmte_tp', 
        'cmte_dsgn', 
        'linkage_id',
    ]].sort_values(['cand_election_yr']))
    print('\n\n')

H0IL05351 BURNS, BRIAN C IL_05 Democrat
     cand_election_yr    cmte_id cmte_tp cmte_dsgn  linkage_id
571              2019  C00715185       H         P         NaN
572              2020  C00715185       H         P         NaN



H8IL06139 CASTEN, SEAN I IL_06 Democrat
      cand_election_yr    cmte_id cmte_tp cmte_dsgn  linkage_id
3153              2019  C00648493       H         P         NaN
3152              2020  C00648493       H         P         NaN



H0TX22161 MOORE, NYANZA DAVIS O TX_22 Democrat
      cand_election_yr    cmte_id cmte_tp cmte_dsgn  linkage_id
1536              2019  C00694935       H         P         NaN
1537              2020  C00694935       H         P         NaN



H0NC03172 MURPHY, GREGORY I NC_03 Republican
     cand_election_yr    cmte_id cmte_tp cmte_dsgn  linkage_id
919              2019  C00697649       H         P         NaN
918              2020  C00697649       H         P         NaN



H0MN05186 KELLEY, CHRIS MR. C MN_05 Third party
     c

In [8]:
# how many times each cmte_id appears; 
# 1,702 cmte_ids appear once
# 29 cmte_ids appear twice
# one cmte_id appears 15 times
ccl['cmte_id'].value_counts().value_counts().sort_index()

1     1681
2       25
4        1
5        1
6        1
15       1
Name: cmte_id, dtype: int64

In [9]:
# quick timewarp from the next notebook to get the committee names for the next loop
cm = pd.read_csv(f'data/cm{year[2:]}.zip', sep = '|', error_bad_lines = False, header = None)
# add column headers from separate file
headers = pd.read_csv('data/cm_header_file.csv')
cm.columns = [x.lower() for x in headers.columns]
cm_dict = dict(zip(cm['cmte_id'], cm['cmte_nm']))
del cm

In [10]:
# display candidates pertaining to each cmte_id (name shown) appearing more than twice
multi_cand = list(ccl['cmte_id'].value_counts()[ccl['cmte_id'].value_counts() > 1].index)

for comm in multi_cand:
    cands = list(ccl.loc[ccl['cmte_id'] == comm, 'cand_id'].values)
    lil_cand = cand[cand['cand_id'].apply(lambda x: x in cands)]
    pty = list(set(lil_cand['cand_pty_affiliation']))
    lil_cand = lil_cand[[
        'cand_name', 
        'contest', 
        'cand_ici',
        
    ]].sort_values(['contest'], ascending = [True]).reset_index(drop = True)
    if len(lil_cand) > 2:
        print(cm_dict[comm], '(' + str(len(lil_cand)) + ', ' + str(pty) + ')')
        print(lil_cand, '\n\n')

2019 SENATORS CLASSIC COMMITTEE (15, ['Republican'])
                    cand_name    contest cand_ici
0               SULLIVAN, DAN  AK_senate        I
1             MCSALLY, MARTHA  AZ_senate        I
2               GARDNER, CORY  CO_senate        I
3              RISCH, JAMES E  ID_senate        I
4            MCCONNELL, MITCH  KY_senate        I
5         CASSIDY, WILLIAM M.  LA_senate        I
6           HYDE-SMITH, CINDY  MS_senate        O
7              DAINES, STEVEN  MT_senate        I
8        TILLIS, THOM R. SEN.  NC_senate        I
9           SASSE, BENJAMIN E  NE_senate        I
10      INHOFE, JAMES M. SEN.  OK_senate        I
11         GRAHAM, LINDSEY O.  SC_senate        I
12               ROUNDS, MIKE  SD_senate        I
13           CORNYN, JOHN SEN  TX_senate        I
14  CAPITO, SHELLEY MOORE MS.  WV_senate        I 


FRESHMAN AGRICULTURAL REPUBLICAN MEMBERS TRUST AKA FARM TRUST (6, ['Republican'])
                  cand_name contest cand_ici
0  CRAWFORD, ERIC

### Make readable

In [11]:
# committee type
cmtetp_dict = {
    'H' : 'cmte_House',
    'S' : 'cmte_Senate',
    'N' : 'cmte_PACnonqual',
    'Q' : 'cmte_PACqual',
    'Y' : 'cmte_Partyqual',
    'O' : 'cmte_SuperPAC',
    'U' : 'cmte_Singlecand',
    'P' : 'cmte_President', 
    'I' : 'cmte_IndepExpenditor', 
    'D' : 'cmte_DelegateCommittee',
    'X' : 'cmte_Partynonqual', 
    'V' : 'cmte_PACnoncontribnonqual', 
}

ccl['cmte_tp'] = [cmtetp_dict[x] for x in ccl['cmte_tp']]
ccl['cmte_tp'].value_counts()

cmte_House         1444
cmte_Senate         194
cmte_PACnonqual      92
cmte_President       17
cmte_PACqual          6
cmte_Partyqual        4
cmte_Singlecand       2
cmte_SuperPAC         2
Name: cmte_tp, dtype: int64

In [12]:
# committee designation
cmtedsgn_dict = {
    'A' : 'Authorized by a candidate',
    'B' : 'Lobbyist/Registrant PAC',
    'D' : 'Leadership PAC',
    'J' : 'Joint fundraiser',
    'P' : 'Principal campaign committee',
    'U' : 'Unauthorized',
    np.nan : np.nan, 
    
}

ccl['cmte_dsgn'] = [cmtedsgn_dict[x] for x in ccl['cmte_dsgn']]
ccl['cmte_dsgn'].value_counts()

Principal campaign committee    1579
Joint fundraiser                 114
Authorized by a candidate         46
Unauthorized                      14
Leadership PAC                     7
Lobbyist/Registrant PAC            1
Name: cmte_dsgn, dtype: int64

In [13]:
# add linkage count column to cand dataframe
links_df = pd.DataFrame(ccl['cand_id'].value_counts())
links_df.reset_index(drop = False, inplace = True)
links_df.columns = ['cand_id', 'cm_links']
links_dict = dict(zip(links_df['cand_id'], links_df['cm_links']))
cand['cm_links'] = [links_dict[cand.loc[i, 'cand_id']] if \
                    (cand.loc[i, 'cand_id'] in list(links_dict.keys())) else \
                    0 for i in list(cand.index)]

cand['cm_links'].value_counts().sort_index()

0       6
1    1396
2     129
3      16
4       7
5       5
6       1
Name: cm_links, dtype: int64

In [14]:
cand[cand['cm_links'] == 0]

Unnamed: 0,cand_id,cand_name,cand_ici,cand_pty_affiliation,ttl_receipts,trans_from_auth,ttl_disb,trans_to_auth,coh_bop,coh_cop,...,other_pol_cmte_contrib,pol_pty_contrib,cvg_end_dt,indiv_refunds,cmte_refunds,position,contest,type,state,cm_links
45,H2MA04073,"KENNEDY, JOSEPH P III",I,Democrat,3969003.17,243428.31,2597763.94,0.0,4173181.1,5544420.33,...,303998.38,0.0,2019-12-01,44148.0,500.0,H,MA_00,Group 04,MA,0
671,S0NC00293,"SMITH, SANDY",C,Republican,265001.71,0.0,198531.1,0.0,0.0,66470.61,...,0.0,0.0,2019-09-01,104.0,0.0,S,NC_senate,Group 07,NC,0
963,S6IL00185,"MARSHALL, ROBERT",C,Republican,65530.0,0.0,27980.0,0.0,314.0,35550.0,...,0.0,0.0,2019-12-01,0.0,0.0,S,IL_senate,Group 05,IL,0
1302,H6OR04203,"PERKINS, JO RAE",C,Republican,10675.6,0.0,9873.57,0.0,0.0,826.26,...,0.0,0.0,2019-12-01,0.0,0.0,H,OR_04,Group 05,OR,0
1450,S0CA00447,"SAGOO, GURPINDER SINGH",O,Republican,5000.0,0.0,0.0,0.0,30000000.0,60005000.0,...,0.0,0.0,2019-12-01,0.0,0.0,S,CA_senate,Group 11,CA,0
1454,H0IL14155,"MALOUF, DANIEL",C,Republican,4929.0,0.0,2729.97,0.0,0.0,2199.03,...,0.0,0.0,2019-12-01,0.0,0.0,H,IL_14,Group 01,IL,0


In [15]:
cand[cand['cm_links'] == 6]

Unnamed: 0,cand_id,cand_name,cand_ici,cand_pty_affiliation,ttl_receipts,trans_from_auth,ttl_disb,trans_to_auth,coh_bop,coh_cop,...,other_pol_cmte_contrib,pol_pty_contrib,cvg_end_dt,indiv_refunds,cmte_refunds,position,contest,type,state,cm_links
36,S4IA00129,"ERNST, JONI K",I,Republican,5390976.66,1356857.92,1897645.46,5600.0,1363149.69,4856480.89,...,1256251.03,44600.0,2019-12-01,28153.0,2500.0,S,IA_senate,Group 02,IA,6


In [16]:
ccl.to_csv('data/02/cand_comm_linkages_02a.csv', index = False)

In [17]:
cand.to_csv('data/02/cand_clean_02a.csv', index = False)