# FEC Campaign Finance Analysis

## Candidate-Committee Linkages

In [1]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [2]:
year = '2018'

In [3]:
# read in candidate-committee linkage data
ccl = pd.read_csv(
    f'https://www.fec.gov/files/bulk-downloads/{year}/ccl{year[2:]}.zip',
    sep = '|', 
    error_bad_lines = False, 
    header = None, 
)

# read in column headers
headers = pd.read_csv(
    'https://www.fec.gov/files/bulk-downloads/data_dictionaries/ccl_header_file.csv', 
)

ccl.columns = [x.lower() for x in headers.columns]
print('original length:', len(ccl))

cand = pd.read_csv('data/01a_cand_clean.csv')
cands = list(set(cand['cand_id']))

ccl = ccl[ccl['cand_id'].apply(lambda x: x in cands)]
print(f'{year} cand_id:', len(ccl))

ccl = ccl[ccl['fec_election_yr'] == int(year)]
print('fec election year:', len(ccl))

ccl.drop(['fec_election_yr', 'linkage_id'], axis = 1, inplace = True)
cand_dict = dict(zip(cand['cand_id'], cand['cand_name']))
ccl['cand_name'] = [cand_dict[x] for x in ccl['cand_id']]

ccl.head()

original length: 7203
2018 cand_id: 1358
fec election year: 1358


Unnamed: 0,cand_id,cand_election_yr,cmte_id,cmte_tp,cmte_dsgn,cand_name
0,H0AL02087,2018,C00493783,H,J,"ROBY, MARTHA"
1,H0AL02087,2018,C00462143,H,P,"ROBY, MARTHA"
4,H0AL05163,2018,C00464149,H,P,"BROOKS, MO"
7,H0AL07086,2018,C00458976,H,P,"SEWELL, TERRI A."
9,H0AR01083,2018,C00462374,H,P,"CRAWFORD, ERIC ALAN RICK"


In [4]:
# confirm all entries in cand_id column are candidate IDs
ccl['cand_id'].apply(lambda x: x[0]).value_counts()

H    1017
S     341
Name: cand_id, dtype: int64

In [5]:
# check for nulls
ccl.isnull().sum()[ccl.isnull().sum() > 0].sort_values()

Series([], dtype: int64)

In [6]:
# 21 cand_ids appear twice in ccl dataframe
ccl.loc[
    ccl.duplicated(['cand_id', 'cmte_id', 'cmte_tp', 'cmte_dsgn'], keep = False), 
    'cand_id', 
].value_counts().value_counts()

2    9
6    1
4    1
Name: cand_id, dtype: int64

In [7]:
# display cand_ids shared at least three times across ccl rows
dupes = list(ccl.loc[
    ccl.duplicated(['cand_id', 'cmte_id', 'cmte_tp', 'cmte_dsgn'], keep = False), 
    'cand_id', 
].value_counts()[
    ccl.loc[
    ccl.duplicated(['cand_id', 'cmte_id', 'cmte_tp', 'cmte_dsgn'], keep = False), 
    'cand_id', 
].value_counts() > 2
].index)

for dupe in dupes:
    print(
        dupe, 
        cand.loc[cand['cand_id'] == dupe, 'cand_name'].values[0], 
        cand.loc[cand['cand_id'] == dupe, 'cand_ici'].values[0],
        cand.loc[cand['cand_id'] == dupe, 'contest'].values[0],
        cand.loc[cand['cand_id'] == dupe, 'cand_pty_affiliation'].values[0],
    )
    print(ccl.loc[ccl['cand_id'] == dupe, [
        'cand_election_yr', 
        'cmte_id', 
        'cmte_tp', 
        'cmte_dsgn', 
        'linkage_id',
    ]].sort_values(['cand_election_yr']))
    print('\n\n')

H8UT03238 CURTIS, JOHN I UT_03 Republican
      cand_election_yr    cmte_id cmte_tp cmte_dsgn  linkage_id
4693              2017  C00647180       O         U         NaN
4694              2017  C00647339       H         P         NaN
4696              2017  C00654459       N         J         NaN
4691              2018  C00647339       H         P         NaN
4692              2018  C00647180       O         U         NaN
4695              2018  C00654459       N         J         NaN



H8GA06286 HANDEL, KAREN CHRISTINE I GA_06 Republican
      cand_election_yr    cmte_id cmte_tp cmte_dsgn  linkage_id
3144              2017  C00634949       H         A         NaN
3145              2017  C00633362       H         P         NaN
3146              2018  C00634949       H         A         NaN
3147              2018  C00633362       H         P         NaN





In [8]:
# how many times each cmte_id appears; 
# <# times> <count>
ccl['cmte_id'].value_counts().value_counts().sort_index()

1     1065
2       61
3       12
4       12
5        1
6        1
8        3
11       1
12       1
13       1
16       1
Name: cmte_id, dtype: int64

In [9]:
# quick timewarp from the next notebook to get the committee names for the next loop
cm = pd.read_csv(
    f'https://www.fec.gov/files/bulk-downloads/{year}/cm{year[2:]}.zip', 
    sep = '|', 
    error_bad_lines = False, 
    header = None, 
)
# add column headers from separate file
headers = pd.read_csv(
    'https://www.fec.gov/files/bulk-downloads/data_dictionaries/cm_header_file.csv', 
)
cm.columns = [x.lower() for x in headers.columns]
cm_dict = dict(zip(cm['cmte_id'], cm['cmte_nm']))
del cm

In [10]:
# display candidates pertaining to each cmte_id (name shown) appearing more than twice
multi_cand = list(ccl['cmte_id'].value_counts()[ccl['cmte_id'].value_counts() > 1].index)

for comm in multi_cand:
    cands = list(ccl.loc[ccl['cmte_id'] == comm, 'cand_id'].values)
    lil_cand = cand[cand['cand_id'].apply(lambda x: x in cands)]
    pty = list(set(lil_cand['cand_pty_affiliation']))
    lil_cand = lil_cand[[
        'cand_name', 
        'contest', 
        'cand_ici',
        
    ]].sort_values(['contest'], ascending = [True]).reset_index(drop = True)
    if len(lil_cand) > 2:
        print(cm_dict[comm], '(' + str(len(lil_cand)) + ', ' + str(pty) + ')')
        print(lil_cand, '\n\n')

BLUE SENATE 2018 (16, ['Third party', 'Democrat'])
                        cand_name    contest cand_ici
0                 SINEMA, KYRSTEN  AZ_senate        O
1        MURPHY, CHRISTOPHER S MR  CT_senate        I
2                    NELSON, BILL  FL_senate        I
3              DONNELLY, JOSEPH S  IN_senate        I
4          KING, ANGUS STANLEY JR  ME_senate        I
5                STABENOW, DEBBIE  MI_senate        I
6                MENENDEZ, ROBERT  NJ_senate        I
7         HEINRICH, MARTIN TREVOR  NM_senate        I
8                    ROSEN, JACKY  NV_senate        C
9   GILLIBRAND, KIRSTEN ELIZABETH  NY_senate        I
10                 BROWN, SHERROD  OH_senate        I
11             CASEY, ROBERT P JR  PA_senate        I
12         KAINE, TIMOTHY MICHAEL  VA_senate        I
13                CANTWELL, MARIA  WA_senate        I
14                 BALDWIN, TAMMY  WI_senate        I
15               MANCHIN III, JOE  WV_senate        I 


WOMEN ON THE ROAD 2018 (13, 

### Make readable

In [11]:
# committee type
cmtetp_dict = {
    'H' : 'cmte_House',
    'S' : 'cmte_Senate',
    'N' : 'cmte_PACnonqual',
    'Q' : 'cmte_PACqual',
    'Y' : 'cmte_Partyqual',
    'O' : 'cmte_SuperPAC',
    'U' : 'cmte_Singlecand',
    'P' : 'cmte_President', 
    'I' : 'cmte_IndepExpenditor', 
    'D' : 'cmte_DelegateCommittee',
    'X' : 'cmte_Partynonqual', 
    'V' : 'cmte_PACnoncontribnonqual', 
}

ccl['cmte_tp'] = [cmtetp_dict[x] for x in ccl['cmte_tp']]
ccl['cmte_tp'].value_counts()

cmte_House         926
cmte_Senate        261
cmte_PACnonqual    155
cmte_PACqual         6
cmte_Partyqual       5
cmte_SuperPAC        4
cmte_Singlecand      1
Name: cmte_tp, dtype: int64

In [12]:
# committee designation
cmtedsgn_dict = {
    'A' : 'Authorized by a candidate',
    'B' : 'Lobbyist/Registrant PAC',
    'D' : 'Leadership PAC',
    'J' : 'Joint fundraiser',
    'P' : 'Principal campaign committee',
    'U' : 'Unauthorized',
    np.nan : np.nan, 
    
}

ccl['cmte_dsgn'] = [cmtedsgn_dict[x] for x in ccl['cmte_dsgn']]
ccl['cmte_dsgn'].value_counts()

Principal campaign committee    952
Joint fundraiser                371
Authorized by a candidate        23
Unauthorized                      9
Leadership PAC                    3
Name: cmte_dsgn, dtype: int64

In [13]:
# add linkage count column to cand dataframe
links_df = pd.DataFrame(ccl['cand_id'].value_counts())
links_df.reset_index(drop = False, inplace = True)
links_df.columns = ['cand_id', 'cm_links']
links_dict = dict(zip(links_df['cand_id'], links_df['cm_links']))
cand['cm_links'] = [links_dict[cand.loc[i, 'cand_id']] if \
                    (cand.loc[i, 'cand_id'] in list(links_dict.keys())) else \
                    0 for i in list(cand.index)]

cand['cm_links'].value_counts().sort_index()

1     765
2     102
3      33
4       8
5       3
6       4
7       8
8       3
9       1
11      4
12      4
18      1
20      1
Name: cm_links, dtype: int64

In [14]:
cand[cand['cm_links'] == 0]

Unnamed: 0,cand_id,cand_name,cand_ici,cand_pty_affiliation,ttl_receipts,trans_from_auth,ttl_disb,trans_to_auth,coh_bop,coh_cop,...,other_pol_cmte_contrib,pol_pty_contrib,cvg_end_dt,indiv_refunds,cmte_refunds,position,contest,type,state,cm_links


In [15]:
cand[cand['cm_links'] == cand['cm_links'].max()]

Unnamed: 0,cand_id,cand_name,cand_ici,cand_pty_affiliation,ttl_receipts,trans_from_auth,ttl_disb,trans_to_auth,coh_bop,coh_cop,...,other_pol_cmte_contrib,pol_pty_contrib,cvg_end_dt,indiv_refunds,cmte_refunds,position,contest,type,state,cm_links
7,S2WI00219,"BALDWIN, TAMMY",I,Democrat,27758852.65,1490494.35,28706579.25,129500.0,1089919.0,142193.2,...,1881644.77,100.0,2018-12-01,343163.38,35150.0,S,WI_senate,Group 02,WI,20


In [16]:
ccl.to_csv('data/02a_cand_comm_linkages.csv', index = False)

In [17]:
cand.to_csv('data/02a_cand_clean.csv', index = False)