## FEC 2018 Campaign Finance: 

### Committee Master

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

from copy import deepcopy
from datetime import datetime as dt

%matplotlib inline

In [3]:
# read in & clean
# https://www.fec.gov/files/bulk-downloads/2018/cm18.zip
cm = pd.read_csv('data/cm18.zip', sep = '|', error_bad_lines = False, header = None)
print('original length:', len(cm))

# add column headers from separate file
headers = pd.read_csv('data/cm_header_file.csv')

cm.columns = [x.lower() for x in headers.columns]

cm = cm[pd.notnull(cm['cmte_nm'])]

cm.head(2)

original length: 19018


Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id
0,C00000018,IRONWORKERS LOCAL UNION NO. 25 POLITICAL EDUCA...,STEVEN N GULICK,43335 W 10 MILE,P O BOX 965,NOVI,MI,48050,U,Q,,T,,IRON WORKERS; INT'L ASS'N OF BRIDGE...,H8TX22313
1,C00000059,HALLMARK CARDS PAC,SARAH MOE,2501 MCGEE,MD #500,KANSAS CITY,MO,64108,U,Q,UNK,M,C,,


In [4]:
cm.isnull().sum()[cm.isnull().sum() > 0].sort_values()

cmte_tp                     3
cmte_dsgn                   5
cmte_st                    12
cmte_city                  14
cmte_st1                   22
cmte_zip                   25
tres_nm                  1510
cmte_pty_affiliation    11637
connected_org_nm        12026
cand_id                 12324
cmte_st2                15294
org_tp                  15414
dtype: int64

In [5]:
cm[cm['cmte_tp'].isnull()]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id
11363,C00631226,DREW TURIANO FOR US CONGRESS,ANDREW TURIANO,PO BOX 1331,,EAST HELENA,MT,59635.0,,,REP,A,,,H4MT00084
11995,C00637835,CARLA SPALDING FOR U.S. CONGRESS,,,,,,,A,,,T,,,
12415,C00646943,THE ALT-MIDDLE PAC,,301 KING ST #1009,,SAN FRANCISCO,CA,94158.0,,,,T,,,


In [16]:
finds = [
    'TURIANO',
    'CARLA SPALDING',
    'ALT-MIDDLE',
]
for i in list(cm.index):
    name = cm.loc[i, ['cmte_nm', 'cmte_tp']]
    for which in finds:
        if which in cm.loc[i, 'cmte_nm']:
            print(cm.loc[i, 'cmte_nm'])
            print(str(cm.loc[i, 'cmte_tp']))
            print('\n')

CARLA SPALDING FOR CONGRESS
H


DREW TURIANO FOR US CONGRESS
nan


CARLA SPALDING FOR U.S. CONGRESS
nan


THE ALT-MIDDLE PAC
nan


THE ALT-MIDDLE
N




In [None]:
cm['tres_nm'].value_counts()

In [None]:
# https://www.youtube.com/watch?v=c_6lIGI2gTQ

cm.loc[cm['tres_nm'] == 'KILGORE, PAUL', 'cmte_nm'].value_counts()

In [None]:
cm.loc[cm['tres_nm'] == 'MARSTON, CHRIS', 'cmte_nm'].value_counts()

In [None]:
cm.loc[cm['tres_nm'] == 'LISKER, LISA', 'cmte_nm'].value_counts()

In [None]:
cmtetp_dict = {
    'C' : 'cmte_CommunicationCost',
    'D' : 'cmte_DelegateCommittee',
    'E' : 'cmte_ElectioneeringComms',
    'H' : 'cmte_House',
    'I' : 'cmte_IndependentExpenditor',
    'S' : 'cmte_Senate',
    'N' : 'cmte_PACnonqual',
    'Q' : 'cmte_PACqual',
    'Y' : 'cmte_Partyqual',
    'O' : 'cmte_SuperPAC',
    'P' : 'cmte_Presidential',
    'U' : 'cmte_Singlecand',
    'V' : 'cmte_PACnoncontrib_nonqual',
    'W' : 'cmte_PACnoncontrib_qual',
    'X' : 'cmte_Partynonqual',
    np.nan : np.nan,
}

cm['cmte_tp'] = [cmtetp_dict[x] for x in cm['cmte_tp']]
cm['cmte_tp'].value_counts()

In [None]:
cmtedsgn_dict = {
    'A' : 'Authorized by a candidate',
    'B' : 'Lobbyist/Registrant PAC',
    'D' : 'Leadership PAC',
    'J' : 'Joint fundraiser',
    'P' : 'Principal campaign committee',
    'U' : 'Unauthorized',
    np.nan : np.nan,
}

cm['cmte_dsgn'] = [cmtedsgn_dict[x] for x in cm['cmte_dsgn']]
cm['cmte_dsgn'].value_counts()

In [None]:
cm['cmte_pty_affiliation'].value_counts()

In [None]:
cm['cmte_filing_freq'].value_counts()

In [None]:
orgtp_dict = {
    'C' : 'org_Corporation',
    'H' : 'org_HnotonFECsite',
    'I' : 'org_InotonFECsite',
    'L' : 'org_Labor organization',
    'M' : 'org_Membership organization',
    'T' : 'org_Trade association',
    'V' : 'org_Cooperative',
    'W' : 'org_Corporation without capital stock',
    np.nan : np.nan,
}

cm['org_tp'] = [orgtp_dict[x] for x in cm['org_tp']]
cm['org_tp'].value_counts()

In [None]:
cm['connected_org_nm'].value_counts()

In [None]:
cm['cand_id'].value_counts()

In [None]:
cm.to_csv('data/committees_2018.csv', index = False)