## FEC Campaign Finance

### Individual contributions

In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

from datetime import datetime as dt

In [2]:
# Election cycle being processed; kept as a string because it is spliced
# into file paths elsewhere in the notebook.
year = '2020'

# Transaction-date window used when slicing contributions:
# 1 January of (year - 2) through 31 December of (year - 1), inclusive.
# NOTE(review): for year = '2020' this is 2018-01-01 .. 2019-12-31 -- confirm
# that this is the intended range for the cycle.
mindate = dt(int(year) - 2, 1, 1)
maxdate = dt(int(year) - 1, 12, 31)

In [3]:
# Candidate-committee linkage table; its cmte_id column is compared against
# the contributions file further down.
ccl = pd.read_csv(filepath_or_buffer='data/02/cand_comm_linkages_02a.csv')
ccl.head(5)

Unnamed: 0,cand_id,cand_election_yr,cmte_id,cmte_tp,cmte_dsgn,cand_name
0,H0AL01055,2020,C00697789,cmte_House,Principal campaign committee,"CARL, JERRY LEE, JR"
1,H0AL01063,2020,C00701557,cmte_House,Principal campaign committee,"LAMBERT, DOUGLAS WESTLEY III"
2,H0AL01071,2020,C00701409,cmte_House,Principal campaign committee,"PRINGLE, CHRISTOPHER PAUL"
3,H0AL01089,2020,C00703066,cmte_House,Principal campaign committee,"HIGHTOWER, BILL"
4,H0AL01097,2020,C00708867,cmte_House,Principal campaign committee,"AVERHART, JAMES"


In [4]:
# Build a cmte_id -> cmte_nm lookup from the committees file, then drop the
# frame since only the lookup is needed afterwards.
cm = pd.read_csv('data/03/committees_03a.csv')
cm_dict = dict(cm[['cmte_id', 'cmte_nm']].itertuples(index=False, name=None))
del cm

In [5]:
def _map_codes(codes, lookup, what):
    """Translate every value of `codes` through the dict `lookup`.

    Vectorised replacement for `[lookup[x] for x in codes]`. Like that
    expression it raises KeyError when a value has no entry in `lookup`,
    but it names the offending values in the message.
    """
    unknown = set(codes.unique()) - set(lookup)
    if unknown:
        raise KeyError(
            f'{what} values missing from lookup: {sorted(map(str, unknown))[:10]}'
        )
    return codes.map(lookup)


# column headers live in a separate file; only its header row is used
headers = pd.read_csv('data/indiv_header_file.csv')

# read in & clean
# https://www.fec.gov/files/bulk-downloads/2020/indiv20.zip
# The bulk file is pipe-delimited and has no header row.
# on_bad_lines='warn' (pandas >= 1.3) skips rows with the wrong number of
# fields and reports them. It replaces error_bad_lines=False, which was
# deprecated in pandas 1.3 and removed in pandas 2.0.
with warnings.catch_warnings():
    # Warnings are silenced globally at the top of the notebook; re-enable the
    # parser's bad-line reports for this read so skipped rows stay visible.
    warnings.simplefilter('always', category=pd.errors.ParserWarning)
    indiv = pd.read_csv(
        f'data/indiv{year[2:]}/itcont.txt',
        sep='|',
        on_bad_lines='warn',
        header=None,
    )
indiv.columns = [x.lower() for x in headers.columns]
print('original length:', len(indiv))

# number of distinct cmte_ids that occur in indiv but have no row in ccl
print('number of cmte_ids in indiv but not in ccl:',
      len(set(indiv['cmte_id']) - set(ccl['cmte_id'].values)))

# remove transaction_amt == 0
# .copy() makes the filtered frame independent, so the column assignments
# below do not write into a slice of the original frame.
indiv = indiv[indiv['transaction_amt'] != 0].copy()
print('remove transaction_amt == 0:', len(indiv))

# add cmte_nm column
indiv['cmte_nm'] = _map_codes(indiv['cmte_id'], cm_dict, 'cmte_id')

# make report type readable
rpttp = pd.read_csv('data/04/pas2_rpttp.csv')
rpttp = rpttp[['Report type code', 'Report type', 'explanation']]
rpttp_dict = dict(zip(rpttp['Report type code'], rpttp['Report type']))
indiv['rpt_tp'] = _map_codes(indiv['rpt_tp'], rpttp_dict, 'rpt_tp')
del rpttp_dict

indiv.head()

b'Skipping line 8160152: expected 21 fields, saw 26\n'


original length: 8723558
number of cmte_ids in indiv but not in ccl: 4757
remove transaction_amt == 0: 8669589


Unnamed: 0,cmte_id,amndt_ind,rpt_tp,transaction_pgi,image_num,transaction_tp,entity_tp,name,city,state,...,occupation,transaction_dt,transaction_amt,other_id,tran_id,file_num,memo_cd,memo_text,sub_id,cmte_nm
0,C00432906,T,Termination,P2018,201901219143901218,22Y,IND,"LEMUNYON, GLENN",WASHINGTON,DC,...,,1182019.0,-500,,SB20A.55755,1305860,,,4021320191639407453,TOM ROONEY FOR CONGRESS
1,C00432906,T,Termination,P2018,201901219143901218,22Y,IND,"LEMUNYON, GLENN",WASHINGTON,DC,...,,1182019.0,500,,SB20A.55756,1305860,,,4021320191639407455,TOM ROONEY FOR CONGRESS
2,C00638478,T,Termination,P2018,201901289144040160,15C,CAN,"JANOWICZ, PHILIP",BUENA PARK,CA,...,CANDIDATE,1282019.0,33000,H8CA39133,3703296,1307800,,CONVERTING PRIMARY LOAN TO CONTRIBUTION,4021220191639267649,FRIENDS OF PHIL JANOWICZ
3,C00640870,T,Termination,P2018,201901259144002482,15C,CAN,"RICHARDSON, CHARDO",LONGWOOD,FL,...,INFORMATION REQUESTED,1242019.0,76,H8FL07054,VTQYWHKD8W6,1307204,,CONTRIBUTION FOR DEBT RETIREMENT,4021320191639532337,CHARDO RICHARDSON FOR CONGRESS
4,C00638478,T,Termination,P2018,201901289144040158,15,IND,"STEVENS, RICHARD",DIAMOND BAR,CA,...,NOT EMPLOYED,1252019.0,-50,,3703278,1307800,,CHECK LOST,4021220191639267645,FRIENDS OF PHIL JANOWICZ


In [6]:
# which cmte_nms have null dates
print('transaction_amt total for null date:', 
      indiv.loc[indiv['transaction_dt'].isnull(), 'transaction_amt'].sum())
indiv.loc[indiv['transaction_dt'].isnull(), 'cmte_nm'].value_counts()[:10]

transaction_amt total for null date: 1694


ALABAMA NURSING HOME ASSOCIATION FEDERAL POLITICAL ACTION COMMITTEE (ANHA PAC)    4
INVENERGY LLC PAC                                                                 3
Name: cmte_nm, dtype: int64

In [7]:
# remove null date rows
indiv = indiv[pd.notnull(indiv['transaction_dt'])]
print('remove missing dates:', len(indiv))

remove missing dates: 8669582


In [8]:
# convert to datetime
# transaction_dt is MMDDYYYY but is held as a number (e.g. 1182019.0), so the
# leading zero of months 01-09 has been lost. It must be restored before
# parsing: strptime accepts one- or two-digit months and days, so the 7-digit
# string '1182019' would be read as month 11, day 8 (2019-11-08) instead of
# 01/18/2019. Zero-padding to 8 digits removes the ambiguity.
# assign() returns a new frame rather than writing into a filtered slice.
indiv = indiv.assign(
    transaction_dt=[dt.strptime(str(int(x)).zfill(8), '%m%d%Y')
                    for x in indiv['transaction_dt']]
)

# slice on dates (both bounds inclusive)
in_window = (indiv['transaction_dt'] >= mindate) & (indiv['transaction_dt'] <= maxdate)
indiv = indiv[in_window]
print('slice on dates:', len(indiv))

# chronological order; reassigned instead of inplace=True so the sort does not
# operate in place on a filtered slice
indiv = indiv.sort_values(['transaction_dt'])

slice on dates: 8668654


In [9]:
# number of contribution rows per report type
indiv.loc[:, 'rpt_tp'].value_counts()

Mid-year             1874014
October quarterly    1476564
July quarterly        759730
December monthly      590082
November monthly      581828
Year end              581503
October monthly       485677
September monthly     433921
August monthly        387522
July monthly          339240
June monthly          295269
April quarterly       278758
May monthly           224235
April monthly         175642
March monthly         104258
February monthly       56803
Post-special            9657
Pre-special             9288
Termination             2913
Pre-primary              857
Pre-Runoff               824
Pre-general               69
Name: rpt_tp, dtype: int64

In [14]:
# committees with the most rows filed under a 'Termination' report (top 30)
indiv.loc[indiv['rpt_tp'].eq('Termination'), 'cmte_nm'].value_counts().head(30)

AETNA INC. POLITICAL ACTION COMMITTEE                          354
JOE SESTAK FOR PRESIDENT                                       252
GARLAND TUCKER FOR SENATE                                      245
RICHARD BEW FOR CONGRESS                                       191
FOX CORPORATION POLITICAL ACTION COMMITTEE  II (FOX PAC II)     84
BAUGH FOR CONGRESS                                              66
IDAHO VICTORY FUND PAC                                          60
ABBIE HODGSON FOR CONGRESS                                      56
GEP ADMINISTRATIVE SERVICES, INC. EMPLOYEE PAC                  50
BRUNO A BARREIRO CAMPAIGN                                       47
BARRASSO CASSIDY VICTORY FUND                                   44
BLUE GREEN VICTORY FUND                                         43
TIM CANOVA FOR CONGRESS                                         43
BYRNE FOR CONGRESS                                              42
HATCH ELECTION COMMITTEE INC                                  

In [16]:
# rows flagged with memo_cd 'X' (these rows don't count against FEC limits):
# total amount, then the 20 committees with the most such rows
memo_x_rows = indiv['memo_cd'].eq('X')
print("transaction_amt total for 'X' memo_cd:",
      indiv.loc[memo_x_rows, 'transaction_amt'].sum())
indiv.loc[memo_x_rows, 'cmte_nm'].value_counts().head(20)

transaction_amt total for 'X' memo_cd: 18307747


DONALD J. TRUMP FOR PRESIDENT, INC.                                      4811
PETE FOR AMERICA, INC.                                                   3137
EMILY'S LIST                                                             2371
AMERICAN ASSOCIATION FOR JUSTICE POLITICAL ACTION COMMITTEE (AAJ PAC)    1249
TEAM GRAHAM, INC.                                                         858
DCCC                                                                      764
TEXANS FOR SENATOR JOHN CORNYN INC.                                       688
CLUB FOR GROWTH PAC                                                       661
CORY 2020                                                                 589
BILL CASSIDY FOR US SENATE                                                558
PERDUE FOR SENATE                                                         531
MCSALLY FOR SENATE INC                                                    494
JOHN JAMES FOR SENATE, INC.                                     

In [17]:
# number of rows per amendment indicator value
indiv.loc[:, 'amndt_ind'].value_counts()

N    6435470
A    2230516
T       2668
Name: amndt_ind, dtype: int64

In [18]:
# number of rows per contributor entity type
indiv.loc[:, 'entity_tp'].value_counts()

IND    8656705
CAN       5279
ORG       4793
PAC       1331
CCM        264
COM        221
PTY         12
Name: entity_tp, dtype: int64

In [None]:
# write the cleaned contributions for this cycle, without the row index
indiv.to_csv(
    path_or_buf=f'data/05/indiv_contrib_{year}.csv',
    index=False,
)