## FEC Campaign Finance

### Contributions from committees to candidates & independent expenditures

**TODO:** resolve the color/`ls` duplicate issue.

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import warnings
warnings.filterwarnings('ignore')

from datetime import datetime as dt
from matplotlib import font_manager as fm, rcParams
import matplotlib.dates as mdates
from matplotlib.lines import Line2D
from matplotlib.offsetbox import (
    AnnotationBbox,
    OffsetImage, 
)

%matplotlib inline

In [2]:
# Cycle under analysis. Kept as a string because it is spliced into the
# FEC bulk-download URL when the pas2 file is read.
year = '2018'

# Transaction-date window: 1 January of the year before `year`
# through 10 November of `year`.
cycle_year = int(year)
mindate = dt(cycle_year - 1, 1, 1)
maxdate = dt(cycle_year, 11, 10)

In [3]:
# Presumably the winning candidate per contest (going by the file name);
# the preview below shows `contest` and `cand_id` columns.
winners = pd.read_csv('data/winners_2018.csv')
winners.head()

Unnamed: 0,contest,cand_id
0,AK_00,H6AK00045
1,AL_01,H4AL01123
2,AL_02,H0AL02087
3,AL_03,H2AL03032
4,AL_04,H6AL04098


In [4]:
cand = pd.read_csv('data/02a_cand_clean.csv')
# cand_id -> cand_name lookup (a later duplicate id overwrites an earlier one)
cand_dict = dict(cand[['cand_id', 'cand_name']].itertuples(index = False, name = None))
cand.head(2)

Unnamed: 0,cand_id,cand_name,cand_ici,cand_pty_affiliation,ttl_receipts,trans_from_auth,ttl_disb,trans_to_auth,coh_bop,coh_cop,...,other_pol_cmte_contrib,pol_pty_contrib,cvg_end_dt,indiv_refunds,cmte_refunds,position,contest,type,state,cm_links
0,S8FL00273,"SCOTT, RICK GOV",C,Republican,85231716.47,1036189.15,83771112.48,0.0,0.0,1460603.73,...,981797.67,5050.0,2018-12-01,185475.35,10000.0,S,FL_senate,Group 10,FL,3
1,S8TX00285,"O'ROURKE, ROBERT (BETO)",C,Democrat,80344836.34,53666.26,80458720.11,0.0,398699.0,284815.77,...,83346.3,500.0,2018-12-01,1264011.92,5117.0,S,TX_senate,Group 04,TX,2


In [5]:
cm = pd.read_csv('data/03a_committees.csv')
# cmte_id -> cmte_nm lookup (a later duplicate id overwrites an earlier one)
cm_dict = dict(cm[['cmte_id', 'cmte_nm']].itertuples(index = False, name = None))
cm.head(2)

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
0,C00000018,IRONWORKERS LOCAL UNION NO. 25 POLITICAL EDUCA...,STEVEN GULICK,43335 W 10 MILE,P O BOX 965,NOVI,MI,48050.0,Unauthorized,cmte_PACqual,UNK,T,,IRON WORKERS; INT'L ASS'N OF BRIDGE...,H8TX22313,STEVEN N GULICK,43335 W 10 MILE,"43335 W 10 MILE, NOVI, MI 48050"
1,C00000059,HALLMARK CARDS PAC,SARAH MOE,2501 MCGEE,MD #500,KANSAS CITY,MO,64108.0,Unauthorized,cmte_PACqual,UNK,M,org_Corporation,,,SARAH MOE,2501 MCGEE,"2501 MCGEE, KANSAS CITY, MO 64108"


In [6]:
# cmte_id -> treasurer name lookup, built from the committee table
tres_dict = dict(cm[['cmte_id', 'tres_nm']].itertuples(index = False, name = None))

In [7]:
# add column headers from separate file
headers = pd.read_csv(
    'https://www.fec.gov/files/bulk-downloads/data_dictionaries/pas2_header_file.csv',
)

# read in & clean
# The bulk file is pipe-delimited and has no header row.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0;
# `on_bad_lines = 'warn'` keeps what `error_bad_lines = False` did
# (skip malformed rows and report them).
pas = pd.read_csv(
    f'https://www.fec.gov/files/bulk-downloads/{year}/pas2{year[2:]}.zip',
    sep = '|',
    on_bad_lines = 'warn',
    header = None,
)
pas.columns = [x.lower() for x in headers.columns]
print('original length:', len(pas))

# transaction_amt != 0
pas = pas[pas['transaction_amt'] != 0]
print('remove observations with transaction_amt == 0:', len(pas))

# keep only lines pertaining to candidates currently in cand dataframe
# Vectorised membership test: the earlier per-row `.loc` lookup checked
# against a Python list was effectively rows x candidates comparisons.
cands = list(set(cand['cand_id'].values))
mask = pas['cand_id'].isin(cands)
pas = pas[mask]
print('keep lines for relevant candidates:', len(pas))

# # 18-digit image number format (June 29, 2015)
# # YYYYMMDDSSPPPPPPPP
# # YYYY - scanning year
# # MM - scanning month
# # DD - scanning day
# # SS - source (02 - Senate, 03 - FEC Paper, 90-99 - FEC Electronic)
# # PPPPPPPP - page (reset to zero every year on January 1)
# pas['image_num'] = [str(x) for x in pas['image_num']]
# # pas['scan_dt'] = [dt.strptime(x[:8], '%Y%m%d') for x in pas['image_num']]
# source_dict = {
#     '02' : 'source_Senate',
#     '03' : 'source_FECpaper',
# }

# # make amendment indicator readable
# amndtind_dict = {
#     'N' : 'new filing',
#     'A' : 'amendment filing',
#     'T' : 'termination filing',
# }
# pas['amndt_ind'] = [amndtind_dict[x] for x in pas['amndt_ind']]

# make report type readable
# Plain dict indexing: a code missing from the lookup file raises KeyError
# rather than producing a null.
rpttp = pd.read_csv('data/pas2_rpttp.csv').loc[:, ['Report type code', 'Report type', 'explanation']]
rpttp_dict = dict(rpttp[['Report type code', 'Report type']].itertuples(index = False, name = None))
pas['rpt_tp'] = list(map(rpttp_dict.__getitem__, pas['rpt_tp']))
del rpttp_dict

# make transaction type readable
# Same approach: code -> description, KeyError on an unknown code.
transtp = pd.read_csv('data/pas2_transactiontp.csv').loc[:, ['Transaction type', 'Transaction type description']]
transtp_dict = dict(transtp.itertuples(index = False, name = None))
pas['transaction_tp'] = list(map(transtp_dict.__getitem__, pas['transaction_tp']))
del transtp_dict

# make entity type readable
entitytp_dict = {
    'CAN' : 'enttp_Candidate',
    'CCM' : 'enttp_Candidate Committee',
    'COM' : 'enttp_Committee',
    'IND' : 'enttp_Individual (a person)',
    'ORG' : 'enttp_Organization (not a committee and not a person)',
    'PAC' : 'enttp_Political Action Committee',
    'PTY' : 'enttp_Party Organization',
}

# Missing entity types stay missing. They are handled with an explicit null
# check instead of an `np.nan` dict key: NaN never compares equal to itself,
# so a NaN key is only found when the very same object is looked up.
# Unknown non-null codes still raise KeyError.
pas['entity_tp'] = [entitytp_dict[x] if pd.notnull(x) else np.nan for x in pas['entity_tp']]
del entitytp_dict

# drop columns that are not carried forward (reasons inline)
pas = pas.drop(columns = [
    'memo_text', # too many nulls, memos vague
    'occupation',
    'sub_id', # bring this back if it's a key somewhere else
    'transaction_pgi', # total mess
])

# remove where tran_id null; all are 4-digit negative $
# (rows with a null transaction_dt or name are removed as well)
drops = ['tran_id', 'transaction_dt', 'name']
pas = pas.dropna(subset = drops)
print('remove a few nulls:', len(pas))

# make all names uppercase, normalize for deduping
def _normalize_name(raw):
    '''Uppercase a payee name and strip noise so spelling variants collapse.

    Commas and periods become spaces, ' LLC' is removed, runs of whitespace
    are collapsed to single spaces, and a trailing ' INC' is dropped.
    '''
    # Uppercasing was promised by the comment above but never applied; the
    # dedupe rules further down all match uppercase literals.
    cleaned = raw.upper().replace(',', ' ').replace('.', ' ').replace(' LLC', '')
    cleaned = ' '.join(cleaned.split())
    return re.sub(' INC$', '', cleaned)

pas['name'] = [_normalize_name(x) for x in pas['name']]
    
# convert transaction_dt
def _as_mmddyyyy(raw):
    '''Render a raw date value as digits, restoring a lost leading zero (7 -> 8 digits).'''
    digits = str(int(raw))
    return '0' + digits if len(digits) == 7 else digits

pas['transaction_dt'] = [dt.strptime(_as_mmddyyyy(x), '%m%d%Y') for x in pas['transaction_dt']]

# keep transactions inside the [mindate, maxdate] window, both ends inclusive
in_window = pas['transaction_dt'].between(mindate, maxdate)
pas = pas[in_window]
print('slice on date window:', len(pas))

# drop null columns
pas.dropna(axis = 1, how = 'all', inplace = True)

# Attach readable names for the ids on each row; an id with no entry in
# the lookup dict is left as NaN. `Series.map` does a hashed lookup per row
# instead of rebuilding and scanning `list(d.keys())` for every row.
pas['other_name'] = pas['other_id'].map(cm_dict)

pas['cmte_name'] = pas['cmte_id'].map(cm_dict)

pas['cand_name'] = pas['cand_id'].map(cand_dict)

# prep for deduping
# Substitutions are applied one after another, in dict order, to both
# name columns.
replace_dict = {
    'POLITICAL ACTION COMMITTEE' : 'PAC',
    ' & ' : ' AND ',
    'CMTE' : 'COMMITTEE',
    ' + ' : ' AND ',
    'CORPORATION' : 'CORP',
}

def _swap_if_present(value, old, new):
    '''Replace `old` with `new` only when `old` occurs in str(value); otherwise return value untouched.'''
    return value.replace(old, new) if (old in str(value)) else value

for old, new in replace_dict.items():
    pas['name'] = [x.replace(old, new) for x in pas['name']]
    # cmte_name is NaN for ids missing from cm_dict, hence the guarded swap
    pas['cmte_name'] = [_swap_if_present(x, old, new) for x in pas['cmte_name']]

# reorder columns: the key fields first, all remaining columns after them
# in their existing order
putfront = [
    'cmte_id',
    'cmte_name',
    'entity_tp',
    'name',
    'other_id',
    'other_name',
    'transaction_amt',
    'cand_id',
    'cand_name',
    'transaction_dt',
    'tran_id',
]
front_set = set(putfront)
notfront = [col for col in pas.columns if col not in front_set]
pas = pd.concat([pas.loc[:, putfront], pas.loc[:, notfront]], axis = 1)

pas.head()

original length: 349594
remove observations with transaction_amt == 0: 347637
keep lines for relevant candidates: 295358
remove a few nulls: 292295
slice on date window: 288584


Unnamed: 0,cmte_id,cmte_name,entity_tp,name,other_id,other_name,transaction_amt,cand_id,cand_name,transaction_dt,...,amndt_ind,rpt_tp,image_num,transaction_tp,city,state,zip_code,employer,file_num,memo_cd
0,C00390633,SCHOOLSFIRST FEDERAL CREDIT UNION EMPLOYEES FE...,enttp_Candidate Committee,PETE AGUILAR FOR CONGRESS,C00510461,PETE AGUILAR FOR CONGRESS,2500,H2CA31125,"AGUILAR, PETE",2017-12-18,...,N,Year end,201801249090614097,Contribution made to nonaffiliated committee,SAN BERNARDINO,CA,92423,,1199694,
2,C00392464,UNIVERSAL MUSIC GROUP PAC,enttp_Candidate Committee,RATCLIFFE FOR CONGRESS,C00554113,RATCLIFFE FOR CONGRESS,500,H4TX04153,"RATCLIFFE, JOHN L",2017-12-12,...,N,Year end,201801249090614162,Contribution made to nonaffiliated committee,ROCKWALL,TX,75032,,1199696,
3,C00392464,UNIVERSAL MUSIC GROUP PAC,enttp_Candidate Committee,TED DEUTCH FOR CONGRESS COMMITTEE,C00469163,TED DEUTCH FOR CONGRESS COMMITTEE,2000,H0FL19080,"DEUTCH, THEODORE ELIOT",2017-12-13,...,N,Year end,201801249090614162,Contribution made to nonaffiliated committee,WASHINGTON,DC,20036,,1199696,
5,C00368993,WINNING STRATEGIES WASHINGTON PAC,enttp_Candidate Committee,DONALD M PAYNE JR FOR CONGRESS,C00519355,DONALD M PAYNE JR FOR CONGRESS,1300,H2NJ10154,"PAYNE, DONALD M., JR.",2017-12-12,...,N,Year end,201801249090614336,Contribution made to nonaffiliated committee,NEWARK,NJ,7114,,1199704,
7,C00368993,WINNING STRATEGIES WASHINGTON PAC,enttp_Candidate Committee,FRIENDS OF DON BEYER,C00555888,FRIENDS OF DON BEYER,500,H4VA08224,"BEYER, DONALD STERNOFF HONORABLE JR.",2017-10-31,...,N,Year end,201801249090614337,Contribution made to nonaffiliated committee,ALEXANDRIA,VA,22314,,1199704,


In [8]:
# Treasurer name for each row's committee; a cmte_id with no entry in
# tres_dict falls back to the raw id. `dict.get` avoids rebuilding
# `list(tres_dict.keys())` and scanning it for every row.
pas['cmte_tres'] = [tres_dict.get(x, x) for x in pas['cmte_id']]
pas['cmte_tres'].value_counts()

(no treasurer listed)    11551
HARRY PASCAL              3218
PAUL KILGORE              3215
EDWARD KURTZ              2450
PATRICIA GILBERT          2170
                         ...  
C00649962                    1
JOEL DYAR                    1
NATHANIEL IRVIN              1
MOLLY BISHOP                 1
ZACHARY QUINN                1
Name: cmte_tres, Length: 3852, dtype: int64

In [34]:
# Total transaction dollars per treasurer, largest first, keeping only
# treasurers whose total exceeds $5MM.
# One groupby replaces a full-frame boolean scan per treasurer.
treses = list(pas['cmte_tres'].value_counts().index)

tres_df = (
    pas.groupby('cmte_tres')['transaction_amt']
    .sum()
    .rename_axis('tres')
    .reset_index(name = 'total')
    .sort_values(['total'], ascending = False)
)
tres_df = tres_df[tres_df['total'] > 5.0E6].reset_index(drop = True)

# display the table (the cell previously ended without showing it)
tres_df

Unnamed: 0,tres,total
0,CALEB CROSBY,212373269
1,KEITH DAVIS,111017251
2,REBECCA LAMBE,107501507
3,(no treasurer listed),96614358
4,DANIEL SENA,79245252
5,ALIXANDRIA LAPP,59380075
6,ALLISON WRIGHT,39781289
7,HOWARD WOLFSON,38146487
8,JULIE DOZIER,32068879
9,GREG SPEED,27098413


In [37]:
# For each top treasurer, print the committees they are listed on with the
# total amount per committee, largest first.
treses = tres_df['tres'].tolist()

for tres in treses:
    print(tres)
    lil_pas = pas.loc[pas['cmte_tres'] == tres]
    # committees are visited in value_counts order (most transactions first)
    by_cmte = [
        (cmte, lil_pas.loc[lil_pas['cmte_name'] == cmte, 'transaction_amt'].sum())
        for cmte in lil_pas['cmte_name'].value_counts().index
    ]
    lil_df = pd.DataFrame(by_cmte)
    lil_df.columns = ['cmte', 'total']
    lil_df = lil_df.sort_values(['total'], ascending = False).reset_index(drop = True)
    lil_df['total'] = ['${:,}'.format(amount) for amount in lil_df['total']]
    print(lil_df)
    print('\n')

CALEB CROSBY
                               cmte         total
0     CONGRESSIONAL LEADERSHIP FUND  $124,708,177
1            SENATE LEADERSHIP FUND   $85,791,877
2            CLEARPATH ACTION, INC.    $1,856,215
3              FRIENDS OF NEAL DUNN       $11,000
4              FREE MARKETS PAC INC        $5,500
5  ADVANCED MICRO DEVICES, INC. PAC          $500


KEITH DAVIS
                                                cmte        total
0                                               NRCC  $68,563,300
1                                               NRSC  $42,049,856
2                            TENN PAC INC (TENN PAC)     $180,000
3  TRUST PAC TEAM REPUBLICANS FOR UTILIZING SENSI...     $134,000
4                           WICKER VICTORY COMMITTEE      $61,095
5        COUNTRY FIRST PAC, INC. (COUNTRY FIRST PAC)      $15,000
6                                        BULLDOG PAC       $7,000
7                                 ACCOUNTABILITY PAC       $7,000


REBECCA LAMBE
  cmte       

In [32]:
# committees behind the '(no treasurer listed)' bucket, by transaction count
no_tres = pas['cmte_tres'] == '(no treasurer listed)'
pas.loc[no_tres, 'cmte_name'].value_counts().head(20)

WORKING AMERICA                            2085
FEMINIST MAJORITY                          1264
INDIVISIBLE PROJECT INC.                    918
UNITE HERE ARIZONA                          900
HOTEL WORKERS FOR STRONGER COMMUNITIES      538
                                           ... 
SEIU OHIO STATE JOINT COUNCIL PCE             1
ORGANIZERS IN THE LAND OF ENCHANTMENT         1
OPEN PROGRESS                                 1
ATKINSON, JOHN AND BONNIE                     1
AMERICAN BRIDGE 21ST CENTURY FOUNDATION       1
Name: cmte_name, Length: 145, dtype: int64

In [None]:
# ten earliest transaction dates
pas['transaction_dt'].sort_values().head(10)

In [None]:
# null count per column, for columns that have any nulls, ascending
null_counts = pas.isnull().sum()
null_counts[null_counts > 0].sort_values()

In [None]:
pas['transaction_tp'].value_counts()

In [None]:
pas[pas['transaction_tp'] == 'Election recount disbursement']

In [None]:
# exclude recount disbursements from the working frame
pas = pas.loc[pas['transaction_tp'].ne('Election recount disbursement')]

In [None]:
pas[pas['transaction_tp'] == 'Communication cost against candidate (only for Form 7 filer)']

In [None]:
# exclude Form 7 'communication cost against candidate' rows from the working frame
pas = pas.loc[pas['transaction_tp'].ne('Communication cost against candidate (only for Form 7 filer)')]

In [None]:
# other_id is a cand_id when other_name is null
no_other_name = list(pas.loc[pas['other_name'].isnull(), 'other_id'].value_counts().index)
list(set([x[0] for x in no_other_name]))

In [None]:
# transaction_tps when other_name is null-- do not directly involve a candidate
pas.loc[pas['other_name'].isnull(), 'transaction_tp'].value_counts()

In [None]:
# other_id is a cmte_id when other_name is present
other_name = list(pas.loc[pd.notnull(pas['other_name']), 'other_id'].value_counts().index)
list(set([x[0] for x in other_name]))

In [None]:
# transaction_tps when other_name is present
pas.loc[pd.notnull(pas['other_name']), 'transaction_tp'].value_counts()

In [None]:
# every transaction: amount against date
fig, ax = plt.subplots(figsize = (18, 3))
ax.scatter(pas['transaction_dt'], pas['transaction_amt'], s = 5)

plt.show();

In [None]:
# Summary statistics and a horizontal boxplot of transaction amounts
# (in $MM) for each transaction type, over rows that have a cand_id.
has_cand = pd.notnull(pas['cand_id'])

# Sorted rather than raw `set` order, so the printed sections and the
# boxplot rows appear in the same order on every run. Null types are
# dropped: they could never match the equality filter below.
labels = sorted(set(pas.loc[has_cand, 'transaction_tp'].dropna()))

data = []
for which in labels:
    lil_pas = pas[has_cand & (pas['transaction_tp'] == which)]
    data.append(lil_pas['transaction_amt']/1.0E6)
    name_counts = lil_pas['name'].value_counts()
    print(which)
    print('count:', len(lil_pas))
    print('unique:', len(name_counts))
    print('median:', lil_pas['transaction_amt'].median())
    print('mean:', lil_pas['transaction_amt'].mean())
    print('std dev:', lil_pas['transaction_amt'].std())
    print(name_counts[:10])
    print('\n')

plt.figure(figsize = (20, 2))

plt.boxplot(data, vert = False)
plt.yticks(range(1, len(labels) + 1), labels, fontsize = 12)
plt.xlabel('$MM USD')

plt.show();

### Split dataframe, dedupe 'name' column

In [None]:
def findall(name, names = None):
    '''Return the sorted, de-duplicated names that match a search pattern.

    name  : a string, or an iterable of strings. A single string must occur
            somewhere in the name; for an iterable, every fragment must
            occur (in any order). Matching ignores case in both forms.
    names : optional iterable of names to search. Defaults to the `name`
            column of the notebook-level `pas` dataframe.
    '''
    pool = set(pas['name'] if names is None else names)
    # a bare string is a single fragment; anything else is a collection of them
    fragments = [name] if isinstance(name, str) else list(name)
    # lowercase the fragments too, so list patterns behave like string patterns
    fragments = [str(frag).lower() for frag in fragments]
    return sorted(x for x in pool if all(frag in str(x).lower() for frag in fragments))

In [None]:
# payee names on independent / coordinated expenditure rows: top 50 by row count
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[:50]

In [None]:
findall('courage')

In [None]:
findall(['face', 'book'])

In [None]:
pas.loc[pas['name'].apply(lambda x: 'FACEBOOK' in x), 'name'] = 'FACEBOOK'

In [None]:
findall(['water', 'front'])

In [None]:
findall('bully')

In [None]:
findall(['house', 'freedom'])

In [None]:
findall('twilio')

In [None]:
findall(['club', 'growth'])

In [None]:
pas.loc[pas['name'].apply(lambda x: 'CLUB FOR GROWTH' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'CLUB FOR GROWTH' in x), 'name'] = 'CLUB FOR GROWTH'

In [None]:
findall(['metro', 'strat'])

In [None]:
findall(['right', 'country'])

In [None]:
findall('nebo')

In [None]:
findall(['senate', 'conserv'])

In [None]:
findall('indivisible')

In [None]:
findall(['pro', 'list'])

In [None]:
pas.loc[pas['name'].apply(lambda x: 'PROLIST' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'PROLIST' in x), 'name'] = 'PROLIST'

In [None]:
findall('hustle')

In [None]:
findall('skd')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'SKD' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'SKD' in x), 'name'] = 'SKDKNICKERBOCKER'

In [None]:
findall('sierra club')

In [None]:
findall('pivot')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'PIVOT' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'PIVOT' in x), 'name'] = 'THE PIVOT GROUP'

In [None]:
findall('alliance')

In [None]:
findall('pdq')

In [None]:
findall(['maj', 'strat'])

In [None]:
pas.loc[pas['name'] == 'MAJORITIES STRATEGIES', 'name'] = 'MAJORITY STRATEGIES'

In [None]:
findall(['next', 'gen'])

In [None]:
findall('deliver')

In [None]:
findall('every')

In [None]:
findall('mother')

In [None]:
findall('spaff')

In [None]:
findall(['forth', 'right'])

In [None]:
findall('lincoln')

In [None]:
findall('humane')

In [None]:
findall('360')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'I360' in x), 'name'] = 'I360'

In [None]:
findall('fp1')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'FP1' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'FP1' in x), 'name'] = 'FP1 STRATEGIES'

In [None]:
findall('solidarity')

In [None]:
findall('groundbase')

In [None]:
# names mentioning either 'SOLIDARITY COMPANY' or 'GROUNDBASE' share one label
is_groundbase = (
    pas['name'].str.contains('SOLIDARITY COMPANY', regex = False)
    | pas['name'].str.contains('GROUNDBASE', regex = False)
)
pas.loc[is_groundbase, 'name'] = 'GROUNDBASE/SOLIDARITY COMPANY'

In [None]:
findall(['blue', 'print'])

In [None]:
pas.loc[pas['name'].apply(lambda x: ('BLUE' in x) & ('PRINT INTER' in x)), 'name'].value_counts()

In [None]:
pas.loc[pas['name'] == 'BLUE PRINT INTERACTIVE', 'name'] = 'BLUEPRINT INTERACTIVE'

In [None]:
findall(['demo', 'live'])

In [None]:
findall(['conn', 'strat'])

In [None]:
findall(['creative', 'dir'])

In [None]:
findall(['del', 'ray'])

In [None]:
findall(['texas', 'org'])

In [None]:
findall(['pay', 'chex'])

In [None]:
findall('imge')

In [None]:
findall('unite here')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'UNITE HERE' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'UNITE HERE' in x), 'name'] = 'UNITE HERE TIP STATE AND LOCAL FUND'

In [None]:
findall('amplify')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'AMPLIFY' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'AMPLIFY' in x), 'name'] = 'AMPLIFY MEDIA'

In [None]:
findall('consolidate')

In [None]:
pas.loc[pas['name'] == 'CONSOLIDATED MAILING SERVICE', 'name'] = 'CONSOLIDATED MAILING SERVICES'

In [None]:
findall('for our')

In [None]:
findall(['master', 'print'])

In [None]:
findall('arena')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'ARENA' in x), 'name'].value_counts()

In [None]:
findall('main street')

In [None]:
findall('mission')

In [None]:
findall('lukens')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'LUKENS' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'LUKENS' in x), 'name'] = 'THE LUKENS COMPANY'

In [None]:
findall('sage media')

In [None]:
findall(['league', 'cons'])

In [None]:
pas.loc[pas['name'].apply(lambda x: 'LEAGUE OF CONSERVATION VOTERS' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'LEAGUE OF CONSERVATION VOTERS' in x), 'name'] = \
'LEAGUE OF CONSERVATION VOTERS'

In [None]:
# re-check after deduping: top 50 payee names on expenditure rows
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[:50]

In [None]:
# payee names ranked 51-100 by row count on expenditure rows
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[50:100]

In [None]:
findall('allegiance')

In [None]:
findall('resonance')

In [None]:
findall(['strategy group'])

In [None]:
pas.loc[pas['name'] == 'MOSIAC MEDIA STRATEGY GROUP', 'name'] = 'MOSAIC MEDIA STRATEGY GROUP'

In [None]:
pas.loc[pas['name'] == 'STRATEGY GROUP FOR MEDIA', 'name'] = 'THE STRATEGY GROUP FOR MEDIA'
pas.loc[pas['name'].apply(lambda x: 'THE STRATEGY GROUP' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'THE STRATEGY GROUP' in x), 'name'] = 'THE STRATEGY GROUP'

In [None]:
findall(['spoken', 'hub'])

In [None]:
pas.loc[pas['name'].apply(lambda x: 'SPOKEN HUB' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'SPOKEN HUB' in x), 'name'] = 'THE SPOKEN HUB'

In [None]:
findall(['stone', 'ridge'])

In [None]:
pas.loc[pas['name'].apply(lambda x: 'STONERIDGE' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'STONERIDGE' in x), 'name'] = 'THE STONERIDGE GROUP'

In [None]:
findall('priorities')

In [None]:
findall('dssi')

In [None]:
findall(['conserv', 'connect'])

In [None]:
findall(['rev', 'com'])

In [None]:
pas.loc[pas['name'] == 'REV COM', 'name'] = 'REV.COM'

In [None]:
findall(['pin', 'point'])

In [None]:
pas.loc[pas['name'].apply(lambda x: 'PINPOINT WEB' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'PINPOINT WEB' in x), 'name'] = 'PINPOINT WEBSOLUTIONS'

In [None]:
findall(['rev', 'messag'])

In [None]:
findall('google')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'GOOGLE' in x), 'name'] = 'GOOGLE'

In [None]:
findall('interactive')

In [None]:
findall('postal')

In [None]:
findall('usps')

In [None]:
pas.loc[pas['name'].apply(lambda x: ('POSTAL' in x) | ('USPS' in x)), 'name'] = 'UNITED STATES POSTAL SERVICE'

In [None]:
findall('western')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'BEST WESTERN' in x), 'name'] = 'BEST WESTERN'

In [None]:
findall('target')

In [None]:
# Collapse TARGET-related spellings. Substring rules run first, in order,
# then the exact-match retail names.
for fragment, canonical in [
    ('MICRO TARGETING', 'ADVANCED MICRO TARGETING'),
    # NOTE(review): 'VICOTRY' is presumably a misspelling present in the raw
    # data that this rule corrects - do not "fix" the search pattern.
    ('TARGETED VICOTRY', 'TARGETED VICTORY'),
    ('TARGETSMART', 'TARGETSMART'),
]:
    pas.loc[pas['name'].str.contains(fragment, regex = False), 'name'] = canonical

pas.loc[pas['name'].isin(['TARGET', 'TARGET COM', 'TARGET STORES']), 'name'] = 'TARGET CORP'

In [None]:
findall('ppws')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'PPWS' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'PPWS' in x), 'name'] = 'PPWS'

In [None]:
findall('ralston')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'RALSTON' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'RALSTON' in x), 'name'] = 'RALSTON LAPP MEDIA'

In [None]:
findall(['columbia', 'road'])

In [None]:
findall(['campaign', 'funding'])

In [None]:
findall(['federal', 'capit'])

In [None]:
findall('fccc')

In [None]:
findall('bend')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'BRABENDER' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'BRABENDER' in x), 'name'] = 'BRABENDER COX'

In [None]:
findall('paramount')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'PARAMOUNT COMM' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'PARAMOUNT COMM' in x), 'name'] = 'PARAMOUNT COMMUNICATIONS'

In [None]:
findall('paramount')

In [None]:
findall('direct support')

In [None]:
findall(['great', 'america'])

In [None]:
findall(['political', 'data'])

In [None]:
findall('dmm')

In [None]:
findall('digital media')

In [None]:
findall('forum')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'FORUM' in x), 'name'] = 'FORUM COMMUNICATIONS COMPANY'

In [None]:
findall('seiu')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'SEIU' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'SEIU' in x), 'name'] = 'SEIU (umbrella)'

In [None]:
findall('enterprise')

In [None]:
# Names containing both 'ENTERPRISE' and 'RENT', plus the bare name
# 'ENTERPRISE', all become 'ENTERPRISE CAR RENTAL'.
mentions_rental = (
    pas['name'].str.contains('ENTERPRISE', regex = False)
    & pas['name'].str.contains('RENT', regex = False)
)
pas.loc[mentions_rental | (pas['name'] == 'ENTERPRISE'), 'name'] = 'ENTERPRISE CAR RENTAL'

In [None]:
findall(['tele', 'town'])

In [None]:
pas.loc[pas['name'].apply(lambda x: ('TELE' in x) & ('TOWN' in x)), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: ('TELE' in x) & ('TOWN' in x)), 'name'] = 'TELE-TOWN HALL'

In [None]:
findall('for life')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'WEST VIRGINIANS FOR LIFE' in x), 'name'] = 'WEST VIRGINIANS FOR LIFE'

In [None]:
findall('mcafee')

In [None]:
findall('headway')

In [None]:
findall(['mw', 'pol'])

In [None]:
findall(['human', 'right'])

In [None]:
findall(['american', 'prosper'])

In [None]:
findall('legacy')

In [None]:
findall('mentzer')

In [None]:
findall(['sales', 'media'])

In [None]:
# payee names ranked 101-150 by row count on expenditure rows
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[100:150]

In [None]:
findall('fls')

In [None]:
findall('amazon')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'AMAZON' in x), 'name'] = 'AMAZON'

In [None]:
findall(['wild', 'fire'])

In [None]:
pas.loc[pas['name'].apply(lambda x: 'WILDFIRE' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'WILDFIRE' in x), 'name'] = 'WILDFIRE CONTACT'

In [None]:
findall('moxie')

In [None]:
findall('converg')

In [None]:
findall('hennings')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'HENNINGS' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'HENNINGS' in x), 'name'] = 'MCCARTHY HENNINGS WHALEN'

In [None]:
findall('singularis')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'SINGULARIS' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'SINGULARIS' in x), 'name'] = 'SINGULARIS GROUP'

In [None]:
findall('envision')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'ENVISION' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'ENVISION' in x), 'name'] = 'ENVISION MARKETING'

In [None]:
findall('rifle')

In [None]:
findall('nra')

In [None]:
findall('warfield')

In [None]:
findall(['berlin', 'rosen'])

In [None]:
pas.loc[pas['name'].apply(lambda x: 'BERLIN ROSEN' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'BERLIN ROSEN' in x), 'name'] = 'BERLIN ROSEN LTD'

In [None]:
findall(['np', 'consult'])

In [None]:
findall('mvar')

In [None]:
findall('homewood')

In [None]:
findall('tide')

In [None]:
findall('three')

In [None]:
findall('ngp')

In [None]:
findall(['voter', 'action'])

In [None]:
findall('valtim')

In [None]:
findall('killer')

In [None]:
findall(['work', 'prog'])

In [None]:
findall(['camp', 'sol'])

In [None]:
findall('speedway')

In [None]:
findall('eagle')

In [None]:
findall(['camp', 'group'])

In [None]:
findall('budget')

In [None]:
pas.loc[pas['name'].apply(lambda x: ('BUDGET' in x) & ('AVIS' not in x)), 'name'] = 'BUDGET (umbrella)'

In [None]:
findall('donor')

In [None]:
findall(['blue', 'light'])

In [None]:
findall('eberle')

In [None]:
findall(['vision', 'strat'])

In [None]:
findall(['bank', 'card'])

In [None]:
findall('bank of america')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'BANK OF AMERICA' in x), 'name'] = 'BANK OF AMERICA'

In [None]:
findall('herald')

In [None]:
findall('vell')

In [None]:
findall('public interest')

In [None]:
findall(['american', 'express'])

In [None]:
pas.loc[pas['name'].apply(lambda x: 'AMERICAN EXPRESS ' in x), 'name'] = 'AMERICAN EXPRESS'

In [None]:
findall('amex')

In [None]:
findall('savanna')

In [None]:
findall(['print', 'partner'])

In [None]:
findall(['sb', 'strat'])

In [None]:
findall('planned parent')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'PLANNED PARENT' in x), 'name'] = 'PLANNED PARENTHOOD (umbrella)'

In [None]:
findall(['zip', 'mail'])

In [None]:
findall('twitter')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'TWITTER' in x), 'name'] = 'TWITTER'

In [None]:
findall('trilogy')

In [None]:
findall(['strategic', 'media'])

In [None]:
pas.loc[pas['name'].apply(lambda x: 'STRATEGIC MEDIA' in x), 'name'].value_counts()

In [None]:
findall('chong')

In [None]:
# re-check after deduping: payee names ranked 101-150 on expenditure rows
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[100:150]

In [None]:
findall(['flex', 'point'])

In [None]:
pas.loc[pas['name'] == 'FLEX POINT MEDIA', 'name'] = 'FLEXPOINT MEDIA'

In [None]:
findall('pursuit')

In [None]:
findall('airline')

In [None]:
# any name containing the singular form is relabelled with the plural carrier name
for fragment, canonical in [
    ('AMERICAN AIRLINE', 'AMERICAN AIRLINES'),
    ('SOUTHWEST AIRLINE', 'SOUTHWEST AIRLINES'),
]:
    pas.loc[pas['name'].str.contains(fragment, regex = False), 'name'] = canonical

In [None]:
# payee names ranked 151-200 by row count on expenditure rows
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[150:200]

In [None]:
findall('talbot')

In [None]:
findall('service employees international')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'SERVICE EMPLOYEES INTERNATIONAL' in x), 'name'] = 'SEIU (umbrella)'

In [None]:
findall('wright')

In [None]:
findall('trippi')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'TRIPPI' in x), 'name'] = 'JOE TRIPPI AND ASSOCIATES'

In [None]:
findall(['america', 'market'])

In [None]:
findall('j-ad')

In [None]:
findall(['water', 'mark'])

In [None]:
findall('putnam')

In [None]:
findall('donatelli')

In [None]:
findall('rumble')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'RUMBLE' in x), 'name'] = 'RUMBLEUP'

In [None]:
findall('prime')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'PRIME' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'] == 'PRIME MEDIA', 'name'] = 'PRIME MEDIA PARTNERS'

In [None]:
findall('magic')

In [None]:
findall('national media')

In [None]:
# Fold the '&PLACEMENT' spelling (no space after the ampersand, so the
# earlier ' & ' -> ' AND ' substitution missed it) into the normalised form.
# The target uses ' AND ' because a properly spaced ' & ' was already
# rewritten to ' AND ' during name cleaning; a target containing ' & ' would
# create a new variant instead of merging.
pas.loc[pas['name'] == 'NATIONAL MEDIA RESEARCH PLANNING &PLACEMENT', 'name'] = \
'NATIONAL MEDIA RESEARCH PLANNING AND PLACEMENT'

In [None]:
findall('solidarity')

In [None]:
findall('korean')

In [None]:
findall('akpd')

In [None]:
pas.loc[pas['name'] == 'AKPD MEDIA', 'name'] = 'AKPD MESSAGE AND MEDIA'

In [None]:
findall(['pol', 'ink'])

In [None]:
findall('think')

In [None]:
findall(['camp', 'hq']) ## relationship?

In [None]:
findall('hulu')

In [None]:
findall(['env', 'america'])

In [None]:
findall('gumbinner')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'GUMBINNER' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'GUMBINNER' in x), 'name'] = 'GUMBINNER AND DAVIES COMMUNICATIONS'

In [None]:
findall('76')

In [None]:
pas.loc[pas['name'].apply(lambda x: '76 WORDS' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'] == '76 WORDS', 'name'] = '76 WORDS CORP'

In [None]:
findall('toskr')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'TOSKR' in x), 'name'] = 'TOSKR INC'

In [None]:
findall('opn')

In [None]:
findall('rst market')

In [None]:
pas.loc[pas['name'] == 'RST MARKETING ASSOC', 'name'] = 'RST MARKETING'

In [None]:
findall('apex')

In [None]:
findall('burrell')

In [None]:
findall('srcp')

In [None]:
findall(['west', 'affair'])

In [None]:
findall('simko')

In [None]:
findall(['big', 'eye'])

In [None]:
findall('lps')

In [None]:
findall(['modern', 'litho'])

In [None]:
findall(['dixon', 'davis'])

In [None]:
pas.loc[pas['name'].apply(lambda x: ('DAVIS' in x) & ('DIXON' in x)), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: ('DAVIS' in x) & ('DIXON' in x)), 'name'] = 'DIXON DAVIS MEDIA GROUP'

In [None]:
findall('nasica')

In [None]:
findall('lockwood')

In [None]:
findall(['harris', 'media'])

In [None]:
findall('omega')

In [None]:
findall('cavalry')

In [None]:
findall('american way')

In [None]:
findall('yongho')

In [None]:
findall(['conserv', 'ohio'])

In [None]:
findall('mdi')

In [None]:
findall('anne lewis')

In [None]:
findall('extended')

In [None]:
# Name frequency table restricted to expenditure-type rows; show positions
# 150-199 (0-based) of the names ordered by descending row count.
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
# `isin` is the vectorized equivalent of the row-wise membership test.
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[150:200]

In [None]:
findall(['wilson', 'grand'])

In [None]:
findall(['professional', 'profit'])

In [None]:
# Name frequency table restricted to expenditure-type rows; show positions
# 200-249 (0-based) of the names ordered by descending row count.
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
# `isin` is the vectorized equivalent of the row-wise membership test.
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[200:250]

In [None]:
findall(['in', 'field'])

In [None]:
pas.loc[pas['name'].apply(lambda x: 'FAIRFIELD INN' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'FAIRFIELD INN' in x), 'name'] = 'FAIRFIELD INN'

In [None]:
findall('revenue')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'INTERNAL REVENUE' in x), 'name'] = 'INTERNAL REVENUE SERVICE'

In [None]:
findall('canal')

In [None]:
findall('ibew')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'IBEW' in x), 'name'] = 'IBEW (umbrella)'

In [None]:
findall(['on', 'message'])

In [None]:
pas.loc[pas['name'] == 'ON MESSAGE', 'name'] = 'ONMESSAGE'

In [None]:
findall('jamestown')

In [None]:
findall('outreach')

In [None]:
findall('lyft')

In [None]:
findall('precision')

In [None]:
findall(['bull', 'eye'])

In [None]:
findall('confluent')

In [None]:
findall(['adela', 'roche'])

In [None]:
findall(['color', 'change'])

In [None]:
pas.loc[pas['name'] == 'COLOROFCHANGE ORG', 'name'] = 'COLOROFCHANGE.ORG'

In [None]:
findall(['active', 'engag'])

In [None]:
findall(['comm', 'worker'])

In [None]:
pas.loc[pas['name'].apply(lambda x: 'COMMUNICATIONS WORKERS' in x), 'name'] = 'COMMUNICATIONS WORKERS OF AMERICA'

In [None]:
findall(['stone', 'phone'])

In [None]:
pas.loc[pas['name'].apply(lambda x: ('STONE' in x) & ('PHONE' in x)), 'name'] = 'STONES PHONES'

In [None]:
findall('strategic service')

In [None]:
pas.loc[pas['name'] == 'M&R STRATEGIC SERVICES', 'name'] = 'M+R STRATEGIC SERVICES'

In [None]:
findall('steel')

In [None]:
findall('pacific')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'OF THE PACIFIC S' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'] == 'PPAF OF THE PACIFIC SOUTHWEST', 'name'] = 'PLANNED PARENTHOOD (umbrella)'
pas.loc[pas['name'] == 'PP ACTION FUND OF THE PACIFIC SW', 'name'] = 'PLANNED PARENTHOOD (umbrella)'

In [None]:
findall('pp ')

In [None]:
findall('sss')

In [None]:
# Fix the triple-S typo wherever it occurs inside a name. `regex=False` keeps
# this a literal substring replacement, matching `str.replace` on each value.
pas['name'] = pas['name'].str.replace('CONGRESSS', 'CONGRESS', regex=False)

In [None]:
findall('integram')

In [None]:
findall('mosaic')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'MOSAIC' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'] == 'MOSAIC', 'name'] = 'MOSAIC MEDIA STRATEGY GROUP'

In [None]:
findall(['yes', 'print'])

In [None]:
findall(['tru', 'corp'])

In [None]:
pas.loc[pas['name'].apply(lambda x: 'TRUCORPS' in x), 'name'] = 'TRUCORPS'

In [None]:
findall(['left', 'hook'])

In [None]:
findall('politxt')

In [None]:
findall('staples')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'STAPLES' in x), 'name'] = 'STAPLES'

In [None]:
findall(['something', 'else'])

In [None]:
findall('l2')

In [None]:
findall('meridian')

In [None]:
findall('mocap')

In [None]:
findall(['white', 'board'])

In [None]:
findall('jva')

In [None]:
pas.loc[pas['name'] == 'JVA CAMPAGINS', 'name'] = 'JVA CAMPAIGNS'

In [None]:
findall('erdman')

In [None]:
findall('robo')

In [None]:
findall('honold')

In [None]:
findall(['red', 'print'])

In [None]:
pas.loc[pas['name'] == 'RED PRINT STRATEGY', 'name'] = 'REDPRINT STRATEGY'

In [None]:
findall('adp')

In [None]:
findall('store')

In [None]:
pas.loc[pas['name'] == 'U P S STORE', 'name'] = 'UPS STORE'
pas.loc[pas['name'].apply(lambda x: 'UPS STORE' in x), 'name'] = 'UNITED PARCEL SERVICE (UPS)'

In [None]:
findall('united parcel')

In [None]:
pas.loc[pas['name'] == 'UNITED PARCEL SERVICES', 'name'] = 'UNITED PARCEL SERVICE (UPS)'

In [None]:
findall('million')

In [None]:
pas.loc[pas['name'] == 'MILLION MORE VOTERS SPONSORED BY THE', 'name'] = \
'MILLION MORE VOTERS SPONSORED BY THE CA LABOR FEDERATION AFL-CIO'

In [None]:
findall('zeus')

In [None]:
findall('maverick')

In [None]:
findall('fairmont')

In [None]:
findall(['cross', 'screen'])

In [None]:
# Name frequency table restricted to expenditure-type rows; show positions
# 200-249 (0-based) of the names ordered by descending row count.
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
# `isin` is the vectorized equivalent of the row-wise membership test.
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[200:250]

In [None]:
findall(['cross', 'screen'])

In [None]:
# Name frequency table restricted to expenditure-type rows; show positions
# 250-299 (0-based) of the names ordered by descending row count.
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
# `isin` is the vectorized equivalent of the row-wise membership test.
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[250:300]

In [None]:
findall('ianza')

In [None]:
findall('savvy')

In [None]:
findall(['siegel', 'strat'])

In [None]:
findall(['star', 'board'])

In [None]:
pas.loc[pas['name'] == 'STARBOARD STRAGEGIC', 'name'] = 'STARBOARD STRATEGIC'

In [None]:
findall('fairmont')

In [None]:
findall('tampa')

In [None]:
findall(['mail', 'haus'])

In [None]:
findall(['story', 'teller'])

In [None]:
pas.loc[pas['name'] == 'STORYTELLERS', 'name'] = 'STORYTELLERS GROUP'

In [None]:
findall('victory media')

In [None]:
pas.loc[pas['name'] == 'VICTORY MEDIA GROUP LTD', 'name'] = 'VICTORY MEDIA GROUP'

In [None]:
findall('matson')

In [None]:
findall(['creat', 'design'])

In [None]:
findall('milli')

In [None]:
findall(['hard', 'knock'])

In [None]:
pas.loc[pas['name'] == 'HARD KNOCKS', 'name'] = 'HARD KNOCKS FIELD'

In [None]:
findall('cpm')

In [None]:
findall(['micro', 'soft'])

In [None]:
findall('fuse')

In [None]:
findall(['north', 'shore'])

In [None]:
findall('wolverine')

In [None]:
findall('kapol')

In [None]:
findall(['keri', 'max'])

In [None]:
findall('amagi')

In [None]:
findall('youtube')

In [None]:
pas.loc[pas['name'] == 'YOUTUBE', 'name'] = 'GOOGLE'

In [None]:
findall('eichen')

In [None]:
findall(['river', 'city'])

In [None]:
findall('ufcw')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'UFCW' in x), 'name'] = 'UFCW (umbrella)'

In [None]:
findall('4c')

In [None]:
findall(['pier', 'media'])

In [None]:
findall('otg')

In [None]:
findall('clean water')

In [None]:
findall(['brown', 'print']) # same?

In [None]:
findall('catalist')

In [None]:
findall(['ground', 'swell'])

In [None]:
findall('gps')

In [None]:
pas.loc[pas['name'] == 'GROUNDSWELL PUBLIC STRATEGIES INC GPS IMPACT', 'name'] = 'GPS IMPACT'

In [None]:
findall(['loud', 'print'])

In [None]:
findall('ipm')

In [None]:
# Name frequency table restricted to expenditure-type rows; show positions
# 250-299 (0-based) of the names ordered by descending row count.
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
# `isin` is the vectorized equivalent of the row-wise membership test.
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[250:300]

In [None]:
findall('lcx')

In [None]:
pas.loc[pas['name'] == 'LCX COM', 'name'] = 'LCX.COM'

In [None]:
findall('beacon')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'BEACON HILL' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'BEACON HILL' in x), 'name'] = 'BEACON HILL STAFFING GROUP'

In [None]:
# Name frequency table restricted to expenditure-type rows; show positions
# 300-349 (0-based) of the names ordered by descending row count.
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
# `isin` is the vectorized equivalent of the row-wise membership test.
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[300:350]

In [None]:
findall(['ams', 'comm'])

In [None]:
pas.loc[pas['name'].apply(lambda x: 'AMS COMMUNICATIONS' in x), 'name'] = 'AMS COMMUNICATIONS'

In [None]:
findall('burdick')

In [None]:
findall('new media')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'NEW MEDIA FIRM' in x), 'name'] = 'THE NEW MEDIA FIRM'

In [None]:
findall('broker')

In [None]:
findall('bouchard')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'BOUCHARD GOLD' in x), 'name'] = 'BOUCHARD GOLD COMMUNICATIONS'

In [None]:
findall(['center', 'popular'])

In [None]:
findall('excelsior')

In [None]:
findall(['chambers', 'strat'])

In [None]:
findall(['open', 'hand'])

In [None]:
findall(['penn', 'united'])

In [None]:
findall('buying')

In [None]:
findall(['ax', 'media'])

In [None]:
findall('equalization')

In [None]:
findall(['info', 'cision'])

In [None]:
findall('apple')

In [None]:
findall(['assoc', 'press'])

In [None]:
pas.loc[pas['name'].apply(lambda x: 'ASSOCIATED PRESS' in x), 'name'] = 'ASSOCIATED PRESS'

In [None]:
findall(['color', 'tree'])

In [None]:
findall(['red', 'horse'])

In [None]:
pas.loc[pas['name'] == 'REDHORSE STRATEGIES', 'name'] = 'RED HORSE STRATEGIES'

In [None]:
findall(['shorr', 'holding'])

In [None]:
findall('liftable')

In [None]:
findall('penneco')

In [None]:
findall(['allegheny', 'print'])

In [None]:
pas.loc[pas['name'] == 'COMMERCIAL PRINTING ALLEGHENY', 'name'] = 'ALLEGHENY COMMERCIAL PRINTING'

In [None]:
findall('cwa')

In [None]:
# Collapse every name containing 'CWA' into a single umbrella label.
# An earlier cell rewrites names containing 'COMMUNICATIONS WORKERS' to
# 'COMMUNICATIONS WORKERS OF AMERICA'; that label does not contain 'CWA', so
# without folding it in here the union ends up under two different names.
# NOTE(review): assumes both labels refer to the same organization - confirm.
is_cwa = pas['name'].apply(
    lambda x: ('CWA' in x) or (x == 'COMMUNICATIONS WORKERS OF AMERICA')
)
pas.loc[is_cwa, 'name'] = 'CWA (umbrella)'

In [None]:
findall(['basis', 'd'])

In [None]:
findall('baughman')

In [None]:
findall(['voter', 'circle'])

In [None]:
pas.loc[pas['name'].apply(lambda x: ('VOTER' in x) & ('CIRCLE' in x)), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: ('VOTER' in x) & ('CIRCLE' in x)), 'name'] = 'VOTERCIRCLE'

In [None]:
findall(['dover', 'strat'])

In [None]:
findall('voter info')

In [None]:
findall(['united', 'print'])

In [None]:
findall(['afl', 'cio'])

In [None]:
pas.loc[pas['name'].apply(lambda x: ('AFL' in x) & ('CIO' in x)), 'name'] = 'AFL-CIO (umbrella)'

In [None]:
findall(['talent', 'pay'])

In [None]:
# Name frequency table restricted to expenditure-type rows; show positions
# 300-349 (0-based) of the names ordered by descending row count.
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
# `isin` is the vectorized equivalent of the row-wise membership test.
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[300:350]

In [None]:
findall(['pool', 'house'])

In [None]:
findall(['media', 'bridge'])

In [None]:
# Name frequency table restricted to expenditure-type rows; show positions
# 350-399 (0-based) of the names ordered by descending row count.
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
# `isin` is the vectorized equivalent of the row-wise membership test.
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[350:400]

In [None]:
findall(['prog', 'contact'])

In [None]:
findall('circle')

In [None]:
findall('axiom')

In [None]:
findall('c3') # same?

In [None]:
findall('pex')

In [None]:
findall('mooney')

In [None]:
findall('fulk')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'FULKERSON' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'FULKERSON' in x), 'name'] = 'FULKERSON KENNEDY & COMPANY'

In [None]:
findall('civis')

In [None]:
findall('getty')

In [None]:
findall(['at&t'])

In [None]:
pas.loc[pas['name'].apply(lambda x: 'AT&T' in x), 'name'] = 'AT&T'

In [None]:
findall('fiverr')

In [None]:
findall('cp direct')

In [None]:
findall('lamar')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'LAMAR ' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'LAMAR ' in x), 'name'] = 'LAMAR'

In [None]:
findall(['action', 'mail'])

In [None]:
findall(['moore', 'camp'])

In [None]:
findall('shell')

In [None]:
pas.loc[pas['name'].apply(lambda x: ('SHELL ' in x) | ('SHELL-' in x)), 'name'] = 'SHELL CORP'

In [None]:
findall('opro')

In [None]:
findall(['mobile', 'log'])

In [None]:
findall('landscape')

In [None]:
findall('winning')

In [None]:
# Rewrite every name that contains the substring 'WINNING' to one label.
# NOTE(review): the match is on 'WINNING' alone, so any other name containing
# that word is also renamed; unlike e.g. the GUMBINNER and FULKERSON merges
# there is no value_counts() check before overwriting - confirm against the
# output of findall('winning').
pas.loc[pas['name'].apply(lambda x: 'WINNING' in x), 'name'] = 'WINNING CONNECTIONS'

In [None]:
findall(['black', 'rock'])

In [None]:
findall(['hopkins', 'sachs'])

In [None]:
findall('reston')

In [None]:
findall('v12')

In [None]:
findall(['ad', 'victory'])

In [None]:
findall(['betty', 'smith'])

In [None]:
pas.loc[pas['name'].apply(lambda x: ('BETTY' in x) & ('SMITH' in x)), 'name'] = 'BETTY AND SMITH LLC'

In [None]:
# Name frequency table restricted to expenditure-type rows; show positions
# 350-399 (0-based) of the names ordered by descending row count.
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
# `isin` is the vectorized equivalent of the row-wise membership test.
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[350:400]

In [None]:
findall('cfg')

In [None]:
findall('jm2')

In [None]:
# Name frequency table restricted to expenditure-type rows; show positions
# 400-449 (0-based) of the names ordered by descending row count.
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
# `isin` is the vectorized equivalent of the row-wise membership test.
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[400:450]

In [None]:
findall('elevate')

In [None]:
findall('mottola')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'MOTTOLA' in x), 'name'] = 'MOTTOLA CONSULTING'

In [None]:
findall('mcnally')

In [None]:
findall('costco')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'COSTCO' in x), 'name'] = 'COSTCO (umbrella)'

In [None]:
findall(['nat', 'cable'])

In [None]:
findall(['vera', 'pax'])

In [None]:
findall('hello')

In [None]:
findall('map')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'MAP' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'] == 'MAP POLITICAL COMMUNICATION', 'name'] = 'MAP POLITICAL COMMUNICATIONS'

In [None]:
findall('engineer')

In [None]:
findall(['image', 'plus'])

In [None]:
findall('vedda')

In [None]:
findall('godfrey')

In [None]:
findall('wawa')

In [None]:
findall('grunwald')

In [None]:
findall('fuse')

In [None]:
findall('api ')

In [None]:
findall(['holiday', 'inn'])

In [None]:
pas.loc[pas['name'].apply(lambda x: 'HOLIDAY INN' in x), 'name'] = 'HOLIDAY INN (umbrella)'

In [None]:
findall(['hopkin', 'sac'])

In [None]:
pas.loc[pas['name'].apply(lambda x: ('HOPKIN' in x) & ('SACHS' in x)), 'name'] = 'HOPKINS+SACHS'

In [None]:
findall('marchant')

In [None]:
findall('ddc')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'DDC' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: 'DDC' in x), 'name'] = 'DDC'

In [None]:
findall('gowan')

In [None]:
findall(['wish', 'list'])

In [None]:
findall('a&b')

In [None]:
# Name frequency table restricted to expenditure-type rows; show positions
# 400-449 (0-based) of the names ordered by descending row count.
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
# `isin` is the vectorized equivalent of the row-wise membership test.
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[400:450]

In [None]:
findall('instagram')

In [None]:
pas.loc[pas['name'] == 'INSTAGRAM', 'name'] = 'FACEBOOK'

In [None]:
# Name frequency table restricted to expenditure-type rows; show positions
# 450-499 (0-based) of the names ordered by descending row count.
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
# `isin` is the vectorized equivalent of the row-wise membership test.
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[450:500]

In [None]:
findall(['chi', 'dona'])

In [None]:
findall('strata')

In [None]:
findall(['right', 'way'])

In [None]:
findall(['camp', 'work'])

In [None]:
findall(['mega', 't', 'v'])

In [None]:
findall('letter')

In [None]:
findall('cielo')

In [None]:
findall(['black', 'fork'])

In [None]:
findall('sway')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'SWAYABLE' in x), 'name'] = 'WORLDVIEW INC DBA SWAYABLE'

In [None]:
pas.loc[pas['name'] == 'SWAY', 'name'] = 'SWAY MEDIA'

In [None]:
findall('dunk')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'DUNK' in x), 'name'] = 'DUNKIN DONUTS'

In [None]:
findall('futura')

In [None]:
findall('ethic')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'ETHICA' in x), 'name'] = 'ETHICA MEDIA'

In [None]:
findall(['sun', 'pass'])

In [None]:
findall(['fed', 'ex'])

In [None]:
pas.loc[pas['name'].apply(lambda x: ('FEDEX' in x) | ('FED EX' in x)), 'name'] = 'FEDERAL EXPRESS'

In [None]:
findall(['lg', 'camp'])

In [None]:
findall('zwerd')

In [None]:
findall(['thom', 'comm'])

In [None]:
findall(['switch', 'board'])

In [None]:
pas.loc[pas['name'].apply(lambda x: 'SWITCHBOARD' in x), 'name'] = 'SWITCHBOARD COMMUNICATIONS'

In [None]:
findall(['lux', 'media'])

In [None]:
findall('advantage')

In [None]:
pas.loc[pas['name'].apply(lambda x: 'ADVANTAGE' in x), 'name'].value_counts()

In [None]:
pas.loc[pas['name'].apply(lambda x: ('ADVANTAGE DIRECT' in x) | (x == 'ADVANTAGE')), 'name'] = \
'ADVANTAGE DIRECT COMMUNICATIONS'

In [None]:
findall(['rev', 'field'])

In [None]:
findall(['ring', 'lim'])

In [None]:
findall(['voter', 'guide'])

In [None]:
# Name frequency table restricted to expenditure-type rows; show positions
# 450-499 (0-based) of the names ordered by descending row count.
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
# `isin` is the vectorized equivalent of the row-wise membership test.
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[450:500]

In [None]:
# Name frequency table restricted to expenditure-type rows; show positions
# 500-549 (0-based) of the names ordered by descending row count.
expenditure_tps = [
    'Independent expenditure opposing election of candidate',
    'Independent expenditure advocating election of candidate',
    'Coordinated party expenditure',
]
# `isin` is the vectorized equivalent of the row-wise membership test.
lil_pas = pas[pas['transaction_tp'].isin(expenditure_tps)]
lil_pas['name'].value_counts().iloc[500:550]

In [None]:
# Summary statistics and a horizontal box plot of transaction amounts, one
# box per transaction type, restricted to rows that have a cand_id.
has_cand = pas['cand_id'].notna()
# Sorted so the printout and the box order are the same on every run
# (iterating a set of strings is not stable across interpreter sessions).
labels = sorted(pas.loc[has_cand, 'transaction_tp'].dropna().unique())

data = []
for which in labels:
    lil_pas = pas[has_cand & (pas['transaction_tp'] == which)]
    amounts = lil_pas['transaction_amt']
    data.append(amounts / 1.0E6)  # plotted in millions of USD
    print(which)
    print('count:', len(lil_pas))
    print('unique:', lil_pas['name'].nunique())
    print('median:', amounts.median())
    print('mean:', amounts.mean())
    print('std dev:', amounts.std())
    print(lil_pas['name'].value_counts()[:10])
    print('\n')

fig, ax = plt.subplots(figsize = (20, 2))

ax.boxplot(data, vert = False)
# boxplot places the i-th series at position i + 1
ax.set_yticks(range(1, len(labels) + 1))
ax.set_yticklabels(labels, fontsize = 12)
ax.set_xlabel('$MM USD')

plt.show();

In [None]:
pas['transaction_tp'].value_counts()

In [None]:
# Line style per transaction type, assigned in order of descending frequency
# in `pas`.
# The fifth style uses its own dash pattern instead of a thinner ':' line:
# the plotting cell doubles `lw` for winners, so a winner drawn with
# (':', lw 1.0) was indistinguishable from a non-winner drawn with
# (':', lw 2.0).
tp_styles = [
    {
        'ls' : '-',
        'lw' : 2.0,
    },
    {
        'ls' : '--',
        'lw' : 2.0,
    },
    {
        'ls' : '-.',
        'lw' : 2.0,
    },
    {
        'ls' : ':',
        'lw' : 2.0,
    },
    {
        'ls' : (0, (3, 1, 1, 1, 1, 1)),  # dash-dot-dot
        'lw' : 1.0,
    },
]
# Cycle through the styles so that a sixth (or later) transaction type still
# gets an entry instead of being silently dropped by zip() and raising a
# KeyError at plot time.
tp_dict = {
    tp: tp_styles[i % len(tp_styles)]
    for i, tp in enumerate(pas['transaction_tp'].value_counts().index)
}

# states/territories
# {<abbreviation> : <full name>}
abbr_cand = pd.read_csv('../../assets/abbreviations.csv')
abbr_dict = dict(zip(abbr_cand['abbreviation'], abbr_cand['name']))

In [None]:
# Per-contest figure: cumulative committee transactions over time for every
# candidate in the contest that appears in `pas`, one line per
# (candidate, transaction type). Also prints, per candidate, the top names
# behind opposing / supporting independent expenditures.
contests = sorted(cand['contest'].dropna().unique())

# plot settings per party
party_dict = {
    'Republican' : {
        'hex' : '#FF6661',
        'mult' : 0.3,
        'party' : 'Republican',
    },
    'Democrat' : {
        'hex' : '#5494F7',
        'mult' : -0.3,
        'party' : 'Democrat',
    },
    'Third party' : {
        'hex' : '#15DCDC',
        'mult' : 0,
        'party' : 'Third party',
    }
}

vol_light = '#f8fbfc'
vol_dark = '#263C4D'

OPPOSING_TP = 'Independent expenditure opposing election of candidate'
SUPPORTING_TP = 'Independent expenditure advocating election of candidate'

# --- one-time styling ------------------------------------------------------
# Set before any figure exists so the first figure is styled like the rest
# (rcParams only affect artists created after they are set).
vol_dark_rcparams = [
    'patch.edgecolor',
    'grid.color',
    'text.color',
    'axes.labelcolor',
    'xtick.color',
    'ytick.color',
    'axes.edgecolor'
]
for which in vol_dark_rcparams:
    plt.rcParams[which] = vol_dark

# Register the project fonts once (previously re-registered per contest).
font_dirs = ['../../css/fonts/for_matplotlib/', ]
font_files = fm.findSystemFonts(fontpaths=font_dirs)
if hasattr(fm.fontManager, 'addfont'):
    # createFontList was removed from newer matplotlib; addfont replaces it.
    for font_file in font_files:
        fm.fontManager.addfont(font_file)
else:
    fm.fontManager.ttflist.extend(fm.createFontList(font_files))
plt.rcParams['font.family'] = 'DM Sans Medium'

# --- loop invariants ---------------------------------------------------------
cands_with_tx = set(pas['cand_id'].dropna())
winner_ids = set(winners['cand_id'])
date_range = [pas['transaction_dt'].min(), pas['transaction_dt'].max()]
election_day = dt.strptime('2018-11-06', '%Y-%m-%d')
fec_deadlines = [
    dt.strptime(deadline, '%Y-%m-%d') for deadline in [
        '2017-03-31',
        '2017-06-30',
        '2017-09-30',
        '2017-12-31',
        '2018-03-31',
        '2018-06-30',
        '2018-09-30',
        '2018-10-19', # pre-general
        '2018-11-28', # post-general
        '2018-12-31', # year-end
    ]
]
logo_img = plt.imread('../../assets/VOL_Logo_Color_Light_Green.png')


def _top_spenders(cand_tx, transaction_tp, n = 5):
    """Return the `n` largest per-name totals for one transaction type.

    Only rows of `cand_tx` whose `transaction_tp` equals the given type are
    summed, so a name that appears under several types is not credited with
    its other activity. Result columns: ['name', 'total'], sorted descending.
    """
    return (
        cand_tx.loc[cand_tx['transaction_tp'] == transaction_tp]
        .groupby('name')['transaction_amt']
        .sum()
        .sort_values(ascending = False)
        .head(n)
        .rename('total')
        .reset_index()
    )


def _contest_title(contest):
    """Build the figure title from a contest key such as 'TX_senate' or 'AL_01'."""
    if contest == '00_00':
        return f'{year} Presidential Contest:\nCommittee Contributions'
    parts = contest.split('_')
    state = abbr_dict[parts[0]]
    district = parts[1]
    if district == 'senate':
        return f'{year} U.S. Senate Elections:\nCommittee Contributions for {state} Contest'
    if district != '00':
        district = district.lstrip('0')
        return f'{year} U.S. House of Representatives Elections:\nCommittee Contributions for {state} District {district}'
    return f'{year} U.S. House of Representatives Elections:\nCommittee Contributions in {state} At-Large District'


for contest in contests:
    lil_cand = cand[cand['contest'] == contest]
    candidates = [c for c in lil_cand['cand_id'] if c in cands_with_tx]
    # only contests with at least two candidates present in `pas` are plotted
    if len(candidates) <= 1:
        continue

    print('\n\n---------------------------------------------------\n')
    print(contest)
    fig, ax = plt.subplots(figsize = (14, 8))
    fig.patch.set_facecolor(vol_light)
    ax.patch.set_facecolor(vol_light)

    for candidate in candidates:
        is_winner = candidate in winner_ids
        mult = 2 if is_winner else 1  # winners are drawn with doubled line width
        win = ' (winner)' if is_winner else ''

        lil_pas = pas[pas['cand_id'] == candidate].sort_values(['transaction_dt'])
        cand_row = lil_cand[lil_cand['cand_id'] == candidate].iloc[0]
        name = cand_row['cand_name']
        party = cand_row['cand_pty_affiliation']
        inc = ' (incumbent)' if cand_row['cand_ici'] == 'I' else ''

        for heading, which_tp in (('OPPOSING:', OPPOSING_TP),
                                  ('SUPPORTING:', SUPPORTING_TP)):
            top_df = _top_spenders(lil_pas, which_tp)
            if len(top_df) > 0:
                print(heading, name)
                print(top_df)
                print('\n')

        # rows stay in date order within each group, so cumsum is chronological
        for tp, tp_pas in lil_pas.groupby('transaction_tp'):
            running_mm = tp_pas['transaction_amt'].cumsum() / 1.0E6
            series_label = name + inc + win + ' ' + tp
            if len(tp_pas) == 1:
                # a single transaction cannot form a line; draw it as a point
                ax.scatter(
                    tp_pas['transaction_dt'],
                    running_mm,
                    color = party_dict[party]['hex'],
                    label = series_label,
                    alpha = 1.0,
                )
            else:
                ax.plot(
                    tp_pas['transaction_dt'],
                    running_mm,
                    color = party_dict[party]['hex'],
                    ls = tp_dict[tp]['ls'],
                    label = series_label,
                    alpha = 1.0,
                    lw = mult*tp_dict[tp]['lw'],
                )

    ax.set_title(_contest_title(contest), fontsize = 18)
    ax.set_xlabel('Date', fontsize = 18)
    ax.set_xlim(date_range)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %y'))
    ax.set_ylabel('Cumulative contributions\n($MM USD)', fontsize = 14)
    ax.tick_params(axis = 'both', labelsize = 14)

    # candidate series sorted by label, then the Election Day marker last
    handles, labels = ax.get_legend_handles_labels()
    legend_items = sorted(zip(labels, handles), key = lambda t: t[0])
    election_line = ax.axvline(
        election_day,
        alpha = 0.3,
        label = 'Election Day',
        color = vol_dark,
        ls = '--',
    )
    # the marker is labelled, so include it in the legend (it used to be
    # drawn after the legend was built and therefore never listed)
    legend_items.append(('Election Day', election_line))
    labels, handles = zip(*legend_items)
    ax.legend(
        handles,
        labels,
        loc = 'upper left',
        fontsize = 12,
        facecolor = vol_light,
    )

    for deadline in fec_deadlines:
        ax.axvline(
            deadline,
            color = vol_dark,
            alpha = 0.1,
            lw = 1.0,
            ls = '--',
        )

    ax.annotate(
        'Source: FEC.gov',
        xy = (0.0, 0.0),
        xytext = (38.0, -28.0),
        ha = 'center',
        va = 'bottom',
        textcoords = 'axes pixels',
        xycoords = 'axes pixels',
    )
    # logo
    imagebox = OffsetImage(logo_img, zoom = 0.07)
    imagebox.image.axes = ax
    logo_xy = (0.0, -45.0)
    ab = AnnotationBbox(
        imagebox,
        logo_xy,
        xybox = logo_xy,
        xycoords = 'axes pixels',
        boxcoords = ('axes pixels'),
        box_alignment = (0., 0.),
        frameon = False,
    )
    ax.add_artist(ab)
    fig.tight_layout()

    plt.show();
    # release the figure so a long run of contests does not accumulate them
    plt.close(fig)

In [None]:
# Summary statistics and a horizontal box plot of transaction amounts, one
# box per transaction type, restricted to rows that have a cand_id.
has_cand = pas['cand_id'].notna()
# Sorted so the printout and the box order are the same on every run
# (iterating a set of strings is not stable across interpreter sessions).
labels = sorted(pas.loc[has_cand, 'transaction_tp'].dropna().unique())

data = []
for which in labels:
    lil_pas = pas[has_cand & (pas['transaction_tp'] == which)]
    amounts = lil_pas['transaction_amt']
    data.append(amounts / 1.0E6)  # plotted in millions of USD
    print(which)
    print('count:', len(lil_pas))
    print('unique:', lil_pas['name'].nunique())
    print('median:', amounts.median())
    print('mean:', amounts.mean())
    print('std dev:', amounts.std())
    print(lil_pas['name'].value_counts()[:10])
    print('\n')

fig, ax = plt.subplots(figsize = (20, 2))

ax.boxplot(data, vert = False)
# boxplot places the i-th series at position i + 1
ax.set_yticks(range(1, len(labels) + 1))
ax.set_yticklabels(labels, fontsize = 12)
ax.set_xlabel('$MM USD')

plt.show();

In [None]:
# label contributions *advocating for* candidate:
# name -> 'comm1__<name> (for)'
is_for = pas['transaction_tp'] == 'Independent expenditure advocating election of candidate'
# Skip rows that already carry the prefix so re-running this cell does not
# stack a second 'comm1__' / ' (for)' onto the same name.
to_label = is_for & ~pas['name'].str.startswith('comm1__', na = False)
pas.loc[to_label, 'name'] = 'comm1__' + pas.loc[to_label, 'name'] + ' (for)'

In [None]:
# label contributions *opposing* candidate:
# name -> 'comm1__<name> (against)'
is_against = pas['transaction_tp'] == 'Independent expenditure opposing election of candidate'
# Skip rows that already carry the prefix so re-running this cell does not
# stack a second 'comm1__' / ' (against)' onto the same name.
to_label = is_against & ~pas['name'].str.startswith('comm1__', na = False)
pas.loc[to_label, 'name'] = 'comm1__' + pas.loc[to_label, 'name'] + ' (against)'

In [None]:
# label coord party expenditures:
# name -> 'comm1__<name> (coord pty exp)'
is_coord = pas['transaction_tp'] == 'Coordinated party expenditure'
# Skip rows that already carry the prefix so re-running this cell does not
# stack a second 'comm1__' / ' (coord pty exp)' onto the same name.
to_label = is_coord & ~pas['name'].str.startswith('comm1__', na = False)
pas.loc[to_label, 'name'] = 'comm1__' + pas.loc[to_label, 'name'] + ' (coord pty exp)'

In [None]:
# label contributions made to nonaffiliated committees:
# name -> 'comm2__<name> (nonaffiliated)'
is_nonaffil = pas['transaction_tp'] == 'Contribution made to nonaffiliated committee'
# Skip rows that already carry the prefix so re-running this cell does not
# stack a second 'comm2__' / ' (nonaffiliated)' onto the same name.
to_label = is_nonaffil & ~pas['name'].str.startswith('comm2__', na = False)
pas.loc[to_label, 'name'] = 'comm2__' + pas.loc[to_label, 'name'] + ' (nonaffiliated)'

In [None]:
# label in-kind contributions made to registered filers:
# name -> 'comm2__<name> (in-kind)'
is_inkind = pas['transaction_tp'] == 'In-kind contribution made to registered filer'
# Skip rows that already carry the prefix so re-running this cell does not
# stack a second 'comm2__' / ' (in-kind)' onto the same name.
to_label = is_inkind & ~pas['name'].str.startswith('comm2__', na = False)
pas.loc[to_label, 'name'] = 'comm2__' + pas.loc[to_label, 'name'] + ' (in-kind)'

In [None]:
# create dataframe to show summary stats per committee
#   rows    : every `name` with more than 5 rows in `pas`
#   columns : 'name', one column per cand_id (summed transaction_amt, with
#             non-positive sums reported as 0), then 'total', 'count',
#             'distinct' (number of distinct cand_id values) and
#             'entity_tps' (list of distinct entity_tp values)
name_counts = pas['name'].value_counts()
frequent_names = sorted(name_counts[name_counts > 5].index)

# one column for each candidate appearing
each = sorted(pas['cand_id'].dropna().unique())

frequent_pas = pas[pas['name'].isin(frequent_names)]
by_name = frequent_pas.groupby('name')

# A single grouped sum replaces the per-name / per-row loop, which rescanned
# `pas` for every name and re-summed once per transaction row.
per_cand = (
    frequent_pas
    .groupby(['name', 'cand_id'])['transaction_amt']
    .sum()
    .unstack('cand_id', fill_value = 0)
    # guarantee a row per frequent name and a column per candidate
    .reindex(index = frequent_names, columns = each, fill_value = 0)
    # only positive sums are recorded; anything else stays 0
    .clip(lower = 0)
)
per_cand.columns = list(per_cand.columns)

stats = pd.DataFrame({
    'total' : by_name['transaction_amt'].sum(),
    'count' : by_name.size(),
    # dropna=False: a missing cand_id counts as one distinct value
    'distinct' : by_name['cand_id'].nunique(dropna = False),
    'entity_tps' : by_name['entity_tp'].agg(lambda tps: list(set(tps))),
}).reindex(frequent_names)

gave = pd.concat([per_cand, stats], axis = 1)
gave.insert(0, 'name', gave.index)
gave = gave.sort_values(['total'], ascending = False).reset_index(drop = True)
print(len(gave))
gave.head()

In [None]:
# merge gave.T & cand
# Transpose the per-candidate columns of `gave`: one row per cand_id (index),
# one column per committee name.
from_gave = gave.drop([
    'name',
    'total',
    'count',
    'distinct',
    'entity_tps',
], axis = 1).T

from_gave.columns = list(gave['name'])

# For every label suffix, add two per-candidate summary columns:
#   '<suffix> count' - number of labelled names with a positive amount
#   '<suffix> sum'   - total amount across those labelled names
name_cols = list(from_gave.columns)
for suffix in [
    '(for)',
    '(against)',
    '(coord pty exp)',
    '(in-kind)',
    '(nonaffiliated)',
]:
    tagged = from_gave[[col for col in name_cols if suffix in col]]
    from_gave[suffix + ' count'] = (tagged > 0).sum(axis = 1)
    from_gave[suffix + ' sum'] = tagged.sum(axis = 1)

from_gave.head(2)

In [None]:
# Attach the per-candidate committee columns to the candidate table,
# aligning both frames on cand_id.
cand.index = cand['cand_id']
# concat(axis=1) is an outer join on the index, so a candidate that appears
# only in `from_gave` gets a row whose `cand` columns are all NaN.
cand_new = pd.concat([cand, from_gave], axis = 1)
# Recover cand_id from the index for those rows; otherwise the id is lost
# when the index is dropped and then replaced by 0 in fillna below.
cand_new['cand_id'] = cand_new.index
cand_new = cand_new.reset_index(drop = True).fillna(0)
print(len(cand_new))
cand_new.head(2)

In [None]:
# Write the transaction table (with the name edits made above) without the index.
pas.to_csv('data/04a_pas_cleaned.csv', index = False)

In [None]:
# Write the candidate table joined with the per-committee columns, without the index.
cand_new.to_csv('data/04a_cand_cleaned.csv', index = False)

In [None]:
# Write the per-name summary table (`gave`) without the index.
gave.to_csv('data/04a_committee_stats.csv', index = False)