## FEC Campaign Finance

### Committee Master

In [1]:
import pandas as pd
import numpy as np

from datetime import datetime as dt

In [2]:
year = '2018'

In [3]:
# read in & clean
cm = pd.read_csv(
    f'https://www.fec.gov/files/bulk-downloads/{year}/cm{year[2:]}.zip', 
    sep = '|', 
    error_bad_lines = False, 
    header = None, 
)
print('original length:', len(cm))

# add column headers from separate file
headers = pd.read_csv(
    'https://www.fec.gov/files/bulk-downloads/data_dictionaries/cm_header_file.csv', 
)
cm.columns = [x.lower() for x in headers.columns]

# fillna
cm['cmte_pty_affiliation'].fillna('UNK', inplace = True)

# preserve uncleaned tres_nm & cmte_st1
cm['orig_tres_nm'] = list(cm['tres_nm'].values)
cm['orig_cmte_st1'] = list(cm['cmte_st1'].values)

# prep strings for deduping
change_dict = {
    ' STREET' : ' ST',
    'AVENUE' : 'AVE',
    ' DRIVE' : ' DR',
    ' ROAD' : ' RD',
    ' SUITE' : ' STE',
    '.' : '',
    ',' : '',
    'BOULEVARD' : 'BLVD',
    ' PLACE' : ' PL',
    
}
keys = list(set(change_dict.keys()))
for key in keys:
    cm['cmte_st1'] = [str(x).replace(key, change_dict[key]) for x in cm['cmte_st1']]

keep_0 = [
    ' STE ',
    '#',
    ' ST ',
    ' AVE ',
    ' RD ',
    ' PL ', 
    ' BLVD ', 
    ' DR ',
]

cm['cmte_st1'] = [x.split(' STE ')[0].split('#')[0].split(' NUM ')[0] for \
                  x in cm['cmte_st1']]
cm['cmte_st1'] = [' '.join(str(x).split()) for x in cm['cmte_st1']]

cm['cmte_zip'] = [str(x).strip() for x in cm['cmte_zip']]
cm.loc[cm['cmte_zip'].apply(lambda x: len(str(x))) == 9, 'cmte_zip'] = \
cm.loc[cm['cmte_zip'].apply(lambda x: len(str(x))) == 9, 'cmte_zip'].apply(lambda x: x[:5])

cm['cmte_zip'].apply(lambda x: len(x)).value_counts().sort_index()

original length: 19027


3       33
4        8
5    18982
6        1
8        3
Name: cmte_zip, dtype: int64

In [4]:
cands = pd.read_csv('data/02a_cand_clean.csv')

cands = list(cands['cand_id'])

mask = []
for x in cm['cand_id']:
    if type(x) == float:
        mask.append(True)
    else:
        if x in cands:
            mask.append(True)
        else:
            mask.append(False)
        
cm = cm[mask]

print(len(cm))

13490


In [5]:
cm.loc[cm['cmte_zip'].apply(lambda x: len(str(x))) != 5, 
       ['cmte_st1', 'cmte_city', 'cmte_st', 'cmte_zip']].sort_values([
    'cmte_zip', 
    'cmte_st1', 
    'cmte_city',
    'cmte_st',
])

Unnamed: 0,cmte_st1,cmte_city,cmte_st,cmte_zip
18190,510A SHERMAN ST,CANTON,MA,2021.0
18199,580 BRIDGE ST,DEDHAM,MA,2026.0
18200,580 BRIDGE ST,DEDHAM,MA,2026.0
18194,17 PLEASANT HILL AVE,BOSTON,MA,21232813.0
18198,33 THORONDIKE ST,BROOKLINE,MA,2446.0
18206,393 DORCHESTER RD,LYME,NH,3768.0
2060,1210 CORBIN ST,ELIZABETH,NJ,7201.0
10840,847 KENSINGTON CV,SPRINGDALE,AR,7276.0
18209,111 VICTORIA DR,EATONTOWN,NJ,7724.0
18116,,.,VA,


In [6]:
fix_zip = {
    '291501' : '29150',
    '89053145' : '89015',
    '04342443' : '70433',
    '21232813' : '02126',
    '7276' : '72762',
    '2021' : '02021',
    '2026' : '02026',
    '2446' : '02446',
    '3768' : '03768',
    '7201' : '07201',
    '7724' : '07724',
    
}
for key in list(fix_zip.keys()):
    cm.loc[cm['cmte_zip'] == key, 'cmte_zip'] = fix_zip[key]

cm.loc[cm['cmte_st1'] == '1055 CATALPA RD', 'cmte_zip'] = '91007'
cm.loc[cm['cmte_st1'] == '2008 OLDTOWN VALLEY RD SE', 'cmte_zip'] = '44663'
cm.loc[
    (cm['cmte_st1'] == '247 THIRD ST') & \
    (cm['cmte_city'] == 'ASHLAND'), 
    'cmte_zip'] = '97520'
cm.loc[cm['cmte_st1'] == '420 KEWANNA DR', 'cmte_zip'] = '47130'
cm.loc[cm['cmte_st1'] == '868 CHURCH ST -', 'cmte_zip'] = '11716'
cm.loc[
    (cm['cmte_st1'] == 'PO BOX 334') & \
    (cm['cmte_city'] == 'DAVIDSONVILLE'), 
    'cmte_zip'] = '21035'
cm.loc[
    (cm['cmte_st1'] == 'PO BOX 356') & \
    (cm['cmte_city'] == 'LIVINGSTON'), 
    'cmte_zip'] = '07039'

In [7]:
cm.loc[cm['cmte_zip'].apply(lambda x: len(x) == 3), 'cmte_zip'] = np.nan
# create full address column from cleaned parts
cm['address'] = \
cm['cmte_st1'].apply(lambda x: str(x).strip()) + ', ' + \
cm['cmte_city'].apply(lambda x: str(x).strip()) + ', ' + \
cm['cmte_st'].apply(lambda x: str(x).strip()) + ' ' + \
cm['cmte_zip'].apply(lambda x: str(x).strip())

cm.head(2)

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
0,C00000018,IRONWORKERS LOCAL UNION NO. 25 POLITICAL EDUCA...,STEVEN N GULICK,43335 W 10 MILE,P O BOX 965,NOVI,MI,48050,U,Q,UNK,T,,IRON WORKERS; INT'L ASS'N OF BRIDGE...,H8TX22313,STEVEN N GULICK,43335 W 10 MILE,"43335 W 10 MILE, NOVI, MI 48050"
1,C00000059,HALLMARK CARDS PAC,SARAH MOE,2501 MCGEE,MD #500,KANSAS CITY,MO,64108,U,Q,UNK,M,C,,,SARAH MOE,2501 MCGEE,"2501 MCGEE, KANSAS CITY, MO 64108"


In [8]:
# check for duplicate rows
cm[cm.duplicated(keep = False)]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
16591,C00690123,INDEPENDENT VOICES OF CHANGE,"PHELAN, JOHN",2 MEGHANS WAY,,LYNN,MA,1904,U,O,UNK,Q,,NONE,H8NJ07256,"PHELAN, JOHN",2 MEGHANS WAY,"2 MEGHANS WAY, LYNN, MA 01904"
16592,C00690123,INDEPENDENT VOICES OF CHANGE,"PHELAN, JOHN",2 MEGHANS WAY,,LYNN,MA,1904,U,O,UNK,Q,,NONE,H8NJ07256,"PHELAN, JOHN",2 MEGHANS WAY,"2 MEGHANS WAY, LYNN, MA 01904"


In [9]:
# drop duplicates
cm.drop_duplicates(keep = 'first', inplace = True)

In [10]:
# see which cmte_ids appear more than once
cm['cmte_id'].value_counts()[cm['cmte_id'].value_counts() > 1]

Series([], Name: cmte_id, dtype: int64)

In [11]:
# check for nulls
cm.isnull().sum()[cm.isnull().sum() > 0].sort_values()

cmte_nm                 9
cmte_tp                10
cmte_dsgn              11
cmte_st                19
cmte_city              20
cmte_zip               25
orig_cmte_st1          27
tres_nm              1513
orig_tres_nm         1513
connected_org_nm     7967
org_tp               9898
cmte_st2            10383
cand_id             12344
dtype: int64

In [12]:
# see which have missing cmte_nm
cm[cm['cmte_nm'].isnull()]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
8006,C00575308,,,,,,,,,,UNK,A,,,,,,"nan, nan, nan nan"
10164,C00616649,,,,,,,,,,UNK,A,,,,,,"nan, nan, nan nan"
10419,C00619916,,,,,,,,,,UNK,A,,,,,,"nan, nan, nan nan"
10519,C00621201,,,,,,,,,,UNK,A,,,,,,"nan, nan, nan nan"
12322,C00641233,,,,,,,,,,UNK,A,,,,,,"nan, nan, nan nan"
13088,C00653873,,,,,,,,,,UNK,A,,,,,,"nan, nan, nan nan"
13089,C00653881,,,,,,,,,,UNK,A,,,,,,"nan, nan, nan nan"
13396,C00657031,,,,,,,,,,UNK,A,,,,,,"nan, nan, nan nan"
17010,C30001184,,,PO BOX 33524,,WASHINGTON,DC,20033.0,U,E,UNK,A,,,,,PO BOX 33524,"PO BOX 33524, WASHINGTON, DC 20033"


In [13]:
# fill in placeholder cmte_nm
cm.loc[cm['cmte_nm'].isnull(), 'cmte_nm'] = '(cmte_nm missing)'

In [14]:
# see which have missing address
# then see if any other rows have matching cmte_nm
cm[cm['address'].isnull()]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [15]:
# see which street addresses appear most frequently
cm['address'].value_counts()[:20]

228 S WASHINGTON ST, ALEXANDRIA, VA 22314        156
918 PENNSYLVANIA AVE SE, WASHINGTON, DC 20003    112
824 S MILLEDGE AVE, ATHENS, GA 30605             101
PO BOX 26141, ALEXANDRIA, VA 22313                95
700 13TH ST NW, WASHINGTON, DC 20005              61
PO BOX 9891, ARLINGTON, VA 22219                  56
611 PENNSYLVANIA AVE SE, WASHINGTON, DC 20003     48
PO BOX 30844, BETHESDA, MD 20824                  41
610 S BLVD, TAMPA, FL 33606                       37
777 S FIGUEROA ST, LOS ANGELES, CA 90017          37
PO BOX 15320, WASHINGTON, DC 20003                35
910 17TH ST NW, WASHINGTON, DC 20006              35
5429 MADISON AVE, SACRAMENTO, CA 95841            32
PO BOX 75357, WASHINGTON, DC 20013                26
555 CAPITOL MALL, SACRAMENTO, CA 95814            26
120 MARYLAND AVE NE, WASHINGTON, DC 20002         24
901 N WASHINGTON ST, ALEXANDRIA, VA 22314         24
499 SOUTH CAPITOL ST SW, WASHINGTON, DC 20003     24
249 E OCEAN BLVD, LONG BEACH, CA 90802        

In [16]:
# fill in placeholder for missing treasurer name 
print(len(cm[cm['tres_nm'].isnull()]))
cm.loc[cm['tres_nm'].isnull(), 'tres_nm'] = '(no treasurer listed)'
cm.loc[cm['tres_nm'].apply(lambda x: x in ['.', 'NONE', '']), 'tres_nm'] = '(no treasurer listed)'

# convert to <first name> <last name> format from <last name>, <first name>
cm['tres_nm'] = [' '.join([' '.join(x.split(', ')[1:]), x.split(', ')[0]]) if ', ' in \
                 x else x for x in cm['tres_nm']]
# clean up spacing
# looks like this could just be ' '.join(x.strip()) 
# but i feel like i did it this way for a reason... leaving it
cm['tres_nm'] = [' '.join([x for x in list(x.split(' ')) if len(x) > 1]).strip() for \
                 x in cm['tres_nm']]

# prep for deduping

# substitutions
subs = {
    'XAVUER' : 'XAVIER',
    'JSOEPH' : 'JOSEPH',
    'WILLAIM' : 'WILLIAM',
    'V?RONIQUE' : 'VERONIQUE',
    '.' : '',
    ',' : '',
    
}
for key in list(subs.keys()):
    cm['tres_nm'] = [x.replace(key, subs[key]) for x in cm['tres_nm']]

# drops
drops = [
    'MRS',
    'MS',
    'MR',
    'MISS',
    'JR',
    'SR',
    'UNDEFINED',
    'JD',
    'CPA',
    'HONORABLE',
    'FED',
    'II', 
    'III',
    'IV',
    'ESQ',
    'DR',
    'MD',
    'DO',
    'CFO',
    'CEO',
    'CTO',
    'TREAS',
    'US',
    '1970',
    '--SELECT',
    '1973',
    '1979',
    
]
for droplet in drops:
    cm['tres_nm'] = [' '.join([x for x in x.split() if \
                               ((x not in drops) & (len(x) > 1))]) for x in cm['tres_nm']]

# a bunch of entries have last name appearing twice; 
# check for this and keep only first instance
cm['tres_nm'] = [x.split() for x in cm['tres_nm']]
def f7(seq):
    '''https://stackoverflow.com/questions/480214/\
    how-do-you-remove-duplicates-from-a-list-whilst-preserving-order'''
    seen = set()
    seen_add = seen.add
    return [x for x in seq if not (x in seen or seen_add(x))]            
cm['tres_nm'] = [' '.join(f7(x)) for x in cm['tres_nm']]

# see tres_nms appearing most frequently
cm['tres_nm'].value_counts()[:20]

1513


(no treasurer listed)    1518
PAUL KILGORE              144
JUDITH ZAMORE             108
CHRIS MARSTON             106
LISA LISKER                88
JENNIFER MAY               62
JAY PETTERSON              56
BENJAMIN OTTENHOFF         55
DAVID SATTERFIELD          49
CABELL HOBBS               47
KEITH DAVIS                44
BRADLEY CRATE              43
NANCY WATKINS              42
JANICA KYRIACOPOULOS       41
THOMAS DATWYLER            36
STEVEN MARTIN              25
ROBERT CARLIN              24
TIMOTHY KOCH               23
LORA HAGGARD               22
STEVEN MELE                22
Name: tres_nm, dtype: int64

In [17]:
# what is street address of committees not listing treasurer
cm.loc[cm['tres_nm'] == '(no treasurer listed)', 'address'].value_counts()[:20]

nan, nan, nan nan                                 16
1201 CONNECTICUT AVE NW, WASHINGTON, DC 20036     10
700 13TH ST NW, WASHINGTON, DC 20005               6
555 CAPITOL MALL, SACRAMENTO, CA 95814             5
300 M ST SE, WASHINGTON, DC 20003                  5
815 16TH ST NW, WASHINGTON, DC 20006               5
PO BOX 2259, WILMINGTON, NC 28402                  4
1401 NEW YORK AVE NW, WASHINGTON, DC 20005         4
5429 MADISON AVE, SACRAMENTO, CA 95841             4
1120 CONNECTICUT AVE NW, WASHINGTON, DC 20036      4
1747 PENNSYLVANIA AVE NW, WASHINGTON, DC 20006     4
515 KING ST, ALEXANDRIA, VA 22314                  3
518 GARDEN ST, SANTA BARBARA, CA 93101             3
625 BROAD ST, NEWARK, NJ 07102                     3
414 N ORLEANS PLAZA, CHICAGO, IL 60654             3
705-2 E BIDWELL ST, FOLSOM, CA 95630               3
905 16TH ST NW, WASHINGTON, DC 20006               3
2525 W ALAMEDA AVE, DENVER, CO 80219               3
50 F ST NW, WASHINGTON, DC 20001              

In [18]:
list(cm.loc[cm['address'] == '1201 CONNECTICUT AVE NW, WASHINGTON, DC 20036', 'cmte_nm'])

['CONSTITUTIONAL RESPONSIBILITY PROJECT',
 'SOCAL HEALTH CARE COALITION A PROJECT OF SIXTEEN THIRTY FUND',
 'FLORIDIANS FOR A FAIR SHAKE A PROJECT OF SIXTEEN THIRTY FUND',
 'DEMAND JUSTICE A PROJECT OF SIXTEEN THIRTY FUND',
 'SIXTEEN THIRTY FUND / MAKE IT WORK AMERICA1',
 'SIXTEEN THIRTY FUND/MAKE IT WORK ACTION',
 'SIXTEEN THIRTY FUND/NOT ONE PENNY',
 'FLORIDIANS FOR A FAIR SHAKE A PROJECT OF SIXTEEN THIRTY FUND',
 'SOCAL HEALTH CARE COALITION A PROJECT OF SIXTEEN THIRTY FUND',
 'OHIOANS FOR ECONOMIC OPPORTUNITY A PROJECT OF SIXTEEN THIRTY FUND']

In [19]:
list(cm.loc[
    (cm['address'] == '700 13TH ST NW, WASHINGTON, DC 20005') & \
    (cm['tres_nm'] == '(no treasurer listed)'), 'cmte_nm'])

['MAJORITY FORWARD',
 'MAJORITY FORWARD',
 'AMERICA WORKING TOGETHER',
 'DUTY AND HONOR',
 'BLACK PROGRESSIVE ACTION COALITION',
 'BLACK ECONOMIC ALLIANCE FUND']

In [20]:
list(cm.loc[
    (cm['address'] == '555 CAPITOL MALL, SACRAMENTO, CA 95814') & \
    (cm['tres_nm'] == '(no treasurer listed)'), 'cmte_nm'])

['PLANNED PARENTHOOD ADVOCACY PROJECT LOS ANGELES COUNTY',
 'PLANNED PARENTHOOD AFFILIATES OF CALIF',
 'ALAMEDANS UNITED SUPPORTING VELLA AND ASHCRAFT FOR CITY COUNCIL, ET AL',
 'OPPORTUNITY PAC - A COALITION OF TEACHERS HEALTH CARE GIVERS FACULTY MEMBERS SCHOOL EMPLOYEES AND PUBLIC AND PR',
 'GOLDEN CALIFORNIA COMMITTEE SPONSORED BY THE SEIU CALIFORNIA STATE COUNCIL']

In [21]:
# mark principal campaign committees

pccs = pd.read_csv('data/02a_principalcampaigncommittees.csv')

pccs.head()

Unnamed: 0,cand_id,cmte_id
0,H0AL02087,C00462143
1,H0AL05163,C00464149
2,H0AL07086,C00458976
3,H0AR01083,C00462374
4,H0AR03055,C00477745


In [22]:
cm['pcc'] = [True if (x in list(pccs['cmte_id'].values)) else False for x in cm['cmte_id']]
cm['pcc'].value_counts()

False    12548
True       941
Name: pcc, dtype: int64

### Dedupe treasurer names

This is a journey.

In [23]:
def findall(name):
    '''find pattern in treasurer names columns'''
    all_tres = list(set(cm['tres_nm']))
    if type(name) == str:
        these = [x for x in all_tres if name.lower() in str(x).lower()]
    else:  
        these = all_tres
        for which in name:
            these = [x for x in these if which in str(x).lower()]
    return sorted(these)

In [24]:
def getall(these, new):
    '''assign deduped treasurer name'''
    print('orig len:', len(cm[cm['tres_nm'] == new]))
    for which in these:
        cm.loc[cm['tres_nm'] == which, 'tres_nm'] = new
    print('new len:', len(cm[cm['tres_nm'] == new]))
    return 

In [25]:
# this list keeps track of the ones we've already addressed
dones = ['(no treasurer listed)']

In [26]:
findall(' kil')

['EDWIN JAY KILPATRICK',
 'GERALD KILPATRICK',
 'JEFFREY TUTTLE KILLEEN',
 'KEVIN KILEY',
 'LAURA KILMER',
 'LAWRENCE KILGORE',
 'MEAGHAN KILLION JOYCE',
 'MILES KILCOIN',
 'PAUL KILGORE',
 'RACHEL KILPATRICK',
 'TERRY KILROY',
 'WILLIAM KILLMER']

In [27]:
findall(' joyce')

['JONATHAN JOYCE', 'MEAGHAN KILLION JOYCE', 'STEPHEN JOYCE']

In [28]:
dones.extend(findall(' kil'))

In [29]:
findall(' lis')

['(no treasurer listed)',
 'EVAN ROGER LISTOPAD',
 'JOHN LISTAK',
 'LISA LISKER',
 'SUSIE LISA']

In [30]:
dones.extend(findall(' lis'))

In [31]:
findall('lisker')

['LISA LISKER']

In [32]:
new = 'LISA LISKER'
dones.append(new)

these = findall('lisker')

getall(these, new)

orig len: 88
new len: 88


In [33]:
findall(['c', 'marst'])

['CHRIS MARSTEN', 'CHRIS MARSTON', 'CHRISTOPHER MARSTON']

In [34]:
cm[cm['tres_nm'] == 'CHRIS MARSTEN']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
11508,C00632760,WHITE COAT WASTE PAC,CHRIS MARSTEN,PO BOX 26141,,ALEXANDRIA,VA,22313,U,Q,UNK,Q,,,,CHRIS MARSTEN,PO BOX 26141,"PO BOX 26141, ALEXANDRIA, VA 22313",False
12366,C00646463,BUCKEYE BATTLE PAC,CHRIS MARSTEN,PO BOX 26141,,ALEXANDRIA,VA,22313,U,O,UNK,T,,,,CHRIS MARSTEN,PO BOX 26141,"PO BOX 26141, ALEXANDRIA, VA 22313",False
14309,C00666511,WE STAND FOR BETTER,CHRIS MARSTEN,PO BOX 26141,,ALEXANDRIA,VA,22313,U,O,UNK,Q,,,,CHRIS MARSTEN,PO BOX 26141,"PO BOX 26141, ALEXANDRIA, VA 22313",False


In [35]:
cm[cm['tres_nm'] == 'CHRIS MARSTON'].head()

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
3558,C00433524,DUNCAN D. HUNTER FOR CONGRESS,CHRIS MARSTON,PO BOX 1545,,EL CAJON,CA,92022,P,H,REP,Q,,HUNTER VICTORY FUND,H8CA52052,"MARSTON, CHRIS",PO BOX 1545,"PO BOX 1545, EL CAJON, CA 92022",True
3723,C00441014,ROB WITTMAN FOR CONGRESS,CHRIS MARSTON,PO BOX 3770,,OAKTON,VA,22124,P,H,REP,Q,,WITTMAN VICTORY COMMITTEE,H8VA01147,"MARSTON, CHRIS",PO BOX 3770,"PO BOX 3770, OAKTON, VA 22124",True
3919,C00449926,AMERICAN FUTURE FUND POLITICAL ACTION,CHRIS MARSTON,45 N HILL DR,STE 100,WARRENTON,VA,20186,U,W,UNK,M,,,,"MARSTON, CHRIS",45 N HILL DR,"45 N HILL DR, WARRENTON, VA 20186",False
4932,C00486738,MANY INDIVIDUAL CONSERVATIVES HELPING ELECT LE...,CHRIS MARSTON,PO BOX 26141,,ALEXANDRIA,VA,22313,D,Q,UNK,Q,,MICHELE BACHMANN,,"MARSTON, CHRIS",PO BOX 26141,"PO BOX 26141, ALEXANDRIA, VA 22313",False
5364,C00499020,FREEDOMWORKS FOR AMERICA,CHRIS MARSTON,111 K ST NE,STE 600,WASHINGTON,DC,20002,U,O,UNK,Q,,NONE,,"MARSTON, CHRIS",111 K ST NE,"111 K ST NE, WASHINGTON, DC 20002",False


In [36]:
new = 'CHRISTOPHER MARSTON'
dones.append(new)

these = findall(['c', 'marst'])

getall(these, new)

orig len: 13
new len: 122


In [37]:
findall(['st', ' martin'])

['ERNESTO MARTINEZ', 'STEVEN MARTIN']

In [38]:
cm[cm['tres_nm'] == 'STEVE MARTIN']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [39]:
cm[cm['tres_nm'] == 'STEVEN MARTIN'].head()

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
1361,C00233353,AD ALLIANCE,STEVEN MARTIN,PO BOX 30844,,BETHESDA,MD,20824,B,Q,UNK,M,M,NONE,,"MARTIN, STEVEN JR.",PO BOX 30844,"PO BOX 30844, BETHESDA, MD 20824",False
2507,C00372532,MICHAEL BURGESS FOR CONGRESS,STEVEN MARTIN,PO BOX 2334,,DENTON,TX,76202,P,H,REP,Q,,CARE AMERICA,H2TX26093,"MARTIN, STEVEN G. JR.",PO BOX 2334,"PO BOX 2334, DENTON, TX 76202",True
3207,C00415208,LONE STAR LEADERSHIP PAC,STEVEN MARTIN,PO BOX 30844,,BETHESDA,MD,20824,D,Q,UNK,M,,MICHAEL BURGESS,,"MARTIN, STEVEN G. JR.",PO BOX 30844,"PO BOX 30844, BETHESDA, MD 20824",False
5181,C00493783,FRESHMAN AGRICULTURAL REPUBLICAN MEMBERS TRUST...,STEVEN MARTIN,PO BOX 30844,,BETHESDA,MD,20824,J,H,UNK,Q,,NONE,H0AL02087,"MARTIN, STEVEN G. JR.",PO BOX 30844,"PO BOX 30844, BETHESDA, MD 20824",False
6843,C00551366,RIGHTNOW WOMEN PAC,STEVEN MARTIN,PO BOX 30844,,BETHESDA,MD,20824,U,Q,UNK,Q,,NONE,,"MARTIN, STEVEN G. JR.",PO BOX 30844,"PO BOX 30844, BETHESDA, MD 20824",False


In [40]:
dones.extend(findall(['st', ' martin']))

In [41]:
findall(' crat')

['BRADLEY CRATE', 'VIVIAN CRATER']

In [42]:
dones.extend(findall(' crat'))

In [43]:
findall('datwyler')

['THOMAS CHARLES DATWYLER', 'THOMAS DATWYLER']

In [44]:
new = 'THOMAS DATWYLER'
dones.append(new)

these = findall('datwyler')

getall(these, new)

orig len: 36
new len: 37


In [45]:
findall('zamore')

['JUDITH ZAMORE', 'JUDY ZAMORE']

In [46]:
new = 'JUDITH ZAMORE'
dones.append(new)

these = findall('zamore')

getall(these, new)

orig len: 108
new len: 109


In [47]:
findall(' pett')

['BRENDAN PETTYJOHN', 'JAY PETTERSON']

In [48]:
dones.extend(findall(' pett'))

In [49]:
findall(['jen', 'may'])

['JENNIFER MAY']

In [50]:
dones.append('JENNIFER MAY')

In [51]:
# first jennifer for driskell
cm[cm['tres_nm'] == 'JENNIFER FAIRFIELD']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
7845,C00572859,GRETCHEN DRISKELL FOR CONGRESS,JENNIFER FAIRFIELD,PO BOX 464,,SALINE,MI,48176,P,H,DEM,Q,,NONE,H6MI07223,"FAIRFIELD, JENNIFER",PO BOX 464,"PO BOX 464, SALINE, MI 48176",True


In [52]:
# second jennifer for driskell.  same?
cm[(cm['tres_nm'] == 'JENNIFER MAY') & (cm['cmte_st1'] == 'PO BOX 464')]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [53]:
findall(' williamson')

['CRAIG WILLIAMSON', 'LES WILLIAMSON', 'SEAN WILLIAMSON']

In [54]:
dones.extend(findall(' williamson'))

In [55]:
findall(' ky')

['CHUCK KYRISH', 'JANICA KYRIACOPOULOS', 'SAMMY KYE']

In [56]:
dones.extend(findall(' ky'))

In [57]:
findall(' watkins')

['CATHLEEN WATKINS',
 'CHARLES WATKINS',
 'MICHAEL WATKINS',
 'NANCY WATKINS',
 'WILLIAM WATKINS']

In [58]:
cm[cm['tres_nm'] == 'BILL WATKINS']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [59]:
cm[cm['tres_nm'] == 'WILLIAM WATKINS']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
2551,C00376244,KUSTOFF FOR CONGRESS 2002,WILLIAM WATKINS,1661 AARON BRENNER DR,SUITE 300,MEMPHIS,TN,38120,A,H,REP,Q,,NONE,H2TN07103,"WATKINS, WILLIAM H JR",1661 AARON BRENNER DRIVE,"1661 AARON BRENNER DR, MEMPHIS, TN 38120",False
10036,C00614826,KUSTOFF FOR CONGRESS,WILLIAM WATKINS,1661 AARON BRENNER DR,STE 300,MEMPHIS,TN,38120,P,H,REP,Q,,,H2TN07103,"WATKINS, WILLIAM H JR",1661 AARON BRENNER DR,"1661 AARON BRENNER DR, MEMPHIS, TN 38120",True


In [60]:
new = 'WILLIAM WATKINS'
dones.append(new)

these = ['BILL WATKINS', 'WILLIAM WATKINS']

getall(these, new)

orig len: 2
new len: 2


In [61]:
dones.extend(findall(' watkins'))

In [62]:
findall('hobbs')

['CABELL HOBBS', 'JENNIFER HOBBS', 'NICOLE HOBBS', 'SCOTT HOBBS']

In [63]:
findall('giles')

['JASON GILES', 'RODNEY GILES']

In [64]:
dones.extend(findall('giles'))

In [65]:
dones.extend(findall('hobbs'))

In [66]:
findall(' satt')

['ADAM SATTERFIELD', 'DAVID SATTERFIELD', 'STACEY SATTERLEE']

In [67]:
dones.extend(findall(' satt'))

In [68]:
findall(' curtis')

['BOBBY CURTIS BRACKETT',
 'DAVID MARLOW CURTIS',
 'ELIZABETH CURTIS',
 'JAMES CURTIS',
 'KEVIN CURTIS',
 'LIZ CURTIS',
 'MICHAEL CURTIS DEAN',
 'RON CURTIS',
 'STEVEN DALE CURTIS']

In [69]:
findall('brackett')

['BOBBY CURTIS BRACKETT']

In [70]:
findall(['mi', 'dean'])

['MICHAEL CURTIS DEAN']

In [71]:
new = 'ELIZABETH CURTIS'
dones.append(new)

these = findall(['liz', 'curtis'])

getall(these, new)

orig len: 15
new len: 17


In [72]:
dones.extend(findall(' curtis'))

In [73]:
findall(' davis')

['ADAM DAVIS',
 'BRADLEY DAVIS',
 'BRIAN DAVIS',
 'BROOKE DAVIS',
 'CASEY DAVISON',
 'CLAY PARKER DAVIS',
 'CRYSTAL DAVIS-TAYLOR',
 'CYNTHIA DAVIS',
 'DEBORAH DAVIS',
 'DONALD DAVIS',
 'DYLAN DAVIS',
 'GARY DAVIS',
 'HAROLD DAVIS',
 'JEFF DAVIS',
 'JEREMY DAVIS',
 'JOE DAVIS',
 'JONATHAN DAVIS',
 'KAYOLKA DAVIS',
 'KEITH DAVIS',
 'KELLY DAVIS',
 'KETH DAVIS',
 'LANCE DAVIS',
 'LESLIE WALTER DAVIS',
 'MARIAN DAVIS',
 'MARIE ELIZABETH DAVIS',
 'MARIENELLA DAVIS',
 'MARK DAVIS',
 'MICHAEL DAVIS',
 'MICHELE DAVISON',
 'MILTON DAVIS',
 'PAUL DAVIS',
 'REGINALD DAVIS',
 'RICHARD BERNARD DAVIS',
 'RICK DAVISON',
 'ROBERT DAVIS',
 'SANDRA DAVIS',
 'SUSAN DAVIS',
 'VANDY DAVIS',
 'VERA DAVIS',
 'VIRGINIA DAVIS',
 'WAYNE DAVIS',
 'WILLIAM DAVIS']

In [74]:
cm[cm['tres_nm'].apply(lambda x: x in ['BILLY DAVIS', 'WILLIAM DAVIS'])]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
12062,C00638536,FRIENDS OF DANNER KLINE,WILLIAM DAVIS,PO BOX 430191,,VESTAVIA HILLS,AL,35243,P,H,DEM,Q,,NONE,H8AL06099,"DAVIS, WILLIAM",PO BOX 430191,"PO BOX 430191, VESTAVIA HILLS, AL 35243",True


In [75]:
cm[cm['tres_nm'] == 'KETH DAVIS']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
14187,C00665232,WIN IN 2018,KETH DAVIS,228 S WASHINGTON ST,SUITE 115,ALEXANDRIA,VA,22314,J,N,UNK,Q,,NONE,,"DAVIS, KETH A.",228 S. WASHINGTON STREET,"228 S WASHINGTON ST, ALEXANDRIA, VA 22314",False


In [76]:
cm[cm['tres_nm'] == 'KEITH DAVIS'].head()

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
212,C00027466,NRSC,KEITH DAVIS,425 2ND ST NE,,WASHINGTON,DC,20002,U,Y,REP,M,,2018 TILLIS SENATE CANDIDATE FUND,,"DAVIS, KEITH",425 2ND STREET NE,"425 2ND ST NE, WASHINGTON, DC 20002",False
395,C00075820,NRCC,KEITH DAVIS,320 FIRST ST SE,.,WASHINGTON,DC,20003,U,Y,REP,M,,COLE COMBINED COMMITTEE,,"DAVIS, KEITH A.",320 FIRST STREET SE,"320 FIRST ST SE, WASHINGTON, DC 20003",False
2013,C00330720,TRUST PAC TEAM REPUBLICANS FOR UTILIZING SENSI...,KEITH DAVIS,228 S WASHINGTON ST,SUITE 115,ALEXANDRIA,VA,22314,D,Q,UNK,Q,,NONE,,"DAVIS, KEITH A.",228 S. WASHINGTON STREET,"228 S WASHINGTON ST, ALEXANDRIA, VA 22314",False
2738,C00388421,TENN POLITICAL ACTION COMMITTEE INC (TENN PAC),KEITH DAVIS,228 S WASHINGTON ST,,ALEXANDRIA,VA,22314,D,Q,UNK,Q,,NONE,,"DAVIS, KEITH A.",228 S WASHINGTON STREET SUITE 115,"228 S WASHINGTON ST, ALEXANDRIA, VA 22314",False
3819,C00445387,IMS HEALTH PAC,KEITH DAVIS,228 S WASHINGTON ST,,ALEXANDRIA,VA,22314,U,Q,UNK,T,C,,,KEITH A DAVIS,228 S WASHINGTON STREET SUITE 115,"228 S WASHINGTON ST, ALEXANDRIA, VA 22314",False


In [77]:
new = 'KEITH DAVIS' 
dones.append(new)

these = ['KEITH DAVIS', 'KETH DAVIS']

getall(these, new)

orig len: 44
new len: 45


In [78]:
cm[cm['tres_nm'].apply(lambda x: x in ['MIKE DAVIS', 'MICHAEL DAVIS'])]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
15960,C00683573,STRENGTH AND PROGRESS INC,MICHAEL DAVIS,30 N GOULD ST,STE 7981,SHERIDAN,WY,82801,U,N,UNK,Q,,,,"DAVIS, MICHAEL",30 N GOULD ST,"30 N GOULD ST, SHERIDAN, WY 82801",False


In [79]:
dones.extend(findall(' davis'))

In [80]:
findall(['phillips', 'ob'])

['ROB PHILLIPS', 'ROBERT PHILLIPS']

In [81]:
new = 'ROBERT PHILLIPS' 
dones.append(new)

these = findall(['phillips', 'ob'])

getall(these, new)

orig len: 20
new len: 24


In [82]:
findall(' mele')

['STEVE MELE', 'STEVEN MELE']

In [83]:
cm[cm['tres_nm'] == 'STEVE MELE']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
5594,C00507574,MOTOR CITY PAC,STEVE MELE,611 PENNSYLVANIA AVE SE,STE 143,WASHINGTON,DC,20003,D,Q,UNK,M,,,,"MELE, STEVE",611 PENNSYLVANIA AVENUE SE,"611 PENNSYLVANIA AVE SE, WASHINGTON, DC 20003",False
12747,C00650366,LESLIE COCKBURN FOR CONGRESS,STEVE MELE,PO BOX 186,,SPERRYVILLE,VA,22740,P,H,DEM,Q,,NONE,H8VA05155,"MELE, STEVE",PO BOX 186,"PO BOX 186, SPERRYVILLE, VA 22740",True
16182,C00685842,THE ARENA CANDIDATE PAC HOUSE VICTORY FUND,STEVE MELE,611 PENNSYLVANIA AVE SE,NUM 143,WASHINGTON,DC,20003,J,H,UNK,T,,,,"MELE, STEVE",611 PENNSYLVANIA AVE SE,"611 PENNSYLVANIA AVE SE, WASHINGTON, DC 20003",False
16851,C00692988,ALL FOR OUR COUNTRY VICTORY FUND,STEVE MELE,611 PENNSYLVANIA AVE SE,STE. 143,WASHINGTON,DC,20003,J,N,UNK,Q,,NONE,,"MELE, STEVE",611 PENNSYLVANIA AVENUE SE,"611 PENNSYLVANIA AVE SE, WASHINGTON, DC 20003",False


In [84]:
cm[cm['tres_nm'] == 'STEVEN MELE'].head()

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
6138,C00526657,TOWARD TOMORROW PAC,STEVEN MELE,PO BOX 544,,SAN ANTONIO,TX,78292,D,Q,UNK,M,,,,"MELE, STEVEN",PO BOX 544,"PO BOX 544, SAN ANTONIO, TX 78292",False
8535,C00583104,CORTEZ MASTO VICTORY FUND,STEVEN MELE,611 PENNSYLVANIA AVE SE,,WASHINGTON,DC,20003,J,N,UNK,T,,,,STEVEN MELE,611 PENNSYLVANIA AVE SE SUITE 143,"611 PENNSYLVANIA AVE SE, WASHINGTON, DC 20003",False
9460,C00606939,ROSEN FOR NEVADA,STEVEN MELE,PO BOX 27195,,LAS VEGAS,NV,89126,P,S,DEM,Q,,ROSEN VICTORY FUND,S8NV00156,"MELE, STEVEN",PO BOX 27195,"PO BOX 27195, LAS VEGAS, NV 89126",True
10991,C00627232,IVOTE ACTION FUND,STEVEN MELE,722 12TH ST NW,3RD FLOOR,WASHINGTON,DC,20005,U,O,UNK,Q,,NONE,,"MELE, STEVEN",722 12TH ST NW,"722 12TH ST NW, WASHINGTON, DC 20005",False
11170,C00629212,ALL FOR OUR COUNTRY LEADERSHIP PAC,STEVEN MELE,611 PENNSYLVANIA AVE SE,,WASHINGTON,DC,20003,D,Q,UNK,M,,,,STEVEN MELE,"611 PENNSYLVANIA AVE SE, #143","611 PENNSYLVANIA AVE SE, WASHINGTON, DC 20003",False


In [85]:
new = 'STEVEN MELE'
dones.append(new)

these = findall(['steve', ' mele'])

getall(these, new)

orig len: 22
new len: 26


In [86]:
findall(['su', 'jackson'])

['SUE JACKSON', 'SUSAN JACKSON']

In [87]:
new = 'SUSAN JACKSON'
dones.append(new)

these = findall(['su', 'jackson'])

getall(these, new)

orig len: 2
new len: 21


In [88]:
findall(' otte')

['BENJAMIN OTTENHOFF', 'TIMOTHY OTTEN']

In [89]:
new = 'BENJAMIN OTTENHOFF'
dones.append(new)

these = findall(' otte')

getall(these, new)

orig len: 55
new len: 56


In [90]:
findall(' mcmi')

['ANN MCMILLAN',
 'COLLIN MCMICHAEL',
 'GEETA MCMILLAN',
 'KURT MCMILLAN',
 'MARY MCMILLAN']

In [91]:
dones.extend(findall(' mcmi'))

In [92]:
findall(' lew')

['ANDY LEWIS JAMES',
 'CHERYL LEWIS',
 'CORDELIA LEWIS BURKS',
 'DEBORAH LEWIS',
 'DENISE LEWIS',
 'EMANUEL LEWIS',
 'JACK LEWIS CHARBONNEAU',
 'JEFF LEWIS',
 'JENNIFER LEWIS',
 'KEET LEWIS',
 'LARRY LEWIS',
 'LEROY LEWIS',
 'LINDSAY LEWIS',
 'MARC LEWKOWITZ',
 'MARK LEWIS',
 'MARY PARKER LEWIS',
 'MICHAEL LEWIS',
 'NANCY LEWIS',
 'NATHAN LEWIS WURTZEL',
 'PAUL LEWIS',
 'ROSS BAZELON LEWIN',
 "SABRINA Y'VES LEWIS-JONES",
 'SANDRA LEWIS',
 'SCOTT LEWIS',
 'WENDY LEWIS',
 'WILLIAM LEWIS FANATIA']

In [93]:
findall(['and', ' james'])

['ALEX-ST JAMES ANDREW RAILEY-CISCO',
 'ANDREW JAMES MCDOWELL',
 'ANDY LEWIS JAMES',
 'BRANDON MICHAEL JAMES',
 'CHANDLER JAMES HOUGHTLEN']

In [94]:
findall(['cord', 'burk'])

['CORDELIA LEWIS BURKS']

In [95]:
findall('charbon')

['JACK LEWIS CHARBONNEAU']

In [96]:
findall(['nat', 'wurtz'])

['NATHAN LEWIS WURTZEL']

In [97]:
findall(' alston')

['LINDA ALSTON']

In [98]:
cm[cm['tres_nm'] == 'WENDY LEWIS']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
3934,C00450411,MOMENTUM PAC,WENDY LEWIS,1200 SMITH,SUITE 1600,HOUSTON,TX,77002,U,N,UNK,A,,,,WENDY LEWIS,1200 SMITH,"1200 SMITH, HOUSTON, TX 77002",False


In [99]:
cm[cm['tres_nm'] == 'WENDY LEWIS ARMSTRONG']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [100]:
new = 'WENDY LEWIS'
dones.append(new)

these = findall('wendy lewis')

getall(these, new)

orig len: 1
new len: 1


In [101]:
dones.extend(findall(' lew'))

In [102]:
findall(' lawl')

['CHRISTOPHER LAWLOR', 'KELLY LAWLER', 'SEWARD LAWLOR']

In [103]:
dones.extend(findall(' lawl'))

In [104]:
findall(' crum')

['GARY CRUMMITT', 'JANE CRUMPLER', 'JOHN CRUMP']

In [105]:
dones.extend(findall(' crum'))

In [106]:
findall(' slater')

['JEN SLATER', 'TODD SLATER']

In [107]:
dones.extend(findall(' slater'))

In [108]:
findall(' mccaul')

['ALICIA MCCAULEY', 'MICHAEL MCCAULEY', 'MIKE MCCAULEY']

In [109]:
dones.extend(findall(' mccaul'))

In [110]:
findall(' montg')

['CHRISTOPHER MONTGOMERY WOODFIN',
 'DAVA MONTGOMERY',
 'DIANE MONTGOMERY',
 'JAY MONTGOMERY',
 'LUKE MONTGOMERY',
 'MEGAN MONTGOMERY',
 'RUSSELL MONTGOMERY',
 'THOMAS MONTGOMERY']

In [111]:
dones.extend(findall(' montg'))

In [112]:
findall(' nis')

['MELISSA NISSEN']

In [113]:
dones.extend(findall(' nis'))

In [114]:
findall(' matthews')

['LAURA MATTHEWS', 'PAUL MATTHEWS']

In [115]:
dones.extend(findall(' matthews'))

In [116]:
findall(' goul')

['ANN MARIE GOULD', 'DAVID GOULD', 'VAN CHARLES GOULD']

In [117]:
cm[cm['tres_nm'].apply(lambda x: ('ADAM' in x) & ('GOULD' in x))]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [118]:
new = 'ADAM KOHL GOULD'
dones.append(new)

these = findall(['adam', 'gould'])

getall(these, new)

orig len: 0
new len: 0


In [119]:
dones.extend(findall(' goul'))

In [120]:
findall(' gan')

['CHARLES GANTT',
 'HARVEY GANTT',
 'MARIA VICTORIA GANACIAS BORJA',
 'MARY RANDOLPH GANNON',
 'PETER GANGI',
 'RON GANT']

In [121]:
findall('borja')

['MARIA VICTORIA GANACIAS BORJA']

In [122]:
findall(['mary', 'randolph'])

['MARY RANDOLPH GANNON']

In [123]:
dones.extend(findall(' gan'))

In [124]:
findall(' carli')

['HELEN CARLIN', 'JAMES CARLISLE', 'MICHAEL CARLIN', 'ROBERT CARLIN']

In [125]:
dones.extend(findall(' carli'))

In [126]:
findall(' kra')

['ALLEN KRAMER',
 'BETH KRATOCHVIL',
 'BLAKE ALAN KRAPF',
 'CATHERINE KRANTZ',
 'DAVID KRALLE',
 'DAVID KRAMER',
 'DON KRAUS',
 'JOHN KRALL',
 'PATRICK KRASON',
 'SHAWN KRAUSE',
 'STEVEN KRAVITZ',
 'TOM KRAUSE']

In [127]:
cm[cm['tres_nm'] == 'THOMAS KRAUS']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [128]:
cm[cm['tres_nm'] == 'TOM KRAUSE']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
15667,C00680504,BROADCOM INC. POLITICAL ACTION COMMITTEE (BROA...,TOM KRAUSE,1320 RIDDER PARK DR,,SAN JOSE,CA,95131,U,Q,UNK,M,C,,,"KRAUSE, TOM",1320 RIDDER PARK DR.,"1320 RIDDER PARK DR, SAN JOSE, CA 95131",False


In [129]:
dones.extend(findall(' kra'))

In [130]:
findall(' koch')

['KEVIN KOCH',
 'LINDA KOCH',
 'NICHOLAS KOCH',
 'THEODORE KOCH',
 'TIMOTHY KOCH',
 'TOM KOCHAN']

In [131]:
dones.extend(findall(' koch'))

In [132]:
findall(' win')

['ALAN WINDGASSEN',
 'ALISON WINGATE',
 'ANDREW WINTERING',
 'ARTHUR WINSTEAD',
 'BRIGEN WINTERS',
 'DEAN WINE',
 'ELIZABETH WINN BJORK',
 'HICKS WINTERS',
 'JOHN WINKLER',
 'KAREN WINGARD',
 'MICHAEL WINTERHALTER',
 'PATRICIA WINTER',
 'ROBERT LEROY WINTER',
 'ROBERT VAN WINTON',
 'STEVE WIND',
 'THOMAS WINTER',
 'VICKIE WINPISINGER']

In [133]:
dones.extend(findall(' win'))

In [134]:
findall(' rit')

['ANDREW RITTENBERG',
 'ERIN RITTER',
 'JOEL RITER',
 'MAX WILSON RITTER',
 'TOM RITTER']

In [135]:
dones.extend(findall(' rit'))

In [136]:
findall(' tat')

['ADAM TATUN',
 'ALIX TATE',
 'BRIAN TATUM',
 'DARRYL TATTRIE',
 'ELIZABETH TATE',
 'JOHN TATE',
 'MARTHA SHAW TATE',
 'SAMBA TATA',
 'SRINIVASA TATINENI',
 'STANLEY TATE']

In [137]:
cm[cm['tres_nm'].apply(lambda x: x in ['BRIAN TATUM', 'BRYAN TATUM'])]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
3203,C00415026,AMERICAN FUELS AND PETROCHEMICAL MANUFACTURERS...,BRIAN TATUM,1800 M ST NW,SUITE 900 NORTH,WASHINGTON,DC,20036,B,Q,UNK,M,T,AMERICAN FUELS AND PETROCHEMICAL MANUFACTURERS...,,"TATUM, BRIAN",1800 M STREET NW,"1800 M ST NW, WASHINGTON, DC 20036",False


In [138]:
dones.extend(findall(' tat'))

In [139]:
findall('ralls')

['STEVE RALLS', 'STEVEN RALLS']

In [140]:
cm[cm['tres_nm'] == 'STEVEN RALLS']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
2845,C00395467,JEFF FORTENBERRY FOR UNITED STATES CONGRESS,STEVEN RALLS,PO BOX 30265,,LINCOLN,NE,68503,P,H,REP,Q,,NONE,H4NE01064,"RALLS, STEVEN",PO BOX 30265,"PO BOX 30265, LINCOLN, NE 68503",True


In [141]:
cm[cm['tres_nm'] == 'STEVE RALLS'].head()

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
3389,C00426494,FOR OUR REPUBLIC'S TRADITIONS FUND AKA FORT FUND,STEVE RALLS,PO BOX 30883,,LINCOLN,NE,68503,D,N,UNK,M,,JEFF FORTENBERRY,,"RALLS, STEVE MR.",PO BOX 30883,"PO BOX 30883, LINCOLN, NE 68503",False
3416,C00427781,ONLINE LENDERS ALLIANCE POLITICAL ACTION COMMI...,STEVE RALLS,PO BOX 15480,SE STATION,WASHINGTON,DC,20003,U,Q,UNK,M,M,ONLINE LENDERS ALLIANCE,,"RALLS, STEVE",PO BOX 15480,"PO BOX 15480, WASHINGTON, DC 20003",False
5012,C00489336,WILD AND WONDERFUL PAC,STEVE RALLS,332 W LEE HWY,# 303,WARRENTON,VA,20186,D,Q,UNK,M,,,,"RALLS, STEVE",332 W LEE HWY,"332 W LEE HWY, WARRENTON, VA 20186",False
5345,C00498345,MULLIN FOR CONGRESS,STEVE RALLS,PO BOX 3681,,MUSKOGEE,OK,74402,P,H,REP,Q,,MULLIN VICTORY FUND,H2OK02083,"RALLS, STEVE",PO BOX 3681,"PO BOX 3681, MUSKOGEE, OK 74402",True
5508,C00504365,FUNDING REPUBLICANS SUPPORTING OPPORTUNITY AND...,STEVE RALLS,332 W LEE HWY,# 303,WARRENTON,VA,20186,D,N,UNK,T,,,,STEVE RALLS,332 W LEE HWY,"332 W LEE HWY, WARRENTON, VA 20186",False


In [142]:
new = 'STEVE RALLS'
dones.append(new)

these = findall('ralls')

getall(these, new)

orig len: 22
new len: 23


In [143]:
findall(' has')

['BECKY HASSLEN',
 'CHRISSIE HASTIE',
 'GARY HASTY',
 'JOSEPH HASTO',
 'KHAWAR HASSAN',
 'NEIL HASSETT']

In [144]:
dones.extend(findall(' has'))

In [145]:
findall(' buch')

['EMILY BUCHANAN',
 'KATHERINE BUCHANAN',
 'KIMBERLY BUCHAN',
 'MARY ANNE BUCHANAN',
 'SHAWN BUCHTEL',
 'TIM BUCHE']

In [146]:
dones.extend(findall(' buch'))

In [147]:
findall(' bac')

['ANDREW BACON',
 'BENJAMIN BACKER',
 'CATHARYNE BACH',
 'DAN BACHUS',
 'DAN BACKER',
 'DANIEL BACINE',
 'JA BACHMAN',
 'JOEL BACON',
 'WILLIAM BACHMAN']

In [148]:
dones.extend(findall(' bac'))

In [149]:
findall(' brog')

['KEVIN BROGHAMER', 'LEE ANNE BROGOWSKI', 'MICHAEL BROGAN', 'RICHARD BROGAN']

In [150]:
dones.extend(findall(' brog'))

In [151]:
findall(' gia')

['ANDREW GIANNONE',
 'FRANCIS GIARDIELLO',
 'HOLLY GIARRAPUTO',
 'JOEL GIANNELLI',
 'SAVERIO GIAMBALVO',
 'SUSAN GIANNETTI LONGACRE']

In [152]:
findall('longacre')

['SUSAN GIANNETTI LONGACRE']

In [153]:
dones.extend(findall(' gia'))

In [154]:
findall(' deane')

['SHAWNDA DEANE']

In [155]:
dones.extend(findall(' deane'))

In [156]:
findall(' hag')

['DEBORAH RUTH HAGAR',
 'HILLARY HAGERTY',
 'JANICE GWYN HAGERMAN',
 'JOHN HAGY',
 'LORA HAGGARD',
 'PAUL HAGY',
 'RICHARD HAGEN',
 'SARAH HAGER',
 'TIMOTHY HAGAN']

In [157]:
dones.extend(findall(' hag'))

In [158]:
findall(' anger')

['LINDSAY ANGERHOLZER']

In [159]:
dones.extend(findall(' anger'))

In [160]:
findall(' lowe')

['AARON LOWE',
 'GRETCHEN LOWE',
 'JENNIFER LOWE',
 'KEITH LOWEY',
 'LUCAS LOWELL EASLEY',
 'ROSS LOWE',
 'TRAVIS LOWE']

In [161]:
dones.extend(findall(' lowe'))

In [162]:
findall(' bau')

['BRIAN BAUER',
 'CARL BAUMAN',
 'DAVID BAUER',
 'DAWN BAUMAN',
 'GARY BAUER',
 'LINDA BAUER DARR',
 'NATALIE BAUR',
 'ORPHEAO BAUM',
 'PAULA BAUER',
 'TED BAUER']

In [163]:
findall(['ju', 'mart'])

[]

In [164]:
dones.extend(findall(' bau'))

In [165]:
findall(' evans')

['BRENT EVANS',
 'BRIAN EVANS',
 'DEBI EVANS',
 'DIANE EVANS',
 'DOUG EVANS',
 'GEORGE EVANS',
 'HEATHER EVANS',
 'JAMES EVANS',
 'JON EVANS',
 'LYLE EVANS',
 'MELISSA ANNE EVANS',
 'NICHOLAS EVANS',
 'WALTER EVANS']

In [166]:
findall(' doyle')

['JOHN DOYLE', 'MATTHEW DOYLE', 'PETER DOYLE', 'TIM DOYLE']

In [167]:
dones.extend(findall(' doyle'))

In [168]:
dones.extend(findall(' evans'))

In [169]:
findall(' mason')

['DAVE MASON',
 'DAVID MASON',
 'DIRK MASON CANTRELL',
 'DYANA MASON',
 'JACQUELINE MASON',
 'MARCUS MASON',
 'MICHAEL MASON',
 'STACY MASON']

In [170]:
cm[cm['tres_nm'] == 'DAVE MASON'].head()

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
3021,C00406850,BLUE CROSS BLUE SHIELD OF SOUTH CAROLINA FEDER...,DAVE MASON,INTERSTATE 20 AT ALPINE RD,,COLUMBIA,SC,29214,B,Q,UNK,Q,C,,,"MASON, DAVE MR.",INTERSTATE 20 AT ALPINE ROAD,"INTERSTATE 20 AT ALPINE RD, COLUMBIA, SC 29214",False


In [171]:
cm[(cm['tres_nm'] == 'DAVID MASON') & (cm['cmte_st'] == 'SC')].head()

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [172]:
findall('cantrell')

['DIRK MASON CANTRELL']

In [173]:
cm[cm['tres_nm'] == 'MICHAEL THOMAS MASON'].head()

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [174]:
cm[(cm['tres_nm'] == 'MICHAEL MASON') & (cm['cmte_st'] == 'TX')].head()

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
1574,C00268904,TRINITY INDUSTRIES EMPLOYEE POLITICAL ACTION C...,MICHAEL MASON,2525 N STEMMONS FREEWAY,,DALLAS,TX,75207,B,Q,UNK,M,C,TRINITY INDUSTRIES INC.,,"MASON, MICHAEL J.",2525 N. STEMMONS FREEWAY,"2525 N STEMMONS FREEWAY, DALLAS, TX 75207",False


In [175]:
new = 'MICHAEL MASON'
dones.append(new)

these = findall(['michael', 'mason'])

getall(these, new)

orig len: 1
new len: 1


In [176]:
findall(' hale')

['CHRISTINA HALEY',
 'DAN HALEY',
 'DONALD BRETT HALE',
 'JAMES HALE',
 'NANCY HALEY',
 'RANDY HALE',
 'TONY HALE',
 'YVONNE HALEJKO']

In [177]:
dones.extend(findall(' mason'))

In [178]:
findall(' lloyd')

['JOHN LLOYD', 'JUSTIN LLOYD MCNEEL', 'PHILIP LLOYD', 'WILLIS LLOYD']

In [179]:
findall(' mcneel')

['JUSTIN LLOYD MCNEEL']

In [180]:
dones.extend(findall(' lloyd'))

In [181]:
findall(' johnson')

['ANDREW JOHNSON',
 'AQUASIA JOHNSON',
 'ARLEY JOHNSON',
 'ARNOLD JOHNSON',
 'BETTY JOHNSON',
 'BRAD JOHNSON',
 'BRIAN JOHNSON',
 'BURT JOHNSON',
 'CATHERINE JOHNSON',
 'CHARLES JOHNSON',
 'CHELSEA JOHNSON',
 'CHRIS JOHNSON',
 'CLAY JOHNSON',
 'CLIFFORD JOHNSON',
 'CONSTANCE JOHNSON',
 'CORY JOHNSON',
 'DAVID JOHNSON',
 'DUANE JOSEPH JOHNSON',
 'EDDIE JOHNSON',
 'EMMA JOHNSON',
 'ERIC JOHNSON',
 'ERIK ARLEN JOHNSON',
 'GEORGE JOHNSON',
 'GREGORY JOHNSON',
 'JAMES JOHNSON',
 'JUDITH JOHNSON',
 'JULIE JOHNSON',
 'KENNETH JOHNSON',
 'LINELL JOHNSON',
 'LORRETTA JOHNSON',
 'LOUIS JOHNSON',
 'MARK JOHNSON',
 'MATTHEW JOHNSON',
 'MELODIE JOHNSON',
 'MICHAEL JOHNSON',
 'OTIS LEE JOHNSON',
 'PRINCESS JOHNSON',
 'ROBERT JOHNSON',
 'SARAH JOHNSON',
 'STANLEY JOHNSON',
 'STEPHEN JOHNSON',
 'SUSAN JOHNSON',
 'THOMAS JOHNSON',
 'THOMAS WAYNE JOHNSON',
 'VERDELL JOHNSON',
 'WILLIAM JOHNSON']

In [182]:
cm[cm['tres_nm'] == 'ARLEN JOHNSON']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [183]:
cm[cm['tres_nm'] == 'ERIK ARLEN JOHNSON']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
16221,C00686238,RESISTERY PAC,ERIK ARLEN JOHNSON,275 LAKE DR,,KENSINGTON,CA,94708,U,O,UNK,Q,,,,ERIK ARLEN JOHNSON,275 LAKE DRIVE,"275 LAKE DR, KENSINGTON, CA 94708",False


In [184]:
cm[cm['tres_nm'] == 'CHRIS JOHNSON']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
6169,C00528166,NATIONAL ASSOCIATION FOR FIXED ANNUITIES POLIT...,CHRIS JOHNSON,1155 F ST NW,SUITE 1050,WASHINGTON,DC,20004,U,Q,UNK,Q,T,,,"JOHNSON, CHRIS",1155 F ST NW,"1155 F ST NW, WASHINGTON, DC 20004",False


In [185]:
cm[cm['tres_nm'] == 'CHRISTOPHER LEE JOHNSON']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [186]:
cm[cm['tres_nm'] == 'JIM JOHNSON']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [187]:
cm[cm['tres_nm'] == 'JAMES JOHNSON']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
1378,C00236034,WERNER ENTERPRISES INC POLITICAL ACTION COMMITTEE,JAMES JOHNSON,14507 FRONTIER RD,,OMAHA,NE,68138,U,Q,UNK,Q,C,WERNER ENTERPRISES INC,,"JOHNSON, JAMES L.",14507 FRONTIER ROAD,"14507 FRONTIER RD, OMAHA, NE 68138",False
3162,C00412569,DENTON COUNTY REPUBLICAN VICTORY FUND,JAMES JOHNSON,2921 COUNTRY CLUB RD,,DENTON,TX,76202,U,X,REP,M,M,REPUBLICAN PARTY OF TEXAS,,"JOHNSON, JAMES S.",2921 COUNTRY CLUB RD #102,"2921 COUNTRY CLUB RD, DENTON, TX 76202",False


In [188]:
cm[cm['tres_nm'] == 'LORETTA JOHNSON']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [189]:
cm[cm['tres_nm'] == 'LORRETTA JOHNSON']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
216,C00028860,"AMERICAN FEDERATION OF TEACHERS, AFL-CIO COMMI...",LORRETTA JOHNSON,555 NEW JERSEY AVE NW,,WASHINGTON,DC,20001,B,Q,UNK,M,L,"AMERICAN FEDERATION OF TEACHERS, AFL-CIO",,"JOHNSON, LORRETTA","555 NEW JERSEY AVENUE, NW","555 NEW JERSEY AVE NW, WASHINGTON, DC 20001",False
17352,C70002472,"AMERICAN FEDERATION OF TEACHERS, AFL-CIO",LORRETTA JOHNSON,555 NEW JERSEY AVE NW,,WASHINGTON,DC,20001,U,C,UNK,Q,L,,,"JOHNSON, LORRETTA","555 NEW JERSEY AVENUE, N.W.","555 NEW JERSEY AVE NW, WASHINGTON, DC 20001",False


In [190]:
new = 'LORRETTA JOHNSON'
dones.append(new)

these = findall(['lor', 'etta', 'johnson'])

getall(these, new)

orig len: 2
new len: 2


In [191]:
findall(['mi', 'gibson'])

['MICHAEL GIBSON']

In [192]:
dones.extend(findall(' johnson'))

In [193]:
findall(' goldstein')

[]

In [194]:
dones.extend(findall(' goldstein'))

In [195]:
findall(' marks')

['BRUCE MARKS',
 'ELENA MARKS',
 'JEFFREY MARKS',
 'MICHELLE MARKS-OSBORNE',
 'NANCY MARKS',
 'STEPHEN MARKS']

In [196]:
dones.extend(findall(' marks'))

In [197]:
findall(' grav')

['RONALD GRAVINO', 'SCOTT GRAVES', 'SHARRON GRAVES']

In [198]:
dones.extend(findall(' grav'))

In [199]:
findall(' eich')

['ALYSSA EICHMAN', 'FRED EICHEL', 'RICHARD EICHMAN']

In [200]:
dones.extend(findall(' eich'))

In [201]:
findall(' fou')

['BRIAN FOUCART', 'KEITH FOURNIER', 'ROGER FOUNTAIN']

In [202]:
dones.extend(findall(' fou'))

In [203]:
findall(' martin')

['ANDY MARTIN',
 'ANTONIO MARTINEZ',
 'CARL MARTIN NELSON',
 'CASSANDRA MARTINEZ',
 'CHARLES MARTIN',
 'CHRIS MARTIN',
 'CLIFFORD MARTIN',
 'DAVID MARTIN',
 'ED MARTIN',
 'EDWARD MARTIN',
 'ERNESTO MARTINEZ',
 'FRANCES MARTIN',
 'GAYLE MARTIN',
 'GEMMA MARTIN',
 'GREGORY MARTIN WADE',
 'JANE MARTIN',
 'JANICE MARTIN',
 'JEAN MARTINEZ',
 'JENA TONICE MARTIN',
 'JENNIFER MARTIN',
 'JOHN MARTIN',
 'JONATHAN MARTIN',
 'JOSE DANIEL MARTINES',
 'JOSEPH MARTIN',
 'KEVIN MARTIN',
 'LARISSA MARTINEZ',
 'LILIANA MARTINEZ',
 'LISA MARTINEZ',
 'LOUIE CRUZ MARTINEZ',
 'MARIA MARTINEZ',
 'MARIO MARTINEZ',
 'NOVEL MARTIN',
 'PAUL MARTINO',
 'PAULETTE MARIE MARTIN',
 'ROBERT MARTINEZ',
 'ROQUE MARTINEZ',
 'SANDRA MARTINEZ',
 'SARAH MARTIN',
 'STEVEN MARTIN',
 'SUSAN MARTIN',
 'TERESA MARTINEZ',
 'XAVIER MARTINEZ']

In [204]:
findall(' salazar')

[]

In [205]:
findall('salzburg')

['MICHAEL JOSEPH SALZBURG-FELTS']

In [206]:
cm[cm['tres_nm'] == 'STEVE MARTIN']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [207]:
cm[(cm['tres_nm'] == 'STEVEN MARTIN') & (cm['cmte_st'] == 'NC')]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [208]:
dones.extend(findall(' martin'))

In [209]:
findall(' davidson')

['CARY DAVIDSON', 'DWIGHT DAVIDSON', 'REBECCA DAVIDSON', 'ROBERT DAVIDSON']

In [210]:
findall(['paul', 'reynolds'])

['PAUL REYNOLDS']

In [211]:
cm[cm['tres_nm'] == 'PAUL REYNOLDS']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
12660,C00649459,DIANE FOR COLORADO CD3,PAUL REYNOLDS,PO BOX 771606,,STEAMBOAT SPRINGS,CO,80477,P,H,DEM,Q,,DIANE MITSCH BUSH VICTORY FUND,H8CO03192,"REYNOLDS, PAUL D. MR.",PO BOX 771606,"PO BOX 771606, STEAMBOAT SPRINGS, CO 80477",True
16364,C00687715,DIANE MITSCH BUSH VICTORY FUND,PAUL REYNOLDS,PO BOX 771606,,STEAMBOAT SPRINGS,DC,80477,J,N,UNK,Q,,NONE,,"REYNOLDS, PAUL D. MR.",PO BOX 771606,"PO BOX 771606, STEAMBOAT SPRINGS, DC 80477",False


In [212]:
cm[cm['tres_nm'] == 'PAUL DAVIDSON REYNOLDS']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [213]:
new = 'PAUL DAVIDSON REYNOLDS'
dones.append(new)

these = findall(['paul', 'reynolds'])

getall(these, new)

orig len: 0
new len: 2


In [214]:
dones.extend(findall(' davidson'))

In [215]:
findall(' bren')

['ANNE BRENSLEY',
 'CHRISTOPHER BRENNAN',
 'GREGORY BRENNICK',
 'LAUREL BRENNAN',
 'MEGAN BRENGARTH',
 'OZY ROSE BRENNAN',
 'TOM BRENEMAN',
 'TUCKER BRENNAN']

In [216]:
findall(' mantan')

[]

In [217]:
dones.extend(findall(' bren'))

In [218]:
findall(' mont')

['CHRISTOPHER MONTGOMERY WOODFIN',
 'DACEY MONTOYA',
 'DARRIN MONTEIRO',
 'DAVA MONTGOMERY',
 'DIANE MONTGOMERY',
 'JAY MONTGOMERY',
 'LUKE MONTGOMERY',
 'MEGAN MONTGOMERY',
 'MICHAEL MONT',
 'PATRICIA MONTAGUE',
 'RUSSELL MONTGOMERY',
 'THOMAS MONTGOMERY']

In [219]:
dones.extend(findall(' mont'))

In [220]:
findall(' hank')

['BRENDA HANKINS']

In [221]:
dones.extend(findall(' hank'))

In [222]:
findall(' lesh')

['GREGORY LESHOCK', 'MEREDITH LESHER', 'WARREN LESHNER']

In [223]:
dones.extend(findall(' lesh'))

In [224]:
findall(' bre')

['ANNE BRENSLEY',
 'BAILEY BRESSLER',
 'BEN BREWSTER',
 'BENJAMIN BREUER',
 'CATHERINE BRESLER',
 'CHARLES BREIT',
 'CHRISTOPHER BRENNAN',
 'CHRISTOPHER SCOTT BREWER',
 'DONALD BRETT HALE',
 'GREGORY BRENNICK',
 'HANNELORE BREITMEYER-JONES',
 'JAMES TROY BREWER',
 'JONATHAN BRETT RINGHAM',
 'LAUREL BRENNAN',
 'MARK BREBBERMAN',
 'MEGAN BRENGARTH',
 'MICHAEL BREATHES',
 'NICOLE BREWIN',
 'OZY ROSE BRENNAN',
 'PATRICIA BREED',
 'PAUL BREAZEALE',
 'TOM BRENEMAN',
 'TROY BREWER',
 'TUCKER BRENNAN',
 'VICTOR BREED',
 'WADE BREWER']

In [225]:
cm[cm['tres_nm'] == 'JAMES BREWER']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [226]:
cm[cm['tres_nm'] == 'JAMES TROY BREWER']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
11743,C00635219,DAVID PAC DELIVERING AMERICAN VALUES IN DC,JAMES TROY BREWER,95 WHITE BRIDGE RD,,NASHVILLE,TN,37205,D,N,UNK,Q,,DAVID KUSTOFF,,JAMES TROY BREWER,95 WHITE BRIDGE RD SUITE 207,"95 WHITE BRIDGE RD, NASHVILLE, TN 37205",False
15870,C00682633,TENNESSEANS FOR A STRONG AMERICA PAC,JAMES TROY BREWER,95 WHITE BRIDGE RD,SUITE 207,NASHVILLE,TN,37205,U,O,UNK,Q,,NONE,,"BREWER, JAMES TROY",95 WHITE BRIDGE RD,"95 WHITE BRIDGE RD, NASHVILLE, TN 37205",False


In [227]:
new = 'JAMES BREWER'
dones.append(new)

these = findall(['james', 'brewer'])

getall(these, new)

orig len: 0
new len: 2


In [228]:
findall(' ringham')

['JONATHAN BRETT RINGHAM']

In [229]:
dones.extend(findall(' bre'))

In [230]:
findall(' sel')

['CRYSTAL SELLERS',
 'GREGORY SELTZER',
 'JANET SELWAY',
 'JOHN SELPH',
 'JUDITH SELZER',
 'KATHY SELVAGGIO',
 'LAURA SELKEN',
 'LAURIE SELF',
 'LINDA SELL',
 'MONIKA SELMONT',
 'PAUL SELTMAN',
 'STEVE SELTZER']

In [231]:
findall(['ja', 'peters'])

[]

In [232]:
findall(['sar', 'scot'])

['SARAH HARPER SCOTT']

In [233]:
dones.extend(findall(' sel'))

In [234]:
findall(' owen')

['DAVID OWEN',
 'ETHAN OWENS',
 'GAIL DENISE OWENS',
 'JOHN OWENS',
 'LINDA OWENS',
 'OSCAR OWENS',
 'ROBERT OWENS',
 'RODNEY OWEN MOSIER',
 'SELENA OWENS',
 'STACY OWENS',
 'THADDEUS OWENS',
 'WENDI OWEN',
 'WENDY ANNOR OWENS']

In [235]:
findall(' mosier')

['HELEN MOSIER', 'JONATHAN MOSIER', 'RODNEY OWEN MOSIER']

In [236]:
dones.extend(findall(' owen'))

In [237]:
findall(' vo')

['CHRISTOPHER VOROS',
 'DATHAN VOELTER',
 'FRANCIS VOIGT',
 'HARRISON WAGNER VON DWINGELO',
 'JEN GILBERT VOSS',
 'MARK VOGEL',
 'MARTIN VOGT',
 'PATRICK VOSS',
 'PAUL VOGEL',
 'RICHARD VOLIVA',
 'ROBERT VOLTMANN',
 'ROGER VON TING',
 'TARYN VOGEL']

In [238]:
findall(' luza')

[]

In [239]:
dones.extend(findall(' vo'))

In [240]:
findall(' bol')

['APRIL BOLING',
 'CHRISTINE BOLDT',
 'DANIEL BOLLNER',
 'DIANE BOLAK',
 'DONALD PATRICK BOLENA',
 'ELDAR BOLSAKOV',
 'JEFFERY BOLTON',
 'JERRY BOLES',
 'JESSE BOLIN',
 'JOHN BOLING',
 'KIMBERLY BOLIN',
 'MICHAEL BOLAND',
 'MICHAEL BOLLENTIN',
 'MICHAEL BOLTON',
 'MIKE BOLAND',
 'NATHAN BOLT',
 'PRENTISS BOLIN',
 'REGINALD BOLDING',
 'TOM BOLOVINOS',
 'VINCENT BOLLON']

In [241]:
dones.extend(findall(' bol'))

In [242]:
findall(' hub')

['AARON HUBBARD',
 'BEN HUBBY',
 'CHARLES HUBERT HAUSER',
 'EDWARD HUBBARD',
 'FREDDIE HUBBARD',
 'HARRY HUBBARD',
 'JACK HUBBARD',
 'JIM HUBBARD',
 'SCOTT HUBAY',
 'TSHOMBE HUBBARD']

In [243]:
findall(' hauser')

['CHARLES HUBERT HAUSER']

In [244]:
dones.extend(findall(' hub'))

In [245]:
findall(' oz')

['NINA OZLU TUNCELI', 'WILLIAM OZANUS']

In [246]:
findall(' tunc')

['NINA OZLU TUNCELI']

In [247]:
dones.extend(findall(' oz'))

In [248]:
findall(' groe')

['ERIC GROEN', 'REBECCA GROEN']

In [249]:
dones.extend(findall(' groe'))

In [250]:
findall(' sola')

['DEREK SOLAR', 'KRISTIN SOLANDER']

In [251]:
findall(['ed', 'patterson'])

[]

In [252]:
dones.extend(findall(' sola'))

In [253]:
findall(' rupp')

['BEN RUPP', 'DAVID RUPPENICKER', 'JEFFREY RUPPERT', 'LORI RUPPEN']

In [254]:
dones.extend(findall(' rupp'))

In [255]:
findall(' schi')

['ADAM VICTOR SCHILLER',
 'AMY SCHILLING',
 'ANTHONY SCHIWEIER',
 'ERIC SCHIPPERS',
 'JAMES SCHISSER',
 'JOHN SCHILLING',
 'JOSEPH SCHINTZ',
 'KEVIN SCHIEFFER',
 'LOUIS SCHIAZZA',
 'MARY SCHILLING',
 'SARAH SCHIMDT']

In [256]:
dones.extend(findall(' schi'))

In [257]:
findall(' guin')

[]

In [258]:
dones.extend(findall(' guin'))

In [259]:
findall(' stone')

['ANN STONE',
 'ERIC STONEHAM',
 'LISA STONE',
 "O'LENE STONE",
 'ROBIN STONE',
 'RUTH ROCHELLE STONER',
 'SAMUEL STONE',
 'SHARON STONES',
 'SHERRI STONE']

In [260]:
dones.extend(findall(' stone'))

In [261]:
findall(' cros')

['CALEB CROSBY',
 'COLLEEN CROSSEY',
 'GRADY CROSBY',
 'KAREN CROSS',
 'MARK CROSS']

In [262]:
dones.extend(findall(' cros'))

In [263]:
findall(' gla')

['ALAN GLAZIER',
 'EUSTACE GLASGOW',
 'EZRA GLASER',
 'JOHN GLANCEY',
 'MITCH GLAZIER',
 'WILLIAM GLASS']

In [264]:
findall('hemmig')

[]

In [265]:
dones.extend(findall(' gla'))

In [266]:
findall(' ragan')

['ASHLEY RAGAN', 'JACKI RAGAN', 'JENNIFER RAGAN', 'VIRGINIA RAGAN']

In [267]:
dones.extend(findall(' ragan'))

In [268]:
findall(' mau')

['DAVID MAURO',
 'GEORGE MAURER',
 'STACEY MAUD',
 'TERRY MAUPIN',
 'WENDY MAUSOLF']

In [269]:
findall('littlejohn')

[]

In [270]:
findall('buchser')

[]

In [271]:
dones.extend(findall(' mau'))

In [272]:
findall(' snyd')

['CRAIG SNYDER', 'FRANK SNYDER', 'JASON SNYDER', 'RICHARD SNYDER']

In [273]:
dones.extend(findall(' snyd'))

In [274]:
findall(' pur')

['BYRON PURCELL',
 'CRAIG PURSER',
 'KENNETH PURNELL YANCY',
 'MICHAEL PURZYCKI',
 'PAULA PURDY',
 'SALVATORE PURPURA',
 'SANJAY PURI',
 'STARR PURDUE']

In [275]:
findall(' balc')

[]

In [276]:
dones.extend(findall(' pur'))

In [277]:
findall(' lei')

['BRYAN LEIB',
 'CODY PAUL LEISTIKOW',
 'CYNTHIA LEIGH APPLEBAUM',
 'ERIC LEIGH KELLER',
 'GARY LEIGH',
 'GLEN LEIBOWITZ',
 'JANE LEIDERMAN',
 'JUSTIN LEIGH FARBER',
 'MIGUEL LEIJA',
 'SUSAN LEIVAS-STURNER']

In [278]:
findall('bronson')

[]

In [279]:
findall('kasz')

[]

In [280]:
findall(['cynth', 'apple'])

['CYNTHIA LEIGH APPLEBAUM']

In [281]:
findall('fowler')

['PAUL FOWLER', 'RICHARD ANTHONY FOWLER', 'SCOTT FOWLER', 'SEAN FOWLER']

In [282]:
findall('keller')

['AARON KELLER',
 'CRAIG KELLER',
 'ERIC KELLER',
 'ERIC LEIGH KELLER',
 'JAMES KELLER',
 'LYNNE KELLER']

In [283]:
cm[cm['tres_nm'] == 'ERIC KELLER']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
16029,C00684282,EDEMOCRATS PAC LLC,ERIC KELLER,70 LINDEN AVE,P O BOX 2134,HADDONFIELD,NJ,8033,U,N,UNK,Q,,NONE,,"KELLER, ERIC MR",70 LINDEN AVENUE,"70 LINDEN AVE, HADDONFIELD, NJ 08033",False


In [284]:
cm[cm['tres_nm'] == 'ERIC LEIGH KELLER']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
14311,C00666537,PRESS FREEDOM POLITICAL ACTION COMMITTEE,ERIC LEIGH KELLER,902 TURKEY RUN RD,,MCLEAN,VA,22101,U,N,UNK,Q,,NONE,,"KELLER, ERIC LEIGH MR.",902 TURKEY RUN ROAD,"902 TURKEY RUN RD, MCLEAN, VA 22101",False


In [285]:
findall(' hubbard')

['AARON HUBBARD',
 'EDWARD HUBBARD',
 'FREDDIE HUBBARD',
 'HARRY HUBBARD',
 'JACK HUBBARD',
 'JIM HUBBARD',
 'TSHOMBE HUBBARD']

In [286]:
dones.extend(findall(' lei'))

In [287]:
findall(' tea')

['DONALD TEAL', 'GARY TEAL', 'MATT TEAGARDEN', 'RICHARD TEAMAN']

In [288]:
dones.extend(findall(' tea'))

In [289]:
findall(' rut')

['DEBORAH RUTH HAGAR',
 'JANNA RUTLAND',
 'JEFFREY RUTAN',
 'JOHN RUTKAUSKAS',
 'LUCY RUTISHAUSER',
 'REID RUTHERFORD',
 'SUSAN RUTT']

In [290]:
findall('bellar')

[]

In [291]:
findall(' mayo')

['HEATH MAYO']

In [292]:
dones.extend(findall(' mayo'))

In [293]:
dones.extend(findall(' rut'))

In [294]:
findall(' elli')

['ALLEN ELLISON',
 'AMY ELLIS',
 'GREG ELLIOTT',
 'HEIDI ELLIS ROBEY',
 'JEROME ELLIOTT TRES TRUSTEE FIELDS',
 'JOE ELLISON',
 'JOHN CLELLAND ELLIS',
 'LISA ELLIS',
 'MICHAEL ELLIOTT',
 'SCOTT ELLINGTON',
 'THERESA ELLINGTON',
 'TWANA ELLIOTT']

In [295]:
findall(' robey')

['HEIDI ELLIS ROBEY']

In [296]:
findall(' dahl')

['LISA DAHLING', 'THOMAS DAHLEN']

In [297]:
dones.extend(findall(' elli'))

In [298]:
findall(' kee')

['BRONWYN KEENAN',
 'DARRYL KEENAN SEGARS',
 'DAVID KEEN',
 'HELENE KEELEY',
 'JOHN KEELING',
 'LYNETTE KEETON',
 'MICHAEL KEELING',
 'THORNTON KEEL']

In [299]:
findall(' segars')

['DARRYL KEENAN SEGARS']

In [300]:
dones.extend(findall(' kee'))

In [301]:
findall(' jan')

['AUSTIN BENNET TR JANSSEN',
 'BEVE JANE THAKHAMHOR',
 'DEBORAH JANSEN',
 'DNP APRN JANICE JONES',
 'ELAINE JANUS',
 'GORAN JANKOVIC',
 'JOHN JANKOWSKI',
 'LATREASHA JANET GIVENS',
 'MICHELLE JANEEN WHITE',
 'TIMOTHY JANISSE']

In [302]:
findall(' janssen')

['AUSTIN BENNET TR JANSSEN']

In [303]:
findall(' thakh')

['BEVE JANE THAKHAMHOR']

In [304]:
findall('osgerby')

[]

In [305]:
findall('wharton')

['CYNTHIA WHARTON', 'KOTA WHARTON']

In [306]:
findall(' tanis')

[]

In [307]:
dones.extend(findall(' jan'))

In [308]:
findall(' dupr')

['ABBY DUPREE']

In [309]:
dones.extend(findall(' dupr'))

In [310]:
findall(' schwartz')

['ANDREW SCHWARTZ',
 'ARTHUR SCHWARTZ',
 'JAMES SCHWARTZ',
 'JENNIFER SCHWARTZ',
 'KARL SCHWARTZ',
 'LAURA SCHWARTZ',
 'MELODIE SCHWARTZ',
 'STEVEN SCHWARTZ']

In [311]:
cm[cm['tres_nm'] == 'LAURA SCHWARTZ']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
8348,C00580415,FASO FOR CONGRESS,LAURA SCHWARTZ,PO BOX 448,,KINDERHOOK,NY,12106,P,H,REP,Q,,FRESHMAN AGRICULTURAL REPUBLICAN MEMBERS TRUST...,H6NY19169,"SCHWARTZ, LAURA A. MS.",PO BOX 448,"PO BOX 448, KINDERHOOK, NY 12106",True
9348,C00602755,LANGE FOR CONGRESS,LAURA SCHWARTZ,PO BOX 481,,SOUTH SALEM,NY,10590,P,H,REP,Q,,NONE,,"SCHWARTZ, LAURA A.",PO BOX 481,"PO BOX 481, SOUTH SALEM, NY 10590",False
10580,C00622100,FASO VICTORY COMMITTEE,LAURA SCHWARTZ,PO BOX 98,,SOUTH SALEM,NY,10590,J,N,UNK,T,,,,"SCHWARTZ, LAURA",PO BOX 98,"PO BOX 98, SOUTH SALEM, NY 10590",False
11309,C00630681,EMPIRE STATE PAC,LAURA SCHWARTZ,PO BOX 98,,SOUTH SALEM,NY,10590,D,N,UNK,Q,,,,"SCHWARTZ, LAURA A.",PO BOX 98,"PO BOX 98, SOUTH SALEM, NY 10590",False
11556,C00633263,JOHN FASO VICTORY COMMITTEE,LAURA SCHWARTZ,PO BOX 98,,SOUTH SALEM,NY,10590,J,N,UNK,Q,,NONE,,"SCHWARTZ, LAURA",PO BOX 98,"PO BOX 98, SOUTH SALEM, NY 10590",False
14735,C00670885,NEW YORK MAJORITY VICTORY,LAURA SCHWARTZ,PO BOX 98,,SOUTH SALEM,NY,10590,J,H,UNK,T,,,,"SCHWARTZ, LAURA",PO BOX 98,"PO BOX 98, SOUTH SALEM, NY 10590",False
15542,C00679225,KEEP THE HOUSE,LAURA SCHWARTZ,PO BOX 98,,SOUTH SALEM,NY,10590,J,N,UNK,T,,,,LAURA SCHWARTZ,P.O. BOX 98,"PO BOX 98, SOUTH SALEM, NY 10590",False


In [312]:
cm[cm['tres_nm'] == 'LAURA ANN SCHWARTZ']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [313]:
dones.extend(findall(' schwartz'))

In [314]:
findall(' kau')

['AMIT KAUL',
 'EMILY KAUFMAN',
 'JASON KAUNE',
 'KARL KAUFMANN',
 'PHIL KAUFMAN',
 'ROBERT KAUFMANN',
 'SCOTT KAUFMAN',
 'STEPHEN KAUFMAN']

In [315]:
cm[cm['tres_nm'] == 'RON KAUFMAN']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [316]:
cm[cm['tres_nm'] == 'RONALD KAUFMAN']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [317]:
new = 'RONALD KAUFMAN'
dones.append(new)

these = findall(['ron', ' kaufman'])

getall(these, new)

orig len: 0
new len: 0


In [318]:
dones.extend(findall(' kau'))

In [319]:
findall(' riesc')

['JOSE RIESCO']

In [320]:
dones.extend(findall(' riesc'))

In [321]:
findall(' cop')

['ARTHUR COPLESTON',
 'BILLY COPELAND',
 'ELIZABETH COPPINGER',
 'MICHAEL COPELEY',
 'RITA COPELAND',
 'VONA COPP']

In [322]:
dones.extend(findall(' cop'))

In [323]:
findall(' bal')

['AJ BALUKOFF',
 'BRIANA BALESKIE',
 'JAMES BALL',
 'JOHN BALLARD',
 'JONATHAN BALDWIN',
 'JONATHAN BALL',
 'JOSHUA BALLARD',
 'KAREN BALLARD',
 'KEITH BALKCOM',
 'KRYSTAL BALL',
 'LAURA BALDI',
 'MATANGI BALA',
 'MATTHEW BALAZIK',
 'MICHAEL BALL',
 'PAUL BALASSA',
 'ROBERT BALGENORTH',
 'SUSAN BALKENBUSH',
 'TRACEY BALL']

In [324]:
dones.extend(findall(' bal'))

In [325]:
findall(' tall')

['VINCENT TALLMAN']

In [326]:
dones.extend(findall(' tall'))

In [327]:
findall(' juk')

['JOEL JUKUS']

In [328]:
dones.extend(findall(' juk'))

In [329]:
findall(' milln')

[]

In [330]:
dones.extend(findall(' milln'))

In [331]:
findall(' goode')

['KIMBERLY GOODEN', 'MICHAEL GOODE', 'WARREN GOODE']

In [332]:
dones.extend(findall(' goode'))

In [333]:
findall(' kun')

['ADEN KUN',
 'CRAIG KUNKLE',
 'DAVE KUNES',
 'HOORIA KUNDI',
 'JOHN KUNITZ',
 'JOHN WILLIAM KUNITZ',
 'LORNA KUNEY',
 'TEKIN KUNT']

In [334]:
cm[cm['tres_nm'].apply(lambda x: ('KUNITZ' in x) & ('JOHN' in x))]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
10896,C00626093,THIRD CONGRESSIONAL DISTRICT REPUBLICAN PARTY ...,JOHN KUNITZ,PO 390722,,EDINA,MN,55439,U,X,REP,Q,,NONE,,"KUNITZ, JOHN W MR.",PO 390722,"PO 390722, EDINA, MN 55439",False
15499,C00678789,CONSERVATIVES FOR AMERICA,JOHN WILLIAM KUNITZ,6441 BRETTON WAY,,CHANHASSEN,MN,55317,U,X,REP,Q,,NONE,,"KUNITZ, JOHN WILLIAM MR",6441 BRETTON WAY,"6441 BRETTON WAY, CHANHASSEN, MN 55317",False


In [335]:
new = 'JOHN WILLIAM KUNITZ'
dones.append(new)

these = findall(['kunitz', 'john'])

getall(these, new)

orig len: 1
new len: 2


In [336]:
dones.extend(findall(' kun'))

In [337]:
findall('lefko')

[]

In [338]:
cm[cm['tres_nm'].apply(lambda x: 'LEFKOWITZ' in x)]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [339]:
new = 'JON LEFKOWITZ'
dones.append(new)

these = findall('lefkowitz')

getall(these, new)

orig len: 0
new len: 0


In [340]:
findall(' sif')

['TIM SIFERT', 'TIMOTHY SIFERT']

In [341]:
dones.extend(findall(' sif'))

In [342]:
findall(' vand')

['CRAIG VANDERVEER',
 'DAWNE VANDIVER',
 'DICK VANDER WOUDE',
 'DIRK VANDONGEN',
 'ELIZABETH ANN VANDERWAY',
 'MIKE VANDERWEY',
 'NIGEL VANDERFORD',
 'RAY VANDRIESSCHE',
 'SHAWN VANDIVER',
 'WILLIAM VANDERBROOK']

In [343]:
dones.extend(findall(' vand'))

In [344]:
findall(' changkyu')

[]

In [345]:
new = 'JASON CHANGKYU KIM'
dones.append(new)

these = findall(' changkyu')

getall(these, new)

orig len: 0
new len: 0


In [346]:
findall(['ash', 'newman'])

['ASHLEY NEWMAN']

In [347]:
dones.extend(findall(['ash', 'newman']))

In [348]:
findall(' rogers')

['BRETT ROGERS',
 'CHERI ROGERS',
 'CINNAMON ROGERS',
 'DENNIS ROGERS',
 'EMORY ROGERS',
 'GEORGE ROGERS',
 'GLEN ROGERS',
 'GRACE ROGERS',
 'KEVIN ROGERS',
 'MARCIA MATHISON ROGERS',
 'MICHAEL ROGERS',
 'STEVEN ROGERS']

In [349]:
dones.extend(findall(' rogers'))

In [350]:
findall(' kennedy')

['CASTLEN KENNEDY',
 'GORDON KENNEDY',
 'JEANNE KENNEDY',
 'JOHN FITZGERALD KENNEDY LANKSTER',
 'KATHERINE KENNEDY',
 'KATIE KENNEDY',
 'KEVIN KENNEDY',
 'MAURA KENNEDY',
 'MICHAEL KENNEDY',
 'RICHARD KENNEDY',
 'ROBERT KENNEDY',
 'SEAN KENNEDY',
 'WARD KENNEDY']

In [351]:
cm[cm['tres_nm'].apply(lambda x: x in ['CHRIS KENNEDY', 'CHRISTOPHER KENNEDY'])]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc


In [352]:
cm[cm['tres_nm'].apply(lambda x: x in ['KATIE KENNEDY', 'KATHERINE KENNEDY'])]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
6627,C00546325,TAKING AN INDEPENDENT PERSPECTIVE TOGETHER FOR...,KATHERINE KENNEDY,2318 CURTIS ST,,DENVER,CO,80205,D,N,UNK,Q,,,,KATHERINE KENNEDY,2318 CURTIS STREET,"2318 CURTIS ST, DENVER, CO 80205",False
7169,C00558759,PDC ENERGY INC PAC,KATHERINE KENNEDY,1775 SHERMAN ST,,DENVER,CO,80203,U,Q,UNK,Q,C,,,"KENNEDY, KATHERINE",1775 SHERMAN STREET SUITE 3000,"1775 SHERMAN ST, DENVER, CO 80203",False
7399,C00564435,PRINCIPLED CONSERVATIVES OF COLORADO,KATIE KENNEDY,600 17TH ST,,DENVER,CO,80202,U,O,UNK,Q,,,,KATIE KENNEDY,600 17TH STREET SUITE 2800 SOUTH,"600 17TH ST, DENVER, CO 80202",False
8382,C00580894,VAIL RESORTS EMPLOYEE POLITICAL ACTION COMMITTEE,KATIE KENNEDY,390 INTERLOCKEN CRESCENT,,BROOMFIELD,CO,80021,U,Q,UNK,Q,C,,,"KENNEDY, KATIE",390 INTERLOCKEN CRESCENT,"390 INTERLOCKEN CRESCENT, BROOMFIELD, CO 80021",False
14698,C00670505,COMMITTEE FOR BARRINGTON,KATIE KENNEDY,745 S MILLER ST,,LAKEWOOD,CO,80226,P,H,REP,Q,,NONE,H8CO07094,"KENNEDY, KATIE",745 S. MILLER STREET,"745 S MILLER ST, LAKEWOOD, CO 80226",True
15648,C00680306,BARRIS FOR CONGRESS,KATIE KENNEDY,24853 CHRIS DR,,EVERGREEN,CO,80439,P,H,LIB,Q,,NONE,H8CO02269,"KENNEDY, KATIE",24853 CHRIS DRIVE,"24853 CHRIS DR, EVERGREEN, CO 80439",True
15864,C00682559,PROTECTING OUR CONSTITUTION,KATIE KENNEDY,5910 S UNIVERSITY BLVD C18 NO 254,,GREENWOOD VILLAGE,CO,80121,U,O,UNK,Q,,NONE,,"KENNEDY, KATIE",5910 S. UNIVERSITY BLVD C18 NO 254,"5910 S UNIVERSITY BLVD C18 NO 254, GREENWOOD V...",False


In [353]:
new = 'KATHERINE KENNEDY'
dones.append(new)

these = findall(['kat', 'kennedy'])

getall(these, new)

orig len: 2
new len: 7


In [354]:
cm[cm['tres_nm'].apply(lambda x: x in [
    'MICHAEL KENNEDY', 
    'MICHAEL WARREN KENNEDY', 
])]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address,pcc
4648,C00477299,"VMWARE, INC. POLITICAL ACTION COMMITTEE (VMWAR...",MICHAEL KENNEDY,3401 HILLVIEW AVE,,PALO ALTO,CA,94304,U,Q,UNK,M,C,,,"KENNEDY, MICHAEL",3401 HILLVIEW AVE,"3401 HILLVIEW AVE, PALO ALTO, CA 94304",False


In [355]:
dones.extend(findall(' kennedy'))

In [356]:
findall(['mel', 'allen'])

['MELISSA ALLEN']

In [357]:
findall(' himmel')

['MARC HIMMELSTEIN']

In [358]:
dones.extend(findall(' himmel'))
dones.extend(findall(['mel', 'allen']))

In [359]:
findall(' reis')

['ALAN REISCHE', 'ALEXANDER REISH', 'DANIEL REISTETER', 'MICHELE REISNER']

In [360]:
dones.extend(findall(' reis'))

In [361]:
findall(' doz')

['JAMES DOZIER', 'JOSEPH DOZIER', 'JULIE DOZIER']

In [362]:
dones.extend(findall(' doz'))

In [363]:
findall(' roz')

['ADAM ROZANSKY', 'COSTANTINO ROZZO', 'YURY ROZEL']

In [364]:
dones.extend(findall(' roz'))

In [365]:
findall(' patt')

['ALYX PATTISON',
 'ANDREW PATTERSON',
 'CHRIS PATTON',
 'JAMES PATTON',
 'MELVIN PATTERSON',
 'ROBERT PATTISON',
 'STEPHEN PATTERSON',
 'THERESA PATTARA',
 'TOM PATTERSON']

In [366]:
dones.extend(findall(' patt'))

The work is never done...

In [367]:
# checks which have already been deduped and does not display those
mask = [False if cm.loc[i, 'tres_nm'] in dones else True for i in list(cm.index)]
cm.loc[mask, 'tres_nm'].value_counts()[cm.loc[mask, 'tres_nm'].value_counts() > 1][:20]

SCOTT MACKENZIE        13
DIANA SAQUELLA         12
BAILEY MORGAN          12
SLOANE SKINNER         10
KATE LIND              10
ALEXANDER HORNADAY     10
EZEKIEL FREDERICK       9
CHRISTINA SIROIS        9
RUSSELL MILLER          8
TRAVIS KABRICK          8
THOMAS HILTACHK         8
THOMAS MAXWELL          8
CARY PETERSON           7
GIANNI DONATES          7
DANIEL SENA             7
PAUL TARNAWSKI          7
MEGAN MIELNIK           6
EUGENE SLOAN CRAIG      6
MARIA WOJCIECHOWSKI     6
RYAN PHILLIPS           6
Name: tres_nm, dtype: int64

In [368]:
dupename = list(cm['cmte_nm'].value_counts()[cm['cmte_nm'].value_counts() > 2].index)

for dupe in dupename:
    lil_cm = cm.loc[cm['cmte_nm'] == dupe, [
#         'cmte_id',
        'tres_nm',
        'address',
    ]].sort_values(['address'])
    print(dupe)
    print(lil_cm)
    print('\n')

(cmte_nm missing)
                     tres_nm                             address
17010  (no treasurer listed)  PO BOX 33524, WASHINGTON, DC 20033
8006   (no treasurer listed)                   nan, nan, nan nan
10164  (no treasurer listed)                   nan, nan, nan nan
10419  (no treasurer listed)                   nan, nan, nan nan
10519  (no treasurer listed)                   nan, nan, nan nan
12322  (no treasurer listed)                   nan, nan, nan nan
13088  (no treasurer listed)                   nan, nan, nan nan
13089  (no treasurer listed)                   nan, nan, nan nan
13396  (no treasurer listed)                   nan, nan, nan nan


CLEAN WATER ACTION PROJECT
                tres_nm                                address
17297    SOPHIE AN AOKI       11 S 12TH ST, RICHMOND, VA 23219
17337  SANDRA LEDBETTER  1320 18TH ST NW, WASHINGTON, DC 20003
17296  SANDRA LEDBETTER        6 SLOANE ST, S ORANGE, NJ 07079
17294   SOPHIE ANN AOKI      P O BOX 3361, IOWA CIT

In [369]:
# show all cmte_nm & tres_nm for committees sharing a street address
multiadd = list(cm['address'].value_counts()[cm['address'].value_counts() > 10].index)

for add in multiadd:
    lil_cm = cm.loc[cm['address'] == add, ['tres_nm', 'cmte_nm']]\
    .sort_values(['tres_nm', 'cmte_nm'], ascending = [True, True]).reset_index(drop = True)
    print('-----------------------------------------------------------')
    print(add)
    print('\n')
    treses = list(lil_cm['tres_nm'].value_counts().index)
    for tres in treses:
        lil_tres = lil_cm[lil_cm['tres_nm'] == tres]
        if len(lil_tres) > 1:
            committees = sorted(list(set(lil_tres['cmte_nm'])))
            print(tres)
            print('\n')
            for committee in committees:
                print('    ' + committee)
            print('\n')
    print('\n')
    
# "look here"

-----------------------------------------------------------
228 S WASHINGTON ST, ALEXANDRIA, VA 22314


LISA LISKER


    AMERICANS UNITED FOR FREEDOM
    ARIZONA GRASSROOTS ACTION PAC
    BACON VICTORY FUND
    BOLD ACTIVE CONSERVATIVES OF NEBRASKA PAC-BACON PAC
    BRIDGING THE GAP
    BUCKEYE JOINT FUNDRAISING COMMITTEE
    BUILDING OUR BASE - BOB PAC
    BUILDING RENEWAL IN AMERICA NOW PAC
    CITIZENS FOR PROSPERITY IN AMERICA TODAY PAC
    CONCERNED AMERICANS FOR FREEDOM & OPPORTUNITY PAC (CAFO PAC)
    CRAPO VICTORY COMMITTEE
    CREATING OPPORTUNITIES, MARKETS AND ENTHUSIASM IN RURAL KENTUCKY PAC
    FREEDOM AND SECURITY PAC
    FREEDOM PROJECT; THE
    FRIENDS OF WINNING WOMEN 2016
    FRIENDS OF WINNING WOMEN 2018
    GARDNER VICTORY COMMITTEE
    GOP MAJORITY VICTORY FUND
    GOP WINNING WOMEN
    GRAHAM MAJORITY FUND
    GROWING REPUBLICAN ACHIEVEMENTS AND PROMOTING EXCELLENCE-GRAPE JFC
    HONOR AND PRINCIPLES PAC
    HOUSE CONSERVATIVES FUND
    IOWA CONGRESSIONAL MAJORIT


-----------------------------------------------------------
PO BOX 9891, ARLINGTON, VA 22219


BENJAMIN OTTENHOFF


    ALABAMA 2017 SENATE VICTORY COMMITTEE, A JOINT FUNDRAISING COMMITTEE COMPRISED OF JUDGE ROY MOORE FOR U.S. SENATE AND THE ALABAMA REPUBLICAN PARTY
    BERGMAN VICTORY COMMITTEE
    BLUE COLLAR VICTORY FUND
    BOST VICTORY FUND
    BRAUN VICTORY COMMITTEE
    COMMON SENSE COMMON SOLUTIONS POLITICAL ACTION COMMITTEE
    CONSERVATIVE ACTION PAC
    FLORIDA WORKS
    FREEDOM AND FAIRNESS VIRGINIA
    HANDEL VICTORY FUND
    HAWLEY WIN FUND
    HELPING AMERICA'S NEXT DEDICATED ELECTED LEADERS PAC
    HK VICTORY 2018
    ILLINOIS MAJORITY FUND 2016
    INDIANA SENATE NOMINEE FUND
    INDIANA/MISSOURI VICTORY COMMITTEE
    KATKO VICTORY FUND
    LET AMERICA WORK
    MACARTHUR VICTORY
    MAJORITY IN ACTION
    MAJORITY VICTORY PROGRAM
    MARK WALKER VICTORY COMMITTEE
    MOUNTAIN FAMILIES PAC
    NRSC TARGETED STATE VICTORY FUND
    PATRIOT DAY I 2017
    PATRIOT DAY II 2

(no treasurer listed)


    ALAMEDANS UNITED SUPPORTING VELLA AND ASHCRAFT FOR CITY COUNCIL, ET AL
    GOLDEN CALIFORNIA COMMITTEE SPONSORED BY THE SEIU CALIFORNIA STATE COUNCIL
    OPPORTUNITY PAC - A COALITION OF TEACHERS HEALTH CARE GIVERS FACULTY MEMBERS SCHOOL EMPLOYEES AND PUBLIC AND PR
    PLANNED PARENTHOOD ADVOCACY PROJECT LOS ANGELES COUNTY
    PLANNED PARENTHOOD AFFILIATES OF CALIF


ART PULASKI


    COMMITTEE FOR WORKING FAMILIES, SPONSORED BY THE CALIFORNIA LABOR FEDERATION, AFL-CIO
    MILLION MORE VOTERS, SPONSORED BY THE CALIFORNIA LABOR FEDERATION, AFL-CIO




-----------------------------------------------------------
120 MARYLAND AVE NE, WASHINGTON, DC 20002


LAURA MATTHEWS


    CALIFORNIA SENATE VICTORY 2018
    FLORIDA SENATE 2016
    FLORIDA SENATE RECOUNT 2018
    HOUSE SENATE VICTORY FUND
    MASSACHUSETTS SENATE VICTORY 2018
    MCCASKILL 2018 VICTORY
    MCCASKILL SENATE VICTORY 2018
    MICHIGAN SENATE VICTORY 2018
    MINNESOTA SENATE VICTORY 2018
    NEV

CABELL HOBBS


    DALLAS ENTREPRENEURS - SESSIONS 2018 FUND
    DEFENDING AMERICAN VALUES EVERYWHERE PAC (DAVE PAC)
    GIANFORTE-ROSENDALE FUND
    NEW OC FUTURE PAC
    NOEM-RHODEN VICTORY COMMITTEE
    RED SENTINEL
    RED SENTINEL PAC
    TRANSPARENCY IN GOVERNMENT PAC


MELODIE JOHNSON


    AMERICAN UPRISING 2020
    EXCELSIORNY PAC
    FREEDOMWORKS VICTORY COMMITTEE
    IRON DOME ALLIANCE


WILLIAM OZANUS


    C3 PAC
    CIVIC
    CONSERVATIVE VOICES PAC




-----------------------------------------------------------
PO BOX 751271, LAS VEGAS, NV 89136


CHRISSIE HASTIE


    FULL HOUSE PAC
    HARDY NEVADA VICTORY FUND
    HARDY STEWART VICTORY COMMITTEE
    HARDY TARKANIAN VICTORY COMMITTEE
    HECK YEAH!
    HELLER SENATE VICTORY COMMITTEE
    MORNING IN AMERICA PAC
    TEAM HARDY NEVADA TOUGH VICTORY FUND
    THE PORTER GROUP PAC
    WIN NEVADA


RYAN PHILLIPS


    2016 SENATE MAJORITY COMMITTEE
    HECK HARDY NEVADA VICTORY COMMITTEE
    HELLER FLAKE VICTORY COMMITTEE
   

### Other columns

In [370]:
cmtetp_dict = {
    'C' : 'cmte_CommunicationCost',
    'D' : 'cmte_DelegateCommittee',
    'E' : 'cmte_ElectioneeringComms',
    'H' : 'cmte_House',
    'I' : 'cmte_IndependentExpenditor',
    'S' : 'cmte_Senate',
    'N' : 'cmte_PACnonqual',
    'Q' : 'cmte_PACqual',
    'Y' : 'cmte_Partyqual',
    'O' : 'cmte_SuperPAC',
    'P' : 'cmte_Presidential',
    'U' : 'cmte_Singlecand',
    'V' : 'cmte_PACnoncontrib_nonqual',
    'W' : 'cmte_PACnoncontrib_qual',
    'X' : 'cmte_Partynonqual',
    np.nan : np.nan,
}

cm['cmte_tp'] = [cmtetp_dict[x] for x in cm['cmte_tp']]
cm['cmte_tp'].value_counts()

cmte_PACnonqual               3268
cmte_PACqual                  3220
cmte_SuperPAC                 2210
cmte_IndependentExpenditor    1323
cmte_House                    1274
cmte_CommunicationCost         526
cmte_Partynonqual              299
cmte_PACnoncontrib_nonqual     286
cmte_ElectioneeringComms       285
cmte_Partyqual                 267
cmte_Senate                    197
cmte_Singlecand                136
cmte_Presidential              129
cmte_PACnoncontrib_qual         55
cmte_DelegateCommittee           4
Name: cmte_tp, dtype: int64

In [371]:
cmtedsgn_dict = {
    'A' : 'Authorized by a candidate',
    'B' : 'Lobbyist/Registrant PAC',
    'D' : 'Leadership PAC',
    'J' : 'Joint fundraiser',
    'P' : 'Principal campaign committee',
    'U' : 'Unauthorized',
    np.nan : np.nan,
}

cm['cmte_dsgn'] = [cmtedsgn_dict[x] for x in cm['cmte_dsgn']]
cm['cmte_dsgn'].value_counts()

Unauthorized                    9193
Lobbyist/Registrant PAC         1378
Principal campaign committee    1338
Joint fundraiser                 873
Leadership PAC                   658
Authorized by a candidate         38
Name: cmte_dsgn, dtype: int64

In [372]:
pty_affil = [
    np.nan,
    '.',
    'UKN',
    '  0',
    'UN',
]

cm['cmte_pty_affiliation'] = ['UNK' if (x in pty_affil) else x for x in cm['cmte_pty_affiliation']]
cm['cmte_pty_affiliation'].value_counts()

UNK    11461
DEM      914
REP      722
LIB       76
IND       69
NNE       50
GRE       34
OTH       27
NAT       27
W         14
NPA       12
DFL       10
CON        7
AIP        5
NON        5
REF        5
IDP        5
SEP        3
SWP        3
CRV        2
CST        2
COM        2
GRN        2
PFP        2
PRO        2
NAP        2
FED        2
AMP        2
REC        1
NLP        1
WAD        1
PAC        1
CIT        1
WF         1
WOR        1
PPO        1
FUX        1
ICD        1
ACE        1
IAP        1
TUP        1
WHG        1
TX         1
TN         1
UUP        1
WRI        1
GWP        1
TEX        1
WFP        1
ALP        1
Name: cmte_pty_affiliation, dtype: int64

In [373]:
cm['cmte_filing_freq'].value_counts()

Q    6737
A    3245
M    1932
T    1575
Name: cmte_filing_freq, dtype: int64

In [374]:
orgtp_dict = {
    'C' : 'org_Corporation',
    'H' : 'org_HnotonFECsite',
    'I' : 'org_InotonFECsite',
    'L' : 'org_Labor organization',
    'M' : 'org_Membership organization',
    'T' : 'org_Trade association',
    'V' : 'org_Cooperative',
    'W' : 'org_Corporation without capital stock',
    np.nan : np.nan,
}

cm['org_tp'] = [orgtp_dict[x] for x in cm['org_tp']]
cm['org_tp'].value_counts()

org_Corporation                          1744
org_Trade association                     757
org_Membership organization               529
org_Labor organization                    401
org_Corporation without capital stock     104
org_Cooperative                            45
org_HnotonFECsite                           7
org_InotonFECsite                           4
Name: org_tp, dtype: int64

In [375]:
cm['connected_org_nm'].value_counts()[:10]

NONE                                                                  2596
DOLLARS FOR DEMOCRATS                                                   12
SERVE AMERICA VICTORY FUND                                              10
DIGIDEMS COMMITTEE                                                       8
REPUBLICANS INSPIRING SUCCESS & EMPOWERMENT PROJECT (RISE PROJECT)       8
REPUBLICAN PARTY OF TEXAS                                                7
DEMOCRATIC PARTY OF VIRGINIA                                             6
REPUBLICAN PARTY OF VIRGINIA INC                                         6
SMITH                                                                    5
REPUBLICAN NATIONAL COMMITTEE                                            5
Name: connected_org_nm, dtype: int64

In [376]:
cm.loc[cm['connected_org_nm'] == 'NONE', 'connected_org_nm'] = np.nan

In [377]:
cm['connected_org_nm'].value_counts()[:10]

DOLLARS FOR DEMOCRATS                                                 12
SERVE AMERICA VICTORY FUND                                            10
DIGIDEMS COMMITTEE                                                     8
REPUBLICANS INSPIRING SUCCESS & EMPOWERMENT PROJECT (RISE PROJECT)     8
REPUBLICAN PARTY OF TEXAS                                              7
DEMOCRATIC PARTY OF VIRGINIA                                           6
REPUBLICAN PARTY OF VIRGINIA INC                                       6
SMITH                                                                  5
CALIFORNIA REPUBLICAN PARTY                                            5
REPUBLICAN NATIONAL COMMITTEE                                          5
Name: connected_org_nm, dtype: int64

In [378]:
# sorry! these print ugly but it's to display them unabbreviated
treses = list(cm['tres_nm'].value_counts()[cm['tres_nm'].value_counts() > 10].index)

for tres in treses:
    if tres != '(no treasurer listed)':
        lil_cm = cm[cm['tres_nm'] == tres]
        lil_cm = lil_cm[[
            'cmte_id',
            'cand_id',
            'cmte_nm',

        ]].sort_values(['cand_id', 'cmte_nm']).reset_index(drop = True)
        pty = list(set(cm.loc[list(lil_cm.index), 'cmte_pty_affiliation']))
        ptys = sorted([x for x in pty if (type(x) != float)])
        print(''.join([tres, ', treasurer']))
        print(len(lil_cm), 'committees')
        print('parties:', str(ptys))
        print(lil_cm.to_csv(None))
        print('\n\n')

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  return getattr(section, self.name)[new_key]


PAUL KILGORE, treasurer
144 committees
parties: ['CRV', 'DEM', 'NAT', 'NNE', 'REP', 'UNK']
,cmte_id,cand_id,cmte_nm
0,C00569079,H0GA07125,HICE FREEDOM FUND
1,C00544445,H0GA07125,JODY HICE FOR CONGRESS
2,C00544510,H0GA08099,AUSTIN SCOTT VICTORY FUND
3,C00462556,H0GA09030,GRAVES FOR CONGRESS
4,C00501866,H0GA09030,TEAM GRAVES
5,C00458877,H0IL11052,KINZINGER FOR CONGRESS
6,C00467522,H0IL14080,RANDY HULTGREN FOR CONGRESS
7,C00476820,H0OH06189,JOHNSON FOR CONGRESS
8,C00497115,H0PA04220,ROTHFUS FOR CONGRESS
9,C00504522,H2NC08185,HUDSON FOR CONGRESS
10,C00548818,H2NC08185,HUDSON FREEDOM FUND
11,C00551374,H4AL06098,GARY PALMER FOR CONGRESS
12,C00573378,H4CO04090,BUCK FOR COLORADO
13,C00546846,H4FL26038,CARLOS CURBELO CONGRESS
14,C00565374,H4FL26038,CURBELO VICTORY COMMITTEE
15,C00543967,H4GA01039,BUDDY CARTER FOR CONGRESS
16,C00545749,H4MN06087,EMMER FOR CONGRESS
17,C00573444,H4MN06087,EMMER VICTORY COMMITTEE
18,C00573436,H4MO08162,SMITH VICTORY
19,C00559393,H4WA04104,DAN NEWHOUSE FOR CONGRESS


JEN SLATER, treasurer
20 committees
parties: ['DEM', 'NNE', 'REP', 'UNK']
,cmte_id,cand_id,cmte_nm
0,C00564674,H4CA45097,MIMI WALTERS VICTORY FUND
1,C00546853,H4CA45097,WALTERS FOR CONGRESS
2,C00658021,H8CA08120,TIM DONNELLY FOR CONGRESS
3,C00639690,H8CA26114,ANTONIO SABATO JR FOR CONGRESS
4,C00224691,H8CA42061,ROHRABACHER FOR CONGRESS
5,C00665513,H8CA49082,DIANE HARKEY FOR CONGRESS
6,C00632786,,AMERICANS FOR SENSIBLE REFORM
7,C00318766,,CALIFORNIA INDEPENDENT PETROLEUM ASSOCIATION FEDERAL PAC
8,C00613364,,GIVE ME LIBERTY PAC
9,C00684704,,HELPING ALL REPUBLICANS THIS KEY ELECTION YEAR - HARKEY PAC
10,C00450320,,INVEST IN A STRONG AND SECURE AMERICA
11,C00327007,,ISSA FOR US SENATE
12,C00680702,,KEEPING INTEGRITY MEANINGFUL IN THE 36TH (KIM36 PAC)
13,C00454819,,MAF FREEDOM PAC - MOVE AMERICA FORWARD FREEDOM PAC - MAF PAC
14,C00564658,,MAKING INVESTMENTS MAJORITY INSURED PAC
15,C00685461,,ROHRABACHER VICTORY FUND
16,C00609396,,STRONG AMERICA PAC
17,C00490136,,THE LINCOLN CLUB OF ORANGE C

THOMAS MONTGOMERY, treasurer
13 committees
parties: ['DEM', 'NNE', 'UNK']
,cmte_id,cand_id,cmte_nm
0,C00672980,H8CA12270,REMMER FOR CONGRESS
1,C00409490,,CALAVERAS COUNTY REPUBLICAN CENTRAL COMMITTEE (FEDERAL)
2,C00629147,,CALIFORNIA FREEDOM AND PROSPERITY PAC
3,C00688812,,CITIZENS FOR A RED WAVE PAC
4,C00680546,,COMMITTEE FOR LIFE LIBERTY & PROPERTY
5,C00609420,,DANNY TURNER FOR CONGRESS
6,C00603639,,LAKE COUNTY REPUBLICAN PARTY
7,C00666263,,MOBIUS POLITICAL ACTION COMMITTEE
8,C00389981,,MONTEREY COUNTY REPUBLICAN CENTRAL COMMITTEE (FED.)
9,C00659912,,OSMENA FOR CONGRESS
10,C00546127,,REPUBLICAN PARTY OF LA COUNTY - 66TH AD (FEDERAL)
11,C00325464,,TUOLUMNE COUNTY REPUBLICAN PARTY (FED)
12,C00688820,,US CITIZENS PAC




VICKIE WINPISINGER, treasurer
13 committees
parties: ['DEM', 'NNE', 'UNK']
,cmte_id,cand_id,cmte_nm
0,C00545582,H4FL20023,DWS VICTORY FUND
1,C00202416,H6GA05217,JOHN LEWIS FOR CONGRESS
2,C00687467,,BRENDAN KELLY VICTORY FUND
3,C00681510,,COMMITTEE FOR MIDWEST PRIORITIES

In [379]:
cm.to_csv('data/03a_committees.csv', index = False)