## FEC Campaign Finance

### Committee Master

In [1]:
import pandas as pd
import numpy as np

from datetime import datetime as dt

In [2]:
year = '2018'

In [3]:
# read in & clean
cm = pd.read_csv(
    f'https://www.fec.gov/files/bulk-downloads/{year}/cm{year[2:]}.zip', 
    sep = '|', 
    error_bad_lines = False, 
    header = None, 
)
print('original length:', len(cm))

# add column headers from separate file
headers = pd.read_csv(
    'https://www.fec.gov/files/bulk-downloads/data_dictionaries/cm_header_file.csv', 
)
cm.columns = [x.lower() for x in headers.columns]

# fillna
cm['cmte_pty_affiliation'].fillna('UNK', inplace = True)

# preserve uncleaned tres_nm & cmte_st1
cm['orig_tres_nm'] = list(cm['tres_nm'].values)
cm['orig_cmte_st1'] = list(cm['cmte_st1'].values)

# prep strings for deduping
change_dict = {
    ' STREET' : ' ST',
    'AVENUE' : 'AVE',
    ' DRIVE' : ' DR',
    ' ROAD' : ' RD',
    ' SUITE' : ' STE',
    '.' : '',
    ',' : '',
    'BOULEVARD' : 'BLVD',
    ' PLACE' : ' PL',
    
}
keys = list(set(change_dict.keys()))
for key in keys:
    cm['cmte_st1'] = [str(x).replace(key, change_dict[key]) for x in cm['cmte_st1']]

keep_0 = [
    ' STE ',
    '#',
    ' ST ',
    ' AVE ',
    ' RD ',
    ' PL ', 
    ' BLVD ', 
    ' DR ',
]

cm['cmte_st1'] = [x.split(' STE ')[0].split('#')[0].split(' NUM ')[0] for \
                  x in cm['cmte_st1']]
cm['cmte_st1'] = [' '.join(str(x).split()) for x in cm['cmte_st1']]

cm['cmte_zip'] = [str(x).strip() for x in cm['cmte_zip']]
cm.loc[cm['cmte_zip'].apply(lambda x: len(str(x))) == 9, 'cmte_zip'] = \
cm.loc[cm['cmte_zip'].apply(lambda x: len(str(x))) == 9, 'cmte_zip'].apply(lambda x: x[:5])

cm['cmte_zip'].apply(lambda x: len(x)).value_counts().sort_index()

original length: 19027


3       33
4        8
5    18982
6        1
8        3
Name: cmte_zip, dtype: int64

In [4]:
cands = pd.read_csv('data/02a_cand_clean.csv')

cands = list(cands['cand_id'])

mask = []
for x in cm['cand_id']:
    if type(x) == float:
        mask.append(True)
    else:
        if x in cands:
            mask.append(True)
        else:
            mask.append(False)
        
cm = cm[mask]

print(len(cm))

13490


In [5]:
cm.loc[cm['cmte_zip'].apply(lambda x: len(str(x))) != 5, 
       ['cmte_st1', 'cmte_city', 'cmte_st', 'cmte_zip']].sort_values([
    'cmte_zip', 
    'cmte_st1', 
    'cmte_city',
    'cmte_st',
])

Unnamed: 0,cmte_st1,cmte_city,cmte_st,cmte_zip
18190,510A SHERMAN ST,CANTON,MA,2021.0
18199,580 BRIDGE ST,DEDHAM,MA,2026.0
18200,580 BRIDGE ST,DEDHAM,MA,2026.0
18194,17 PLEASANT HILL AVE,BOSTON,MA,21232813.0
18198,33 THORONDIKE ST,BROOKLINE,MA,2446.0
18206,393 DORCHESTER RD,LYME,NH,3768.0
2060,1210 CORBIN ST,ELIZABETH,NJ,7201.0
10840,847 KENSINGTON CV,SPRINGDALE,AR,7276.0
18209,111 VICTORIA DR,EATONTOWN,NJ,7724.0
18116,,.,VA,


In [6]:
fix_zip = {
    '291501' : '29150',
    '89053145' : '89015',
    '04342443' : '70433',
    '21232813' : '02126',
    '7276' : '72762',
    '2021' : '02021',
    '2026' : '02026',
    '2446' : '02446',
    '3768' : '03768',
    '7201' : '07201',
    '7724' : '07724',
    
}
for key in list(fix_zip.keys()):
    cm.loc[cm['cmte_zip'] == key, 'cmte_zip'] = fix_zip[key]

cm.loc[cm['cmte_st1'] == '1055 CATALPA RD', 'cmte_zip'] = '91007'
cm.loc[cm['cmte_st1'] == '2008 OLDTOWN VALLEY RD SE', 'cmte_zip'] = '44663'
cm.loc[
    (cm['cmte_st1'] == '247 THIRD ST') & \
    (cm['cmte_city'] == 'ASHLAND'), 
    'cmte_zip'] = '97520'
cm.loc[cm['cmte_st1'] == '420 KEWANNA DR', 'cmte_zip'] = '47130'
cm.loc[cm['cmte_st1'] == '868 CHURCH ST -', 'cmte_zip'] = '11716'
cm.loc[
    (cm['cmte_st1'] == 'PO BOX 334') & \
    (cm['cmte_city'] == 'DAVIDSONVILLE'), 
    'cmte_zip'] = '21035'
cm.loc[
    (cm['cmte_st1'] == 'PO BOX 356') & \
    (cm['cmte_city'] == 'LIVINGSTON'), 
    'cmte_zip'] = '07039'

In [7]:
cm.loc[cm['cmte_zip'].apply(lambda x: len(x) == 3), 'cmte_zip'] = np.nan
# create full address column from cleaned parts
cm['address'] = \
cm['cmte_st1'].apply(lambda x: str(x).strip()) + ', ' + \
cm['cmte_city'].apply(lambda x: str(x).strip()) + ', ' + \
cm['cmte_st'].apply(lambda x: str(x).strip()) + ' ' + \
cm['cmte_zip'].apply(lambda x: str(x).strip())

cm.head(2)

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
0,C00000018,IRONWORKERS LOCAL UNION NO. 25 POLITICAL EDUCA...,STEVEN N GULICK,43335 W 10 MILE,P O BOX 965,NOVI,MI,48050,U,Q,UNK,T,,IRON WORKERS; INT'L ASS'N OF BRIDGE...,H8TX22313,STEVEN N GULICK,43335 W 10 MILE,"43335 W 10 MILE, NOVI, MI 48050"
1,C00000059,HALLMARK CARDS PAC,SARAH MOE,2501 MCGEE,MD #500,KANSAS CITY,MO,64108,U,Q,UNK,M,C,,,SARAH MOE,2501 MCGEE,"2501 MCGEE, KANSAS CITY, MO 64108"


In [8]:
# check for duplicate rows
cm[cm.duplicated(keep = False)]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
16591,C00690123,INDEPENDENT VOICES OF CHANGE,"PHELAN, JOHN",2 MEGHANS WAY,,LYNN,MA,1904,U,O,UNK,Q,,NONE,H8NJ07256,"PHELAN, JOHN",2 MEGHANS WAY,"2 MEGHANS WAY, LYNN, MA 01904"
16592,C00690123,INDEPENDENT VOICES OF CHANGE,"PHELAN, JOHN",2 MEGHANS WAY,,LYNN,MA,1904,U,O,UNK,Q,,NONE,H8NJ07256,"PHELAN, JOHN",2 MEGHANS WAY,"2 MEGHANS WAY, LYNN, MA 01904"


In [9]:
# drop duplicates
cm.drop_duplicates(keep = 'first', inplace = True)

In [10]:
# see which cmte_ids appear more than once
cm['cmte_id'].value_counts()[cm['cmte_id'].value_counts() > 1]

Series([], Name: cmte_id, dtype: int64)

In [11]:
# check for nulls
cm.isnull().sum()[cm.isnull().sum() > 0].sort_values()

cmte_nm                 9
cmte_tp                10
cmte_dsgn              11
cmte_st                19
cmte_city              20
cmte_zip               25
orig_cmte_st1          27
tres_nm              1513
orig_tres_nm         1513
connected_org_nm     7967
org_tp               9898
cmte_st2            10383
cand_id             12344
dtype: int64

In [12]:
# see which have missing cmte_nm
cm[cm['cmte_nm'].isnull()]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
8006,C00575308,,,,,,,,,,UNK,A,,,,,,"nan, nan, nan nan"
10164,C00616649,,,,,,,,,,UNK,A,,,,,,"nan, nan, nan nan"
10419,C00619916,,,,,,,,,,UNK,A,,,,,,"nan, nan, nan nan"
10519,C00621201,,,,,,,,,,UNK,A,,,,,,"nan, nan, nan nan"
12322,C00641233,,,,,,,,,,UNK,A,,,,,,"nan, nan, nan nan"
13088,C00653873,,,,,,,,,,UNK,A,,,,,,"nan, nan, nan nan"
13089,C00653881,,,,,,,,,,UNK,A,,,,,,"nan, nan, nan nan"
13396,C00657031,,,,,,,,,,UNK,A,,,,,,"nan, nan, nan nan"
17010,C30001184,,,PO BOX 33524,,WASHINGTON,DC,20033.0,U,E,UNK,A,,,,,PO BOX 33524,"PO BOX 33524, WASHINGTON, DC 20033"


In [13]:
# fill in placeholder cmte_nm
cm.loc[cm['cmte_nm'].isnull(), 'cmte_nm'] = '(cmte_nm missing)'

In [14]:
# see which have missing address
# then see if any other rows have matching cmte_nm
cm[cm['address'].isnull()]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [15]:
# see which street addresses appear most frequently
cm['address'].value_counts()[:20]

228 S WASHINGTON ST, ALEXANDRIA, VA 22314        156
918 PENNSYLVANIA AVE SE, WASHINGTON, DC 20003    112
824 S MILLEDGE AVE, ATHENS, GA 30605             101
PO BOX 26141, ALEXANDRIA, VA 22313                95
700 13TH ST NW, WASHINGTON, DC 20005              61
PO BOX 9891, ARLINGTON, VA 22219                  56
611 PENNSYLVANIA AVE SE, WASHINGTON, DC 20003     48
PO BOX 30844, BETHESDA, MD 20824                  41
610 S BLVD, TAMPA, FL 33606                       37
777 S FIGUEROA ST, LOS ANGELES, CA 90017          37
910 17TH ST NW, WASHINGTON, DC 20006              35
PO BOX 15320, WASHINGTON, DC 20003                35
5429 MADISON AVE, SACRAMENTO, CA 95841            32
PO BOX 75357, WASHINGTON, DC 20013                26
555 CAPITOL MALL, SACRAMENTO, CA 95814            26
499 SOUTH CAPITOL ST SW, WASHINGTON, DC 20003     24
901 N WASHINGTON ST, ALEXANDRIA, VA 22314         24
120 MARYLAND AVE NE, WASHINGTON, DC 20002         24
249 E OCEAN BLVD, LONG BEACH, CA 90802        

In [16]:
# fill in placeholder for missing treasurer name 
print(len(cm[cm['tres_nm'].isnull()]))
cm.loc[cm['tres_nm'].isnull(), 'tres_nm'] = '(no treasurer listed)'
cm.loc[cm['tres_nm'].apply(lambda x: x in ['.', 'NONE', '']), 'tres_nm'] = '(no treasurer listed)'

# convert to <first name> <last name> format from <last name>, <first name>
cm['tres_nm'] = [' '.join([' '.join(x.split(', ')[1:]), x.split(', ')[0]]) if ', ' in \
                 x else x for x in cm['tres_nm']]
# clean up spacing
# looks like this could just be ' '.join(x.strip()) 
# but i feel like i did it this way for a reason... leaving it
cm['tres_nm'] = [' '.join([x for x in list(x.split(' ')) if len(x) > 1]).strip() for \
                 x in cm['tres_nm']]

# prep for deduping

# substitutions
subs = {
    'XAVUER' : 'XAVIER',
    'JSOEPH' : 'JOSEPH',
    'WILLAIM' : 'WILLIAM',
    'V?RONIQUE' : 'VERONIQUE',
    '.' : '',
    ',' : '',
    
}
for key in list(subs.keys()):
    cm['tres_nm'] = [x.replace(key, subs[key]) for x in cm['tres_nm']]

# drops
drops = [
    'MRS',
    'MS',
    'MR',
    'MISS',
    'JR',
    'SR',
    'UNDEFINED',
    'JD',
    'CPA',
    'HONORABLE',
    'FED',
    'II', 
    'III',
    'IV',
    'ESQ',
    'DR',
    'MD',
    'DO',
    'CFO',
    'CEO',
    'CTO',
    'TREAS',
    'US',
    '1970',
    '--SELECT',
    '1973',
    '1979',
    
]
for droplet in drops:
    cm['tres_nm'] = [' '.join([x for x in x.split() if \
                               ((x not in drops) & (len(x) > 1))]) for x in cm['tres_nm']]

# a bunch of entries have last name appearing twice; 
# check for this and keep only first instance
cm['tres_nm'] = [x.split() for x in cm['tres_nm']]
def f7(seq):
    '''https://stackoverflow.com/questions/480214/\
    how-do-you-remove-duplicates-from-a-list-whilst-preserving-order'''
    seen = set()
    seen_add = seen.add
    return [x for x in seq if not (x in seen or seen_add(x))]            
cm['tres_nm'] = [' '.join(f7(x)) for x in cm['tres_nm']]

# see tres_nms appearing most frequently
cm['tres_nm'].value_counts()[:20]

1513


(no treasurer listed)    1518
PAUL KILGORE              144
JUDITH ZAMORE             108
CHRIS MARSTON             106
LISA LISKER                88
JENNIFER MAY               62
JAY PETTERSON              56
BENJAMIN OTTENHOFF         55
DAVID SATTERFIELD          49
CABELL HOBBS               47
KEITH DAVIS                44
BRADLEY CRATE              43
NANCY WATKINS              42
JANICA KYRIACOPOULOS       41
THOMAS DATWYLER            36
STEVEN MARTIN              25
ROBERT CARLIN              24
TIMOTHY KOCH               23
STEVE RALLS                22
LORA HAGGARD               22
Name: tres_nm, dtype: int64

In [17]:
# what is street address of committees not listing treasurer
cm.loc[cm['tres_nm'] == '(no treasurer listed)', 'address'].value_counts()[:20]

nan, nan, nan nan                                 16
1201 CONNECTICUT AVE NW, WASHINGTON, DC 20036     10
700 13TH ST NW, WASHINGTON, DC 20005               6
815 16TH ST NW, WASHINGTON, DC 20006               5
300 M ST SE, WASHINGTON, DC 20003                  5
555 CAPITOL MALL, SACRAMENTO, CA 95814             5
1120 CONNECTICUT AVE NW, WASHINGTON, DC 20036      4
5429 MADISON AVE, SACRAMENTO, CA 95841             4
1401 NEW YORK AVE NW, WASHINGTON, DC 20005         4
1747 PENNSYLVANIA AVE NW, WASHINGTON, DC 20006     4
PO BOX 2259, WILMINGTON, NC 28402                  4
1875 CONNECTICUT AVE NW, WASHINGTON, DC 20009      3
414 N ORLEANS PLAZA, CHICAGO, IL 60654             3
50 F ST NW, WASHINGTON, DC 20001                   3
8655 EXPLORER DR, COLORADO SPRINGS, CO 80920       3
2525 W ALAMEDA AVE, DENVER, CO 80219               3
1201 15TH ST NW, WASHINGTON, DC 20005              3
1615 H ST NW, WASHINGTON, DC 20062                 3
1150 GRAND BLVD, KANSAS CITY, MO 64106        

In [18]:
list(cm.loc[cm['address'] == '1201 CONNECTICUT AVE NW, WASHINGTON, DC 20036', 'cmte_nm'])

['CONSTITUTIONAL RESPONSIBILITY PROJECT',
 'SOCAL HEALTH CARE COALITION A PROJECT OF SIXTEEN THIRTY FUND',
 'FLORIDIANS FOR A FAIR SHAKE A PROJECT OF SIXTEEN THIRTY FUND',
 'DEMAND JUSTICE A PROJECT OF SIXTEEN THIRTY FUND',
 'SIXTEEN THIRTY FUND / MAKE IT WORK AMERICA1',
 'SIXTEEN THIRTY FUND/MAKE IT WORK ACTION',
 'SIXTEEN THIRTY FUND/NOT ONE PENNY',
 'FLORIDIANS FOR A FAIR SHAKE A PROJECT OF SIXTEEN THIRTY FUND',
 'SOCAL HEALTH CARE COALITION A PROJECT OF SIXTEEN THIRTY FUND',
 'OHIOANS FOR ECONOMIC OPPORTUNITY A PROJECT OF SIXTEEN THIRTY FUND']

In [19]:
list(cm.loc[
    (cm['address'] == '700 13TH ST NW, WASHINGTON, DC 20005') & \
    (cm['tres_nm'] == '(no treasurer listed)'), 'cmte_nm'])

['MAJORITY FORWARD',
 'MAJORITY FORWARD',
 'AMERICA WORKING TOGETHER',
 'DUTY AND HONOR',
 'BLACK PROGRESSIVE ACTION COALITION',
 'BLACK ECONOMIC ALLIANCE FUND']

In [20]:
list(cm.loc[
    (cm['address'] == '555 CAPITOL MALL, SACRAMENTO, CA 95814') & \
    (cm['tres_nm'] == '(no treasurer listed)'), 'cmte_nm'])

['PLANNED PARENTHOOD ADVOCACY PROJECT LOS ANGELES COUNTY',
 'PLANNED PARENTHOOD AFFILIATES OF CALIF',
 'ALAMEDANS UNITED SUPPORTING VELLA AND ASHCRAFT FOR CITY COUNCIL, ET AL',
 'OPPORTUNITY PAC - A COALITION OF TEACHERS HEALTH CARE GIVERS FACULTY MEMBERS SCHOOL EMPLOYEES AND PUBLIC AND PR',
 'GOLDEN CALIFORNIA COMMITTEE SPONSORED BY THE SEIU CALIFORNIA STATE COUNCIL']

### Dedupe treasurer names

This is a journey.

In [21]:
def findall(name):
    '''find pattern in treasurer names columns'''
    all_tres = list(set(cm['tres_nm']))
    if type(name) == str:
        these = [x for x in all_tres if name.lower() in str(x).lower()]
    else:  
        these = all_tres
        for which in name:
            these = [x for x in these if which in str(x).lower()]
    return sorted(these)

In [22]:
def getall(these, new):
    '''assign deduped treasurer name'''
    print('orig len:', len(cm[cm['tres_nm'] == new]))
    for which in these:
        cm.loc[cm['tres_nm'] == which, 'tres_nm'] = new
    print('new len:', len(cm[cm['tres_nm'] == new]))
    return 

In [23]:
# this list keeps track of the ones we've already addressed
dones = ['(no treasurer listed)']

In [24]:
findall(' kil')

['EDWIN JAY KILPATRICK',
 'GERALD KILPATRICK',
 'JEFFREY TUTTLE KILLEEN',
 'KEVIN KILEY',
 'LAURA KILMER',
 'LAWRENCE KILGORE',
 'MEAGHAN KILLION JOYCE',
 'MILES KILCOIN',
 'PAUL KILGORE',
 'RACHEL KILPATRICK',
 'TERRY KILROY',
 'WILLIAM KILLMER']

In [25]:
findall(' joyce')

['JONATHAN JOYCE', 'MEAGHAN KILLION JOYCE', 'STEPHEN JOYCE']

In [26]:
dones.extend(findall(' kil'))

In [27]:
findall(' lis')

['(no treasurer listed)',
 'EVAN ROGER LISTOPAD',
 'JOHN LISTAK',
 'LISA LISKER',
 'SUSIE LISA']

In [28]:
dones.extend(findall(' lis'))

In [29]:
findall('lisker')

['LISA LISKER']

In [30]:
new = 'LISA LISKER'
dones.append(new)

these = findall('lisker')

getall(these, new)

orig len: 88
new len: 88


In [31]:
findall(['c', 'marst'])

['CHRIS MARSTEN', 'CHRIS MARSTON', 'CHRISTOPHER MARSTON']

In [32]:
cm[cm['tres_nm'] == 'CHRIS MARSTEN']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
11508,C00632760,WHITE COAT WASTE PAC,CHRIS MARSTEN,PO BOX 26141,,ALEXANDRIA,VA,22313,U,Q,UNK,Q,,,,CHRIS MARSTEN,PO BOX 26141,"PO BOX 26141, ALEXANDRIA, VA 22313"
12366,C00646463,BUCKEYE BATTLE PAC,CHRIS MARSTEN,PO BOX 26141,,ALEXANDRIA,VA,22313,U,O,UNK,T,,,,CHRIS MARSTEN,PO BOX 26141,"PO BOX 26141, ALEXANDRIA, VA 22313"
14309,C00666511,WE STAND FOR BETTER,CHRIS MARSTEN,PO BOX 26141,,ALEXANDRIA,VA,22313,U,O,UNK,Q,,,,CHRIS MARSTEN,PO BOX 26141,"PO BOX 26141, ALEXANDRIA, VA 22313"


In [33]:
cm[cm['tres_nm'] == 'CHRIS MARSTON'].head()

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
3558,C00433524,DUNCAN D. HUNTER FOR CONGRESS,CHRIS MARSTON,PO BOX 1545,,EL CAJON,CA,92022,P,H,REP,Q,,HUNTER VICTORY FUND,H8CA52052,"MARSTON, CHRIS",PO BOX 1545,"PO BOX 1545, EL CAJON, CA 92022"
3723,C00441014,ROB WITTMAN FOR CONGRESS,CHRIS MARSTON,PO BOX 3770,,OAKTON,VA,22124,P,H,REP,Q,,WITTMAN VICTORY COMMITTEE,H8VA01147,"MARSTON, CHRIS",PO BOX 3770,"PO BOX 3770, OAKTON, VA 22124"
3919,C00449926,AMERICAN FUTURE FUND POLITICAL ACTION,CHRIS MARSTON,45 N HILL DR,STE 100,WARRENTON,VA,20186,U,W,UNK,M,,,,"MARSTON, CHRIS",45 N HILL DR,"45 N HILL DR, WARRENTON, VA 20186"
4932,C00486738,MANY INDIVIDUAL CONSERVATIVES HELPING ELECT LE...,CHRIS MARSTON,PO BOX 26141,,ALEXANDRIA,VA,22313,D,Q,UNK,Q,,MICHELE BACHMANN,,"MARSTON, CHRIS",PO BOX 26141,"PO BOX 26141, ALEXANDRIA, VA 22313"
5364,C00499020,FREEDOMWORKS FOR AMERICA,CHRIS MARSTON,111 K ST NE,STE 600,WASHINGTON,DC,20002,U,O,UNK,Q,,NONE,,"MARSTON, CHRIS",111 K ST NE,"111 K ST NE, WASHINGTON, DC 20002"


In [34]:
new = 'CHRISTOPHER MARSTON'
dones.append(new)

these = findall(['c', 'marst'])

getall(these, new)

orig len: 13
new len: 122


In [35]:
findall(['st', ' martin'])

['ERNESTO MARTINEZ', 'STEVEN MARTIN']

In [36]:
cm[cm['tres_nm'] == 'STEVE MARTIN']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [37]:
cm[cm['tres_nm'] == 'STEVEN MARTIN'].head()

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
1361,C00233353,AD ALLIANCE,STEVEN MARTIN,PO BOX 30844,,BETHESDA,MD,20824,B,Q,UNK,M,M,NONE,,"MARTIN, STEVEN JR.",PO BOX 30844,"PO BOX 30844, BETHESDA, MD 20824"
2507,C00372532,MICHAEL BURGESS FOR CONGRESS,STEVEN MARTIN,PO BOX 2334,,DENTON,TX,76202,P,H,REP,Q,,CARE AMERICA,H2TX26093,"MARTIN, STEVEN G. JR.",PO BOX 2334,"PO BOX 2334, DENTON, TX 76202"
3207,C00415208,LONE STAR LEADERSHIP PAC,STEVEN MARTIN,PO BOX 30844,,BETHESDA,MD,20824,D,Q,UNK,M,,MICHAEL BURGESS,,"MARTIN, STEVEN G. JR.",PO BOX 30844,"PO BOX 30844, BETHESDA, MD 20824"
5181,C00493783,FRESHMAN AGRICULTURAL REPUBLICAN MEMBERS TRUST...,STEVEN MARTIN,PO BOX 30844,,BETHESDA,MD,20824,J,H,UNK,Q,,NONE,H0AL02087,"MARTIN, STEVEN G. JR.",PO BOX 30844,"PO BOX 30844, BETHESDA, MD 20824"
6843,C00551366,RIGHTNOW WOMEN PAC,STEVEN MARTIN,PO BOX 30844,,BETHESDA,MD,20824,U,Q,UNK,Q,,NONE,,"MARTIN, STEVEN G. JR.",PO BOX 30844,"PO BOX 30844, BETHESDA, MD 20824"


In [38]:
dones.extend(findall(['st', ' martin']))

In [39]:
findall(' crat')

['BRADLEY CRATE', 'VIVIAN CRATER']

In [40]:
dones.extend(findall(' crat'))

In [41]:
findall('datwyler')

['THOMAS CHARLES DATWYLER', 'THOMAS DATWYLER']

In [42]:
new = 'THOMAS DATWYLER'
dones.append(new)

these = findall('datwyler')

getall(these, new)

orig len: 36
new len: 37


In [43]:
findall('zamore')

['JUDITH ZAMORE', 'JUDY ZAMORE']

In [44]:
new = 'JUDITH ZAMORE'
dones.append(new)

these = findall('zamore')

getall(these, new)

orig len: 108
new len: 109


In [45]:
findall(' pett')

['BRENDAN PETTYJOHN', 'JAY PETTERSON']

In [46]:
dones.extend(findall(' pett'))

In [47]:
findall(['jen', 'may'])

['JENNIFER MAY']

In [48]:
dones.append('JENNIFER MAY')

In [49]:
# first jennifer for driskell
cm[cm['tres_nm'] == 'JENNIFER FAIRFIELD']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
7845,C00572859,GRETCHEN DRISKELL FOR CONGRESS,JENNIFER FAIRFIELD,PO BOX 464,,SALINE,MI,48176,P,H,DEM,Q,,NONE,H6MI07223,"FAIRFIELD, JENNIFER",PO BOX 464,"PO BOX 464, SALINE, MI 48176"


In [50]:
# second jennifer for driskell.  same?
cm[(cm['tres_nm'] == 'JENNIFER MAY') & (cm['cmte_st1'] == 'PO BOX 464')]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [51]:
findall(' williamson')

['CRAIG WILLIAMSON', 'LES WILLIAMSON', 'SEAN WILLIAMSON']

In [52]:
dones.extend(findall(' williamson'))

In [53]:
findall(' ky')

['CHUCK KYRISH', 'JANICA KYRIACOPOULOS', 'SAMMY KYE']

In [54]:
dones.extend(findall(' ky'))

In [55]:
findall(' watkins')

['CATHLEEN WATKINS',
 'CHARLES WATKINS',
 'MICHAEL WATKINS',
 'NANCY WATKINS',
 'WILLIAM WATKINS']

In [56]:
cm[cm['tres_nm'] == 'BILL WATKINS']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [57]:
cm[cm['tres_nm'] == 'WILLIAM WATKINS']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
2551,C00376244,KUSTOFF FOR CONGRESS 2002,WILLIAM WATKINS,1661 AARON BRENNER DR,SUITE 300,MEMPHIS,TN,38120,A,H,REP,Q,,NONE,H2TN07103,"WATKINS, WILLIAM H JR",1661 AARON BRENNER DRIVE,"1661 AARON BRENNER DR, MEMPHIS, TN 38120"
10036,C00614826,KUSTOFF FOR CONGRESS,WILLIAM WATKINS,1661 AARON BRENNER DR,STE 300,MEMPHIS,TN,38120,P,H,REP,Q,,,H2TN07103,"WATKINS, WILLIAM H JR",1661 AARON BRENNER DR,"1661 AARON BRENNER DR, MEMPHIS, TN 38120"


In [58]:
new = 'WILLIAM WATKINS'
dones.append(new)

these = ['BILL WATKINS', 'WILLIAM WATKINS']

getall(these, new)

orig len: 2
new len: 2


In [59]:
dones.extend(findall(' watkins'))

In [60]:
findall('hobbs')

['CABELL HOBBS', 'JENNIFER HOBBS', 'NICOLE HOBBS', 'SCOTT HOBBS']

In [61]:
findall('giles')

['JASON GILES', 'RODNEY GILES']

In [62]:
dones.extend(findall('giles'))

In [63]:
dones.extend(findall('hobbs'))

In [64]:
findall(' satt')

['ADAM SATTERFIELD', 'DAVID SATTERFIELD', 'STACEY SATTERLEE']

In [65]:
dones.extend(findall(' satt'))

In [66]:
findall(' curtis')

['BOBBY CURTIS BRACKETT',
 'DAVID MARLOW CURTIS',
 'ELIZABETH CURTIS',
 'JAMES CURTIS',
 'KEVIN CURTIS',
 'LIZ CURTIS',
 'MICHAEL CURTIS DEAN',
 'RON CURTIS',
 'STEVEN DALE CURTIS']

In [67]:
findall('brackett')

['BOBBY CURTIS BRACKETT']

In [68]:
findall(['mi', 'dean'])

['MICHAEL CURTIS DEAN']

In [69]:
new = 'ELIZABETH CURTIS'
dones.append(new)

these = findall(['liz', 'curtis'])

getall(these, new)

orig len: 15
new len: 17


In [70]:
dones.extend(findall(' curtis'))

In [71]:
findall(' davis')

['ADAM DAVIS',
 'BRADLEY DAVIS',
 'BRIAN DAVIS',
 'BROOKE DAVIS',
 'CASEY DAVISON',
 'CLAY PARKER DAVIS',
 'CRYSTAL DAVIS-TAYLOR',
 'CYNTHIA DAVIS',
 'DEBORAH DAVIS',
 'DONALD DAVIS',
 'DYLAN DAVIS',
 'GARY DAVIS',
 'HAROLD DAVIS',
 'JEFF DAVIS',
 'JEREMY DAVIS',
 'JOE DAVIS',
 'JONATHAN DAVIS',
 'KAYOLKA DAVIS',
 'KEITH DAVIS',
 'KELLY DAVIS',
 'KETH DAVIS',
 'LANCE DAVIS',
 'LESLIE WALTER DAVIS',
 'MARIAN DAVIS',
 'MARIE ELIZABETH DAVIS',
 'MARIENELLA DAVIS',
 'MARK DAVIS',
 'MICHAEL DAVIS',
 'MICHELE DAVISON',
 'MILTON DAVIS',
 'PAUL DAVIS',
 'REGINALD DAVIS',
 'RICHARD BERNARD DAVIS',
 'RICK DAVISON',
 'ROBERT DAVIS',
 'SANDRA DAVIS',
 'SUSAN DAVIS',
 'VANDY DAVIS',
 'VERA DAVIS',
 'VIRGINIA DAVIS',
 'WAYNE DAVIS',
 'WILLIAM DAVIS']

In [72]:
cm[cm['tres_nm'].apply(lambda x: x in ['BILLY DAVIS', 'WILLIAM DAVIS'])]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
12062,C00638536,FRIENDS OF DANNER KLINE,WILLIAM DAVIS,PO BOX 430191,,VESTAVIA HILLS,AL,35243,P,H,DEM,Q,,NONE,H8AL06099,"DAVIS, WILLIAM",PO BOX 430191,"PO BOX 430191, VESTAVIA HILLS, AL 35243"


In [73]:
cm[cm['tres_nm'] == 'KETH DAVIS']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
14187,C00665232,WIN IN 2018,KETH DAVIS,228 S WASHINGTON ST,SUITE 115,ALEXANDRIA,VA,22314,J,N,UNK,Q,,NONE,,"DAVIS, KETH A.",228 S. WASHINGTON STREET,"228 S WASHINGTON ST, ALEXANDRIA, VA 22314"


In [74]:
cm[cm['tres_nm'] == 'KEITH DAVIS'].head()

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
212,C00027466,NRSC,KEITH DAVIS,425 2ND ST NE,,WASHINGTON,DC,20002,U,Y,REP,M,,2018 TILLIS SENATE CANDIDATE FUND,,"DAVIS, KEITH",425 2ND STREET NE,"425 2ND ST NE, WASHINGTON, DC 20002"
395,C00075820,NRCC,KEITH DAVIS,320 FIRST ST SE,.,WASHINGTON,DC,20003,U,Y,REP,M,,COLE COMBINED COMMITTEE,,"DAVIS, KEITH A.",320 FIRST STREET SE,"320 FIRST ST SE, WASHINGTON, DC 20003"
2013,C00330720,TRUST PAC TEAM REPUBLICANS FOR UTILIZING SENSI...,KEITH DAVIS,228 S WASHINGTON ST,SUITE 115,ALEXANDRIA,VA,22314,D,Q,UNK,Q,,NONE,,"DAVIS, KEITH A.",228 S. WASHINGTON STREET,"228 S WASHINGTON ST, ALEXANDRIA, VA 22314"
2738,C00388421,TENN POLITICAL ACTION COMMITTEE INC (TENN PAC),KEITH DAVIS,228 S WASHINGTON ST,,ALEXANDRIA,VA,22314,D,Q,UNK,Q,,NONE,,"DAVIS, KEITH A.",228 S WASHINGTON STREET SUITE 115,"228 S WASHINGTON ST, ALEXANDRIA, VA 22314"
3819,C00445387,IMS HEALTH PAC,KEITH DAVIS,228 S WASHINGTON ST,,ALEXANDRIA,VA,22314,U,Q,UNK,T,C,,,KEITH A DAVIS,228 S WASHINGTON STREET SUITE 115,"228 S WASHINGTON ST, ALEXANDRIA, VA 22314"


In [75]:
new = 'KEITH DAVIS' 
dones.append(new)

these = ['KEITH DAVIS', 'KETH DAVIS']

getall(these, new)

orig len: 44
new len: 45


In [76]:
cm[cm['tres_nm'].apply(lambda x: x in ['MIKE DAVIS', 'MICHAEL DAVIS'])]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
15960,C00683573,STRENGTH AND PROGRESS INC,MICHAEL DAVIS,30 N GOULD ST,STE 7981,SHERIDAN,WY,82801,U,N,UNK,Q,,,,"DAVIS, MICHAEL",30 N GOULD ST,"30 N GOULD ST, SHERIDAN, WY 82801"


In [77]:
dones.extend(findall(' davis'))

In [78]:
findall(['phillips', 'ob'])

['ROB PHILLIPS', 'ROBERT PHILLIPS']

In [79]:
new = 'ROBERT PHILLIPS' 
dones.append(new)

these = findall(['phillips', 'ob'])

getall(these, new)

orig len: 20
new len: 24


In [80]:
findall(' mele')

['STEVE MELE', 'STEVEN MELE']

In [81]:
cm[cm['tres_nm'] == 'STEVE MELE']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
5594,C00507574,MOTOR CITY PAC,STEVE MELE,611 PENNSYLVANIA AVE SE,STE 143,WASHINGTON,DC,20003,D,Q,UNK,M,,,,"MELE, STEVE",611 PENNSYLVANIA AVENUE SE,"611 PENNSYLVANIA AVE SE, WASHINGTON, DC 20003"
12747,C00650366,LESLIE COCKBURN FOR CONGRESS,STEVE MELE,PO BOX 186,,SPERRYVILLE,VA,22740,P,H,DEM,Q,,NONE,H8VA05155,"MELE, STEVE",PO BOX 186,"PO BOX 186, SPERRYVILLE, VA 22740"
16182,C00685842,THE ARENA CANDIDATE PAC HOUSE VICTORY FUND,STEVE MELE,611 PENNSYLVANIA AVE SE,NUM 143,WASHINGTON,DC,20003,J,H,UNK,T,,,,"MELE, STEVE",611 PENNSYLVANIA AVE SE,"611 PENNSYLVANIA AVE SE, WASHINGTON, DC 20003"
16851,C00692988,ALL FOR OUR COUNTRY VICTORY FUND,STEVE MELE,611 PENNSYLVANIA AVE SE,STE. 143,WASHINGTON,DC,20003,J,N,UNK,Q,,NONE,,"MELE, STEVE",611 PENNSYLVANIA AVENUE SE,"611 PENNSYLVANIA AVE SE, WASHINGTON, DC 20003"


In [82]:
cm[cm['tres_nm'] == 'STEVEN MELE'].head()

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
6138,C00526657,TOWARD TOMORROW PAC,STEVEN MELE,PO BOX 544,,SAN ANTONIO,TX,78292,D,Q,UNK,M,,,,"MELE, STEVEN",PO BOX 544,"PO BOX 544, SAN ANTONIO, TX 78292"
8535,C00583104,CORTEZ MASTO VICTORY FUND,STEVEN MELE,611 PENNSYLVANIA AVE SE,,WASHINGTON,DC,20003,J,N,UNK,T,,,,STEVEN MELE,611 PENNSYLVANIA AVE SE SUITE 143,"611 PENNSYLVANIA AVE SE, WASHINGTON, DC 20003"
9460,C00606939,ROSEN FOR NEVADA,STEVEN MELE,PO BOX 27195,,LAS VEGAS,NV,89126,P,S,DEM,Q,,ROSEN VICTORY FUND,S8NV00156,"MELE, STEVEN",PO BOX 27195,"PO BOX 27195, LAS VEGAS, NV 89126"
10991,C00627232,IVOTE ACTION FUND,STEVEN MELE,722 12TH ST NW,3RD FLOOR,WASHINGTON,DC,20005,U,O,UNK,Q,,NONE,,"MELE, STEVEN",722 12TH ST NW,"722 12TH ST NW, WASHINGTON, DC 20005"
11170,C00629212,ALL FOR OUR COUNTRY LEADERSHIP PAC,STEVEN MELE,611 PENNSYLVANIA AVE SE,,WASHINGTON,DC,20003,D,Q,UNK,M,,,,STEVEN MELE,"611 PENNSYLVANIA AVE SE, #143","611 PENNSYLVANIA AVE SE, WASHINGTON, DC 20003"


In [83]:
new = 'STEVEN MELE'
dones.append(new)

these = findall(['steve', ' mele'])

getall(these, new)

orig len: 22
new len: 26


In [84]:
findall(['su', 'jackson'])

['SUE JACKSON', 'SUSAN JACKSON']

In [85]:
new = 'SUSAN JACKSON'
dones.append(new)

these = findall(['su', 'jackson'])

getall(these, new)

orig len: 2
new len: 21


In [86]:
findall(' otte')

['BENJAMIN OTTENHOFF', 'TIMOTHY OTTEN']

In [87]:
new = 'BENJAMIN OTTENHOFF'
dones.append(new)

these = findall(' otte')

getall(these, new)

orig len: 55
new len: 56


In [88]:
findall(' mcmi')

['ANN MCMILLAN',
 'COLLIN MCMICHAEL',
 'GEETA MCMILLAN',
 'KURT MCMILLAN',
 'MARY MCMILLAN']

In [89]:
dones.extend(findall(' mcmi'))

In [90]:
findall(' lew')

['ANDY LEWIS JAMES',
 'CHERYL LEWIS',
 'CORDELIA LEWIS BURKS',
 'DEBORAH LEWIS',
 'DENISE LEWIS',
 'EMANUEL LEWIS',
 'JACK LEWIS CHARBONNEAU',
 'JEFF LEWIS',
 'JENNIFER LEWIS',
 'KEET LEWIS',
 'LARRY LEWIS',
 'LEROY LEWIS',
 'LINDSAY LEWIS',
 'MARC LEWKOWITZ',
 'MARK LEWIS',
 'MARY PARKER LEWIS',
 'MICHAEL LEWIS',
 'NANCY LEWIS',
 'NATHAN LEWIS WURTZEL',
 'PAUL LEWIS',
 'ROSS BAZELON LEWIN',
 "SABRINA Y'VES LEWIS-JONES",
 'SANDRA LEWIS',
 'SCOTT LEWIS',
 'WENDY LEWIS',
 'WILLIAM LEWIS FANATIA']

In [91]:
findall(['and', ' james'])

['ALEX-ST JAMES ANDREW RAILEY-CISCO',
 'ANDREW JAMES MCDOWELL',
 'ANDY LEWIS JAMES',
 'BRANDON MICHAEL JAMES',
 'CHANDLER JAMES HOUGHTLEN']

In [92]:
findall(['cord', 'burk'])

['CORDELIA LEWIS BURKS']

In [93]:
findall('charbon')

['JACK LEWIS CHARBONNEAU']

In [94]:
findall(['nat', 'wurtz'])

['NATHAN LEWIS WURTZEL']

In [95]:
findall(' alston')

['LINDA ALSTON']

In [96]:
cm[cm['tres_nm'] == 'WENDY LEWIS']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
3934,C00450411,MOMENTUM PAC,WENDY LEWIS,1200 SMITH,SUITE 1600,HOUSTON,TX,77002,U,N,UNK,A,,,,WENDY LEWIS,1200 SMITH,"1200 SMITH, HOUSTON, TX 77002"


In [97]:
cm[cm['tres_nm'] == 'WENDY LEWIS ARMSTRONG']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [98]:
new = 'WENDY LEWIS'
dones.append(new)

these = findall('wendy lewis')

getall(these, new)

orig len: 1
new len: 1


In [99]:
dones.extend(findall(' lew'))

In [100]:
findall(' lawl')

['CHRISTOPHER LAWLOR', 'KELLY LAWLER', 'SEWARD LAWLOR']

In [101]:
dones.extend(findall(' lawl'))

In [102]:
findall(' crum')

['GARY CRUMMITT', 'JANE CRUMPLER', 'JOHN CRUMP']

In [103]:
dones.extend(findall(' crum'))

In [104]:
findall(' slater')

['JEN SLATER', 'TODD SLATER']

In [105]:
dones.extend(findall(' slater'))

In [106]:
findall(' mccaul')

['ALICIA MCCAULEY', 'MICHAEL MCCAULEY', 'MIKE MCCAULEY']

In [107]:
dones.extend(findall(' mccaul'))

In [108]:
findall(' montg')

['CHRISTOPHER MONTGOMERY WOODFIN',
 'DAVA MONTGOMERY',
 'DIANE MONTGOMERY',
 'JAY MONTGOMERY',
 'LUKE MONTGOMERY',
 'MEGAN MONTGOMERY',
 'RUSSELL MONTGOMERY',
 'THOMAS MONTGOMERY']

In [109]:
dones.extend(findall(' montg'))

In [110]:
findall(' nis')

['MELISSA NISSEN']

In [111]:
dones.extend(findall(' nis'))

In [112]:
findall(' matthews')

['LAURA MATTHEWS', 'PAUL MATTHEWS']

In [113]:
dones.extend(findall(' matthews'))

In [114]:
findall(' goul')

['ANN MARIE GOULD', 'DAVID GOULD', 'VAN CHARLES GOULD']

In [115]:
cm[cm['tres_nm'].apply(lambda x: ('ADAM' in x) & ('GOULD' in x))]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [116]:
new = 'ADAM KOHL GOULD'
dones.append(new)

these = findall(['adam', 'gould'])

getall(these, new)

orig len: 0
new len: 0


In [117]:
dones.extend(findall(' goul'))

In [118]:
findall(' gan')

['CHARLES GANTT',
 'HARVEY GANTT',
 'MARIA VICTORIA GANACIAS BORJA',
 'MARY RANDOLPH GANNON',
 'PETER GANGI',
 'RON GANT']

In [119]:
findall('borja')

['MARIA VICTORIA GANACIAS BORJA']

In [120]:
findall(['mary', 'randolph'])

['MARY RANDOLPH GANNON']

In [121]:
dones.extend(findall(' gan'))

In [122]:
findall(' carli')

['HELEN CARLIN', 'JAMES CARLISLE', 'MICHAEL CARLIN', 'ROBERT CARLIN']

In [123]:
dones.extend(findall(' carli'))

In [124]:
findall(' kra')

['ALLEN KRAMER',
 'BETH KRATOCHVIL',
 'BLAKE ALAN KRAPF',
 'CATHERINE KRANTZ',
 'DAVID KRALLE',
 'DAVID KRAMER',
 'DON KRAUS',
 'JOHN KRALL',
 'PATRICK KRASON',
 'SHAWN KRAUSE',
 'STEVEN KRAVITZ',
 'TOM KRAUSE']

In [125]:
cm[cm['tres_nm'] == 'THOMAS KRAUS']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [126]:
cm[cm['tres_nm'] == 'TOM KRAUSE']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
15667,C00680504,BROADCOM INC. POLITICAL ACTION COMMITTEE (BROA...,TOM KRAUSE,1320 RIDDER PARK DR,,SAN JOSE,CA,95131,U,Q,UNK,M,C,,,"KRAUSE, TOM",1320 RIDDER PARK DR.,"1320 RIDDER PARK DR, SAN JOSE, CA 95131"


In [127]:
dones.extend(findall(' kra'))

In [128]:
findall(' koch')

['KEVIN KOCH',
 'LINDA KOCH',
 'NICHOLAS KOCH',
 'THEODORE KOCH',
 'TIMOTHY KOCH',
 'TOM KOCHAN']

In [129]:
dones.extend(findall(' koch'))

In [130]:
findall(' win')

['ALAN WINDGASSEN',
 'ALISON WINGATE',
 'ANDREW WINTERING',
 'ARTHUR WINSTEAD',
 'BRIGEN WINTERS',
 'DEAN WINE',
 'ELIZABETH WINN BJORK',
 'HICKS WINTERS',
 'JOHN WINKLER',
 'KAREN WINGARD',
 'MICHAEL WINTERHALTER',
 'PATRICIA WINTER',
 'ROBERT LEROY WINTER',
 'ROBERT VAN WINTON',
 'STEVE WIND',
 'THOMAS WINTER',
 'VICKIE WINPISINGER']

In [131]:
dones.extend(findall(' win'))

In [132]:
findall(' rit')

['ANDREW RITTENBERG',
 'ERIN RITTER',
 'JOEL RITER',
 'MAX WILSON RITTER',
 'TOM RITTER']

In [133]:
dones.extend(findall(' rit'))

In [134]:
findall(' tat')

['ADAM TATUN',
 'ALIX TATE',
 'BRIAN TATUM',
 'DARRYL TATTRIE',
 'ELIZABETH TATE',
 'JOHN TATE',
 'MARTHA SHAW TATE',
 'SAMBA TATA',
 'SRINIVASA TATINENI',
 'STANLEY TATE']

In [135]:
cm[cm['tres_nm'].apply(lambda x: x in ['BRIAN TATUM', 'BRYAN TATUM'])]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
3203,C00415026,AMERICAN FUELS AND PETROCHEMICAL MANUFACTURERS...,BRIAN TATUM,1800 M ST NW,SUITE 900 NORTH,WASHINGTON,DC,20036,B,Q,UNK,M,T,AMERICAN FUELS AND PETROCHEMICAL MANUFACTURERS...,,"TATUM, BRIAN",1800 M STREET NW,"1800 M ST NW, WASHINGTON, DC 20036"


In [136]:
dones.extend(findall(' tat'))

In [137]:
findall('ralls')

['STEVE RALLS', 'STEVEN RALLS']

In [138]:
cm[cm['tres_nm'] == 'STEVEN RALLS']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
2845,C00395467,JEFF FORTENBERRY FOR UNITED STATES CONGRESS,STEVEN RALLS,PO BOX 30265,,LINCOLN,NE,68503,P,H,REP,Q,,NONE,H4NE01064,"RALLS, STEVEN",PO BOX 30265,"PO BOX 30265, LINCOLN, NE 68503"


In [139]:
cm[cm['tres_nm'] == 'STEVE RALLS'].head()

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
3389,C00426494,FOR OUR REPUBLIC'S TRADITIONS FUND AKA FORT FUND,STEVE RALLS,PO BOX 30883,,LINCOLN,NE,68503,D,N,UNK,M,,JEFF FORTENBERRY,,"RALLS, STEVE MR.",PO BOX 30883,"PO BOX 30883, LINCOLN, NE 68503"
3416,C00427781,ONLINE LENDERS ALLIANCE POLITICAL ACTION COMMI...,STEVE RALLS,PO BOX 15480,SE STATION,WASHINGTON,DC,20003,U,Q,UNK,M,M,ONLINE LENDERS ALLIANCE,,"RALLS, STEVE",PO BOX 15480,"PO BOX 15480, WASHINGTON, DC 20003"
5012,C00489336,WILD AND WONDERFUL PAC,STEVE RALLS,332 W LEE HWY,# 303,WARRENTON,VA,20186,D,Q,UNK,M,,,,"RALLS, STEVE",332 W LEE HWY,"332 W LEE HWY, WARRENTON, VA 20186"
5345,C00498345,MULLIN FOR CONGRESS,STEVE RALLS,PO BOX 3681,,MUSKOGEE,OK,74402,P,H,REP,Q,,MULLIN VICTORY FUND,H2OK02083,"RALLS, STEVE",PO BOX 3681,"PO BOX 3681, MUSKOGEE, OK 74402"
5508,C00504365,FUNDING REPUBLICANS SUPPORTING OPPORTUNITY AND...,STEVE RALLS,332 W LEE HWY,# 303,WARRENTON,VA,20186,D,N,UNK,T,,,,STEVE RALLS,332 W LEE HWY,"332 W LEE HWY, WARRENTON, VA 20186"


In [140]:
new = 'STEVE RALLS'
dones.append(new)

these = findall('ralls')

getall(these, new)

orig len: 22
new len: 23


In [141]:
findall(' has')

['BECKY HASSLEN',
 'CHRISSIE HASTIE',
 'GARY HASTY',
 'JOSEPH HASTO',
 'KHAWAR HASSAN',
 'NEIL HASSETT']

In [142]:
dones.extend(findall(' has'))

In [143]:
findall(' buch')

['EMILY BUCHANAN',
 'KATHERINE BUCHANAN',
 'KIMBERLY BUCHAN',
 'MARY ANNE BUCHANAN',
 'SHAWN BUCHTEL',
 'TIM BUCHE']

In [144]:
dones.extend(findall(' buch'))

In [145]:
findall(' bac')

['ANDREW BACON',
 'BENJAMIN BACKER',
 'CATHARYNE BACH',
 'DAN BACHUS',
 'DAN BACKER',
 'DANIEL BACINE',
 'JA BACHMAN',
 'JOEL BACON',
 'WILLIAM BACHMAN']

In [146]:
dones.extend(findall(' bac'))

In [147]:
findall(' brog')

['KEVIN BROGHAMER', 'LEE ANNE BROGOWSKI', 'MICHAEL BROGAN', 'RICHARD BROGAN']

In [148]:
dones.extend(findall(' brog'))

In [149]:
findall(' gia')

['ANDREW GIANNONE',
 'FRANCIS GIARDIELLO',
 'HOLLY GIARRAPUTO',
 'JOEL GIANNELLI',
 'SAVERIO GIAMBALVO',
 'SUSAN GIANNETTI LONGACRE']

In [150]:
findall('longacre')

['SUSAN GIANNETTI LONGACRE']

In [151]:
dones.extend(findall(' gia'))

In [152]:
findall(' deane')

['SHAWNDA DEANE']

In [153]:
dones.extend(findall(' deane'))

In [154]:
findall(' hag')

['DEBORAH RUTH HAGAR',
 'HILLARY HAGERTY',
 'JANICE GWYN HAGERMAN',
 'JOHN HAGY',
 'LORA HAGGARD',
 'PAUL HAGY',
 'RICHARD HAGEN',
 'SARAH HAGER',
 'TIMOTHY HAGAN']

In [155]:
dones.extend(findall(' hag'))

In [156]:
findall(' anger')

['LINDSAY ANGERHOLZER']

In [157]:
dones.extend(findall(' anger'))

In [158]:
findall(' lowe')

['AARON LOWE',
 'GRETCHEN LOWE',
 'JENNIFER LOWE',
 'KEITH LOWEY',
 'LUCAS LOWELL EASLEY',
 'ROSS LOWE',
 'TRAVIS LOWE']

In [159]:
dones.extend(findall(' lowe'))

In [160]:
findall(' bau')

['BRIAN BAUER',
 'CARL BAUMAN',
 'DAVID BAUER',
 'DAWN BAUMAN',
 'GARY BAUER',
 'LINDA BAUER DARR',
 'NATALIE BAUR',
 'ORPHEAO BAUM',
 'PAULA BAUER',
 'TED BAUER']

In [161]:
findall(['ju', 'mart'])

[]

In [162]:
dones.extend(findall(' bau'))

In [163]:
findall(' evans')

['BRENT EVANS',
 'BRIAN EVANS',
 'DEBI EVANS',
 'DIANE EVANS',
 'DOUG EVANS',
 'GEORGE EVANS',
 'HEATHER EVANS',
 'JAMES EVANS',
 'JON EVANS',
 'LYLE EVANS',
 'MELISSA ANNE EVANS',
 'NICHOLAS EVANS',
 'WALTER EVANS']

In [164]:
findall(' doyle')

['JOHN DOYLE', 'MATTHEW DOYLE', 'PETER DOYLE', 'TIM DOYLE']

In [165]:
dones.extend(findall(' doyle'))

In [166]:
dones.extend(findall(' evans'))

In [167]:
findall(' mason')

['DAVE MASON',
 'DAVID MASON',
 'DIRK MASON CANTRELL',
 'DYANA MASON',
 'JACQUELINE MASON',
 'MARCUS MASON',
 'MICHAEL MASON',
 'STACY MASON']

In [168]:
cm[cm['tres_nm'] == 'DAVE MASON'].head()

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
3021,C00406850,BLUE CROSS BLUE SHIELD OF SOUTH CAROLINA FEDER...,DAVE MASON,INTERSTATE 20 AT ALPINE RD,,COLUMBIA,SC,29214,B,Q,UNK,Q,C,,,"MASON, DAVE MR.",INTERSTATE 20 AT ALPINE ROAD,"INTERSTATE 20 AT ALPINE RD, COLUMBIA, SC 29214"


In [169]:
cm[(cm['tres_nm'] == 'DAVID MASON') & (cm['cmte_st'] == 'SC')].head()

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [170]:
findall('cantrell')

['DIRK MASON CANTRELL']

In [171]:
cm[cm['tres_nm'] == 'MICHAEL THOMAS MASON'].head()

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [172]:
cm[(cm['tres_nm'] == 'MICHAEL MASON') & (cm['cmte_st'] == 'TX')].head()

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
1574,C00268904,TRINITY INDUSTRIES EMPLOYEE POLITICAL ACTION C...,MICHAEL MASON,2525 N STEMMONS FREEWAY,,DALLAS,TX,75207,B,Q,UNK,M,C,TRINITY INDUSTRIES INC.,,"MASON, MICHAEL J.",2525 N. STEMMONS FREEWAY,"2525 N STEMMONS FREEWAY, DALLAS, TX 75207"


In [173]:
new = 'MICHAEL MASON'
dones.append(new)

these = findall(['michael', 'mason'])

getall(these, new)

orig len: 1
new len: 1


In [174]:
findall(' hale')

['CHRISTINA HALEY',
 'DAN HALEY',
 'DONALD BRETT HALE',
 'JAMES HALE',
 'NANCY HALEY',
 'RANDY HALE',
 'TONY HALE',
 'YVONNE HALEJKO']

In [175]:
dones.extend(findall(' mason'))

In [176]:
findall(' lloyd')

['JOHN LLOYD', 'JUSTIN LLOYD MCNEEL', 'PHILIP LLOYD', 'WILLIS LLOYD']

In [177]:
findall(' mcneel')

['JUSTIN LLOYD MCNEEL']

In [178]:
dones.extend(findall(' lloyd'))

In [179]:
findall(' johnson')

['ANDREW JOHNSON',
 'AQUASIA JOHNSON',
 'ARLEY JOHNSON',
 'ARNOLD JOHNSON',
 'BETTY JOHNSON',
 'BRAD JOHNSON',
 'BRIAN JOHNSON',
 'BURT JOHNSON',
 'CATHERINE JOHNSON',
 'CHARLES JOHNSON',
 'CHELSEA JOHNSON',
 'CHRIS JOHNSON',
 'CLAY JOHNSON',
 'CLIFFORD JOHNSON',
 'CONSTANCE JOHNSON',
 'CORY JOHNSON',
 'DAVID JOHNSON',
 'DUANE JOSEPH JOHNSON',
 'EDDIE JOHNSON',
 'EMMA JOHNSON',
 'ERIC JOHNSON',
 'ERIK ARLEN JOHNSON',
 'GEORGE JOHNSON',
 'GREGORY JOHNSON',
 'JAMES JOHNSON',
 'JUDITH JOHNSON',
 'JULIE JOHNSON',
 'KENNETH JOHNSON',
 'LINELL JOHNSON',
 'LORRETTA JOHNSON',
 'LOUIS JOHNSON',
 'MARK JOHNSON',
 'MATTHEW JOHNSON',
 'MELODIE JOHNSON',
 'MICHAEL JOHNSON',
 'OTIS LEE JOHNSON',
 'PRINCESS JOHNSON',
 'ROBERT JOHNSON',
 'SARAH JOHNSON',
 'STANLEY JOHNSON',
 'STEPHEN JOHNSON',
 'SUSAN JOHNSON',
 'THOMAS JOHNSON',
 'THOMAS WAYNE JOHNSON',
 'VERDELL JOHNSON',
 'WILLIAM JOHNSON']

In [180]:
cm[cm['tres_nm'] == 'ARLEN JOHNSON']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [181]:
cm[cm['tres_nm'] == 'ERIK ARLEN JOHNSON']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
16221,C00686238,RESISTERY PAC,ERIK ARLEN JOHNSON,275 LAKE DR,,KENSINGTON,CA,94708,U,O,UNK,Q,,,,ERIK ARLEN JOHNSON,275 LAKE DRIVE,"275 LAKE DR, KENSINGTON, CA 94708"


In [182]:
cm[cm['tres_nm'] == 'CHRIS JOHNSON']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
6169,C00528166,NATIONAL ASSOCIATION FOR FIXED ANNUITIES POLIT...,CHRIS JOHNSON,1155 F ST NW,SUITE 1050,WASHINGTON,DC,20004,U,Q,UNK,Q,T,,,"JOHNSON, CHRIS",1155 F ST NW,"1155 F ST NW, WASHINGTON, DC 20004"


In [183]:
cm[cm['tres_nm'] == 'CHRISTOPHER LEE JOHNSON']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [184]:
cm[cm['tres_nm'] == 'JIM JOHNSON']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [185]:
cm[cm['tres_nm'] == 'JAMES JOHNSON']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
1378,C00236034,WERNER ENTERPRISES INC POLITICAL ACTION COMMITTEE,JAMES JOHNSON,14507 FRONTIER RD,,OMAHA,NE,68138,U,Q,UNK,Q,C,WERNER ENTERPRISES INC,,"JOHNSON, JAMES L.",14507 FRONTIER ROAD,"14507 FRONTIER RD, OMAHA, NE 68138"
3162,C00412569,DENTON COUNTY REPUBLICAN VICTORY FUND,JAMES JOHNSON,2921 COUNTRY CLUB RD,,DENTON,TX,76202,U,X,REP,M,M,REPUBLICAN PARTY OF TEXAS,,"JOHNSON, JAMES S.",2921 COUNTRY CLUB RD #102,"2921 COUNTRY CLUB RD, DENTON, TX 76202"


In [186]:
cm[cm['tres_nm'] == 'LORETTA JOHNSON']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [187]:
cm[cm['tres_nm'] == 'LORRETTA JOHNSON']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
216,C00028860,"AMERICAN FEDERATION OF TEACHERS, AFL-CIO COMMI...",LORRETTA JOHNSON,555 NEW JERSEY AVE NW,,WASHINGTON,DC,20001,B,Q,UNK,M,L,"AMERICAN FEDERATION OF TEACHERS, AFL-CIO",,"JOHNSON, LORRETTA","555 NEW JERSEY AVENUE, NW","555 NEW JERSEY AVE NW, WASHINGTON, DC 20001"
17352,C70002472,"AMERICAN FEDERATION OF TEACHERS, AFL-CIO",LORRETTA JOHNSON,555 NEW JERSEY AVE NW,,WASHINGTON,DC,20001,U,C,UNK,Q,L,,,"JOHNSON, LORRETTA","555 NEW JERSEY AVENUE, N.W.","555 NEW JERSEY AVE NW, WASHINGTON, DC 20001"


In [188]:
new = 'LORRETTA JOHNSON'
dones.append(new)

these = findall(['lor', 'etta', 'johnson'])

getall(these, new)

orig len: 2
new len: 2


In [189]:
findall(['mi', 'gibson'])

['MICHAEL GIBSON']

In [190]:
dones.extend(findall(' johnson'))

In [191]:
findall(' goldstein')

[]

In [192]:
dones.extend(findall(' goldstein'))

In [193]:
findall(' marks')

['BRUCE MARKS',
 'ELENA MARKS',
 'JEFFREY MARKS',
 'MICHELLE MARKS-OSBORNE',
 'NANCY MARKS',
 'STEPHEN MARKS']

In [194]:
dones.extend(findall(' marks'))

In [195]:
findall(' grav')

['RONALD GRAVINO', 'SCOTT GRAVES', 'SHARRON GRAVES']

In [196]:
dones.extend(findall(' grav'))

In [197]:
findall(' eich')

['ALYSSA EICHMAN', 'FRED EICHEL', 'RICHARD EICHMAN']

In [198]:
dones.extend(findall(' eich'))

In [199]:
findall(' fou')

['BRIAN FOUCART', 'KEITH FOURNIER', 'ROGER FOUNTAIN']

In [200]:
dones.extend(findall(' fou'))

In [201]:
findall(' martin')

['ANDY MARTIN',
 'ANTONIO MARTINEZ',
 'CARL MARTIN NELSON',
 'CASSANDRA MARTINEZ',
 'CHARLES MARTIN',
 'CHRIS MARTIN',
 'CLIFFORD MARTIN',
 'DAVID MARTIN',
 'ED MARTIN',
 'EDWARD MARTIN',
 'ERNESTO MARTINEZ',
 'FRANCES MARTIN',
 'GAYLE MARTIN',
 'GEMMA MARTIN',
 'GREGORY MARTIN WADE',
 'JANE MARTIN',
 'JANICE MARTIN',
 'JEAN MARTINEZ',
 'JENA TONICE MARTIN',
 'JENNIFER MARTIN',
 'JOHN MARTIN',
 'JONATHAN MARTIN',
 'JOSE DANIEL MARTINES',
 'JOSEPH MARTIN',
 'KEVIN MARTIN',
 'LARISSA MARTINEZ',
 'LILIANA MARTINEZ',
 'LISA MARTINEZ',
 'LOUIE CRUZ MARTINEZ',
 'MARIA MARTINEZ',
 'MARIO MARTINEZ',
 'NOVEL MARTIN',
 'PAUL MARTINO',
 'PAULETTE MARIE MARTIN',
 'ROBERT MARTINEZ',
 'ROQUE MARTINEZ',
 'SANDRA MARTINEZ',
 'SARAH MARTIN',
 'STEVEN MARTIN',
 'SUSAN MARTIN',
 'TERESA MARTINEZ',
 'XAVIER MARTINEZ']

In [202]:
findall(' salazar')

[]

In [203]:
findall('salzburg')

['MICHAEL JOSEPH SALZBURG-FELTS']

In [204]:
cm[cm['tres_nm'] == 'STEVE MARTIN']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [205]:
cm[(cm['tres_nm'] == 'STEVEN MARTIN') & (cm['cmte_st'] == 'NC')]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [206]:
dones.extend(findall(' martin'))

In [207]:
findall(' davidson')

['CARY DAVIDSON', 'DWIGHT DAVIDSON', 'REBECCA DAVIDSON', 'ROBERT DAVIDSON']

In [208]:
findall(['paul', 'reynolds'])

['PAUL REYNOLDS']

In [209]:
cm[cm['tres_nm'] == 'PAUL REYNOLDS']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
12660,C00649459,DIANE FOR COLORADO CD3,PAUL REYNOLDS,PO BOX 771606,,STEAMBOAT SPRINGS,CO,80477,P,H,DEM,Q,,DIANE MITSCH BUSH VICTORY FUND,H8CO03192,"REYNOLDS, PAUL D. MR.",PO BOX 771606,"PO BOX 771606, STEAMBOAT SPRINGS, CO 80477"
16364,C00687715,DIANE MITSCH BUSH VICTORY FUND,PAUL REYNOLDS,PO BOX 771606,,STEAMBOAT SPRINGS,DC,80477,J,N,UNK,Q,,NONE,,"REYNOLDS, PAUL D. MR.",PO BOX 771606,"PO BOX 771606, STEAMBOAT SPRINGS, DC 80477"


In [210]:
cm[cm['tres_nm'] == 'PAUL DAVIDSON REYNOLDS']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [211]:
new = 'PAUL DAVIDSON REYNOLDS'
dones.append(new)

these = findall(['paul', 'reynolds'])

getall(these, new)

orig len: 0
new len: 2


In [212]:
dones.extend(findall(' davidson'))

In [213]:
findall(' bren')

['ANNE BRENSLEY',
 'CHRISTOPHER BRENNAN',
 'GREGORY BRENNICK',
 'LAUREL BRENNAN',
 'MEGAN BRENGARTH',
 'OZY ROSE BRENNAN',
 'TOM BRENEMAN',
 'TUCKER BRENNAN']

In [214]:
findall(' mantan')

[]

In [215]:
dones.extend(findall(' bren'))

In [216]:
findall(' mont')

['CHRISTOPHER MONTGOMERY WOODFIN',
 'DACEY MONTOYA',
 'DARRIN MONTEIRO',
 'DAVA MONTGOMERY',
 'DIANE MONTGOMERY',
 'JAY MONTGOMERY',
 'LUKE MONTGOMERY',
 'MEGAN MONTGOMERY',
 'MICHAEL MONT',
 'PATRICIA MONTAGUE',
 'RUSSELL MONTGOMERY',
 'THOMAS MONTGOMERY']

In [217]:
dones.extend(findall(' mont'))

In [218]:
findall(' hank')

['BRENDA HANKINS']

In [219]:
dones.extend(findall(' hank'))

In [220]:
findall(' lesh')

['GREGORY LESHOCK', 'MEREDITH LESHER', 'WARREN LESHNER']

In [221]:
dones.extend(findall(' lesh'))

In [222]:
findall(' bre')

['ANNE BRENSLEY',
 'BAILEY BRESSLER',
 'BEN BREWSTER',
 'BENJAMIN BREUER',
 'CATHERINE BRESLER',
 'CHARLES BREIT',
 'CHRISTOPHER BRENNAN',
 'CHRISTOPHER SCOTT BREWER',
 'DONALD BRETT HALE',
 'GREGORY BRENNICK',
 'HANNELORE BREITMEYER-JONES',
 'JAMES TROY BREWER',
 'JONATHAN BRETT RINGHAM',
 'LAUREL BRENNAN',
 'MARK BREBBERMAN',
 'MEGAN BRENGARTH',
 'MICHAEL BREATHES',
 'NICOLE BREWIN',
 'OZY ROSE BRENNAN',
 'PATRICIA BREED',
 'PAUL BREAZEALE',
 'TOM BRENEMAN',
 'TROY BREWER',
 'TUCKER BRENNAN',
 'VICTOR BREED',
 'WADE BREWER']

In [223]:
cm[cm['tres_nm'] == 'JAMES BREWER']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [224]:
cm[cm['tres_nm'] == 'JAMES TROY BREWER']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
11743,C00635219,DAVID PAC DELIVERING AMERICAN VALUES IN DC,JAMES TROY BREWER,95 WHITE BRIDGE RD,,NASHVILLE,TN,37205,D,N,UNK,Q,,DAVID KUSTOFF,,JAMES TROY BREWER,95 WHITE BRIDGE RD SUITE 207,"95 WHITE BRIDGE RD, NASHVILLE, TN 37205"
15870,C00682633,TENNESSEANS FOR A STRONG AMERICA PAC,JAMES TROY BREWER,95 WHITE BRIDGE RD,SUITE 207,NASHVILLE,TN,37205,U,O,UNK,Q,,NONE,,"BREWER, JAMES TROY",95 WHITE BRIDGE RD,"95 WHITE BRIDGE RD, NASHVILLE, TN 37205"


In [225]:
new = 'JAMES BREWER'
dones.append(new)

these = findall(['james', 'brewer'])

getall(these, new)

orig len: 0
new len: 2


In [226]:
findall(' ringham')

['JONATHAN BRETT RINGHAM']

In [227]:
dones.extend(findall(' bre'))

In [228]:
findall(' sel')

['CRYSTAL SELLERS',
 'GREGORY SELTZER',
 'JANET SELWAY',
 'JOHN SELPH',
 'JUDITH SELZER',
 'KATHY SELVAGGIO',
 'LAURA SELKEN',
 'LAURIE SELF',
 'LINDA SELL',
 'MONIKA SELMONT',
 'PAUL SELTMAN',
 'STEVE SELTZER']

In [229]:
findall(['ja', 'peters'])

[]

In [230]:
findall(['sar', 'scot'])

['SARAH HARPER SCOTT']

In [231]:
dones.extend(findall(' sel'))

In [232]:
findall(' owen')

['DAVID OWEN',
 'ETHAN OWENS',
 'GAIL DENISE OWENS',
 'JOHN OWENS',
 'LINDA OWENS',
 'OSCAR OWENS',
 'ROBERT OWENS',
 'RODNEY OWEN MOSIER',
 'SELENA OWENS',
 'STACY OWENS',
 'THADDEUS OWENS',
 'WENDI OWEN',
 'WENDY ANNOR OWENS']

In [233]:
findall(' mosier')

['HELEN MOSIER', 'JONATHAN MOSIER', 'RODNEY OWEN MOSIER']

In [234]:
dones.extend(findall(' owen'))

In [235]:
findall(' vo')

['CHRISTOPHER VOROS',
 'DATHAN VOELTER',
 'FRANCIS VOIGT',
 'HARRISON WAGNER VON DWINGELO',
 'JEN GILBERT VOSS',
 'MARK VOGEL',
 'MARTIN VOGT',
 'PATRICK VOSS',
 'PAUL VOGEL',
 'RICHARD VOLIVA',
 'ROBERT VOLTMANN',
 'ROGER VON TING',
 'TARYN VOGEL']

In [236]:
findall(' luza')

[]

In [237]:
dones.extend(findall(' vo'))

In [238]:
findall(' bol')

['APRIL BOLING',
 'CHRISTINE BOLDT',
 'DANIEL BOLLNER',
 'DIANE BOLAK',
 'DONALD PATRICK BOLENA',
 'ELDAR BOLSAKOV',
 'JEFFERY BOLTON',
 'JERRY BOLES',
 'JESSE BOLIN',
 'JOHN BOLING',
 'KIMBERLY BOLIN',
 'MICHAEL BOLAND',
 'MICHAEL BOLLENTIN',
 'MICHAEL BOLTON',
 'MIKE BOLAND',
 'NATHAN BOLT',
 'PRENTISS BOLIN',
 'REGINALD BOLDING',
 'TOM BOLOVINOS',
 'VINCENT BOLLON']

In [239]:
dones.extend(findall(' bol'))

In [240]:
findall(' hub')

['AARON HUBBARD',
 'BEN HUBBY',
 'CHARLES HUBERT HAUSER',
 'EDWARD HUBBARD',
 'FREDDIE HUBBARD',
 'HARRY HUBBARD',
 'JACK HUBBARD',
 'JIM HUBBARD',
 'SCOTT HUBAY',
 'TSHOMBE HUBBARD']

In [241]:
findall(' hauser')

['CHARLES HUBERT HAUSER']

In [242]:
dones.extend(findall(' hub'))

In [243]:
findall(' oz')

['NINA OZLU TUNCELI', 'WILLIAM OZANUS']

In [244]:
findall(' tunc')

['NINA OZLU TUNCELI']

In [245]:
dones.extend(findall(' oz'))

In [246]:
findall(' groe')

['ERIC GROEN', 'REBECCA GROEN']

In [247]:
dones.extend(findall(' groe'))

In [248]:
findall(' sola')

['DEREK SOLAR', 'KRISTIN SOLANDER']

In [249]:
findall(['ed', 'patterson'])

[]

In [250]:
dones.extend(findall(' sola'))

In [251]:
findall(' rupp')

['BEN RUPP', 'DAVID RUPPENICKER', 'JEFFREY RUPPERT', 'LORI RUPPEN']

In [252]:
dones.extend(findall(' rupp'))

In [253]:
findall(' schi')

['ADAM VICTOR SCHILLER',
 'AMY SCHILLING',
 'ANTHONY SCHIWEIER',
 'ERIC SCHIPPERS',
 'JAMES SCHISSER',
 'JOHN SCHILLING',
 'JOSEPH SCHINTZ',
 'KEVIN SCHIEFFER',
 'LOUIS SCHIAZZA',
 'MARY SCHILLING',
 'SARAH SCHIMDT']

In [254]:
dones.extend(findall(' schi'))

In [255]:
findall(' guin')

[]

In [256]:
dones.extend(findall(' guin'))

In [257]:
findall(' stone')

['ANN STONE',
 'ERIC STONEHAM',
 'LISA STONE',
 "O'LENE STONE",
 'ROBIN STONE',
 'RUTH ROCHELLE STONER',
 'SAMUEL STONE',
 'SHARON STONES',
 'SHERRI STONE']

In [258]:
dones.extend(findall(' stone'))

In [259]:
findall(' cros')

['CALEB CROSBY',
 'COLLEEN CROSSEY',
 'GRADY CROSBY',
 'KAREN CROSS',
 'MARK CROSS']

In [260]:
dones.extend(findall(' cros'))

In [261]:
findall(' gla')

['ALAN GLAZIER',
 'EUSTACE GLASGOW',
 'EZRA GLASER',
 'JOHN GLANCEY',
 'MITCH GLAZIER',
 'WILLIAM GLASS']

In [262]:
findall('hemmig')

[]

In [263]:
dones.extend(findall(' gla'))

In [264]:
findall(' ragan')

['ASHLEY RAGAN', 'JACKI RAGAN', 'JENNIFER RAGAN', 'VIRGINIA RAGAN']

In [265]:
dones.extend(findall(' ragan'))

In [266]:
findall(' mau')

['DAVID MAURO',
 'GEORGE MAURER',
 'STACEY MAUD',
 'TERRY MAUPIN',
 'WENDY MAUSOLF']

In [267]:
findall('littlejohn')

[]

In [268]:
findall('buchser')

[]

In [269]:
dones.extend(findall(' mau'))

In [270]:
findall(' snyd')

['CRAIG SNYDER', 'FRANK SNYDER', 'JASON SNYDER', 'RICHARD SNYDER']

In [271]:
dones.extend(findall(' snyd'))

In [272]:
findall(' pur')

['BYRON PURCELL',
 'CRAIG PURSER',
 'KENNETH PURNELL YANCY',
 'MICHAEL PURZYCKI',
 'PAULA PURDY',
 'SALVATORE PURPURA',
 'SANJAY PURI',
 'STARR PURDUE']

In [273]:
findall(' balc')

[]

In [274]:
dones.extend(findall(' pur'))

In [275]:
findall(' lei')

['BRYAN LEIB',
 'CODY PAUL LEISTIKOW',
 'CYNTHIA LEIGH APPLEBAUM',
 'ERIC LEIGH KELLER',
 'GARY LEIGH',
 'GLEN LEIBOWITZ',
 'JANE LEIDERMAN',
 'JUSTIN LEIGH FARBER',
 'MIGUEL LEIJA',
 'SUSAN LEIVAS-STURNER']

In [276]:
findall('bronson')

[]

In [277]:
findall('kasz')

[]

In [278]:
findall(['cynth', 'apple'])

['CYNTHIA LEIGH APPLEBAUM']

In [279]:
findall('fowler')

['PAUL FOWLER', 'RICHARD ANTHONY FOWLER', 'SCOTT FOWLER', 'SEAN FOWLER']

In [280]:
findall('keller')

['AARON KELLER',
 'CRAIG KELLER',
 'ERIC KELLER',
 'ERIC LEIGH KELLER',
 'JAMES KELLER',
 'LYNNE KELLER']

In [281]:
cm[cm['tres_nm'] == 'ERIC KELLER']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
16029,C00684282,EDEMOCRATS PAC LLC,ERIC KELLER,70 LINDEN AVE,P O BOX 2134,HADDONFIELD,NJ,8033,U,N,UNK,Q,,NONE,,"KELLER, ERIC MR",70 LINDEN AVENUE,"70 LINDEN AVE, HADDONFIELD, NJ 08033"


In [282]:
cm[cm['tres_nm'] == 'ERIC LEIGH KELLER']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
14311,C00666537,PRESS FREEDOM POLITICAL ACTION COMMITTEE,ERIC LEIGH KELLER,902 TURKEY RUN RD,,MCLEAN,VA,22101,U,N,UNK,Q,,NONE,,"KELLER, ERIC LEIGH MR.",902 TURKEY RUN ROAD,"902 TURKEY RUN RD, MCLEAN, VA 22101"


In [283]:
findall(' hubbard')

['AARON HUBBARD',
 'EDWARD HUBBARD',
 'FREDDIE HUBBARD',
 'HARRY HUBBARD',
 'JACK HUBBARD',
 'JIM HUBBARD',
 'TSHOMBE HUBBARD']

In [284]:
dones.extend(findall(' lei'))

In [285]:
findall(' tea')

['DONALD TEAL', 'GARY TEAL', 'MATT TEAGARDEN', 'RICHARD TEAMAN']

In [286]:
dones.extend(findall(' tea'))

In [287]:
findall(' rut')

['DEBORAH RUTH HAGAR',
 'JANNA RUTLAND',
 'JEFFREY RUTAN',
 'JOHN RUTKAUSKAS',
 'LUCY RUTISHAUSER',
 'REID RUTHERFORD',
 'SUSAN RUTT']

In [288]:
findall('bellar')

[]

In [289]:
findall(' mayo')

['HEATH MAYO']

In [290]:
dones.extend(findall(' mayo'))

In [291]:
dones.extend(findall(' rut'))

In [292]:
findall(' elli')

['ALLEN ELLISON',
 'AMY ELLIS',
 'GREG ELLIOTT',
 'HEIDI ELLIS ROBEY',
 'JEROME ELLIOTT TRES TRUSTEE FIELDS',
 'JOE ELLISON',
 'JOHN CLELLAND ELLIS',
 'LISA ELLIS',
 'MICHAEL ELLIOTT',
 'SCOTT ELLINGTON',
 'THERESA ELLINGTON',
 'TWANA ELLIOTT']

In [293]:
findall(' robey')

['HEIDI ELLIS ROBEY']

In [294]:
findall(' dahl')

['LISA DAHLING', 'THOMAS DAHLEN']

In [295]:
dones.extend(findall(' elli'))

In [296]:
findall(' kee')

['BRONWYN KEENAN',
 'DARRYL KEENAN SEGARS',
 'DAVID KEEN',
 'HELENE KEELEY',
 'JOHN KEELING',
 'LYNETTE KEETON',
 'MICHAEL KEELING',
 'THORNTON KEEL']

In [297]:
findall(' segars')

['DARRYL KEENAN SEGARS']

In [298]:
dones.extend(findall(' kee'))

In [299]:
findall(' jan')

['AUSTIN BENNET TR JANSSEN',
 'BEVE JANE THAKHAMHOR',
 'DEBORAH JANSEN',
 'DNP APRN JANICE JONES',
 'ELAINE JANUS',
 'GORAN JANKOVIC',
 'JOHN JANKOWSKI',
 'LATREASHA JANET GIVENS',
 'MICHELLE JANEEN WHITE',
 'TIMOTHY JANISSE']

In [300]:
findall(' janssen')

['AUSTIN BENNET TR JANSSEN']

In [301]:
findall(' thakh')

['BEVE JANE THAKHAMHOR']

In [302]:
findall('osgerby')

[]

In [303]:
findall('wharton')

['CYNTHIA WHARTON', 'KOTA WHARTON']

In [304]:
findall(' tanis')

[]

In [305]:
dones.extend(findall(' jan'))

In [306]:
findall(' dupr')

['ABBY DUPREE']

In [307]:
dones.extend(findall(' dupr'))

In [308]:
findall(' schwartz')

['ANDREW SCHWARTZ',
 'ARTHUR SCHWARTZ',
 'JAMES SCHWARTZ',
 'JENNIFER SCHWARTZ',
 'KARL SCHWARTZ',
 'LAURA SCHWARTZ',
 'MELODIE SCHWARTZ',
 'STEVEN SCHWARTZ']

In [309]:
cm[cm['tres_nm'] == 'LAURA SCHWARTZ']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
8348,C00580415,FASO FOR CONGRESS,LAURA SCHWARTZ,PO BOX 448,,KINDERHOOK,NY,12106,P,H,REP,Q,,FRESHMAN AGRICULTURAL REPUBLICAN MEMBERS TRUST...,H6NY19169,"SCHWARTZ, LAURA A. MS.",PO BOX 448,"PO BOX 448, KINDERHOOK, NY 12106"
9348,C00602755,LANGE FOR CONGRESS,LAURA SCHWARTZ,PO BOX 481,,SOUTH SALEM,NY,10590,P,H,REP,Q,,NONE,,"SCHWARTZ, LAURA A.",PO BOX 481,"PO BOX 481, SOUTH SALEM, NY 10590"
10580,C00622100,FASO VICTORY COMMITTEE,LAURA SCHWARTZ,PO BOX 98,,SOUTH SALEM,NY,10590,J,N,UNK,T,,,,"SCHWARTZ, LAURA",PO BOX 98,"PO BOX 98, SOUTH SALEM, NY 10590"
11309,C00630681,EMPIRE STATE PAC,LAURA SCHWARTZ,PO BOX 98,,SOUTH SALEM,NY,10590,D,N,UNK,Q,,,,"SCHWARTZ, LAURA A.",PO BOX 98,"PO BOX 98, SOUTH SALEM, NY 10590"
11556,C00633263,JOHN FASO VICTORY COMMITTEE,LAURA SCHWARTZ,PO BOX 98,,SOUTH SALEM,NY,10590,J,N,UNK,Q,,NONE,,"SCHWARTZ, LAURA",PO BOX 98,"PO BOX 98, SOUTH SALEM, NY 10590"
14735,C00670885,NEW YORK MAJORITY VICTORY,LAURA SCHWARTZ,PO BOX 98,,SOUTH SALEM,NY,10590,J,H,UNK,T,,,,"SCHWARTZ, LAURA",PO BOX 98,"PO BOX 98, SOUTH SALEM, NY 10590"
15542,C00679225,KEEP THE HOUSE,LAURA SCHWARTZ,PO BOX 98,,SOUTH SALEM,NY,10590,J,N,UNK,T,,,,LAURA SCHWARTZ,P.O. BOX 98,"PO BOX 98, SOUTH SALEM, NY 10590"


In [310]:
cm[cm['tres_nm'] == 'LAURA ANN SCHWARTZ']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [311]:
dones.extend(findall(' schwartz'))

In [312]:
findall(' kau')

['AMIT KAUL',
 'EMILY KAUFMAN',
 'JASON KAUNE',
 'KARL KAUFMANN',
 'PHIL KAUFMAN',
 'ROBERT KAUFMANN',
 'SCOTT KAUFMAN',
 'STEPHEN KAUFMAN']

In [313]:
cm[cm['tres_nm'] == 'RON KAUFMAN']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [314]:
cm[cm['tres_nm'] == 'RONALD KAUFMAN']

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [315]:
new = 'RONALD KAUFMAN'
dones.append(new)

these = findall(['ron', ' kaufman'])

getall(these, new)

orig len: 0
new len: 0


In [316]:
dones.extend(findall(' kau'))

In [317]:
findall(' riesc')

['JOSE RIESCO']

In [318]:
dones.extend(findall(' riesc'))

In [319]:
findall(' cop')

['ARTHUR COPLESTON',
 'BILLY COPELAND',
 'ELIZABETH COPPINGER',
 'MICHAEL COPELEY',
 'RITA COPELAND',
 'VONA COPP']

In [320]:
dones.extend(findall(' cop'))

In [321]:
findall(' bal')

['AJ BALUKOFF',
 'BRIANA BALESKIE',
 'JAMES BALL',
 'JOHN BALLARD',
 'JONATHAN BALDWIN',
 'JONATHAN BALL',
 'JOSHUA BALLARD',
 'KAREN BALLARD',
 'KEITH BALKCOM',
 'KRYSTAL BALL',
 'LAURA BALDI',
 'MATANGI BALA',
 'MATTHEW BALAZIK',
 'MICHAEL BALL',
 'PAUL BALASSA',
 'ROBERT BALGENORTH',
 'SUSAN BALKENBUSH',
 'TRACEY BALL']

In [322]:
dones.extend(findall(' bal'))

In [323]:
findall(' tall')

['VINCENT TALLMAN']

In [324]:
dones.extend(findall(' tall'))

In [325]:
findall(' juk')

['JOEL JUKUS']

In [326]:
dones.extend(findall(' juk'))

In [327]:
findall(' milln')

[]

In [328]:
dones.extend(findall(' milln'))

In [329]:
findall(' goode')

['KIMBERLY GOODEN', 'MICHAEL GOODE', 'WARREN GOODE']

In [330]:
dones.extend(findall(' goode'))

In [331]:
findall(' kun')

['ADEN KUN',
 'CRAIG KUNKLE',
 'DAVE KUNES',
 'HOORIA KUNDI',
 'JOHN KUNITZ',
 'JOHN WILLIAM KUNITZ',
 'LORNA KUNEY',
 'TEKIN KUNT']

In [332]:
cm[cm['tres_nm'].apply(lambda x: ('KUNITZ' in x) & ('JOHN' in x))]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
10896,C00626093,THIRD CONGRESSIONAL DISTRICT REPUBLICAN PARTY ...,JOHN KUNITZ,PO 390722,,EDINA,MN,55439,U,X,REP,Q,,NONE,,"KUNITZ, JOHN W MR.",PO 390722,"PO 390722, EDINA, MN 55439"
15499,C00678789,CONSERVATIVES FOR AMERICA,JOHN WILLIAM KUNITZ,6441 BRETTON WAY,,CHANHASSEN,MN,55317,U,X,REP,Q,,NONE,,"KUNITZ, JOHN WILLIAM MR",6441 BRETTON WAY,"6441 BRETTON WAY, CHANHASSEN, MN 55317"


In [333]:
new = 'JOHN WILLIAM KUNITZ'
dones.append(new)

these = findall(['kunitz', 'john'])

getall(these, new)

orig len: 1
new len: 2


In [334]:
dones.extend(findall(' kun'))

In [335]:
findall('lefko')

[]

In [336]:
cm[cm['tres_nm'].apply(lambda x: 'LEFKOWITZ' in x)]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [337]:
new = 'JON LEFKOWITZ'
dones.append(new)

these = findall('lefkowitz')

getall(these, new)

orig len: 0
new len: 0


In [338]:
findall(' sif')

['TIM SIFERT', 'TIMOTHY SIFERT']

In [339]:
dones.extend(findall(' sif'))

In [340]:
findall(' vand')

['CRAIG VANDERVEER',
 'DAWNE VANDIVER',
 'DICK VANDER WOUDE',
 'DIRK VANDONGEN',
 'ELIZABETH ANN VANDERWAY',
 'MIKE VANDERWEY',
 'NIGEL VANDERFORD',
 'RAY VANDRIESSCHE',
 'SHAWN VANDIVER',
 'WILLIAM VANDERBROOK']

In [341]:
dones.extend(findall(' vand'))

In [342]:
findall(' changkyu')

[]

In [343]:
new = 'JASON CHANGKYU KIM'
dones.append(new)

these = findall(' changkyu')

getall(these, new)

orig len: 0
new len: 0


In [344]:
findall(['ash', 'newman'])

['ASHLEY NEWMAN']

In [345]:
dones.extend(findall(['ash', 'newman']))

In [346]:
findall(' rogers')

['BRETT ROGERS',
 'CHERI ROGERS',
 'CINNAMON ROGERS',
 'DENNIS ROGERS',
 'EMORY ROGERS',
 'GEORGE ROGERS',
 'GLEN ROGERS',
 'GRACE ROGERS',
 'KEVIN ROGERS',
 'MARCIA MATHISON ROGERS',
 'MICHAEL ROGERS',
 'STEVEN ROGERS']

In [347]:
dones.extend(findall(' rogers'))

In [348]:
findall(' kennedy')

['CASTLEN KENNEDY',
 'GORDON KENNEDY',
 'JEANNE KENNEDY',
 'JOHN FITZGERALD KENNEDY LANKSTER',
 'KATHERINE KENNEDY',
 'KATIE KENNEDY',
 'KEVIN KENNEDY',
 'MAURA KENNEDY',
 'MICHAEL KENNEDY',
 'RICHARD KENNEDY',
 'ROBERT KENNEDY',
 'SEAN KENNEDY',
 'WARD KENNEDY']

In [349]:
cm[cm['tres_nm'].apply(lambda x: x in ['CHRIS KENNEDY', 'CHRISTOPHER KENNEDY'])]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address


In [350]:
cm[cm['tres_nm'].apply(lambda x: x in ['KATIE KENNEDY', 'KATHERINE KENNEDY'])]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
6627,C00546325,TAKING AN INDEPENDENT PERSPECTIVE TOGETHER FOR...,KATHERINE KENNEDY,2318 CURTIS ST,,DENVER,CO,80205,D,N,UNK,Q,,,,KATHERINE KENNEDY,2318 CURTIS STREET,"2318 CURTIS ST, DENVER, CO 80205"
7169,C00558759,PDC ENERGY INC PAC,KATHERINE KENNEDY,1775 SHERMAN ST,,DENVER,CO,80203,U,Q,UNK,Q,C,,,"KENNEDY, KATHERINE",1775 SHERMAN STREET SUITE 3000,"1775 SHERMAN ST, DENVER, CO 80203"
7399,C00564435,PRINCIPLED CONSERVATIVES OF COLORADO,KATIE KENNEDY,600 17TH ST,,DENVER,CO,80202,U,O,UNK,Q,,,,KATIE KENNEDY,600 17TH STREET SUITE 2800 SOUTH,"600 17TH ST, DENVER, CO 80202"
8382,C00580894,VAIL RESORTS EMPLOYEE POLITICAL ACTION COMMITTEE,KATIE KENNEDY,390 INTERLOCKEN CRESCENT,,BROOMFIELD,CO,80021,U,Q,UNK,Q,C,,,"KENNEDY, KATIE",390 INTERLOCKEN CRESCENT,"390 INTERLOCKEN CRESCENT, BROOMFIELD, CO 80021"
14698,C00670505,COMMITTEE FOR BARRINGTON,KATIE KENNEDY,745 S MILLER ST,,LAKEWOOD,CO,80226,P,H,REP,Q,,NONE,H8CO07094,"KENNEDY, KATIE",745 S. MILLER STREET,"745 S MILLER ST, LAKEWOOD, CO 80226"
15648,C00680306,BARRIS FOR CONGRESS,KATIE KENNEDY,24853 CHRIS DR,,EVERGREEN,CO,80439,P,H,LIB,Q,,NONE,H8CO02269,"KENNEDY, KATIE",24853 CHRIS DRIVE,"24853 CHRIS DR, EVERGREEN, CO 80439"
15864,C00682559,PROTECTING OUR CONSTITUTION,KATIE KENNEDY,5910 S UNIVERSITY BLVD C18 NO 254,,GREENWOOD VILLAGE,CO,80121,U,O,UNK,Q,,NONE,,"KENNEDY, KATIE",5910 S. UNIVERSITY BLVD C18 NO 254,"5910 S UNIVERSITY BLVD C18 NO 254, GREENWOOD V..."


In [351]:
new = 'KATHERINE KENNEDY'
dones.append(new)

these = findall(['kat', 'kennedy'])

getall(these, new)

orig len: 2
new len: 7


In [352]:
cm[cm['tres_nm'].apply(lambda x: x in [
    'MICHAEL KENNEDY', 
    'MICHAEL WARREN KENNEDY', 
])]

Unnamed: 0,cmte_id,cmte_nm,tres_nm,cmte_st1,cmte_st2,cmte_city,cmte_st,cmte_zip,cmte_dsgn,cmte_tp,cmte_pty_affiliation,cmte_filing_freq,org_tp,connected_org_nm,cand_id,orig_tres_nm,orig_cmte_st1,address
4648,C00477299,"VMWARE, INC. POLITICAL ACTION COMMITTEE (VMWAR...",MICHAEL KENNEDY,3401 HILLVIEW AVE,,PALO ALTO,CA,94304,U,Q,UNK,M,C,,,"KENNEDY, MICHAEL",3401 HILLVIEW AVE,"3401 HILLVIEW AVE, PALO ALTO, CA 94304"


In [353]:
dones.extend(findall(' kennedy'))

In [354]:
findall(['mel', 'allen'])

['MELISSA ALLEN']

In [355]:
findall(' himmel')

['MARC HIMMELSTEIN']

In [356]:
dones.extend(findall(' himmel'))
dones.extend(findall(['mel', 'allen']))

In [357]:
findall(' reis')

['ALAN REISCHE', 'ALEXANDER REISH', 'DANIEL REISTETER', 'MICHELE REISNER']

In [358]:
dones.extend(findall(' reis'))

In [359]:
findall(' doz')

['JAMES DOZIER', 'JOSEPH DOZIER', 'JULIE DOZIER']

In [360]:
dones.extend(findall(' doz'))

In [361]:
findall(' roz')

['ADAM ROZANSKY', 'COSTANTINO ROZZO', 'YURY ROZEL']

In [362]:
dones.extend(findall(' roz'))

In [363]:
findall(' patt')

['ALYX PATTISON',
 'ANDREW PATTERSON',
 'CHRIS PATTON',
 'JAMES PATTON',
 'MELVIN PATTERSON',
 'ROBERT PATTISON',
 'STEPHEN PATTERSON',
 'THERESA PATTARA',
 'TOM PATTERSON']

In [364]:
dones.extend(findall(' patt'))

The work is never done...

In [365]:
# checks which have already been deduped and does not display those
mask = [False if cm.loc[i, 'tres_nm'] in dones else True for i in list(cm.index)]
cm.loc[mask, 'tres_nm'].value_counts()[cm.loc[mask, 'tres_nm'].value_counts() > 1][:20]

SCOTT MACKENZIE       13
BAILEY MORGAN         12
DIANA SAQUELLA        12
ALEXANDER HORNADAY    10
KATE LIND             10
SLOANE SKINNER        10
EZEKIEL FREDERICK      9
CHRISTINA SIROIS       9
RUSSELL MILLER         8
THOMAS MAXWELL         8
TRAVIS KABRICK         8
THOMAS HILTACHK        8
GIANNI DONATES         7
PAUL TARNAWSKI         7
DANIEL SENA            7
CARY PETERSON          7
EUGENE SLOAN CRAIG     6
RYAN PHILLIPS          6
MICHAEL ADAMS          6
MEGAN MIELNIK          6
Name: tres_nm, dtype: int64

In [366]:
dupename = list(cm['cmte_nm'].value_counts()[cm['cmte_nm'].value_counts() > 2].index)

for dupe in dupename:
    lil_cm = cm.loc[cm['cmte_nm'] == dupe, [
#         'cmte_id',
        'tres_nm',
        'address',
    ]].sort_values(['address'])
    print(dupe)
    print(lil_cm)
    print('\n')

(cmte_nm missing)
                     tres_nm                             address
17010  (no treasurer listed)  PO BOX 33524, WASHINGTON, DC 20033
8006   (no treasurer listed)                   nan, nan, nan nan
10164  (no treasurer listed)                   nan, nan, nan nan
10419  (no treasurer listed)                   nan, nan, nan nan
10519  (no treasurer listed)                   nan, nan, nan nan
12322  (no treasurer listed)                   nan, nan, nan nan
13088  (no treasurer listed)                   nan, nan, nan nan
13089  (no treasurer listed)                   nan, nan, nan nan
13396  (no treasurer listed)                   nan, nan, nan nan


CLEAN WATER ACTION PROJECT
                tres_nm                                address
17297    SOPHIE AN AOKI       11 S 12TH ST, RICHMOND, VA 23219
17337  SANDRA LEDBETTER  1320 18TH ST NW, WASHINGTON, DC 20003
17296  SANDRA LEDBETTER        6 SLOANE ST, S ORANGE, NJ 07079
17294   SOPHIE ANN AOKI      P O BOX 3361, IOWA CIT

In [373]:
# show all cmte_nm & tres_nm for committees sharing a street address
multiadd = list(cm['address'].value_counts()[cm['address'].value_counts() > 10].index)

for add in multiadd:
    lil_cm = cm.loc[cm['address'] == add, ['tres_nm', 'cmte_nm']]\
    .sort_values(['tres_nm', 'cmte_nm'], ascending = [True, True]).reset_index(drop = True)
    print(add)
    print('\n')
    treses = list(lil_cm['tres_nm'].value_counts().index)
    for tres in treses:
        lil_tres = lil_cm[lil_cm['tres_nm'] == tres]
        committees = sorted(list(set(lil_tres['cmte_nm'])))
        print('\t', tres)
        print('\n')
        for committee in committees:
            print('\t\t', committee)
        print('\n')
    print('\n')
        
#     print(lil_cm.to_csv(None))
#     print('\n')

228 S WASHINGTON ST, ALEXANDRIA, VA 22314


	 LISA LISKER


		 AMERICANS UNITED FOR FREEDOM
		 ARIZONA GRASSROOTS ACTION PAC
		 BACON VICTORY FUND
		 BOLD ACTIVE CONSERVATIVES OF NEBRASKA PAC-BACON PAC
		 BRIDGING THE GAP
		 BUCKEYE JOINT FUNDRAISING COMMITTEE
		 BUILDING OUR BASE - BOB PAC
		 BUILDING RENEWAL IN AMERICA NOW PAC
		 CITIZENS FOR PROSPERITY IN AMERICA TODAY PAC
		 CONCERNED AMERICANS FOR FREEDOM & OPPORTUNITY PAC (CAFO PAC)
		 CRAPO VICTORY COMMITTEE
		 CREATING OPPORTUNITIES, MARKETS AND ENTHUSIASM IN RURAL KENTUCKY PAC
		 FREEDOM AND SECURITY PAC
		 FREEDOM PROJECT; THE
		 FRIENDS OF WINNING WOMEN 2016
		 FRIENDS OF WINNING WOMEN 2018
		 GARDNER VICTORY COMMITTEE
		 GOP MAJORITY VICTORY FUND
		 GOP WINNING WOMEN
		 GRAHAM MAJORITY FUND
		 GROWING REPUBLICAN ACHIEVEMENTS AND PROMOTING EXCELLENCE-GRAPE JFC
		 HONOR AND PRINCIPLES PAC
		 HOUSE CONSERVATIVES FUND
		 IOWA CONGRESSIONAL MAJORITY FUND; THE
		 JAMES-RUBIO VICTORY COMMITTEE
		 JOBS, ENERGY AND OUR FOUNDING FATH

PO BOX 9891, ARLINGTON, VA 22219


	 BENJAMIN OTTENHOFF


		 ALABAMA 2017 SENATE VICTORY COMMITTEE, A JOINT FUNDRAISING COMMITTEE COMPRISED OF JUDGE ROY MOORE FOR U.S. SENATE AND THE ALABAMA REPUBLICAN PARTY
		 BERGMAN VICTORY COMMITTEE
		 BLUE COLLAR VICTORY FUND
		 BOST VICTORY FUND
		 BRAUN VICTORY COMMITTEE
		 COMMON SENSE COMMON SOLUTIONS POLITICAL ACTION COMMITTEE
		 CONSERVATIVE ACTION PAC
		 FLORIDA WORKS
		 FREEDOM AND FAIRNESS VIRGINIA
		 HANDEL VICTORY FUND
		 HAWLEY WIN FUND
		 HELPING AMERICA'S NEXT DEDICATED ELECTED LEADERS PAC
		 HK VICTORY 2018
		 ILLINOIS MAJORITY FUND 2016
		 INDIANA SENATE NOMINEE FUND
		 INDIANA/MISSOURI VICTORY COMMITTEE
		 KATKO VICTORY FUND
		 LET AMERICA WORK
		 MACARTHUR VICTORY
		 MAJORITY IN ACTION
		 MAJORITY VICTORY PROGRAM
		 MARK WALKER VICTORY COMMITTEE
		 MOUNTAIN FAMILIES PAC
		 NRSC TARGETED STATE VICTORY FUND
		 PATRIOT DAY I 2017
		 PATRIOT DAY II 2017
		 PATRIOT DAY III 2018
		 PATRIOT DAY IV 2018
		 PATRIOT ROUND IV 2018
		 POLIQU

	 DON ERNST


		 MISSION PAC


	 EARL JENKINS


		 CONGRESSIONAL BLACK CAUCUS PAC


	 ALAN REISCHE


		 A NEW DIRECTION PAC


	 PHIL FOGG


		 AMERICAN HEALTH CARE ASSOCIATION POLITICAL ACTION COMMITTEE


	 MAHMOUD BARAZI


		 FREE SYRIA PAC




555 CAPITOL MALL, SACRAMENTO, CA 95814


	 (no treasurer listed)


		 ALAMEDANS UNITED SUPPORTING VELLA AND ASHCRAFT FOR CITY COUNCIL, ET AL
		 GOLDEN CALIFORNIA COMMITTEE SPONSORED BY THE SEIU CALIFORNIA STATE COUNCIL
		 OPPORTUNITY PAC - A COALITION OF TEACHERS HEALTH CARE GIVERS FACULTY MEMBERS SCHOOL EMPLOYEES AND PUBLIC AND PR
		 PLANNED PARENTHOOD ADVOCACY PROJECT LOS ANGELES COUNTY
		 PLANNED PARENTHOOD AFFILIATES OF CALIF


	 ART PULASKI


		 COMMITTEE FOR WORKING FAMILIES, SPONSORED BY THE CALIFORNIA LABOR FEDERATION, AFL-CIO
		 MILLION MORE VOTERS, SPONSORED BY THE CALIFORNIA LABOR FEDERATION, AFL-CIO


	 DAVE LOW


		 PACE OF CALIFORNIA SCHOOL EMPLOYEES ASSOCIATION - LOCAL, STATE, FEDERAL CANDIDATES


	 MICHAEL REDARD


		 PLANNED PA

	 WILLIAM OZANUS


		 C3 PAC
		 CIVIC
		 CONSERVATIVE VOICES PAC


	 CHRISTIAN RICKERS


		 AMERICAN UPRISING PAC




C/O RED CURVE SOLUTIONS, BEVERLY, MA 01915


	 BRADLEY CRATE


		 BELIEVE IN AMERICA PAC
		 CONSERVATIVE FIGHTER FUND
		 GOLDEN STATE LEADERSHIP PAC
		 HURD VICTORY FUND
		 KENNEDY CLUB COMMITTEE
		 MAVERICK PAC USA
		 MOONEY VICTORY FUND
		 PETER TEDESCHI VICTORY COMMITTEE
		 RESCHENTHALER VICTORY FUND
		 RVFPAC
		 SCHNEIDER VICTORY
		 STRENGTHEN AMERICA COMMITTEE
		 TEAM MITT
		 TRUMP MAKE AMERICA GREAT AGAIN COMMITTEE
		 TRUMP VICTORY




601 PENNSYLVANIA AVE NW, WASHINGTON, DC 20004


	 CHARLES SPIES


		 CLARK HILL FEDERAL POLITICAL ACTION COMMITTEE, THE
		 FUND FOR LOUISIANA'S FUTURE; THE


	 EILEEN BRADEN


		 JPMORGAN CHASE & CO. FEDERAL POLITICAL ACTION COMMITTEE
		 JPMORGAN CHASE & CO. PAC


	 EARL JONES


		 HAIER US APPLIANCE SOLUTIONS, INC., DBA GE APPLIANCES POLITICAL ACTION COMMITTEE


	 MICHAEL KLOBUCHAR


		 MERCK & CO., INC., EMPLOYEES POLITICAL ACTION

	 GREGG SHEIOWITZ


		 ZURICH HOLDING COMPANY OF AMERICA, INC. COMMITTEE FOR GOOD GOVERNMENT (Z-PAC)


	 JEFF SMITH


		 NATIONAL FEDERATION OF INDEPENDENT BUSINESS FEDERAL POLITICAL ACTION COMMITTEE


	 MICHAEL MCGARRY


		 CRUISE LINES INTERNATIONAL ASSOCIATION PAC (CLIA PAC)




1787 TRIBUTE RD, SACRAMENTO, CA 95815


	 SHAWNDA DEANE


		 CA-BAM PAC
		 CALIFORNIA PROGRESS COALITION
		 CALIFORNIANS FOR OPPORTUNITY
		 EAST BAY YOUNG DEMOCRATS - FEDERAL
		 ELECTING WOMEN SILICON VALLEY FEDERAL PAC
		 FIGHT BACK CALIFORNIA
		 NEED TO IMPEACH
		 PROGRESSIVE WOMEN SILICON VALLEY FEDERAL PAC


	 (no treasurer listed)


		 SAFETY FOR ALL YES ON PROP. 63 NEWSOM BALLOT MEASURE COMMITTEE


	 SAL ROSSELLI


		 NATIONAL UNION OF HEALTHCARE WORKERS FEDERAL COMMITTEE ON POLITICAL EDUCATION


	 JACK PHILLIPS


		 VENTURA COUNTY DEMOCRATIC CENTRAL COMMITTEE






### Other columns

In [None]:
cmtetp_dict = {
    'C' : 'cmte_CommunicationCost',
    'D' : 'cmte_DelegateCommittee',
    'E' : 'cmte_ElectioneeringComms',
    'H' : 'cmte_House',
    'I' : 'cmte_IndependentExpenditor',
    'S' : 'cmte_Senate',
    'N' : 'cmte_PACnonqual',
    'Q' : 'cmte_PACqual',
    'Y' : 'cmte_Partyqual',
    'O' : 'cmte_SuperPAC',
    'P' : 'cmte_Presidential',
    'U' : 'cmte_Singlecand',
    'V' : 'cmte_PACnoncontrib_nonqual',
    'W' : 'cmte_PACnoncontrib_qual',
    'X' : 'cmte_Partynonqual',
    np.nan : np.nan,
}

cm['cmte_tp'] = [cmtetp_dict[x] for x in cm['cmte_tp']]
cm['cmte_tp'].value_counts()

In [None]:
cmtedsgn_dict = {
    'A' : 'Authorized by a candidate',
    'B' : 'Lobbyist/Registrant PAC',
    'D' : 'Leadership PAC',
    'J' : 'Joint fundraiser',
    'P' : 'Principal campaign committee',
    'U' : 'Unauthorized',
    np.nan : np.nan,
}

cm['cmte_dsgn'] = [cmtedsgn_dict[x] for x in cm['cmte_dsgn']]
cm['cmte_dsgn'].value_counts()

In [None]:
pty_affil = [
    np.nan,
    '.',
    'UKN',
    '  0',
    'UN',
]

cm['cmte_pty_affiliation'] = ['UNK' if (x in pty_affil) else x for x in cm['cmte_pty_affiliation']]
cm['cmte_pty_affiliation'].value_counts()

In [None]:
cm['cmte_filing_freq'].value_counts()

In [None]:
orgtp_dict = {
    'C' : 'org_Corporation',
    'H' : 'org_HnotonFECsite',
    'I' : 'org_InotonFECsite',
    'L' : 'org_Labor organization',
    'M' : 'org_Membership organization',
    'T' : 'org_Trade association',
    'V' : 'org_Cooperative',
    'W' : 'org_Corporation without capital stock',
    np.nan : np.nan,
}

cm['org_tp'] = [orgtp_dict[x] for x in cm['org_tp']]
cm['org_tp'].value_counts()

In [None]:
cm['connected_org_nm'].value_counts()[:10]

In [None]:
cm.loc[cm['connected_org_nm'] == 'NONE', 'connected_org_nm'] = np.nan

In [None]:
cm['connected_org_nm'].value_counts()[:10]

In [None]:
# sorry! these print ugly but it's to display them unabbreviated
treses = list(cm['tres_nm'].value_counts()[cm['tres_nm'].value_counts() > 10].index)

for tres in treses:
    if tres != '(no treasurer listed)':
        lil_cm = cm[cm['tres_nm'] == tres]
        lil_cm = lil_cm[[
            'cmte_id',
            'cand_id',
            'cmte_nm',

        ]].sort_values(['cand_id', 'cmte_nm']).reset_index(drop = True)
        pty = list(set(cm.loc[list(lil_cm.index), 'cmte_pty_affiliation']))
        ptys = sorted([x for x in pty if (type(x) != float)])
        print(''.join([tres, ', treasurer']))
        print(len(lil_cm), 'committees')
        print('parties:', str(ptys))
        print(lil_cm.to_csv(None))
        print('\n\n')

In [None]:
cm.to_csv('data/03a_committees.csv', index = False)