In [91]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import requests
import json

import time
import re
import copy

import os.path
from os import path

from crpapi import CRP
from congress import Congress

## Support Functions

In [13]:
def get_keys(file_path):
    """
    Pulls necessary api keys from designated file path
    """
    with open(file_path) as f:
        return json.load(f)

In [14]:
def party_code_to_letter(party_code):
    '''
    Convert a party code from Voteview into a letter for a party in recent history
    '''
    if party_code == 100:
        return 'D'
    if party_code == 200:
        return 'R'
    if party_code == 328:
        return 'I'
    else:
        return np.nan

In [15]:
def parse_senator_names(name, name_type):
    split_name = name.split(', ', 1)
    last_name = split_name[0].split(' ')[-1].capitalize()
    first_name = split_name[1].replace(',','').split(' ')[0].capitalize()
    
    indiv_nick_names = {'Durbin': 'Dick', 'Cochran': 'Thad'}
    
    if name_type == 'last':
        return last_name
    elif name_type == 'first':
        return first_name
    elif name_type == 'first_abbrev':
        if last_name in indiv_nick_names.keys():
            return indiv_nick_names[last_name][0]
        elif '(' in split_name[1]:
            return split_name[1][split_name[1].index('(')+1]
        else:
            return first_name[0]
    else:
        return np.nan

## ETL

In [26]:
current_congress = 116
start_congress = 113

### Voteview

In [27]:
def get_voteview_csv_content(content_type, current_congress_num):
    '''
    Retrieves congress data by content type (passed in) for the 104th Congress to the current Congress today.
    
    '''
    if content_type not in ['votes', 'rollcalls', 'members']:
        print('Not a valid content type')
        return
    
    for congress_num in range(104, current_congress_num+1):
        #download the vote each member made for each roll call
        url = 'https://voteview.com/static/data/out/{}/S{}_{}.csv'.format(content_type, congress_num, content_type)

        r = requests.get(url)
        content = r.content
        file = open('{}/S{}_{}.csv'.format(content_type, congress_num, content_type), 'wb')

        file.write(content)
        file.close()
        
        #put in a sleep timer to not overload the voteview servers
        time.sleep(1)
        
    return

In [28]:
# get_voteview_csv_content('votes', current_congress)

In [29]:
# get_voteview_csv_content('rollcalls', current_congress)

In [30]:
# get_voteview_csv_content('members', current_congress)

In [32]:
#combine rollcall info for each congress into one DataFrame
rollcall_df = pd.DataFrame()
for i in range(start_congress, current_congress+1):
    temp_df = pd.read_csv('rollcalls/S{}_rollcalls.csv'.format(i))
    rollcall_df = pd.concat([rollcall_df, temp_df], ignore_index=True)
    
rollcall_df.drop('dtl_desc', axis=1, inplace=True)
rollcall_df.dropna(axis=0, inplace=True)
    
display(rollcall_df.head())
display(rollcall_df.tail())
display(rollcall_df.info())

Unnamed: 0,congress,chamber,rollnumber,date,session,clerk_rollnumber,yea_count,nay_count,nominate_mid_1,nominate_mid_2,nominate_spread_1,nominate_spread_2,nominate_log_likelihood,bill_number,vote_result,vote_desc,vote_question
0,113,Senate,1,2013-01-24,1,1,78,16,0.522,-0.852,0.214,-0.031,-20.316,SRES15,Resolution Agreed to,A resolution to improve procedures for the con...,On the Resolution
1,113,Senate,2,2013-01-24,1,2,86,9,0.62,-0.785,0.201,-0.111,-17.612,SRES16,Resolution Agreed to,A resolution amending the Standing Rules of th...,On the Resolution
2,113,Senate,3,2013-01-28,1,3,35,62,0.277,-0.961,-0.305,0.05,-18.941,HR152,Amendment Rejected,To offset the cost of the bill with rescission...,On the Amendment
3,113,Senate,4,2013-01-28,1,4,63,36,0.195,-0.812,0.46,-0.392,-15.561,HR152,Bill Passed,A bill making supplemental appropriations for ...,On Passage of the Bill
4,113,Senate,5,2013-01-29,1,5,95,3,0.998,-0.068,1.749,-0.128,-9.99,PN42,Nomination Confirmed,"John Forbes Kerry, of Massachusetts, to be Sec...",On the Nomination


Unnamed: 0,congress,chamber,rollnumber,date,session,clerk_rollnumber,yea_count,nay_count,nominate_mid_1,nominate_mid_2,nominate_spread_1,nominate_spread_2,nominate_log_likelihood,bill_number,vote_result,vote_desc,vote_question
2261,116,Senate,504,2020-03-18,2,76,90,8,0.544,-0.839,0.268,-0.311,-13.629,HR6201,Bill Passed,A bill making emergency supplemental appropria...,On Passage of the Bill
2262,116,Senate,505,2020-03-22,2,77,47,47,-0.009,-0.717,-0.352,0.347,-5.644,HR748,Cloture on the Motion to Proceed Rejected,A bill to amend the Internal Revenue Code of 1...,On Cloture on the Motion to Proceed
2263,116,Senate,506,2020-03-23,2,78,49,46,-0.037,-0.497,-0.72,0.022,-2.314,HR748,Cloture on the Motion to Proceed Rejected,A bill to amend the Internal Revenue Code of 1...,On Cloture on the Motion to Proceed
2264,116,Senate,507,2020-03-25,2,79,48,48,-0.243,0.819,-0.516,-1.138,-6.48,HR748,Amendment Rejected,To ensure that additional unemployment benefit...,On the Amendment
2265,116,Senate,508,2020-03-25,2,80,96,0,0.0,0.0,0.0,0.0,0.0,HR748,Bill Passed,A bill to amend the Internal Revenue Code of 1...,On Passage of the Bill


<class 'pandas.core.frame.DataFrame'>
Int64Index: 2263 entries, 0 to 2265
Data columns (total 17 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   congress                 2263 non-null   int64  
 1   chamber                  2263 non-null   object 
 2   rollnumber               2263 non-null   int64  
 3   date                     2263 non-null   object 
 4   session                  2263 non-null   int64  
 5   clerk_rollnumber         2263 non-null   int64  
 6   yea_count                2263 non-null   int64  
 7   nay_count                2263 non-null   int64  
 8   nominate_mid_1           2263 non-null   float64
 9   nominate_mid_2           2263 non-null   float64
 10  nominate_spread_1        2263 non-null   float64
 11  nominate_spread_2        2263 non-null   float64
 12  nominate_log_likelihood  2263 non-null   float64
 13  bill_number              2263 non-null   object 
 14  vote_result             

None

In [33]:
#find all nomination votes and treaty votes to pull out from vote history
nomination_votes = rollcall_df[rollcall_df['bill_number'].str.contains('PN')].bill_number

treaty_votes = rollcall_df[rollcall_df['bill_number'].str.contains('TREATYDOC')].bill_number

votes_to_ignore = pd.concat([nomination_votes, treaty_votes], ignore_index=True).unique()

#find the bills to search that aren't nominations or treaty document votes
non_nomination_votes = rollcall_df[~rollcall_df['bill_number'].isin(votes_to_ignore)][['congress', 'bill_number']]

non_nomination_votes['bill_id'] = non_nomination_votes['bill_number'].apply(lambda x: x.lower()) + '-' + \
                                  non_nomination_votes['congress'].astype(str)

bills_to_search = non_nomination_votes['bill_id'].unique()
bills_to_search

array(['sres15-113', 'sres16-113', 'hr152-113', 'hr325-113', 's47-113',
       's16-113', 's388-113', 'sres64-113', 'hr933-113', 'sconres8-113',
       's649-113', 's743-113', 's601-113', 's954-113', 'sres65-113',
       's1003-113', 's953-113', 's744-113', 's1238-113', 's1243-113',
       'hr1911-113', 'hr527-113', 'hjres59-113', 's1569-113',
       'hr2775-113', 'sjres26-113', 's815-113', 'hr3204-113', 's1197-113',
       'sconres28-113', 'hr3304-113', 's1845-113', 'hjres106-113',
       'hr3547-113', 's1926-113', 'hr2642-113', 's1963-113', 's540-113',
       's25-113', 's1982-113', 's1752-113', 's1917-113', 's1086-113',
       'hr3370-113', 's2124-113', 'hr4152-113', 'hr3979-113',
       'hr4302-113', 's2199-113', 's2223-113', 's2262-113', 'hr3474-113',
       'hr3080-113', 's2432-113', 'hr3230-113', 'hr4660-113', 'hr803-113',
       's2363-113', 's2578-113', 's2244-113', 's2569-113', 'hr5021-113',
       's2648-113', 'sjres19-113', 'hjres124-113', 's2280-113',
       's2685-113', '

In [34]:
#make DataFrame of all senators from starting congress to current congress
member_df = pd.DataFrame()
for i in range(start_congress, current_congress+1):
    temp_df = pd.read_csv('members/S{}_members.csv'.format(i))
    member_df = pd.concat([member_df, temp_df], ignore_index=True)

#get rid of presidents from senate list
president_indexes = member_df[member_df['chamber'] == 'President'].index.values
member_df.drop(president_indexes, inplace=True)

#change the party code to a letter (Republican - R, Democrat - D, Independent - I)
member_df['party'] = member_df['party_code'].apply(lambda x: party_code_to_letter(x))

#separate out last name, first name, and middle names/suffixes
member_df['last_name'] = member_df['bioname'].apply(lambda x: parse_senator_names(x, 'last'))
member_df['first_name'] = member_df['bioname'].apply(lambda x: parse_senator_names(x, 'first'))
member_df['first_abbrev'] = member_df['bioname'].apply(lambda x: parse_senator_names(x, 'first_abbrev'))


display(member_df.head())
display(member_df.tail())
display(member_df.info())

Unnamed: 0,congress,chamber,icpsr,state_icpsr,district_code,state_abbrev,party_code,occupancy,last_means,bioname,...,nominate_geo_mean_probability,nominate_number_of_votes,nominate_number_of_errors,conditional,nokken_poole_dim1,nokken_poole_dim2,party,last_name,first_name,first_abbrev
1,113,Senate,49700,41,0,AL,200,0.0,1.0,"SESSIONS, Jefferson Beauregard III (Jeff)",...,0.82789,539.0,45.0,,0.619,0.082,R,Sessions,Jefferson,J
2,113,Senate,94659,41,0,AL,200,0.0,1.0,"SHELBY, Richard C.",...,0.81554,545.0,55.0,,0.515,0.602,R,Shelby,Richard,R
3,113,Senate,40300,81,0,AK,200,0.0,1.0,"MURKOWSKI, Lisa",...,0.73575,516.0,83.0,,0.124,-0.411,R,Murkowski,Lisa,L
4,113,Senate,40900,81,0,AK,100,0.0,1.0,"BEGICH, Mark",...,0.91,512.0,21.0,,-0.241,0.348,D,Begich,Mark,M
5,113,Senate,15039,61,0,AZ,200,0.0,1.0,"McCAIN, John Sidney, III",...,0.77659,525.0,67.0,,0.412,-0.579,R,Mccain,John,J


Unnamed: 0,congress,chamber,icpsr,state_icpsr,district_code,state_abbrev,party_code,occupancy,last_means,bioname,...,nominate_geo_mean_probability,nominate_number_of_votes,nominate_number_of_errors,conditional,nokken_poole_dim1,nokken_poole_dim2,party,last_name,first_name,first_abbrev
408,116,Senate,40915,56,0,WV,100,,,"MANCHIN, Joe, III",...,0.90671,459.0,10.0,,-0.045,0.368,D,Manchin,Joe,J
409,116,Senate,29940,25,0,WI,100,,,"BALDWIN, Tammy",...,0.81402,462.0,47.0,,-0.396,-0.057,D,Baldwin,Tammy,T
410,116,Senate,41111,25,0,WI,200,,,"JOHNSON, Ron",...,0.92338,449.0,17.0,,0.599,-0.057,R,Johnson,Ron,R
411,116,Senate,40707,68,0,WY,200,,,"BARRASSO, John A.",...,0.95155,463.0,14.0,,0.601,0.231,R,Barrasso,John,J
412,116,Senate,49706,68,0,WY,200,,,"ENZI, Michael B.",...,0.92684,460.0,13.0,,0.58,0.336,R,Enzi,Michael,M


<class 'pandas.core.frame.DataFrame'>
Int64Index: 411 entries, 1 to 412
Data columns (total 26 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   congress                       411 non-null    int64  
 1   chamber                        411 non-null    object 
 2   icpsr                          411 non-null    int64  
 3   state_icpsr                    411 non-null    int64  
 4   district_code                  411 non-null    int64  
 5   state_abbrev                   411 non-null    object 
 6   party_code                     411 non-null    int64  
 7   occupancy                      205 non-null    float64
 8   last_means                     205 non-null    float64
 9   bioname                        411 non-null    object 
 10  bioguide_id                    411 non-null    object 
 11  born                           411 non-null    int64  
 12  died                           4 non-null      flo

None

#### nominate_dim1 is the dw nominate score

party code:  
- D - 100
- R - 200
- Independent - 328

### Open Secrets

In [35]:
#combine all Candidates from elections since 2012 into DataFrame
candidate_crp_ids = pd.DataFrame()
for year in range(2012, 2020, 2):
    temp_ids = pd.read_excel('CRP_IDs.xls', sheet_name='Candidate IDs - {}'.format(year), 
                             skiprows=13, usecols=range(1,6))
    temp_ids_senators = temp_ids[temp_ids['DistIDRunFor'].str.contains('[A-Z][A-Z]S\d', regex=True)]
    candidate_crp_ids = pd.concat([candidate_crp_ids, temp_ids_senators], ignore_index=True)

candidate_crp_ids.rename({'DistIDRunFor': 'Office'}, axis=1, inplace=True)
display(candidate_crp_ids.head())
display(candidate_crp_ids.info())

Unnamed: 0,CID,CRPName,Party,Office,FECCandID
0,N00035490,"Abeler, Jim",R,MNS1,S4MN00353
1,N00009888,"Alexander, Lamar",R,TNS2,S2TN00058
2,N00034703,"Armstrong, Brandon Christina",I,SCS2,S4SC00232
3,N00035475,"Baumgardner, Randy",R,COS2,S4CO00346
4,N00029901,"Begich, Mark",D,AKS1,S8AK00090


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1444 entries, 0 to 1443
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   CID        1444 non-null   object
 1   CRPName    1444 non-null   object
 2   Party      1444 non-null   object
 3   Office     1444 non-null   object
 4   FECCandID  1444 non-null   object
dtypes: object(5)
memory usage: 56.5+ KB


None

In [36]:
member_crp_ids = pd.DataFrame()
for cong in range(113,116):
    temp_ids = pd.read_excel('CRP_IDs.xls', sheet_name='Members {}th'.format(cong), 
                             skiprows=5, usecols=range(1,6))
    temp_ids_senators = temp_ids[temp_ids['Office'].str.contains('[A-Z][A-Z]S\d', regex=True)]
    member_crp_ids = pd.concat([member_crp_ids, temp_ids_senators], ignore_index=True)
    
display(member_crp_ids.head())
display(member_crp_ids.info())

Unnamed: 0,CID,CRPName,Party,Office,FECCandID
0,N00009888,"Alexander, Lamar",R,TNS2,S2TN00058
1,N00030980,"Ayotte, Kelly",R,NHS1,S0NH00235
2,N00004367,"Baldwin, Tammy",D,WIS1,S2WI00219
3,N00006236,"Barrasso, John A",R,WYS1,S6WY00068
4,N00004643,"Baucus, Max",D,MTS2,S8MT00010


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 308 entries, 0 to 307
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   CID        308 non-null    object
 1   CRPName    308 non-null    object
 2   Party      308 non-null    object
 3   Office     308 non-null    object
 4   FECCandID  308 non-null    object
dtypes: object(5)
memory usage: 12.2+ KB


None

In [37]:
crp_ids = pd.concat([candidate_crp_ids, member_crp_ids], ignore_index=True)

display(crp_ids.head())
display(crp_ids.info())

Unnamed: 0,CID,CRPName,Party,Office,FECCandID
0,N00035490,"Abeler, Jim",R,MNS1,S4MN00353
1,N00009888,"Alexander, Lamar",R,TNS2,S2TN00058
2,N00034703,"Armstrong, Brandon Christina",I,SCS2,S4SC00232
3,N00035475,"Baumgardner, Randy",R,COS2,S4CO00346
4,N00029901,"Begich, Mark",D,AKS1,S8AK00090


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1752 entries, 0 to 1751
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   CID        1752 non-null   object
 1   CRPName    1752 non-null   object
 2   Party      1752 non-null   object
 3   Office     1752 non-null   object
 4   FECCandID  1752 non-null   object
dtypes: object(5)
memory usage: 68.6+ KB


None

In [38]:
missing_senator_info = [{'cid': 'N99999896',
                         'party': 'D',
                         'feccandid': np.nan,
                         'last_name': 'Cowan',
                         'first_name': 'William',
                         'first_abbrev': 'M',
                         'state_abbrev': 'MA'
                        },
                        {'cid': np.nan,
                         'party': 'R',
                         'feccandid': np.nan,
                         'last_name': 'Chiesa',
                         'first_name': 'Jeffrey',
                         'first_abbrev': 'J',
                         'state_abbrev': 'NJ',
                        },
                        {'cid': 'N00046125',
                         'party': 'R',
                         'feccandid': 'S0GA00526',
                         'last_name': 'Loeffler',
                         'first_name': 'Kelly',
                         'first_abbrev': 'K',
                         'state_abbrev': 'GA'
                        }]

In [39]:
#drop any duplicate candidates
crp_ids.drop_duplicates(subset='CID', keep='last', inplace=True, ignore_index=True)

#create new columns for last name, first name, and any middle names for easy comparison to voteview data
crp_ids['last_name'] = crp_ids['CRPName'].apply(lambda x: parse_senator_names(x, 'last'))
crp_ids['first_name'] = crp_ids['CRPName'].apply(lambda x: parse_senator_names(x, 'first'))
crp_ids['first_abbrev'] = crp_ids['CRPName'].apply(lambda x: parse_senator_names(x, 'first_abbrev'))

#rename the column to match the members DataFrame
crp_ids['state_abbrev'] = crp_ids['Office'].apply(lambda x: x[:2])

#drop unnecessary columns
crp_ids.drop(['CRPName', 'Office'], axis=1, inplace=True)

#rename the columns to be all lower case (to match members_df)
crp_ids.columns = [x.lower() for x in crp_ids.columns]

#add missing senators to crp list
crp_ids = pd.concat([crp_ids, pd.DataFrame(missing_senator_info)], ignore_index=True)

display(crp_ids.head(10))
display(crp_ids.info())

Unnamed: 0,cid,party,feccandid,last_name,first_name,first_abbrev,state_abbrev
0,N00035496,R,S4NJ00235,Eck,Alieta,A,NJ
1,N00035511,R,S4NJ00219,Lonegan,Steve,S,NJ
2,N00035096,3,S4NJ00276,Olivera,Pablo,P,NJ
3,N00035280,R,S4MT00084,Stapleton,Corey,C,MT
4,N00035509,R,S4IA00103,Young,David,D,IA
5,N00035479,R,S4GA11269,Yu,Eugene,E,GA
6,N00035490,R,S4MN00353,Abeler,Jim,J,MN
7,N00036765,D,S4LA00156,Ables,Wayne,W,LA
8,N00027398,I,S4CO00296,Acosta,Raul,R,CO
9,N00035664,D,S4MT00100,Adams,Dirk,D,MT


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1272 entries, 0 to 1271
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   cid           1271 non-null   object
 1   party         1272 non-null   object
 2   feccandid     1270 non-null   object
 3   last_name     1272 non-null   object
 4   first_name    1272 non-null   object
 5   first_abbrev  1272 non-null   object
 6   state_abbrev  1272 non-null   object
dtypes: object(7)
memory usage: 69.7+ KB


None

In [40]:
crp_ids[crp_ids['last_name'] == 'Cochran']

Unnamed: 0,cid,party,feccandid,last_name,first_name,first_abbrev,state_abbrev
1180,N00003328,R,S8MS00055,Cochran,Thad,T,MS


In [72]:
crp_ids[crp_ids['last_name'] == 'Landrieu']

Unnamed: 0,cid,party,feccandid,last_name,first_name,first_abbrev,state_abbrev
492,N00035021,D,S6LA00391,Landrieu,Gary,G,LA
1150,N00005395,D,S6LA00227,Landrieu,Mary,M,LA


In [41]:
member_df_with_ids = member_df.merge(crp_ids, how='left', on=['last_name', 'state_abbrev', 'party', 'first_abbrev'])

member_df_with_ids.rename({'first_name_x': 'first_name', 'first_name_y': 'alt_first_name'}, axis=1, inplace=True)

display(member_df_with_ids.head())
display(member_df_with_ids.info())

Unnamed: 0,congress,chamber,icpsr,state_icpsr,district_code,state_abbrev,party_code,occupancy,last_means,bioname,...,conditional,nokken_poole_dim1,nokken_poole_dim2,party,last_name,first_name,first_abbrev,cid,feccandid,alt_first_name
0,113,Senate,49700,41,0,AL,200,0.0,1.0,"SESSIONS, Jefferson Beauregard III (Jeff)",...,,0.619,0.082,R,Sessions,Jefferson,J,N00003062,S6AL00195,Jeff
1,113,Senate,94659,41,0,AL,200,0.0,1.0,"SHELBY, Richard C.",...,,0.515,0.602,R,Shelby,Richard,R,N00009920,S6AL00013,Richard
2,113,Senate,40300,81,0,AK,200,0.0,1.0,"MURKOWSKI, Lisa",...,,0.124,-0.411,R,Murkowski,Lisa,L,N00026050,S4AK00099,Lisa
3,113,Senate,40900,81,0,AK,100,0.0,1.0,"BEGICH, Mark",...,,-0.241,0.348,D,Begich,Mark,M,N00029901,S8AK00090,Mark
4,113,Senate,15039,61,0,AZ,200,0.0,1.0,"McCAIN, John Sidney, III",...,,0.412,-0.579,R,Mccain,John,J,N00006424,S6AZ00019,John


<class 'pandas.core.frame.DataFrame'>
Int64Index: 411 entries, 0 to 410
Data columns (total 29 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   congress                       411 non-null    int64  
 1   chamber                        411 non-null    object 
 2   icpsr                          411 non-null    int64  
 3   state_icpsr                    411 non-null    int64  
 4   district_code                  411 non-null    int64  
 5   state_abbrev                   411 non-null    object 
 6   party_code                     411 non-null    int64  
 7   occupancy                      205 non-null    float64
 8   last_means                     205 non-null    float64
 9   bioname                        411 non-null    object 
 10  bioguide_id                    411 non-null    object 
 11  born                           411 non-null    int64  
 12  died                           4 non-null      flo

None

In [75]:
member_df_with_ids[member_df_with_ids['last_name'] == 'Harris'][['bioname', 'cid']]

Unnamed: 0,bioname,cid
216,"HARRIS, Kamala Devi",N00036915
318,"HARRIS, Kamala Devi",N00036915


## Making a CID, FecCandID, BioguideID, Name CSV

In [42]:
cid_crp_feccand_table = member_df_with_ids[['icpsr', 'state_abbrev', 'bioguide_id', 'party', 
                                           'last_name', 'first_name', 'cid', 'feccandid', 'alt_first_name']].copy()

cid_crp_feccand_table.drop_duplicates('cid', inplace=True)

display(cid_crp_feccand_table.head())
display(cid_crp_feccand_table.info())

Unnamed: 0,icpsr,state_abbrev,bioguide_id,party,last_name,first_name,cid,feccandid,alt_first_name
0,49700,AL,S001141,R,Sessions,Jefferson,N00003062,S6AL00195,Jeff
1,94659,AL,S000320,R,Shelby,Richard,N00009920,S6AL00013,Richard
2,40300,AK,M001153,R,Murkowski,Lisa,N00026050,S4AK00099,Lisa
3,40900,AK,B001265,D,Begich,Mark,N00029901,S8AK00090,Mark
4,15039,AZ,M000303,R,Mccain,John,N00006424,S6AZ00019,John


<class 'pandas.core.frame.DataFrame'>
Int64Index: 140 entries, 0 to 398
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   icpsr           140 non-null    int64 
 1   state_abbrev    140 non-null    object
 2   bioguide_id     140 non-null    object
 3   party           140 non-null    object
 4   last_name       140 non-null    object
 5   first_name      140 non-null    object
 6   cid             139 non-null    object
 7   feccandid       138 non-null    object
 8   alt_first_name  140 non-null    object
dtypes: int64(1), object(8)
memory usage: 10.9+ KB


None

In [76]:
cid_crp_feccand_table[cid_crp_feccand_table['last_name'] == 'Harris']

Unnamed: 0,icpsr,state_abbrev,bioguide_id,party,last_name,first_name,cid,feccandid,alt_first_name
216,41701,CA,H001075,D,Harris,Kamala,N00036915,S6CA00584,Kamala


In [120]:
# cid_crp_feccand_table.to_csv('senator_id_reference_table.csv', index=False)

## Resume CRP Search IDs

In [84]:
crp_search_ids = member_df_with_ids[member_df_with_ids['cid'].notna()][['congress', 'cid']].values

In [146]:
def get_crp_industry_info(crp_id_array, api_key):
    #instantiate Congress object with api key for Pro Publica API
    crp = CRP(api_key)
    
    #instantiate variables for reporting
    crp_industry_issues = []
    crp_industries = 0
    crp_completed = 0
    
    cong_reference = {113: '2012', 114: '2014', 115: '2016', 116: '2018'}
    
    #iterate through ids and congresses in list to find summary, subjects, and amendments (if any)
    for cong,crp_id in crp_id_array:
        cong = int(cong)

        #craft a basic json entry should the call return nothing
        no_response_dict = {crp_id : 'No response'}
        
        #get industry info
        try:
            industries = crp.candidates.industries(crp_id, cong_reference[cong])
            crp_industries +=1
        except:
            try:
                off_election = str(int(cong_reference[cong]) - 1)
                industries = crp.candidates.industries(crp_id, off_election)
                crp_industries +=1
            except:
                industries = copy.deepcopy(no_response_dict)
                crp_industry_issues.append([cong, crp_id])

        #write industries to json file   
        with open('crp_jsons/{}_{}_industries.json'.format(crp_id, cong_reference[cong]), 'w') as f:
            json.dump(industries, f)
            
        crp_completed += 1
        time.sleep(1)
    
    print('Congress/ID pairs parsed: ', crp_completed)
    print()
    print('Industries found from pairs: ', crp_industries)
    print()
    print('Pairs with issues: ', crp_industry_issues)    
    
    return

In [147]:
crp_search_ids

array([[113, 'N00003062'],
       [113, 'N00009920'],
       [113, 'N00026050'],
       [113, 'N00029901'],
       [113, 'N00006424'],
       [113, 'N00009573'],
       [113, 'N00013873'],
       [113, 'N00013823'],
       [113, 'N00006692'],
       [113, 'N00007364'],
       [113, 'N00008051'],
       [113, 'N00030608'],
       [113, 'N00027566'],
       [113, 'N00031685'],
       [113, 'N00012508'],
       [113, 'N00031820'],
       [113, 'N00009926'],
       [113, 'N00030612'],
       [113, 'N00002685'],
       [113, 'N00002593'],
       [113, 'N00028139'],
       [113, 'N00028138'],
       [113, 'N00006267'],
       [113, 'N00029441'],
       [113, 'N00004981'],
       [113, 'N00012539'],
       [113, 'N00003845'],
       [113, 'N00026586'],
       [113, 'N00001758'],
       [113, 'N00004207'],
       [113, 'N00005285'],
       [113, 'N00005282'],
       [113, 'N00003389'],
       [113, 'N00030836'],
       [113, 'N00009659'],
       [113, 'N00005395'],
       [113, 'N00034580'],
 

In [None]:
N00024817_2016_industries

In [148]:
def review_missing_jsons(search_ids):
    cong_reference = {113: '2012', 114: '2014', 115: '2016', 116: '2018'}
    missing_jsons = []
    for cong,cid in search_ids:
#         print(cong,cid)
        file_path = 'crp_jsons/{}_{}_industries.json'.format(cid, cong_reference[cong])
#         print(path.exists(file_path))
        if path.exists(file_path):
#             print('Into if')
            try:
#                 print('into if/try')
                with open(file_path) as f:
                    test_json = json.load(f)
                    test_json[0]['@attributes']
            except:
#                 print('if/except')
                missing_jsons.append([cong, cid])
        else:
#             print('else')
            off_year = str(int(cong_reference[cong]) - 1)
            file_path = 'crp_jsons/{}_{}_industries.json'.format(cid, off_year)
            if path.exists(file_path):
#                 print('else/if')
                try:
                    with open(file_path) as f:
                        test_json = json.load(f)
                        test_json[0]['@attributes']
                except:
#                     print('else/if/except')
                    missing_jsons.append([cong, cid])
            else:
#                 print('else/except')
                missing_jsons.append([cong, cid])

    return missing_jsons        

In [149]:
missing_jsons = review_missing_jsons(crp_search_ids)
missing_jsons

[[113, 'N99999896'],
 [113, 'N00035686'],
 [113, 'N00035267'],
 [115, 'N00040607'],
 [115, 'N00024817'],
 [115, 'N00006406'],
 [115, 'N00042353'],
 [115, 'N00043298'],
 [116, 'N00046125']]

In [150]:
i = 1
key_path = "/Users/flatironschool/.secret/open_secrets_api{}.json".format(i)
keys = get_keys(key_path)

api_key = keys['api_key']

get_crp_industry_info(missing_jsons, api_key)

Congress/ID pairs parsed:  9

Industries found from pairs:  5

Pairs with issues:  [[113, 'N99999896'], [113, 'N00035686'], [115, 'N00040607'], [115, 'N00006406']]


Remaining ids are for senators who were appointed due to an elected senator's death, or left shortly after winning due to w

In [None]:
# start_index = 0
# end_index = 150
# for i in range(1,4):
#     key_path = "/Users/flatironschool/.secret/open_secrets_api{}.json".format(i)
#     keys = get_keys(key_path)

#     api_key = keys['api_key']
    
#     subset_search = crp_search_ids[start_index:end_index]
    
#     print('Subset: ', start_index, ' - ', end_index-1)
    
#     get_crp_industry_info(subset_search, api_key)
    
#     print()
    
#     start_index += 150
#     end_index += 150

Subset:  0  -  149
Congress/ID pairs parsed:  150

Industries found from pairs:  142

Pairs with issues:  [[113, 'N00028138'], [113, 'N00035021'], [113, 'N99999896'], [113, 'N00035686'], [113, 'N00035267'], [113, 'N00043087'], [114, 'N00001955'], [114, 'N00004118']]

Subset:  0  -  149


### MIT Election Lab

Having trouble finding the right structure for this API call for a download.

In [21]:
#get key for dataverse api
key_path = "/Users/flatironschool/.secret/dataverse_api.json"
keys = get_keys(key_path)

api_key = keys['api_key']

In [31]:
server_url = 'https://dataverse.harvard.edu/dataset'
server_path = '/api/access/datafile/'
data_id = 'data'
# data_id = ':persistentId/?persistentId=doi:10.7910/DVN/PEJ5QU'

# '$SERVER_URL/api/search?q=$QUERY'

# 'GET http://$SERVER/api/access/datafile/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB'

url = server_url + server_path + data_id

r = requests.get(url)
print(r)
print(r.url)
# print(r.content)
print(r.text[:1000])

<Response [404]>
https://dataverse.harvard.edu/dataset/api/access/datafile/data
<?xml version='1.0' encoding='UTF-8' ?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head id="j_idt2"><!-- Global site tag (gtag.js) - Google Analytics -->
<script async="async" src="https://www.googletagmanager.com/gtag/js?id=UA-61753334-1"></script>
<script>
  //<![CDATA[
  window.dataLayer = window.dataLayer || [];
  function gtag(){dataLayer.push(arguments);}
  gtag('js', new Date()); gtag('config', 'UA-61753334-1');

  window.addEventListener("load", enableAnalyticsEventCapture, false);

  function enableAnalyticsEventCapture() {
    // Download button
    $(document).on("click", ".btn-download", function() {
      var category = $(this).text();
      var label = getFileId($(this));
      gtag('event', 'Download',{'event_category' : category,
                                'event_label' : label});
    });

    // Request Access button
    $(document).on("click", 

### ProPublica

In [127]:
key_path = "/Users/flatironschool/.secret/pro_publica_api.json"
keys = get_keys(key_path)

api_key = keys['api_key']

In [128]:
def find_bill_info_from_list(bill_array, api_key):
    #instantiate Congress object with api key for Pro Publica API
    congress = Congress(api_key)
    
    #instantiate variables for reporting
    bill_sum_issues = []
    bill_sub_issues = []
    bill_amend_issues = []
    bills_completed = 0
    bill_summaries = 0
    bill_subjects = 0
    bill_amendments = 0
    
    #iterate through bills in list to find summary, subjects, and amendments (if any)
    for bill in bill_array:
        bill_num, bill_cong = bill.split('-')
        bill_cong = int(bill_cong)

        #craft a basic json entry should the call return nothing
        no_response_dict = {bill : 'No response'}

        #iterate through bill calls to get bill info
        
        #bill summaries
        try:
            bill_summary = congress.bills.get(bill_num, congress=bill_cong)
            bill_summaries += 1
        except:
            bill_summary = copy.deepcopy(no_response_dict)
            bill_sum_issues.append(bill)
        
        #bill subjects
        try:
            bill_subject = congress.bills.subjects(bill_num, congress=bill_cong)
            bill_subjects += 1
        except:
            bill_subject = copy.deepcopy(no_response_dict)
            bill_sub_issues.append(bill)

        #bill amendments
        try:
            bill_amend = congress.bills.amendments(bill_num, congress=bill_cong)
            bill_amendments += 1
        except:
            bill_amend = copy.deepcopy(no_response_dict)
            bill_amend_issues.append(bill)

        #write all bill parts to own json files    
        with open('pro_pub_jsons/{}_bill_sum.json'.format(bill), 'w') as f:
            json.dump(bill_summary, f)

        with open('pro_pub_jsons/{}_bill_sub.json'.format(bill), 'w') as f:
            json.dump(bill_subject, f)

        with open('pro_pub_jsons/{}_bill_amend.json'.format(bill), 'w') as f:
            json.dump(bill_amend, f)
            
        bills_completed += 1
        time.sleep(1)
    
    print('Bills parsed: ', bills_completed)
    print()
    print('Bill summaries completed: ', bill_summaries)
    print('Bill subjects completed: ', bill_subjects)
    print('Bill amendments completed: ', bill_amendments)
    print()
    print('Bill summaries with issues: ', bill_sum_issues)
    print('Bill subjects with issues: ', bill_sub_issues)
    print('Bill amendments with issues: ', bill_amend_issues)    
    
    return

In [61]:
# find_bill_info_from_list(bills_to_search, api_key)

Bills parsed:  262

Bill summaries completed:  241
Bill subjects completed:  245
Bill amendments completed:  244

Bill summaries with issues:  ['hr3547-113', 's1926-113', 'hr2642-113', 's1963-113', 's540-113', 's25-113', 's1982-113', 's1752-113', 's1917-113', 's1086-113', 'hr3370-113', 'hr1735-114', 's2943-114', 'treatydoc11412-115', 'hr2810-115', 'hr5515-115', 'treatydoc1134-116', 'treatydoc1121-116', 'treatydoc1141-116', 'treatydoc1118-116', 'treatydoc1161-116']
Bill subjects with issues:  ['hr3547-113', 's1926-113', 'hr2642-113', 's1963-113', 's540-113', 's25-113', 's1982-113', 's1752-113', 's1917-113', 's1086-113', 'hr3370-113', 'treatydoc11412-115', 'treatydoc1134-116', 'treatydoc1121-116', 'treatydoc1141-116', 'treatydoc1118-116', 'treatydoc1161-116']
Bill amendments with issues:  ['hjres106-113', 'hr3547-113', 's1926-113', 'hr2642-113', 's1963-113', 's540-113', 's25-113', 's1982-113', 's1752-113', 's1917-113', 's1086-113', 'hr3370-113', 'treatydoc11412-115', 'treatydoc1134-116',

In [129]:
bad_summaries = ['hr3547-113', 's1926-113', 'hr2642-113', 's1963-113', 's540-113', 's25-113', 's1982-113', 
                 's1752-113', 's1917-113', 's1086-113', 'hr3370-113', 'hr1735-114', 's2943-114', 
                 'treatydoc11412-115', 'hr2810-115', 'hr5515-115', 'treatydoc1134-116', 'treatydoc1121-116', 
                 'treatydoc1141-116', 'treatydoc1118-116', 'treatydoc1161-116']

In [130]:
bad_subjects = ['hr3547-113', 's1926-113', 'hr2642-113', 's1963-113', 's540-113', 's25-113', 's1982-113', 
                's1752-113', 's1917-113', 's1086-113', 'hr3370-113', 'treatydoc11412-115', 'treatydoc1134-116', 
                'treatydoc1121-116', 'treatydoc1141-116', 'treatydoc1118-116', 'treatydoc1161-116']

In [131]:
missed_bills = [x for x in (bad_summaries + bad_subjects) if 'treatydoc' not in x]
missed_bills = list(set(missed_bills))

In [132]:
find_bill_info_from_list(missed_bills, api_key)

Bills parsed:  15

Bill summaries completed:  11
Bill subjects completed:  15
Bill amendments completed:  15

Bill summaries with issues:  ['hr1735-114', 'hr2810-115', 's2943-114', 'hr5515-115']
Bill subjects with issues:  []
Bill amendments with issues:  []


Only missing bills are those that are the National Defense Authorization Act yearly fiscal bills

# Working Zone

In [34]:
member_test = congress.members.get('M000355')
member_test

{'id': 'M000355',
 'member_id': 'M000355',
 'first_name': 'Mitch',
 'middle_name': None,
 'last_name': 'McConnell',
 'suffix': None,
 'date_of_birth': '1942-02-20',
 'gender': 'M',
 'url': 'https://www.mcconnell.senate.gov',
 'times_topics_url': 'http://topics.nytimes.com/top/reference/timestopics/people/m/mitch_mcconnell/index.html',
 'times_tag': 'McConnell, Mitch (Per)',
 'govtrack_id': '300072',
 'cspan_id': '2351',
 'votesmart_id': '53298',
 'icpsr_id': '14921',
 'twitter_account': 'McConnellPress',
 'facebook_account': 'mitchmcconnell',
 'youtube_account': None,
 'crp_id': 'N00003389',
 'google_entity_id': '/m/01z6ls',
 'rss_url': 'https://www.mcconnell.senate.gov/public/?a=RSS.Feed',
 'in_office': True,
 'current_party': 'R',
 'most_recent_vote': '2020-03-26',
 'last_updated': '2020-04-22 09:45:55 -0400',
 'roles': [{'congress': '116',
   'chamber': 'Senate',
   'title': 'Senator, 2nd Class',
   'short_title': 'Sen.',
   'state': 'KY',
   'party': 'R',
   'leadership_role': 'Sen

In [121]:
bill_summary = congress.bills.get('sres15', congress=113)
bill_summary

{'bill_id': 'sres15-113',
 'bill_slug': 'sres15',
 'congress': '113',
 'bill': 'S.RES.15',
 'bill_type': 'sres',
 'number': 'S.RES.15',
 'bill_uri': 'https://api.propublica.org/congress/v1/113/bills/sres15.json',
 'title': 'A resolution to improve procedures for the consideration of legislation and nominations in the Senate.',
 'short_title': 'A resolution to improve procedures for the consideration of legislation and nominations in the Senate.',
 'sponsor_title': 'Sen.',
 'sponsor': 'Harry Reid',
 'sponsor_id': 'R000146',
 'sponsor_uri': 'https://api.propublica.org/congress/v1/members/R000146.json',
 'sponsor_party': 'D',
 'sponsor_state': 'NV',
 'gpo_pdf_uri': None,
 'congressdotgov_url': 'https://www.congress.gov/bill/113th-congress/senate-resolution/15',
 'govtrack_url': 'https://www.govtrack.us/congress/bills/113/sres15',
 'introduced_date': '2013-01-24',
 'active': True,
 'last_vote': '2013-01-24',
 'house_passage': None,
 'senate_passage': None,
 'enacted': None,
 'vetoed': None

In [122]:
bill_subject = congress.bills.subjects('sres15', congress=113)
bill_subject

{'congress': '113',
 'bill_id': 'sres15-113',
 'bill_slug': 'sres15',
 'bill_type': 'sres',
 'number': 'S.RES.15',
 'bill_uri': 'https://api.propublica.org/congress/v1/113/bills/sres15.json',
 'url_number': 'sres15',
 'title': 'A resolution to improve procedures for the consideration of legislation and nominations in the Senate.',
 'sponsor_title': 'Sen.',
 'sponsor_id': 'R000146',
 'sponsor_name': 'Harry Reid',
 'sponsor_state': 'NV',
 'sponsor_party': 'D',
 'sponsor_uri': 'https://api.propublica.org/congress/v1/members/R000146.json',
 'introduced_date': '2013-01-24',
 'number_of_cosponsors': 2,
 'committees': '',
 'latest_major_action_date': '2013-01-24',
 'latest_major_action': 'Resolution agreed to in Senate, under the order of 1/24/2012, having achieved 60 votes in the affirmative, without amendment by Yea-Nay Vote. 78 - 16. Record Vote Number: 1. (text: CR S272)',
 'house_passage_vote': None,
 'senate_passage_vote': None,
 'subjects': [{'name': 'Congress', 'url_name': 'congress'}

In [123]:
bill_amend = congress.bills.amendments('sres15', congress=113)
bill_amend

{'congress': '113',
 'bill_id': 'sres15-113',
 'num_results': 1,
 'offset': 0,
 'amendments': [{'amendment_number': 'S.AMDT.3',
   'slug': 'samdt3',
   'sponsor_title': 'Sen.',
   'sponsor': 'Mike Lee',
   'sponsor_id': 'L000577',
   'sponsor_uri': 'https://api.propublica.org/congress/v1/members/L000577.json',
   'sponsor_party': 'R',
   'sponsor_state': 'UT',
   'introduced_date': '2013-01-24',
   'title': 'To amend the Standing Rules of the Senate to reform the filibuster rules to improve the daily process of the Senate.',
   'congressdotgov_url': 'https://www.congress.gov/amendment/113th-congress/senate-amendment/3/text',
   'latest_major_action_date': '2013-01-24',
   'latest_major_action': 'Amendment SA 3 not agreed to in Senate by Voice Vote.'}]}

In [77]:
#to parse middle names
crp_ids['middle_name'] = crp_ids['CRPName'].apply(lambda x: ' '.join(x.split(', ')[1].split(' ')[1:]))
crp_ids['middle_name'] = crp_ids['middle_name'].apply(lambda x: x.replace("'", '').replace('"', '').replace('.', ''))

In [46]:
bad_crp_ids_1 = [[113, 'N00028138'], [113, 'N00035021'], [113, 'N99999896'], [113, 'N00035686'], [113, 'N00035267'], [113, 'N00043087'], [114, 'N00001955'], [114, 'N00004118']]
bad_crp_ids_2 = [[114, 'N00043087'], [115, 'N00040607'], [115, 'N00024817'], [115, 'N00006406'], [115, 'N00041275'], [115, 'N00042353'], [115, 'N00043298']]
bad_crp_ids_3 = [[115, 'N00043087'], [116, 'N00041275'], [116, 'N00046125'], [116, 'N00043087']]

print('Length of bad ids: ', len(bad_crp_ids_1)+len(bad_crp_ids_2)+len(bad_crp_ids_3))

failed_crp_ids = bad_crp_ids_1 + bad_crp_ids_2 + bad_crp_ids_3

print('Length of combined list: ', len(failed_crp_ids))
failed_crp_ids

Length of bad ids:  19
Length of combined list:  19


[[113, 'N00028138'],
 [113, 'N00035021'],
 [113, 'N99999896'],
 [113, 'N00035686'],
 [113, 'N00035267'],
 [113, 'N00043087'],
 [114, 'N00001955'],
 [114, 'N00004118'],
 [114, 'N00043087'],
 [115, 'N00040607'],
 [115, 'N00024817'],
 [115, 'N00006406'],
 [115, 'N00041275'],
 [115, 'N00042353'],
 [115, 'N00043298'],
 [115, 'N00043087'],
 [116, 'N00041275'],
 [116, 'N00046125'],
 [116, 'N00043087']]

In [38]:
i = 1
key_path = "/Users/flatironschool/.secret/open_secrets_api{}.json".format(i)
keys = get_keys(key_path)

api_key = keys['api_key']

# get_crp_industry_info(failed_crp_ids, api_key)

Congress/ID pairs parsed:  19

Industries found from pairs:  2

Pairs with issues:  [[113, 'N00028138'], [113, 'N00035021'], [113, 'N99999896'], [113, 'N00035686'], [113, 'N00035267'], [113, 'N00043087'], [114, 'N00043087'], [115, 'N00040607'], [115, 'N00024817'], [115, 'N00006406'], [115, 'N00041275'], [115, 'N00042353'], [115, 'N00043298'], [115, 'N00043087'], [116, 'N00041275'], [116, 'N00046125'], [116, 'N00043087']]


In [77]:
second_fail_ids = [[113, 'N00028138'], [113, 'N00035021'], [113, 'N99999896'], [113, 'N00035686'], 
                   [113, 'N00035267'], [113, 'N00043087'], 
                   [114, 'N00043087'], 
                   [115, 'N00040607'], [115, 'N00024817'], [115, 'N00006406'], [115, 'N00041275'], 
                   [115, 'N00042353'], [115, 'N00043298'], [115, 'N00043087'], 
                   [116, 'N00041275'], [116, 'N00046125'], 
                   [116, 'N00043087']]


In [79]:
test = member_df_with_ids[member_df_with_ids['cid'].isin([x[1] for x in second_fail_ids])]
test[['congress', 'first_name', 'alt_first_name', 'last_name', 'cid']]

Unnamed: 0,congress,first_name,alt_first_name,last_name,cid
21,113,Brian,Brian,Schatz,N00028138
43,113,William,William,Cowan,N99999896
54,113,John,John,Walsh,N00035686
64,113,Cory,Cory,Booker,N00035267
126,114,Brian,Brian,Schatz,N00028138
164,114,Cory,Cory,Booker,N00035267
205,115,Luther,Luther,Strange,N00040607
206,115,Gordon,Doug,Jones,N00024817
212,115,Jon,Jon,Kyl,N00006406
229,115,Brian,Brian,Schatz,N00028138


In [49]:
i = 1
key_path = "/Users/flatironschool/.secret/open_secrets_api{}.json".format(i)
keys = get_keys(key_path)

api_key = keys['api_key']

In [50]:
crp = CRP(api_key)

In [83]:
crp_id = 'N00024817'
year = '2015'

industries = crp.candidates.industries(crp_id, year)
industries

[{'@attributes': {'industry_code': 'W06',
   'industry_name': 'Retired',
   'indivs': '4351420',
   'pacs': '0',
   'total': '4351420'}},
 {'@attributes': {'industry_code': 'K01',
   'industry_name': 'Lawyers/Law Firms',
   'indivs': '3695532',
   'pacs': '128000',
   'total': '3823532'}},
 {'@attributes': {'industry_code': 'W04',
   'industry_name': 'Education',
   'indivs': '2240627',
   'pacs': '2000',
   'total': '2242627'}},
 {'@attributes': {'industry_code': 'F07',
   'industry_name': 'Securities & Investment',
   'indivs': '1424903',
   'pacs': '118800',
   'total': '1543703'}},
 {'@attributes': {'industry_code': 'H01',
   'industry_name': 'Health Professionals',
   'indivs': '951154',
   'pacs': '45900',
   'total': '997054'}},
 {'@attributes': {'industry_code': 'F10',
   'industry_name': 'Real Estate',
   'indivs': '860948',
   'pacs': '64000',
   'total': '924948'}},
 {'@attributes': {'industry_code': 'N05',
   'industry_name': 'Business Services',
   'indivs': '823600',
   '

In [None]:
with open('crp_jsons/{}_{}_industries.json'.format(crp_id, year), 'w') as f:
    json.dump(industries, f)

In [41]:
help(crp.candidates.sector)

Help on method sector in module crpapi:

sector(cid, cycle=None) method of crpapi.CandidatesClient instance

