In [48]:
import configparser
import requests
import sqlite3
import re
from bs4 import BeautifulSoup

In [83]:
# Member fields kept for each congress member, in table-column order.
# The trailing 'congress' and 'chamber' entries are per-request values.
COLS = (
    'id first_name middle_name last_name suffix '
    'date_of_birth gender party state '
    'congress chamber'
).split()

In [84]:
class ApiToData:
    '''
    Retrieve ProPublica API general data on past and current congress members
    and store it in a SQLite table named ``congress``.

    Parameters
    ----------
    config_file - string, optional
        Path of an .ini file with a [propublica] section that holds
        PROPUBLICA_API_KEY. Can be set later with set_config_file() or
        set_api_key().
    db_name - string, optional
        SQLite database to connect to. Can be set later with set_db_name().
    columns - list of strings, optional
        Member fields stored as table columns. 'congress' and 'chamber' are
        always stored as the last two columns, whether or not they are listed.
    '''

    # Columns filled in per request rather than read from the member record.
    _EXTRA_COLUMNS = ('congress', 'chamber')
    _CHAMBERS = ('senate', 'house')
    # Seconds to wait for an HTTP response before giving up.
    _TIMEOUT = 30

    def __init__(self, config_file=None, db_name=None, columns=None):
        self.api_key = None
        self.conn = None
        # Copy the list so instances never share state through a default
        # argument or through the caller's own list.
        self.columns = list(columns) if columns is not None else []

        # Non-string values are ignored, leaving the attribute as None.
        if isinstance(config_file, str):
            self.set_config_file(config_file)
        if isinstance(db_name, str):
            self.set_db_name(db_name)

    def set_config_file(self, config_file):
        '''
        Enter path of config_file as string. Reads config.ini with configparser.
        For ProPublica API key register at:
        https://www.propublica.org/datastore/api/propublica-congress-api
        '''
        config_ = configparser.ConfigParser()
        config_.read(config_file)
        self.api_key = config_.get('propublica', 'PROPUBLICA_API_KEY')

    def set_api_key(self, api_key):
        '''
        Enter ProPublica api_key as string. For ProPublica API key register at:
        https://www.propublica.org/datastore/api/propublica-congress-api
        '''
        self.api_key = api_key

    def set_db_name(self, db_name):
        '''
        Enter db_name as string. Opens a sqlite3 connection to it.
        '''
        self.conn = sqlite3.connect(db_name)

    def get_pp_data(self, chamber, congresses=None):
        '''
        Retrieve congressional member data from ProPublica API.

        Stores the congress range on ``senate_range`` or ``house_range``,
        creates the ``congress`` table if needed, then inserts the members of
        every congress in the range.

        Parameters
        ----------
        chamber - string, 'senate' or 'house' (case-insensitive)
        congresses - range or None. When None the available range is scraped
            from the ProPublica API documentation page.

        Raises
        ------
        AssertionError - invalid chamber, missing API key, congresses of the
            wrong type, or no database connection.
        '''
        # Raised explicitly (not via ``assert``) so the checks survive
        # ``python -O`` while keeping the exception type callers already see.
        if not isinstance(chamber, str) or chamber.lower() not in self._CHAMBERS:
            raise AssertionError("Enter 'senate' or 'house'")
        self.chamber = chamber.lower()

        if self.api_key is None:
            raise AssertionError(
                'Enter ProPublica API Key with set_config_file() or set_api_key()'
            )

        if congresses is not None and not isinstance(congresses, range):
            raise AssertionError('Congresses must be range or None type')
        if congresses is None:
            congresses = self.__scrape_range()
        if self.chamber == 'senate':
            self.senate_range = congresses
        else:
            self.house_range = congresses

        # Checked after the range is stored, so the range attribute is
        # populated even when no database has been configured yet.
        if self.conn is None:
            raise AssertionError('Set db_name with set_db_name()')
        self.__create_table()
        for n in congresses:
            self.__insert_data(n)

    def __scrape_range(self):
        '''
        Scrape the range of available congresses for self.chamber from the
        ProPublica members API documentation page.
        '''
        url = 'https://projects.propublica.org/api-docs/congress-api/members/'
        page = requests.get(url, timeout=self._TIMEOUT)
        page.raise_for_status()
        soup = BeautifulSoup(page.content, 'html.parser')
        cell = soup.find('td', string='congress')
        description = cell.find_next_sibling() if cell is not None else None
        if description is None:
            raise ValueError(f"No 'congress' parameter description found at {url}")
        return self._parse_congress_range(description.text, self.chamber)

    @staticmethod
    def _parse_congress_range(param_string, chamber):
        '''
        Extract the inclusive "first-last" congress span for chamber from the
        parameter description text and return it as a range.
        '''
        if chamber == 'senate':
            pattern = r'(?<=House, ).*(?= for Senate)'
        else:
            pattern = r'.*(?= for H)'
        match = re.search(pattern, param_string)
        if match is None:
            raise ValueError(
                f'Could not find the {chamber} congress range in: {param_string!r}'
            )
        bounds = match[0].split('-')
        if len(bounds) < 2:
            raise ValueError(f'Expected "first-last" but found: {match[0]!r}')
        # The documented span is inclusive, hence the +1.
        return range(int(bounds[0]), int(bounds[1]) + 1)

    @staticmethod
    def _quote_identifier(name):
        '''Return name quoted as a SQLite identifier.'''
        return '"' + str(name).replace('"', '""') + '"'

    def _data_columns(self):
        '''
        Return the member fields read from each API record: self.columns
        without duplicates and without the per-request extra columns.
        '''
        return list(dict.fromkeys(
            col for col in self.columns if col not in self._EXTRA_COLUMNS
        ))

    def __create_table(self):
        '''Create the congress table (all TEXT columns) if it does not exist.'''
        names = self._data_columns() + list(self._EXTRA_COLUMNS)
        cols_ = ', '.join(f'{self._quote_identifier(name)} TEXT' for name in names)
        # The connection context manager commits on success.
        with self.conn:
            self.conn.execute(f'CREATE TABLE IF NOT EXISTS congress ({cols_});')

    def __insert_data(self, n):
        '''Fetch the members of congress n for self.chamber and insert them.'''
        width = len(self._data_columns()) + len(self._EXTRA_COLUMNS)
        placeholders = ','.join('?' * width)
        insert_statement = f'INSERT INTO congress VALUES ({placeholders})'
        congress = self.__get_congress(n)
        members = self.__get_members(n, congress)
        with self.conn:
            self.conn.executemany(insert_statement, members)

    def __get_congress(self, n):
        '''Return the list of member records for congress n and self.chamber.'''
        r = requests.get(
            f'https://api.propublica.org/congress/v1/{n}/{self.chamber}/members.json',
            headers={'X-API-Key': self.api_key},
            timeout=self._TIMEOUT,
        )
        r.raise_for_status()
        return r.json()['results'][0]['members']

    def __get_members(self, n, congress):
        '''
        Turn member records into row tuples matching the table layout: the
        requested fields by name (None when missing), then congress number
        as a string and chamber.
        '''
        data_cols = self._data_columns()
        tail = (f'{n}', self.chamber)
        return [
            tuple(member.get(col) for col in data_cols) + tail
            for member in congress
        ]

In [85]:
# Only the config file is supplied; db_name and columns keep their defaults.
test = ApiToData(config_file='config.ini')

In [73]:
# No congresses argument is given, so the House range is scraped from the docs page.
test.get_pp_data(chamber='house')

In [75]:
# Display the congress range that get_pp_data stored for the House.
test.house_range

range(102, 117)